diff --git a/.github/ISSUE_TEMPLATE/blank-issue.md b/.github/ISSUE_TEMPLATE/blank-issue.md new file mode 100644 index 0000000000..9aef3ebe63 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/blank-issue.md @@ -0,0 +1,4 @@ +--- +name: Blank Issue +about: Create a blank issue. +--- diff --git a/.github/ISSUE_TEMPLATE/clif-bug-report.md b/.github/ISSUE_TEMPLATE/clif-bug-report.md new file mode 100644 index 0000000000..3efc27d4f1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/clif-bug-report.md @@ -0,0 +1,16 @@ +--- +name: "Cranelift Bug report" +about: "Report a bug or a crash in Cranelift." +labels: 'bug' +--- + +Thanks for opening a bug report! Please answer the questions below +if they're relevant and delete this text before submitting. + +- What are the steps to reproduce the issue? Can you include a CLIF test case, + ideally reduced with the `bugpoint` clif-util command? +- What do you expect to happen? What does actually happen? Does it panic, and + if so, with which assertion? +- Which Cranelift version / commit hash / branch are you using? +- If relevant, can you include some extra information about your environment? + (Rust version, operating system, architecture...) diff --git a/.github/ISSUE_TEMPLATE/improvement.md b/.github/ISSUE_TEMPLATE/improvement.md new file mode 100644 index 0000000000..a4c6009249 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/improvement.md @@ -0,0 +1,28 @@ +--- +name: "Improvement" +about: "A feature request or code improvement." +--- + + + +#### Feature + + + +#### Benefit + + + +#### Implementation + + + +#### Alternatives + + diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000000..18ad42fa5b --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,18 @@ + diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f1679f2e79..80438ab912 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -52,7 +52,8 @@ jobs: - uses: ./.github/actions/install-rust with: toolchain: nightly - - run: cargo doc --no-deps --all --exclude wasmtime-cli --exclude test-programs + - run: cargo doc --no-deps --all --exclude wasmtime-cli --exclude test-programs --exclude cranelift-codegen-meta + - run: cargo doc --package cranelift-codegen-meta --document-private-items - uses: actions/upload-artifact@v1 with: name: doc-api @@ -107,6 +108,16 @@ jobs: | shuf \ | head -n 100 \ | xargs cargo fuzz run differential --release --debug-assertions + - run: | + find fuzz/corpus/reader_parse_test -type f \ + | shuf \ + | head -n 100 \ + | xargs cargo fuzz run reader_parse_test --release --debug-assertions + - run: | + find fuzz/corpus/translate_module -type f \ + | shuf \ + | head -n 100 \ + | xargs cargo fuzz run translate_module --release --debug-assertions # Install wasm32-unknown-emscripten target, and ensure `crates/wasi-common` # compiles to Emscripten. @@ -209,6 +220,19 @@ jobs: env: RUST_BACKTRACE: 1 + # Verify that cranelift's code generation is deterministic + meta_determinist_check: + name: Meta deterministic check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + with: + submodules: true + - name: Install Rust + run: rustup update stable && rustup default stable + - run: cd cranelift/codegen && cargo build --features all-arch + - run: ci/ensure_deterministic_build.sh + # Builds a Python wheel (package) for Windows/Mac/Linux. Note that we're # careful to create binary-compatible releases here to old releases of # Windows/Mac/Linux. 
This will also build wheels for Python 3.6, 3.7 and 3.8. diff --git a/.gitignore b/.gitignore index daa279dbaf..4c37c4f83d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,13 +1,16 @@ *.bk -*.swp +*.pyc *.swo +*.swp *.swx -tags -target -.*.rustfmt -cranelift.dbg* -rusty-tags.* *~ +.*.rustfmt +.mypy_cache \#*\# +cranelift.dbg* +docs/_build docs/book .vscode/ +rusty-tags.* +tags +target diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ba67ac54da..54552904d7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,13 +1,12 @@ -# Contributing to Wasmtime +# Contributing to Wasmtime and/or Cranelift -Wasmtime is a [Bytecode Alliance] project, and follows the Bytecode Alliance's [Code of Conduct] and [Organizational Code of Conduct]. +Wasmtime and Cranelift are [Bytecode Alliance] projects. They follow the +Bytecode Alliance's [Code of Conduct] and [Organizational Code of Conduct]. -Wasmtime follows the same development style as Cranelift, so check out -[Cranelift's CONTRIBUTING.md]. Of course, for Wasmtime-specific issues, please -use the [Wasmtime issue tracker]. +For more information about contributing to these projects you can consult the +[online documentation] which should cover all sorts of topics. [Bytecode Alliance]: https://bytecodealliance.org/ [Code of Conduct]: CODE_OF_CONDUCT.md [Organizational Code of Conduct]: ORG_CODE_OF_CONDUCT.md -[Cranelift's CONTRIBUTING.md]: https://github.com/bytecodealliance/cranelift/blob/master/CONTRIBUTING.md -[Wasmtime issue tracker]: https://github.com/bytecodealliance/wasmtime/issues/new +[online documentation]: https://bytecodealliance.github.io/wasmtime/contributing.html diff --git a/Cargo.lock b/Cargo.lock index 1df0ef9d1b..debe8fbd6a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6,6 +6,15 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" +[[package]] +name = "ahash" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f33b5018f120946c1dcf279194f238a9f146725593ead1c08fa47ff22b0b5d3" +dependencies = [ + "const-random", +] + [[package]] name = "aho-corasick" version = "0.7.8" @@ -68,6 +77,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "autocfg" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" + [[package]] name = "autocfg" version = "1.0.0" @@ -305,6 +320,26 @@ dependencies = [ "cc", ] +[[package]] +name = "const-random" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f1af9ac737b2dd2d577701e59fd09ba34822f6f2ebdb30a7647405d9e55e16a" +dependencies = [ + "const-random-macro", + "proc-macro-hack", +] + +[[package]] +name = "const-random-macro" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25e4c606eb459dd29f7c57b2e0879f2b6f14ee130918c2b78ccb58a9624e6c7a" +dependencies = [ + "getrandom", + "proc-macro-hack", +] + [[package]] name = "constant_time_eq" version = "0.1.5" @@ -324,8 +359,6 @@ dependencies = [ [[package]] name = "cranelift-bforest" version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45a9c21f8042b9857bda93f6c1910b9f9f24100187a3d3d52f214a34e3dc5818" dependencies = [ "cranelift-entity", ] @@ -333,8 +366,6 @@ dependencies = [ [[package]] name = "cranelift-codegen" version = "0.59.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7853f77a6e4a33c67a69c40f5e1bb982bd2dc5c4a22e17e67b65bbccf9b33b2e" dependencies = [ "byteorder", "cranelift-bforest", @@ -342,6 +373,7 @@ dependencies = [ "cranelift-codegen-shared", "cranelift-entity", "gimli", + "hashbrown", "log", "serde", "smallvec", @@ -352,8 +384,6 @@ dependencies = [ [[package]] name = "cranelift-codegen-meta" version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084cd6d5fb0d1da28acd72c199471bfb09acc703ec8f3bf07b1699584272a3b9" dependencies = [ "cranelift-codegen-shared", "cranelift-entity", @@ -362,14 +392,10 @@ dependencies = [ [[package]] name = "cranelift-codegen-shared" version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "701b599783305a58c25027a4d73f2d6b599b2d8ef3f26677275f480b4d51e05d" [[package]] name = "cranelift-entity" version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b88e792b28e1ebbc0187b72ba5ba880dad083abe9231a99d19604d10c9e73f38" dependencies = [ "serde", ] @@ -377,10 +403,9 @@ dependencies = [ [[package]] name = "cranelift-frontend" version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "518344698fa6c976d853319218415fdfb4f1bc6b42d0b2e2df652e55dff1f778" dependencies = [ "cranelift-codegen", + "hashbrown", "log", "smallvec", "target-lexicon", @@ -389,27 +414,34 @@ dependencies = [ [[package]] name = "cranelift-native" version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32daf082da21c0c05d93394ff4842c2ab7c4991b1f3186a1d952f8ac660edd0b" dependencies = [ "cranelift-codegen", "raw-cpuid", "target-lexicon", ] +[[package]] +name = "cranelift-reader" +version = "0.59.0" +dependencies = [ + "cranelift-codegen", + "target-lexicon", +] + [[package]] name = "cranelift-wasm" version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2aa816f554a3ef739a5d17ca3081a1f8983f04c944ea8ff60fb8d9dd8cd2d7b" dependencies = [ "cranelift-codegen", "cranelift-entity", "cranelift-frontend", + "hashbrown", "log", "serde", + "target-lexicon", "thiserror", "wasmparser 0.51.2", + "wat", ] [[package]] @@ -438,7 +470,7 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace" dependencies = [ - "autocfg", + "autocfg 1.0.0", "cfg-if", "crossbeam-utils", "lazy_static", @@ -463,7 +495,7 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" dependencies = [ - "autocfg", + "autocfg 1.0.0", "cfg-if", "lazy_static", ] @@ -776,6 +808,16 @@ dependencies = [ "scroll", ] +[[package]] +name = "hashbrown" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e6073d0ca812575946eb5f35ff68dbe519907b25c42530389ff946dc84c6ead" +dependencies = [ + "ahash", + "autocfg 0.1.7", +] + [[package]] name = "heck" version = "0.3.1" @@ -815,7 +857,7 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "076f042c5b7b98f31d205f1249267e12a6518c1481e9dae9764af19b707d2292" dependencies = [ - "autocfg", + "autocfg 1.0.0", ] [[package]] @@ -1053,7 +1095,7 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b6b19411a9719e753aff12e5187b74d60d3dc449ec3f4dc21e3989c3f554bc95" dependencies = [ - "autocfg", + "autocfg 1.0.0", "num-traits", ] @@ -1063,7 +1105,7 @@ version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f6ea62e9d81a77cd3ee9a2a5b9b609447857f3d358704331e4ef39eb247fcba" dependencies = [ - "autocfg", + "autocfg 1.0.0", "num-traits", ] @@ -1073,7 +1115,7 @@ version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfb0800a0291891dd9f4fe7bd9c19384f98f7fbe0cd0f39a2c6b88b9868bbc00" dependencies = [ - "autocfg", + "autocfg 1.0.0", "num-integer", "num-traits", ] @@ -1084,7 +1126,7 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da4dc79f9e6c81bef96148c8f6b8e72ad4541caa4a24373e900a36da07de03a3" dependencies = [ - "autocfg", + "autocfg 1.0.0", "num-integer", "num-traits", ] @@ -1095,7 +1137,7 @@ version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096" dependencies = [ - "autocfg", + "autocfg 1.0.0", ] [[package]] @@ -2080,7 +2122,11 @@ name = "wasmtime-fuzz" version = "0.12.0" dependencies = [ "arbitrary 0.2.0", + "cranelift-codegen", + "cranelift-reader", + "cranelift-wasm", "libfuzzer-sys", + "target-lexicon", "wasmtime", "wasmtime-fuzzing", ] diff --git a/ci/ensure_deterministic_build.sh b/ci/ensure_deterministic_build.sh new file mode 100755 index 0000000000..ebed6cd1c7 --- /dev/null +++ b/ci/ensure_deterministic_build.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# This script makes sure that the meta crate deterministically generate files +# with a high probability. +# The current directory must be set to the repository's root. + +set -e + +BUILD_SCRIPT=$(find -wholename "./target/debug/build/cranelift-codegen-*/build-script-build") + +# First, run the script to generate a reference comparison. +rm -rf /tmp/reference +mkdir /tmp/reference +OUT_DIR=/tmp/reference TARGET=x86_64 $BUILD_SCRIPT + +# To make sure the build script doesn't depend on the current directory, we'll +# change the current working directory on every iteration. Make this easy to +# reproduce this locally by first copying the target/ directory into an initial +# temporary directory (and not move and lose the local clone's content). +rm -rf /tmp/src0 +mkdir /tmp/src0 + +echo Copying target directory... +cp -r ./target /tmp/src0/target +cd /tmp/src0 +echo "Done, starting loop." + +# Then, repeatedly make sure that the output is the same. +for i in {1..20} +do + # Move to a different directory, as explained above. 
+ rm -rf /tmp/src$i + mkdir /tmp/src$i + mv ./* /tmp/src$i + cd /tmp/src$i + + rm -rf /tmp/try + mkdir /tmp/try + OUT_DIR=/tmp/try TARGET=x86_64 $BUILD_SCRIPT + diff -qr /tmp/reference /tmp/try +done diff --git a/clippy.toml b/clippy.toml index 152f137769..caabf12b77 100644 --- a/clippy.toml +++ b/clippy.toml @@ -1 +1 @@ -doc-valid-idents = ["WebAssembly"] +doc-valid-idents = [ "WebAssembly", "NaN", "SetCC" ] diff --git a/cranelift/Cargo.toml b/cranelift/Cargo.toml new file mode 100644 index 0000000000..c8eb79c27a --- /dev/null +++ b/cranelift/Cargo.toml @@ -0,0 +1,47 @@ +[package] +name = "cranelift-tools" +authors = ["The Cranelift Project Developers"] +version = "0.59.0" +description = "Binaries for testing the Cranelift libraries" +license = "Apache-2.0 WITH LLVM-exception" +documentation = "https://cranelift.readthedocs.io/" +repository = "https://github.com/bytecodealliance/cranelift" +publish = false +edition = "2018" + +[[bin]] +name = "clif-util" +path = "src/clif-util.rs" + +[dependencies] +cfg-if = "0.1" +cranelift-codegen = { path = "codegen", version = "0.59.0" } +cranelift-entity = { path = "entity", version = "0.59.0" } +cranelift-reader = { path = "reader", version = "0.59.0" } +cranelift-frontend = { path = "frontend", version = "0.59.0" } +cranelift-serde = { path = "serde", version = "0.59.0", optional = true } +cranelift-wasm = { path = "wasm", version = "0.59.0", optional = true } +cranelift-native = { path = "native", version = "0.59.0" } +cranelift-filetests = { path = "filetests", version = "0.59.0" } +cranelift-module = { path = "module", version = "0.59.0" } +cranelift-faerie = { path = "faerie", version = "0.59.0" } +cranelift-object = { path = "object", version = "0.59.0" } +cranelift-simplejit = { path = "simplejit", version = "0.59.0" } +cranelift-preopt = { path = "preopt", version = "0.59.0" } +cranelift = { path = "umbrella", version = "0.59.0" } +filecheck = "0.4.0" +clap = "2.32.0" +serde = "1.0.8" +term = "0.6.1" +capstone = { version = "0.6.0", optional = true } +wat = { version = "1.0.7", optional = true } +target-lexicon = "0.10" +pretty_env_logger = "0.3.0" +file-per-thread-logger = "0.1.2" +indicatif = "0.13.0" +walkdir = "2.2" + +[features] +default = ["disas", "wasm", "cranelift-codegen/all-arch"] +disas = ["capstone"] +wasm = ["wat", "cranelift-wasm"] diff --git a/cranelift/README.md b/cranelift/README.md new file mode 100644 index 0000000000..fb852aad61 --- /dev/null +++ b/cranelift/README.md @@ -0,0 +1,183 @@ +Cranelift Code Generator +======================== + +**A [Bytecode Alliance][BA] project** + +Cranelift is a low-level retargetable code generator. It translates a +[target-independent intermediate +representation](https://cranelift.readthedocs.io/en/latest/ir.html) +into executable machine code. 
+ +[BA]: https://bytecodealliance.org/ +[![Documentation Status](https://readthedocs.org/projects/cranelift/badge/?version=latest)](https://cranelift.readthedocs.io/en/latest/?badge=latest) +[![Build Status](https://github.com/bytecodealliance/cranelift/workflows/CI/badge.svg)](https://github.com/bytecodealliance/cranelift/actions) +[![Fuzzit Status](https://app.fuzzit.dev/badge?org_id=bytecodealliance)](https://app.fuzzit.dev/orgs/bytecodealliance/dashboard) +[![Chat](https://img.shields.io/badge/chat-zulip-brightgreen.svg)](https://bytecodealliance.zulipchat.com/#narrow/stream/217117-cranelift/topic/general) +![Minimum rustc 1.37](https://img.shields.io/badge/rustc-1.37+-green.svg) + +For more information, see [the +documentation](https://cranelift.readthedocs.io/en/latest/?badge=latest). + +For an example of how to use the JIT, see the [SimpleJIT Demo], which +implements a toy language. + +[SimpleJIT Demo]: https://github.com/bytecodealliance/simplejit-demo + +For an example of how to use Cranelift to run WebAssembly code, see +[Wasmtime], which implements a standalone, embeddable VM using Cranelift. + +[Wasmtime]: https://github.com/bytecodealliance/wasmtime + +Status +------ + +Cranelift currently supports enough functionality to run a wide variety +of programs, including all the functionality needed to execute +WebAssembly MVP functions, although it needs to be used within an +external WebAssembly embedding to be part of a complete WebAssembly +implementation. + +The x86-64 backend is currently the most complete and stable; other +architectures are in various stages of development. Cranelift currently +supports both the System V AMD64 ABI calling convention used on many +platforms and the Windows x64 calling convention. The performance +of code produced by Cranelift is not yet impressive, though we have plans +to fix that. + +The core codegen crates have minimal dependencies, support `no_std` mode +(see below), do not require any host floating-point support, and +do not use callstack recursion. + +Cranelift does not yet perform mitigations for Spectre or related +security issues, though it may do so in the future. It does not +currently make any security-relevant instruction timing guarantees. It +has seen a fair amount of testing and fuzzing, although more work is +needed before it would be ready for a production use case. + +Cranelift's APIs are not yet stable. + +Cranelift currently requires Rust 1.37 or later to build. + +Contributing +------------ + +If you're interested in contributing to Cranelift: thank you! We have a +[contributing guide](CONTRIBUTING.md) which will help you get involved in +the Cranelift project. + +Planned uses +------------ + +Cranelift is designed to be a code generator for WebAssembly, but it is +general enough to be useful elsewhere too. The initial planned uses that +affected its design are: + + - [WebAssembly compiler for the SpiderMonkey engine in + Firefox](spidermonkey.md#phase-1-webassembly). + - [Backend for the IonMonkey JavaScript JIT compiler in + Firefox](spidermonkey.md#phase-2-ionmonkey). + - [Debug build backend for the Rust compiler](rustc.md). + - [Wasmtime non-Web wasm engine](https://github.com/bytecodealliance/wasmtime). + +Building Cranelift +------------------ + +Cranelift uses a [conventional Cargo build +process](https://doc.rust-lang.org/cargo/guide/working-on-an-existing-project.html). 
+ +Cranelift consists of a collection of crates, and uses a [Cargo +Workspace](https://doc.rust-lang.org/book/ch14-03-cargo-workspaces.html), +so for some cargo commands, such as `cargo test`, the `--all` flag is needed +to tell cargo to visit all of the crates. + +`test-all.sh` at the top level is a script that runs all the cargo +tests and also performs code format, lint, and documentation checks. + +
+Building with `no_std` +---------------------- + +The following crates support `no_std`, although they do depend on liballoc: + - cranelift-entity + - cranelift-bforest + - cranelift-codegen + - cranelift-frontend + - cranelift-native + - cranelift-wasm + - cranelift-module + - cranelift-preopt + - cranelift + +To use `no_std` mode, disable the std feature and enable the core +feature. This currently requires nightly Rust. + +For example, to build `cranelift-codegen`: + +```sh +cd cranelift/codegen +cargo build --no-default-features --features core +``` + +Or, when using cranelift-codegen as a dependency (in Cargo.toml): + +```toml +[dependencies.cranelift-codegen] +... +default-features = false +features = ["core"] +``` + +`no_std` support is currently "best effort". We won't try to break it, +and we'll accept patches fixing problems; however, we don't expect all +developers to build and test `no_std` when submitting patches. +Accordingly, the ./test-all.sh script does not test `no_std`. + +There is a separate ./test-no_std.sh script that tests the `no_std` +support in packages which support it. + +It's important to note that Cranelift still needs liballoc to compile. +Thus, whatever environment is used must implement an allocator. + +Also, to allow the use of HashMaps with `no_std`, an external crate +called `hashmap_core` is pulled in (via the core feature). This is mostly +the same as `std::collections::HashMap`, except that it doesn't have DoS +protection. Just something to think about. + +
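+To make the liballoc requirement concrete, here is a minimal sketch of a
+`no_std` library crate (hypothetical, not one of the crates listed above): it
+opts out of std, pulls in liballoc explicitly, and relies on the embedding
+binary to register a `#[global_allocator]`:
+
+```rust
+#![no_std]
+
+// liballoc must be pulled in explicitly in a no_std crate.
+extern crate alloc;
+
+use alloc::vec::Vec;
+
+/// Collections from liballoc work as usual; every allocation is routed
+/// through whatever global allocator the final binary installs.
+pub fn squares(n: u32) -> Vec<u32> {
+    (0..n).map(|i| i * i).collect()
+}
+```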
+ +
+Log configuration +----------------- + +Cranelift uses the `log` crate to log messages at various levels. It doesn't +specify any maximum logging level, so embedders can choose what it should be; +however, this can have an impact on Cranelift's code size. You can use `log` +features to reduce the maximum logging level. For instance, if you want to limit +the level of logging to `warn` messages and above in release mode: + +```toml +[dependencies.log] +... +features = ["release_max_level_warn"] +```
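+As a sketch of why this matters for code size (the function below is
+hypothetical, not taken from the Cranelift sources): `log` statically disables
+every macro above the chosen maximum level, so the corresponding formatting
+code is dropped from release builds entirely:
+
+```rust
+use log::{debug, warn};
+
+// With features = ["release_max_level_warn"], the debug! call below compiles
+// to a no-op in release mode, while the warn! call is kept.
+pub fn compile_function(name: &str) {
+    debug!("lowering `{}`", name);
+    warn!("no fast path for `{}`, falling back", name);
+}
+```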
+ +
+Building the documentation +-------------------------- + +Cranelift's documentation is [published online](https://cranelift.readthedocs.io/). + +To build the documentation locally, you need the [Sphinx documentation +generator](http://www.sphinx-doc.org/) as well as Python 3: + + $ pip install sphinx sphinx-autobuild sphinx_rtd_theme + $ cd cranelift/docs + $ make html + $ open _build/html/index.html + +
+ +Editor Support +-------------- + +Editor support for working with Cranelift IR (clif) files: + + - Vim: https://github.com/bytecodealliance/cranelift.vim diff --git a/cranelift/bforest/Cargo.toml b/cranelift/bforest/Cargo.toml new file mode 100644 index 0000000000..11ea1836d0 --- /dev/null +++ b/cranelift/bforest/Cargo.toml @@ -0,0 +1,19 @@ +[package] +authors = ["The Cranelift Project Developers"] +name = "cranelift-bforest" +version = "0.59.0" +description = "A forest of B+-trees" +license = "Apache-2.0 WITH LLVM-exception" +documentation = "https://cranelift.readthedocs.io/" +repository = "https://github.com/bytecodealliance/cranelift" +categories = ["no-std"] +readme = "README.md" +keywords = ["btree", "forest", "set", "map"] +edition = "2018" + +[dependencies] +cranelift-entity = { path = "../entity", version = "0.59.0", default-features = false } + +[badges] +maintenance = { status = "experimental" } +travis-ci = { repository = "bytecodealliance/cranelift" } diff --git a/cranelift/bforest/LICENSE b/cranelift/bforest/LICENSE new file mode 100644 index 0000000000..f9d81955f4 --- /dev/null +++ b/cranelift/bforest/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
+ diff --git a/cranelift/bforest/README.md b/cranelift/bforest/README.md new file mode 100644 index 0000000000..391d6287d2 --- /dev/null +++ b/cranelift/bforest/README.md @@ -0,0 +1,12 @@ +This crate contains array-based data structures used by the core Cranelift code +generator which represent a set of small ordered sets or maps. + +**These are not general purpose data structures that are somehow magically faster that the +standard library's `BTreeSet` and `BTreeMap` types.** + +The tradeoffs are different: + +- Keys and values are expected to be small and copyable. We optimize for 32-bit types. +- A comparator object is used to compare keys, allowing smaller "context free" keys. +- Empty trees have a very small 32-bit footprint. +- All the trees in a forest can be cleared in constant time. diff --git a/cranelift/bforest/src/lib.rs b/cranelift/bforest/src/lib.rs new file mode 100644 index 0000000000..bc79ffc7d0 --- /dev/null +++ b/cranelift/bforest/src/lib.rs @@ -0,0 +1,199 @@ +//! A forest of B+-trees. +//! +//! This crate provides a data structures representing a set of small ordered sets or maps. +//! It is implemented as a forest of B+-trees all allocating nodes out of the same pool. +//! +//! **These are not general purpose data structures that are somehow magically faster that the +//! standard library's `BTreeSet` and `BTreeMap` types.** +//! +//! The tradeoffs are different: +//! +//! - Keys and values are expected to be small and copyable. We optimize for 32-bit types. +//! - A comparator object is used to compare keys, allowing smaller "context free" keys. +//! - Empty trees have a very small 32-bit footprint. +//! - All the trees in a forest can be cleared in constant time. + +#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)] +#![warn(unused_import_braces)] +#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))] +#![cfg_attr( + feature = "cargo-clippy", + warn( + clippy::float_arithmetic, + clippy::mut_mut, + clippy::nonminimal_bool, + clippy::option_map_unwrap_or, + clippy::option_map_unwrap_or_else, + clippy::print_stdout, + clippy::unicode_not_nfc, + clippy::use_self + ) +)] +#![no_std] + +#[cfg(test)] +extern crate alloc; + +#[macro_use] +extern crate cranelift_entity as entity; +use crate::entity::packed_option; + +use core::borrow::BorrowMut; +use core::cmp::Ordering; + +mod map; +mod node; +mod path; +mod pool; +mod set; + +pub use self::map::{Map, MapCursor, MapForest, MapIter}; +pub use self::set::{Set, SetCursor, SetForest, SetIter}; + +use self::node::NodeData; +use self::path::Path; +use self::pool::NodePool; + +/// The maximum branching factor of an inner node in a B+-tree. +/// The minimum number of outgoing edges is `INNER_SIZE/2`. +const INNER_SIZE: usize = 8; + +/// Given the worst case branching factor of `INNER_SIZE/2` = 4, this is the +/// worst case path length from the root node to a leaf node in a tree with 2^32 +/// entries. We would run out of node references before we hit `MAX_PATH`. +const MAX_PATH: usize = 16; + +/// Key comparator. +/// +/// Keys don't need to implement `Ord`. They are compared using a comparator object which +/// provides a context for comparison. +pub trait Comparator +where + K: Copy, +{ + /// Compare keys `a` and `b`. + /// + /// This relation must provide a total ordering or the key space. + fn cmp(&self, a: K, b: K) -> Ordering; + + /// Binary search for `k` in an ordered slice. 
+ /// + /// Assume that `s` is already sorted according to this ordering, search for the key `k`. + /// + /// Returns `Ok(idx)` if `k` was found in the slice or `Err(idx)` with the position where it + /// should be inserted to preserve the ordering. + fn search(&self, k: K, s: &[K]) -> Result { + s.binary_search_by(|x| self.cmp(*x, k)) + } +} + +/// Trivial comparator that doesn't actually provide any context. +impl Comparator for () +where + K: Copy + Ord, +{ + fn cmp(&self, a: K, b: K) -> Ordering { + a.cmp(&b) + } +} + +/// Family of types shared by the map and set forest implementations. +trait Forest { + /// The key type is present for both sets and maps. + type Key: Copy; + + /// The value type is `()` for sets. + type Value: Copy; + + /// An array of keys for the leaf nodes. + type LeafKeys: Copy + BorrowMut<[Self::Key]>; + + /// An array of values for the leaf nodes. + type LeafValues: Copy + BorrowMut<[Self::Value]>; + + /// Splat a single key into a whole array. + fn splat_key(key: Self::Key) -> Self::LeafKeys; + + /// Splat a single value inst a whole array + fn splat_value(value: Self::Value) -> Self::LeafValues; +} + +/// A reference to a B+-tree node. +#[derive(Clone, Copy, PartialEq, Eq)] +struct Node(u32); +entity_impl!(Node, "node"); + +/// Empty type to be used as the "value" in B-trees representing sets. +#[derive(Clone, Copy)] +struct SetValue(); + +/// Insert `x` into `s` at position `i`, pushing out the last element. +fn slice_insert(s: &mut [T], i: usize, x: T) { + for j in (i + 1..s.len()).rev() { + s[j] = s[j - 1]; + } + s[i] = x; +} + +/// Shift elements in `s` to the left by `n` positions. +fn slice_shift(s: &mut [T], n: usize) { + for j in 0..s.len() - n { + s[j] = s[j + n]; + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::entity::EntityRef; + + /// An opaque reference to a basic block in a function. + #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] + pub struct Block(u32); + entity_impl!(Block, "block"); + + #[test] + fn comparator() { + let block1 = Block::new(1); + let block2 = Block::new(2); + let block3 = Block::new(3); + let block4 = Block::new(4); + let vals = [block1, block2, block4]; + let comp = (); + assert_eq!(comp.search(block1, &vals), Ok(0)); + assert_eq!(comp.search(block3, &vals), Err(2)); + assert_eq!(comp.search(block4, &vals), Ok(2)); + } + + #[test] + fn slice_insertion() { + let mut a = ['a', 'b', 'c', 'd']; + + slice_insert(&mut a[0..1], 0, 'e'); + assert_eq!(a, ['e', 'b', 'c', 'd']); + + slice_insert(&mut a, 0, 'a'); + assert_eq!(a, ['a', 'e', 'b', 'c']); + + slice_insert(&mut a, 3, 'g'); + assert_eq!(a, ['a', 'e', 'b', 'g']); + + slice_insert(&mut a, 1, 'h'); + assert_eq!(a, ['a', 'h', 'e', 'b']); + } + + #[test] + fn slice_shifting() { + let mut a = ['a', 'b', 'c', 'd']; + + slice_shift(&mut a[0..1], 1); + assert_eq!(a, ['a', 'b', 'c', 'd']); + + slice_shift(&mut a[1..], 1); + assert_eq!(a, ['a', 'c', 'd', 'd']); + + slice_shift(&mut a, 2); + assert_eq!(a, ['d', 'd', 'd', 'd']); + } +} diff --git a/cranelift/bforest/src/map.rs b/cranelift/bforest/src/map.rs new file mode 100644 index 0000000000..79ac018a98 --- /dev/null +++ b/cranelift/bforest/src/map.rs @@ -0,0 +1,923 @@ +//! Forest of maps. + +use super::{Comparator, Forest, Node, NodeData, NodePool, Path, INNER_SIZE}; +use crate::packed_option::PackedOption; +#[cfg(test)] +use alloc::string::String; +#[cfg(test)] +use core::fmt; +use core::marker::PhantomData; + +/// Tag type defining forest types for a map. 
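+// `MapTypes` carries no data of its own: it exists only to instantiate the
+// `Forest` trait (the family of types shared by the map and set forest
+// implementations), so both maps and sets can reuse the same `NodePool`.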
+struct MapTypes(PhantomData<(K, V)>); + +impl Forest for MapTypes +where + K: Copy, + V: Copy, +{ + type Key = K; + type Value = V; + type LeafKeys = [K; INNER_SIZE - 1]; + type LeafValues = [V; INNER_SIZE - 1]; + + fn splat_key(key: Self::Key) -> Self::LeafKeys { + [key; INNER_SIZE - 1] + } + + fn splat_value(value: Self::Value) -> Self::LeafValues { + [value; INNER_SIZE - 1] + } +} + +/// Memory pool for a forest of `Map` instances. +pub struct MapForest +where + K: Copy, + V: Copy, +{ + nodes: NodePool>, +} + +impl MapForest +where + K: Copy, + V: Copy, +{ + /// Create a new empty forest. + pub fn new() -> Self { + Self { + nodes: NodePool::new(), + } + } + + /// Clear all maps in the forest. + /// + /// All `Map` instances belong to this forest are invalidated and should no longer be used. + pub fn clear(&mut self) { + self.nodes.clear(); + } +} + +/// B-tree mapping from `K` to `V`. +/// +/// This is not a general-purpose replacement for `BTreeMap`. See the [module +/// documentation](index.html) for more information about design tradeoffs. +/// +/// Maps can be cloned, but that operation should only be used as part of cloning the whole forest +/// they belong to. *Cloning a map does not allocate new memory for the clone*. It creates an alias +/// of the same memory. +#[derive(Clone)] +pub struct Map +where + K: Copy, + V: Copy, +{ + root: PackedOption, + unused: PhantomData<(K, V)>, +} + +impl Map +where + K: Copy, + V: Copy, +{ + /// Make an empty map. + pub fn new() -> Self { + Self { + root: None.into(), + unused: PhantomData, + } + } + + /// Is this an empty map? + pub fn is_empty(&self) -> bool { + self.root.is_none() + } + + /// Get the value stored for `key`. + pub fn get>(&self, key: K, forest: &MapForest, comp: &C) -> Option { + self.root + .expand() + .and_then(|root| Path::default().find(key, root, &forest.nodes, comp)) + } + + /// Look up the value stored for `key`. + /// + /// If it exists, return the stored key-value pair. + /// + /// Otherwise, return the last key-value pair with a key that is less than or equal to `key`. + /// + /// If no stored keys are less than or equal to `key`, return `None`. + pub fn get_or_less>( + &self, + key: K, + forest: &MapForest, + comp: &C, + ) -> Option<(K, V)> { + self.root.expand().and_then(|root| { + let mut path = Path::default(); + match path.find(key, root, &forest.nodes, comp) { + Some(v) => Some((key, v)), + None => path.prev(root, &forest.nodes), + } + }) + } + + /// Insert `key, value` into the map and return the old value stored for `key`, if any. + pub fn insert>( + &mut self, + key: K, + value: V, + forest: &mut MapForest, + comp: &C, + ) -> Option { + self.cursor(forest, comp).insert(key, value) + } + + /// Remove `key` from the map and return the removed value for `key`, if any. + pub fn remove>( + &mut self, + key: K, + forest: &mut MapForest, + comp: &C, + ) -> Option { + let mut c = self.cursor(forest, comp); + if c.goto(key).is_some() { + c.remove() + } else { + None + } + } + + /// Remove all entries. + pub fn clear(&mut self, forest: &mut MapForest) { + if let Some(root) = self.root.take() { + forest.nodes.free_tree(root); + } + } + + /// Retains only the elements specified by the predicate. + /// + /// Remove all key-value pairs where the predicate returns false. + /// + /// The predicate is allowed to update the values stored in the map. 
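+ ///
+ /// Example (an illustrative sketch): bump every value, keeping only the
+ /// entries with even keys.
+ ///
+ /// ```
+ /// use cranelift_bforest::{Map, MapForest};
+ ///
+ /// let f = &mut MapForest::<u32, u32>::new();
+ /// let mut m = Map::<u32, u32>::new();
+ /// m.insert(1, 10, f, &());
+ /// m.insert(2, 20, f, &());
+ /// m.retain(f, |k, v| {
+ ///     *v += 1;
+ ///     k % 2 == 0
+ /// });
+ /// assert_eq!(m.get(1, f, &()), None);
+ /// assert_eq!(m.get(2, f, &()), Some(21));
+ /// ```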
+ pub fn retain(&mut self, forest: &mut MapForest, mut predicate: F) + where + F: FnMut(K, &mut V) -> bool, + { + let mut path = Path::default(); + if let Some(root) = self.root.expand() { + path.first(root, &forest.nodes); + } + while let Some((node, entry)) = path.leaf_pos() { + let keep = { + let (ks, vs) = forest.nodes[node].unwrap_leaf_mut(); + predicate(ks[entry], &mut vs[entry]) + }; + if keep { + path.next(&forest.nodes); + } else { + self.root = path.remove(&mut forest.nodes).into(); + } + } + } + + /// Create a cursor for navigating this map. The cursor is initially positioned off the end of + /// the map. + pub fn cursor<'a, C: Comparator>( + &'a mut self, + forest: &'a mut MapForest, + comp: &'a C, + ) -> MapCursor<'a, K, V, C> { + MapCursor::new(self, forest, comp) + } + + /// Create an iterator traversing this map. The iterator type is `(K, V)`. + pub fn iter<'a>(&'a self, forest: &'a MapForest) -> MapIter<'a, K, V> { + MapIter { + root: self.root, + pool: &forest.nodes, + path: Path::default(), + } + } +} + +impl Default for Map +where + K: Copy, + V: Copy, +{ + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +impl Map +where + K: Copy + fmt::Display, + V: Copy, +{ + /// Verify consistency. + fn verify>(&self, forest: &MapForest, comp: &C) + where + NodeData>: fmt::Display, + { + if let Some(root) = self.root.expand() { + forest.nodes.verify_tree(root, comp); + } + } + + /// Get a text version of the path to `key`. + fn tpath>(&self, key: K, forest: &MapForest, comp: &C) -> String { + use alloc::string::ToString; + match self.root.expand() { + None => "map(empty)".to_string(), + Some(root) => { + let mut path = Path::default(); + path.find(key, root, &forest.nodes, comp); + path.to_string() + } + } + } +} + +/// A position in a `Map` used to navigate and modify the ordered map. +/// +/// A cursor always points at a key-value pair in the map, or "off the end" which is a position +/// after the last entry in the map. +pub struct MapCursor<'a, K, V, C> +where + K: 'a + Copy, + V: 'a + Copy, + C: 'a + Comparator, +{ + root: &'a mut PackedOption, + pool: &'a mut NodePool>, + comp: &'a C, + path: Path>, +} + +impl<'a, K, V, C> MapCursor<'a, K, V, C> +where + K: Copy, + V: Copy, + C: Comparator, +{ + /// Create a cursor with a default (off-the-end) location. + fn new(container: &'a mut Map, forest: &'a mut MapForest, comp: &'a C) -> Self { + Self { + root: &mut container.root, + pool: &mut forest.nodes, + comp, + path: Path::default(), + } + } + + /// Is this cursor pointing to an empty map? + pub fn is_empty(&self) -> bool { + self.root.is_none() + } + + /// Move cursor to the next key-value pair and return it. + /// + /// If the cursor reaches the end, return `None` and leave the cursor at the off-the-end + /// position. + #[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))] + pub fn next(&mut self) -> Option<(K, V)> { + self.path.next(self.pool) + } + + /// Move cursor to the previous key-value pair and return it. + /// + /// If the cursor is already pointing at the first entry, leave it there and return `None`. + pub fn prev(&mut self) -> Option<(K, V)> { + self.root + .expand() + .and_then(|root| self.path.prev(root, self.pool)) + } + + /// Get the current key, or `None` if the cursor is at the end. + pub fn key(&self) -> Option { + self.path + .leaf_pos() + .and_then(|(node, entry)| self.pool[node].unwrap_leaf().0.get(entry).cloned()) + } + + /// Get the current value, or `None` if the cursor is at the end. 
+ pub fn value(&self) -> Option { + self.path + .leaf_pos() + .and_then(|(node, entry)| self.pool[node].unwrap_leaf().1.get(entry).cloned()) + } + + /// Get a mutable reference to the current value, or `None` if the cursor is at the end. + pub fn value_mut(&mut self) -> Option<&mut V> { + self.path + .leaf_pos() + .and_then(move |(node, entry)| self.pool[node].unwrap_leaf_mut().1.get_mut(entry)) + } + + /// Move this cursor to `key`. + /// + /// If `key` is in the map, place the cursor at `key` and return the corresponding value. + /// + /// If `key` is not in the set, place the cursor at the next larger element (or the end) and + /// return `None`. + pub fn goto(&mut self, elem: K) -> Option { + self.root.expand().and_then(|root| { + let v = self.path.find(elem, root, self.pool, self.comp); + if v.is_none() { + self.path.normalize(self.pool); + } + v + }) + } + + /// Move this cursor to the first element. + pub fn goto_first(&mut self) -> Option { + self.root.map(|root| self.path.first(root, self.pool).1) + } + + /// Insert `(key, value))` into the map and leave the cursor at the inserted pair. + /// + /// If the map did not contain `key`, return `None`. + /// + /// If `key` is already present, replace the existing with `value` and return the old value. + pub fn insert(&mut self, key: K, value: V) -> Option { + match self.root.expand() { + None => { + let root = self.pool.alloc_node(NodeData::leaf(key, value)); + *self.root = root.into(); + self.path.set_root_node(root); + None + } + Some(root) => { + // TODO: Optimize the case where `self.path` is already at the correct insert pos. + let old = self.path.find(key, root, self.pool, self.comp); + if old.is_some() { + *self.path.value_mut(self.pool) = value; + } else { + *self.root = self.path.insert(key, value, self.pool).into(); + } + old + } + } + } + + /// Remove the current entry (if any) and return the mapped value. + /// This advances the cursor to the next entry after the removed one. + pub fn remove(&mut self) -> Option { + let value = self.value(); + if value.is_some() { + *self.root = self.path.remove(self.pool).into(); + } + value + } +} + +/// An iterator visiting the key-value pairs of a `Map`. +pub struct MapIter<'a, K, V> +where + K: 'a + Copy, + V: 'a + Copy, +{ + root: PackedOption, + pool: &'a NodePool>, + path: Path>, +} + +impl<'a, K, V> Iterator for MapIter<'a, K, V> +where + K: 'a + Copy, + V: 'a + Copy, +{ + type Item = (K, V); + + fn next(&mut self) -> Option { + // We use `self.root` to indicate if we need to go to the first element. Reset to `None` + // once we've returned the first element. This also works for an empty tree since the + // `path.next()` call returns `None` when the path is empty. This also fuses the iterator. + match self.root.take() { + Some(root) => Some(self.path.first(root, self.pool)), + None => self.path.next(self.pool), + } + } +} + +#[cfg(test)] +impl<'a, K, V, C> MapCursor<'a, K, V, C> +where + K: Copy + fmt::Display, + V: Copy + fmt::Display, + C: Comparator, +{ + fn verify(&self) { + self.path.verify(self.pool); + self.root.map(|root| self.pool.verify_tree(root, self.comp)); + } + + /// Get a text version of the path to the current position. + fn tpath(&self) -> String { + use alloc::string::ToString; + self.path.to_string() + } +} + +#[cfg(test)] +mod tests { + use super::super::NodeData; + use super::*; + use alloc::vec::Vec; + use core::mem; + + #[test] + fn node_size() { + // check that nodes are cache line sized when keys and values are 32 bits. 
+ type F = MapTypes; + assert_eq!(mem::size_of::>(), 64); + } + + #[test] + fn empty() { + let mut f = MapForest::::new(); + f.clear(); + + let mut m = Map::::new(); + assert!(m.is_empty()); + m.clear(&mut f); + + assert_eq!(m.get(7, &f, &()), None); + assert_eq!(m.iter(&f).next(), None); + assert_eq!(m.get_or_less(7, &f, &()), None); + m.retain(&mut f, |_, _| unreachable!()); + + let mut c = m.cursor(&mut f, &()); + assert!(c.is_empty()); + assert_eq!(c.key(), None); + assert_eq!(c.value(), None); + assert_eq!(c.next(), None); + assert_eq!(c.prev(), None); + c.verify(); + assert_eq!(c.tpath(), ""); + assert_eq!(c.goto_first(), None); + assert_eq!(c.tpath(), ""); + } + + #[test] + fn inserting() { + let f = &mut MapForest::::new(); + let mut m = Map::::new(); + + // The first seven values stay in a single leaf node. + assert_eq!(m.insert(50, 5.0, f, &()), None); + assert_eq!(m.insert(50, 5.5, f, &()), Some(5.0)); + assert_eq!(m.insert(20, 2.0, f, &()), None); + assert_eq!(m.insert(80, 8.0, f, &()), None); + assert_eq!(m.insert(40, 4.0, f, &()), None); + assert_eq!(m.insert(60, 6.0, f, &()), None); + assert_eq!(m.insert(90, 9.0, f, &()), None); + assert_eq!(m.insert(200, 20.0, f, &()), None); + + m.verify(f, &()); + + assert_eq!( + m.iter(f).collect::>(), + [ + (20, 2.0), + (40, 4.0), + (50, 5.5), + (60, 6.0), + (80, 8.0), + (90, 9.0), + (200, 20.0), + ] + ); + + assert_eq!(m.get(0, f, &()), None); + assert_eq!(m.get(20, f, &()), Some(2.0)); + assert_eq!(m.get(30, f, &()), None); + assert_eq!(m.get(40, f, &()), Some(4.0)); + assert_eq!(m.get(50, f, &()), Some(5.5)); + assert_eq!(m.get(60, f, &()), Some(6.0)); + assert_eq!(m.get(70, f, &()), None); + assert_eq!(m.get(80, f, &()), Some(8.0)); + assert_eq!(m.get(100, f, &()), None); + + assert_eq!(m.get_or_less(0, f, &()), None); + assert_eq!(m.get_or_less(20, f, &()), Some((20, 2.0))); + assert_eq!(m.get_or_less(30, f, &()), Some((20, 2.0))); + assert_eq!(m.get_or_less(40, f, &()), Some((40, 4.0))); + assert_eq!(m.get_or_less(200, f, &()), Some((200, 20.0))); + assert_eq!(m.get_or_less(201, f, &()), Some((200, 20.0))); + + { + let mut c = m.cursor(f, &()); + assert_eq!(c.prev(), Some((200, 20.0))); + assert_eq!(c.prev(), Some((90, 9.0))); + assert_eq!(c.prev(), Some((80, 8.0))); + assert_eq!(c.prev(), Some((60, 6.0))); + assert_eq!(c.prev(), Some((50, 5.5))); + assert_eq!(c.prev(), Some((40, 4.0))); + assert_eq!(c.prev(), Some((20, 2.0))); + assert_eq!(c.prev(), None); + } + + // Test some removals where the node stays healthy. 
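+ // (`tpath` renders the lookup path as node<id>[<entry>] segments from the
+ // root down, so these assertions pin down the exact tree shape, not just
+ // the map contents.)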
+ assert_eq!(m.tpath(50, f, &()), "node0[2]"); + assert_eq!(m.tpath(80, f, &()), "node0[4]"); + assert_eq!(m.tpath(200, f, &()), "node0[6]"); + + assert_eq!(m.remove(80, f, &()), Some(8.0)); + assert_eq!(m.tpath(50, f, &()), "node0[2]"); + assert_eq!(m.tpath(80, f, &()), "node0[4]"); + assert_eq!(m.tpath(200, f, &()), "node0[5]"); + assert_eq!(m.remove(80, f, &()), None); + m.verify(f, &()); + + assert_eq!(m.remove(20, f, &()), Some(2.0)); + assert_eq!(m.tpath(50, f, &()), "node0[1]"); + assert_eq!(m.tpath(80, f, &()), "node0[3]"); + assert_eq!(m.tpath(200, f, &()), "node0[4]"); + assert_eq!(m.remove(20, f, &()), None); + m.verify(f, &()); + + // [ 40 50 60 90 200 ] + + { + let mut c = m.cursor(f, &()); + assert_eq!(c.goto_first(), Some(4.0)); + assert_eq!(c.key(), Some(40)); + assert_eq!(c.value(), Some(4.0)); + assert_eq!(c.next(), Some((50, 5.5))); + assert_eq!(c.next(), Some((60, 6.0))); + assert_eq!(c.next(), Some((90, 9.0))); + assert_eq!(c.next(), Some((200, 20.0))); + c.verify(); + assert_eq!(c.next(), None); + c.verify(); + } + + // Removals from the root leaf node beyond underflow. + assert_eq!(m.remove(200, f, &()), Some(20.0)); + assert_eq!(m.remove(40, f, &()), Some(4.0)); + assert_eq!(m.remove(60, f, &()), Some(6.0)); + m.verify(f, &()); + assert_eq!(m.remove(50, f, &()), Some(5.5)); + m.verify(f, &()); + assert_eq!(m.remove(90, f, &()), Some(9.0)); + m.verify(f, &()); + assert!(m.is_empty()); + } + + #[test] + fn split_level0_leaf() { + // Various ways of splitting a full leaf node at level 0. + let f = &mut MapForest::::new(); + + fn full_leaf(f: &mut MapForest) -> Map { + let mut m = Map::new(); + for n in 1..8 { + m.insert(n * 10, n as f32 * 1.1, f, &()); + } + m + } + + // Insert at front of leaf. + let mut m = full_leaf(f); + m.insert(5, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(5, f, &()), Some(4.2)); + + // Retain even entries, with altered values. + m.retain(f, |k, v| { + *v = (k / 10) as f32; + (k % 20) == 0 + }); + assert_eq!( + m.iter(f).collect::>(), + [(20, 2.0), (40, 4.0), (60, 6.0)] + ); + + // Insert at back of leaf. + let mut m = full_leaf(f); + m.insert(80, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(80, f, &()), Some(4.2)); + + // Insert before middle (40). + let mut m = full_leaf(f); + m.insert(35, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(35, f, &()), Some(4.2)); + + // Insert after middle (40). + let mut m = full_leaf(f); + m.insert(45, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(45, f, &()), Some(4.2)); + + m.clear(f); + assert!(m.is_empty()); + } + + #[test] + fn split_level1_leaf() { + // Various ways of splitting a full leaf node at level 1. + let f = &mut MapForest::::new(); + + // Return a map whose root node is a full inner node, and the leaf nodes are all full + // containing: + // + // 110, 120, ..., 170 + // 210, 220, ..., 270 + // ... + // 810, 820, ..., 870 + fn full(f: &mut MapForest) -> Map { + let mut m = Map::new(); + + // Start by inserting elements in order. + // This should leave 8 leaf nodes with 4 elements in each. + for row in 1..9 { + for col in 1..5 { + m.insert(row * 100 + col * 10, row as f32 + col as f32 * 0.1, f, &()); + } + } + + // Then top up the leaf nodes without splitting them. + for row in 1..9 { + for col in 5..8 { + m.insert(row * 100 + col * 10, row as f32 + col as f32 * 0.1, f, &()); + } + } + + m + } + + let mut m = full(f); + // Verify geometry. Get get node2 as the root and leaves node0, 1, 3, ... 
+ m.verify(f, &()); + assert_eq!(m.tpath(110, f, &()), "node2[0]--node0[0]"); + assert_eq!(m.tpath(140, f, &()), "node2[0]--node0[3]"); + assert_eq!(m.tpath(210, f, &()), "node2[1]--node1[0]"); + assert_eq!(m.tpath(270, f, &()), "node2[1]--node1[6]"); + assert_eq!(m.tpath(310, f, &()), "node2[2]--node3[0]"); + assert_eq!(m.tpath(810, f, &()), "node2[7]--node8[0]"); + assert_eq!(m.tpath(870, f, &()), "node2[7]--node8[6]"); + + { + let mut c = m.cursor(f, &()); + assert_eq!(c.goto_first(), Some(1.1)); + assert_eq!(c.key(), Some(110)); + } + + // Front of first leaf. + m.insert(0, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(0, f, &()), Some(4.2)); + + // First leaf split 4-4 after appending to LHS. + f.clear(); + m = full(f); + m.insert(135, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(135, f, &()), Some(4.2)); + + // First leaf split 4-4 after prepending to RHS. + f.clear(); + m = full(f); + m.insert(145, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(145, f, &()), Some(4.2)); + + // First leaf split 4-4 after appending to RHS. + f.clear(); + m = full(f); + m.insert(175, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(175, f, &()), Some(4.2)); + + // Left-middle leaf split, ins LHS. + f.clear(); + m = full(f); + m.insert(435, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(435, f, &()), Some(4.2)); + + // Left-middle leaf split, ins RHS. + f.clear(); + m = full(f); + m.insert(445, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(445, f, &()), Some(4.2)); + + // Right-middle leaf split, ins LHS. + f.clear(); + m = full(f); + m.insert(535, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(535, f, &()), Some(4.2)); + + // Right-middle leaf split, ins RHS. + f.clear(); + m = full(f); + m.insert(545, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(545, f, &()), Some(4.2)); + + // Last leaf split, ins LHS. + f.clear(); + m = full(f); + m.insert(835, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(835, f, &()), Some(4.2)); + + // Last leaf split, ins RHS. + f.clear(); + m = full(f); + m.insert(845, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(845, f, &()), Some(4.2)); + + // Front of last leaf. + f.clear(); + m = full(f); + m.insert(805, 4.2, f, &()); + m.verify(f, &()); + assert_eq!(m.get(805, f, &()), Some(4.2)); + + m.clear(f); + m.verify(f, &()); + } + + // Make a tree with two barely healthy leaf nodes: + // [ 10 20 30 40 ] [ 50 60 70 80 ] + fn two_leaf(f: &mut MapForest) -> Map { + f.clear(); + let mut m = Map::new(); + for n in 1..9 { + m.insert(n * 10, n as f32, f, &()); + } + m + } + + #[test] + fn remove_level1() { + let f = &mut MapForest::::new(); + let mut m = two_leaf(f); + + // Verify geometry. + m.verify(f, &()); + assert_eq!(m.tpath(10, f, &()), "node2[0]--node0[0]"); + assert_eq!(m.tpath(40, f, &()), "node2[0]--node0[3]"); + assert_eq!(m.tpath(49, f, &()), "node2[0]--node0[4]"); + assert_eq!(m.tpath(50, f, &()), "node2[1]--node1[0]"); + assert_eq!(m.tpath(80, f, &()), "node2[1]--node1[3]"); + + // Remove the front entry from a node that stays healthy. + assert_eq!(m.insert(55, 5.5, f, &()), None); + assert_eq!(m.remove(50, f, &()), Some(5.0)); + m.verify(f, &()); + assert_eq!(m.tpath(49, f, &()), "node2[0]--node0[4]"); + assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[4]"); + assert_eq!(m.tpath(55, f, &()), "node2[1]--node1[0]"); + + // Remove the front entry from the first leaf node: No critical key to update. 
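+ // (A leaf's critical key is the copy of its first key held by the parent
+ // inner node; the left-most leaf has no critical key, so removing its front
+ // entry needs no update.)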
+ assert_eq!(m.insert(15, 1.5, f, &()), None); + assert_eq!(m.remove(10, f, &()), Some(1.0)); + m.verify(f, &()); + + // [ 15 20 30 40 ] [ 55 60 70 80 ] + + // Remove the front entry from a right-most node that underflows. + // No rebalancing for the right-most node. Still need critical key update. + assert_eq!(m.remove(55, f, &()), Some(5.5)); + m.verify(f, &()); + assert_eq!(m.tpath(55, f, &()), "node2[0]--node0[4]"); + assert_eq!(m.tpath(60, f, &()), "node2[1]--node1[0]"); + + // [ 15 20 30 40 ] [ 60 70 80 ] + + // Replenish the right leaf. + assert_eq!(m.insert(90, 9.0, f, &()), None); + assert_eq!(m.insert(100, 10.0, f, &()), None); + m.verify(f, &()); + assert_eq!(m.tpath(55, f, &()), "node2[0]--node0[4]"); + assert_eq!(m.tpath(60, f, &()), "node2[1]--node1[0]"); + + // [ 15 20 30 40 ] [ 60 70 80 90 100 ] + + // Removing one entry from the left leaf should trigger a rebalancing from the right + // sibling. + assert_eq!(m.remove(20, f, &()), Some(2.0)); + m.verify(f, &()); + + // [ 15 30 40 60 ] [ 70 80 90 100 ] + // Check that the critical key was updated correctly. + assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[3]"); + assert_eq!(m.tpath(60, f, &()), "node2[0]--node0[3]"); + assert_eq!(m.tpath(70, f, &()), "node2[1]--node1[0]"); + + // Remove front entry from the left-most leaf node, underflowing. + // This should cause two leaf nodes to be merged and the root node to go away. + assert_eq!(m.remove(15, f, &()), Some(1.5)); + m.verify(f, &()); + } + + #[test] + fn remove_level1_rightmost() { + let f = &mut MapForest::::new(); + let mut m = two_leaf(f); + + // [ 10 20 30 40 ] [ 50 60 70 80 ] + + // Remove entries from the right leaf. This doesn't trigger a rebalancing. + assert_eq!(m.remove(60, f, &()), Some(6.0)); + assert_eq!(m.remove(80, f, &()), Some(8.0)); + assert_eq!(m.remove(50, f, &()), Some(5.0)); + m.verify(f, &()); + + // [ 10 20 30 40 ] [ 70 ] + assert_eq!(m.tpath(50, f, &()), "node2[0]--node0[4]"); + assert_eq!(m.tpath(70, f, &()), "node2[1]--node1[0]"); + + // Removing the last entry from the right leaf should cause a collapse. + assert_eq!(m.remove(70, f, &()), Some(7.0)); + m.verify(f, &()); + } + + // Make a 3-level tree with barely healthy nodes. + // 1 root, 8 inner nodes, 7*4+5=33 leaf nodes, 4 entries each. + fn level3_sparse(f: &mut MapForest) -> Map { + f.clear(); + let mut m = Map::new(); + for n in 1..133 { + m.insert(n * 10, n as f32, f, &()); + } + m + } + + #[test] + fn level3_removes() { + let f = &mut MapForest::::new(); + let mut m = level3_sparse(f); + m.verify(f, &()); + + // Check geometry. + // Root: node11 + // [ node2 170 node10 330 node16 490 node21 650 node26 810 node31 970 node36 1130 node41 ] + // L1: node11 + assert_eq!(m.tpath(0, f, &()), "node11[0]--node2[0]--node0[0]"); + assert_eq!(m.tpath(10000, f, &()), "node11[7]--node41[4]--node40[4]"); + + // 650 is a critical key in the middle of the root. + assert_eq!(m.tpath(640, f, &()), "node11[3]--node21[3]--node19[3]"); + assert_eq!(m.tpath(650, f, &()), "node11[4]--node26[0]--node20[0]"); + + // Deleting 640 triggers a rebalance from node19 to node 20, cascading to n21 -> n26. + assert_eq!(m.remove(640, f, &()), Some(64.0)); + m.verify(f, &()); + assert_eq!(m.tpath(650, f, &()), "node11[3]--node26[3]--node20[3]"); + + // 1130 is in the first leaf of the last L1 node. Deleting it triggers a rebalance node35 + // -> node37, but no rebalance above where there is no right sibling. 
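+ // (Right-most nodes are exempt from the half-full rule, so an underflow in
+ // the last node at a level stops there instead of cascading further up.)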
+ assert_eq!(m.tpath(1130, f, &()), "node11[6]--node41[0]--node35[0]"); + assert_eq!(m.tpath(1140, f, &()), "node11[6]--node41[0]--node35[1]"); + assert_eq!(m.remove(1130, f, &()), Some(113.0)); + m.verify(f, &()); + assert_eq!(m.tpath(1140, f, &()), "node11[6]--node41[0]--node37[0]"); + } + + #[test] + fn insert_many() { + let f = &mut MapForest::::new(); + let mut m = Map::::new(); + + let mm = 4096; + let mut x = 0; + + for n in 0..mm { + assert_eq!(m.insert(x, n as f32, f, &()), None); + m.verify(f, &()); + + x = (x + n + 1) % mm; + } + + x = 0; + for n in 0..mm { + assert_eq!(m.get(x, f, &()), Some(n as f32)); + x = (x + n + 1) % mm; + } + + x = 0; + for n in 0..mm { + assert_eq!(m.remove(x, f, &()), Some(n as f32)); + m.verify(f, &()); + + x = (x + n + 1) % mm; + } + + assert!(m.is_empty()); + } +} diff --git a/cranelift/bforest/src/node.rs b/cranelift/bforest/src/node.rs new file mode 100644 index 0000000000..53a0dca386 --- /dev/null +++ b/cranelift/bforest/src/node.rs @@ -0,0 +1,806 @@ +//! B+-tree nodes. + +use super::{slice_insert, slice_shift, Forest, Node, SetValue, INNER_SIZE}; +use core::borrow::{Borrow, BorrowMut}; +use core::fmt; + +/// B+-tree node. +/// +/// A B+-tree has different node types for inner nodes and leaf nodes. Inner nodes contain M node +/// references and M-1 keys while leaf nodes contain N keys and values. Values for M and N are +/// chosen such that a node is exactly 64 bytes (a cache line) when keys and values are 32 bits +/// each. +/// +/// An inner node contains at least M/2 node references unless it is the right-most node at its +/// level. A leaf node contains at least N/2 keys unless it is the right-most leaf. +#[allow(dead_code)] // workaround for https://github.com/rust-lang/rust/issues/64362 +pub(super) enum NodeData { + Inner { + /// The number of keys in this node. + /// The number of node references is always one more. + size: u8, + + /// Keys discriminating sub-trees. + /// + /// The key in `keys[i]` is greater than all keys in `tree[i]` and less than or equal to + /// all keys in `tree[i+1]`. + keys: [F::Key; INNER_SIZE - 1], + + /// Sub-trees. + tree: [Node; INNER_SIZE], + }, + Leaf { + /// Number of key-value pairs in this node. + size: u8, + + // Key array. + keys: F::LeafKeys, + + // Value array. + vals: F::LeafValues, + }, + /// An unused node on the free list. + Free { next: Option }, +} + +// Implement `Clone` and `Copy` manually, because deriving them would also require `Forest` to +// implement `Clone`. +impl Copy for NodeData {} +impl Clone for NodeData { + fn clone(&self) -> Self { + *self + } +} + +impl NodeData { + /// Is this a free/unused node? + pub fn is_free(&self) -> bool { + match *self { + Self::Free { .. } => true, + _ => false, + } + } + + /// Get the number of entries in this node. + /// + /// This is the number of outgoing edges in an inner node, or the number of key-value pairs in + /// a leaf node. + pub fn entries(&self) -> usize { + match *self { + Self::Inner { size, .. } => usize::from(size) + 1, + Self::Leaf { size, .. } => usize::from(size), + Self::Free { .. } => panic!("freed node"), + } + } + + /// Create an inner node with a single key and two sub-trees. + pub fn inner(left: Node, key: F::Key, right: Node) -> Self { + // Splat the key and right node to the whole array. + // Saves us from inventing a default/reserved value. 
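+ // Every array slot is initialized with `Copy` data; only `keys[0]` and
+ // `tree[0..=1]` are meaningful while `size == 1`.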
+ let mut tree = [right; INNER_SIZE]; + tree[0] = left; + Self::Inner { + size: 1, + keys: [key; INNER_SIZE - 1], + tree, + } + } + + /// Create a leaf node with a single key-value pair. + pub fn leaf(key: F::Key, value: F::Value) -> Self { + Self::Leaf { + size: 1, + keys: F::splat_key(key), + vals: F::splat_value(value), + } + } + + /// Unwrap an inner node into two slices (keys, trees). + pub fn unwrap_inner(&self) -> (&[F::Key], &[Node]) { + match *self { + Self::Inner { + size, + ref keys, + ref tree, + } => { + let size = usize::from(size); + // TODO: We could probably use `get_unchecked()` here since `size` is always in + // range. + (&keys[0..size], &tree[0..=size]) + } + _ => panic!("Expected inner node"), + } + } + + /// Unwrap a leaf node into two slices (keys, values) of the same length. + pub fn unwrap_leaf(&self) -> (&[F::Key], &[F::Value]) { + match *self { + Self::Leaf { + size, + ref keys, + ref vals, + } => { + let size = usize::from(size); + let keys = keys.borrow(); + let vals = vals.borrow(); + // TODO: We could probably use `get_unchecked()` here since `size` is always in + // range. + (&keys[0..size], &vals[0..size]) + } + _ => panic!("Expected leaf node"), + } + } + + /// Unwrap a mutable leaf node into two slices (keys, values) of the same length. + pub fn unwrap_leaf_mut(&mut self) -> (&mut [F::Key], &mut [F::Value]) { + match *self { + Self::Leaf { + size, + ref mut keys, + ref mut vals, + } => { + let size = usize::from(size); + let keys = keys.borrow_mut(); + let vals = vals.borrow_mut(); + // TODO: We could probably use `get_unchecked_mut()` here since `size` is always in + // range. + (&mut keys[0..size], &mut vals[0..size]) + } + _ => panic!("Expected leaf node"), + } + } + + /// Get the critical key for a leaf node. + /// This is simply the first key. + pub fn leaf_crit_key(&self) -> F::Key { + match *self { + Self::Leaf { size, ref keys, .. } => { + debug_assert!(size > 0, "Empty leaf node"); + keys.borrow()[0] + } + _ => panic!("Expected leaf node"), + } + } + + /// Try to insert `(key, node)` at key-position `index` in an inner node. + /// This means that `key` is inserted at `keys[i]` and `node` is inserted at `tree[i + 1]`. + /// If the node is full, this leaves the node unchanged and returns false. + pub fn try_inner_insert(&mut self, index: usize, key: F::Key, node: Node) -> bool { + match *self { + Self::Inner { + ref mut size, + ref mut keys, + ref mut tree, + } => { + let sz = usize::from(*size); + debug_assert!(sz <= keys.len()); + debug_assert!(index <= sz, "Can't insert at {} with {} keys", index, sz); + + if let Some(ks) = keys.get_mut(0..=sz) { + *size = (sz + 1) as u8; + slice_insert(ks, index, key); + slice_insert(&mut tree[1..=sz + 1], index, node); + true + } else { + false + } + } + _ => panic!("Expected inner node"), + } + } + + /// Try to insert `key, value` at `index` in a leaf node, but fail and return false if the node + /// is full. 
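+ /// Like `try_inner_insert()`, a failed insertion leaves the node untouched,
+ /// so the caller can split the node and retry.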
+ pub fn try_leaf_insert(&mut self, index: usize, key: F::Key, value: F::Value) -> bool {
+ match *self {
+ Self::Leaf {
+ ref mut size,
+ ref mut keys,
+ ref mut vals,
+ } => {
+ let sz = usize::from(*size);
+ let keys = keys.borrow_mut();
+ let vals = vals.borrow_mut();
+ debug_assert!(sz <= keys.len());
+ debug_assert!(index <= sz);
+
+ if let Some(ks) = keys.get_mut(0..=sz) {
+ *size = (sz + 1) as u8;
+ slice_insert(ks, index, key);
+ slice_insert(&mut vals[0..=sz], index, value);
+ true
+ } else {
+ false
+ }
+ }
+ _ => panic!("Expected leaf node"),
+ }
+ }
+
+ /// Split off the second half of this node.
+ /// It is assumed that this is a completely full inner or leaf node.
+ ///
+ /// The `insert_index` parameter is the position where an insertion was tried and failed. The
+ /// node will be split in half with a bias towards an even split after the insertion is retried.
+ pub fn split(&mut self, insert_index: usize) -> SplitOff<F> {
+ match *self {
+ Self::Inner {
+ ref mut size,
+ ref keys,
+ ref tree,
+ } => {
+ debug_assert_eq!(usize::from(*size), keys.len(), "Node not full");
+
+ // Number of tree entries in the lhs node.
+ let l_ents = split_pos(tree.len(), insert_index + 1);
+ let r_ents = tree.len() - l_ents;
+
+ // With INNER_SIZE=8, we get l_ents=4 and:
+ //
+ // self: [ n0 k0 n1 k1 n2 k2 n3 k3 n4 k4 n5 k5 n6 k6 n7 ]
+ // lhs: [ n0 k0 n1 k1 n2 k2 n3 ]
+ // crit_key = k3 (not present in either node)
+ // rhs: [ n4 k4 n5 k5 n6 k6 n7 ]
+
+ // 1. Truncate the LHS.
+ *size = (l_ents - 1) as u8;
+
+ // 2. Copy second half to `rhs_data`.
+ let mut r_keys = *keys;
+ r_keys[0..r_ents - 1].copy_from_slice(&keys[l_ents..]);
+
+ let mut r_tree = *tree;
+ r_tree[0..r_ents].copy_from_slice(&tree[l_ents..]);
+
+ SplitOff {
+ lhs_entries: l_ents,
+ rhs_entries: r_ents,
+ crit_key: keys[l_ents - 1],
+ rhs_data: Self::Inner {
+ size: (r_ents - 1) as u8,
+ keys: r_keys,
+ tree: r_tree,
+ },
+ }
+ }
+ Self::Leaf {
+ ref mut size,
+ ref keys,
+ ref vals,
+ } => {
+ let o_keys = keys.borrow();
+ let o_vals = vals.borrow();
+ debug_assert_eq!(usize::from(*size), o_keys.len(), "Node not full");
+
+ let l_size = split_pos(o_keys.len(), insert_index);
+ let r_size = o_keys.len() - l_size;
+
+ // 1. Truncate the LHS node at `l_size`.
+ *size = l_size as u8;
+
+ // 2. Copy second half to `rhs_data`.
+ let mut r_keys = *keys;
+ r_keys.borrow_mut()[0..r_size].copy_from_slice(&o_keys[l_size..]);
+
+ let mut r_vals = *vals;
+ r_vals.borrow_mut()[0..r_size].copy_from_slice(&o_vals[l_size..]);
+
+ SplitOff {
+ lhs_entries: l_size,
+ rhs_entries: r_size,
+ crit_key: o_keys[l_size],
+ rhs_data: Self::Leaf {
+ size: r_size as u8,
+ keys: r_keys,
+ vals: r_vals,
+ },
+ }
+ }
+ _ => panic!("Expected leaf node"),
+ }
+ }
+
+ /// Remove the sub-tree at `index` from this inner node.
+ ///
+ /// Note that `index` refers to a sub-tree entry and not a key entry as it does for
+ /// `try_inner_insert()`. It is possible to remove the first sub-tree (which can't be inserted
+ /// by `try_inner_insert()`).
+ ///
+ /// Return an indication of the node's health (i.e. below half capacity).
+ pub fn inner_remove(&mut self, index: usize) -> Removed {
+ match *self {
+ Self::Inner {
+ ref mut size,
+ ref mut keys,
+ ref mut tree,
+ } => {
+ let ents = usize::from(*size) + 1;
+ debug_assert!(ents <= tree.len());
+ debug_assert!(index < ents);
+ // Leave an invalid 0xff size when node becomes empty.
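+ // (`ents` counts sub-trees, so the new key count is `ents - 2`; removing
+ // the last sub-tree makes this wrap to 0xff, and `Removed::new` reports
+ // `Empty` below.)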
+ *size = ents.wrapping_sub(2) as u8; + if ents > 1 { + slice_shift(&mut keys[index.saturating_sub(1)..ents - 1], 1); + } + slice_shift(&mut tree[index..ents], 1); + Removed::new(index, ents - 1, tree.len()) + } + _ => panic!("Expected inner node"), + } + } + + /// Remove the key-value pair at `index` from this leaf node. + /// + /// Return an indication of the node's health (i.e. below half capacity). + pub fn leaf_remove(&mut self, index: usize) -> Removed { + match *self { + Self::Leaf { + ref mut size, + ref mut keys, + ref mut vals, + } => { + let sz = usize::from(*size); + let keys = keys.borrow_mut(); + let vals = vals.borrow_mut(); + *size -= 1; + slice_shift(&mut keys[index..sz], 1); + slice_shift(&mut vals[index..sz], 1); + Removed::new(index, sz - 1, keys.len()) + } + _ => panic!("Expected leaf node"), + } + } + + /// Balance this node with its right sibling. + /// + /// It is assumed that the current node has underflowed. Look at the right sibling node and do + /// one of two things: + /// + /// 1. Move all entries to the right node, leaving this node empty, or + /// 2. Distribute entries evenly between the two nodes. + /// + /// In the first case, `None` is returned. In the second case, the new critical key for the + /// right sibling node is returned. + pub fn balance(&mut self, crit_key: F::Key, rhs: &mut Self) -> Option { + match (self, rhs) { + ( + &mut Self::Inner { + size: ref mut l_size, + keys: ref mut l_keys, + tree: ref mut l_tree, + }, + &mut Self::Inner { + size: ref mut r_size, + keys: ref mut r_keys, + tree: ref mut r_tree, + }, + ) => { + let l_ents = usize::from(*l_size) + 1; + let r_ents = usize::from(*r_size) + 1; + let ents = l_ents + r_ents; + + if ents <= r_tree.len() { + // All entries will fit in the RHS node. + // We'll leave the LHS node empty, but first use it as a scratch space. + *l_size = 0; + // Insert `crit_key` between the two nodes. + l_keys[l_ents - 1] = crit_key; + l_keys[l_ents..ents - 1].copy_from_slice(&r_keys[0..r_ents - 1]); + r_keys[0..ents - 1].copy_from_slice(&l_keys[0..ents - 1]); + l_tree[l_ents..ents].copy_from_slice(&r_tree[0..r_ents]); + r_tree[0..ents].copy_from_slice(&l_tree[0..ents]); + *r_size = (ents - 1) as u8; + None + } else { + // The entries don't all fit in one node. Distribute some from RHS -> LHS. + // Split evenly with a bias to putting one entry in LHS. + let r_goal = ents / 2; + let l_goal = ents - r_goal; + debug_assert!(l_goal > l_ents, "Node must be underflowed"); + + l_keys[l_ents - 1] = crit_key; + l_keys[l_ents..l_goal - 1].copy_from_slice(&r_keys[0..l_goal - 1 - l_ents]); + l_tree[l_ents..l_goal].copy_from_slice(&r_tree[0..l_goal - l_ents]); + *l_size = (l_goal - 1) as u8; + + let new_crit = r_keys[r_ents - r_goal - 1]; + slice_shift(&mut r_keys[0..r_ents - 1], r_ents - r_goal); + slice_shift(&mut r_tree[0..r_ents], r_ents - r_goal); + *r_size = (r_goal - 1) as u8; + + Some(new_crit) + } + } + ( + &mut Self::Leaf { + size: ref mut l_size, + keys: ref mut l_keys, + vals: ref mut l_vals, + }, + &mut Self::Leaf { + size: ref mut r_size, + keys: ref mut r_keys, + vals: ref mut r_vals, + }, + ) => { + let l_ents = usize::from(*l_size); + let l_keys = l_keys.borrow_mut(); + let l_vals = l_vals.borrow_mut(); + let r_ents = usize::from(*r_size); + let r_keys = r_keys.borrow_mut(); + let r_vals = r_vals.borrow_mut(); + let ents = l_ents + r_ents; + + if ents <= r_vals.len() { + // We can fit all entries in the RHS node. + // We'll leave the LHS node empty, but first use it as a scratch space. 
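+ // Append RHS's entries after LHS's own, then copy the combined run back
+ // into the RHS arrays; the capacity check above guarantees it fits.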
+ *l_size = 0; + l_keys[l_ents..ents].copy_from_slice(&r_keys[0..r_ents]); + r_keys[0..ents].copy_from_slice(&l_keys[0..ents]); + l_vals[l_ents..ents].copy_from_slice(&r_vals[0..r_ents]); + r_vals[0..ents].copy_from_slice(&l_vals[0..ents]); + *r_size = ents as u8; + None + } else { + // The entries don't all fit in one node. Distribute some from RHS -> LHS. + // Split evenly with a bias to putting one entry in LHS. + let r_goal = ents / 2; + let l_goal = ents - r_goal; + debug_assert!(l_goal > l_ents, "Node must be underflowed"); + + l_keys[l_ents..l_goal].copy_from_slice(&r_keys[0..l_goal - l_ents]); + l_vals[l_ents..l_goal].copy_from_slice(&r_vals[0..l_goal - l_ents]); + *l_size = l_goal as u8; + + slice_shift(&mut r_keys[0..r_ents], r_ents - r_goal); + slice_shift(&mut r_vals[0..r_ents], r_ents - r_goal); + *r_size = r_goal as u8; + + Some(r_keys[0]) + } + } + _ => panic!("Mismatched nodes"), + } + } +} + +/// Find the right split position for halving a full node with `len` entries to recover from a +/// failed insertion at `ins`. +/// +/// If `len` is even, we should split straight down the middle regardless of `len`. +/// +/// If `len` is odd, we should split the node such that the two halves are the same size after the +/// insertion is retried. +fn split_pos(len: usize, ins: usize) -> usize { + // Anticipate `len` being a compile time constant, so this all folds away when `len` is even. + if ins <= len / 2 { + len / 2 + } else { + (len + 1) / 2 + } +} + +/// The result of splitting off the second half of a node. +pub(super) struct SplitOff { + /// The number of entries left in the original node which becomes the left-hand-side of the + /// pair. This is the number of outgoing node edges for an inner node, and the number of + /// key-value pairs for a leaf node. + pub lhs_entries: usize, + + /// The number of entries in the new RHS node. + pub rhs_entries: usize, + + /// The critical key separating the LHS and RHS nodes. All keys in the LHS sub-tree are less + /// than the critical key, and all entries in the RHS sub-tree are greater or equal to the + /// critical key. + pub crit_key: F::Key, + + /// The RHS node data containing the elements that were removed from the original node (now the + /// LHS). + pub rhs_data: NodeData, +} + +/// The result of removing an entry from a node. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(super) enum Removed { + /// An entry was removed, and the node is still in good shape. + Healthy, + + /// The node is in good shape after removing the rightmost element. + Rightmost, + + /// The node has too few entries now, and it should be balanced with a sibling node. + Underflow, + + /// The last entry was removed. For an inner node, this means that the `keys` array is empty + /// and there is just a single sub-tree left. + Empty, +} + +impl Removed { + /// Create a `Removed` status from a size and capacity. + fn new(removed: usize, new_size: usize, capacity: usize) -> Self { + if 2 * new_size >= capacity { + if removed == new_size { + Self::Rightmost + } else { + Self::Healthy + } + } else if new_size > 0 { + Self::Underflow + } else { + Self::Empty + } + } +} + +// Display ": value" or nothing at all for `()`. 
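+// This keeps the `Display` impl below usable for both maps and sets: map
+// leaves print as `[ k:v ... ]`, while set leaves print as `[ k ... ]`.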
+pub(super) trait ValDisp { + fn valfmt(&self, f: &mut fmt::Formatter) -> fmt::Result; +} + +impl ValDisp for SetValue { + fn valfmt(&self, _: &mut fmt::Formatter) -> fmt::Result { + Ok(()) + } +} + +impl ValDisp for T { + fn valfmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, ":{}", self) + } +} + +impl fmt::Display for NodeData +where + F: Forest, + F::Key: fmt::Display, + F::Value: ValDisp, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Self::Inner { size, keys, tree } => { + write!(f, "[ {}", tree[0])?; + for i in 0..usize::from(size) { + write!(f, " {} {}", keys[i], tree[i + 1])?; + } + write!(f, " ]") + } + Self::Leaf { size, keys, vals } => { + let keys = keys.borrow(); + let vals = vals.borrow(); + write!(f, "[")?; + for i in 0..usize::from(size) { + write!(f, " {}", keys[i])?; + vals[i].valfmt(f)?; + } + write!(f, " ]") + } + Self::Free { next: Some(n) } => write!(f, "[ free -> {} ]", n), + Self::Free { next: None } => write!(f, "[ free ]"), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use alloc::string::ToString; + use core::mem; + + // Forest impl for a set implementation. + struct TF(); + + impl Forest for TF { + type Key = char; + type Value = SetValue; + type LeafKeys = [char; 15]; + type LeafValues = [SetValue; 15]; + + fn splat_key(key: Self::Key) -> Self::LeafKeys { + [key; 15] + } + + fn splat_value(value: Self::Value) -> Self::LeafValues { + [value; 15] + } + } + + #[test] + fn inner() { + let n1 = Node(1); + let n2 = Node(2); + let n3 = Node(3); + let n4 = Node(4); + let mut inner = NodeData::::inner(n1, 'c', n4); + assert_eq!(mem::size_of_val(&inner), 64); + assert_eq!(inner.to_string(), "[ node1 c node4 ]"); + + assert!(inner.try_inner_insert(0, 'a', n2)); + assert_eq!(inner.to_string(), "[ node1 a node2 c node4 ]"); + + assert!(inner.try_inner_insert(1, 'b', n3)); + assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]"); + + for i in 3..7 { + assert!(inner.try_inner_insert( + usize::from(i), + ('a' as u8 + i) as char, + Node(i as u32 + 2), + )); + } + assert_eq!( + inner.to_string(), + "[ node1 a node2 b node3 c node4 d node5 e node6 f node7 g node8 ]" + ); + + // Now the node is full and insertion should fail anywhere. + assert!(!inner.try_inner_insert(0, 'x', n3)); + assert!(!inner.try_inner_insert(4, 'x', n3)); + assert!(!inner.try_inner_insert(7, 'x', n3)); + + // Splitting should be independent of the hint because we have an even number of node + // references. + let saved = inner.clone(); + let sp = inner.split(1); + assert_eq!(sp.lhs_entries, 4); + assert_eq!(sp.rhs_entries, 4); + assert_eq!(sp.crit_key, 'd'); + // The critical key is not present in either of the resulting nodes. 
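+ // (When a split is linked into a real tree, 'd' moves up into the parent
+ // inner node as the critical key; see `split_and_insert` in path.rs.)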
+ assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]"); + assert_eq!(sp.rhs_data.to_string(), "[ node5 e node6 f node7 g node8 ]"); + + assert_eq!(inner.inner_remove(0), Removed::Underflow); + assert_eq!(inner.to_string(), "[ node2 b node3 c node4 ]"); + + assert_eq!(inner.inner_remove(1), Removed::Underflow); + assert_eq!(inner.to_string(), "[ node2 c node4 ]"); + + assert_eq!(inner.inner_remove(1), Removed::Underflow); + assert_eq!(inner.to_string(), "[ node2 ]"); + + assert_eq!(inner.inner_remove(0), Removed::Empty); + + inner = saved; + let sp = inner.split(6); + assert_eq!(sp.lhs_entries, 4); + assert_eq!(sp.rhs_entries, 4); + assert_eq!(sp.crit_key, 'd'); + assert_eq!(inner.to_string(), "[ node1 a node2 b node3 c node4 ]"); + assert_eq!(sp.rhs_data.to_string(), "[ node5 e node6 f node7 g node8 ]"); + } + + #[test] + fn leaf() { + let mut leaf = NodeData::::leaf('d', SetValue()); + assert_eq!(leaf.to_string(), "[ d ]"); + + assert!(leaf.try_leaf_insert(0, 'a', SetValue())); + assert_eq!(leaf.to_string(), "[ a d ]"); + assert!(leaf.try_leaf_insert(1, 'b', SetValue())); + assert!(leaf.try_leaf_insert(2, 'c', SetValue())); + assert_eq!(leaf.to_string(), "[ a b c d ]"); + for i in 4..15 { + assert!(leaf.try_leaf_insert(usize::from(i), ('a' as u8 + i) as char, SetValue())); + } + assert_eq!(leaf.to_string(), "[ a b c d e f g h i j k l m n o ]"); + + // Now the node is full and insertion should fail anywhere. + assert!(!leaf.try_leaf_insert(0, 'x', SetValue())); + assert!(!leaf.try_leaf_insert(8, 'x', SetValue())); + assert!(!leaf.try_leaf_insert(15, 'x', SetValue())); + + // The index given to `split` is not the split position, it's a hint for balancing the node. + let saved = leaf.clone(); + let sp = leaf.split(12); + assert_eq!(sp.lhs_entries, 8); + assert_eq!(sp.rhs_entries, 7); + assert_eq!(sp.crit_key, 'i'); + assert_eq!(leaf.to_string(), "[ a b c d e f g h ]"); + assert_eq!(sp.rhs_data.to_string(), "[ i j k l m n o ]"); + + assert!(leaf.try_leaf_insert(8, 'i', SetValue())); + assert_eq!(leaf.leaf_remove(2), Removed::Healthy); + assert_eq!(leaf.to_string(), "[ a b d e f g h i ]"); + assert_eq!(leaf.leaf_remove(7), Removed::Underflow); + assert_eq!(leaf.to_string(), "[ a b d e f g h ]"); + + leaf = saved; + let sp = leaf.split(7); + assert_eq!(sp.lhs_entries, 7); + assert_eq!(sp.rhs_entries, 8); + assert_eq!(sp.crit_key, 'h'); + assert_eq!(leaf.to_string(), "[ a b c d e f g ]"); + assert_eq!(sp.rhs_data.to_string(), "[ h i j k l m n o ]"); + } + + #[test] + fn optimal_split_pos() { + // An even split is easy. + assert_eq!(split_pos(8, 0), 4); + assert_eq!(split_pos(8, 8), 4); + + // Easy cases for odd splits. + assert_eq!(split_pos(7, 0), 3); + assert_eq!(split_pos(7, 7), 4); + + // If the insertion point is the same as the split position, we + // will append to the lhs node. + assert_eq!(split_pos(7, 3), 3); + assert_eq!(split_pos(7, 4), 4); + } + + #[test] + fn inner_balance() { + let n1 = Node(1); + let n2 = Node(2); + let n3 = Node(3); + let mut lhs = NodeData::::inner(n1, 'a', n2); + assert!(lhs.try_inner_insert(1, 'b', n3)); + assert_eq!(lhs.to_string(), "[ node1 a node2 b node3 ]"); + + let n11 = Node(11); + let n12 = Node(12); + let mut rhs = NodeData::::inner(n11, 'p', n12); + + for i in 1..4 { + assert!(rhs.try_inner_insert( + usize::from(i), + ('p' as u8 + i) as char, + Node(i as u32 + 12), + )); + } + assert_eq!( + rhs.to_string(), + "[ node11 p node12 q node13 r node14 s node15 ]" + ); + + // 3+5 elements fit in RHS. 
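+ // ('o' plays the role of the critical key separating lhs from rhs; merging
+ // splices it back in as an ordinary key.)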
+ assert_eq!(lhs.balance('o', &mut rhs), None);
+ assert_eq!(
+ rhs.to_string(),
+ "[ node1 a node2 b node3 o node11 p node12 q node13 r node14 s node15 ]"
+ );
+
+ // 2+8 elements are redistributed.
+ lhs = NodeData::<TF>::inner(Node(20), 'x', Node(21));
+ assert_eq!(lhs.balance('y', &mut rhs), Some('o'));
+ assert_eq!(
+ lhs.to_string(),
+ "[ node20 x node21 y node1 a node2 b node3 ]"
+ );
+ assert_eq!(
+ rhs.to_string(),
+ "[ node11 p node12 q node13 r node14 s node15 ]"
+ );
+ }
+
+ #[test]
+ fn leaf_balance() {
+ let mut lhs = NodeData::<TF>::leaf('a', SetValue());
+ for i in 1..6 {
+ assert!(lhs.try_leaf_insert(usize::from(i), ('a' as u8 + i) as char, SetValue()));
+ }
+ assert_eq!(lhs.to_string(), "[ a b c d e f ]");
+
+ let mut rhs = NodeData::<TF>::leaf('0', SetValue());
+ for i in 1..8 {
+ assert!(rhs.try_leaf_insert(usize::from(i), ('0' as u8 + i) as char, SetValue()));
+ }
+ assert_eq!(rhs.to_string(), "[ 0 1 2 3 4 5 6 7 ]");
+
+ // 6+8 elements all fit in rhs.
+ assert_eq!(lhs.balance('0', &mut rhs), None);
+ assert_eq!(rhs.to_string(), "[ a b c d e f 0 1 2 3 4 5 6 7 ]");
+
+ assert!(lhs.try_leaf_insert(0, 'x', SetValue()));
+ assert!(lhs.try_leaf_insert(1, 'y', SetValue()));
+ assert!(lhs.try_leaf_insert(2, 'z', SetValue()));
+ assert_eq!(lhs.to_string(), "[ x y z ]");
+
+ // 3+14 elements need redistribution.
+ assert_eq!(lhs.balance('a', &mut rhs), Some('0'));
+ assert_eq!(lhs.to_string(), "[ x y z a b c d e f ]");
+ assert_eq!(rhs.to_string(), "[ 0 1 2 3 4 5 6 7 ]");
+ }
+}
diff --git a/cranelift/bforest/src/path.rs b/cranelift/bforest/src/path.rs
new file mode 100644
index 0000000000..a55de6b2ae
--- /dev/null
+++ b/cranelift/bforest/src/path.rs
@@ -0,0 +1,836 @@
+//! A path from the root of a B+-tree to a leaf node.
+
+use super::node::Removed;
+use super::{slice_insert, slice_shift, Comparator, Forest, Node, NodeData, NodePool, MAX_PATH};
+use core::borrow::Borrow;
+use core::marker::PhantomData;
+
+#[cfg(test)]
+use core::fmt;
+
+pub(super) struct Path<F: Forest> {
+ /// Number of path entries including the root and leaf nodes.
+ size: usize,
+
+ /// Path of node references from the root to a leaf node.
+ node: [Node; MAX_PATH],
+
+ /// Entry number in each node.
+ entry: [u8; MAX_PATH],
+
+ unused: PhantomData<F>,
+}
+
+impl<F: Forest> Default for Path<F> {
+ fn default() -> Self {
+ Self {
+ size: 0,
+ node: [Node(0); MAX_PATH],
+ entry: [0; MAX_PATH],
+ unused: PhantomData,
+ }
+ }
+}
+
+impl<F: Forest> Path<F> {
+ /// Reset path by searching for `key` starting from `root`.
+ ///
+ /// If `key` is in the tree, returns the corresponding value and leaves the path pointing at
+ /// the entry. Otherwise returns `None` and:
+ ///
+ /// - A key smaller than all stored keys returns a path to the first entry of the first leaf.
+ /// - A key larger than all stored keys returns a path to one beyond the last element of the
+ /// last leaf.
+ /// - A key between the stored keys of adjacent leaf nodes returns a path to one beyond the
+ /// last entry of the first of the leaf nodes.
+ ///
+ pub fn find(
+ &mut self,
+ key: F::Key,
+ root: Node,
+ pool: &NodePool<F>,
+ comp: &dyn Comparator<F::Key>,
+ ) -> Option<F::Value> {
+ let mut node = root;
+ for level in 0.. {
+ self.size = level + 1;
+ self.node[level] = node;
+ match pool[node] {
+ NodeData::Inner { size, keys, tree } => {
+ // Invariant: `tree[i]` contains keys smaller than
+ // `keys[i]`, greater or equal to `keys[i-1]`.
+ let i = match comp.search(key, &keys[0..size.into()]) {
+ // We hit an existing key, so follow the >= branch.
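+ // (`keys[i] == key` means the key lives in `tree[i + 1]`, which holds
+ // keys greater than or equal to `keys[i]`.)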
+ Ok(i) => i + 1, + // Key is less than `keys[i]`, so follow the < branch. + Err(i) => i, + }; + self.entry[level] = i as u8; + node = tree[i]; + } + NodeData::Leaf { size, keys, vals } => { + // For a leaf we want either the found key or an insert position. + return match comp.search(key, &keys.borrow()[0..size.into()]) { + Ok(i) => { + self.entry[level] = i as u8; + Some(vals.borrow()[i]) + } + Err(i) => { + self.entry[level] = i as u8; + None + } + }; + } + NodeData::Free { .. } => panic!("Free {} reached from {}", node, root), + } + } + unreachable!(); + } + + /// Move path to the first entry of the tree starting at `root` and return it. + pub fn first(&mut self, root: Node, pool: &NodePool) -> (F::Key, F::Value) { + let mut node = root; + for level in 0.. { + self.size = level + 1; + self.node[level] = node; + self.entry[level] = 0; + match pool[node] { + NodeData::Inner { tree, .. } => node = tree[0], + NodeData::Leaf { keys, vals, .. } => return (keys.borrow()[0], vals.borrow()[0]), + NodeData::Free { .. } => panic!("Free {} reached from {}", node, root), + } + } + unreachable!(); + } + + /// Move this path to the next key-value pair and return it. + pub fn next(&mut self, pool: &NodePool) -> Option<(F::Key, F::Value)> { + match self.leaf_pos() { + None => return None, + Some((node, entry)) => { + let (keys, vals) = pool[node].unwrap_leaf(); + if entry + 1 < keys.len() { + self.entry[self.size - 1] += 1; + return Some((keys[entry + 1], vals[entry + 1])); + } + } + } + + // The current leaf node is exhausted. Move to the next one. + let leaf_level = self.size - 1; + self.next_node(leaf_level, pool).map(|node| { + let (keys, vals) = pool[node].unwrap_leaf(); + (keys[0], vals[0]) + }) + } + + /// Move this path to the previous key-value pair and return it. + /// + /// If the path is at the off-the-end position, go to the last key-value pair. + /// + /// If the path is already at the first key-value pair, leave it there and return `None`. + pub fn prev(&mut self, root: Node, pool: &NodePool) -> Option<(F::Key, F::Value)> { + // We use `size == 0` as a generic off-the-end position. + if self.size == 0 { + self.goto_subtree_last(0, root, pool); + let (node, entry) = self.leaf_pos().unwrap(); + let (keys, vals) = pool[node].unwrap_leaf(); + return Some((keys[entry], vals[entry])); + } + + match self.leaf_pos() { + None => return None, + Some((node, entry)) => { + if entry > 0 { + self.entry[self.size - 1] -= 1; + let (keys, vals) = pool[node].unwrap_leaf(); + return Some((keys[entry - 1], vals[entry - 1])); + } + } + } + + // The current leaf node is exhausted. Move to the previous one. + self.prev_leaf(pool).map(|node| { + let (keys, vals) = pool[node].unwrap_leaf(); + let e = self.leaf_entry(); + (keys[e], vals[e]) + }) + } + + /// Move path to the first entry of the next node at level, if one exists. + /// + /// Returns the new node if it exists. + /// + /// Reset the path to `size = 0` and return `None` if there is no next node. 
+ fn next_node(&mut self, level: usize, pool: &NodePool) -> Option { + match self.right_sibling_branch_level(level, pool) { + None => { + self.size = 0; + None + } + Some(bl) => { + let (_, bnodes) = pool[self.node[bl]].unwrap_inner(); + self.entry[bl] += 1; + let mut node = bnodes[usize::from(self.entry[bl])]; + + for l in bl + 1..level { + self.node[l] = node; + self.entry[l] = 0; + node = pool[node].unwrap_inner().1[0]; + } + + self.node[level] = node; + self.entry[level] = 0; + Some(node) + } + } + } + + /// Move the path to the last entry of the previous leaf node, if one exists. + /// + /// Returns the new leaf node if it exists. + /// + /// Leave the path unchanged and returns `None` if we are already at the first leaf node. + fn prev_leaf(&mut self, pool: &NodePool) -> Option { + self.left_sibling_branch_level(self.size - 1).map(|bl| { + let entry = self.entry[bl] - 1; + self.entry[bl] = entry; + let (_, bnodes) = pool[self.node[bl]].unwrap_inner(); + self.goto_subtree_last(bl + 1, bnodes[usize::from(entry)], pool) + }) + } + + /// Move this path to the last position for the sub-tree at `level, root`. + fn goto_subtree_last(&mut self, level: usize, root: Node, pool: &NodePool) -> Node { + let mut node = root; + for l in level.. { + self.node[l] = node; + match pool[node] { + NodeData::Inner { size, ref tree, .. } => { + self.entry[l] = size; + node = tree[usize::from(size)]; + } + NodeData::Leaf { size, .. } => { + self.entry[l] = size - 1; + self.size = l + 1; + break; + } + NodeData::Free { .. } => panic!("Free {} reached from {}", node, root), + } + } + node + } + + /// Set the root node and point the path at the first entry of the node. + pub fn set_root_node(&mut self, root: Node) { + self.size = 1; + self.node[0] = root; + self.entry[0] = 0; + } + + /// Get the current leaf node and entry, if any. + pub fn leaf_pos(&self) -> Option<(Node, usize)> { + let i = self.size.wrapping_sub(1); + self.node.get(i).map(|&n| (n, self.entry[i].into())) + } + + /// Get the current leaf node. + fn leaf_node(&self) -> Node { + self.node[self.size - 1] + } + + /// Get the current entry in the leaf node. + fn leaf_entry(&self) -> usize { + self.entry[self.size - 1].into() + } + + /// Is this path pointing to the first entry in the tree? + /// This corresponds to the smallest key. + fn at_first_entry(&self) -> bool { + self.entry[0..self.size].iter().all(|&i| i == 0) + } + + /// Get a mutable reference to the current value. + /// This assumes that there is a current value. + pub fn value_mut<'a>(&self, pool: &'a mut NodePool) -> &'a mut F::Value { + &mut pool[self.leaf_node()].unwrap_leaf_mut().1[self.leaf_entry()] + } + + /// Insert the key-value pair at the current position. + /// The current position must be the correct insertion location for the key. + /// This function does not check for duplicate keys. Use `find` or similar for that. + /// Returns the new root node. + pub fn insert(&mut self, key: F::Key, value: F::Value, pool: &mut NodePool) -> Node { + if !self.try_leaf_insert(key, value, pool) { + self.split_and_insert(key, value, pool); + } + self.node[0] + } + + /// Try to insert `key, value` at the current position, but fail and return false if the leaf + /// node is full. + fn try_leaf_insert(&self, key: F::Key, value: F::Value, pool: &mut NodePool) -> bool { + let index = self.leaf_entry(); + + // The case `index == 0` should only ever happen when there are no earlier leaf nodes, + // otherwise we should have appended to the previous leaf node instead. 
This invariant
+ // means that we don't need to update keys stored in inner nodes here.
+ debug_assert!(index > 0 || self.at_first_entry());
+
+ pool[self.leaf_node()].try_leaf_insert(index, key, value)
+ }
+
+ /// Split the current leaf node and then insert `key, value`.
+ /// This should only be used if `try_leaf_insert()` fails.
+ fn split_and_insert(&mut self, mut key: F::Key, value: F::Value, pool: &mut NodePool<F>) {
+ let orig_root = self.node[0];
+
+ // Loop invariant: We need to split the node at `level` and then retry a failed insertion.
+ // The items to insert are either `(key, ins_node)` or `(key, value)`.
+ let mut ins_node = None;
+ let mut split;
+ for level in (0..self.size).rev() {
+ // Split the current node.
+ let mut node = self.node[level];
+ let mut entry = self.entry[level].into();
+ split = pool[node].split(entry);
+ let rhs_node = pool.alloc_node(split.rhs_data);
+
+ // Should the path be moved to the new RHS node?
+ // Prefer the smaller node if we're right in the middle.
+ // Prefer to append to LHS all other things being equal.
+ //
+ // When inserting into an inner node (`ins_node.is_some()`), we must point to a valid
+ // entry in the current node since the new entry is inserted *after* the insert
+ // location.
+ if entry > split.lhs_entries
+ || (entry == split.lhs_entries
+ && (split.lhs_entries > split.rhs_entries || ins_node.is_some()))
+ {
+ node = rhs_node;
+ entry -= split.lhs_entries;
+ self.node[level] = node;
+ self.entry[level] = entry as u8;
+ }
+
+ // Now that we have a not-full node, it must be possible to insert.
+ match ins_node {
+ None => {
+ let inserted = pool[node].try_leaf_insert(entry, key, value);
+ debug_assert!(inserted);
+ // If we inserted at the front of the new rhs_node leaf, we need to propagate
+ // the inserted key as the critical key instead of the previous front key.
+ if entry == 0 && node == rhs_node {
+ split.crit_key = key;
+ }
+ }
+ Some(n) => {
+ let inserted = pool[node].try_inner_insert(entry, key, n);
+ debug_assert!(inserted);
+ // The lower level was moved to the new RHS node, so make sure that is
+ // reflected here.
+ if n == self.node[level + 1] {
+ self.entry[level] += 1;
+ }
+ }
+ }
+
+ // We are now done with the current level, but `rhs_node` must be inserted in the inner
+ // node above us. If we're already at level 0, the root node needs to be split.
+ key = split.crit_key;
+ ins_node = Some(rhs_node);
+ if level > 0 {
+ let pnode = &mut pool[self.node[level - 1]];
+ let pentry = self.entry[level - 1].into();
+ if pnode.try_inner_insert(pentry, key, rhs_node) {
+ // If this level was moved to the new RHS node, update the parent entry.
+ if node == rhs_node {
+ self.entry[level - 1] += 1;
+ }
+ return;
+ }
+ }
+ }
+
+ // If we get here we have split the original root node and need to add an extra level.
+ let rhs_node = ins_node.expect("empty path");
+ let root = pool.alloc_node(NodeData::inner(orig_root, key, rhs_node));
+ let entry = if self.node[0] == rhs_node { 1 } else { 0 };
+ self.size += 1;
+ slice_insert(&mut self.node[0..self.size], 0, root);
+ slice_insert(&mut self.entry[0..self.size], 0, entry);
+ }
+
+ /// Remove the key-value pair at the current position and advance the path to the next
+ /// key-value pair, leaving the path in a normalized state.
+ ///
+ /// Return the new root node.
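+ /// Returns `None` if the tree becomes empty.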
+ pub fn remove(&mut self, pool: &mut NodePool) -> Option { + let e = self.leaf_entry(); + match pool[self.leaf_node()].leaf_remove(e) { + Removed::Healthy => { + if e == 0 { + self.update_crit_key(pool) + } + Some(self.node[0]) + } + status => self.balance_nodes(status, pool), + } + } + + /// Get the critical key for the current node at `level`. + /// + /// The critical key is less than or equal to all keys in the sub-tree at `level` and greater + /// than all keys to the left of the current node at `level`. + /// + /// The left-most node at any level does not have a critical key. + fn current_crit_key(&self, level: usize, pool: &NodePool) -> Option { + // Find the level containing the critical key for the current node. + self.left_sibling_branch_level(level).map(|bl| { + let (keys, _) = pool[self.node[bl]].unwrap_inner(); + keys[usize::from(self.entry[bl]) - 1] + }) + } + + /// Update the critical key after removing the front entry of the leaf node. + fn update_crit_key(&mut self, pool: &mut NodePool) { + // Find the inner level containing the critical key for the current leaf node. + let crit_level = match self.left_sibling_branch_level(self.size - 1) { + None => return, + Some(l) => l, + }; + let crit_kidx = self.entry[crit_level] - 1; + + // Extract the new critical key from the leaf node. + let crit_key = pool[self.leaf_node()].leaf_crit_key(); + let crit_node = self.node[crit_level]; + + match pool[crit_node] { + NodeData::Inner { + size, ref mut keys, .. + } => { + debug_assert!(crit_kidx < size); + keys[usize::from(crit_kidx)] = crit_key; + } + _ => panic!("Expected inner node"), + } + } + + /// Given that the current leaf node is in an unhealthy (underflowed or even empty) status, + /// balance it with sibling nodes. + /// + /// Return the new root node. + fn balance_nodes(&mut self, status: Removed, pool: &mut NodePool) -> Option { + // The current leaf node is not in a healthy state, and its critical key may have changed + // too. + // + // Start by dealing with a changed critical key for the leaf level. + if status != Removed::Empty && self.leaf_entry() == 0 { + self.update_crit_key(pool); + } + + let leaf_level = self.size - 1; + if self.heal_level(status, leaf_level, pool) { + // Tree has become empty. + self.size = 0; + return None; + } + + // Discard the root node if it has shrunk to a single sub-tree. + let mut ns = 0; + while let NodeData::Inner { + size: 0, ref tree, .. + } = pool[self.node[ns]] + { + ns += 1; + self.node[ns] = tree[0]; + } + + if ns > 0 { + for l in 0..ns { + pool.free_node(self.node[l]); + } + + // Shift the whole array instead of just 0..size because `self.size` may be cleared + // here if the path is pointing off-the-end. + slice_shift(&mut self.node, ns); + slice_shift(&mut self.entry, ns); + + if self.size > 0 { + self.size -= ns; + } + } + + // Return the root node, even when `size=0` indicating that we're at the off-the-end + // position. + Some(self.node[0]) + } + + /// After removing an entry from the node at `level`, check its health and rebalance as needed. + /// + /// Leave the path up to and including `level` in a normalized state where all entries are in + /// bounds. + /// + /// Returns true if the tree becomes empty. + fn heal_level(&mut self, status: Removed, level: usize, pool: &mut NodePool) -> bool { + match status { + Removed::Healthy => {} + Removed::Rightmost => { + // The rightmost entry was removed from the current node, so move the path so it + // points at the first entry of the next node at this level. 
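+ // Here `entry[level]` equals the node's entry count, i.e. the
+ // one-past-the-end position that `next_node` moves past.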
+ debug_assert_eq!( + usize::from(self.entry[level]), + pool[self.node[level]].entries() + ); + self.next_node(level, pool); + } + Removed::Underflow => self.underflowed_node(level, pool), + Removed::Empty => return self.empty_node(level, pool), + } + false + } + + /// The current node at `level` has underflowed, meaning that it is below half capacity but + /// not completely empty. + /// + /// Handle this by balancing entries with the right sibling node. + /// + /// Leave the path up to and including `level` in a valid state that points to the same entry. + fn underflowed_node(&mut self, level: usize, pool: &mut NodePool) { + // Look for a right sibling node at this level. If none exists, we allow the underflowed + // node to persist as the right-most node at its level. + if let Some((crit_key, rhs_node)) = self.right_sibling(level, pool) { + // New critical key for the updated right sibling node. + let new_ck: Option; + let empty; + // Make a COPY of the sibling node to avoid fighting the borrow checker. + let mut rhs = pool[rhs_node]; + match pool[self.node[level]].balance(crit_key, &mut rhs) { + None => { + // Everything got moved to the RHS node. + new_ck = self.current_crit_key(level, pool); + empty = true; + } + Some(key) => { + // Entries moved from RHS node. + new_ck = Some(key); + empty = false; + } + } + // Put back the updated RHS node data. + pool[rhs_node] = rhs; + // Update the critical key for the RHS node unless it has become a left-most + // node. + if let Some(ck) = new_ck { + self.update_right_crit_key(level, ck, pool); + } + if empty { + let empty_tree = self.empty_node(level, pool); + debug_assert!(!empty_tree); + } + + // Any Removed::Rightmost state must have been cleared above by merging nodes. If the + // current entry[level] was one off the end of the node, it will now point at a proper + // entry. + debug_assert!(usize::from(self.entry[level]) < pool[self.node[level]].entries()); + } else if usize::from(self.entry[level]) >= pool[self.node[level]].entries() { + // There's no right sibling at this level, so the node can't be rebalanced. + // Check if we are in an off-the-end position. + self.size = 0; + } + } + + /// The current node at `level` has become empty. + /// + /// Remove the node from its parent node and leave the path in a normalized state. This means + /// that the path at this level will go through the right sibling of this node. + /// + /// If the current node has no right sibling, set `self.size = 0`. + /// + /// Returns true if the tree becomes empty. + fn empty_node(&mut self, level: usize, pool: &mut NodePool) -> bool { + pool.free_node(self.node[level]); + if level == 0 { + // We just deleted the root node, so the tree is now empty. + return true; + } + + // Get the right sibling node before recursively removing nodes. + let rhs_node = self.right_sibling(level, pool).map(|(_, n)| n); + + // Remove the current sub-tree from the parent node. + let pl = level - 1; + let pe = self.entry[pl].into(); + let status = pool[self.node[pl]].inner_remove(pe); + self.heal_level(status, pl, pool); + + // Finally update the path at this level. + match rhs_node { + // We'll leave `self.entry[level]` unchanged. It can be non-zero after moving node + // entries to the right sibling node. + Some(rhs) => self.node[level] = rhs, + // We have no right sibling, so we must have deleted the right-most + // entry. The path should be moved to the "off-the-end" position. 
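+ // (`size == 0` is the same off-the-end encoding used by `prev` and
+ // `next_node`.)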
+ None => self.size = 0, + } + false + } + + /// Find the level where the right sibling to the current node at `level` branches off. + /// + /// This will be an inner node with two adjacent sub-trees: In one the current node at level is + /// a right-most node, in the other, the right sibling is a left-most node. + /// + /// Returns `None` if the current node is a right-most node so no right sibling exists. + fn right_sibling_branch_level(&self, level: usize, pool: &NodePool) -> Option { + (0..level).rposition(|l| match pool[self.node[l]] { + NodeData::Inner { size, .. } => self.entry[l] < size, + _ => panic!("Expected inner node"), + }) + } + + /// Find the level where the left sibling to the current node at `level` branches off. + fn left_sibling_branch_level(&self, level: usize) -> Option { + self.entry[0..level].iter().rposition(|&e| e != 0) + } + + /// Get the right sibling node to the current node at `level`. + /// Also return the critical key between the current node and the right sibling. + fn right_sibling(&self, level: usize, pool: &NodePool) -> Option<(F::Key, Node)> { + // Find the critical level: The deepest level where two sibling subtrees contain the + // current node and its right sibling. + self.right_sibling_branch_level(level, pool).map(|bl| { + // Extract the critical key and the `bl+1` node. + let be = usize::from(self.entry[bl]); + let crit_key; + let mut node; + { + let (keys, tree) = pool[self.node[bl]].unwrap_inner(); + crit_key = keys[be]; + node = tree[be + 1]; + } + + // Follow left-most links back down to `level`. + for _ in bl + 1..level { + node = pool[node].unwrap_inner().1[0]; + } + + (crit_key, node) + }) + } + + /// Update the critical key for the right sibling node at `level`. + fn update_right_crit_key(&self, level: usize, crit_key: F::Key, pool: &mut NodePool) { + let bl = self + .right_sibling_branch_level(level, pool) + .expect("No right sibling exists"); + match pool[self.node[bl]] { + NodeData::Inner { ref mut keys, .. } => { + keys[usize::from(self.entry[bl])] = crit_key; + } + _ => panic!("Expected inner node"), + } + } + + /// Normalize the path position such that it is either pointing at a real entry or `size=0` + /// indicating "off-the-end". + pub fn normalize(&mut self, pool: &mut NodePool) { + if let Some((leaf, entry)) = self.leaf_pos() { + if entry >= pool[leaf].entries() { + let leaf_level = self.size - 1; + self.next_node(leaf_level, pool); + } + } + } +} + +#[cfg(test)] +impl Path { + /// Check the internal consistency of this path. + pub fn verify(&self, pool: &NodePool) { + for level in 0..self.size { + match pool[self.node[level]] { + NodeData::Inner { size, tree, .. } => { + assert!( + level < self.size - 1, + "Expected leaf node at level {}", + level + ); + assert!( + self.entry[level] <= size, + "OOB inner entry {}/{} at level {}", + self.entry[level], + size, + level + ); + assert_eq!( + self.node[level + 1], + tree[usize::from(self.entry[level])], + "Node mismatch at level {}", + level + ); + } + NodeData::Leaf { size, .. } => { + assert_eq!(level, self.size - 1, "Expected inner node"); + assert!( + self.entry[level] <= size, + "OOB leaf entry {}/{}", + self.entry[level], + size, + ); + } + NodeData::Free { .. 
} => { + panic!("Free {} in path", self.node[level]); + } + } + } + } +} + +#[cfg(test)] +impl fmt::Display for Path { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.size == 0 { + write!(f, "") + } else { + write!(f, "{}[{}]", self.node[0], self.entry[0])?; + for i in 1..self.size { + write!(f, "--{}[{}]", self.node[i], self.entry[i])?; + } + Ok(()) + } + } +} + +#[cfg(test)] +mod tests { + use super::super::{Forest, NodeData, NodePool}; + use super::*; + use core::cmp::Ordering; + + struct TC(); + + impl Comparator for TC { + fn cmp(&self, a: i32, b: i32) -> Ordering { + a.cmp(&b) + } + } + + struct TF(); + + impl Forest for TF { + type Key = i32; + type Value = char; + type LeafKeys = [i32; 7]; + type LeafValues = [char; 7]; + + fn splat_key(key: Self::Key) -> Self::LeafKeys { + [key; 7] + } + + fn splat_value(value: Self::Value) -> Self::LeafValues { + [value; 7] + } + } + + #[test] + fn search_single_leaf() { + // Testing Path::new() for trees with a single leaf node. + let mut pool = NodePool::::new(); + let root = pool.alloc_node(NodeData::leaf(10, 'a')); + let mut p = Path::default(); + let comp = TC(); + + // Search for key less than stored key. + assert_eq!(p.find(5, root, &pool, &comp), None); + assert_eq!(p.size, 1); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 0); + + // Search for stored key. + assert_eq!(p.find(10, root, &pool, &comp), Some('a')); + assert_eq!(p.size, 1); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 0); + + // Search for key greater than stored key. + assert_eq!(p.find(15, root, &pool, &comp), None); + assert_eq!(p.size, 1); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 1); + + // Modify leaf node to contain two values. + match pool[root] { + NodeData::Leaf { + ref mut size, + ref mut keys, + ref mut vals, + } => { + *size = 2; + keys[1] = 20; + vals[1] = 'b'; + } + _ => unreachable!(), + } + + // Search for key between stored keys. + assert_eq!(p.find(15, root, &pool, &comp), None); + assert_eq!(p.size, 1); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 1); + + // Search for key greater than stored keys. + assert_eq!(p.find(25, root, &pool, &comp), None); + assert_eq!(p.size, 1); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 2); + } + + #[test] + fn search_single_inner() { + // Testing Path::new() for trees with a single inner node and two leaves. + let mut pool = NodePool::::new(); + let leaf1 = pool.alloc_node(NodeData::leaf(10, 'a')); + let leaf2 = pool.alloc_node(NodeData::leaf(20, 'b')); + let root = pool.alloc_node(NodeData::inner(leaf1, 20, leaf2)); + let mut p = Path::default(); + let comp = TC(); + + // Search for key less than stored keys. + assert_eq!(p.find(5, root, &pool, &comp), None); + assert_eq!(p.size, 2); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 0); + assert_eq!(p.node[1], leaf1); + assert_eq!(p.entry[1], 0); + + assert_eq!(p.find(10, root, &pool, &comp), Some('a')); + assert_eq!(p.size, 2); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 0); + assert_eq!(p.node[1], leaf1); + assert_eq!(p.entry[1], 0); + + // Midway between the two leaf nodes. 
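+ // (Per the `find` contract, a key between two leaves lands one beyond the
+ // last entry of the first leaf, not at the front of the second.)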
+ assert_eq!(p.find(15, root, &pool, &comp), None); + assert_eq!(p.size, 2); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 0); + assert_eq!(p.node[1], leaf1); + assert_eq!(p.entry[1], 1); + + assert_eq!(p.find(20, root, &pool, &comp), Some('b')); + assert_eq!(p.size, 2); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 1); + assert_eq!(p.node[1], leaf2); + assert_eq!(p.entry[1], 0); + + assert_eq!(p.find(25, root, &pool, &comp), None); + assert_eq!(p.size, 2); + assert_eq!(p.node[0], root); + assert_eq!(p.entry[0], 1); + assert_eq!(p.node[1], leaf2); + assert_eq!(p.entry[1], 1); + } +} diff --git a/cranelift/bforest/src/pool.rs b/cranelift/bforest/src/pool.rs new file mode 100644 index 0000000000..e4744d2bcb --- /dev/null +++ b/cranelift/bforest/src/pool.rs @@ -0,0 +1,220 @@ +//! B+-tree node pool. + +#[cfg(test)] +use super::Comparator; +use super::{Forest, Node, NodeData}; +use crate::entity::PrimaryMap; +#[cfg(test)] +use core::fmt; +use core::ops::{Index, IndexMut}; + +/// A pool of nodes, including a free list. +pub(super) struct NodePool { + nodes: PrimaryMap>, + freelist: Option, +} + +impl NodePool { + /// Allocate a new empty pool of nodes. + pub fn new() -> Self { + Self { + nodes: PrimaryMap::new(), + freelist: None, + } + } + + /// Free all nodes. + pub fn clear(&mut self) { + self.nodes.clear(); + self.freelist = None; + } + + /// Allocate a new node containing `data`. + pub fn alloc_node(&mut self, data: NodeData) -> Node { + debug_assert!(!data.is_free(), "can't allocate free node"); + match self.freelist { + Some(node) => { + // Remove this node from the free list. + match self.nodes[node] { + NodeData::Free { next } => self.freelist = next, + _ => panic!("Invalid {} on free list", node), + } + self.nodes[node] = data; + node + } + None => { + // The free list is empty. Allocate a new node. + self.nodes.push(data) + } + } + } + + /// Free a node. + pub fn free_node(&mut self, node: Node) { + // Quick check for a double free. + debug_assert!(!self.nodes[node].is_free(), "{} is already free", node); + self.nodes[node] = NodeData::Free { + next: self.freelist, + }; + self.freelist = Some(node); + } + + /// Free the entire tree rooted at `node`. + pub fn free_tree(&mut self, node: Node) { + if let NodeData::Inner { size, tree, .. } = self[node] { + // Note that we have to capture `tree` by value to avoid borrow checker trouble. + #[cfg_attr(feature = "cargo-clippy", allow(clippy::needless_range_loop))] + for i in 0..usize::from(size + 1) { + // Recursively free sub-trees. This recursion can never be deeper than `MAX_PATH`, + // and since most trees have less than a handful of nodes, it is worthwhile to + // avoid the heap allocation for an iterative tree traversal. + self.free_tree(tree[i]); + } + } + self.free_node(node); + } +} + +#[cfg(test)] +impl NodePool { + /// Verify the consistency of the tree rooted at `node`. + pub fn verify_tree>(&self, node: Node, comp: &C) + where + NodeData: fmt::Display, + F::Key: fmt::Display, + { + use crate::entity::EntitySet; + use alloc::vec::Vec; + use core::borrow::Borrow; + use core::cmp::Ordering; + + // The root node can't be an inner node with just a single sub-tree. It should have been + // pruned. + if let NodeData::Inner { size, .. } = self[node] { + assert!(size > 0, "Root must have more than one sub-tree"); + } + + let mut done = match self[node] { + NodeData::Inner { size, .. } | NodeData::Leaf { size, .. 
+        let mut done = match self[node] {
+            NodeData::Inner { size, .. } | NodeData::Leaf { size, .. } => {
+                EntitySet::with_capacity(size.into())
+            }
+            _ => EntitySet::new(),
+        };
+
+        let mut todo = Vec::new();
+
+        // Todo-list entries are:
+        // 1. Optional LHS key which must be <= all node entries.
+        // 2. The node reference.
+        // 3. Optional RHS key which must be > all node entries.
+        todo.push((None, node, None));
+
+        while let Some((lkey, node, rkey)) = todo.pop() {
+            assert!(done.insert(node), "Node appears more than once in tree");
+            let mut lower = lkey;
+
+            match self[node] {
+                NodeData::Inner { size, keys, tree } => {
+                    let size = size as usize;
+                    let capacity = tree.len();
+                    let keys = &keys[0..size];
+
+                    // Verify occupancy.
+                    // Right-most nodes can be small, but others must be at least half full.
+                    assert!(
+                        rkey.is_none() || (size + 1) * 2 >= capacity,
+                        "Only {}/{} entries in {}:{}, upper={}",
+                        size + 1,
+                        capacity,
+                        node,
+                        self[node],
+                        rkey.unwrap()
+                    );
+
+                    // Queue up the sub-trees, checking for duplicates.
+                    for i in 0..size + 1 {
+                        // Get an upper bound for node[i].
+                        let upper = keys.get(i).cloned().or(rkey);
+
+                        // Check that keys are strictly monotonic.
+                        if let (Some(a), Some(b)) = (lower, upper) {
+                            assert_eq!(
+                                comp.cmp(a, b),
+                                Ordering::Less,
+                                "Key order {} < {} failed in {}: {}",
+                                a,
+                                b,
+                                node,
+                                self[node]
+                            );
+                        }
+
+                        // Queue up the sub-tree.
+                        todo.push((lower, tree[i], upper));
+
+                        // Set a lower bound for the next tree.
+                        lower = upper;
+                    }
+                }
+                NodeData::Leaf { size, keys, .. } => {
+                    let size = size as usize;
+                    let capacity = keys.borrow().len();
+                    let keys = &keys.borrow()[0..size];
+
+                    // Verify occupancy.
+                    // Right-most nodes can be small, but others must be at least half full.
+                    assert!(size > 0, "Leaf {} is empty", node);
+                    assert!(
+                        rkey.is_none() || size * 2 >= capacity,
+                        "Only {}/{} entries in {}:{}, upper={}",
+                        size,
+                        capacity,
+                        node,
+                        self[node],
+                        rkey.unwrap()
+                    );
+
+                    for i in 0..size + 1 {
+                        let upper = keys.get(i).cloned().or(rkey);
+
+                        // Check that keys are strictly monotonic.
+                        if let (Some(a), Some(b)) = (lower, upper) {
+                            let wanted = if i == 0 {
+                                Ordering::Equal
+                            } else {
+                                Ordering::Less
+                            };
+                            assert_eq!(
+                                comp.cmp(a, b),
+                                wanted,
+                                "Key order for {} - {} failed in {}: {}",
+                                a,
+                                b,
+                                node,
+                                self[node]
+                            );
+                        }
+
+                        // Set a lower bound for the next key.
+                        lower = upper;
+                    }
+                }
+                NodeData::Free { .. } => panic!("Free {} reached", node),
+            }
+        }
+    }
+}
+
+impl<F: Forest> Index<Node> for NodePool<F> {
+    type Output = NodeData<F>;
+
+    fn index(&self, index: Node) -> &Self::Output {
+        self.nodes.index(index)
+    }
+}
+
+impl<F: Forest> IndexMut<Node> for NodePool<F> {
+    fn index_mut(&mut self, index: Node) -> &mut Self::Output {
+        self.nodes.index_mut(index)
+    }
+}
diff --git a/cranelift/bforest/src/set.rs b/cranelift/bforest/src/set.rs
new file mode 100644
index 0000000000..e7761a63d9
--- /dev/null
+++ b/cranelift/bforest/src/set.rs
@@ -0,0 +1,598 @@
+//! Forest of sets.
+
+use super::{Comparator, Forest, Node, NodeData, NodePool, Path, SetValue, INNER_SIZE};
+use crate::packed_option::PackedOption;
+#[cfg(test)]
+use alloc::string::String;
+#[cfg(test)]
+use core::fmt;
+use core::marker::PhantomData;
+
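One detail worth noting before the definitions that follow: a set stores keys only, so the forest's value type is `SetValue`, which is assumed here to be a zero-sized unit struct. The `LeafValues` arrays therefore occupy no space in a node, which is what lets the `node_size` test further down assert 64-byte (cache-line) nodes. A quick check of that assumption:

    // Holds only if SetValue is a zero-sized type, e.g. `struct SetValue();`.
    assert_eq!(core::mem::size_of::<[SetValue; 15]>(), 0);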
+/// Tag type defining forest types for a set.
+struct SetTypes<K>(PhantomData<K>);
+
+impl<K> Forest for SetTypes<K>
+where
+    K: Copy,
+{
+    type Key = K;
+    type Value = SetValue;
+    type LeafKeys = [K; 2 * INNER_SIZE - 1];
+    type LeafValues = [SetValue; 2 * INNER_SIZE - 1];
+
+    fn splat_key(key: Self::Key) -> Self::LeafKeys {
+        [key; 2 * INNER_SIZE - 1]
+    }
+
+    fn splat_value(value: Self::Value) -> Self::LeafValues {
+        [value; 2 * INNER_SIZE - 1]
+    }
+}
+
+/// Memory pool for a forest of `Set` instances.
+pub struct SetForest<K>
+where
+    K: Copy,
+{
+    nodes: NodePool<SetTypes<K>>,
+}
+
+impl<K> SetForest<K>
+where
+    K: Copy,
+{
+    /// Create a new empty forest.
+    pub fn new() -> Self {
+        Self {
+            nodes: NodePool::new(),
+        }
+    }
+
+    /// Clear all sets in the forest.
+    ///
+    /// All `Set` instances belonging to this forest are invalidated and should no longer be used.
+    pub fn clear(&mut self) {
+        self.nodes.clear();
+    }
+}
+
+/// B-tree representing an ordered set of `K`s using `C` for comparing elements.
+///
+/// This is not a general-purpose replacement for `BTreeSet`. See the [module
+/// documentation](index.html) for more information about design tradeoffs.
+///
+/// Sets can be cloned, but that operation should only be used as part of cloning the whole forest
+/// they belong to. *Cloning a set does not allocate new memory for the clone*. It creates an alias
+/// of the same memory.
+#[derive(Clone)]
+pub struct Set<K>
+where
+    K: Copy,
+{
+    root: PackedOption<Node>,
+    unused: PhantomData<K>,
+}
+
+impl<K> Set<K>
+where
+    K: Copy,
+{
+    /// Make an empty set.
+    pub fn new() -> Self {
+        Self {
+            root: None.into(),
+            unused: PhantomData,
+        }
+    }
+
+    /// Is this an empty set?
+    pub fn is_empty(&self) -> bool {
+        self.root.is_none()
+    }
+
+    /// Does the set contain `key`?
+    pub fn contains<C: Comparator<K>>(&self, key: K, forest: &SetForest<K>, comp: &C) -> bool {
+        self.root
+            .expand()
+            .and_then(|root| Path::default().find(key, root, &forest.nodes, comp))
+            .is_some()
+    }
+
+    /// Try to insert `key` into the set.
+    ///
+    /// If the set did not contain `key`, insert it and return true.
+    ///
+    /// If `key` is already present, don't change the set and return false.
+    pub fn insert<C: Comparator<K>>(
+        &mut self,
+        key: K,
+        forest: &mut SetForest<K>,
+        comp: &C,
+    ) -> bool {
+        self.cursor(forest, comp).insert(key)
+    }
+
+    /// Remove `key` from the set and return true.
+    ///
+    /// If `key` was not present in the set, return false.
+    pub fn remove<C: Comparator<K>>(
+        &mut self,
+        key: K,
+        forest: &mut SetForest<K>,
+        comp: &C,
+    ) -> bool {
+        let mut c = self.cursor(forest, comp);
+        if c.goto(key) {
+            c.remove();
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Remove all entries.
+    pub fn clear(&mut self, forest: &mut SetForest<K>) {
+        if let Some(root) = self.root.take() {
+            forest.nodes.free_tree(root);
+        }
+    }
+
+    /// Retains only the elements specified by the predicate.
+    ///
+    /// Remove all elements where the predicate returns false.
+    pub fn retain<F>(&mut self, forest: &mut SetForest<K>, mut predicate: F)
+    where
+        F: FnMut(K) -> bool,
+    {
+        let mut path = Path::default();
+        if let Some(root) = self.root.expand() {
+            path.first(root, &forest.nodes);
+        }
+        while let Some((node, entry)) = path.leaf_pos() {
+            if predicate(forest.nodes[node].unwrap_leaf().0[entry]) {
+                path.next(&forest.nodes);
+            } else {
+                self.root = path.remove(&mut forest.nodes).into();
+            }
+        }
+    }
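A usage sketch of the `Set` API above, assuming the unit comparator `()` over `Ord` keys that the tests in this file pass as `comp`:

    let mut forest = SetForest::<u32>::new();
    let mut set = Set::<u32>::new();

    assert!(set.insert(10, &mut forest, &()));  // newly inserted
    assert!(!set.insert(10, &mut forest, &())); // already present
    assert!(set.contains(10, &forest, &()));
    assert!(set.remove(10, &mut forest, &()));
    assert!(set.is_empty());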
+    /// Create a cursor for navigating this set. The cursor is initially positioned off the end
+    /// of the set.
+    pub fn cursor<'a, C: Comparator<K>>(
+        &'a mut self,
+        forest: &'a mut SetForest<K>,
+        comp: &'a C,
+    ) -> SetCursor<'a, K, C> {
+        SetCursor::new(self, forest, comp)
+    }
+
+    /// Create an iterator traversing this set. The iterator type is `K`.
+    pub fn iter<'a>(&'a self, forest: &'a SetForest<K>) -> SetIter<'a, K> {
+        SetIter {
+            root: self.root,
+            pool: &forest.nodes,
+            path: Path::default(),
+        }
+    }
+}
+
+impl<K> Default for Set<K>
+where
+    K: Copy,
+{
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// A position in a `Set` used to navigate and modify the ordered set.
+///
+/// A cursor always points at an element in the set, or "off the end" which is a position after the
+/// last element in the set.
+pub struct SetCursor<'a, K, C>
+where
+    K: 'a + Copy,
+    C: 'a + Comparator<K>,
+{
+    root: &'a mut PackedOption<Node>,
+    pool: &'a mut NodePool<SetTypes<K>>,
+    comp: &'a C,
+    path: Path<SetTypes<K>>,
+}
+
+impl<'a, K, C> SetCursor<'a, K, C>
+where
+    K: Copy,
+    C: Comparator<K>,
+{
+    /// Create a cursor with a default (invalid) location.
+    fn new(container: &'a mut Set<K>, forest: &'a mut SetForest<K>, comp: &'a C) -> Self {
+        Self {
+            root: &mut container.root,
+            pool: &mut forest.nodes,
+            comp,
+            path: Path::default(),
+        }
+    }
+
+    /// Is this cursor pointing to an empty set?
+    pub fn is_empty(&self) -> bool {
+        self.root.is_none()
+    }
+
+    /// Move cursor to the next element and return it.
+    ///
+    /// If the cursor reaches the end, return `None` and leave the cursor at the off-the-end
+    /// position.
+    #[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))]
+    pub fn next(&mut self) -> Option<K> {
+        self.path.next(self.pool).map(|(k, _)| k)
+    }
+
+    /// Move cursor to the previous element and return it.
+    ///
+    /// If the cursor is already pointing at the first element, leave it there and return `None`.
+    pub fn prev(&mut self) -> Option<K> {
+        self.root
+            .expand()
+            .and_then(|root| self.path.prev(root, self.pool).map(|(k, _)| k))
+    }
+
+    /// Get the current element, or `None` if the cursor is at the end.
+    pub fn elem(&self) -> Option<K> {
+        self.path
+            .leaf_pos()
+            .and_then(|(node, entry)| self.pool[node].unwrap_leaf().0.get(entry).cloned())
+    }
+
+    /// Move this cursor to `elem`.
+    ///
+    /// If `elem` is in the set, place the cursor at `elem` and return true.
+    ///
+    /// If `elem` is not in the set, place the cursor at the next larger element (or the end) and
+    /// return false.
+    pub fn goto(&mut self, elem: K) -> bool {
+        match self.root.expand() {
+            None => false,
+            Some(root) => {
+                if self.path.find(elem, root, self.pool, self.comp).is_some() {
+                    true
+                } else {
+                    self.path.normalize(self.pool);
+                    false
+                }
+            }
+        }
+    }
+
+    /// Move this cursor to the first element.
+    pub fn goto_first(&mut self) -> Option<K> {
+        self.root.map(|root| self.path.first(root, self.pool).0)
+    }
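A short navigation sketch for the cursor methods above (same unit-comparator assumption as the tests below):

    let mut forest = SetForest::<u32>::new();
    let mut set = Set::<u32>::new();
    let mut c = set.cursor(&mut forest, &());

    c.insert(50);                   // cursor parks on the inserted element
    c.insert(10);
    assert_eq!(c.next(), Some(50)); // 10 -> 50
    assert_eq!(c.next(), None);     // off the end, and it stays there
    assert!(c.goto(10));            // reposition at an existing element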
+    /// Try to insert `elem` into the set and leave the cursor at the inserted element.
+    ///
+    /// If the set did not contain `elem`, insert it and return true.
+    ///
+    /// If `elem` is already present, don't change the set, place the cursor at `goto(elem)`, and
+    /// return false.
+    pub fn insert(&mut self, elem: K) -> bool {
+        match self.root.expand() {
+            None => {
+                let root = self.pool.alloc_node(NodeData::leaf(elem, SetValue()));
+                *self.root = root.into();
+                self.path.set_root_node(root);
+                true
+            }
+            Some(root) => {
+                // TODO: Optimize the case where `self.path` is already at the correct insert pos.
+                if self.path.find(elem, root, self.pool, self.comp).is_none() {
+                    *self.root = self.path.insert(elem, SetValue(), self.pool).into();
+                    true
+                } else {
+                    false
+                }
+            }
+        }
+    }
+
+    /// Remove the current element (if any) and return it.
+    /// This advances the cursor to the next element after the removed one.
+    pub fn remove(&mut self) -> Option<K> {
+        let elem = self.elem();
+        if elem.is_some() {
+            *self.root = self.path.remove(self.pool).into();
+        }
+        elem
+    }
+}
+
+#[cfg(test)]
+impl<'a, K, C> SetCursor<'a, K, C>
+where
+    K: Copy + fmt::Display,
+    C: Comparator<K>,
+{
+    fn verify(&self) {
+        self.path.verify(self.pool);
+        self.root.map(|root| self.pool.verify_tree(root, self.comp));
+    }
+
+    /// Get a text version of the path to the current position.
+    fn tpath(&self) -> String {
+        use alloc::string::ToString;
+        self.path.to_string()
+    }
+}
+
+/// An iterator visiting the elements of a `Set`.
+pub struct SetIter<'a, K>
+where
+    K: 'a + Copy,
+{
+    root: PackedOption<Node>,
+    pool: &'a NodePool<SetTypes<K>>,
+    path: Path<SetTypes<K>>,
+}
+
+impl<'a, K> Iterator for SetIter<'a, K>
+where
+    K: 'a + Copy,
+{
+    type Item = K;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // We use `self.root` to indicate if we need to go to the first element. Reset to `None`
+        // once we've returned the first element. This also works for an empty tree since the
+        // `path.next()` call returns `None` when the path is empty. This also fuses the iterator.
+        match self.root.take() {
+            Some(root) => Some(self.path.first(root, self.pool).0),
+            None => self.path.next(self.pool).map(|(k, _)| k),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::super::NodeData;
+    use super::*;
+    use alloc::vec::Vec;
+    use core::mem;
+
+    #[test]
+    fn node_size() {
+        // check that nodes are cache line sized when keys are 32 bits.
+        type F = SetTypes<u32>;
+        assert_eq!(mem::size_of::<NodeData<F>>(), 64);
+    }
+
+    #[test]
+    fn empty() {
+        let mut f = SetForest::<u32>::new();
+        f.clear();
+
+        let mut s = Set::<u32>::new();
+        assert!(s.is_empty());
+        s.clear(&mut f);
+        assert!(!s.contains(7, &f, &()));
+
+        // Iterator for an empty set.
+        assert_eq!(s.iter(&f).next(), None);
+
+        s.retain(&mut f, |_| unreachable!());
+
+        let mut c = SetCursor::new(&mut s, &mut f, &());
+        c.verify();
+        assert_eq!(c.elem(), None);
+
+        assert_eq!(c.goto_first(), None);
+        assert_eq!(c.tpath(), "<empty path>");
+    }
+
+    #[test]
+    fn simple_cursor() {
+        let mut f = SetForest::<u32>::new();
+        let mut s = Set::<u32>::new();
+        let mut c = SetCursor::new(&mut s, &mut f, &());
+
+        assert!(c.insert(50));
+        c.verify();
+        assert_eq!(c.elem(), Some(50));
+
+        assert!(c.insert(100));
+        c.verify();
+        assert_eq!(c.elem(), Some(100));
+
+        assert!(c.insert(10));
+        c.verify();
+        assert_eq!(c.elem(), Some(10));
+
+        // Basic movement.
+        assert_eq!(c.next(), Some(50));
+        assert_eq!(c.next(), Some(100));
+        assert_eq!(c.next(), None);
+        assert_eq!(c.next(), None);
+        assert_eq!(c.prev(), Some(100));
+        assert_eq!(c.prev(), Some(50));
+        assert_eq!(c.prev(), Some(10));
+        assert_eq!(c.prev(), None);
+        assert_eq!(c.prev(), None);
+
+        assert!(c.goto(50));
+        assert_eq!(c.elem(), Some(50));
+        assert_eq!(c.remove(), Some(50));
+        c.verify();
+
+        assert_eq!(c.elem(), Some(100));
+        assert_eq!(c.remove(), Some(100));
+        c.verify();
+        assert_eq!(c.elem(), None);
+        assert_eq!(c.remove(), None);
+        c.verify();
+    }
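Note how `SetIter::next` above tells the first call apart from later ones: it keeps the root around and `take()`s it, which leaves `None` behind and fuses the iterator for free. The same `take` idiom on a plain `Option`:

    let mut marker = Some(42);
    assert_eq!(marker.take(), Some(42)); // first call consumes the marker
    assert_eq!(marker.take(), None);     // every later call sees None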
+    #[test]
+    fn two_level_sparse_tree() {
+        let mut f = SetForest::<u32>::new();
+        let mut s = Set::<u32>::new();
+        let mut c = SetCursor::new(&mut s, &mut f, &());
+
+        // Insert enough elements that we get a two-level tree.
+        // Each leaf node holds 8 elements.
+        assert!(c.is_empty());
+        for i in 0..50 {
+            assert!(c.insert(i));
+            assert_eq!(c.elem(), Some(i));
+        }
+        assert!(!c.is_empty());
+
+        assert_eq!(c.goto_first(), Some(0));
+        assert_eq!(c.tpath(), "node2[0]--node0[0]");
+
+        assert_eq!(c.prev(), None);
+        for i in 1..50 {
+            assert_eq!(c.next(), Some(i));
+        }
+        assert_eq!(c.next(), None);
+        for i in (0..50).rev() {
+            assert_eq!(c.prev(), Some(i));
+        }
+        assert_eq!(c.prev(), None);
+
+        assert!(c.goto(25));
+        for i in 25..50 {
+            assert_eq!(c.remove(), Some(i));
+            assert!(!c.is_empty());
+            c.verify();
+        }
+
+        for i in (0..25).rev() {
+            assert!(!c.is_empty());
+            assert_eq!(c.elem(), None);
+            assert_eq!(c.prev(), Some(i));
+            assert_eq!(c.remove(), Some(i));
+            c.verify();
+        }
+        assert_eq!(c.elem(), None);
+        assert!(c.is_empty());
+    }
+
+    #[test]
+    fn three_level_sparse_tree() {
+        let mut f = SetForest::<u32>::new();
+        let mut s = Set::<u32>::new();
+        let mut c = SetCursor::new(&mut s, &mut f, &());
+
+        // Insert enough elements that we get a 3-level tree.
+        // Each leaf node holds 8 elements when filled up sequentially.
+        // Inner nodes hold 8 node pointers.
+        assert!(c.is_empty());
+        for i in 0..150 {
+            assert!(c.insert(i));
+            assert_eq!(c.elem(), Some(i));
+        }
+        assert!(!c.is_empty());
+
+        assert!(c.goto(0));
+        assert_eq!(c.tpath(), "node11[0]--node2[0]--node0[0]");
+
+        assert_eq!(c.prev(), None);
+        for i in 1..150 {
+            assert_eq!(c.next(), Some(i));
+        }
+        assert_eq!(c.next(), None);
+        for i in (0..150).rev() {
+            assert_eq!(c.prev(), Some(i));
+        }
+        assert_eq!(c.prev(), None);
+
+        assert!(c.goto(125));
+        for i in 125..150 {
+            assert_eq!(c.remove(), Some(i));
+            assert!(!c.is_empty());
+            c.verify();
+        }
+
+        for i in (0..125).rev() {
+            assert!(!c.is_empty());
+            assert_eq!(c.elem(), None);
+            assert_eq!(c.prev(), Some(i));
+            assert_eq!(c.remove(), Some(i));
+            c.verify();
+        }
+        assert_eq!(c.elem(), None);
+        assert!(c.is_empty());
+    }
+
+    // Generate a densely populated 4-level tree.
+    //
+    // Level 1: 1 root
+    // Level 2: 8 inner
+    // Level 3: 64 inner
+    // Level 4: 512 leaves, up to 7680 elements
+    //
+    // A 3-level tree can hold at most 960 elements.
+    fn dense4l(f: &mut SetForest<i32>) -> Set<i32> {
+        f.clear();
+        let mut s = Set::new();
+
+        // Insert 4000 elements in 7 passes over the range to avoid the half-full leaf node
+        // pattern that comes from sequential insertion. This will generate a normal leaf layer.
+        for n in 0..4000 {
+            assert!(s.insert((n * 7) % 4000, f, &()));
+        }
+        s
+    }
+
+    #[test]
+    fn four_level() {
+        let mut f = SetForest::<i32>::new();
+        let mut s = dense4l(&mut f);
+
+        assert_eq!(
+            s.iter(&f).collect::<Vec<_>>()[0..10],
+            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+        );
+
+        let mut c = s.cursor(&mut f, &());
+
+        c.verify();
+
+        // Peel off a whole sub-tree of the root by deleting from the front.
+        // Element 900 is near the front of the second sub-tree.
+        assert!(c.goto(900));
+        assert_eq!(c.tpath(), "node48[1]--node47[0]--node26[0]--node20[4]");
+        assert!(c.goto(0));
+        for i in 0..900 {
+            assert!(!c.is_empty());
+            assert_eq!(c.remove(), Some(i));
+        }
+        c.verify();
+        assert_eq!(c.elem(), Some(900));
+
+        // Delete backwards from somewhere in the middle.
+        assert!(c.goto(3000));
+        for i in (2000..3000).rev() {
+            assert_eq!(c.prev(), Some(i));
+            assert_eq!(c.remove(), Some(i));
+            assert_eq!(c.elem(), Some(3000));
+        }
+        c.verify();
+
+        // Remove everything in a scattered manner, triggering many collapsing patterns.
+        for i in 0..4000 {
+            if c.goto((i * 7) % 4000) {
+                c.remove();
+            }
+        }
+        assert!(c.is_empty());
+    }
+
+    #[test]
+    fn four_level_clear() {
+        let mut f = SetForest::<i32>::new();
+        let mut s = dense4l(&mut f);
+        s.clear(&mut f);
+    }
+}
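A note on `dense4l` above: the stride `(n * 7) % 4000` visits every residue modulo 4000 exactly once because gcd(7, 4000) = 1, so all 4000 keys are inserted without duplicates while avoiding the half-full leaves of sequential insertion. A quick standalone check of the permutation property:

    let mut seen = [false; 4000];
    for n in 0..4000 {
        seen[(n * 7) % 4000] = true;
    }
    assert!(seen.iter().all(|&s| s));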
diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml
new file mode 100644
index 0000000000..ce8773b67c
--- /dev/null
+++ b/cranelift/codegen/Cargo.toml
@@ -0,0 +1,74 @@
+[package]
+authors = ["The Cranelift Project Developers"]
+name = "cranelift-codegen"
+version = "0.59.0"
+description = "Low-level code generator library"
+license = "Apache-2.0 WITH LLVM-exception"
+documentation = "https://cranelift.readthedocs.io/"
+repository = "https://github.com/bytecodealliance/cranelift"
+categories = ["no-std"]
+readme = "README.md"
+keywords = ["compile", "compiler", "jit"]
+build = "build.rs"
+edition = "2018"
+
+[dependencies]
+cranelift-codegen-shared = { path = "./shared", version = "0.59.0" }
+cranelift-entity = { path = "../entity", version = "0.59.0" }
+cranelift-bforest = { path = "../bforest", version = "0.59.0" }
+hashbrown = { version = "0.6", optional = true }
+target-lexicon = "0.10"
+log = { version = "0.4.6", default-features = false }
+serde = { version = "1.0.94", features = ["derive"], optional = true }
+gimli = { version = "0.20.0", default-features = false, features = ["write"], optional = true }
+smallvec = { version = "1.0.0" }
+thiserror = "1.0.4"
+byteorder = { version = "1.3.2", default-features = false }
+# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
+# Please don't add any unless they are essential to the task of creating binary
+# machine code. Integration tests that need external dependencies can be
+# accommodated in `tests`.
+
+[build-dependencies]
+cranelift-codegen-meta = { path = "meta", version = "0.59.0" }
+
+[features]
+default = ["std", "unwind"]
+
+# The "std" feature enables use of libstd. The "core" feature enables use
+# of some minimal std-like replacement libraries. At least one of these two
+# features needs to be enabled.
+std = []
+
+# The "core" feature enables use of "hashbrown" since core doesn't have
+# a HashMap implementation, and a workaround for Cargo #4866.
+core = ["hashbrown"]
+
+# This enables some additional functions useful for writing tests, but which
+# can significantly increase the size of the library.
+testing_hooks = []
+
+# This enables unwind info generation functionality.
+unwind = ["gimli"]
+
+# ISA targets for which we should build.
+# If no ISA targets are explicitly enabled, the ISA target for the host machine is enabled.
+x86 = []
+arm32 = []
+arm64 = []
+riscv = []
+
+# Option to enable all architectures.
+all-arch = [
+    "x86",
+    "arm32",
+    "arm64",
+    "riscv"
+]
+
+# For dependent crates that want to serialize some parts of cranelift
+enable-serde = ["serde"]
+
+[badges]
+maintenance = { status = "experimental" }
+travis-ci = { repository = "bytecodealliance/cranelift" }
diff --git a/cranelift/codegen/LICENSE b/cranelift/codegen/LICENSE
new file mode 100644
index 0000000000..f9d81955f4
--- /dev/null
+++ b/cranelift/codegen/LICENSE
@@ -0,0 +1,220 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + diff --git a/cranelift/codegen/README.md b/cranelift/codegen/README.md new file mode 100644 index 0000000000..18b9756aad --- /dev/null +++ b/cranelift/codegen/README.md @@ -0,0 +1,2 @@ +This crate contains the core Cranelift code generator. It translates code from an +intermediate representation into executable machine code. diff --git a/cranelift/codegen/build.rs b/cranelift/codegen/build.rs new file mode 100644 index 0000000000..bb14364050 --- /dev/null +++ b/cranelift/codegen/build.rs @@ -0,0 +1,74 @@ +// Build script. +// +// This program is run by Cargo when building cranelift-codegen. It is used to generate Rust code from +// the language definitions in the cranelift-codegen/meta directory. +// +// Environment: +// +// OUT_DIR +// Directory where generated files should be placed. +// +// TARGET +// Target triple provided by Cargo. +// +// The build script expects to be run from the directory where this build.rs file lives. The +// current directory is used to find the sources. + +use cranelift_codegen_meta as meta; + +use std::env; +use std::process; +use std::time::Instant; + +fn main() { + let start_time = Instant::now(); + + let out_dir = env::var("OUT_DIR").expect("The OUT_DIR environment variable must be set"); + let target_triple = env::var("TARGET").expect("The TARGET environment variable must be set"); + + // Configure isa targets cfg. + let isa_targets = meta::isa::Isa::all() + .iter() + .cloned() + .filter(|isa| { + let env_key = format!("CARGO_FEATURE_{}", isa.to_string().to_uppercase()); + env::var(env_key).is_ok() + }) + .collect::>(); + + let isas = if isa_targets.is_empty() { + // Try to match native target. + let target_name = target_triple.split('-').next().unwrap(); + let isa = meta::isa_from_arch(&target_name).expect("error when identifying target"); + println!("cargo:rustc-cfg=feature=\"{}\"", isa); + vec![isa] + } else { + isa_targets + }; + + let cur_dir = env::current_dir().expect("Can't access current working directory"); + let crate_dir = cur_dir.as_path(); + + // Make sure we rebuild if this build script changes (will not happen with + // if the path to this file contains non-UTF-8 bytes). 
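For context on the ISA detection above: Cargo exposes every enabled feature to a build script as an environment variable named `CARGO_FEATURE_<NAME>` (uppercased, with dashes replaced by underscores), which is what the `format!`/`env::var` probe keys off. The same check for a single hypothetical feature:

    // Inside build.rs: true when compiled with `--features x86`.
    let x86_enabled = std::env::var("CARGO_FEATURE_X86").is_ok();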
+ println!( + "cargo:rerun-if-changed={}", + crate_dir.join("build.rs").to_str().unwrap() + ); + + if let Err(err) = meta::generate(&isas, &out_dir) { + eprintln!("Error: {}", err); + process::exit(1); + } + + if env::var("CRANELIFT_VERBOSE").is_ok() { + for isa in &isas { + println!("cargo:warning=Includes support for {} ISA", isa.to_string()); + } + println!( + "cargo:warning=Build step took {:?}.", + Instant::now() - start_time + ); + println!("cargo:warning=Generated files are in {}", out_dir); + } +} diff --git a/cranelift/codegen/meta/Cargo.toml b/cranelift/codegen/meta/Cargo.toml new file mode 100644 index 0000000000..cc60048144 --- /dev/null +++ b/cranelift/codegen/meta/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "cranelift-codegen-meta" +authors = ["The Cranelift Project Developers"] +version = "0.59.0" +description = "Metaprogram for cranelift-codegen code generator library" +license = "Apache-2.0 WITH LLVM-exception" +repository = "https://github.com/bytecodealliance/cranelift" +readme = "README.md" +edition = "2018" + +[dependencies] +cranelift-codegen-shared = { path = "../shared", version = "0.59.0" } +cranelift-entity = { path = "../../entity", version = "0.59.0" } + +[badges] +maintenance = { status = "experimental" } +travis-ci = { repository = "bytecodealliance/cranelift" } + +[package.metadata.docs.rs] +rustdoc-args = [ "--document-private-items" ] diff --git a/cranelift/codegen/meta/LICENSE b/cranelift/codegen/meta/LICENSE new file mode 100644 index 0000000000..f9d81955f4 --- /dev/null +++ b/cranelift/codegen/meta/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
+ diff --git a/cranelift/codegen/meta/README.md b/cranelift/codegen/meta/README.md new file mode 100644 index 0000000000..c0c8648b29 --- /dev/null +++ b/cranelift/codegen/meta/README.md @@ -0,0 +1,2 @@ +This crate contains the metaprogram used by cranelift-codegen. It's not +useful on its own. diff --git a/cranelift/codegen/meta/src/cdsl/ast.rs b/cranelift/codegen/meta/src/cdsl/ast.rs new file mode 100644 index 0000000000..82cdbad762 --- /dev/null +++ b/cranelift/codegen/meta/src/cdsl/ast.rs @@ -0,0 +1,753 @@ +use crate::cdsl::instructions::{InstSpec, Instruction, InstructionPredicate}; +use crate::cdsl::operands::{OperandKind, OperandKindFields}; +use crate::cdsl::types::ValueType; +use crate::cdsl::typevar::{TypeSetBuilder, TypeVar}; + +use cranelift_entity::{entity_impl, PrimaryMap, SparseMap, SparseMapValue}; + +use std::fmt; +use std::iter::IntoIterator; + +pub(crate) enum Expr { + Var(VarIndex), + Literal(Literal), +} + +impl Expr { + pub fn maybe_literal(&self) -> Option<&Literal> { + match &self { + Expr::Literal(lit) => Some(lit), + _ => None, + } + } + + pub fn maybe_var(&self) -> Option { + if let Expr::Var(var) = &self { + Some(*var) + } else { + None + } + } + + pub fn unwrap_var(&self) -> VarIndex { + self.maybe_var() + .expect("tried to unwrap a non-Var content in Expr::unwrap_var") + } + + pub fn to_rust_code(&self, var_pool: &VarPool) -> String { + match self { + Expr::Var(var_index) => var_pool.get(*var_index).to_rust_code(), + Expr::Literal(literal) => literal.to_rust_code(), + } + } +} + +/// An AST definition associates a set of variables with the values produced by an expression. +pub(crate) struct Def { + pub apply: Apply, + pub defined_vars: Vec, +} + +impl Def { + pub fn to_comment_string(&self, var_pool: &VarPool) -> String { + let results = self + .defined_vars + .iter() + .map(|&x| var_pool.get(x).name.as_str()) + .collect::>(); + + let results = if results.len() == 1 { + results[0].to_string() + } else { + format!("({})", results.join(", ")) + }; + + format!("{} := {}", results, self.apply.to_comment_string(var_pool)) + } +} + +pub(crate) struct DefPool { + pool: PrimaryMap, +} + +impl DefPool { + pub fn new() -> Self { + Self { + pool: PrimaryMap::new(), + } + } + pub fn get(&self, index: DefIndex) -> &Def { + self.pool.get(index).unwrap() + } + pub fn next_index(&self) -> DefIndex { + self.pool.next_key() + } + pub fn create_inst(&mut self, apply: Apply, defined_vars: Vec) -> DefIndex { + self.pool.push(Def { + apply, + defined_vars, + }) + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct DefIndex(u32); +entity_impl!(DefIndex); + +/// A definition which would lead to generate a block creation. +#[derive(Clone)] +pub(crate) struct Block { + /// Instruction index after which the block entry is set. + pub location: DefIndex, + /// Variable holding the new created block. 
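(A note on the index types here: `DefIndex` is a `u32` newtype wired up by `entity_impl!`, so definitions are referenced through compact indices into a `PrimaryMap` rather than through pointers, and `BlockPool` keys its `SparseMap` by the same indices. A lookup sketch using the accessors defined in this file, with `def_pool`, `block_pool`, and `index` as hypothetical values:

    let def: &Def = def_pool.get(index);               // PrimaryMap-backed; panics if absent
    let block: Option<&Block> = block_pool.get(index); // SparseMap-backed
)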
+ pub name: VarIndex, +} + +pub(crate) struct BlockPool { + pool: SparseMap, +} + +impl SparseMapValue for Block { + fn key(&self) -> DefIndex { + self.location + } +} + +impl BlockPool { + pub fn new() -> Self { + Self { + pool: SparseMap::new(), + } + } + pub fn get(&self, index: DefIndex) -> Option<&Block> { + self.pool.get(index) + } + pub fn create_block(&mut self, name: VarIndex, location: DefIndex) { + if self.pool.contains_key(location) { + panic!("Attempt to insert 2 blocks after the same instruction") + } + self.pool.insert(Block { location, name }); + } + pub fn is_empty(&self) -> bool { + self.pool.is_empty() + } +} + +// Implement IntoIterator such that we can iterate over blocks which are in the block pool. +impl<'a> IntoIterator for &'a BlockPool { + type Item = <&'a SparseMap as IntoIterator>::Item; + type IntoIter = <&'a SparseMap as IntoIterator>::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.pool.into_iter() + } +} + +#[derive(Clone, Debug)] +pub(crate) enum Literal { + /// A value of an enumerated immediate operand. + /// + /// Some immediate operand kinds like `intcc` and `floatcc` have an enumerated range of values + /// corresponding to a Rust enum type. An `Enumerator` object is an AST leaf node representing one + /// of the values. + Enumerator { + rust_type: &'static str, + value: &'static str, + }, + + /// A bitwise value of an immediate operand, used for bitwise exact floating point constants. + Bits { rust_type: &'static str, value: u64 }, + + /// A value of an integer immediate operand. + Int(i64), + + /// A empty list of variable set of arguments. + EmptyVarArgs, +} + +impl Literal { + pub fn enumerator_for(kind: &OperandKind, value: &'static str) -> Self { + let value = match &kind.fields { + OperandKindFields::ImmEnum(values) => values.get(value).unwrap_or_else(|| { + panic!( + "nonexistent value '{}' in enumeration '{}'", + value, kind.rust_type + ) + }), + _ => panic!("enumerator is for enum values"), + }; + Literal::Enumerator { + rust_type: kind.rust_type, + value, + } + } + + pub fn bits(kind: &OperandKind, bits: u64) -> Self { + match kind.fields { + OperandKindFields::ImmValue => {} + _ => panic!("bits_of is for immediate scalar types"), + } + Literal::Bits { + rust_type: kind.rust_type, + value: bits, + } + } + + pub fn constant(kind: &OperandKind, value: i64) -> Self { + match kind.fields { + OperandKindFields::ImmValue => {} + _ => panic!("constant is for immediate scalar types"), + } + Literal::Int(value) + } + + pub fn empty_vararg() -> Self { + Literal::EmptyVarArgs + } + + pub fn to_rust_code(&self) -> String { + match self { + Literal::Enumerator { rust_type, value } => format!("{}::{}", rust_type, value), + Literal::Bits { rust_type, value } => format!("{}::with_bits({:#x})", rust_type, value), + Literal::Int(val) => val.to_string(), + Literal::EmptyVarArgs => "&[]".into(), + } + } +} + +#[derive(Clone, Copy, Debug)] +pub(crate) enum PatternPosition { + Source, + Destination, +} + +/// A free variable. +/// +/// When variables are used in `XForms` with source and destination patterns, they are classified +/// as follows: +/// +/// Input values: Uses in the source pattern with no preceding def. These may appear as inputs in +/// the destination pattern too, but no new inputs can be introduced. +/// +/// Output values: Variables that are defined in both the source and destination pattern. These +/// values may have uses outside the source pattern, and the destination pattern must compute the +/// same value. 
+/// +/// Intermediate values: Values that are defined in the source pattern, but not in the destination +/// pattern. These may have uses outside the source pattern, so the defining instruction can't be +/// deleted immediately. +/// +/// Temporary values are defined only in the destination pattern. +pub(crate) struct Var { + pub name: String, + + /// The `Def` defining this variable in a source pattern. + pub src_def: Option, + + /// The `Def` defining this variable in a destination pattern. + pub dst_def: Option, + + /// TypeVar representing the type of this variable. + type_var: Option, + + /// Is this the original type variable, or has it be redefined with set_typevar? + is_original_type_var: bool, +} + +impl Var { + fn new(name: String) -> Self { + Self { + name, + src_def: None, + dst_def: None, + type_var: None, + is_original_type_var: false, + } + } + + /// Is this an input value to the src pattern? + pub fn is_input(&self) -> bool { + self.src_def.is_none() && self.dst_def.is_none() + } + + /// Is this an output value, defined in both src and dst patterns? + pub fn is_output(&self) -> bool { + self.src_def.is_some() && self.dst_def.is_some() + } + + /// Is this an intermediate value, defined only in the src pattern? + pub fn is_intermediate(&self) -> bool { + self.src_def.is_some() && self.dst_def.is_none() + } + + /// Is this a temp value, defined only in the dst pattern? + pub fn is_temp(&self) -> bool { + self.src_def.is_none() && self.dst_def.is_some() + } + + /// Get the def of this variable according to the position. + pub fn get_def(&self, position: PatternPosition) -> Option { + match position { + PatternPosition::Source => self.src_def, + PatternPosition::Destination => self.dst_def, + } + } + + pub fn set_def(&mut self, position: PatternPosition, def: DefIndex) { + assert!( + self.get_def(position).is_none(), + format!("redefinition of variable {}", self.name) + ); + match position { + PatternPosition::Source => { + self.src_def = Some(def); + } + PatternPosition::Destination => { + self.dst_def = Some(def); + } + } + } + + /// Get the type variable representing the type of this variable. + pub fn get_or_create_typevar(&mut self) -> TypeVar { + match &self.type_var { + Some(tv) => tv.clone(), + None => { + // Create a new type var in which we allow all types. + let tv = TypeVar::new( + format!("typeof_{}", self.name), + format!("Type of the pattern variable {:?}", self), + TypeSetBuilder::all(), + ); + self.type_var = Some(tv.clone()); + self.is_original_type_var = true; + tv + } + } + } + pub fn get_typevar(&self) -> Option { + self.type_var.clone() + } + pub fn set_typevar(&mut self, tv: TypeVar) { + self.is_original_type_var = if let Some(previous_tv) = &self.type_var { + *previous_tv == tv + } else { + false + }; + self.type_var = Some(tv); + } + + /// Check if this variable has a free type variable. If not, the type of this variable is + /// computed from the type of another variable. 
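The four classifications above boil down to a truth table over which pattern defines the variable; an equivalent sketch (illustrative only, with `var` hypothetical):

    match (var.src_def.is_some(), var.dst_def.is_some()) {
        (false, false) => "input",       // used but never defined
        (true, true) => "output",        // defined in both patterns
        (true, false) => "intermediate", // defined only in the source
        (false, true) => "temp",         // defined only in the destination
    }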
+ pub fn has_free_typevar(&self) -> bool { + match &self.type_var { + Some(tv) => tv.base.is_none() && self.is_original_type_var, + None => false, + } + } + + pub fn to_rust_code(&self) -> String { + self.name.clone() + } + fn rust_type(&self) -> String { + self.type_var.as_ref().unwrap().to_rust_code() + } +} + +impl fmt::Debug for Var { + fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { + fmt.write_fmt(format_args!( + "Var({}{}{})", + self.name, + if self.src_def.is_some() { ", src" } else { "" }, + if self.dst_def.is_some() { ", dst" } else { "" } + )) + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct VarIndex(u32); +entity_impl!(VarIndex); + +pub(crate) struct VarPool { + pool: PrimaryMap, +} + +impl VarPool { + pub fn new() -> Self { + Self { + pool: PrimaryMap::new(), + } + } + pub fn get(&self, index: VarIndex) -> &Var { + self.pool.get(index).unwrap() + } + pub fn get_mut(&mut self, index: VarIndex) -> &mut Var { + self.pool.get_mut(index).unwrap() + } + pub fn create(&mut self, name: impl Into) -> VarIndex { + self.pool.push(Var::new(name.into())) + } +} + +/// Contains constants created in the AST that must be inserted into the true [ConstantPool] when +/// the legalizer code is generated. The constant data is named in the order it is inserted; +/// inserting data using [insert] will avoid duplicates. +/// +/// [ConstantPool]: ../../../cranelift_codegen/ir/constant/struct.ConstantPool.html +/// [insert]: ConstPool::insert +pub(crate) struct ConstPool { + pool: Vec>, +} + +impl ConstPool { + /// Create an empty constant pool. + pub fn new() -> Self { + Self { pool: vec![] } + } + + /// Create a name for a constant from its position in the pool. + fn create_name(position: usize) -> String { + format!("const{}", position) + } + + /// Insert constant data into the pool, returning the name of the variable used to reference it. + /// This method will search for data that matches the new data and return the existing constant + /// name to avoid duplicates. + pub fn insert(&mut self, data: Vec) -> String { + let possible_position = self.pool.iter().position(|d| d == &data); + let position = if let Some(found_position) = possible_position { + found_position + } else { + let new_position = self.pool.len(); + self.pool.push(data); + new_position + }; + ConstPool::create_name(position) + } + + /// Iterate over the name/value pairs in the pool. + pub fn iter(&self) -> impl Iterator)> { + self.pool + .iter() + .enumerate() + .map(|(i, v)| (ConstPool::create_name(i), v)) + } +} + +/// Apply an instruction to arguments. +/// +/// An `Apply` AST expression is created by using function call syntax on instructions. This +/// applies to both bound and unbound polymorphic instructions. +pub(crate) struct Apply { + pub inst: Instruction, + pub args: Vec, + pub value_types: Vec, +} + +impl Apply { + pub fn new(target: InstSpec, args: Vec) -> Self { + let (inst, value_types) = match target { + InstSpec::Inst(inst) => (inst, Vec::new()), + InstSpec::Bound(bound_inst) => (bound_inst.inst, bound_inst.value_types), + }; + + // Apply should only operate on concrete value types, not "any". + let value_types = value_types + .into_iter() + .map(|vt| vt.expect("shouldn't be Any")) + .collect(); + + // Basic check on number of arguments. + assert!( + inst.operands_in.len() == args.len(), + format!("incorrect number of arguments in instruction {}", inst.name) + ); + + // Check that the kinds of Literals arguments match the expected operand. 
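The literals validated here are built with the `Literal` constructors defined earlier, which already panic on kind mismatches when the metaprogram runs. A sketch of their intended use, with `intcc` and `imm64` as hypothetical operand-kind values:

    let eq = Literal::enumerator_for(&intcc, "eq"); // panics on an unknown enum name
    let two = Literal::constant(&imm64, 2);         // plain integer immediate
    let none = Literal::empty_vararg();             // renders as "&[]"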
+ for &imm_index in &inst.imm_opnums { + let arg = &args[imm_index]; + if let Some(literal) = arg.maybe_literal() { + let op = &inst.operands_in[imm_index]; + match &op.kind.fields { + OperandKindFields::ImmEnum(values) => { + if let Literal::Enumerator { value, .. } = literal { + assert!( + values.iter().any(|(_key, v)| v == value), + "Nonexistent enum value '{}' passed to field of kind '{}' -- \ + did you use the right enum?", + value, + op.kind.rust_type + ); + } else { + panic!( + "Passed non-enum field value {:?} to field of kind {}", + literal, op.kind.rust_type + ); + } + } + OperandKindFields::ImmValue => match &literal { + Literal::Enumerator { value, .. } => panic!( + "Expected immediate value in immediate field of kind '{}', \ + obtained enum value '{}'", + op.kind.rust_type, value + ), + Literal::Bits { .. } | Literal::Int(_) | Literal::EmptyVarArgs => {} + }, + _ => { + panic!( + "Literal passed to non-literal field of kind {}", + op.kind.rust_type + ); + } + } + } + } + + Self { + inst, + args, + value_types, + } + } + + fn to_comment_string(&self, var_pool: &VarPool) -> String { + let args = self + .args + .iter() + .map(|arg| arg.to_rust_code(var_pool)) + .collect::>() + .join(", "); + + let mut inst_and_bound_types = vec![self.inst.name.to_string()]; + inst_and_bound_types.extend(self.value_types.iter().map(|vt| vt.to_string())); + let inst_name = inst_and_bound_types.join("."); + + format!("{}({})", inst_name, args) + } + + pub fn inst_predicate(&self, var_pool: &VarPool) -> InstructionPredicate { + let mut pred = InstructionPredicate::new(); + for (format_field, &op_num) in self + .inst + .format + .imm_fields + .iter() + .zip(self.inst.imm_opnums.iter()) + { + let arg = &self.args[op_num]; + if arg.maybe_var().is_some() { + // Ignore free variables for now. + continue; + } + pred = pred.and(InstructionPredicate::new_is_field_equal_ast( + &*self.inst.format, + format_field, + arg.to_rust_code(var_pool), + )); + } + + // Add checks for any bound secondary type variables. We can't check the controlling type + // variable this way since it may not appear as the type of an operand. + if self.value_types.len() > 1 { + let poly = self + .inst + .polymorphic_info + .as_ref() + .expect("must have polymorphic info if it has bounded types"); + for (bound_type, type_var) in + self.value_types[1..].iter().zip(poly.other_typevars.iter()) + { + pred = pred.and(InstructionPredicate::new_typevar_check( + &self.inst, type_var, bound_type, + )); + } + } + + pred + } + + /// Same as `inst_predicate()`, but also check the controlling type variable. + pub fn inst_predicate_with_ctrl_typevar(&self, var_pool: &VarPool) -> InstructionPredicate { + let mut pred = self.inst_predicate(var_pool); + + if !self.value_types.is_empty() { + let bound_type = &self.value_types[0]; + let poly = self.inst.polymorphic_info.as_ref().unwrap(); + let type_check = if poly.use_typevar_operand { + InstructionPredicate::new_typevar_check(&self.inst, &poly.ctrl_typevar, bound_type) + } else { + InstructionPredicate::new_ctrl_typevar_check(&bound_type) + }; + pred = pred.and(type_check); + } + + pred + } + + pub fn rust_builder(&self, defined_vars: &[VarIndex], var_pool: &VarPool) -> String { + let mut args = self + .args + .iter() + .map(|expr| expr.to_rust_code(var_pool)) + .collect::>() + .join(", "); + + // Do we need to pass an explicit type argument? 
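+        // Illustrative sketch of the two cases (instruction names hypothetical): if the
+        // controlling type can be inferred from a value operand, the generated call looks like
+        // `iadd(x, y)`; otherwise the type of the first defined variable is passed explicitly,
+        // as in `some_inst(I32, x, y)`.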
+ if let Some(poly) = &self.inst.polymorphic_info { + if !poly.use_typevar_operand { + args = format!("{}, {}", var_pool.get(defined_vars[0]).rust_type(), args); + } + } + + format!("{}({})", self.inst.snake_name(), args) + } +} + +// Simple helpers for legalize actions construction. + +pub(crate) enum DummyExpr { + Var(DummyVar), + Literal(Literal), + Constant(DummyConstant), + Apply(InstSpec, Vec), + Block(DummyVar), +} + +#[derive(Clone)] +pub(crate) struct DummyVar { + pub name: String, +} + +impl Into for DummyVar { + fn into(self) -> DummyExpr { + DummyExpr::Var(self) + } +} +impl Into for Literal { + fn into(self) -> DummyExpr { + DummyExpr::Literal(self) + } +} + +#[derive(Clone)] +pub(crate) struct DummyConstant(pub(crate) Vec); + +pub(crate) fn constant(data: Vec) -> DummyConstant { + DummyConstant(data) +} + +impl Into for DummyConstant { + fn into(self) -> DummyExpr { + DummyExpr::Constant(self) + } +} + +pub(crate) fn var(name: &str) -> DummyVar { + DummyVar { + name: name.to_owned(), + } +} + +pub(crate) struct DummyDef { + pub expr: DummyExpr, + pub defined_vars: Vec, +} + +pub(crate) struct ExprBuilder { + expr: DummyExpr, +} + +impl ExprBuilder { + pub fn apply(inst: InstSpec, args: Vec) -> Self { + let expr = DummyExpr::Apply(inst, args); + Self { expr } + } + + pub fn assign_to(self, defined_vars: Vec) -> DummyDef { + DummyDef { + expr: self.expr, + defined_vars, + } + } + + pub fn block(name: DummyVar) -> Self { + let expr = DummyExpr::Block(name); + Self { expr } + } +} + +macro_rules! def_rhs { + // inst(a, b, c) + ($inst:ident($($src:expr),*)) => { + ExprBuilder::apply($inst.into(), vec![$($src.clone().into()),*]) + }; + + // inst.type(a, b, c) + ($inst:ident.$type:ident($($src:expr),*)) => { + ExprBuilder::apply($inst.bind($type).into(), vec![$($src.clone().into()),*]) + }; +} + +// Helper macro to define legalization recipes. +macro_rules! def { + // x = ... + ($dest:ident = $($tt:tt)*) => { + def_rhs!($($tt)*).assign_to(vec![$dest.clone()]) + }; + + // (x, y, ...) = ... + (($($dest:ident),*) = $($tt:tt)*) => { + def_rhs!($($tt)*).assign_to(vec![$($dest.clone()),*]) + }; + + // An instruction with no results. + ($($tt:tt)*) => { + def_rhs!($($tt)*).assign_to(Vec::new()) + } +} + +// Helper macro to define legalization recipes. +macro_rules! block { + // a basic block definition, splitting the current block in 2. 
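+    // Illustrative usage of `def!` and `block!` in a legalization recipe (variable and
+    // instruction names are hypothetical):
+    //
+    //     def!(a = iadd(x, y))          // x = ... form
+    //     def!((lo, hi) = isplit(x))    // multi-result form
+    //     block!(resume)                // split the current block here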
+ ($block: ident) => { + ExprBuilder::block($block).assign_to(Vec::new()) + }; +} + +#[cfg(test)] +mod tests { + use crate::cdsl::ast::ConstPool; + + #[test] + fn const_pool_returns_var_names() { + let mut c = ConstPool::new(); + assert_eq!(c.insert([0, 1, 2].to_vec()), "const0"); + assert_eq!(c.insert([1, 2, 3].to_vec()), "const1"); + } + + #[test] + fn const_pool_avoids_duplicates() { + let data = [0, 1, 2].to_vec(); + let mut c = ConstPool::new(); + assert_eq!(c.pool.len(), 0); + + assert_eq!(c.insert(data.clone()), "const0"); + assert_eq!(c.pool.len(), 1); + + assert_eq!(c.insert(data), "const0"); + assert_eq!(c.pool.len(), 1); + } + + #[test] + fn const_pool_iterates() { + let mut c = ConstPool::new(); + c.insert([0, 1, 2].to_vec()); + c.insert([3, 4, 5].to_vec()); + + let mut iter = c.iter(); + assert_eq!(iter.next(), Some(("const0".to_owned(), &vec![0, 1, 2]))); + assert_eq!(iter.next(), Some(("const1".to_owned(), &vec![3, 4, 5]))); + assert_eq!(iter.next(), None); + } +} diff --git a/cranelift/codegen/meta/src/cdsl/cpu_modes.rs b/cranelift/codegen/meta/src/cdsl/cpu_modes.rs new file mode 100644 index 0000000000..7d119b00ce --- /dev/null +++ b/cranelift/codegen/meta/src/cdsl/cpu_modes.rs @@ -0,0 +1,88 @@ +use std::collections::{hash_map, HashMap, HashSet}; +use std::iter::FromIterator; + +use crate::cdsl::encodings::Encoding; +use crate::cdsl::types::{LaneType, ValueType}; +use crate::cdsl::xform::{TransformGroup, TransformGroupIndex}; + +pub(crate) struct CpuMode { + pub name: &'static str, + default_legalize: Option, + monomorphic_legalize: Option, + typed_legalize: HashMap, + pub encodings: Vec, +} + +impl CpuMode { + pub fn new(name: &'static str) -> Self { + Self { + name, + default_legalize: None, + monomorphic_legalize: None, + typed_legalize: HashMap::new(), + encodings: Vec::new(), + } + } + + pub fn set_encodings(&mut self, encodings: Vec) { + assert!(self.encodings.is_empty(), "clobbering encodings"); + self.encodings = encodings; + } + + pub fn legalize_monomorphic(&mut self, group: &TransformGroup) { + assert!(self.monomorphic_legalize.is_none()); + self.monomorphic_legalize = Some(group.id); + } + pub fn legalize_default(&mut self, group: &TransformGroup) { + assert!(self.default_legalize.is_none()); + self.default_legalize = Some(group.id); + } + pub fn legalize_value_type(&mut self, lane_type: impl Into, group: &TransformGroup) { + assert!(self + .typed_legalize + .insert(lane_type.into(), group.id) + .is_none()); + } + pub fn legalize_type(&mut self, lane_type: impl Into, group: &TransformGroup) { + assert!(self + .typed_legalize + .insert(lane_type.into().into(), group.id) + .is_none()); + } + + pub fn get_default_legalize_code(&self) -> TransformGroupIndex { + self.default_legalize + .expect("a finished CpuMode must have a default legalize code") + } + pub fn get_legalize_code_for(&self, typ: &Option) -> TransformGroupIndex { + match typ { + Some(typ) => self + .typed_legalize + .get(typ) + .copied() + .unwrap_or_else(|| self.get_default_legalize_code()), + None => self + .monomorphic_legalize + .unwrap_or_else(|| self.get_default_legalize_code()), + } + } + pub fn get_legalized_types(&self) -> hash_map::Keys { + self.typed_legalize.keys() + } + + /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the directly + /// reachable set of TransformGroup this TargetIsa uses. 
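+    /// (The result is collected into a sorted `Vec` so that the generated code does not depend
+    /// on `HashSet` iteration order.)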
+ pub fn direct_transform_groups(&self) -> Vec { + let mut set = HashSet::new(); + if let Some(i) = &self.default_legalize { + set.insert(*i); + } + if let Some(i) = &self.monomorphic_legalize { + set.insert(*i); + } + set.extend(self.typed_legalize.values().cloned()); + let mut ret = Vec::from_iter(set); + ret.sort(); + ret + } +} diff --git a/cranelift/codegen/meta/src/cdsl/encodings.rs b/cranelift/codegen/meta/src/cdsl/encodings.rs new file mode 100644 index 0000000000..f66746f92f --- /dev/null +++ b/cranelift/codegen/meta/src/cdsl/encodings.rs @@ -0,0 +1,179 @@ +use crate::cdsl::instructions::{ + InstSpec, Instruction, InstructionPredicate, InstructionPredicateNode, + InstructionPredicateNumber, InstructionPredicateRegistry, ValueTypeOrAny, +}; +use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes}; +use crate::cdsl::settings::SettingPredicateNumber; +use crate::cdsl::types::ValueType; +use std::rc::Rc; +use std::string::ToString; + +/// Encoding for a concrete instruction. +/// +/// An `Encoding` object ties an instruction opcode with concrete type variables together with an +/// encoding recipe and encoding encbits. +/// +/// The concrete instruction can be in three different forms: +/// +/// 1. A naked opcode: `trap` for non-polymorphic instructions. +/// 2. With bound type variables: `iadd.i32` for polymorphic instructions. +/// 3. With operands providing constraints: `icmp.i32(intcc.eq, x, y)`. +/// +/// If the instruction is polymorphic, all type variables must be provided. +pub(crate) struct EncodingContent { + /// The `Instruction` or `BoundInstruction` being encoded. + inst: InstSpec, + + /// The `EncodingRecipe` to use. + pub recipe: EncodingRecipeNumber, + + /// Additional encoding bits to be interpreted by `recipe`. + pub encbits: u16, + + /// An instruction predicate that must be true to allow selecting this encoding. + pub inst_predicate: Option, + + /// An ISA predicate that must be true to allow selecting this encoding. + pub isa_predicate: Option, + + /// The value type this encoding has been bound to, for encodings of polymorphic instructions. + pub bound_type: Option, +} + +impl EncodingContent { + pub fn inst(&self) -> &Instruction { + self.inst.inst() + } + pub fn to_rust_comment(&self, recipes: &Recipes) -> String { + format!("[{}#{:02x}]", recipes[self.recipe].name, self.encbits) + } +} + +pub(crate) type Encoding = Rc; + +pub(crate) struct EncodingBuilder { + inst: InstSpec, + recipe: EncodingRecipeNumber, + encbits: u16, + inst_predicate: Option, + isa_predicate: Option, + bound_type: Option, +} + +impl EncodingBuilder { + pub fn new(inst: InstSpec, recipe: EncodingRecipeNumber, encbits: u16) -> Self { + let (inst_predicate, bound_type) = match &inst { + InstSpec::Bound(inst) => { + let other_typevars = &inst.inst.polymorphic_info.as_ref().unwrap().other_typevars; + + assert_eq!( + inst.value_types.len(), + other_typevars.len() + 1, + "partially bound polymorphic instruction" + ); + + // Add secondary type variables to the instruction predicate. 
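+                // The controlling (first) type variable is recorded separately as `bound_type`
+                // below; only the secondary type variables need an explicit predicate here.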
+ let value_types = &inst.value_types; + let mut inst_predicate: Option = None; + for (typevar, value_type) in other_typevars.iter().zip(value_types.iter().skip(1)) { + let value_type = match value_type { + ValueTypeOrAny::Any => continue, + ValueTypeOrAny::ValueType(vt) => vt, + }; + let type_predicate = + InstructionPredicate::new_typevar_check(&inst.inst, typevar, value_type); + inst_predicate = Some(type_predicate.into()); + } + + // Add immediate value predicates + for (immediate_value, immediate_operand) in inst + .immediate_values + .iter() + .zip(inst.inst.operands_in.iter().filter(|o| o.is_immediate())) + { + let immediate_predicate = InstructionPredicate::new_is_field_equal( + &inst.inst.format, + immediate_operand.kind.rust_field_name, + immediate_value.to_string(), + ); + inst_predicate = if let Some(type_predicate) = inst_predicate { + Some(type_predicate.and(immediate_predicate)) + } else { + Some(immediate_predicate.into()) + } + } + + let ctrl_type = value_types[0] + .clone() + .expect("Controlling type shouldn't be Any"); + (inst_predicate, Some(ctrl_type)) + } + + InstSpec::Inst(inst) => { + assert!( + inst.polymorphic_info.is_none(), + "unbound polymorphic instruction" + ); + (None, None) + } + }; + + Self { + inst, + recipe, + encbits, + inst_predicate, + isa_predicate: None, + bound_type, + } + } + + pub fn inst_predicate(mut self, inst_predicate: InstructionPredicateNode) -> Self { + let inst_predicate = Some(match self.inst_predicate { + Some(node) => node.and(inst_predicate), + None => inst_predicate.into(), + }); + self.inst_predicate = inst_predicate; + self + } + + pub fn isa_predicate(mut self, isa_predicate: SettingPredicateNumber) -> Self { + assert!(self.isa_predicate.is_none()); + self.isa_predicate = Some(isa_predicate); + self + } + + pub fn build( + self, + recipes: &Recipes, + inst_pred_reg: &mut InstructionPredicateRegistry, + ) -> Encoding { + let inst_predicate = self.inst_predicate.map(|pred| inst_pred_reg.insert(pred)); + + let inst = self.inst.inst(); + assert!( + Rc::ptr_eq(&inst.format, &recipes[self.recipe].format), + format!( + "Inst {} and recipe {} must have the same format!", + inst.name, recipes[self.recipe].name + ) + ); + + assert_eq!( + inst.is_branch && !inst.is_indirect_branch, + recipes[self.recipe].branch_range.is_some(), + "Inst {}'s is_branch contradicts recipe {} branch_range!", + inst.name, + recipes[self.recipe].name + ); + + Rc::new(EncodingContent { + inst: self.inst, + recipe: self.recipe, + encbits: self.encbits, + inst_predicate, + isa_predicate: self.isa_predicate, + bound_type: self.bound_type, + }) + } +} diff --git a/cranelift/codegen/meta/src/cdsl/formats.rs b/cranelift/codegen/meta/src/cdsl/formats.rs new file mode 100644 index 0000000000..e713a8bccb --- /dev/null +++ b/cranelift/codegen/meta/src/cdsl/formats.rs @@ -0,0 +1,171 @@ +use crate::cdsl::operands::OperandKind; +use std::fmt; +use std::rc::Rc; + +/// An immediate field in an instruction format. +/// +/// This corresponds to a single member of a variant of the `InstructionData` +/// data type. +#[derive(Debug)] +pub(crate) struct FormatField { + /// Immediate operand kind. + pub kind: OperandKind, + + /// Member name in InstructionData variant. + pub member: &'static str, +} + +/// Every instruction opcode has a corresponding instruction format which determines the number of +/// operands and their kinds. Instruction formats are identified structurally, i.e., the format of +/// an instruction is derived from the kinds of operands used in its declaration. 
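+///
+/// For example (illustrative): an integer add takes two SSA value operands and no immediates,
+/// while a load also carries a memory-flags immediate and an offset immediate, so the two use
+/// structurally different formats.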
+/// +/// The instruction format stores two separate lists of operands: Immediates and values. Immediate +/// operands (including entity references) are represented as explicit members in the +/// `InstructionData` variants. The value operands are stored differently, depending on how many +/// there are. Beyond a certain point, instruction formats switch to an external value list for +/// storing value arguments. Value lists can hold an arbitrary number of values. +/// +/// All instruction formats must be predefined in the meta shared/formats.rs module. +#[derive(Debug)] +pub(crate) struct InstructionFormat { + /// Instruction format name in CamelCase. This is used as a Rust variant name in both the + /// `InstructionData` and `InstructionFormat` enums. + pub name: &'static str, + + pub num_value_operands: usize, + + pub has_value_list: bool, + + pub imm_fields: Vec, + + /// Index of the value input operand that is used to infer the controlling type variable. By + /// default, this is `0`, the first `value` operand. The index is relative to the values only, + /// ignoring immediate operands. + pub typevar_operand: Option, +} + +/// A tuple serving as a key to deduplicate InstructionFormat. +#[derive(Hash, PartialEq, Eq)] +pub(crate) struct FormatStructure { + pub num_value_operands: usize, + pub has_value_list: bool, + /// Tuples of (Rust field name / Rust type) for each immediate field. + pub imm_field_names: Vec<(&'static str, &'static str)>, +} + +impl fmt::Display for InstructionFormat { + fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { + let imm_args = self + .imm_fields + .iter() + .map(|field| format!("{}: {}", field.member, field.kind.rust_type)) + .collect::>() + .join(", "); + fmt.write_fmt(format_args!( + "{}(imms=({}), vals={})", + self.name, imm_args, self.num_value_operands + ))?; + Ok(()) + } +} + +impl InstructionFormat { + pub fn imm_by_name(&self, name: &'static str) -> &FormatField { + self.imm_fields + .iter() + .find(|&field| field.member == name) + .unwrap_or_else(|| { + panic!( + "unexpected immediate field named {} in instruction format {}", + name, self.name + ) + }) + } + + /// Returns a tuple that uniquely identifies the structure. 
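+    /// Two formats that declare the same number of value operands, the same value-list flag,
+    /// and the same immediate fields produce equal `FormatStructure` keys and are deduplicated.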
+ pub fn structure(&self) -> FormatStructure { + FormatStructure { + num_value_operands: self.num_value_operands, + has_value_list: self.has_value_list, + imm_field_names: self + .imm_fields + .iter() + .map(|field| (field.kind.rust_field_name, field.kind.rust_type)) + .collect::>(), + } + } +} + +pub(crate) struct InstructionFormatBuilder { + name: &'static str, + num_value_operands: usize, + has_value_list: bool, + imm_fields: Vec, + typevar_operand: Option, +} + +impl InstructionFormatBuilder { + pub fn new(name: &'static str) -> Self { + Self { + name, + num_value_operands: 0, + has_value_list: false, + imm_fields: Vec::new(), + typevar_operand: None, + } + } + + pub fn value(mut self) -> Self { + self.num_value_operands += 1; + self + } + + pub fn varargs(mut self) -> Self { + self.has_value_list = true; + self + } + + pub fn imm(mut self, operand_kind: &OperandKind) -> Self { + let field = FormatField { + kind: operand_kind.clone(), + member: operand_kind.rust_field_name, + }; + self.imm_fields.push(field); + self + } + + pub fn imm_with_name(mut self, member: &'static str, operand_kind: &OperandKind) -> Self { + let field = FormatField { + kind: operand_kind.clone(), + member, + }; + self.imm_fields.push(field); + self + } + + pub fn typevar_operand(mut self, operand_index: usize) -> Self { + assert!(self.typevar_operand.is_none()); + assert!(self.has_value_list || operand_index < self.num_value_operands); + self.typevar_operand = Some(operand_index); + self + } + + pub fn build(self) -> Rc { + let typevar_operand = if self.typevar_operand.is_some() { + self.typevar_operand + } else if self.has_value_list || self.num_value_operands > 0 { + // Default to the first value operand, if there's one. + Some(0) + } else { + None + }; + + Rc::new(InstructionFormat { + name: self.name, + num_value_operands: self.num_value_operands, + has_value_list: self.has_value_list, + imm_fields: self.imm_fields, + typevar_operand, + }) + } +} diff --git a/cranelift/codegen/meta/src/cdsl/instructions.rs b/cranelift/codegen/meta/src/cdsl/instructions.rs new file mode 100644 index 0000000000..d8d9c81466 --- /dev/null +++ b/cranelift/codegen/meta/src/cdsl/instructions.rs @@ -0,0 +1,1385 @@ +use cranelift_codegen_shared::condcodes::IntCC; +use cranelift_entity::{entity_impl, PrimaryMap}; + +use std::collections::HashMap; +use std::fmt; +use std::fmt::{Display, Error, Formatter}; +use std::rc::Rc; + +use crate::cdsl::camel_case; +use crate::cdsl::formats::{FormatField, InstructionFormat}; +use crate::cdsl::operands::Operand; +use crate::cdsl::type_inference::Constraint; +use crate::cdsl::types::{LaneType, ReferenceType, ValueType, VectorType}; +use crate::cdsl::typevar::TypeVar; + +use crate::shared::formats::Formats; +use crate::shared::types::{Bool, Float, Int, Reference}; + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct OpcodeNumber(u32); +entity_impl!(OpcodeNumber); + +pub(crate) type AllInstructions = PrimaryMap; + +pub(crate) struct InstructionGroupBuilder<'all_inst> { + all_instructions: &'all_inst mut AllInstructions, + own_instructions: Vec, +} + +impl<'all_inst> InstructionGroupBuilder<'all_inst> { + pub fn new(all_instructions: &'all_inst mut AllInstructions) -> Self { + Self { + all_instructions, + own_instructions: Vec::new(), + } + } + + pub fn push(&mut self, builder: InstructionBuilder) { + let opcode_number = OpcodeNumber(self.all_instructions.next_key().as_u32()); + let inst = builder.build(opcode_number); + // Note this clone is cheap, since 
Instruction is an Rc<> wrapper for InstructionContent.
+        self.own_instructions.push(inst.clone());
+        self.all_instructions.push(inst);
+    }
+
+    pub fn build(self) -> InstructionGroup {
+        InstructionGroup {
+            instructions: self.own_instructions,
+        }
+    }
+}
+
+/// Every instruction must belong to exactly one instruction group. A given
+/// target architecture can support instructions from multiple groups, and it
+/// does not necessarily support all instructions in a group.
+pub(crate) struct InstructionGroup {
+    instructions: Vec<Instruction>,
+}
+
+impl InstructionGroup {
+    pub fn by_name(&self, name: &'static str) -> &Instruction {
+        self.instructions
+            .iter()
+            .find(|inst| inst.name == name)
+            .unwrap_or_else(|| panic!("no instruction named {}", name))
+    }
+}
+
+/// Instructions can have parameters bound to them to specialize them for more specific encodings
+/// (e.g. the encoding for adding two float types may be different from that of adding two
+/// integer types).
+pub(crate) trait Bindable {
+    /// Bind a parameter to an instruction.
+    fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction;
+}
+
+#[derive(Debug)]
+pub(crate) struct PolymorphicInfo {
+    pub use_typevar_operand: bool,
+    pub ctrl_typevar: TypeVar,
+    pub other_typevars: Vec<TypeVar>,
+}
+
+#[derive(Debug)]
+pub(crate) struct InstructionContent {
+    /// Instruction mnemonic, also becomes opcode name.
+    pub name: String,
+    pub camel_name: String,
+    pub opcode_number: OpcodeNumber,
+
+    /// Documentation string.
+    pub doc: String,
+
+    /// Input operands. This can be a mix of SSA value operands and other operand kinds.
+    pub operands_in: Vec<Operand>,
+    /// Output operands. The output operands must be SSA values or `variable_args`.
+    pub operands_out: Vec<Operand>,
+    /// Instruction-specific TypeConstraints.
+    pub constraints: Vec<Constraint>,
+
+    /// Instruction format, automatically derived from the input operands.
+    pub format: Rc<InstructionFormat>,
+
+    /// One of the input or output operands is a free type variable. None if the instruction is
+    /// not polymorphic, set otherwise.
+    pub polymorphic_info: Option<PolymorphicInfo>,
+
+    /// Indices in operands_in of input operands that are values.
+    pub value_opnums: Vec<usize>,
+    /// Indices in operands_in of input operands that are immediates or entities.
+    pub imm_opnums: Vec<usize>,
+    /// Indices in operands_out of output operands that are values.
+    pub value_results: Vec<usize>,
+
+    /// True for instructions that terminate the block.
+    pub is_terminator: bool,
+    /// True for all branch or jump instructions.
+    pub is_branch: bool,
+    /// True for all indirect branch or jump instructions.
+    pub is_indirect_branch: bool,
+    /// Is this a call instruction?
+    pub is_call: bool,
+    /// Is this a return instruction?
+    pub is_return: bool,
+    /// Is this a ghost instruction?
+    pub is_ghost: bool,
+    /// Can this instruction read from memory?
+    pub can_load: bool,
+    /// Can this instruction write to memory?
+    pub can_store: bool,
+    /// Can this instruction cause a trap?
+    pub can_trap: bool,
+    /// Does this instruction have other side effects besides can_* flags?
+    pub other_side_effects: bool,
+    /// Does this instruction write to CPU flags?
+ pub writes_cpu_flags: bool, +} + +impl InstructionContent { + pub fn snake_name(&self) -> &str { + if &self.name == "return" { + "return_" + } else { + &self.name + } + } + + pub fn all_typevars(&self) -> Vec<&TypeVar> { + match &self.polymorphic_info { + Some(poly) => { + let mut result = vec![&poly.ctrl_typevar]; + result.extend(&poly.other_typevars); + result + } + None => Vec::new(), + } + } +} + +pub(crate) type Instruction = Rc; + +impl Bindable for Instruction { + fn bind(&self, parameter: impl Into) -> BoundInstruction { + BoundInstruction::new(self).bind(parameter) + } +} + +impl fmt::Display for InstructionContent { + fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { + if !self.operands_out.is_empty() { + let operands_out = self + .operands_out + .iter() + .map(|op| op.name) + .collect::>() + .join(", "); + fmt.write_str(&operands_out)?; + fmt.write_str(" = ")?; + } + + fmt.write_str(&self.name)?; + + if !self.operands_in.is_empty() { + let operands_in = self + .operands_in + .iter() + .map(|op| op.name) + .collect::>() + .join(", "); + fmt.write_str(" ")?; + fmt.write_str(&operands_in)?; + } + + Ok(()) + } +} + +pub(crate) struct InstructionBuilder { + name: String, + doc: String, + format: Rc, + operands_in: Option>, + operands_out: Option>, + constraints: Option>, + + // See Instruction comments for the meaning of these fields. + is_terminator: bool, + is_branch: bool, + is_indirect_branch: bool, + is_call: bool, + is_return: bool, + is_ghost: bool, + can_load: bool, + can_store: bool, + can_trap: bool, + other_side_effects: bool, +} + +impl InstructionBuilder { + pub fn new>(name: S, doc: S, format: &Rc) -> Self { + Self { + name: name.into(), + doc: doc.into(), + format: format.clone(), + operands_in: None, + operands_out: None, + constraints: None, + + is_terminator: false, + is_branch: false, + is_indirect_branch: false, + is_call: false, + is_return: false, + is_ghost: false, + can_load: false, + can_store: false, + can_trap: false, + other_side_effects: false, + } + } + + pub fn operands_in(mut self, operands: Vec<&Operand>) -> Self { + assert!(self.operands_in.is_none()); + self.operands_in = Some(operands.iter().map(|x| (*x).clone()).collect()); + self + } + + pub fn operands_out(mut self, operands: Vec<&Operand>) -> Self { + assert!(self.operands_out.is_none()); + self.operands_out = Some(operands.iter().map(|x| (*x).clone()).collect()); + self + } + + pub fn constraints(mut self, constraints: Vec) -> Self { + assert!(self.constraints.is_none()); + self.constraints = Some(constraints); + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_terminator(mut self, val: bool) -> Self { + self.is_terminator = val; + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_branch(mut self, val: bool) -> Self { + self.is_branch = val; + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_indirect_branch(mut self, val: bool) -> Self { + self.is_indirect_branch = val; + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_call(mut self, val: bool) -> Self { + self.is_call = val; + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_return(mut self, val: bool) -> Self { + self.is_return = val; + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_ghost(mut self, val: bool) -> Self { + self.is_ghost = val; + self + } + + pub fn can_load(mut self, val: bool) -> Self { + self.can_load = val; + self + } + + pub fn can_store(mut self, val: bool) -> Self { + self.can_store = val; + self + } + + 
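+    // Illustrative use of this builder (operand and format names are hypothetical; the real
+    // definitions live in the shared instruction definitions):
+    //
+    //     InstructionBuilder::new("iadd", "Wrapping integer addition.", &formats.binary)
+    //         .operands_in(vec![&x, &y])
+    //         .operands_out(vec![&a])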
pub fn can_trap(mut self, val: bool) -> Self {
+        self.can_trap = val;
+        self
+    }
+
+    pub fn other_side_effects(mut self, val: bool) -> Self {
+        self.other_side_effects = val;
+        self
+    }
+
+    fn build(self, opcode_number: OpcodeNumber) -> Instruction {
+        let operands_in = self.operands_in.unwrap_or_else(Vec::new);
+        let operands_out = self.operands_out.unwrap_or_else(Vec::new);
+
+        let mut value_opnums = Vec::new();
+        let mut imm_opnums = Vec::new();
+        for (i, op) in operands_in.iter().enumerate() {
+            if op.is_value() {
+                value_opnums.push(i);
+            } else if op.is_immediate_or_entityref() {
+                imm_opnums.push(i);
+            } else {
+                assert!(op.is_varargs());
+            }
+        }
+
+        let value_results = operands_out
+            .iter()
+            .enumerate()
+            .filter_map(|(i, op)| if op.is_value() { Some(i) } else { None })
+            .collect();
+
+        verify_format(&self.name, &operands_in, &self.format);
+
+        let polymorphic_info =
+            verify_polymorphic(&operands_in, &operands_out, &self.format, &value_opnums);
+
+        // Infer from output operands whether an instruction clobbers CPU flags or not.
+        let writes_cpu_flags = operands_out.iter().any(|op| op.is_cpu_flags());
+
+        let camel_name = camel_case(&self.name);
+
+        Rc::new(InstructionContent {
+            name: self.name,
+            camel_name,
+            opcode_number,
+            doc: self.doc,
+            operands_in,
+            operands_out,
+            constraints: self.constraints.unwrap_or_else(Vec::new),
+            format: self.format,
+            polymorphic_info,
+            value_opnums,
+            value_results,
+            imm_opnums,
+            is_terminator: self.is_terminator,
+            is_branch: self.is_branch,
+            is_indirect_branch: self.is_indirect_branch,
+            is_call: self.is_call,
+            is_return: self.is_return,
+            is_ghost: self.is_ghost,
+            can_load: self.can_load,
+            can_store: self.can_store,
+            can_trap: self.can_trap,
+            other_side_effects: self.other_side_effects,
+            writes_cpu_flags,
+        })
+    }
+}
+
+/// A thin wrapper like Option<ValueType>, but with more precise semantics.
+#[derive(Clone)]
+pub(crate) enum ValueTypeOrAny {
+    ValueType(ValueType),
+    Any,
+}
+
+impl ValueTypeOrAny {
+    pub fn expect(self, msg: &str) -> ValueType {
+        match self {
+            ValueTypeOrAny::ValueType(vt) => vt,
+            ValueTypeOrAny::Any => panic!("Unexpected Any: {}", msg),
+        }
+    }
+}
+
+/// The number of bits in the vector.
+type VectorBitWidth = u64;
+
+/// A parameter used for binding instructions to specific types or values.
+pub(crate) enum BindParameter {
+    Any,
+    Lane(LaneType),
+    Vector(LaneType, VectorBitWidth),
+    Reference(ReferenceType),
+    Immediate(Immediate),
+}
+
+/// Constructor for more easily building vector parameters from any lane type.
+pub(crate) fn vector(parameter: impl Into<LaneType>, vector_size: VectorBitWidth) -> BindParameter {
+    BindParameter::Vector(parameter.into(), vector_size)
+}
+
+impl From<Int> for BindParameter {
+    fn from(ty: Int) -> Self {
+        BindParameter::Lane(ty.into())
+    }
+}
+
+impl From<Bool> for BindParameter {
+    fn from(ty: Bool) -> Self {
+        BindParameter::Lane(ty.into())
+    }
+}
+
+impl From<Float> for BindParameter {
+    fn from(ty: Float) -> Self {
+        BindParameter::Lane(ty.into())
+    }
+}
+
+impl From<LaneType> for BindParameter {
+    fn from(ty: LaneType) -> Self {
+        BindParameter::Lane(ty)
+    }
+}
+
+impl From<Reference> for BindParameter {
+    fn from(ty: Reference) -> Self {
+        BindParameter::Reference(ty.into())
+    }
+}
+
+impl From<Immediate> for BindParameter {
+    fn from(imm: Immediate) -> Self {
+        BindParameter::Immediate(imm)
+    }
+}
+
+#[derive(Clone)]
+pub(crate) enum Immediate {
+    // When needed, this enum should be expanded to include other immediate types (e.g. u8, u128).
+ IntCC(IntCC), +} + +impl Display for Immediate { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + match self { + Immediate::IntCC(x) => write!(f, "IntCC::{:?}", x), + } + } +} + +#[derive(Clone)] +pub(crate) struct BoundInstruction { + pub inst: Instruction, + pub value_types: Vec, + pub immediate_values: Vec, +} + +impl BoundInstruction { + /// Construct a new bound instruction (with nothing bound yet) from an instruction + fn new(inst: &Instruction) -> Self { + BoundInstruction { + inst: inst.clone(), + value_types: vec![], + immediate_values: vec![], + } + } + + /// Verify that the bindings for a BoundInstruction are correct. + fn verify_bindings(&self) -> Result<(), String> { + // Verify that binding types to the instruction does not violate the polymorphic rules. + if !self.value_types.is_empty() { + match &self.inst.polymorphic_info { + Some(poly) => { + if self.value_types.len() > 1 + poly.other_typevars.len() { + return Err(format!( + "trying to bind too many types for {}", + self.inst.name + )); + } + } + None => { + return Err(format!( + "trying to bind a type for {} which is not a polymorphic instruction", + self.inst.name + )); + } + } + } + + // Verify that only the right number of immediates are bound. + let immediate_count = self + .inst + .operands_in + .iter() + .filter(|o| o.is_immediate_or_entityref()) + .count(); + if self.immediate_values.len() > immediate_count { + return Err(format!( + "trying to bind too many immediates ({}) to instruction {} which only expects {} \ + immediates", + self.immediate_values.len(), + self.inst.name, + immediate_count + )); + } + + Ok(()) + } +} + +impl Bindable for BoundInstruction { + fn bind(&self, parameter: impl Into) -> BoundInstruction { + let mut modified = self.clone(); + match parameter.into() { + BindParameter::Any => modified.value_types.push(ValueTypeOrAny::Any), + BindParameter::Lane(lane_type) => modified + .value_types + .push(ValueTypeOrAny::ValueType(lane_type.into())), + BindParameter::Vector(lane_type, vector_size_in_bits) => { + let num_lanes = vector_size_in_bits / lane_type.lane_bits(); + assert!( + num_lanes >= 2, + "Minimum lane number for bind_vector is 2, found {}.", + num_lanes, + ); + let vector_type = ValueType::Vector(VectorType::new(lane_type, num_lanes)); + modified + .value_types + .push(ValueTypeOrAny::ValueType(vector_type)); + } + BindParameter::Reference(reference_type) => { + modified + .value_types + .push(ValueTypeOrAny::ValueType(reference_type.into())); + } + BindParameter::Immediate(immediate) => modified.immediate_values.push(immediate), + } + modified.verify_bindings().unwrap(); + modified + } +} + +/// Checks that the input operands actually match the given format. +fn verify_format(inst_name: &str, operands_in: &[Operand], format: &InstructionFormat) { + // A format is defined by: + // - its number of input value operands, + // - its number and names of input immediate operands, + // - whether it has a value list or not. 
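+    // For example (illustrative): an instruction declared with two value inputs and one
+    // immediate must use a format that declares exactly two value operands and one immediate
+    // field of the matching kind, or one of the assertions below fires.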
+    let mut num_values = 0;
+    let mut num_immediates = 0;
+
+    for operand in operands_in.iter() {
+        if operand.is_varargs() {
+            assert!(
+                format.has_value_list,
+                "instruction {} has varargs, but its format {} doesn't have a value list; you may \
+                 need to use a different format.",
+                inst_name, format.name
+            );
+        }
+        if operand.is_value() {
+            num_values += 1;
+        }
+        if operand.is_immediate_or_entityref() {
+            if let Some(format_field) = format.imm_fields.get(num_immediates) {
+                assert_eq!(
+                    format_field.kind.rust_field_name,
+                    operand.kind.rust_field_name,
+                    "{}th operand of {} should be {} (according to format), not {} (according to \
+                     inst definition). You may need to use a different format.",
+                    num_immediates,
+                    inst_name,
+                    format_field.kind.rust_field_name,
+                    operand.kind.rust_field_name
+                );
+                num_immediates += 1;
+            }
+        }
+    }
+
+    assert_eq!(
+        num_values, format.num_value_operands,
+        "inst {} doesn't have as many value input operands as its format {} declares; you may \
+         need to use a different format.",
+        inst_name, format.name
+    );
+
+    assert_eq!(
+        num_immediates,
+        format.imm_fields.len(),
+        "inst {} doesn't have as many immediate input \
+         operands as its format {} declares; you may need to use a different format.",
+        inst_name,
+        format.name
+    );
+}
+
+/// Check if this instruction is polymorphic, and verify its use of type variables.
+fn verify_polymorphic(
+    operands_in: &[Operand],
+    operands_out: &[Operand],
+    format: &InstructionFormat,
+    value_opnums: &[usize],
+) -> Option<PolymorphicInfo> {
+    // The instruction is polymorphic if it has one free input or output operand.
+    let is_polymorphic = operands_in
+        .iter()
+        .any(|op| op.is_value() && op.type_var().unwrap().free_typevar().is_some())
+        || operands_out
+            .iter()
+            .any(|op| op.is_value() && op.type_var().unwrap().free_typevar().is_some());
+
+    if !is_polymorphic {
+        return None;
+    }
+
+    // Verify the use of type variables.
+    let tv_op = format.typevar_operand;
+    let mut maybe_error_message = None;
+    if let Some(tv_op) = tv_op {
+        if tv_op < value_opnums.len() {
+            let op_num = value_opnums[tv_op];
+            let tv = operands_in[op_num].type_var().unwrap();
+            let free_typevar = tv.free_typevar();
+            if (free_typevar.is_some() && tv == &free_typevar.unwrap())
+                || tv.singleton_type().is_some()
+            {
+                match is_ctrl_typevar_candidate(tv, &operands_in, &operands_out) {
+                    Ok(other_typevars) => {
+                        return Some(PolymorphicInfo {
+                            use_typevar_operand: true,
+                            ctrl_typevar: tv.clone(),
+                            other_typevars,
+                        });
+                    }
+                    Err(error_message) => {
+                        maybe_error_message = Some(error_message);
+                    }
+                }
+            }
+        }
+    };
+
+    // If we reached here, it means the type variable indicated as the typevar operand couldn't
+    // control every other input and output type variable. We need to look at the result type
+    // variables.
+    if operands_out.is_empty() {
+        // No result means no other possible type variable, so it's a type inference failure.
+        match maybe_error_message {
+            Some(msg) => panic!(msg),
+            None => panic!("typevar_operand must be a free type variable"),
+        }
+    }
+
+    // Otherwise, try to infer the controlling type variable by looking at the first result.
+    let tv = operands_out[0].type_var().unwrap();
+    let free_typevar = tv.free_typevar();
+    if free_typevar.is_some() && tv != &free_typevar.unwrap() {
+        panic!("first result must be a free type variable");
+    }
+
+    // At this point, if the next unwrap() fails, it means the output type couldn't be used as a
+    // controlling type variable either; panicking is the right behavior.
+    let other_typevars = is_ctrl_typevar_candidate(tv, &operands_in, &operands_out).unwrap();
+
+    Some(PolymorphicInfo {
+        use_typevar_operand: false,
+        ctrl_typevar: tv.clone(),
+        other_typevars,
+    })
+}
+
+/// Verify that the use of TypeVars is consistent with `ctrl_typevar` as the controlling type
+/// variable.
+///
+/// All polymorphic inputs must either be derived from `ctrl_typevar` or be independent free type
+/// variables only used once.
+///
+/// All polymorphic results must be derived from `ctrl_typevar`.
+///
+/// Return a vector of other type variables used, or a string explaining what went wrong.
+fn is_ctrl_typevar_candidate(
+    ctrl_typevar: &TypeVar,
+    operands_in: &[Operand],
+    operands_out: &[Operand],
+) -> Result<Vec<TypeVar>, String> {
+    let mut other_typevars = Vec::new();
+
+    // Check value inputs.
+    for input in operands_in {
+        if !input.is_value() {
+            continue;
+        }
+
+        let typ = input.type_var().unwrap();
+        let free_typevar = typ.free_typevar();
+
+        // Non-polymorphic or derived from ctrl_typevar is OK.
+        if free_typevar.is_none() {
+            continue;
+        }
+        let free_typevar = free_typevar.unwrap();
+        if &free_typevar == ctrl_typevar {
+            continue;
+        }
+
+        // No other derived typevars allowed.
+        if typ != &free_typevar {
+            return Err(format!(
+                "{:?}: type variable {} must be derived from {:?} while it is derived from {:?}",
+                input, typ.name, ctrl_typevar, free_typevar
+            ));
+        }
+
+        // Other free type variables can only be used once each.
+        for other_tv in &other_typevars {
+            if &free_typevar == other_tv {
+                return Err(format!(
+                    "non-controlling type variable {} can't be used more than once",
+                    free_typevar.name
+                ));
+            }
+        }
+
+        other_typevars.push(free_typevar);
+    }
+
+    // Check outputs.
+    for result in operands_out {
+        if !result.is_value() {
+            continue;
+        }
+
+        let typ = result.type_var().unwrap();
+        let free_typevar = typ.free_typevar();
+
+        // Non-polymorphic or derived from ctrl_typevar is OK.
+        if free_typevar.is_none() || &free_typevar.unwrap() == ctrl_typevar {
+            continue;
+        }
+
+        return Err("type variable in output not derived from ctrl_typevar".into());
+    }
+
+    Ok(other_typevars)
+}
+
+#[derive(Clone, Hash, PartialEq, Eq)]
+pub(crate) enum FormatPredicateKind {
+    /// Is the field member equal to the expected value (stored here)?
+    IsEqual(String),
+
+    /// Is the immediate instruction format field representable as an n-bit two's complement
+    /// integer? (with width: first member, scale: second member).
+    /// The predicate is true if the field is in the range: `-2^(width-1) -- 2^(width-1)-1` and a
+    /// multiple of `2^scale`.
+    IsSignedInt(usize, usize),
+
+    /// Is the immediate instruction format field representable as an n-bit unsigned integer?
+    /// (with width: first member, scale: second member).
+    /// The predicate is true if the field is in the range: `0 -- 2^width - 1` and a multiple of
+    /// `2^scale`.
+    IsUnsignedInt(usize, usize),
+
+    /// Is the immediate format field member an integer equal to zero?
+    IsZeroInt,
+
+    /// Is the immediate format field member equal to zero? (float32 version)
+    IsZero32BitFloat,
+
+    /// Is the immediate format field member equal to zero? (float64 version)
+    IsZero64BitFloat,
+
+    /// Is the immediate format field member equal to zero in all lanes?
+    IsAllZeroes,
+
+    /// Does the immediate format field member have ones in all bits of all lanes?
+    IsAllOnes,
+
+    /// Does the value list (in member_name) have the size specified in the parameter?
+    LengthEquals(usize),
+
+    /// Is the referenced function colocated?
+ IsColocatedFunc, + + /// Is the referenced data object colocated? + IsColocatedData, +} + +#[derive(Clone, Hash, PartialEq, Eq)] +pub(crate) struct FormatPredicateNode { + format_name: &'static str, + member_name: &'static str, + kind: FormatPredicateKind, +} + +impl FormatPredicateNode { + fn new( + format: &InstructionFormat, + field_name: &'static str, + kind: FormatPredicateKind, + ) -> Self { + let member_name = format.imm_by_name(field_name).member; + Self { + format_name: format.name, + member_name, + kind, + } + } + + fn new_raw( + format: &InstructionFormat, + member_name: &'static str, + kind: FormatPredicateKind, + ) -> Self { + Self { + format_name: format.name, + member_name, + kind, + } + } + + fn destructuring_member_name(&self) -> &'static str { + match &self.kind { + FormatPredicateKind::LengthEquals(_) => { + // Length operates on the argument value list. + assert!(self.member_name == "args"); + "ref args" + } + _ => self.member_name, + } + } + + fn rust_predicate(&self) -> String { + match &self.kind { + FormatPredicateKind::IsEqual(arg) => { + format!("predicates::is_equal({}, {})", self.member_name, arg) + } + FormatPredicateKind::IsSignedInt(width, scale) => format!( + "predicates::is_signed_int({}, {}, {})", + self.member_name, width, scale + ), + FormatPredicateKind::IsUnsignedInt(width, scale) => format!( + "predicates::is_unsigned_int({}, {}, {})", + self.member_name, width, scale + ), + FormatPredicateKind::IsZeroInt => { + format!("predicates::is_zero_int({})", self.member_name) + } + FormatPredicateKind::IsZero32BitFloat => { + format!("predicates::is_zero_32_bit_float({})", self.member_name) + } + FormatPredicateKind::IsZero64BitFloat => { + format!("predicates::is_zero_64_bit_float({})", self.member_name) + } + FormatPredicateKind::IsAllZeroes => format!( + "predicates::is_all_zeroes(func.dfg.constants.get({}))", + self.member_name + ), + FormatPredicateKind::IsAllOnes => format!( + "predicates::is_all_ones(func.dfg.constants.get({}))", + self.member_name + ), + FormatPredicateKind::LengthEquals(num) => format!( + "predicates::has_length_of({}, {}, func)", + self.member_name, num + ), + FormatPredicateKind::IsColocatedFunc => { + format!("predicates::is_colocated_func({}, func)", self.member_name,) + } + FormatPredicateKind::IsColocatedData => { + format!("predicates::is_colocated_data({}, func)", self.member_name) + } + } + } +} + +#[derive(Clone, Hash, PartialEq, Eq)] +pub(crate) enum TypePredicateNode { + /// Is the value argument (at the index designated by the first member) the same type as the + /// type name (second member)? + TypeVarCheck(usize, String), + + /// Is the controlling type variable the same type as the one designated by the type name + /// (only member)? + CtrlTypeVarCheck(String), +} + +impl TypePredicateNode { + fn rust_predicate(&self, func_str: &str) -> String { + match self { + TypePredicateNode::TypeVarCheck(index, value_type_name) => format!( + "{}.dfg.value_type(args[{}]) == {}", + func_str, index, value_type_name + ), + TypePredicateNode::CtrlTypeVarCheck(value_type_name) => { + format!("{}.dfg.ctrl_typevar(inst) == {}", func_str, value_type_name) + } + } + } +} + +/// A basic node in an instruction predicate: either an atom, or an AND of two conditions. +#[derive(Clone, Hash, PartialEq, Eq)] +pub(crate) enum InstructionPredicateNode { + FormatPredicate(FormatPredicateNode), + + TypePredicate(TypePredicateNode), + + /// An AND-combination of two or more other predicates. 
+ And(Vec), + + /// An OR-combination of two or more other predicates. + Or(Vec), +} + +impl InstructionPredicateNode { + fn rust_predicate(&self, func_str: &str) -> String { + match self { + InstructionPredicateNode::FormatPredicate(node) => node.rust_predicate(), + InstructionPredicateNode::TypePredicate(node) => node.rust_predicate(func_str), + InstructionPredicateNode::And(nodes) => nodes + .iter() + .map(|x| x.rust_predicate(func_str)) + .collect::>() + .join(" && "), + InstructionPredicateNode::Or(nodes) => nodes + .iter() + .map(|x| x.rust_predicate(func_str)) + .collect::>() + .join(" || "), + } + } + + pub fn format_destructuring_member_name(&self) -> &str { + match self { + InstructionPredicateNode::FormatPredicate(format_pred) => { + format_pred.destructuring_member_name() + } + _ => panic!("Only for leaf format predicates"), + } + } + + pub fn format_name(&self) -> &str { + match self { + InstructionPredicateNode::FormatPredicate(format_pred) => format_pred.format_name, + _ => panic!("Only for leaf format predicates"), + } + } + + pub fn is_type_predicate(&self) -> bool { + match self { + InstructionPredicateNode::FormatPredicate(_) + | InstructionPredicateNode::And(_) + | InstructionPredicateNode::Or(_) => false, + InstructionPredicateNode::TypePredicate(_) => true, + } + } + + fn collect_leaves(&self) -> Vec<&InstructionPredicateNode> { + let mut ret = Vec::new(); + match self { + InstructionPredicateNode::And(nodes) | InstructionPredicateNode::Or(nodes) => { + for node in nodes { + ret.extend(node.collect_leaves()); + } + } + _ => ret.push(self), + } + ret + } +} + +#[derive(Clone, Hash, PartialEq, Eq)] +pub(crate) struct InstructionPredicate { + node: Option, +} + +impl Into for InstructionPredicateNode { + fn into(self) -> InstructionPredicate { + InstructionPredicate { node: Some(self) } + } +} + +impl InstructionPredicate { + pub fn new() -> Self { + Self { node: None } + } + + pub fn unwrap(self) -> InstructionPredicateNode { + self.node.unwrap() + } + + pub fn new_typevar_check( + inst: &Instruction, + type_var: &TypeVar, + value_type: &ValueType, + ) -> InstructionPredicateNode { + let index = inst + .value_opnums + .iter() + .enumerate() + .find(|(_, &op_num)| inst.operands_in[op_num].type_var().unwrap() == type_var) + .unwrap() + .0; + InstructionPredicateNode::TypePredicate(TypePredicateNode::TypeVarCheck( + index, + value_type.rust_name(), + )) + } + + pub fn new_ctrl_typevar_check(value_type: &ValueType) -> InstructionPredicateNode { + InstructionPredicateNode::TypePredicate(TypePredicateNode::CtrlTypeVarCheck( + value_type.rust_name(), + )) + } + + pub fn new_is_field_equal( + format: &InstructionFormat, + field_name: &'static str, + imm_value: String, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsEqual(imm_value), + )) + } + + /// Used only for the AST module, which directly passes in the format field. 
+ pub fn new_is_field_equal_ast( + format: &InstructionFormat, + field: &FormatField, + imm_value: String, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new_raw( + format, + field.member, + FormatPredicateKind::IsEqual(imm_value), + )) + } + + pub fn new_is_signed_int( + format: &InstructionFormat, + field_name: &'static str, + width: usize, + scale: usize, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsSignedInt(width, scale), + )) + } + + pub fn new_is_unsigned_int( + format: &InstructionFormat, + field_name: &'static str, + width: usize, + scale: usize, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsUnsignedInt(width, scale), + )) + } + + pub fn new_is_zero_int( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsZeroInt, + )) + } + + pub fn new_is_zero_32bit_float( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsZero32BitFloat, + )) + } + + pub fn new_is_zero_64bit_float( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsZero64BitFloat, + )) + } + + pub fn new_is_all_zeroes( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsAllZeroes, + )) + } + + pub fn new_is_all_ones( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsAllOnes, + )) + } + + pub fn new_length_equals(format: &InstructionFormat, size: usize) -> InstructionPredicateNode { + assert!( + format.has_value_list, + "the format must be variadic in number of arguments" + ); + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new_raw( + format, + "args", + FormatPredicateKind::LengthEquals(size), + )) + } + + pub fn new_is_colocated_func( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsColocatedFunc, + )) + } + + pub fn new_is_colocated_data(formats: &Formats) -> InstructionPredicateNode { + let format = &formats.unary_global_value; + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + &*format, + "global_value", + FormatPredicateKind::IsColocatedData, + )) + } + + pub fn and(mut self, new_node: InstructionPredicateNode) -> Self { + let node = self.node; + let mut and_nodes = match node { + Some(node) => match node { + InstructionPredicateNode::And(nodes) => nodes, + InstructionPredicateNode::Or(_) => { + panic!("Can't mix and/or without implementing operator precedence!") + } + _ => vec![node], + }, + _ => Vec::new(), + }; + and_nodes.push(new_node); + self.node = 
Some(InstructionPredicateNode::And(and_nodes)); + self + } + + pub fn or(mut self, new_node: InstructionPredicateNode) -> Self { + let node = self.node; + let mut or_nodes = match node { + Some(node) => match node { + InstructionPredicateNode::Or(nodes) => nodes, + InstructionPredicateNode::And(_) => { + panic!("Can't mix and/or without implementing operator precedence!") + } + _ => vec![node], + }, + _ => Vec::new(), + }; + or_nodes.push(new_node); + self.node = Some(InstructionPredicateNode::Or(or_nodes)); + self + } + + pub fn rust_predicate(&self, func_str: &str) -> Option { + self.node.as_ref().map(|root| root.rust_predicate(func_str)) + } + + /// Returns the type predicate if this is one, or None otherwise. + pub fn type_predicate(&self, func_str: &str) -> Option { + let node = self.node.as_ref().unwrap(); + if node.is_type_predicate() { + Some(node.rust_predicate(func_str)) + } else { + None + } + } + + /// Returns references to all the nodes that are leaves in the condition (i.e. by flattening + /// AND/OR). + pub fn collect_leaves(&self) -> Vec<&InstructionPredicateNode> { + self.node.as_ref().unwrap().collect_leaves() + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct InstructionPredicateNumber(u32); +entity_impl!(InstructionPredicateNumber); + +pub(crate) type InstructionPredicateMap = + PrimaryMap; + +/// A registry of predicates to help deduplicating them, during Encodings construction. When the +/// construction process is over, it needs to be extracted with `extract` and associated to the +/// TargetIsa. +pub(crate) struct InstructionPredicateRegistry { + /// Maps a predicate number to its actual predicate. + map: InstructionPredicateMap, + + /// Inverse map: maps a predicate to its predicate number. This is used before inserting a + /// predicate, to check whether it already exists. + inverted_map: HashMap, +} + +impl InstructionPredicateRegistry { + pub fn new() -> Self { + Self { + map: PrimaryMap::new(), + inverted_map: HashMap::new(), + } + } + pub fn insert(&mut self, predicate: InstructionPredicate) -> InstructionPredicateNumber { + match self.inverted_map.get(&predicate) { + Some(&found) => found, + None => { + let key = self.map.push(predicate.clone()); + self.inverted_map.insert(predicate, key); + key + } + } + } + pub fn extract(self) -> InstructionPredicateMap { + self.map + } +} + +/// An instruction specification, containing an instruction that has bound types or not. +pub(crate) enum InstSpec { + Inst(Instruction), + Bound(BoundInstruction), +} + +impl InstSpec { + pub fn inst(&self) -> &Instruction { + match &self { + InstSpec::Inst(inst) => inst, + InstSpec::Bound(bound_inst) => &bound_inst.inst, + } + } +} + +impl Bindable for InstSpec { + fn bind(&self, parameter: impl Into) -> BoundInstruction { + match self { + InstSpec::Inst(inst) => inst.bind(parameter.into()), + InstSpec::Bound(inst) => inst.bind(parameter.into()), + } + } +} + +impl Into for &Instruction { + fn into(self) -> InstSpec { + InstSpec::Inst(self.clone()) + } +} + +impl Into for BoundInstruction { + fn into(self) -> InstSpec { + InstSpec::Bound(self) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::cdsl::formats::InstructionFormatBuilder; + use crate::cdsl::operands::{OperandKind, OperandKindFields}; + use crate::cdsl::typevar::TypeSetBuilder; + use crate::shared::types::Int::{I32, I64}; + + fn field_to_operand(index: usize, field: OperandKindFields) -> Operand { + // Pretend the index string is &'static. 
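+        // (Box::leak deliberately leaks the String to obtain a &'static str; acceptable in
+        // test-only code.)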
+ let name = Box::leak(index.to_string().into_boxed_str()); + // Format's name / rust_type don't matter here. + let kind = OperandKind::new(name, name, field); + let operand = Operand::new(name, kind); + operand + } + + fn field_to_operands(types: Vec) -> Vec { + types + .iter() + .enumerate() + .map(|(i, f)| field_to_operand(i, f.clone())) + .collect() + } + + fn build_fake_instruction( + inputs: Vec, + outputs: Vec, + ) -> Instruction { + // Setup a format from the input operands. + let mut format = InstructionFormatBuilder::new("fake"); + for (i, f) in inputs.iter().enumerate() { + match f { + OperandKindFields::TypeVar(_) => format = format.value(), + OperandKindFields::ImmValue => { + format = format.imm(&field_to_operand(i, f.clone()).kind) + } + _ => {} + }; + } + let format = format.build(); + + // Create the fake instruction. + InstructionBuilder::new("fake", "A fake instruction for testing.", &format) + .operands_in(field_to_operands(inputs).iter().collect()) + .operands_out(field_to_operands(outputs).iter().collect()) + .build(OpcodeNumber(42)) + } + + #[test] + fn ensure_bound_instructions_can_bind_lane_types() { + let type1 = TypeSetBuilder::new().ints(8..64).build(); + let in1 = OperandKindFields::TypeVar(TypeVar::new("a", "...", type1)); + let inst = build_fake_instruction(vec![in1], vec![]); + inst.bind(LaneType::Int(I32)); + } + + #[test] + fn ensure_bound_instructions_can_bind_immediates() { + let inst = build_fake_instruction(vec![OperandKindFields::ImmValue], vec![]); + let bound_inst = inst.bind(Immediate::IntCC(IntCC::Equal)); + assert!(bound_inst.verify_bindings().is_ok()); + } + + #[test] + #[should_panic] + fn ensure_instructions_fail_to_bind() { + let inst = build_fake_instruction(vec![], vec![]); + inst.bind(BindParameter::Lane(LaneType::Int(I32))); + // Trying to bind to an instruction with no inputs should fail. + } + + #[test] + #[should_panic] + fn ensure_bound_instructions_fail_to_bind_too_many_types() { + let type1 = TypeSetBuilder::new().ints(8..64).build(); + let in1 = OperandKindFields::TypeVar(TypeVar::new("a", "...", type1)); + let inst = build_fake_instruction(vec![in1], vec![]); + inst.bind(LaneType::Int(I32)).bind(LaneType::Int(I64)); + } + + #[test] + #[should_panic] + fn ensure_instructions_fail_to_bind_too_many_immediates() { + let inst = build_fake_instruction(vec![OperandKindFields::ImmValue], vec![]); + inst.bind(BindParameter::Immediate(Immediate::IntCC(IntCC::Equal))) + .bind(BindParameter::Immediate(Immediate::IntCC(IntCC::Equal))); + // Trying to bind too many immediates to an instruction should fail; note that the immediate + // values are nonsensical but irrelevant to the purpose of this test. 
+ } +} diff --git a/cranelift/codegen/meta/src/cdsl/isa.rs b/cranelift/codegen/meta/src/cdsl/isa.rs new file mode 100644 index 0000000000..512105d09a --- /dev/null +++ b/cranelift/codegen/meta/src/cdsl/isa.rs @@ -0,0 +1,99 @@ +use std::collections::HashSet; +use std::iter::FromIterator; + +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::instructions::{InstructionGroup, InstructionPredicateMap}; +use crate::cdsl::recipes::Recipes; +use crate::cdsl::regs::IsaRegs; +use crate::cdsl::settings::SettingGroup; +use crate::cdsl::xform::{TransformGroupIndex, TransformGroups}; + +pub(crate) struct TargetIsa { + pub name: &'static str, + pub instructions: InstructionGroup, + pub settings: SettingGroup, + pub regs: IsaRegs, + pub recipes: Recipes, + pub cpu_modes: Vec, + pub encodings_predicates: InstructionPredicateMap, + + /// TransformGroupIndex are global to all the ISAs, while we want to have indices into the + /// local array of transform groups that are directly used. We use this map to get this + /// information. + pub local_transform_groups: Vec, +} + +impl TargetIsa { + pub fn new( + name: &'static str, + instructions: InstructionGroup, + settings: SettingGroup, + regs: IsaRegs, + recipes: Recipes, + cpu_modes: Vec, + encodings_predicates: InstructionPredicateMap, + ) -> Self { + // Compute the local TransformGroup index. + let mut local_transform_groups = Vec::new(); + for cpu_mode in &cpu_modes { + let transform_groups = cpu_mode.direct_transform_groups(); + for group_index in transform_groups { + // find() is fine here: the number of transform group is < 5 as of June 2019. + if local_transform_groups + .iter() + .find(|&val| group_index == *val) + .is_none() + { + local_transform_groups.push(group_index); + } + } + } + + Self { + name, + instructions, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + local_transform_groups, + } + } + + /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the + /// transitive set of TransformGroup this TargetIsa uses. + pub fn transitive_transform_groups( + &self, + all_groups: &TransformGroups, + ) -> Vec { + let mut set = HashSet::new(); + + for &root in self.local_transform_groups.iter() { + set.insert(root); + let mut base = root; + // Follow the chain of chain_with. + while let Some(chain_with) = &all_groups.get(base).chain_with { + set.insert(*chain_with); + base = *chain_with; + } + } + + let mut vec = Vec::from_iter(set); + vec.sort(); + vec + } + + /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the directly + /// reachable set of TransformGroup this TargetIsa uses. + pub fn direct_transform_groups(&self) -> &Vec { + &self.local_transform_groups + } + + pub fn translate_group_index(&self, group_index: TransformGroupIndex) -> usize { + self.local_transform_groups + .iter() + .position(|&val| val == group_index) + .expect("TransformGroup unused by this TargetIsa!") + } +} diff --git a/cranelift/codegen/meta/src/cdsl/mod.rs b/cranelift/codegen/meta/src/cdsl/mod.rs new file mode 100644 index 0000000000..698b64dff3 --- /dev/null +++ b/cranelift/codegen/meta/src/cdsl/mod.rs @@ -0,0 +1,89 @@ +//! Cranelift DSL classes. +//! +//! This module defines the classes that are used to define Cranelift +//! instructions and other entities. 
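To make the deterministic traversal in `TargetIsa::transitive_transform_groups` above concrete, here is a minimal standalone sketch; `Group`, `collect_transitive` and the plain `usize` indices are illustrative stand-ins for `TransformGroup`, the method itself and `TransformGroupIndex`, not the crate's actual types:

```rust
use std::collections::HashSet;

// Stand-in for a TransformGroup: only the chaining link matters here.
struct Group {
    chain_with: Option<usize>,
}

/// Follow each root's `chain_with` links, deduplicate with a set, then sort
/// so the result is deterministic regardless of hash iteration order.
fn collect_transitive(roots: &[usize], all_groups: &[Group]) -> Vec<usize> {
    let mut set = HashSet::new();
    for &root in roots {
        set.insert(root);
        let mut base = root;
        while let Some(next) = all_groups[base].chain_with {
            set.insert(next);
            base = next;
        }
    }
    let mut vec: Vec<usize> = set.into_iter().collect();
    vec.sort();
    vec
}

fn main() {
    // Group 2 chains into 0, which chains into 1; group 3 is terminal.
    let groups = [
        Group { chain_with: Some(1) },
        Group { chain_with: None },
        Group { chain_with: Some(0) },
        Group { chain_with: None },
    ];
    assert_eq!(collect_transitive(&[2, 3], &groups), vec![0, 1, 2, 3]);
}
```

The final sort is what makes the generated code reproducible, which the `meta_determinist_check` CI job verifies.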
+ +#[macro_use] +pub mod ast; +pub mod cpu_modes; +pub mod encodings; +pub mod formats; +pub mod instructions; +pub mod isa; +pub mod operands; +pub mod recipes; +pub mod regs; +pub mod settings; +pub mod type_inference; +pub mod types; +pub mod typevar; +pub mod xform; + +/// A macro that converts boolean settings into predicates to look more natural. +#[macro_export] +macro_rules! predicate { + ($a:ident && $($b:tt)*) => { + PredicateNode::And(Box::new($a.into()), Box::new(predicate!($($b)*))) + }; + (!$a:ident && $($b:tt)*) => { + PredicateNode::And( + Box::new(PredicateNode::Not(Box::new($a.into()))), + Box::new(predicate!($($b)*)) + ) + }; + (!$a:ident) => { + PredicateNode::Not(Box::new($a.into())) + }; + ($a:ident) => { + $a.into() + }; +} + +/// A macro that joins boolean settings into a list (e.g. `preset!(feature_a && feature_b)`). +#[macro_export] +macro_rules! preset { + () => { + vec![] + }; + ($($x:ident)&&*) => { + { + let mut v = Vec::new(); + $( + v.push($x.into()); + )* + v + } + }; +} + +/// Convert the string `s` to CamelCase. +pub fn camel_case(s: &str) -> String { + let mut output_chars = String::with_capacity(s.len()); + + let mut capitalize = true; + for curr_char in s.chars() { + if curr_char == '_' { + capitalize = true; + } else { + if capitalize { + output_chars.extend(curr_char.to_uppercase()); + } else { + output_chars.push(curr_char); + } + capitalize = false; + } + } + + output_chars +} + +#[cfg(test)] +mod tests { + use super::camel_case; + + #[test] + fn camel_case_works() { + assert_eq!(camel_case("x"), "X"); + assert_eq!(camel_case("camel_case"), "CamelCase"); + } +} diff --git a/cranelift/codegen/meta/src/cdsl/operands.rs b/cranelift/codegen/meta/src/cdsl/operands.rs new file mode 100644 index 0000000000..605df24862 --- /dev/null +++ b/cranelift/codegen/meta/src/cdsl/operands.rs @@ -0,0 +1,173 @@ +use std::collections::HashMap; + +use crate::cdsl::typevar::TypeVar; + +/// An instruction operand can be an *immediate*, an *SSA value*, or an *entity reference*. The +/// type of the operand is one of: +/// +/// 1. A `ValueType` instance indicates an SSA value operand with a concrete type. +/// +/// 2. A `TypeVar` instance indicates an SSA value operand, and the instruction is polymorphic over +/// the possible concrete types that the type variable can assume. +/// +/// 3. An `ImmediateKind` instance indicates an immediate operand whose value is encoded in the +/// instruction itself rather than being passed as an SSA value. +/// +/// 4. An `EntityRefKind` instance indicates an operand that references another entity in the +/// function, typically something declared in the function preamble. +#[derive(Clone, Debug)] +pub(crate) struct Operand { + /// Name of the operand variable, as it appears in function parameters, legalizations, etc. + pub name: &'static str, + + /// Type of the operand. 
+ pub kind: OperandKind, + + doc: Option<&'static str>, +} + +impl Operand { + pub fn new(name: &'static str, kind: impl Into) -> Self { + Self { + name, + doc: None, + kind: kind.into(), + } + } + pub fn with_doc(mut self, doc: &'static str) -> Self { + self.doc = Some(doc); + self + } + + pub fn doc(&self) -> Option<&str> { + if let Some(doc) = &self.doc { + return Some(doc); + } + match &self.kind.fields { + OperandKindFields::TypeVar(tvar) => Some(&tvar.doc), + _ => self.kind.doc(), + } + } + + pub fn is_value(&self) -> bool { + match self.kind.fields { + OperandKindFields::TypeVar(_) => true, + _ => false, + } + } + + pub fn type_var(&self) -> Option<&TypeVar> { + match &self.kind.fields { + OperandKindFields::TypeVar(typevar) => Some(typevar), + _ => None, + } + } + + pub fn is_varargs(&self) -> bool { + match self.kind.fields { + OperandKindFields::VariableArgs => true, + _ => false, + } + } + + /// Returns true if the operand has an immediate kind or is an EntityRef. + pub fn is_immediate_or_entityref(&self) -> bool { + match self.kind.fields { + OperandKindFields::ImmEnum(_) + | OperandKindFields::ImmValue + | OperandKindFields::EntityRef => true, + _ => false, + } + } + + /// Returns true if the operand has an immediate kind. + pub fn is_immediate(&self) -> bool { + match self.kind.fields { + OperandKindFields::ImmEnum(_) | OperandKindFields::ImmValue => true, + _ => false, + } + } + + pub fn is_cpu_flags(&self) -> bool { + match &self.kind.fields { + OperandKindFields::TypeVar(type_var) + if type_var.name == "iflags" || type_var.name == "fflags" => + { + true + } + _ => false, + } + } +} + +pub type EnumValues = HashMap<&'static str, &'static str>; + +#[derive(Clone, Debug)] +pub(crate) enum OperandKindFields { + EntityRef, + VariableArgs, + ImmValue, + ImmEnum(EnumValues), + TypeVar(TypeVar), +} + +#[derive(Clone, Debug)] +pub(crate) struct OperandKind { + /// String representation of the Rust type mapping to this OperandKind. + pub rust_type: &'static str, + + /// Name of this OperandKind in the format's member field. + pub rust_field_name: &'static str, + + /// Type-specific fields for this OperandKind. 
+ pub fields: OperandKindFields, + + doc: Option<&'static str>, +} + +impl OperandKind { + pub fn new( + rust_field_name: &'static str, + rust_type: &'static str, + fields: OperandKindFields, + ) -> Self { + Self { + rust_field_name, + rust_type, + fields, + doc: None, + } + } + pub fn with_doc(mut self, doc: &'static str) -> Self { + assert!(self.doc.is_none()); + self.doc = Some(doc); + self + } + fn doc(&self) -> Option<&str> { + if let Some(doc) = &self.doc { + return Some(doc); + } + match &self.fields { + OperandKindFields::TypeVar(type_var) => Some(&type_var.doc), + OperandKindFields::ImmEnum(_) + | OperandKindFields::ImmValue + | OperandKindFields::EntityRef + | OperandKindFields::VariableArgs => None, + } + } +} + +impl Into for &TypeVar { + fn into(self) -> OperandKind { + OperandKind::new( + "value", + "ir::Value", + OperandKindFields::TypeVar(self.into()), + ) + } +} +impl Into for &OperandKind { + fn into(self) -> OperandKind { + self.clone() + } +} diff --git a/cranelift/codegen/meta/src/cdsl/recipes.rs b/cranelift/codegen/meta/src/cdsl/recipes.rs new file mode 100644 index 0000000000..dfe4cd67a5 --- /dev/null +++ b/cranelift/codegen/meta/src/cdsl/recipes.rs @@ -0,0 +1,298 @@ +use std::rc::Rc; + +use cranelift_entity::{entity_impl, PrimaryMap}; + +use crate::cdsl::formats::InstructionFormat; +use crate::cdsl::instructions::InstructionPredicate; +use crate::cdsl::regs::RegClassIndex; +use crate::cdsl::settings::SettingPredicateNumber; + +/// A specific register in a register class. +/// +/// A register is identified by the top-level register class it belongs to and +/// its first register unit. +/// +/// Specific registers are used to describe constraints on instructions where +/// some operands must use a fixed register. +/// +/// Register instances can be created with the constructor, or accessed as +/// attributes on the register class: `GPR.rcx`. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub(crate) struct Register { + pub regclass: RegClassIndex, + pub unit: u8, +} + +impl Register { + pub fn new(regclass: RegClassIndex, unit: u8) -> Self { + Self { regclass, unit } + } +} + +/// An operand that must be in a stack slot. +/// +/// A `Stack` object can be used to indicate an operand constraint for a value +/// operand that must live in a stack slot. +#[derive(Copy, Clone, Hash, PartialEq)] +pub(crate) struct Stack { + pub regclass: RegClassIndex, +} + +impl Stack { + pub fn new(regclass: RegClassIndex) -> Self { + Self { regclass } + } + pub fn stack_base_mask(self) -> &'static str { + // TODO: Make this configurable instead of just using the SP. + "StackBaseMask(1)" + } +} + +#[derive(Clone, Hash, PartialEq)] +pub(crate) struct BranchRange { + pub inst_size: u64, + pub range: u64, +} + +#[derive(Copy, Clone, Hash, PartialEq)] +pub(crate) enum OperandConstraint { + RegClass(RegClassIndex), + FixedReg(Register), + TiedInput(usize), + Stack(Stack), +} + +impl Into for RegClassIndex { + fn into(self) -> OperandConstraint { + OperandConstraint::RegClass(self) + } +} + +impl Into for Register { + fn into(self) -> OperandConstraint { + OperandConstraint::FixedReg(self) + } +} + +impl Into for usize { + fn into(self) -> OperandConstraint { + OperandConstraint::TiedInput(self) + } +} + +impl Into for Stack { + fn into(self) -> OperandConstraint { + OperandConstraint::Stack(self) + } +} + +/// A recipe for encoding instructions with a given format. +/// +/// Many different instructions can be encoded by the same recipe, but they +/// must all have the same instruction format. 
+/// +/// The `operands_in` and `operands_out` arguments are tuples specifying the register +/// allocation constraints for the value operands and results respectively. The +/// possible constraints for an operand are: +/// +/// - A `RegClass` specifying the set of allowed registers. +/// - A `Register` specifying a fixed-register operand. +/// - An integer indicating that this result is tied to a value operand, so +/// they must use the same register. +/// - A `Stack` specifying a value in a stack slot. +/// +/// The `branch_range` argument must be provided for recipes that can encode +/// branch instructions. It is an `(origin, bits)` tuple describing the exact +/// range that can be encoded in a branch instruction. +#[derive(Clone)] +pub(crate) struct EncodingRecipe { + /// Short mnemonic name for this recipe. + pub name: String, + + /// Associated instruction format. + pub format: Rc, + + /// Base number of bytes in the binary encoded instruction. + pub base_size: u64, + + /// Tuple of register constraints for value operands. + pub operands_in: Vec, + + /// Tuple of register constraints for results. + pub operands_out: Vec, + + /// Function name to use when computing actual size. + pub compute_size: &'static str, + + /// `(origin, bits)` range for branches. + pub branch_range: Option, + + /// This instruction clobbers `iflags` and `fflags`; true by default. + pub clobbers_flags: bool, + + /// Instruction predicate. + pub inst_predicate: Option, + + /// ISA predicate. + pub isa_predicate: Option, + + /// Rust code for binary emission. + pub emit: Option, +} + +// Implement PartialEq ourselves: take all the fields into account but the name. +impl PartialEq for EncodingRecipe { + fn eq(&self, other: &Self) -> bool { + Rc::ptr_eq(&self.format, &other.format) + && self.base_size == other.base_size + && self.operands_in == other.operands_in + && self.operands_out == other.operands_out + && self.compute_size == other.compute_size + && self.branch_range == other.branch_range + && self.clobbers_flags == other.clobbers_flags + && self.inst_predicate == other.inst_predicate + && self.isa_predicate == other.isa_predicate + && self.emit == other.emit + } +} + +// To allow using it in a hashmap. +impl Eq for EncodingRecipe {} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct EncodingRecipeNumber(u32); +entity_impl!(EncodingRecipeNumber); + +pub(crate) type Recipes = PrimaryMap; + +#[derive(Clone)] +pub(crate) struct EncodingRecipeBuilder { + pub name: String, + format: Rc, + pub base_size: u64, + pub operands_in: Option>, + pub operands_out: Option>, + pub compute_size: Option<&'static str>, + pub branch_range: Option, + pub emit: Option, + clobbers_flags: Option, + inst_predicate: Option, + isa_predicate: Option, +} + +impl EncodingRecipeBuilder { + pub fn new(name: impl Into, format: &Rc, base_size: u64) -> Self { + Self { + name: name.into(), + format: format.clone(), + base_size, + operands_in: None, + operands_out: None, + compute_size: None, + branch_range: None, + emit: None, + clobbers_flags: None, + inst_predicate: None, + isa_predicate: None, + } + } + + // Setters. 
+    pub fn operands_in(mut self, constraints: Vec<impl Into<OperandConstraint>>) -> Self {
+        assert!(self.operands_in.is_none());
+        self.operands_in = Some(
+            constraints
+                .into_iter()
+                .map(|constr| constr.into())
+                .collect(),
+        );
+        self
+    }
+    pub fn operands_out(mut self, constraints: Vec<impl Into<OperandConstraint>>) -> Self {
+        assert!(self.operands_out.is_none());
+        self.operands_out = Some(
+            constraints
+                .into_iter()
+                .map(|constr| constr.into())
+                .collect(),
+        );
+        self
+    }
+    pub fn clobbers_flags(mut self, flag: bool) -> Self {
+        assert!(self.clobbers_flags.is_none());
+        self.clobbers_flags = Some(flag);
+        self
+    }
+    pub fn emit(mut self, code: impl Into<String>) -> Self {
+        assert!(self.emit.is_none());
+        self.emit = Some(code.into());
+        self
+    }
+    pub fn branch_range(mut self, range: (u64, u64)) -> Self {
+        assert!(self.branch_range.is_none());
+        self.branch_range = Some(BranchRange {
+            inst_size: range.0,
+            range: range.1,
+        });
+        self
+    }
+    pub fn isa_predicate(mut self, pred: SettingPredicateNumber) -> Self {
+        assert!(self.isa_predicate.is_none());
+        self.isa_predicate = Some(pred);
+        self
+    }
+    pub fn inst_predicate(mut self, inst_predicate: impl Into<InstructionPredicate>) -> Self {
+        assert!(self.inst_predicate.is_none());
+        self.inst_predicate = Some(inst_predicate.into());
+        self
+    }
+    pub fn compute_size(mut self, compute_size: &'static str) -> Self {
+        assert!(self.compute_size.is_none());
+        self.compute_size = Some(compute_size);
+        self
+    }
+
+    pub fn build(self) -> EncodingRecipe {
+        let operands_in = self.operands_in.unwrap_or_default();
+        let operands_out = self.operands_out.unwrap_or_default();
+
+        // The number of input constraints must match the number of format input operands.
+        if !self.format.has_value_list {
+            assert!(
+                operands_in.len() == self.format.num_value_operands,
+                format!(
+                    "missing operand constraints for recipe {} (format {})",
+                    self.name, self.format.name
+                )
+            );
+        }
+
+        // Ensure tied inputs actually refer to existing inputs.
+ for constraint in operands_in.iter().chain(operands_out.iter()) { + if let OperandConstraint::TiedInput(n) = *constraint { + assert!(n < operands_in.len()); + } + } + + let compute_size = match self.compute_size { + Some(compute_size) => compute_size, + None => "base_size", + }; + + let clobbers_flags = self.clobbers_flags.unwrap_or(true); + + EncodingRecipe { + name: self.name, + format: self.format, + base_size: self.base_size, + operands_in, + operands_out, + compute_size, + branch_range: self.branch_range, + clobbers_flags, + inst_predicate: self.inst_predicate, + isa_predicate: self.isa_predicate, + emit: self.emit, + } + } +} diff --git a/cranelift/codegen/meta/src/cdsl/regs.rs b/cranelift/codegen/meta/src/cdsl/regs.rs new file mode 100644 index 0000000000..98a5751f2e --- /dev/null +++ b/cranelift/codegen/meta/src/cdsl/regs.rs @@ -0,0 +1,412 @@ +use cranelift_codegen_shared::constants; +use cranelift_entity::{entity_impl, EntityRef, PrimaryMap}; + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct RegBankIndex(u32); +entity_impl!(RegBankIndex); + +pub(crate) struct RegBank { + pub name: &'static str, + pub first_unit: u8, + pub units: u8, + pub names: Vec<&'static str>, + pub prefix: &'static str, + pub pressure_tracking: bool, + pub pinned_reg: Option, + pub toprcs: Vec, + pub classes: Vec, +} + +impl RegBank { + pub fn new( + name: &'static str, + first_unit: u8, + units: u8, + names: Vec<&'static str>, + prefix: &'static str, + pressure_tracking: bool, + pinned_reg: Option, + ) -> Self { + RegBank { + name, + first_unit, + units, + names, + prefix, + pressure_tracking, + pinned_reg, + toprcs: Vec::new(), + classes: Vec::new(), + } + } + + fn unit_by_name(&self, name: &'static str) -> u8 { + let unit = if let Some(found) = self.names.iter().position(|®_name| reg_name == name) { + found + } else { + // Try to match without the bank prefix. + assert!(name.starts_with(self.prefix)); + let name_without_prefix = &name[self.prefix.len()..]; + if let Some(found) = self + .names + .iter() + .position(|®_name| reg_name == name_without_prefix) + { + found + } else { + // Ultimate try: try to parse a number and use this in the array, eg r15 on x86. + if let Ok(as_num) = name_without_prefix.parse::() { + assert!( + (as_num - self.first_unit) < self.units, + "trying to get {}, but bank only has {} registers!", + name, + self.units + ); + (as_num - self.first_unit) as usize + } else { + panic!("invalid register name {}", name); + } + } + }; + self.first_unit + (unit as u8) + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] +pub(crate) struct RegClassIndex(u32); +entity_impl!(RegClassIndex); + +pub(crate) struct RegClass { + pub name: &'static str, + pub index: RegClassIndex, + pub width: u8, + pub bank: RegBankIndex, + pub toprc: RegClassIndex, + pub count: u8, + pub start: u8, + pub subclasses: Vec, +} + +impl RegClass { + pub fn new( + name: &'static str, + index: RegClassIndex, + width: u8, + bank: RegBankIndex, + toprc: RegClassIndex, + count: u8, + start: u8, + ) -> Self { + Self { + name, + index, + width, + bank, + toprc, + count, + start, + subclasses: Vec::new(), + } + } + + /// Compute a bit-mask of subclasses, including self. + pub fn subclass_mask(&self) -> u64 { + let mut m = 1 << self.index.index(); + for rc in self.subclasses.iter() { + m |= 1 << rc.index(); + } + m + } + + /// Compute a bit-mask of the register units allocated by this register class. 
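The `mask` method that follows sets one bit per allocated register unit, spread across three 32-bit words. As a worked illustration (the numbers are hypothetical, not taken from a real ISA description), here is a standalone re-statement of that computation showing a class of four double-width registers starting at unit 30 straddling the first two words:

```rust
/// One bit per allocated register unit, over three 32-bit mask words.
fn mask(start: u8, count: u8, width: u8, bank_first_unit: u8) -> Vec<u32> {
    let mut u = (start + bank_first_unit) as usize;
    let mut out_mask = vec![0u32; 3];
    for _ in 0..count {
        out_mask[u / 32] |= 1 << (u % 32);
        u += width as usize;
    }
    out_mask
}

fn main() {
    // Four registers of width 2 starting at unit 30: bits 30, 32, 34, 36.
    let m = mask(30, 4, 2, 0);
    assert_eq!(m, vec![1 << 30, (1 << 0) | (1 << 2) | (1 << 4), 0]);
}
```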
+ pub fn mask(&self, bank_first_unit: u8) -> Vec { + let mut u = (self.start + bank_first_unit) as usize; + let mut out_mask = vec![0, 0, 0]; + for _ in 0..self.count { + out_mask[u / 32] |= 1 << (u % 32); + u += self.width as usize; + } + out_mask + } +} + +pub(crate) enum RegClassProto { + TopLevel(RegBankIndex), + SubClass(RegClassIndex), +} + +pub(crate) struct RegClassBuilder { + pub name: &'static str, + pub width: u8, + pub count: u8, + pub start: u8, + pub proto: RegClassProto, +} + +impl RegClassBuilder { + pub fn new_toplevel(name: &'static str, bank: RegBankIndex) -> Self { + Self { + name, + width: 1, + count: 0, + start: 0, + proto: RegClassProto::TopLevel(bank), + } + } + pub fn subclass_of( + name: &'static str, + parent_index: RegClassIndex, + start: u8, + stop: u8, + ) -> Self { + assert!(stop >= start); + Self { + name, + width: 0, + count: stop - start, + start, + proto: RegClassProto::SubClass(parent_index), + } + } + pub fn count(mut self, count: u8) -> Self { + self.count = count; + self + } + pub fn width(mut self, width: u8) -> Self { + match self.proto { + RegClassProto::TopLevel(_) => self.width = width, + RegClassProto::SubClass(_) => panic!("Subclasses inherit their parent's width."), + } + self + } +} + +pub(crate) struct RegBankBuilder { + pub name: &'static str, + pub units: u8, + pub names: Vec<&'static str>, + pub prefix: &'static str, + pub pressure_tracking: Option, + pub pinned_reg: Option, +} + +impl RegBankBuilder { + pub fn new(name: &'static str, prefix: &'static str) -> Self { + Self { + name, + units: 0, + names: vec![], + prefix, + pressure_tracking: None, + pinned_reg: None, + } + } + pub fn units(mut self, units: u8) -> Self { + self.units = units; + self + } + pub fn names(mut self, names: Vec<&'static str>) -> Self { + self.names = names; + self + } + pub fn track_pressure(mut self, track: bool) -> Self { + self.pressure_tracking = Some(track); + self + } + pub fn pinned_reg(mut self, unit: u16) -> Self { + assert!(unit < u16::from(self.units)); + self.pinned_reg = Some(unit); + self + } +} + +pub(crate) struct IsaRegsBuilder { + pub banks: PrimaryMap, + pub classes: PrimaryMap, +} + +impl IsaRegsBuilder { + pub fn new() -> Self { + Self { + banks: PrimaryMap::new(), + classes: PrimaryMap::new(), + } + } + + pub fn add_bank(&mut self, builder: RegBankBuilder) -> RegBankIndex { + let first_unit = if self.banks.is_empty() { + 0 + } else { + let last = &self.banks.last().unwrap(); + let first_available_unit = (last.first_unit + last.units) as i8; + let units = builder.units; + let align = if units.is_power_of_two() { + units + } else { + units.next_power_of_two() + } as i8; + (first_available_unit + align - 1) & -align + } as u8; + + self.banks.push(RegBank::new( + builder.name, + first_unit, + builder.units, + builder.names, + builder.prefix, + builder + .pressure_tracking + .expect("Pressure tracking must be explicitly set"), + builder.pinned_reg, + )) + } + + pub fn add_class(&mut self, builder: RegClassBuilder) -> RegClassIndex { + let class_index = self.classes.next_key(); + + // Finish delayed construction of RegClass. 
+ let (bank, toprc, start, width) = match builder.proto { + RegClassProto::TopLevel(bank_index) => { + self.banks + .get_mut(bank_index) + .unwrap() + .toprcs + .push(class_index); + (bank_index, class_index, builder.start, builder.width) + } + RegClassProto::SubClass(parent_class_index) => { + assert!(builder.width == 0); + let (bank, toprc, start, width) = { + let parent = self.classes.get(parent_class_index).unwrap(); + (parent.bank, parent.toprc, parent.start, parent.width) + }; + for reg_class in self.classes.values_mut() { + if reg_class.toprc == toprc { + reg_class.subclasses.push(class_index); + } + } + let subclass_start = start + builder.start * width; + (bank, toprc, subclass_start, width) + } + }; + + let reg_bank_units = self.banks.get(bank).unwrap().units; + assert!(start < reg_bank_units); + + let count = if builder.count != 0 { + builder.count + } else { + reg_bank_units / width + }; + + let reg_class = RegClass::new(builder.name, class_index, width, bank, toprc, count, start); + self.classes.push(reg_class); + + let reg_bank = self.banks.get_mut(bank).unwrap(); + reg_bank.classes.push(class_index); + + class_index + } + + /// Checks that the set of register classes satisfies: + /// + /// 1. Closed under intersection: The intersection of any two register + /// classes in the set is either empty or identical to a member of the + /// set. + /// 2. There are no identical classes under different names. + /// 3. Classes are sorted topologically such that all subclasses have a + /// higher index that the superclass. + pub fn build(self) -> IsaRegs { + for reg_bank in self.banks.values() { + for i1 in reg_bank.classes.iter() { + for i2 in reg_bank.classes.iter() { + if i1 >= i2 { + continue; + } + + let rc1 = self.classes.get(*i1).unwrap(); + let rc2 = self.classes.get(*i2).unwrap(); + + let rc1_mask = rc1.mask(0); + let rc2_mask = rc2.mask(0); + + assert!( + rc1.width != rc2.width || rc1_mask != rc2_mask, + "no duplicates" + ); + if rc1.width != rc2.width { + continue; + } + + let mut intersect = Vec::new(); + for (a, b) in rc1_mask.iter().zip(rc2_mask.iter()) { + intersect.push(a & b); + } + if intersect == vec![0; intersect.len()] { + continue; + } + + // Classes must be topologically ordered, so the intersection can't be the + // superclass. + assert!(intersect != rc1_mask); + + // If the intersection is the second one, then it must be a subclass. 
+ if intersect == rc2_mask { + assert!(self + .classes + .get(*i1) + .unwrap() + .subclasses + .iter() + .any(|x| *x == *i2)); + } + } + } + } + + assert!( + self.classes.len() <= constants::MAX_NUM_REG_CLASSES, + "Too many register classes" + ); + + let num_toplevel = self + .classes + .values() + .filter(|x| x.toprc == x.index && self.banks.get(x.bank).unwrap().pressure_tracking) + .count(); + + assert!( + num_toplevel <= constants::MAX_TRACKED_TOP_RCS, + "Too many top-level register classes" + ); + + IsaRegs::new(self.banks, self.classes) + } +} + +pub(crate) struct IsaRegs { + pub banks: PrimaryMap, + pub classes: PrimaryMap, +} + +impl IsaRegs { + fn new( + banks: PrimaryMap, + classes: PrimaryMap, + ) -> Self { + Self { banks, classes } + } + + pub fn class_by_name(&self, name: &str) -> RegClassIndex { + self.classes + .values() + .find(|&class| class.name == name) + .unwrap_or_else(|| panic!("register class {} not found", name)) + .index + } + + pub fn regunit_by_name(&self, class_index: RegClassIndex, name: &'static str) -> u8 { + let bank_index = self.classes.get(class_index).unwrap().bank; + self.banks.get(bank_index).unwrap().unit_by_name(name) + } +} diff --git a/cranelift/codegen/meta/src/cdsl/settings.rs b/cranelift/codegen/meta/src/cdsl/settings.rs new file mode 100644 index 0000000000..217bad9955 --- /dev/null +++ b/cranelift/codegen/meta/src/cdsl/settings.rs @@ -0,0 +1,407 @@ +use std::iter; + +#[derive(Clone, Copy, Hash, PartialEq, Eq)] +pub(crate) struct BoolSettingIndex(usize); + +#[derive(Hash, PartialEq, Eq)] +pub(crate) struct BoolSetting { + pub default: bool, + pub bit_offset: u8, + pub predicate_number: u8, +} + +#[derive(Hash, PartialEq, Eq)] +pub(crate) enum SpecificSetting { + Bool(BoolSetting), + Enum(Vec<&'static str>), + Num(u8), +} + +#[derive(Hash, PartialEq, Eq)] +pub(crate) struct Setting { + pub name: &'static str, + pub comment: &'static str, + pub specific: SpecificSetting, + pub byte_offset: u8, +} + +impl Setting { + pub fn default_byte(&self) -> u8 { + match self.specific { + SpecificSetting::Bool(BoolSetting { + default, + bit_offset, + .. + }) => { + if default { + 1 << bit_offset + } else { + 0 + } + } + SpecificSetting::Enum(_) => 0, + SpecificSetting::Num(default) => default, + } + } + + fn byte_for_value(&self, v: bool) -> u8 { + match self.specific { + SpecificSetting::Bool(BoolSetting { bit_offset, .. }) => { + if v { + 1 << bit_offset + } else { + 0 + } + } + _ => panic!("byte_for_value shouldn't be used for non-boolean settings."), + } + } + + fn byte_mask(&self) -> u8 { + match self.specific { + SpecificSetting::Bool(BoolSetting { bit_offset, .. 
}) => 1 << bit_offset, + _ => panic!("byte_for_value shouldn't be used for non-boolean settings."), + } + } +} + +#[derive(Hash, PartialEq, Eq)] +pub(crate) struct PresetIndex(usize); + +#[derive(Hash, PartialEq, Eq)] +pub(crate) enum PresetType { + BoolSetting(BoolSettingIndex), + OtherPreset(PresetIndex), +} + +impl Into for BoolSettingIndex { + fn into(self) -> PresetType { + PresetType::BoolSetting(self) + } +} +impl Into for PresetIndex { + fn into(self) -> PresetType { + PresetType::OtherPreset(self) + } +} + +#[derive(Hash, PartialEq, Eq)] +pub(crate) struct Preset { + pub name: &'static str, + values: Vec, +} + +impl Preset { + pub fn layout(&self, group: &SettingGroup) -> Vec<(u8, u8)> { + let mut layout: Vec<(u8, u8)> = iter::repeat((0, 0)) + .take(group.settings_size as usize) + .collect(); + for bool_index in &self.values { + let setting = &group.settings[bool_index.0]; + let mask = setting.byte_mask(); + let val = setting.byte_for_value(true); + assert!((val & !mask) == 0); + let (ref mut l_mask, ref mut l_val) = + *layout.get_mut(setting.byte_offset as usize).unwrap(); + *l_mask |= mask; + *l_val = (*l_val & !mask) | val; + } + layout + } +} + +pub(crate) struct SettingGroup { + pub name: &'static str, + pub settings: Vec, + pub bool_start_byte_offset: u8, + pub settings_size: u8, + pub presets: Vec, + pub predicates: Vec, +} + +impl SettingGroup { + fn num_bool_settings(&self) -> u8 { + self.settings + .iter() + .filter(|s| { + if let SpecificSetting::Bool(_) = s.specific { + true + } else { + false + } + }) + .count() as u8 + } + + pub fn byte_size(&self) -> u8 { + let num_predicates = self.num_bool_settings() + (self.predicates.len() as u8); + self.bool_start_byte_offset + (num_predicates + 7) / 8 + } + + pub fn get_bool(&self, name: &'static str) -> (BoolSettingIndex, &Self) { + for (i, s) in self.settings.iter().enumerate() { + if let SpecificSetting::Bool(_) = s.specific { + if s.name == name { + return (BoolSettingIndex(i), self); + } + } + } + panic!("Should have found bool setting by name."); + } + + pub fn predicate_by_name(&self, name: &'static str) -> SettingPredicateNumber { + self.predicates + .iter() + .find(|pred| pred.name == name) + .unwrap_or_else(|| panic!("unknown predicate {}", name)) + .number + } +} + +/// This is the basic information needed to track the specific parts of a setting when building +/// them. +pub(crate) enum ProtoSpecificSetting { + Bool(bool), + Enum(Vec<&'static str>), + Num(u8), +} + +/// This is the information provided during building for a setting. 
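To make the `(mask, value)` byte pairs computed by `Preset::layout` above concrete, here is a minimal standalone sketch; the helper below is illustrative, not part of the builders in this file:

```rust
/// For a boolean at `bit_offset`, the mask selects its bit and the value
/// sets it; applying a preset ORs the masks and merges the values per byte.
fn bool_mask_value(bit_offset: u8, enabled: bool) -> (u8, u8) {
    let mask = 1u8 << bit_offset;
    let value = if enabled { mask } else { 0 };
    (mask, value)
}

fn main() {
    // Two booleans in the same settings byte, at bits 0 and 3.
    let (m1, v1) = bool_mask_value(0, true);
    let (m2, v2) = bool_mask_value(3, true);
    let (mut l_mask, mut l_val) = (0u8, 0u8);
    for (mask, val) in [(m1, v1), (m2, v2)] {
        l_mask |= mask;
        l_val = (l_val & !mask) | val;
    }
    // The preset byte says: bits 0 and 3 are controlled, and both are set.
    assert_eq!((l_mask, l_val), (0b0000_1001, 0b0000_1001));
}
```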
+struct ProtoSetting { + name: &'static str, + comment: &'static str, + specific: ProtoSpecificSetting, +} + +#[derive(Hash, PartialEq, Eq)] +pub(crate) enum PredicateNode { + OwnedBool(BoolSettingIndex), + SharedBool(&'static str, &'static str), + Not(Box), + And(Box, Box), +} + +impl Into for BoolSettingIndex { + fn into(self) -> PredicateNode { + PredicateNode::OwnedBool(self) + } +} +impl<'a> Into for (BoolSettingIndex, &'a SettingGroup) { + fn into(self) -> PredicateNode { + let (index, group) = (self.0, self.1); + let setting = &group.settings[index.0]; + PredicateNode::SharedBool(group.name, setting.name) + } +} + +impl PredicateNode { + fn render(&self, group: &SettingGroup) -> String { + match *self { + PredicateNode::OwnedBool(bool_setting_index) => format!( + "{}.{}()", + group.name, group.settings[bool_setting_index.0].name + ), + PredicateNode::SharedBool(ref group_name, ref bool_name) => { + format!("{}.{}()", group_name, bool_name) + } + PredicateNode::And(ref lhs, ref rhs) => { + format!("{} && {}", lhs.render(group), rhs.render(group)) + } + PredicateNode::Not(ref node) => format!("!({})", node.render(group)), + } + } +} + +struct ProtoPredicate { + pub name: &'static str, + node: PredicateNode, +} + +pub(crate) type SettingPredicateNumber = u8; + +pub(crate) struct Predicate { + pub name: &'static str, + node: PredicateNode, + pub number: SettingPredicateNumber, +} + +impl Predicate { + pub fn render(&self, group: &SettingGroup) -> String { + self.node.render(group) + } +} + +pub(crate) struct SettingGroupBuilder { + name: &'static str, + settings: Vec, + presets: Vec, + predicates: Vec, +} + +impl SettingGroupBuilder { + pub fn new(name: &'static str) -> Self { + Self { + name, + settings: Vec::new(), + presets: Vec::new(), + predicates: Vec::new(), + } + } + + fn add_setting( + &mut self, + name: &'static str, + comment: &'static str, + specific: ProtoSpecificSetting, + ) { + self.settings.push(ProtoSetting { + name, + comment, + specific, + }) + } + + pub fn add_bool( + &mut self, + name: &'static str, + comment: &'static str, + default: bool, + ) -> BoolSettingIndex { + assert!( + self.predicates.is_empty(), + "predicates must be added after the boolean settings" + ); + self.add_setting(name, comment, ProtoSpecificSetting::Bool(default)); + BoolSettingIndex(self.settings.len() - 1) + } + + pub fn add_enum( + &mut self, + name: &'static str, + comment: &'static str, + values: Vec<&'static str>, + ) { + self.add_setting(name, comment, ProtoSpecificSetting::Enum(values)); + } + + pub fn add_num(&mut self, name: &'static str, comment: &'static str, default: u8) { + self.add_setting(name, comment, ProtoSpecificSetting::Num(default)); + } + + pub fn add_predicate(&mut self, name: &'static str, node: PredicateNode) { + self.predicates.push(ProtoPredicate { name, node }); + } + + pub fn add_preset(&mut self, name: &'static str, args: Vec) -> PresetIndex { + let mut values = Vec::new(); + for arg in args { + match arg { + PresetType::OtherPreset(index) => { + values.extend(self.presets[index.0].values.iter()); + } + PresetType::BoolSetting(index) => values.push(index), + } + } + self.presets.push(Preset { name, values }); + PresetIndex(self.presets.len() - 1) + } + + /// Compute the layout of the byte vector used to represent this settings + /// group. + /// + /// The byte vector contains the following entries in order: + /// + /// 1. Byte-sized settings like `NumSetting` and `EnumSetting`. + /// 2. `BoolSetting` settings. + /// 3. Precomputed named predicates. + /// 4. 
Other numbered predicates, including parent predicates that need to be accessible by + /// number. + /// + /// Set `self.settings_size` to the length of the byte vector prefix that + /// contains the settings. All bytes after that are computed, not + /// configured. + /// + /// Set `self.boolean_offset` to the beginning of the numbered predicates, + /// 2. in the list above. + /// + /// Assign `byte_offset` and `bit_offset` fields in all settings. + pub fn build(self) -> SettingGroup { + let mut group = SettingGroup { + name: self.name, + settings: Vec::new(), + bool_start_byte_offset: 0, + settings_size: 0, + presets: Vec::new(), + predicates: Vec::new(), + }; + + let mut byte_offset = 0; + + // Assign the non-boolean settings first. + for s in &self.settings { + let specific = match s.specific { + ProtoSpecificSetting::Bool(..) => continue, + ProtoSpecificSetting::Enum(ref values) => SpecificSetting::Enum(values.clone()), + ProtoSpecificSetting::Num(default) => SpecificSetting::Num(default), + }; + + group.settings.push(Setting { + name: s.name, + comment: s.comment, + byte_offset, + specific, + }); + + byte_offset += 1; + } + + group.bool_start_byte_offset = byte_offset; + + let mut predicate_number = 0; + + // Then the boolean settings. + for s in &self.settings { + let default = match s.specific { + ProtoSpecificSetting::Bool(default) => default, + ProtoSpecificSetting::Enum(_) | ProtoSpecificSetting::Num(_) => continue, + }; + group.settings.push(Setting { + name: s.name, + comment: s.comment, + byte_offset: byte_offset + predicate_number / 8, + specific: SpecificSetting::Bool(BoolSetting { + default, + bit_offset: predicate_number % 8, + predicate_number, + }), + }); + predicate_number += 1; + } + + assert!( + group.predicates.is_empty(), + "settings_size is the byte size before adding predicates" + ); + group.settings_size = group.byte_size(); + + // Sort predicates by name to ensure the same order as the Python code. + let mut predicates = self.predicates; + predicates.sort_by_key(|predicate| predicate.name); + + group + .predicates + .extend(predicates.into_iter().map(|predicate| { + let number = predicate_number; + predicate_number += 1; + Predicate { + name: predicate.name, + node: predicate.node, + number, + } + })); + + group.presets.extend(self.presets); + + group + } +} diff --git a/cranelift/codegen/meta/src/cdsl/type_inference.rs b/cranelift/codegen/meta/src/cdsl/type_inference.rs new file mode 100644 index 0000000000..25a07a9b84 --- /dev/null +++ b/cranelift/codegen/meta/src/cdsl/type_inference.rs @@ -0,0 +1,660 @@ +use crate::cdsl::ast::{Def, DefIndex, DefPool, Var, VarIndex, VarPool}; +use crate::cdsl::typevar::{DerivedFunc, TypeSet, TypeVar}; + +use std::collections::{HashMap, HashSet}; +use std::iter::FromIterator; + +#[derive(Debug, Hash, PartialEq, Eq)] +pub(crate) enum Constraint { + /// Constraint specifying that a type var tv1 must be wider than or equal to type var tv2 at + /// runtime. This requires that: + /// 1) They have the same number of lanes + /// 2) In a lane tv1 has at least as many bits as tv2. + WiderOrEq(TypeVar, TypeVar), + + /// Constraint specifying that two derived type vars must have the same runtime type. + Eq(TypeVar, TypeVar), + + /// Constraint specifying that a type var must belong to some typeset. 
+ InTypeset(TypeVar, TypeSet), +} + +impl Constraint { + fn translate_with TypeVar>(&self, func: F) -> Constraint { + match self { + Constraint::WiderOrEq(lhs, rhs) => { + let lhs = func(&lhs); + let rhs = func(&rhs); + Constraint::WiderOrEq(lhs, rhs) + } + Constraint::Eq(lhs, rhs) => { + let lhs = func(&lhs); + let rhs = func(&rhs); + Constraint::Eq(lhs, rhs) + } + Constraint::InTypeset(tv, ts) => { + let tv = func(&tv); + Constraint::InTypeset(tv, ts.clone()) + } + } + } + + /// Creates a new constraint by replacing type vars by their hashmap equivalent. + fn translate_with_map( + &self, + original_to_own_typevar: &HashMap<&TypeVar, TypeVar>, + ) -> Constraint { + self.translate_with(|tv| substitute(original_to_own_typevar, tv)) + } + + /// Creates a new constraint by replacing type vars by their canonical equivalent. + fn translate_with_env(&self, type_env: &TypeEnvironment) -> Constraint { + self.translate_with(|tv| type_env.get_equivalent(tv)) + } + + fn is_trivial(&self) -> bool { + match self { + Constraint::WiderOrEq(lhs, rhs) => { + // Trivially true. + if lhs == rhs { + return true; + } + + let ts1 = lhs.get_typeset(); + let ts2 = rhs.get_typeset(); + + // Trivially true. + if ts1.is_wider_or_equal(&ts2) { + return true; + } + + // Trivially false. + if ts1.is_narrower(&ts2) { + return true; + } + + // Trivially false. + if (&ts1.lanes & &ts2.lanes).is_empty() { + return true; + } + + self.is_concrete() + } + Constraint::Eq(lhs, rhs) => lhs == rhs || self.is_concrete(), + Constraint::InTypeset(_, _) => { + // The way InTypeset are made, they would always be trivial if we were applying the + // same logic as the Python code did, so ignore this. + self.is_concrete() + } + } + } + + /// Returns true iff all the referenced type vars are singletons. + fn is_concrete(&self) -> bool { + match self { + Constraint::WiderOrEq(lhs, rhs) => { + lhs.singleton_type().is_some() && rhs.singleton_type().is_some() + } + Constraint::Eq(lhs, rhs) => { + lhs.singleton_type().is_some() && rhs.singleton_type().is_some() + } + Constraint::InTypeset(tv, _) => tv.singleton_type().is_some(), + } + } + + fn typevar_args(&self) -> Vec<&TypeVar> { + match self { + Constraint::WiderOrEq(lhs, rhs) => vec![lhs, rhs], + Constraint::Eq(lhs, rhs) => vec![lhs, rhs], + Constraint::InTypeset(tv, _) => vec![tv], + } + } +} + +#[derive(Clone, Copy)] +enum TypeEnvRank { + Singleton = 5, + Input = 4, + Intermediate = 3, + Output = 2, + Temp = 1, + Internal = 0, +} + +/// Class encapsulating the necessary bookkeeping for type inference. +pub(crate) struct TypeEnvironment { + vars: HashSet, + ranks: HashMap, + equivalency_map: HashMap, + pub constraints: Vec, +} + +impl TypeEnvironment { + fn new() -> Self { + TypeEnvironment { + vars: HashSet::new(), + ranks: HashMap::new(), + equivalency_map: HashMap::new(), + constraints: Vec::new(), + } + } + + fn register(&mut self, var_index: VarIndex, var: &mut Var) { + self.vars.insert(var_index); + let rank = if var.is_input() { + TypeEnvRank::Input + } else if var.is_intermediate() { + TypeEnvRank::Intermediate + } else if var.is_output() { + TypeEnvRank::Output + } else { + assert!(var.is_temp()); + TypeEnvRank::Temp + }; + self.ranks.insert(var.get_or_create_typevar(), rank); + } + + fn add_constraint(&mut self, constraint: Constraint) { + if self.constraints.iter().any(|item| *item == constraint) { + return; + } + + // Check extra conditions for InTypeset constraints. 
+ if let Constraint::InTypeset(tv, _) = &constraint { + assert!( + tv.base.is_none(), + "type variable is {:?}, while expecting none", + tv + ); + assert!( + tv.name.starts_with("typeof_"), + "Name \"{}\" should start with \"typeof_\"", + tv.name + ); + } + + self.constraints.push(constraint); + } + + /// Returns the canonical representative of the equivalency class of the given argument, or + /// duplicates it if it's not there yet. + pub fn get_equivalent(&self, tv: &TypeVar) -> TypeVar { + let mut tv = tv; + while let Some(found) = self.equivalency_map.get(tv) { + tv = found; + } + match &tv.base { + Some(parent) => self + .get_equivalent(&parent.type_var) + .derived(parent.derived_func), + None => tv.clone(), + } + } + + /// Get the rank of tv in the partial order: + /// - TVs directly associated with a Var get their rank from the Var (see register()). + /// - Internally generated non-derived TVs implicitly get the lowest rank (0). + /// - Derived variables get their rank from their free typevar. + /// - Singletons have the highest rank. + /// - TVs associated with vars in a source pattern have a higher rank than TVs associated with + /// temporary vars. + fn rank(&self, tv: &TypeVar) -> u8 { + let actual_tv = match tv.base { + Some(_) => tv.free_typevar(), + None => Some(tv.clone()), + }; + + let rank = match actual_tv { + Some(actual_tv) => match self.ranks.get(&actual_tv) { + Some(rank) => Some(*rank), + None => { + assert!( + !actual_tv.name.starts_with("typeof_"), + format!("variable {} should be explicitly ranked", actual_tv.name) + ); + None + } + }, + None => None, + }; + + let rank = match rank { + Some(rank) => rank, + None => { + if tv.singleton_type().is_some() { + TypeEnvRank::Singleton + } else { + TypeEnvRank::Internal + } + } + }; + + rank as u8 + } + + /// Record the fact that the free tv1 is part of the same equivalence class as tv2. The + /// canonical representative of the merged class is tv2's canonical representative. + fn record_equivalent(&mut self, tv1: TypeVar, tv2: TypeVar) { + assert!(tv1.base.is_none()); + assert!(self.get_equivalent(&tv1) == tv1); + if let Some(tv2_base) = &tv2.base { + // Ensure there are no cycles. + assert!(self.get_equivalent(&tv2_base.type_var) != tv1); + } + self.equivalency_map.insert(tv1, tv2); + } + + /// Get the free typevars in the current type environment. + pub fn free_typevars(&self, var_pool: &mut VarPool) -> Vec { + let mut typevars = Vec::new(); + typevars.extend(self.equivalency_map.keys().cloned()); + typevars.extend( + self.vars + .iter() + .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()), + ); + + let set: HashSet = HashSet::from_iter( + typevars + .iter() + .map(|tv| self.get_equivalent(tv).free_typevar()) + .filter(|opt_tv| { + // Filter out singleton types. + opt_tv.is_some() + }) + .map(|tv| tv.unwrap()), + ); + Vec::from_iter(set) + } + + /// Normalize by collapsing any roots that don't correspond to a concrete type var AND have a + /// single type var derived from them or equivalent to them. + /// + /// e.g. if we have a root of the tree that looks like: + /// + /// typeof_a typeof_b + /// \\ / + /// typeof_x + /// | + /// half_width(1) + /// | + /// 1 + /// + /// we want to collapse the linear path between 1 and typeof_x. 
The resulting graph is: + /// + /// typeof_a typeof_b + /// \\ / + /// typeof_x + fn normalize(&mut self, var_pool: &mut VarPool) { + let source_tvs: HashSet = HashSet::from_iter( + self.vars + .iter() + .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()), + ); + + let mut children: HashMap> = HashMap::new(); + + // Insert all the parents found by the derivation relationship. + for type_var in self.equivalency_map.values() { + if type_var.base.is_none() { + continue; + } + + let parent_tv = type_var.free_typevar(); + if parent_tv.is_none() { + // Ignore this type variable, it's a singleton. + continue; + } + let parent_tv = parent_tv.unwrap(); + + children + .entry(parent_tv) + .or_insert_with(HashSet::new) + .insert(type_var.clone()); + } + + // Insert all the explicit equivalency links. + for (equivalent_tv, canon_tv) in self.equivalency_map.iter() { + children + .entry(canon_tv.clone()) + .or_insert_with(HashSet::new) + .insert(equivalent_tv.clone()); + } + + // Remove links that are straight paths up to typevar of variables. + for free_root in self.free_typevars(var_pool) { + let mut root = &free_root; + while !source_tvs.contains(&root) + && children.contains_key(&root) + && children.get(&root).unwrap().len() == 1 + { + let child = children.get(&root).unwrap().iter().next().unwrap(); + assert_eq!(self.equivalency_map[child], root.clone()); + self.equivalency_map.remove(child); + root = child; + } + } + } + + /// Extract a clean type environment from self, that only mentions type vars associated with + /// real variables. + fn extract(self, var_pool: &mut VarPool) -> TypeEnvironment { + let vars_tv: HashSet = HashSet::from_iter( + self.vars + .iter() + .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()), + ); + + let mut new_equivalency_map: HashMap = HashMap::new(); + for tv in &vars_tv { + let canon_tv = self.get_equivalent(tv); + if *tv != canon_tv { + new_equivalency_map.insert(tv.clone(), canon_tv.clone()); + } + + // Sanity check: the translated type map should only refer to real variables. + assert!(vars_tv.contains(tv)); + let canon_free_tv = canon_tv.free_typevar(); + assert!(canon_free_tv.is_none() || vars_tv.contains(&canon_free_tv.unwrap())); + } + + let mut new_constraints: HashSet = HashSet::new(); + for constraint in &self.constraints { + let constraint = constraint.translate_with_env(&self); + if constraint.is_trivial() || new_constraints.contains(&constraint) { + continue; + } + + // Sanity check: translated constraints should refer only to real variables. + for arg in constraint.typevar_args() { + let arg_free_tv = arg.free_typevar(); + assert!(arg_free_tv.is_none() || vars_tv.contains(&arg_free_tv.unwrap())); + } + + new_constraints.insert(constraint); + } + + TypeEnvironment { + vars: self.vars, + ranks: self.ranks, + equivalency_map: new_equivalency_map, + constraints: Vec::from_iter(new_constraints), + } + } +} + +/// Replaces an external type variable according to the following rules: +/// - if a local copy is present in the map, return it. +/// - or if it's derived, create a local derived one that recursively substitutes the parent. +/// - or return itself. 
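The canonical-representative lookup underlying `get_equivalent` and `normalize` above is essentially chain-following in the equivalency map. Here is a minimal sketch, with plain strings standing in for type variables and none of the derived-variable handling:

```rust
use std::collections::HashMap;

/// Follow equivalence links until reaching a root, mirroring the
/// chain-walk in `TypeEnvironment::get_equivalent` (minus derived vars).
fn canonical<'a>(map: &'a HashMap<&'a str, &'a str>, mut tv: &'a str) -> &'a str {
    while let Some(&next) = map.get(tv) {
        tv = next;
    }
    tv
}

fn main() {
    let mut map = HashMap::new();
    // typeof_a and typeof_b were unified into typeof_x.
    map.insert("typeof_a", "typeof_x");
    map.insert("typeof_b", "typeof_x");
    assert_eq!(canonical(&map, "typeof_a"), "typeof_x");
    assert_eq!(canonical(&map, "typeof_x"), "typeof_x");
}
```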
+fn substitute(map: &HashMap<&TypeVar, TypeVar>, external_type_var: &TypeVar) -> TypeVar { + match map.get(&external_type_var) { + Some(own_type_var) => own_type_var.clone(), + None => match &external_type_var.base { + Some(parent) => { + let parent_substitute = substitute(map, &parent.type_var); + TypeVar::derived(&parent_substitute, parent.derived_func) + } + None => external_type_var.clone(), + }, + } +} + +/// Normalize a (potentially derived) typevar using the following rules: +/// +/// - vector and width derived functions commute +/// {HALF,DOUBLE}VECTOR({HALF,DOUBLE}WIDTH(base)) -> +/// {HALF,DOUBLE}WIDTH({HALF,DOUBLE}VECTOR(base)) +/// +/// - half/double pairs collapse +/// {HALF,DOUBLE}WIDTH({DOUBLE,HALF}WIDTH(base)) -> base +/// {HALF,DOUBLE}VECTOR({DOUBLE,HALF}VECTOR(base)) -> base +fn canonicalize_derivations(tv: TypeVar) -> TypeVar { + let base = match &tv.base { + Some(base) => base, + None => return tv, + }; + + let derived_func = base.derived_func; + + if let Some(base_base) = &base.type_var.base { + let base_base_tv = &base_base.type_var; + match (derived_func, base_base.derived_func) { + (DerivedFunc::HalfWidth, DerivedFunc::DoubleWidth) + | (DerivedFunc::DoubleWidth, DerivedFunc::HalfWidth) + | (DerivedFunc::HalfVector, DerivedFunc::DoubleVector) + | (DerivedFunc::DoubleVector, DerivedFunc::HalfVector) => { + // Cancelling bijective transformations. This doesn't hide any overflow issues + // since derived type sets are checked upon derivaion, and base typesets are only + // allowed to shrink. + return canonicalize_derivations(base_base_tv.clone()); + } + (DerivedFunc::HalfWidth, DerivedFunc::HalfVector) + | (DerivedFunc::HalfWidth, DerivedFunc::DoubleVector) + | (DerivedFunc::DoubleWidth, DerivedFunc::DoubleVector) + | (DerivedFunc::DoubleWidth, DerivedFunc::HalfVector) => { + // Arbitrarily put WIDTH derivations before VECTOR derivations, since they commute. + return canonicalize_derivations( + base_base_tv + .derived(derived_func) + .derived(base_base.derived_func), + ); + } + _ => {} + }; + } + + canonicalize_derivations(base.type_var.clone()).derived(derived_func) +} + +/// Given typevars tv1 and tv2 (which could be derived from one another), constrain their typesets +/// to be the same. When one is derived from the other, repeat the constrain process until +/// a fixed point is reached. +fn constrain_fixpoint(tv1: &TypeVar, tv2: &TypeVar) { + loop { + let old_tv1_ts = tv1.get_typeset().clone(); + tv2.constrain_types(tv1.clone()); + if tv1.get_typeset() == old_tv1_ts { + break; + } + } + + let old_tv2_ts = tv2.get_typeset(); + tv1.constrain_types(tv2.clone()); + // The above loop should ensure that all reference cycles have been handled. + assert!(old_tv2_ts == tv2.get_typeset()); +} + +/// Unify tv1 and tv2 in the given type environment. tv1 must have a rank greater or equal to tv2's +/// one, modulo commutations. +fn unify(tv1: &TypeVar, tv2: &TypeVar, type_env: &mut TypeEnvironment) -> Result<(), String> { + let tv1 = canonicalize_derivations(type_env.get_equivalent(tv1)); + let tv2 = canonicalize_derivations(type_env.get_equivalent(tv2)); + + if tv1 == tv2 { + // Already unified. + return Ok(()); + } + + if type_env.rank(&tv2) < type_env.rank(&tv1) { + // Make sure tv1 always has the smallest rank, since real variables have the higher rank + // and we want them to be the canonical representatives of their equivalency classes. 
+ return unify(&tv2, &tv1, type_env); + } + + constrain_fixpoint(&tv1, &tv2); + + if tv1.get_typeset().size() == 0 || tv2.get_typeset().size() == 0 { + return Err(format!( + "Error: empty type created when unifying {} and {}", + tv1.name, tv2.name + )); + } + + let base = match &tv1.base { + Some(base) => base, + None => { + type_env.record_equivalent(tv1, tv2); + return Ok(()); + } + }; + + if let Some(inverse) = base.derived_func.inverse() { + return unify(&base.type_var, &tv2.derived(inverse), type_env); + } + + type_env.add_constraint(Constraint::Eq(tv1, tv2)); + Ok(()) +} + +/// Perform type inference on one Def in the current type environment and return an updated type +/// environment or error. +/// +/// At a high level this works by creating fresh copies of each formal type var in the Def's +/// instruction's signature, and unifying the formal typevar with the corresponding actual typevar. +fn infer_definition( + def: &Def, + var_pool: &mut VarPool, + type_env: TypeEnvironment, + last_type_index: &mut usize, +) -> Result { + let apply = &def.apply; + let inst = &apply.inst; + + let mut type_env = type_env; + let free_formal_tvs = inst.all_typevars(); + + let mut original_to_own_typevar: HashMap<&TypeVar, TypeVar> = HashMap::new(); + for &tv in &free_formal_tvs { + assert!(original_to_own_typevar + .insert( + tv, + TypeVar::copy_from(tv, format!("own_{}", last_type_index)) + ) + .is_none()); + *last_type_index += 1; + } + + // Update the mapping with any explicity bound type vars: + for (i, value_type) in apply.value_types.iter().enumerate() { + let singleton = TypeVar::new_singleton(value_type.clone()); + assert!(original_to_own_typevar + .insert(free_formal_tvs[i], singleton) + .is_some()); + } + + // Get fresh copies for each typevar in the signature (both free and derived). + let mut formal_tvs = Vec::new(); + formal_tvs.extend(inst.value_results.iter().map(|&i| { + substitute( + &original_to_own_typevar, + inst.operands_out[i].type_var().unwrap(), + ) + })); + formal_tvs.extend(inst.value_opnums.iter().map(|&i| { + substitute( + &original_to_own_typevar, + inst.operands_in[i].type_var().unwrap(), + ) + })); + + // Get the list of actual vars. + let mut actual_vars = Vec::new(); + actual_vars.extend(inst.value_results.iter().map(|&i| def.defined_vars[i])); + actual_vars.extend( + inst.value_opnums + .iter() + .map(|&i| apply.args[i].unwrap_var()), + ); + + // Get the list of the actual TypeVars. + let mut actual_tvs = Vec::new(); + for var_index in actual_vars { + let var = var_pool.get_mut(var_index); + type_env.register(var_index, var); + actual_tvs.push(var.get_or_create_typevar()); + } + + // Make sure we start unifying with the control type variable first, by putting it at the + // front of both vectors. + if let Some(poly) = &inst.polymorphic_info { + let own_ctrl_tv = &original_to_own_typevar[&poly.ctrl_typevar]; + let ctrl_index = formal_tvs.iter().position(|tv| tv == own_ctrl_tv).unwrap(); + if ctrl_index != 0 { + formal_tvs.swap(0, ctrl_index); + actual_tvs.swap(0, ctrl_index); + } + } + + // Unify each actual type variable with the corresponding formal type variable. + for (actual_tv, formal_tv) in actual_tvs.iter().zip(&formal_tvs) { + if let Err(msg) = unify(actual_tv, formal_tv, &mut type_env) { + return Err(format!( + "fail ti on {} <: {}: {}", + actual_tv.name, formal_tv.name, msg + )); + } + } + + // Add any instruction specific constraints. 
+ for constraint in &inst.constraints { + type_env.add_constraint(constraint.translate_with_map(&original_to_own_typevar)); + } + + Ok(type_env) +} + +/// Perform type inference on an transformation. Return an updated type environment or error. +pub(crate) fn infer_transform( + src: DefIndex, + dst: &[DefIndex], + def_pool: &DefPool, + var_pool: &mut VarPool, +) -> Result { + let mut type_env = TypeEnvironment::new(); + let mut last_type_index = 0; + + // Execute type inference on the source pattern. + type_env = infer_definition(def_pool.get(src), var_pool, type_env, &mut last_type_index) + .map_err(|err| format!("In src pattern: {}", err))?; + + // Collect the type sets once after applying the source patterm; we'll compare the typesets + // after we've also considered the destination pattern, and will emit supplementary InTypeset + // checks if they don't match. + let src_typesets = type_env + .vars + .iter() + .map(|&var_index| { + let var = var_pool.get_mut(var_index); + let tv = type_env.get_equivalent(&var.get_or_create_typevar()); + (var_index, tv.get_typeset()) + }) + .collect::>(); + + // Execute type inference on the destination pattern. + for (i, &def_index) in dst.iter().enumerate() { + let def = def_pool.get(def_index); + type_env = infer_definition(def, var_pool, type_env, &mut last_type_index) + .map_err(|err| format!("line {}: {}", i, err))?; + } + + for (var_index, src_typeset) in src_typesets { + let var = var_pool.get(var_index); + if !var.has_free_typevar() { + continue; + } + let tv = type_env.get_equivalent(&var.get_typevar().unwrap()); + let new_typeset = tv.get_typeset(); + assert!( + new_typeset.is_subset(&src_typeset), + "type sets can only get narrower" + ); + if new_typeset != src_typeset { + type_env.add_constraint(Constraint::InTypeset(tv.clone(), new_typeset.clone())); + } + } + + type_env.normalize(var_pool); + + Ok(type_env.extract(var_pool)) +} diff --git a/cranelift/codegen/meta/src/cdsl/types.rs b/cranelift/codegen/meta/src/cdsl/types.rs new file mode 100644 index 0000000000..d971f45c61 --- /dev/null +++ b/cranelift/codegen/meta/src/cdsl/types.rs @@ -0,0 +1,571 @@ +//! Cranelift ValueType hierarchy + +use std::fmt; + +use crate::shared::types as shared_types; +use cranelift_codegen_shared::constants; + +// Rust name prefix used for the `rust_name` method. +static _RUST_NAME_PREFIX: &str = "ir::types::"; + +// ValueType variants (i8, i32, ...) are provided in `shared::types.rs`. + +/// A concrete SSA value type. +/// +/// All SSA values have a type that is described by an instance of `ValueType` +/// or one of its subclasses. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub(crate) enum ValueType { + Lane(LaneType), + Reference(ReferenceType), + Special(SpecialType), + Vector(VectorType), +} + +impl ValueType { + /// Iterate through all of the lane types. + pub fn all_lane_types() -> LaneTypeIterator { + LaneTypeIterator::new() + } + + /// Iterate through all of the special types (neither lanes nor vectors). + pub fn all_special_types() -> SpecialTypeIterator { + SpecialTypeIterator::new() + } + + pub fn all_reference_types() -> ReferenceTypeIterator { + ReferenceTypeIterator::new() + } + + /// Return a string containing the documentation comment for this type. + pub fn doc(&self) -> String { + match *self { + ValueType::Lane(l) => l.doc(), + ValueType::Reference(r) => r.doc(), + ValueType::Special(s) => s.doc(), + ValueType::Vector(ref v) => v.doc(), + } + } + + /// Return the number of bits in a lane. 
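The size accessors defined next all reduce to `lane_count() * lane_bits()`; a small self-contained mirror of those relations (using a hypothetical `Ty` struct rather than the crate's `ValueType`) makes them easy to sanity-check:

```rust
/// Minimal mirror of the size relations defined on `ValueType`.
struct Ty {
    lanes: u64,
    lane_bits: u64,
}

impl Ty {
    fn width(&self) -> u64 {
        self.lanes * self.lane_bits
    }
    fn membytes(&self) -> u64 {
        self.width() / 8
    }
    /// True iff equal lane counts and at least as many bits per lane.
    fn wider_or_equal(&self, rhs: &Ty) -> bool {
        self.lanes == rhs.lanes && self.lane_bits >= rhs.lane_bits
    }
}

fn main() {
    let i32x4 = Ty { lanes: 4, lane_bits: 32 };
    let i16x4 = Ty { lanes: 4, lane_bits: 16 };
    let i64x1 = Ty { lanes: 1, lane_bits: 64 };
    assert_eq!(i32x4.width(), 128);   // an i32x4 occupies 128 bits...
    assert_eq!(i32x4.membytes(), 16); // ...i.e. 16 bytes in memory
    assert!(i32x4.wider_or_equal(&i16x4));
    assert!(!i32x4.wider_or_equal(&i64x1)); // lane counts differ
}
```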
+    pub fn lane_bits(&self) -> u64 {
+        match *self {
+            ValueType::Lane(l) => l.lane_bits(),
+            ValueType::Reference(r) => r.lane_bits(),
+            ValueType::Special(s) => s.lane_bits(),
+            ValueType::Vector(ref v) => v.lane_bits(),
+        }
+    }
+
+    /// Return the number of lanes.
+    pub fn lane_count(&self) -> u64 {
+        match *self {
+            ValueType::Vector(ref v) => v.lane_count(),
+            _ => 1,
+        }
+    }
+
+    /// Find the number of bytes that this type occupies in memory.
+    pub fn membytes(&self) -> u64 {
+        self.width() / 8
+    }
+
+    /// Find the unique number associated with this type.
+    pub fn number(&self) -> Option<u8> {
+        match *self {
+            ValueType::Lane(l) => Some(l.number()),
+            ValueType::Reference(r) => Some(r.number()),
+            ValueType::Special(s) => Some(s.number()),
+            ValueType::Vector(ref v) => Some(v.number()),
+        }
+    }
+
+    /// Return the name of this type for generated Rust source files.
+    pub fn rust_name(&self) -> String {
+        format!("{}{}", _RUST_NAME_PREFIX, self.to_string().to_uppercase())
+    }
+
+    /// Return true iff:
+    ///     1. self and other have an equal number of lanes, and
+    ///     2. each lane in self has at least as many bits as a lane in other.
+    pub fn _wider_or_equal(&self, rhs: &ValueType) -> bool {
+        (self.lane_count() == rhs.lane_count()) && (self.lane_bits() >= rhs.lane_bits())
+    }
+
+    /// Return the total number of bits of an instance of this type.
+    pub fn width(&self) -> u64 {
+        self.lane_count() * self.lane_bits()
+    }
+}
+
+impl fmt::Display for ValueType {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            ValueType::Lane(l) => l.fmt(f),
+            ValueType::Reference(r) => r.fmt(f),
+            ValueType::Special(s) => s.fmt(f),
+            ValueType::Vector(ref v) => v.fmt(f),
+        }
+    }
+}
+
+/// Create a ValueType from a given lane type.
+impl From<LaneType> for ValueType {
+    fn from(lane: LaneType) -> Self {
+        ValueType::Lane(lane)
+    }
+}
+
+/// Create a ValueType from a given reference type.
+impl From<ReferenceType> for ValueType {
+    fn from(reference: ReferenceType) -> Self {
+        ValueType::Reference(reference)
+    }
+}
+
+/// Create a ValueType from a given special type.
+impl From<SpecialType> for ValueType {
+    fn from(spec: SpecialType) -> Self {
+        ValueType::Special(spec)
+    }
+}
+
+/// Create a ValueType from a given vector type.
+impl From<VectorType> for ValueType {
+    fn from(vector: VectorType) -> Self {
+        ValueType::Vector(vector)
+    }
+}
+
+/// A concrete scalar type that can appear as a vector lane too.
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+pub(crate) enum LaneType {
+    Bool(shared_types::Bool),
+    Float(shared_types::Float),
+    Int(shared_types::Int),
+}
+
+impl LaneType {
+    /// Return a string containing the documentation comment for this lane type.
+    pub fn doc(self) -> String {
+        match self {
+            LaneType::Bool(_) => format!("A boolean type with {} bits.", self.lane_bits()),
+            LaneType::Float(shared_types::Float::F32) => String::from(
+                "A 32-bit floating point type represented in the IEEE 754-2008
+                *binary32* interchange format. This corresponds to the :c:type:`float`
+                type in most C implementations.",
+            ),
+            LaneType::Float(shared_types::Float::F64) => String::from(
+                "A 64-bit floating point type represented in the IEEE 754-2008
+                *binary64* interchange format. This corresponds to the :c:type:`double`
+                type in most C implementations.",
+            ),
+            LaneType::Int(_) if self.lane_bits() < 32 => format!(
+                "An integer type with {} bits.
+                WARNING: arithmetic on {}-bit integers is incomplete",
+                self.lane_bits(),
+                self.lane_bits()
+            ),
+            LaneType::Int(_) => format!("An integer type with {} bits.", self.lane_bits()),
+        }
+    }
+
+    /// Return the number of bits in a lane.
+    pub fn lane_bits(self) -> u64 {
+        match self {
+            LaneType::Bool(ref b) => *b as u64,
+            LaneType::Float(ref f) => *f as u64,
+            LaneType::Int(ref i) => *i as u64,
+        }
+    }
+
+    /// Find the unique number associated with this lane type.
+    pub fn number(self) -> u8 {
+        constants::LANE_BASE
+            + match self {
+                LaneType::Bool(shared_types::Bool::B1) => 0,
+                LaneType::Bool(shared_types::Bool::B8) => 1,
+                LaneType::Bool(shared_types::Bool::B16) => 2,
+                LaneType::Bool(shared_types::Bool::B32) => 3,
+                LaneType::Bool(shared_types::Bool::B64) => 4,
+                LaneType::Bool(shared_types::Bool::B128) => 5,
+                LaneType::Int(shared_types::Int::I8) => 6,
+                LaneType::Int(shared_types::Int::I16) => 7,
+                LaneType::Int(shared_types::Int::I32) => 8,
+                LaneType::Int(shared_types::Int::I64) => 9,
+                LaneType::Int(shared_types::Int::I128) => 10,
+                LaneType::Float(shared_types::Float::F32) => 11,
+                LaneType::Float(shared_types::Float::F64) => 12,
+            }
+    }
+
+    pub fn bool_from_bits(num_bits: u16) -> LaneType {
+        LaneType::Bool(match num_bits {
+            1 => shared_types::Bool::B1,
+            8 => shared_types::Bool::B8,
+            16 => shared_types::Bool::B16,
+            32 => shared_types::Bool::B32,
+            64 => shared_types::Bool::B64,
+            128 => shared_types::Bool::B128,
+            _ => unreachable!("unexpected num bits for bool"),
+        })
+    }
+
+    pub fn int_from_bits(num_bits: u16) -> LaneType {
+        LaneType::Int(match num_bits {
+            8 => shared_types::Int::I8,
+            16 => shared_types::Int::I16,
+            32 => shared_types::Int::I32,
+            64 => shared_types::Int::I64,
+            128 => shared_types::Int::I128,
+            _ => unreachable!("unexpected num bits for int"),
+        })
+    }
+
+    pub fn float_from_bits(num_bits: u16) -> LaneType {
+        LaneType::Float(match num_bits {
+            32 => shared_types::Float::F32,
+            64 => shared_types::Float::F64,
+            _ => unreachable!("unexpected num bits for float"),
+        })
+    }
+
+    pub fn by(self, lanes: u16) -> ValueType {
+        if lanes == 1 {
+            self.into()
+        } else {
+            ValueType::Vector(VectorType::new(self, lanes.into()))
+        }
+    }
+
+    pub fn is_float(self) -> bool {
+        match self {
+            LaneType::Float(_) => true,
+            _ => false,
+        }
+    }
+
+    pub fn is_int(self) -> bool {
+        match self {
+            LaneType::Int(_) => true,
+            _ => false,
+        }
+    }
+}
+
+impl fmt::Display for LaneType {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            LaneType::Bool(_) => write!(f, "b{}", self.lane_bits()),
+            LaneType::Float(_) => write!(f, "f{}", self.lane_bits()),
+            LaneType::Int(_) => write!(f, "i{}", self.lane_bits()),
+        }
+    }
+}
+
+impl fmt::Debug for LaneType {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let inner_msg = format!("bits={}", self.lane_bits());
+        write!(
+            f,
+            "{}",
+            match *self {
+                LaneType::Bool(_) => format!("BoolType({})", inner_msg),
+                LaneType::Float(_) => format!("FloatType({})", inner_msg),
+                LaneType::Int(_) => format!("IntType({})", inner_msg),
+            }
+        )
+    }
+}
+
+/// Create a LaneType from a given bool variant.
+impl From<shared_types::Bool> for LaneType {
+    fn from(b: shared_types::Bool) -> Self {
+        LaneType::Bool(b)
+    }
+}
+
+/// Create a LaneType from a given float variant.
+impl From<shared_types::Float> for LaneType {
+    fn from(f: shared_types::Float) -> Self {
+        LaneType::Float(f)
+    }
+}
+
+/// Create a LaneType from a given int variant.
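+/// Like the bool and float conversions above, this lets a shared type variant be
+/// used with `.into()` wherever a `LaneType` is expected.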
+impl From<shared_types::Int> for LaneType {
+    fn from(i: shared_types::Int) -> Self {
+        LaneType::Int(i)
+    }
+}
+
+/// An iterator for different lane types.
+pub(crate) struct LaneTypeIterator {
+    bool_iter: shared_types::BoolIterator,
+    int_iter: shared_types::IntIterator,
+    float_iter: shared_types::FloatIterator,
+}
+
+impl LaneTypeIterator {
+    /// Create a new lane type iterator.
+    fn new() -> Self {
+        Self {
+            bool_iter: shared_types::BoolIterator::new(),
+            int_iter: shared_types::IntIterator::new(),
+            float_iter: shared_types::FloatIterator::new(),
+        }
+    }
+}
+
+impl Iterator for LaneTypeIterator {
+    type Item = LaneType;
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some(b) = self.bool_iter.next() {
+            Some(LaneType::from(b))
+        } else if let Some(i) = self.int_iter.next() {
+            Some(LaneType::from(i))
+        } else if let Some(f) = self.float_iter.next() {
+            Some(LaneType::from(f))
+        } else {
+            None
+        }
+    }
+}
+
+/// A concrete SIMD vector type.
+///
+/// A vector type has a lane type which is an instance of `LaneType`,
+/// and a positive number of lanes.
+#[derive(Clone, PartialEq, Eq, Hash)]
+pub(crate) struct VectorType {
+    base: LaneType,
+    lanes: u64,
+}
+
+impl VectorType {
+    /// Initialize a new vector type with the given lane type and number of lanes.
+    pub fn new(base: LaneType, lanes: u64) -> Self {
+        Self { base, lanes }
+    }
+
+    /// Return a string containing the documentation comment for this vector type.
+    pub fn doc(&self) -> String {
+        format!(
+            "A SIMD vector with {} lanes containing a `{}` each.",
+            self.lane_count(),
+            self.base
+        )
+    }
+
+    /// Return the number of bits in a lane.
+    pub fn lane_bits(&self) -> u64 {
+        self.base.lane_bits()
+    }
+
+    /// Return the number of lanes.
+    pub fn lane_count(&self) -> u64 {
+        self.lanes
+    }
+
+    /// Return the lane type.
+    pub fn lane_type(&self) -> LaneType {
+        self.base
+    }
+
+    /// Find the unique number associated with this vector type.
+    ///
+    /// Vector types are encoded with the lane type in the low 4 bits and
+    /// log2(lanes) in the high 4 bits, giving a range of 2-256 lanes.
+    pub fn number(&self) -> u8 {
+        let lanes_log_2: u32 = 63 - self.lane_count().leading_zeros();
+        let base_num = u32::from(self.base.number());
+        let num = (lanes_log_2 << 4) + base_num;
+        num as u8
+    }
+}
+
+impl fmt::Display for VectorType {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}x{}", self.base, self.lane_count())
+    }
+}
+
+impl fmt::Debug for VectorType {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "VectorType(base={}, lanes={})",
+            self.base,
+            self.lane_count()
+        )
+    }
+}
+
+/// A concrete scalar type that is neither a vector nor a lane type.
+///
+/// Special types cannot be used to form vectors.
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+pub(crate) enum SpecialType {
+    Flag(shared_types::Flag),
+}
+
+impl SpecialType {
+    /// Return a string containing the documentation comment for this special type.
+    pub fn doc(self) -> String {
+        match self {
+            SpecialType::Flag(shared_types::Flag::IFlags) => String::from(
+                "CPU flags representing the result of an integer comparison. These flags
+                can be tested with an :type:`intcc` condition code.",
+            ),
+            SpecialType::Flag(shared_types::Flag::FFlags) => String::from(
+                "CPU flags representing the result of a floating point comparison. These
+                flags can be tested with a :type:`floatcc` condition code.",
+            ),
+        }
+    }
+
+    /// Return the number of bits in a lane.
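+    /// Special types are not lane types, so by convention they report zero bits.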
+    pub fn lane_bits(self) -> u64 {
+        match self {
+            SpecialType::Flag(_) => 0,
+        }
+    }
+
+    /// Find the unique number associated with this special type.
+    pub fn number(self) -> u8 {
+        match self {
+            SpecialType::Flag(shared_types::Flag::IFlags) => 1,
+            SpecialType::Flag(shared_types::Flag::FFlags) => 2,
+        }
+    }
+}
+
+impl fmt::Display for SpecialType {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            SpecialType::Flag(shared_types::Flag::IFlags) => write!(f, "iflags"),
+            SpecialType::Flag(shared_types::Flag::FFlags) => write!(f, "fflags"),
+        }
+    }
+}
+
+impl fmt::Debug for SpecialType {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "{}",
+            match *self {
+                SpecialType::Flag(_) => format!("FlagsType({})", self),
+            }
+        )
+    }
+}
+
+impl From<shared_types::Flag> for SpecialType {
+    fn from(f: shared_types::Flag) -> Self {
+        SpecialType::Flag(f)
+    }
+}
+
+pub(crate) struct SpecialTypeIterator {
+    flag_iter: shared_types::FlagIterator,
+}
+
+impl SpecialTypeIterator {
+    fn new() -> Self {
+        Self {
+            flag_iter: shared_types::FlagIterator::new(),
+        }
+    }
+}
+
+impl Iterator for SpecialTypeIterator {
+    type Item = SpecialType;
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some(f) = self.flag_iter.next() {
+            Some(SpecialType::from(f))
+        } else {
+            None
+        }
+    }
+}
+
+/// A reference type is a scalar type, but not a lane type.
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+pub(crate) struct ReferenceType(pub shared_types::Reference);
+
+impl ReferenceType {
+    /// Return a string containing the documentation comment for this reference type.
+    pub fn doc(self) -> String {
+        format!("An opaque reference type with {} bits.", self.lane_bits())
+    }
+
+    /// Return the number of bits in a lane.
+    pub fn lane_bits(self) -> u64 {
+        match self.0 {
+            shared_types::Reference::R32 => 32,
+            shared_types::Reference::R64 => 64,
+        }
+    }
+
+    /// Find the unique number associated with this reference type.
+    pub fn number(self) -> u8 {
+        constants::REFERENCE_BASE
+            + match self {
+                ReferenceType(shared_types::Reference::R32) => 0,
+                ReferenceType(shared_types::Reference::R64) => 1,
+            }
+    }
+
+    pub fn ref_from_bits(num_bits: u16) -> ReferenceType {
+        ReferenceType(match num_bits {
+            32 => shared_types::Reference::R32,
+            64 => shared_types::Reference::R64,
+            _ => unreachable!("unexpected number of bits for a reference type"),
+        })
+    }
+}
+
+impl fmt::Display for ReferenceType {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "r{}", self.lane_bits())
+    }
+}
+
+impl fmt::Debug for ReferenceType {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "ReferenceType(bits={})", self.lane_bits())
+    }
+}
+
+/// Create a ReferenceType from a given reference variant.
+impl From<shared_types::Reference> for ReferenceType {
+    fn from(r: shared_types::Reference) -> Self {
+        ReferenceType(r)
+    }
+}
+
+/// An iterator for different reference types.
+pub(crate) struct ReferenceTypeIterator {
+    reference_iter: shared_types::ReferenceIterator,
+}
+
+impl ReferenceTypeIterator {
+    /// Create a new reference type iterator.
+ fn new() -> Self { + Self { + reference_iter: shared_types::ReferenceIterator::new(), + } + } +} + +impl Iterator for ReferenceTypeIterator { + type Item = ReferenceType; + fn next(&mut self) -> Option { + if let Some(r) = self.reference_iter.next() { + Some(ReferenceType::from(r)) + } else { + None + } + } +} diff --git a/cranelift/codegen/meta/src/cdsl/typevar.rs b/cranelift/codegen/meta/src/cdsl/typevar.rs new file mode 100644 index 0000000000..302da4561f --- /dev/null +++ b/cranelift/codegen/meta/src/cdsl/typevar.rs @@ -0,0 +1,1222 @@ +use std::cell::RefCell; +use std::collections::{BTreeSet, HashSet}; +use std::fmt; +use std::hash; +use std::iter::FromIterator; +use std::ops; +use std::rc::Rc; + +use crate::cdsl::types::{LaneType, ReferenceType, SpecialType, ValueType}; + +const MAX_LANES: u16 = 256; +const MAX_BITS: u16 = 128; +const MAX_FLOAT_BITS: u16 = 64; + +/// Type variables can be used in place of concrete types when defining +/// instructions. This makes the instructions *polymorphic*. +/// +/// A type variable is restricted to vary over a subset of the value types. +/// This subset is specified by a set of flags that control the permitted base +/// types and whether the type variable can assume scalar or vector types, or +/// both. +#[derive(Debug)] +pub(crate) struct TypeVarContent { + /// Short name of type variable used in instruction descriptions. + pub name: String, + + /// Documentation string. + pub doc: String, + + /// Type set associated to the type variable. + /// This field must remain private; use `get_typeset()` or `get_raw_typeset()` to get the + /// information you want. + type_set: TypeSet, + + pub base: Option, +} + +#[derive(Clone, Debug)] +pub(crate) struct TypeVar { + content: Rc>, +} + +impl TypeVar { + pub fn new(name: impl Into, doc: impl Into, type_set: TypeSet) -> Self { + Self { + content: Rc::new(RefCell::new(TypeVarContent { + name: name.into(), + doc: doc.into(), + type_set, + base: None, + })), + } + } + + pub fn new_singleton(value_type: ValueType) -> Self { + let (name, doc) = (value_type.to_string(), value_type.doc()); + let mut builder = TypeSetBuilder::new(); + + let (scalar_type, num_lanes) = match value_type { + ValueType::Special(special_type) => { + return TypeVar::new(name, doc, builder.specials(vec![special_type]).build()); + } + ValueType::Reference(ReferenceType(reference_type)) => { + let bits = reference_type as RangeBound; + return TypeVar::new(name, doc, builder.refs(bits..bits).build()); + } + ValueType::Lane(lane_type) => (lane_type, 1), + ValueType::Vector(vec_type) => { + (vec_type.lane_type(), vec_type.lane_count() as RangeBound) + } + }; + + builder = builder.simd_lanes(num_lanes..num_lanes); + + let builder = match scalar_type { + LaneType::Int(int_type) => { + let bits = int_type as RangeBound; + builder.ints(bits..bits) + } + LaneType::Float(float_type) => { + let bits = float_type as RangeBound; + builder.floats(bits..bits) + } + LaneType::Bool(bool_type) => { + let bits = bool_type as RangeBound; + builder.bools(bits..bits) + } + }; + TypeVar::new(name, doc, builder.build()) + } + + /// Get a fresh copy of self, named after `name`. Can only be called on non-derived typevars. 
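+    /// The copy gets its own reference-counted content, so constraining the copy
+    /// later does not affect the type set of the original.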
+ pub fn copy_from(other: &TypeVar, name: String) -> TypeVar { + assert!( + other.base.is_none(), + "copy_from() can only be called on non-derived type variables" + ); + TypeVar { + content: Rc::new(RefCell::new(TypeVarContent { + name, + doc: "".into(), + type_set: other.type_set.clone(), + base: None, + })), + } + } + + /// Returns the typeset for this TV. If the TV is derived, computes it recursively from the + /// derived function and the base's typeset. + /// Note this can't be done non-lazily in the constructor, because the TypeSet of the base may + /// change over time. + pub fn get_typeset(&self) -> TypeSet { + match &self.base { + Some(base) => base.type_var.get_typeset().image(base.derived_func), + None => self.type_set.clone(), + } + } + + /// Returns this typevar's type set, assuming this type var has no parent. + pub fn get_raw_typeset(&self) -> &TypeSet { + assert_eq!(self.type_set, self.get_typeset()); + &self.type_set + } + + /// If the associated typeset has a single type return it. Otherwise return None. + pub fn singleton_type(&self) -> Option { + let type_set = self.get_typeset(); + if type_set.size() == 1 { + Some(type_set.get_singleton()) + } else { + None + } + } + + /// Get the free type variable controlling this one. + pub fn free_typevar(&self) -> Option { + match &self.base { + Some(base) => base.type_var.free_typevar(), + None => { + match self.singleton_type() { + // A singleton type isn't a proper free variable. + Some(_) => None, + None => Some(self.clone()), + } + } + } + } + + /// Create a type variable that is a function of another. + pub fn derived(&self, derived_func: DerivedFunc) -> TypeVar { + let ts = self.get_typeset(); + + // Safety checks to avoid over/underflows. + assert!(ts.specials.is_empty(), "can't derive from special types"); + match derived_func { + DerivedFunc::HalfWidth => { + assert!( + ts.ints.is_empty() || *ts.ints.iter().min().unwrap() > 8, + "can't halve all integer types" + ); + assert!( + ts.floats.is_empty() || *ts.floats.iter().min().unwrap() > 32, + "can't halve all float types" + ); + assert!( + ts.bools.is_empty() || *ts.bools.iter().min().unwrap() > 8, + "can't halve all boolean types" + ); + } + DerivedFunc::DoubleWidth => { + assert!( + ts.ints.is_empty() || *ts.ints.iter().max().unwrap() < MAX_BITS, + "can't double all integer types" + ); + assert!( + ts.floats.is_empty() || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS, + "can't double all float types" + ); + assert!( + ts.bools.is_empty() || *ts.bools.iter().max().unwrap() < MAX_BITS, + "can't double all boolean types" + ); + } + DerivedFunc::HalfVector => { + assert!( + *ts.lanes.iter().min().unwrap() > 1, + "can't halve a scalar type" + ); + } + DerivedFunc::DoubleVector => { + assert!( + *ts.lanes.iter().max().unwrap() < MAX_LANES, + "can't double 256 lanes" + ); + } + DerivedFunc::LaneOf | DerivedFunc::AsBool => { /* no particular assertions */ } + } + + TypeVar { + content: Rc::new(RefCell::new(TypeVarContent { + name: format!("{}({})", derived_func.name(), self.name), + doc: "".into(), + type_set: ts, + base: Some(TypeVarParent { + type_var: self.clone(), + derived_func, + }), + })), + } + } + + pub fn lane_of(&self) -> TypeVar { + self.derived(DerivedFunc::LaneOf) + } + pub fn as_bool(&self) -> TypeVar { + self.derived(DerivedFunc::AsBool) + } + pub fn half_width(&self) -> TypeVar { + self.derived(DerivedFunc::HalfWidth) + } + pub fn double_width(&self) -> TypeVar { + self.derived(DerivedFunc::DoubleWidth) + } + pub fn half_vector(&self) -> TypeVar { + 
self.derived(DerivedFunc::HalfVector) + } + pub fn double_vector(&self) -> TypeVar { + self.derived(DerivedFunc::DoubleVector) + } + + /// Constrain the range of types this variable can assume to a subset of those in the typeset + /// ts. + /// May mutate itself if it's not derived, or its parent if it is. + pub fn constrain_types_by_ts(&self, type_set: TypeSet) { + match &self.base { + Some(base) => { + base.type_var + .constrain_types_by_ts(type_set.preimage(base.derived_func)); + } + None => { + self.content + .borrow_mut() + .type_set + .inplace_intersect_with(&type_set); + } + } + } + + /// Constrain the range of types this variable can assume to a subset of those `other` can + /// assume. + /// May mutate itself if it's not derived, or its parent if it is. + pub fn constrain_types(&self, other: TypeVar) { + if self == &other { + return; + } + self.constrain_types_by_ts(other.get_typeset()); + } + + /// Get a Rust expression that computes the type of this type variable. + pub fn to_rust_code(&self) -> String { + match &self.base { + Some(base) => format!( + "{}.{}()", + base.type_var.to_rust_code(), + base.derived_func.name() + ), + None => { + if let Some(singleton) = self.singleton_type() { + singleton.rust_name() + } else { + self.name.clone() + } + } + } + } +} + +impl Into for &TypeVar { + fn into(self) -> TypeVar { + self.clone() + } +} +impl Into for ValueType { + fn into(self) -> TypeVar { + TypeVar::new_singleton(self) + } +} + +// Hash TypeVars by pointers. +// There might be a better way to do this, but since TypeVar's content (namely TypeSet) can be +// mutated, it makes sense to use pointer equality/hashing here. +impl hash::Hash for TypeVar { + fn hash(&self, h: &mut H) { + match &self.base { + Some(base) => { + base.type_var.hash(h); + base.derived_func.hash(h); + } + None => { + (&**self as *const TypeVarContent).hash(h); + } + } + } +} + +impl PartialEq for TypeVar { + fn eq(&self, other: &TypeVar) -> bool { + match (&self.base, &other.base) { + (Some(base1), Some(base2)) => { + base1.type_var.eq(&base2.type_var) && base1.derived_func == base2.derived_func + } + (None, None) => Rc::ptr_eq(&self.content, &other.content), + _ => false, + } + } +} + +// Allow TypeVar as map keys, based on pointer equality (see also above PartialEq impl). +impl Eq for TypeVar {} + +impl ops::Deref for TypeVar { + type Target = TypeVarContent; + fn deref(&self) -> &Self::Target { + unsafe { self.content.as_ptr().as_ref().unwrap() } + } +} + +#[derive(Clone, Copy, Debug, Hash, PartialEq)] +pub(crate) enum DerivedFunc { + LaneOf, + AsBool, + HalfWidth, + DoubleWidth, + HalfVector, + DoubleVector, +} + +impl DerivedFunc { + pub fn name(self) -> &'static str { + match self { + DerivedFunc::LaneOf => "lane_of", + DerivedFunc::AsBool => "as_bool", + DerivedFunc::HalfWidth => "half_width", + DerivedFunc::DoubleWidth => "double_width", + DerivedFunc::HalfVector => "half_vector", + DerivedFunc::DoubleVector => "double_vector", + } + } + + /// Returns the inverse function of this one, if it is a bijection. + pub fn inverse(self) -> Option { + match self { + DerivedFunc::HalfWidth => Some(DerivedFunc::DoubleWidth), + DerivedFunc::DoubleWidth => Some(DerivedFunc::HalfWidth), + DerivedFunc::HalfVector => Some(DerivedFunc::DoubleVector), + DerivedFunc::DoubleVector => Some(DerivedFunc::HalfVector), + _ => None, + } + } +} + +#[derive(Debug, Hash)] +pub(crate) struct TypeVarParent { + pub type_var: TypeVar, + pub derived_func: DerivedFunc, +} + +/// A set of types. 
+///
+/// We don't allow arbitrary subsets of types, but use a parametrized approach
+/// instead.
+///
+/// Objects of this class can be used as dictionary keys.
+///
+/// Parametrized type sets are specified in terms of ranges:
+/// - The permitted range of vector lanes, where 1 indicates a scalar type.
+/// - The permitted range of integer types.
+/// - The permitted range of floating point types, and
+/// - The permitted range of boolean types.
+///
+/// The ranges are inclusive from smallest bit-width to largest bit-width.
+///
+/// Finally, a type set can contain special types (derived from `SpecialType`)
+/// which can't appear as lane types.

+type RangeBound = u16;
+type Range = ops::Range<RangeBound>;
+type NumSet = BTreeSet<RangeBound>;
+
+macro_rules! num_set {
+    ($($expr:expr),*) => {
+        NumSet::from_iter(vec![$($expr),*])
+    };
+}
+
+#[derive(Clone, PartialEq, Eq, Hash)]
+pub(crate) struct TypeSet {
+    pub lanes: NumSet,
+    pub ints: NumSet,
+    pub floats: NumSet,
+    pub bools: NumSet,
+    pub refs: NumSet,
+    pub specials: Vec<SpecialType>,
+}
+
+impl TypeSet {
+    fn new(
+        lanes: NumSet,
+        ints: NumSet,
+        floats: NumSet,
+        bools: NumSet,
+        refs: NumSet,
+        specials: Vec<SpecialType>,
+    ) -> Self {
+        Self {
+            lanes,
+            ints,
+            floats,
+            bools,
+            refs,
+            specials,
+        }
+    }
+
+    /// Return the number of concrete types represented by this typeset.
+    pub fn size(&self) -> usize {
+        self.lanes.len()
+            * (self.ints.len() + self.floats.len() + self.bools.len() + self.refs.len())
+            + self.specials.len()
+    }
+
+    /// Return the image of self across the derived function func.
+    fn image(&self, derived_func: DerivedFunc) -> TypeSet {
+        match derived_func {
+            DerivedFunc::LaneOf => self.lane_of(),
+            DerivedFunc::AsBool => self.as_bool(),
+            DerivedFunc::HalfWidth => self.half_width(),
+            DerivedFunc::DoubleWidth => self.double_width(),
+            DerivedFunc::HalfVector => self.half_vector(),
+            DerivedFunc::DoubleVector => self.double_vector(),
+        }
+    }
+
+    /// Return a TypeSet describing the image of self across lane_of.
+    fn lane_of(&self) -> TypeSet {
+        let mut copy = self.clone();
+        copy.lanes = num_set![1];
+        copy
+    }
+
+    /// Return a TypeSet describing the image of self across as_bool.
+    fn as_bool(&self) -> TypeSet {
+        let mut copy = self.clone();
+        copy.ints = NumSet::new();
+        copy.floats = NumSet::new();
+        copy.refs = NumSet::new();
+        if !(&self.lanes - &num_set![1]).is_empty() {
+            copy.bools = &self.ints | &self.floats;
+            copy.bools = &copy.bools | &self.bools;
+        }
+        if self.lanes.contains(&1) {
+            copy.bools.insert(1);
+        }
+        copy
+    }
+
+    /// Return a TypeSet describing the image of self across halfwidth.
+    fn half_width(&self) -> TypeSet {
+        let mut copy = self.clone();
+        copy.ints = NumSet::from_iter(self.ints.iter().filter(|&&x| x > 8).map(|&x| x / 2));
+        copy.floats = NumSet::from_iter(self.floats.iter().filter(|&&x| x > 32).map(|&x| x / 2));
+        copy.bools = NumSet::from_iter(self.bools.iter().filter(|&&x| x > 8).map(|&x| x / 2));
+        copy.specials = Vec::new();
+        copy
+    }
+
+    /// Return a TypeSet describing the image of self across doublewidth.
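+    /// Widths already at the ceiling (`MAX_BITS` for ints and bools,
+    /// `MAX_FLOAT_BITS` for floats) are dropped rather than doubled past it.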
+    fn double_width(&self) -> TypeSet {
+        let mut copy = self.clone();
+        copy.ints = NumSet::from_iter(self.ints.iter().filter(|&&x| x < MAX_BITS).map(|&x| x * 2));
+        copy.floats = NumSet::from_iter(
+            self.floats
+                .iter()
+                .filter(|&&x| x < MAX_FLOAT_BITS)
+                .map(|&x| x * 2),
+        );
+        copy.bools = NumSet::from_iter(
+            self.bools
+                .iter()
+                .filter(|&&x| x < MAX_BITS)
+                .map(|&x| x * 2)
+                .filter(|x| legal_bool(*x)),
+        );
+        copy.specials = Vec::new();
+        copy
+    }
+
+    /// Return a TypeSet describing the image of self across halfvector.
+    fn half_vector(&self) -> TypeSet {
+        let mut copy = self.clone();
+        copy.lanes = NumSet::from_iter(self.lanes.iter().filter(|&&x| x > 1).map(|&x| x / 2));
+        copy.specials = Vec::new();
+        copy
+    }
+
+    /// Return a TypeSet describing the image of self across doublevector.
+    fn double_vector(&self) -> TypeSet {
+        let mut copy = self.clone();
+        copy.lanes = NumSet::from_iter(
+            self.lanes
+                .iter()
+                .filter(|&&x| x < MAX_LANES)
+                .map(|&x| x * 2),
+        );
+        copy.specials = Vec::new();
+        copy
+    }
+
+    fn concrete_types(&self) -> Vec<ValueType> {
+        let mut ret = Vec::new();
+        for &num_lanes in &self.lanes {
+            for &bits in &self.ints {
+                ret.push(LaneType::int_from_bits(bits).by(num_lanes));
+            }
+            for &bits in &self.floats {
+                ret.push(LaneType::float_from_bits(bits).by(num_lanes));
+            }
+            for &bits in &self.bools {
+                ret.push(LaneType::bool_from_bits(bits).by(num_lanes));
+            }
+            for &bits in &self.refs {
+                ret.push(ReferenceType::ref_from_bits(bits).into());
+            }
+        }
+        for &special in &self.specials {
+            ret.push(special.into());
+        }
+        ret
+    }
+
+    /// Return the singleton type represented by self. Can only be called on typesets containing
+    /// exactly one type.
+    fn get_singleton(&self) -> ValueType {
+        let mut types = self.concrete_types();
+        assert_eq!(types.len(), 1);
+        types.remove(0)
+    }
+
+    /// Return the inverse image of self across the derived function func.
+    fn preimage(&self, func: DerivedFunc) -> TypeSet {
+        if self.size() == 0 {
+            // The inverse of the empty set is itself.
+            return self.clone();
+        }
+
+        match func {
+            DerivedFunc::LaneOf => {
+                let mut copy = self.clone();
+                copy.lanes =
+                    NumSet::from_iter((0..=MAX_LANES.trailing_zeros()).map(|i| u16::pow(2, i)));
+                copy
+            }
+            DerivedFunc::AsBool => {
+                let mut copy = self.clone();
+                if self.bools.contains(&1) {
+                    copy.ints = NumSet::from_iter(vec![8, 16, 32, 64, 128]);
+                    copy.floats = NumSet::from_iter(vec![32, 64]);
+                } else {
+                    copy.ints = &self.bools - &NumSet::from_iter(vec![1]);
+                    copy.floats = &self.bools & &NumSet::from_iter(vec![32, 64]);
+                    // If b1 is not in our typeset, then lanes=1 cannot be in the pre-image,
+                    // as as_bool() of scalars is always b1.
+ copy.lanes = &self.lanes - &NumSet::from_iter(vec![1]); + } + copy + } + DerivedFunc::HalfWidth => self.double_width(), + DerivedFunc::DoubleWidth => self.half_width(), + DerivedFunc::HalfVector => self.double_vector(), + DerivedFunc::DoubleVector => self.half_vector(), + } + } + + pub fn inplace_intersect_with(&mut self, other: &TypeSet) { + self.lanes = &self.lanes & &other.lanes; + self.ints = &self.ints & &other.ints; + self.floats = &self.floats & &other.floats; + self.bools = &self.bools & &other.bools; + self.refs = &self.refs & &other.refs; + + let mut new_specials = Vec::new(); + for spec in &self.specials { + if let Some(spec) = other.specials.iter().find(|&other_spec| other_spec == spec) { + new_specials.push(*spec); + } + } + self.specials = new_specials; + } + + pub fn is_subset(&self, other: &TypeSet) -> bool { + self.lanes.is_subset(&other.lanes) + && self.ints.is_subset(&other.ints) + && self.floats.is_subset(&other.floats) + && self.bools.is_subset(&other.bools) + && self.refs.is_subset(&other.refs) + && { + let specials: HashSet = HashSet::from_iter(self.specials.clone()); + let other_specials = HashSet::from_iter(other.specials.clone()); + specials.is_subset(&other_specials) + } + } + + pub fn is_wider_or_equal(&self, other: &TypeSet) -> bool { + set_wider_or_equal(&self.ints, &other.ints) + && set_wider_or_equal(&self.floats, &other.floats) + && set_wider_or_equal(&self.bools, &other.bools) + && set_wider_or_equal(&self.refs, &other.refs) + } + + pub fn is_narrower(&self, other: &TypeSet) -> bool { + set_narrower(&self.ints, &other.ints) + && set_narrower(&self.floats, &other.floats) + && set_narrower(&self.bools, &other.bools) + && set_narrower(&self.refs, &other.refs) + } +} + +fn set_wider_or_equal(s1: &NumSet, s2: &NumSet) -> bool { + !s1.is_empty() && !s2.is_empty() && s1.iter().min() >= s2.iter().max() +} + +fn set_narrower(s1: &NumSet, s2: &NumSet) -> bool { + !s1.is_empty() && !s2.is_empty() && s1.iter().min() < s2.iter().max() +} + +impl fmt::Debug for TypeSet { + fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(fmt, "TypeSet(")?; + + let mut subsets = Vec::new(); + if !self.lanes.is_empty() { + subsets.push(format!( + "lanes={{{}}}", + Vec::from_iter(self.lanes.iter().map(|x| x.to_string())).join(", ") + )); + } + if !self.ints.is_empty() { + subsets.push(format!( + "ints={{{}}}", + Vec::from_iter(self.ints.iter().map(|x| x.to_string())).join(", ") + )); + } + if !self.floats.is_empty() { + subsets.push(format!( + "floats={{{}}}", + Vec::from_iter(self.floats.iter().map(|x| x.to_string())).join(", ") + )); + } + if !self.bools.is_empty() { + subsets.push(format!( + "bools={{{}}}", + Vec::from_iter(self.bools.iter().map(|x| x.to_string())).join(", ") + )); + } + if !self.refs.is_empty() { + subsets.push(format!( + "refs={{{}}}", + Vec::from_iter(self.refs.iter().map(|x| x.to_string())).join(", ") + )); + } + if !self.specials.is_empty() { + subsets.push(format!( + "specials={{{}}}", + Vec::from_iter(self.specials.iter().map(|x| x.to_string())).join(", ") + )); + } + + write!(fmt, "{})", subsets.join(", "))?; + Ok(()) + } +} + +pub(crate) struct TypeSetBuilder { + ints: Interval, + floats: Interval, + bools: Interval, + refs: Interval, + includes_scalars: bool, + simd_lanes: Interval, + specials: Vec, +} + +impl TypeSetBuilder { + pub fn new() -> Self { + Self { + ints: Interval::None, + floats: Interval::None, + bools: Interval::None, + refs: Interval::None, + includes_scalars: true, + simd_lanes: Interval::None, + specials: 
Vec::new(),
+        }
+    }
+
+    pub fn ints(mut self, interval: impl Into<Interval>) -> Self {
+        assert!(self.ints == Interval::None);
+        self.ints = interval.into();
+        self
+    }
+    pub fn floats(mut self, interval: impl Into<Interval>) -> Self {
+        assert!(self.floats == Interval::None);
+        self.floats = interval.into();
+        self
+    }
+    pub fn bools(mut self, interval: impl Into<Interval>) -> Self {
+        assert!(self.bools == Interval::None);
+        self.bools = interval.into();
+        self
+    }
+    pub fn refs(mut self, interval: impl Into<Interval>) -> Self {
+        assert!(self.refs == Interval::None);
+        self.refs = interval.into();
+        self
+    }
+    pub fn includes_scalars(mut self, includes_scalars: bool) -> Self {
+        self.includes_scalars = includes_scalars;
+        self
+    }
+    pub fn simd_lanes(mut self, interval: impl Into<Interval>) -> Self {
+        assert!(self.simd_lanes == Interval::None);
+        self.simd_lanes = interval.into();
+        self
+    }
+    pub fn specials(mut self, specials: Vec<SpecialType>) -> Self {
+        assert!(self.specials.is_empty());
+        self.specials = specials;
+        self
+    }
+
+    pub fn build(self) -> TypeSet {
+        let min_lanes = if self.includes_scalars { 1 } else { 2 };
+
+        let bools = range_to_set(self.bools.to_range(1..MAX_BITS, None))
+            .into_iter()
+            .filter(|x| legal_bool(*x))
+            .collect();
+
+        TypeSet::new(
+            range_to_set(self.simd_lanes.to_range(min_lanes..MAX_LANES, Some(1))),
+            range_to_set(self.ints.to_range(8..MAX_BITS, None)),
+            range_to_set(self.floats.to_range(32..64, None)),
+            bools,
+            range_to_set(self.refs.to_range(32..64, None)),
+            self.specials,
+        )
+    }
+
+    pub fn all() -> TypeSet {
+        TypeSetBuilder::new()
+            .ints(Interval::All)
+            .floats(Interval::All)
+            .bools(Interval::All)
+            .refs(Interval::All)
+            .simd_lanes(Interval::All)
+            .specials(ValueType::all_special_types().collect())
+            .includes_scalars(true)
+            .build()
+    }
+}
+
+#[derive(PartialEq)]
+pub(crate) enum Interval {
+    None,
+    All,
+    Range(Range),
+}
+
+impl Interval {
+    fn to_range(&self, full_range: Range, default: Option<RangeBound>) -> Option<Range> {
+        match self {
+            Interval::None => {
+                if let Some(default_val) = default {
+                    Some(default_val..default_val)
+                } else {
+                    None
+                }
+            }
+
+            Interval::All => Some(full_range),
+
+            Interval::Range(range) => {
+                let (low, high) = (range.start, range.end);
+                assert!(low.is_power_of_two());
+                assert!(high.is_power_of_two());
+                assert!(low <= high);
+                assert!(low >= full_range.start);
+                assert!(high <= full_range.end);
+                Some(low..high)
+            }
+        }
+    }
+}
+
+impl Into<Interval> for Range {
+    fn into(self) -> Interval {
+        Interval::Range(self)
+    }
+}
+
+fn legal_bool(bits: RangeBound) -> bool {
+    // Only allow legal bit widths for bool types.
+    bits == 1 || (bits >= 8 && bits <= MAX_BITS && bits.is_power_of_two())
+}
+
+/// Generates a set with all the powers of two included in the range.
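+/// Both bounds must be powers of two and are inclusive, so e.g. `8..64` yields
+/// the set `{8, 16, 32, 64}`.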
+fn range_to_set(range: Option) -> NumSet { + let mut set = NumSet::new(); + + let (low, high) = match range { + Some(range) => (range.start, range.end), + None => return set, + }; + + assert!(low.is_power_of_two()); + assert!(high.is_power_of_two()); + assert!(low <= high); + + for i in low.trailing_zeros()..=high.trailing_zeros() { + assert!(1 << i <= RangeBound::max_value()); + set.insert(1 << i); + } + set +} + +#[test] +fn test_typevar_builder() { + let type_set = TypeSetBuilder::new().ints(Interval::All).build(); + assert_eq!(type_set.lanes, num_set![1]); + assert!(type_set.floats.is_empty()); + assert_eq!(type_set.ints, num_set![8, 16, 32, 64, 128]); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new().bools(Interval::All).build(); + assert_eq!(type_set.lanes, num_set![1]); + assert!(type_set.floats.is_empty()); + assert!(type_set.ints.is_empty()); + assert_eq!(type_set.bools, num_set![1, 8, 16, 32, 64, 128]); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new().floats(Interval::All).build(); + assert_eq!(type_set.lanes, num_set![1]); + assert_eq!(type_set.floats, num_set![32, 64]); + assert!(type_set.ints.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(); + assert_eq!(type_set.lanes, num_set![2, 4, 8, 16, 32, 64, 128, 256]); + assert_eq!(type_set.floats, num_set![32, 64]); + assert!(type_set.ints.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(true) + .build(); + assert_eq!(type_set.lanes, num_set![1, 2, 4, 8, 16, 32, 64, 128, 256]); + assert_eq!(type_set.floats, num_set![32, 64]); + assert!(type_set.ints.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new().ints(16..64).build(); + assert_eq!(type_set.lanes, num_set![1]); + assert_eq!(type_set.ints, num_set![16, 32, 64]); + assert!(type_set.floats.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); +} + +#[test] +#[should_panic] +fn test_typevar_builder_too_high_bound_panic() { + TypeSetBuilder::new().ints(16..2 * MAX_BITS).build(); +} + +#[test] +#[should_panic] +fn test_typevar_builder_inverted_bounds_panic() { + TypeSetBuilder::new().ints(32..16).build(); +} + +#[test] +fn test_as_bool() { + let a = TypeSetBuilder::new() + .simd_lanes(2..8) + .ints(8..8) + .floats(32..32) + .build(); + assert_eq!( + a.lane_of(), + TypeSetBuilder::new().ints(8..8).floats(32..32).build() + ); + + // Test as_bool with disjoint intervals. + let mut a_as_bool = TypeSetBuilder::new().simd_lanes(2..8).build(); + a_as_bool.bools = num_set![8, 32]; + assert_eq!(a.as_bool(), a_as_bool); + + let b = TypeSetBuilder::new() + .simd_lanes(1..8) + .ints(8..8) + .floats(32..32) + .build(); + let mut b_as_bool = TypeSetBuilder::new().simd_lanes(1..8).build(); + b_as_bool.bools = num_set![1, 8, 32]; + assert_eq!(b.as_bool(), b_as_bool); +} + +#[test] +fn test_forward_images() { + let empty_set = TypeSetBuilder::new().build(); + + // Half vector. + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..32) + .build() + .half_vector(), + TypeSetBuilder::new().simd_lanes(1..16).build() + ); + + // Double vector. 
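+    // For instance, doubling a 4-lane set yields the 8-lane set:
+    assert_eq!(
+        TypeSetBuilder::new()
+            .simd_lanes(4..4)
+            .build()
+            .double_vector(),
+        TypeSetBuilder::new().simd_lanes(8..8).build()
+    );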
+ assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..32) + .build() + .double_vector(), + TypeSetBuilder::new().simd_lanes(2..64).build() + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(128..256) + .build() + .double_vector(), + TypeSetBuilder::new().simd_lanes(256..256).build() + ); + + // Half width. + assert_eq!( + TypeSetBuilder::new().ints(8..32).build().half_width(), + TypeSetBuilder::new().ints(8..16).build() + ); + assert_eq!( + TypeSetBuilder::new().floats(32..32).build().half_width(), + empty_set + ); + assert_eq!( + TypeSetBuilder::new().floats(32..64).build().half_width(), + TypeSetBuilder::new().floats(32..32).build() + ); + assert_eq!( + TypeSetBuilder::new().bools(1..8).build().half_width(), + empty_set + ); + assert_eq!( + TypeSetBuilder::new().bools(1..32).build().half_width(), + TypeSetBuilder::new().bools(8..16).build() + ); + + // Double width. + assert_eq!( + TypeSetBuilder::new().ints(8..32).build().double_width(), + TypeSetBuilder::new().ints(16..64).build() + ); + assert_eq!( + TypeSetBuilder::new().ints(32..64).build().double_width(), + TypeSetBuilder::new().ints(64..128).build() + ); + assert_eq!( + TypeSetBuilder::new().floats(32..32).build().double_width(), + TypeSetBuilder::new().floats(64..64).build() + ); + assert_eq!( + TypeSetBuilder::new().floats(32..64).build().double_width(), + TypeSetBuilder::new().floats(64..64).build() + ); + assert_eq!( + TypeSetBuilder::new().bools(1..16).build().double_width(), + TypeSetBuilder::new().bools(16..32).build() + ); + assert_eq!( + TypeSetBuilder::new().bools(32..64).build().double_width(), + TypeSetBuilder::new().bools(64..128).build() + ); +} + +#[test] +fn test_backward_images() { + let empty_set = TypeSetBuilder::new().build(); + + // LaneOf. + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..1) + .ints(8..8) + .floats(32..32) + .build() + .preimage(DerivedFunc::LaneOf), + TypeSetBuilder::new() + .simd_lanes(Interval::All) + .ints(8..8) + .floats(32..32) + .build() + ); + assert_eq!(empty_set.preimage(DerivedFunc::LaneOf), empty_set); + + // AsBool. + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..4) + .bools(1..128) + .build() + .preimage(DerivedFunc::AsBool), + TypeSetBuilder::new() + .simd_lanes(1..4) + .ints(Interval::All) + .bools(Interval::All) + .floats(Interval::All) + .build() + ); + + // Double vector. + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..1) + .ints(8..8) + .build() + .preimage(DerivedFunc::DoubleVector) + .size(), + 0 + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..16) + .ints(8..16) + .floats(32..32) + .build() + .preimage(DerivedFunc::DoubleVector), + TypeSetBuilder::new() + .simd_lanes(1..8) + .ints(8..16) + .floats(32..32) + .build(), + ); + + // Half vector. + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(256..256) + .ints(8..8) + .build() + .preimage(DerivedFunc::HalfVector) + .size(), + 0 + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(64..128) + .bools(1..32) + .build() + .preimage(DerivedFunc::HalfVector), + TypeSetBuilder::new() + .simd_lanes(128..256) + .bools(1..32) + .build(), + ); + + // Half width. + assert_eq!( + TypeSetBuilder::new() + .ints(128..128) + .floats(64..64) + .bools(128..128) + .build() + .preimage(DerivedFunc::HalfWidth) + .size(), + 0 + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(64..256) + .bools(1..64) + .build() + .preimage(DerivedFunc::HalfWidth), + TypeSetBuilder::new() + .simd_lanes(64..256) + .bools(16..128) + .build(), + ); + + // Double width. 
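+    // The pre-image of double_width is half_width; e.g. {i16, i32} pulls back
+    // to {i8, i16}:
+    assert_eq!(
+        TypeSetBuilder::new()
+            .ints(16..32)
+            .build()
+            .preimage(DerivedFunc::DoubleWidth),
+        TypeSetBuilder::new().ints(8..16).build()
+    );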
+ assert_eq!( + TypeSetBuilder::new() + .ints(8..8) + .floats(32..32) + .bools(1..8) + .build() + .preimage(DerivedFunc::DoubleWidth) + .size(), + 0 + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..16) + .ints(8..16) + .floats(32..64) + .build() + .preimage(DerivedFunc::DoubleWidth), + TypeSetBuilder::new() + .simd_lanes(1..16) + .ints(8..8) + .floats(32..32) + .build() + ); +} + +#[test] +#[should_panic] +fn test_typeset_singleton_panic_nonsingleton_types() { + TypeSetBuilder::new() + .ints(8..8) + .floats(32..32) + .build() + .get_singleton(); +} + +#[test] +#[should_panic] +fn test_typeset_singleton_panic_nonsingleton_lanes() { + TypeSetBuilder::new() + .simd_lanes(1..2) + .floats(32..32) + .build() + .get_singleton(); +} + +#[test] +fn test_typeset_singleton() { + use crate::shared::types as shared_types; + assert_eq!( + TypeSetBuilder::new().ints(16..16).build().get_singleton(), + ValueType::Lane(shared_types::Int::I16.into()) + ); + assert_eq!( + TypeSetBuilder::new().floats(64..64).build().get_singleton(), + ValueType::Lane(shared_types::Float::F64.into()) + ); + assert_eq!( + TypeSetBuilder::new().bools(1..1).build().get_singleton(), + ValueType::Lane(shared_types::Bool::B1.into()) + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(4..4) + .ints(32..32) + .build() + .get_singleton(), + LaneType::from(shared_types::Int::I32).by(4) + ); +} + +#[test] +fn test_typevar_functions() { + let x = TypeVar::new( + "x", + "i16 and up", + TypeSetBuilder::new().ints(16..64).build(), + ); + assert_eq!(x.half_width().name, "half_width(x)"); + assert_eq!( + x.half_width().double_width().name, + "double_width(half_width(x))" + ); + + let x = TypeVar::new("x", "up to i32", TypeSetBuilder::new().ints(8..32).build()); + assert_eq!(x.double_width().name, "double_width(x)"); +} + +#[test] +fn test_typevar_singleton() { + use crate::cdsl::types::VectorType; + use crate::shared::types as shared_types; + + // Test i32. + let typevar = TypeVar::new_singleton(ValueType::Lane(LaneType::Int(shared_types::Int::I32))); + assert_eq!(typevar.name, "i32"); + assert_eq!(typevar.type_set.ints, num_set![32]); + assert!(typevar.type_set.floats.is_empty()); + assert!(typevar.type_set.bools.is_empty()); + assert!(typevar.type_set.specials.is_empty()); + assert_eq!(typevar.type_set.lanes, num_set![1]); + + // Test f32x4. + let typevar = TypeVar::new_singleton(ValueType::Vector(VectorType::new( + LaneType::Float(shared_types::Float::F32), + 4, + ))); + assert_eq!(typevar.name, "f32x4"); + assert!(typevar.type_set.ints.is_empty()); + assert_eq!(typevar.type_set.floats, num_set![32]); + assert_eq!(typevar.type_set.lanes, num_set![4]); + assert!(typevar.type_set.bools.is_empty()); + assert!(typevar.type_set.specials.is_empty()); +} diff --git a/cranelift/codegen/meta/src/cdsl/xform.rs b/cranelift/codegen/meta/src/cdsl/xform.rs new file mode 100644 index 0000000000..d21e93128d --- /dev/null +++ b/cranelift/codegen/meta/src/cdsl/xform.rs @@ -0,0 +1,484 @@ +use crate::cdsl::ast::{ + Apply, BlockPool, ConstPool, DefIndex, DefPool, DummyDef, DummyExpr, Expr, PatternPosition, + VarIndex, VarPool, +}; +use crate::cdsl::instructions::Instruction; +use crate::cdsl::type_inference::{infer_transform, TypeEnvironment}; +use crate::cdsl::typevar::TypeVar; + +use cranelift_entity::{entity_impl, PrimaryMap}; + +use std::collections::{HashMap, HashSet}; +use std::iter::FromIterator; + +/// An instruction transformation consists of a source and destination pattern. 
+/// +/// Patterns are expressed in *register transfer language* as tuples of Def or Expr nodes. A +/// pattern may optionally have a sequence of TypeConstraints, that additionally limit the set of +/// cases when it applies. +/// +/// The source pattern can contain only a single instruction. +pub(crate) struct Transform { + pub src: DefIndex, + pub dst: Vec, + pub var_pool: VarPool, + pub def_pool: DefPool, + pub block_pool: BlockPool, + pub const_pool: ConstPool, + pub type_env: TypeEnvironment, +} + +type SymbolTable = HashMap; + +impl Transform { + fn new(src: DummyDef, dst: Vec) -> Self { + let mut var_pool = VarPool::new(); + let mut def_pool = DefPool::new(); + let mut block_pool = BlockPool::new(); + let mut const_pool = ConstPool::new(); + + let mut input_vars: Vec = Vec::new(); + let mut defined_vars: Vec = Vec::new(); + + // Maps variable names to our own Var copies. + let mut symbol_table: SymbolTable = SymbolTable::new(); + + // Rewrite variables in src and dst using our own copies. + let src = rewrite_def_list( + PatternPosition::Source, + vec![src], + &mut symbol_table, + &mut input_vars, + &mut defined_vars, + &mut var_pool, + &mut def_pool, + &mut block_pool, + &mut const_pool, + )[0]; + + let num_src_inputs = input_vars.len(); + + let dst = rewrite_def_list( + PatternPosition::Destination, + dst, + &mut symbol_table, + &mut input_vars, + &mut defined_vars, + &mut var_pool, + &mut def_pool, + &mut block_pool, + &mut const_pool, + ); + + // Sanity checks. + for &var_index in &input_vars { + assert!( + var_pool.get(var_index).is_input(), + format!("'{:?}' used as both input and def", var_pool.get(var_index)) + ); + } + assert!( + input_vars.len() == num_src_inputs, + format!( + "extra input vars in dst pattern: {:?}", + input_vars + .iter() + .map(|&i| var_pool.get(i)) + .skip(num_src_inputs) + .collect::>() + ) + ); + + // Perform type inference and cleanup. + let type_env = infer_transform(src, &dst, &def_pool, &mut var_pool).unwrap(); + + // Sanity check: the set of inferred free type variables should be a subset of the type + // variables corresponding to Vars appearing in the source pattern. + { + let free_typevars: HashSet = + HashSet::from_iter(type_env.free_typevars(&mut var_pool)); + let src_tvs = HashSet::from_iter( + input_vars + .clone() + .iter() + .chain( + defined_vars + .iter() + .filter(|&&var_index| !var_pool.get(var_index).is_temp()), + ) + .map(|&var_index| var_pool.get(var_index).get_typevar()) + .filter(|maybe_var| maybe_var.is_some()) + .map(|var| var.unwrap()), + ); + if !free_typevars.is_subset(&src_tvs) { + let missing_tvs = (&free_typevars - &src_tvs) + .iter() + .map(|tv| tv.name.clone()) + .collect::>() + .join(", "); + panic!("Some free vars don't appear in src: {}", missing_tvs); + } + } + + for &var_index in input_vars.iter().chain(defined_vars.iter()) { + let var = var_pool.get_mut(var_index); + let canon_tv = type_env.get_equivalent(&var.get_or_create_typevar()); + var.set_typevar(canon_tv); + } + + Self { + src, + dst, + var_pool, + def_pool, + block_pool, + const_pool, + type_env, + } + } + + fn verify_legalize(&self) { + let def = self.def_pool.get(self.src); + for &var_index in def.defined_vars.iter() { + let defined_var = self.var_pool.get(var_index); + assert!( + defined_var.is_output(), + format!("{:?} not defined in the destination pattern", defined_var) + ); + } + } +} + +/// Inserts, if not present, a name in the `symbol_table`. Then returns its index in the variable +/// pool `var_pool`. 
If the variable was not present in the symbol table, then add it to the list of +/// `defined_vars`. +fn var_index( + name: &str, + symbol_table: &mut SymbolTable, + defined_vars: &mut Vec, + var_pool: &mut VarPool, +) -> VarIndex { + let name = name.to_string(); + match symbol_table.get(&name) { + Some(&existing_var) => existing_var, + None => { + // Materialize the variable. + let new_var = var_pool.create(name.clone()); + symbol_table.insert(name, new_var); + defined_vars.push(new_var); + new_var + } + } +} + +/// Given a list of symbols defined in a Def, rewrite them to local symbols. Yield the new locals. +fn rewrite_defined_vars( + position: PatternPosition, + dummy_def: &DummyDef, + def_index: DefIndex, + symbol_table: &mut SymbolTable, + defined_vars: &mut Vec, + var_pool: &mut VarPool, +) -> Vec { + let mut new_defined_vars = Vec::new(); + for var in &dummy_def.defined_vars { + let own_var = var_index(&var.name, symbol_table, defined_vars, var_pool); + var_pool.get_mut(own_var).set_def(position, def_index); + new_defined_vars.push(own_var); + } + new_defined_vars +} + +/// Find all uses of variables in `expr` and replace them with our own local symbols. +fn rewrite_expr( + position: PatternPosition, + dummy_expr: DummyExpr, + symbol_table: &mut SymbolTable, + input_vars: &mut Vec, + var_pool: &mut VarPool, + const_pool: &mut ConstPool, +) -> Apply { + let (apply_target, dummy_args) = if let DummyExpr::Apply(apply_target, dummy_args) = dummy_expr + { + (apply_target, dummy_args) + } else { + panic!("we only rewrite apply expressions"); + }; + + assert_eq!( + apply_target.inst().operands_in.len(), + dummy_args.len(), + "number of arguments in instruction {} is incorrect\nexpected: {:?}", + apply_target.inst().name, + apply_target + .inst() + .operands_in + .iter() + .map(|operand| format!("{}: {}", operand.name, operand.kind.rust_type)) + .collect::>(), + ); + + let mut args = Vec::new(); + for (i, arg) in dummy_args.into_iter().enumerate() { + match arg { + DummyExpr::Var(var) => { + let own_var = var_index(&var.name, symbol_table, input_vars, var_pool); + let var = var_pool.get(own_var); + assert!( + var.is_input() || var.get_def(position).is_some(), + format!("{:?} used as both input and def", var) + ); + args.push(Expr::Var(own_var)); + } + DummyExpr::Literal(literal) => { + assert!(!apply_target.inst().operands_in[i].is_value()); + args.push(Expr::Literal(literal)); + } + DummyExpr::Constant(constant) => { + let const_name = const_pool.insert(constant.0); + // Here we abuse var_index by passing an empty, immediately-dropped vector to + // `defined_vars`; the reason for this is that unlike the `Var` case above, + // constants will create a variable that is not an input variable (it is tracked + // instead by ConstPool). + let const_var = var_index(&const_name, symbol_table, &mut vec![], var_pool); + args.push(Expr::Var(const_var)); + } + DummyExpr::Apply(..) => { + panic!("Recursive apply is not allowed."); + } + DummyExpr::Block(_block) => { + panic!("Blocks are not valid arguments."); + } + } + } + + Apply::new(apply_target, args) +} + +#[allow(clippy::too_many_arguments)] +fn rewrite_def_list( + position: PatternPosition, + dummy_defs: Vec, + symbol_table: &mut SymbolTable, + input_vars: &mut Vec, + defined_vars: &mut Vec, + var_pool: &mut VarPool, + def_pool: &mut DefPool, + block_pool: &mut BlockPool, + const_pool: &mut ConstPool, +) -> Vec { + let mut new_defs = Vec::new(); + // Register variable names of new blocks first as a block name can be used to jump forward. 
Thus + // the name has to be registered first to avoid misinterpreting it as an input-var. + for dummy_def in dummy_defs.iter() { + if let DummyExpr::Block(ref var) = dummy_def.expr { + var_index(&var.name, symbol_table, defined_vars, var_pool); + } + } + + // Iterate over the definitions and blocks, to map variables names to inputs or outputs. + for dummy_def in dummy_defs { + let def_index = def_pool.next_index(); + + let new_defined_vars = rewrite_defined_vars( + position, + &dummy_def, + def_index, + symbol_table, + defined_vars, + var_pool, + ); + if let DummyExpr::Block(var) = dummy_def.expr { + let var_index = *symbol_table + .get(&var.name) + .or_else(|| { + panic!( + "Block {} was not registered during the first visit", + var.name + ) + }) + .unwrap(); + var_pool.get_mut(var_index).set_def(position, def_index); + block_pool.create_block(var_index, def_index); + } else { + let new_apply = rewrite_expr( + position, + dummy_def.expr, + symbol_table, + input_vars, + var_pool, + const_pool, + ); + + assert!( + def_pool.next_index() == def_index, + "shouldn't have created new defs in the meanwhile" + ); + assert_eq!( + new_apply.inst.value_results.len(), + new_defined_vars.len(), + "number of Var results in instruction is incorrect" + ); + + new_defs.push(def_pool.create_inst(new_apply, new_defined_vars)); + } + } + new_defs +} + +/// A group of related transformations. +pub(crate) struct TransformGroup { + pub name: &'static str, + pub doc: &'static str, + pub chain_with: Option, + pub isa_name: Option<&'static str>, + pub id: TransformGroupIndex, + + /// Maps Instruction camel_case names to custom legalization functions names. + pub custom_legalizes: HashMap, + pub transforms: Vec, +} + +impl TransformGroup { + pub fn rust_name(&self) -> String { + match self.isa_name { + Some(_) => { + // This is a function in the same module as the LEGALIZE_ACTIONS table referring to + // it. + self.name.to_string() + } + None => format!("crate::legalizer::{}", self.name), + } + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct TransformGroupIndex(u32); +entity_impl!(TransformGroupIndex); + +pub(crate) struct TransformGroupBuilder { + name: &'static str, + doc: &'static str, + chain_with: Option, + isa_name: Option<&'static str>, + pub custom_legalizes: HashMap, + pub transforms: Vec, +} + +impl TransformGroupBuilder { + pub fn new(name: &'static str, doc: &'static str) -> Self { + Self { + name, + doc, + chain_with: None, + isa_name: None, + custom_legalizes: HashMap::new(), + transforms: Vec::new(), + } + } + + pub fn chain_with(mut self, next_id: TransformGroupIndex) -> Self { + assert!(self.chain_with.is_none()); + self.chain_with = Some(next_id); + self + } + + pub fn isa(mut self, isa_name: &'static str) -> Self { + assert!(self.isa_name.is_none()); + self.isa_name = Some(isa_name); + self + } + + /// Add a custom legalization action for `inst`. + /// + /// The `func_name` parameter is the fully qualified name of a Rust function which takes the + /// same arguments as the `isa::Legalize` actions. + /// + /// The custom function will be called to legalize `inst` and any return value is ignored. + pub fn custom_legalize(&mut self, inst: &Instruction, func_name: &'static str) { + assert!( + self.custom_legalizes + .insert(inst.camel_name.clone(), func_name) + .is_none(), + format!( + "custom legalization action for {} inserted twice", + inst.name + ) + ); + } + + /// Add a legalization pattern to this group. 
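+    ///
+    /// `src` is the single-instruction source pattern and `dst` the replacement
+    /// sequence; the pair is type-checked and verified before being stored.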
+ pub fn legalize(&mut self, src: DummyDef, dst: Vec) { + let transform = Transform::new(src, dst); + transform.verify_legalize(); + self.transforms.push(transform); + } + + pub fn build_and_add_to(self, owner: &mut TransformGroups) -> TransformGroupIndex { + let next_id = owner.next_key(); + owner.add(TransformGroup { + name: self.name, + doc: self.doc, + isa_name: self.isa_name, + id: next_id, + chain_with: self.chain_with, + custom_legalizes: self.custom_legalizes, + transforms: self.transforms, + }) + } +} + +pub(crate) struct TransformGroups { + groups: PrimaryMap, +} + +impl TransformGroups { + pub fn new() -> Self { + Self { + groups: PrimaryMap::new(), + } + } + pub fn add(&mut self, new_group: TransformGroup) -> TransformGroupIndex { + for group in self.groups.values() { + assert!( + group.name != new_group.name, + format!("trying to insert {} for the second time", new_group.name) + ); + } + self.groups.push(new_group) + } + pub fn get(&self, id: TransformGroupIndex) -> &TransformGroup { + &self.groups[id] + } + fn next_key(&self) -> TransformGroupIndex { + self.groups.next_key() + } + pub fn by_name(&self, name: &'static str) -> &TransformGroup { + for group in self.groups.values() { + if group.name == name { + return group; + } + } + panic!(format!("transform group with name {} not found", name)); + } +} + +#[test] +#[should_panic] +fn test_double_custom_legalization() { + use crate::cdsl::formats::InstructionFormatBuilder; + use crate::cdsl::instructions::{AllInstructions, InstructionBuilder, InstructionGroupBuilder}; + + let nullary = InstructionFormatBuilder::new("nullary").build(); + + let mut dummy_all = AllInstructions::new(); + let mut inst_group = InstructionGroupBuilder::new(&mut dummy_all); + inst_group.push(InstructionBuilder::new("dummy", "doc", &nullary)); + + let inst_group = inst_group.build(); + let dummy_inst = inst_group.by_name("dummy"); + + let mut transform_group = TransformGroupBuilder::new("test", "doc"); + transform_group.custom_legalize(&dummy_inst, "custom 1"); + transform_group.custom_legalize(&dummy_inst, "custom 2"); +} diff --git a/cranelift/codegen/meta/src/default_map.rs b/cranelift/codegen/meta/src/default_map.rs new file mode 100644 index 0000000000..3a2be05dac --- /dev/null +++ b/cranelift/codegen/meta/src/default_map.rs @@ -0,0 +1,20 @@ +//! Trait for extending `HashMap` with `get_or_default`. +use std::collections::HashMap; +use std::hash::Hash; + +pub(crate) trait MapWithDefault { + fn get_or_default(&mut self, k: K) -> &mut V; +} + +impl MapWithDefault for HashMap { + fn get_or_default(&mut self, k: K) -> &mut V { + self.entry(k).or_insert_with(V::default) + } +} + +#[test] +fn test_default() { + let mut hash_map = HashMap::new(); + hash_map.insert(42, "hello"); + assert_eq!(*hash_map.get_or_default(43), ""); +} diff --git a/cranelift/codegen/meta/src/error.rs b/cranelift/codegen/meta/src/error.rs new file mode 100644 index 0000000000..4cbf3d8285 --- /dev/null +++ b/cranelift/codegen/meta/src/error.rs @@ -0,0 +1,48 @@ +//! Error returned during meta code-generation. +use std::fmt; +use std::io; + +/// An error that occurred when the cranelift_codegen_meta crate was generating +/// source files for the cranelift_codegen crate. +#[derive(Debug)] +pub struct Error { + inner: Box, +} + +impl Error { + /// Create a new error object with the given message. 
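+    ///
+    /// Accepts anything convertible into a `String`, e.g. a `&str` literal.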
+    pub fn with_msg<S: Into<String>>(msg: S) -> Error {
+        Error {
+            inner: Box::new(ErrorInner::Msg(msg.into())),
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.inner)
+    }
+}
+
+impl From<io::Error> for Error {
+    fn from(e: io::Error) -> Self {
+        Error {
+            inner: Box::new(ErrorInner::IoError(e)),
+        }
+    }
+}
+
+#[derive(Debug)]
+enum ErrorInner {
+    Msg(String),
+    IoError(io::Error),
+}
+
+impl fmt::Display for ErrorInner {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            ErrorInner::Msg(ref s) => write!(f, "{}", s),
+            ErrorInner::IoError(ref e) => write!(f, "{}", e),
+        }
+    }
+}
diff --git a/cranelift/codegen/meta/src/gen_binemit.rs b/cranelift/codegen/meta/src/gen_binemit.rs
new file mode 100644
index 0000000000..f67aa9b5a9
--- /dev/null
+++ b/cranelift/codegen/meta/src/gen_binemit.rs
@@ -0,0 +1,224 @@
+//! Generate binary emission code for each ISA.
+
+use cranelift_entity::EntityRef;
+
+use crate::error;
+use crate::srcgen::Formatter;
+
+use crate::cdsl::recipes::{EncodingRecipe, OperandConstraint, Recipes};
+
+/// Generate code to handle a single recipe.
+///
+/// - Unpack the instruction data, knowing the format.
+/// - Determine register locations for operands with register constraints.
+/// - Determine stack slot locations for operands with stack constraints.
+/// - Call hand-written code for the actual emission.
+fn gen_recipe(recipe: &EncodingRecipe, fmt: &mut Formatter) {
+    let inst_format = &recipe.format;
+    let num_value_ops = inst_format.num_value_operands;
+
+    // TODO: Set want_args to true only for MultiAry instructions instead of all formats with a
+    // value list.
+    let want_args = inst_format.has_value_list
+        || recipe.operands_in.iter().any(|c| match c {
+            OperandConstraint::RegClass(_) | OperandConstraint::Stack(_) => true,
+            OperandConstraint::FixedReg(_) | OperandConstraint::TiedInput(_) => false,
+        });
+    assert!(!want_args || num_value_ops > 0 || inst_format.has_value_list);
+
+    let want_outs = recipe.operands_out.iter().any(|c| match c {
+        OperandConstraint::RegClass(_) | OperandConstraint::Stack(_) => true,
+        OperandConstraint::FixedReg(_) | OperandConstraint::TiedInput(_) => false,
+    });
+
+    let is_regmove = ["RegMove", "RegSpill", "RegFill"].contains(&inst_format.name);
+
+    // Unpack the instruction data.
+    fmtln!(fmt, "if let InstructionData::{} {{", inst_format.name);
+    fmt.indent(|fmt| {
+        fmt.line("opcode,");
+        for f in &inst_format.imm_fields {
+            fmtln!(fmt, "{},", f.member);
+        }
+        if want_args {
+            if inst_format.has_value_list || num_value_ops > 1 {
+                fmt.line("ref args,");
+            } else {
+                fmt.line("arg,");
+            }
+        }
+        fmt.line("..");
+
+        fmt.outdented_line("} = *inst_data {");
+
+        // Pass recipe arguments in this order: inputs, imm_fields, outputs.
+        let mut args = String::new();
+
+        if want_args && !is_regmove {
+            if inst_format.has_value_list {
+                fmt.line("let args = args.as_slice(&func.dfg.value_lists);");
+            } else if num_value_ops == 1 {
+                fmt.line("let args = [arg];");
+            }
+            args += &unwrap_values(&recipe.operands_in, "in", "args", fmt);
+        }
+
+        for f in &inst_format.imm_fields {
+            args += &format!(", {}", f.member);
+        }
+
+        // Unwrap interesting output arguments.
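+        // E.g. for a single `RegClass` output this emits (sketch; the variable name is produced
+        // by `unwrap_values` below):
+        //   let out_reg0 = divert.reg(results[0], &func.locations);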
+        if want_outs {
+            if recipe.operands_out.len() == 1 {
+                fmt.line("let results = [func.dfg.first_result(inst)];")
+            } else {
+                fmt.line("let results = func.dfg.inst_results(inst);");
+            }
+            args += &unwrap_values(&recipe.operands_out, "out", "results", fmt);
+        }
+
+        // Optimization: Only update the register diversion tracker for regmove instructions.
+        if is_regmove {
+            fmt.line("divert.apply(inst_data);")
+        }
+
+        match &recipe.emit {
+            Some(emit) => {
+                fmt.multi_line(emit);
+                fmt.line("return;");
+            }
+            None => {
+                fmtln!(
+                    fmt,
+                    "return recipe_{}(func, inst, sink, bits{});",
+                    recipe.name.to_lowercase(),
+                    args
+                );
+            }
+        }
+    });
+    fmt.line("}");
+}
+
+/// Emit code that unwraps values living in registers or stack slots.
+///
+/// :param args: Input or output constraints.
+/// :param prefix: Prefix to be used for the generated local variables.
+/// :param values: Name of slice containing the values to be unwrapped.
+/// :returns: Comma separated list of the generated variables
+fn unwrap_values(
+    args: &[OperandConstraint],
+    prefix: &str,
+    values_slice: &str,
+    fmt: &mut Formatter,
+) -> String {
+    let mut varlist = String::new();
+    for (i, cst) in args.iter().enumerate() {
+        match cst {
+            OperandConstraint::RegClass(_reg_class) => {
+                let v = format!("{}_reg{}", prefix, i);
+                varlist += &format!(", {}", v);
+                fmtln!(
+                    fmt,
+                    "let {} = divert.reg({}[{}], &func.locations);",
+                    v,
+                    values_slice,
+                    i
+                );
+            }
+            OperandConstraint::Stack(stack) => {
+                let v = format!("{}_stk{}", prefix, i);
+                varlist += &format!(", {}", v);
+                fmtln!(fmt, "let {} = StackRef::masked(", v);
+                fmt.indent(|fmt| {
+                    fmtln!(
+                        fmt,
+                        "divert.stack({}[{}], &func.locations),",
+                        values_slice,
+                        i
+                    );
+                    fmt.line(format!("{},", stack.stack_base_mask()));
+                    fmt.line("&func.stack_slots,");
+                });
+                fmt.line(").unwrap();");
+            }
+            _ => {}
+        }
+    }
+    varlist
+}
+
+fn gen_isa(isa_name: &str, recipes: &Recipes, fmt: &mut Formatter) {
+    fmt.doc_comment(format!(
+        "Emit binary machine code for `inst` for the {} ISA.",
+        isa_name
+    ));
+
+    if recipes.is_empty() {
+        fmt.line("pub fn emit_inst<CS: CodeSink + ?Sized>(");
+        fmt.indent(|fmt| {
+            fmt.line("func: &Function,");
+            fmt.line("inst: Inst,");
+            fmt.line("_divert: &mut RegDiversions,");
+            fmt.line("_sink: &mut CS,");
+            fmt.line("_isa: &dyn TargetIsa,");
+        });
+        fmt.line(") {");
+        fmt.indent(|fmt| {
+            // No encoding recipes: Emit a stub.
+            fmt.line("bad_encoding(func, inst)");
+        });
+        fmt.line("}");
+        return;
+    }
+
+    fmt.line("#[allow(unused_variables, unreachable_code)]");
+    fmt.line("pub fn emit_inst<CS: CodeSink + ?Sized>(");
+    fmt.indent(|fmt| {
+        fmt.line("func: &Function,");
+        fmt.line("inst: Inst,");
+        fmt.line("divert: &mut RegDiversions,");
+        fmt.line("sink: &mut CS,");
+        fmt.line("isa: &dyn TargetIsa,")
+    });
+
+    fmt.line(") {");
+    fmt.indent(|fmt| {
+        fmt.line("let encoding = func.encodings[inst];");
+        fmt.line("let bits = encoding.bits();");
+        fmt.line("let inst_data = &func.dfg[inst];");
+        fmt.line("match encoding.recipe() {");
+        fmt.indent(|fmt| {
+            for (i, recipe) in recipes.iter() {
+                fmt.comment(format!("Recipe {}", recipe.name));
+                fmtln!(fmt, "{} => {{", i.index());
+                fmt.indent(|fmt| {
+                    gen_recipe(recipe, fmt);
+                });
+                fmt.line("}");
+            }
+            fmt.line("_ => {},");
+        });
+        fmt.line("}");
+
+        // Allow for unencoded ghost instructions. The verifier will check details.
+ fmt.line("if encoding.is_legal() {"); + fmt.indent(|fmt| { + fmt.line("bad_encoding(func, inst);"); + }); + fmt.line("}"); + }); + fmt.line("}"); +} + +pub(crate) fn generate( + isa_name: &str, + recipes: &Recipes, + binemit_filename: &str, + out_dir: &str, +) -> Result<(), error::Error> { + let mut fmt = Formatter::new(); + gen_isa(isa_name, recipes, &mut fmt); + fmt.update_file(binemit_filename, out_dir)?; + Ok(()) +} diff --git a/cranelift/codegen/meta/src/gen_encodings.rs b/cranelift/codegen/meta/src/gen_encodings.rs new file mode 100644 index 0000000000..d79dc66340 --- /dev/null +++ b/cranelift/codegen/meta/src/gen_encodings.rs @@ -0,0 +1,1139 @@ +//! Generate sources for instruction encoding. +//! +//! The tables and functions generated here support the `TargetISA::encode()` function which +//! determines if a given instruction is legal, and if so, its `Encoding` data which consists of a +//! *recipe* and some *encoding* bits. +//! +//! The `encode` function doesn't actually generate the binary machine bits. Each recipe has a +//! corresponding hand-written function to do that after registers are allocated. +//! +//! This is the information available to us: +//! +//! - The instruction to be encoded as an `InstructionData` reference. +//! - The controlling type variable. +//! - The data-flow graph giving us access to the types of all values involved. This is needed for +//! testing any secondary type variables. +//! - A `PredicateView` reference for the ISA-specific settings for evaluating ISA predicates. +//! - The currently active CPU mode is determined by the ISA. +//! +//! ## Level 1 table lookup +//! +//! The CPU mode provides the first table. The key is the instruction's controlling type variable. +//! If the instruction is not polymorphic, use `INVALID` for the type variable. The table values +//! are level 2 tables. +//! +//! ## Level 2 table lookup +//! +//! The level 2 table is keyed by the instruction's opcode. The table values are *encoding lists*. +//! +//! The two-level table lookup allows the level 2 tables to be much smaller with good locality. +//! Code in any given function usually only uses a few different types, so many of the level 2 +//! tables will be cold. +//! +//! ## Encoding lists +//! +//! An encoding list is a non-empty sequence of list entries. Each entry has one of these forms: +//! +//! 1. Recipe + bits. Use this encoding if the recipe predicate is satisfied. +//! 2. Recipe + bits, final entry. Use this encoding if the recipe predicate is satisfied. +//! Otherwise, stop with the default legalization code. +//! 3. Stop with legalization code. +//! 4. Predicate + skip count. Test predicate and skip N entries if it is false. +//! 5. Predicate + stop. Test predicate and stop with the default legalization code if it is false. +//! +//! The instruction predicate is also used to distinguish between polymorphic instructions with +//! different types for secondary type variables. 
+
+use std::collections::btree_map;
+use std::collections::{BTreeMap, HashMap, HashSet};
+use std::convert::TryFrom;
+use std::iter::FromIterator;
+
+use cranelift_codegen_shared::constant_hash::generate_table;
+use cranelift_entity::EntityRef;
+
+use crate::error;
+use crate::srcgen::Formatter;
+
+use crate::cdsl::cpu_modes::CpuMode;
+use crate::cdsl::encodings::Encoding;
+use crate::cdsl::instructions::{Instruction, InstructionPredicate, InstructionPredicateNumber};
+use crate::cdsl::isa::TargetIsa;
+use crate::cdsl::recipes::{EncodingRecipe, OperandConstraint, Recipes, Register};
+use crate::cdsl::regs::IsaRegs;
+use crate::cdsl::settings::SettingPredicateNumber;
+use crate::cdsl::types::ValueType;
+use crate::cdsl::xform::TransformGroupIndex;
+
+use crate::shared::Definitions as SharedDefinitions;
+
+use crate::default_map::MapWithDefault;
+use crate::unique_table::UniqueSeqTable;
+
+/// Emit code for matching an instruction predicate against an `InstructionData` reference called
+/// `inst`.
+///
+/// The generated code is an `if let` pattern match that falls through if the instruction has an
+/// unexpected format. This should lead to a panic.
+fn emit_instp(instp: &InstructionPredicate, has_func: bool, fmt: &mut Formatter) {
+    if let Some(type_predicate) = instp.type_predicate("func") {
+        fmt.line("let args = inst.arguments(&func.dfg.value_lists);");
+        fmt.line(type_predicate);
+        return;
+    }
+
+    let leaves = instp.collect_leaves();
+
+    let mut has_type_check = false;
+    let mut format_name = None;
+    let mut field_names = HashSet::new();
+
+    for leaf in leaves {
+        if leaf.is_type_predicate() {
+            has_type_check = true;
+        } else {
+            field_names.insert(leaf.format_destructuring_member_name());
+            let leaf_format_name = leaf.format_name();
+            match format_name {
+                None => format_name = Some(leaf_format_name),
+                Some(previous_format_name) => {
+                    assert!(
+                        previous_format_name == leaf_format_name,
+                        "Format predicate can only operate on a single InstructionFormat; \
+                         trying to use both {} and {}",
+                        previous_format_name,
+                        leaf_format_name
+                    );
+                }
+            }
+        }
+    }
+
+    let mut fields = Vec::from_iter(field_names);
+    fields.sort();
+    let fields = fields.join(", ");
+
+    let format_name = format_name.expect("There should be a format name!");
+
+    fmtln!(
+        fmt,
+        "if let crate::ir::InstructionData::{} {{ {}, .. }} = *inst {{",
+        format_name,
+        fields
+    );
+    fmt.indent(|fmt| {
+        if has_type_check {
+            // We could implement this.
+            assert!(has_func, "recipe predicates can't check type variables.");
+            fmt.line("let args = inst.arguments(&func.dfg.value_lists);");
+        } else if has_func {
+            // Silence dead argument.
+            fmt.line("let _ = func;");
+        }
+        fmtln!(fmt, "return {};", instp.rust_predicate("func").unwrap());
+    });
+    fmtln!(fmt, "}");
+
+    fmt.line("unreachable!();");
+}
+
+/// Emit private functions for checking recipe predicates as well as a static `RECIPE_PREDICATES`
+/// array indexed by recipe number.
+///
+/// A recipe predicate is a combination of an ISA predicate and an instruction predicate. Many
+/// recipes have identical predicates.
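+///
+/// Sketch of one generated function (the name and the tested predicate number are invented for
+/// illustration; the signature matches the `fmtln!` below):
+///
+/// ```text
+/// fn recipe_predicate_op1r(isap: crate::settings::PredicateView, _: &ir::InstructionData) -> bool {
+///     isap.test(16)
+/// }
+/// ```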
+fn emit_recipe_predicates(isa: &TargetIsa, fmt: &mut Formatter) {
+    let mut predicate_names = HashMap::new();
+
+    fmt.comment(format!("{} recipe predicates.", isa.name));
+    for recipe in isa.recipes.values() {
+        let (isap, instp) = match (&recipe.isa_predicate, &recipe.inst_predicate) {
+            (None, None) => continue,
+            (isap, instp) if predicate_names.contains_key(&(isap, instp)) => continue,
+            (isap, instp) => (isap, instp),
+        };
+
+        let func_name = format!("recipe_predicate_{}", recipe.name.to_lowercase());
+        predicate_names.insert((isap, instp), func_name.clone());
+
+        // Generate the predicate function.
+        fmtln!(
+            fmt,
+            "fn {}({}: crate::settings::PredicateView, {}: &ir::InstructionData) -> bool {{",
+            func_name,
+            if isap.is_some() { "isap" } else { "_" },
+            if instp.is_some() { "inst" } else { "_" }
+        );
+        fmt.indent(|fmt| {
+            match (isap, instp) {
+                (Some(isap), None) => {
+                    fmtln!(fmt, "isap.test({})", isap);
+                }
+                (None, Some(instp)) => {
+                    emit_instp(instp, /* has func */ false, fmt);
+                }
+                (Some(isap), Some(instp)) => {
+                    fmtln!(fmt, "isap.test({}) &&", isap);
+                    emit_instp(instp, /* has func */ false, fmt);
+                }
+                _ => panic!("skipped above"),
+            }
+        });
+        fmtln!(fmt, "}");
+    }
+    fmt.empty_line();
+
+    // Generate the static table.
+    fmt.doc_comment(format!(
+        r#"{} recipe predicate table.
+
+        One entry per recipe, set to Some only when the recipe is guarded by a predicate."#,
+        isa.name
+    ));
+    fmtln!(
+        fmt,
+        "pub static RECIPE_PREDICATES: [RecipePredicate; {}] = [",
+        isa.recipes.len()
+    );
+    fmt.indent(|fmt| {
+        for recipe in isa.recipes.values() {
+            match (&recipe.isa_predicate, &recipe.inst_predicate) {
+                (None, None) => fmt.line("None,"),
+                key => fmtln!(fmt, "Some({}),", predicate_names.get(&key).unwrap()),
+            }
+        }
+    });
+    fmtln!(fmt, "];");
+    fmt.empty_line();
+}
+
+/// Emit private functions for matching instruction predicates as well as a static
+/// `INST_PREDICATES` array indexed by predicate number.
+fn emit_inst_predicates(isa: &TargetIsa, fmt: &mut Formatter) {
+    fmt.comment(format!("{} instruction predicates.", isa.name));
+    for (id, instp) in isa.encodings_predicates.iter() {
+        fmtln!(fmt, "fn inst_predicate_{}(func: &crate::ir::Function, inst: &crate::ir::InstructionData) -> bool {{", id.index());
+        fmt.indent(|fmt| {
+            emit_instp(instp, /* has func */ true, fmt);
+        });
+        fmtln!(fmt, "}");
+    }
+    fmt.empty_line();
+
+    // Generate the static table.
+    fmt.doc_comment(format!(
+        r#"{} instruction predicate table.
+
+        One entry per instruction predicate, so the encoding bytecode can embed indexes into this
+        table."#,
+        isa.name
+    ));
+    fmtln!(
+        fmt,
+        "pub static INST_PREDICATES: [InstPredicate; {}] = [",
+        isa.encodings_predicates.len()
+    );
+    fmt.indent(|fmt| {
+        for id in isa.encodings_predicates.keys() {
+            fmtln!(fmt, "inst_predicate_{},", id.index());
+        }
+    });
+    fmtln!(fmt, "];");
+    fmt.empty_line();
+}
+
+/// Emit a table of encoding recipe names keyed by recipe number.
+///
+/// This is used for pretty-printing encodings.
+fn emit_recipe_names(isa: &TargetIsa, fmt: &mut Formatter) {
+    fmt.doc_comment(format!(
+        r#"{} recipe names, using the same recipe index spaces as the one specified by the
+        corresponding binemit file."#,
+        isa.name
+    ));
+    fmtln!(
+        fmt,
+        "static RECIPE_NAMES: [&str; {}] = [",
+        isa.recipes.len()
+    );
+    fmt.indent(|fmt| {
+        for recipe in isa.recipes.values() {
+            fmtln!(fmt, r#""{}","#, recipe.name);
+        }
+    });
+    fmtln!(fmt, "];");
+    fmt.empty_line();
+}
+
+/// Returns a set of all the registers involved in fixed register constraints.
+fn get_fixed_registers(operands_in: &[OperandConstraint]) -> HashSet<Register> {
+    HashSet::from_iter(operands_in.iter().filter_map(|constraint| {
+        if let OperandConstraint::FixedReg(reg) = &constraint {
+            Some(*reg)
+        } else {
+            None
+        }
+    }))
+}
+
+/// Emit a struct field initializer for an array of operand constraints.
+///
+/// Note "fixed_registers" must refer to the other kind of operands (i.e. if we're operating on
+/// inputs, fixed_registers must contain the fixed output registers).
+fn emit_operand_constraints(
+    registers: &IsaRegs,
+    recipe: &EncodingRecipe,
+    constraints: &[OperandConstraint],
+    field_name: &'static str,
+    tied_operands: &HashMap<usize, usize>,
+    fixed_registers: &HashSet<Register>,
+    fmt: &mut Formatter,
+) {
+    if constraints.is_empty() {
+        fmtln!(fmt, "{}: &[],", field_name);
+        return;
+    }
+
+    fmtln!(fmt, "{}: &[", field_name);
+    fmt.indent(|fmt| {
+        for (n, constraint) in constraints.iter().enumerate() {
+            fmt.line("OperandConstraint {");
+            fmt.indent(|fmt| {
+                match constraint {
+                    OperandConstraint::RegClass(reg_class) => {
+                        if let Some(tied_input) = tied_operands.get(&n) {
+                            fmtln!(fmt, "kind: ConstraintKind::Tied({}),", tied_input);
+                        } else {
+                            fmt.line("kind: ConstraintKind::Reg,");
+                        }
+                        fmtln!(
+                            fmt,
+                            "regclass: &{}_DATA,",
+                            registers.classes[*reg_class].name
+                        );
+                    }
+                    OperandConstraint::FixedReg(reg) => {
+                        assert!(!tied_operands.contains_key(&n), "can't tie fixed registers");
+                        let constraint_kind = if fixed_registers.contains(reg) {
+                            "FixedTied"
+                        } else {
+                            "FixedReg"
+                        };
+                        fmtln!(
+                            fmt,
+                            "kind: ConstraintKind::{}({}),",
+                            constraint_kind,
+                            reg.unit
+                        );
+                        fmtln!(
+                            fmt,
+                            "regclass: &{}_DATA,",
+                            registers.classes[reg.regclass].name
+                        );
+                    }
+                    OperandConstraint::TiedInput(tied_input) => {
+                        // This is a tied output constraint. It should never happen
+                        // for input constraints.
+                        assert!(
+                            tied_input == tied_operands.get(&n).unwrap(),
+                            "invalid tied constraint"
+                        );
+                        fmtln!(fmt, "kind: ConstraintKind::Tied({}),", tied_input);
+
+                        let tied_class = if let OperandConstraint::RegClass(tied_class) =
+                            recipe.operands_in[*tied_input]
+                        {
+                            tied_class
+                        } else {
+                            panic!("tied constraints relate only to register inputs");
+                        };
+
+                        fmtln!(
+                            fmt,
+                            "regclass: &{}_DATA,",
+                            registers.classes[tied_class].name
+                        );
+                    }
+                    OperandConstraint::Stack(stack) => {
+                        assert!(!tied_operands.contains_key(&n), "can't tie stack operand");
+                        fmt.line("kind: ConstraintKind::Stack,");
+                        fmtln!(
+                            fmt,
+                            "regclass: &{}_DATA,",
+                            registers.classes[stack.regclass].name
+                        );
+                    }
+                }
+            });
+            fmt.line("},");
+        }
+    });
+    fmtln!(fmt, "],");
+}
+
+/// Emit a table of encoding recipe operand constraints keyed by recipe number.
+///
+/// These are used by the register allocator to pick registers that can be properly encoded.
fn emit_recipe_constraints(isa: &TargetIsa, fmt: &mut Formatter) {
+    fmt.doc_comment(format!(
+        r#"{} recipe constraints list, using the same recipe index spaces as the one
+        specified by the corresponding binemit file. These constraints are used by register
+        allocation to select the right location to use for input and output values."#,
+        isa.name
+    ));
+    fmtln!(
+        fmt,
+        "static RECIPE_CONSTRAINTS: [RecipeConstraints; {}] = [",
+        isa.recipes.len()
+    );
+    fmt.indent(|fmt| {
+        for recipe in isa.recipes.values() {
+            // Compute a mapping of tied operands in both directions (input tied to outputs and
+            // conversely).
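+            // (Illustration: an output constraint `TiedInput(0)` at output position 0 records
+            // the pair 0 -> 0 in both maps.)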
+            let mut tied_in_to_out = HashMap::new();
+            let mut tied_out_to_in = HashMap::new();
+            for (out_index, constraint) in recipe.operands_out.iter().enumerate() {
+                if let OperandConstraint::TiedInput(in_index) = &constraint {
+                    tied_in_to_out.insert(*in_index, out_index);
+                    tied_out_to_in.insert(out_index, *in_index);
+                }
+            }
+
+            // Find the sets of registers involved in fixed register constraints.
+            let fixed_inputs = get_fixed_registers(&recipe.operands_in);
+            let fixed_outputs = get_fixed_registers(&recipe.operands_out);
+
+            fmt.comment(format!("Constraints for recipe {}:", recipe.name));
+            fmt.line("RecipeConstraints {");
+            fmt.indent(|fmt| {
+                emit_operand_constraints(
+                    &isa.regs,
+                    recipe,
+                    &recipe.operands_in,
+                    "ins",
+                    &tied_in_to_out,
+                    &fixed_outputs,
+                    fmt,
+                );
+                emit_operand_constraints(
+                    &isa.regs,
+                    recipe,
+                    &recipe.operands_out,
+                    "outs",
+                    &tied_out_to_in,
+                    &fixed_inputs,
+                    fmt,
+                );
+                fmtln!(
+                    fmt,
+                    "fixed_ins: {},",
+                    if !fixed_inputs.is_empty() {
+                        "true"
+                    } else {
+                        "false"
+                    }
+                );
+                fmtln!(
+                    fmt,
+                    "fixed_outs: {},",
+                    if !fixed_outputs.is_empty() {
+                        "true"
+                    } else {
+                        "false"
+                    }
+                );
+                fmtln!(
+                    fmt,
+                    "tied_ops: {},",
+                    if !tied_in_to_out.is_empty() {
+                        "true"
+                    } else {
+                        "false"
+                    }
+                );
+                fmtln!(
+                    fmt,
+                    "clobbers_flags: {},",
+                    if recipe.clobbers_flags {
+                        "true"
+                    } else {
+                        "false"
+                    }
+                );
+            });
+            fmt.line("},");
+        }
+    });
+    fmtln!(fmt, "];");
+    fmt.empty_line();
+}
+
+/// Emit a table of encoding recipe code size information.
+fn emit_recipe_sizing(isa: &TargetIsa, fmt: &mut Formatter) {
+    fmt.doc_comment(format!(
+        r#"{} recipe sizing descriptors, using the same recipe index spaces as the one
+        specified by the corresponding binemit file. These are used to compute the final size of
+        an instruction, as well as to compute the range of branches."#,
+        isa.name
+    ));
+    fmtln!(
+        fmt,
+        "static RECIPE_SIZING: [RecipeSizing; {}] = [",
+        isa.recipes.len()
+    );
+    fmt.indent(|fmt| {
+        for recipe in isa.recipes.values() {
+            fmt.comment(format!("Code size information for recipe {}:", recipe.name));
+            fmt.line("RecipeSizing {");
+            fmt.indent(|fmt| {
+                fmtln!(fmt, "base_size: {},", recipe.base_size);
+                fmtln!(fmt, "compute_size: {},", recipe.compute_size);
+                if let Some(range) = &recipe.branch_range {
+                    fmtln!(
+                        fmt,
+                        "branch_range: Some(BranchRange {{ origin: {}, bits: {} }}),",
+                        range.inst_size,
+                        range.range
+                    );
+                } else {
+                    fmt.line("branch_range: None,");
+                }
+            });
+            fmt.line("},");
+        }
+    });
+    fmtln!(fmt, "];");
+    fmt.empty_line();
+}
+
+/// Level 1 table mapping types to `Level2` objects.
+struct Level1Table<'cpu_mode> {
+    cpu_mode: &'cpu_mode CpuMode,
+    legalize_code: TransformGroupIndex,
+
+    table_map: HashMap<Option<ValueType>, usize>,
+    table_vec: Vec<Level2Table>,
+}
+
+impl<'cpu_mode> Level1Table<'cpu_mode> {
+    fn new(cpu_mode: &'cpu_mode CpuMode) -> Self {
+        Self {
+            cpu_mode,
+            legalize_code: cpu_mode.get_default_legalize_code(),
+            table_map: HashMap::new(),
+            table_vec: Vec::new(),
+        }
+    }
+
+    /// Returns the level 2 table for the given type; in this context, `None` means the
+    /// instruction is monomorphic.
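+    /// (E.g. an `iadd.i32` encoding is filed under `Some(I32)`, while a non-polymorphic
+    /// instruction like `trap` lives in the `None` table; illustration only.)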
+    fn l2table_for(&mut self, typ: Option<ValueType>) -> &mut Level2Table {
+        let cpu_mode = &self.cpu_mode;
+        let index = match self.table_map.get(&typ) {
+            Some(&index) => index,
+            None => {
+                let legalize_code = cpu_mode.get_legalize_code_for(&typ);
+                let table = Level2Table::new(typ.clone(), legalize_code);
+                let index = self.table_vec.len();
+                self.table_map.insert(typ, index);
+                self.table_vec.push(table);
+                index
+            }
+        };
+        self.table_vec.get_mut(index).unwrap()
+    }
+
+    fn l2tables(&mut self) -> Vec<&mut Level2Table> {
+        self.table_vec
+            .iter_mut()
+            .filter(|table| !table.is_empty())
+            .collect::<Vec<_>>()
+    }
+}
+
+struct Level2HashTableEntry {
+    inst_name: String,
+    offset: usize,
+}
+
+/// Level 2 table mapping instruction opcodes to `EncList` objects.
+///
+/// A level 2 table can be completely empty if it only holds a custom legalization action for
+/// `ty`.
+struct Level2Table {
+    typ: Option<ValueType>,
+    legalize_code: TransformGroupIndex,
+    inst_to_encodings: BTreeMap<String, EncodingList>,
+    hash_table_offset: Option<usize>,
+    hash_table_len: Option<usize>,
+}
+
+impl Level2Table {
+    fn new(typ: Option<ValueType>, legalize_code: TransformGroupIndex) -> Self {
+        Self {
+            typ,
+            legalize_code,
+            inst_to_encodings: BTreeMap::new(),
+            hash_table_offset: None,
+            hash_table_len: None,
+        }
+    }
+
+    fn enclist_for(&mut self, inst: &Instruction) -> &mut EncodingList {
+        let copied_typ = self.typ.clone();
+        self.inst_to_encodings
+            .entry(inst.name.clone())
+            .or_insert_with(|| EncodingList::new(inst, copied_typ))
+    }
+
+    fn enclists(&mut self) -> btree_map::ValuesMut<'_, String, EncodingList> {
+        self.inst_to_encodings.values_mut()
+    }
+
+    fn is_empty(&self) -> bool {
+        self.inst_to_encodings.is_empty()
+    }
+
+    fn layout_hashtable(
+        &mut self,
+        level2_hashtables: &mut Vec<Option<Level2HashTableEntry>>,
+        level2_doc: &mut HashMap<usize, Vec<String>>,
+    ) {
+        let hash_table = generate_table(
+            self.inst_to_encodings.values(),
+            self.inst_to_encodings.len(),
+            // TODO the Python code wanted opcode numbers to start from 1.
+            |enc_list| enc_list.inst.opcode_number.index() + 1,
+        );
+
+        let hash_table_offset = level2_hashtables.len();
+        let hash_table_len = hash_table.len();
+
+        assert!(self.hash_table_offset.is_none());
+        assert!(self.hash_table_len.is_none());
+        self.hash_table_offset = Some(hash_table_offset);
+        self.hash_table_len = Some(hash_table_len);
+
+        level2_hashtables.extend(hash_table.iter().map(|opt_enc_list| {
+            opt_enc_list.map(|enc_list| Level2HashTableEntry {
+                inst_name: enc_list.inst.camel_name.clone(),
+                offset: enc_list.offset.unwrap(),
+            })
+        }));
+
+        let typ_comment = match &self.typ {
+            Some(ty) => ty.to_string(),
+            None => "typeless".into(),
+        };
+
+        level2_doc.get_or_default(hash_table_offset).push(format!(
+            "{:06x}: {}, {} entries",
+            hash_table_offset, typ_comment, hash_table_len
+        ));
+    }
+}
+
+/// The u16 values in an encoding list entry are interpreted as follows:
+///
+/// NR = len(all_recipes)
+///
+/// entry < 2*NR
+///     Try Encoding(entry/2, next_entry) if the recipe predicate is satisfied.
+///     If bit 0 is set, stop with the default legalization code.
+///     If bit 0 is clear, keep going down the list.
+/// entry < PRED_START
+///     Stop with legalization code `entry - 2*NR`.
+///
+/// Remaining entries are interpreted as (skip, pred) pairs, where:
+///
+/// skip = (entry - PRED_START) >> PRED_BITS
+/// pred = (entry - PRED_START) & PRED_MASK
+///
+/// If the predicate is satisfied, keep going. Otherwise skip over the next
+/// `skip` entries. If skip == 0, stop with the default legalization code.
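+///
+/// For example (entry value invented for illustration): with PRED_START = 0x1000 and
+/// PRED_BITS = 12, the entry 0x3005 decodes to skip = (0x3005 - 0x1000) >> 12 = 2 and
+/// pred = (0x3005 - 0x1000) & PRED_MASK = 5.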
+///
+/// The `pred` predicate number is interpreted as an instruction predicate if it
+/// is in range, otherwise an ISA predicate.
+
+/// Encoding lists are represented as u16 arrays.
+const CODE_BITS: usize = 16;
+
+/// Beginning of the predicate code words.
+const PRED_START: u16 = 0x1000;
+
+/// Number of bits used to hold a predicate number (instruction + ISA predicates).
+const PRED_BITS: usize = 12;
+
+/// Mask for extracting the predicate number.
+const PRED_MASK: usize = (1 << PRED_BITS) - 1;
+
+/// Encoder for the list format above.
+struct Encoder {
+    num_instruction_predicates: usize,
+
+    /// u16 encoding list words.
+    words: Vec<u16>,
+
+    /// Documentation comments: Index into `words` + comment.
+    docs: Vec<(usize, String)>,
+}
+
+impl Encoder {
+    fn new(num_instruction_predicates: usize) -> Self {
+        Self {
+            num_instruction_predicates,
+            words: Vec::new(),
+            docs: Vec::new(),
+        }
+    }
+
+    /// Add a recipe+bits entry to the list.
+    fn recipe(&mut self, recipes: &Recipes, enc: &Encoding, is_final: bool) {
+        let code = (2 * enc.recipe.index() + if is_final { 1 } else { 0 }) as u16;
+        assert!(code < PRED_START);
+
+        let doc = format!(
+            "--> {}{}",
+            enc.to_rust_comment(recipes),
+            if is_final { " and stop" } else { "" }
+        );
+        self.docs.push((self.words.len(), doc));
+
+        self.words.push(code);
+        self.words.push(enc.encbits);
+    }
+
+    /// Add a predicate entry.
+    fn pred(&mut self, pred_comment: String, skip: usize, n: usize) {
+        assert!(n <= PRED_MASK);
+        let entry = (PRED_START as usize) + (n | (skip << PRED_BITS));
+        assert!(entry < (1 << CODE_BITS));
+        let entry = entry as u16;
+
+        let doc = if skip == 0 {
+            "stop".to_string()
+        } else {
+            format!("skip {}", skip)
+        };
+        let doc = format!("{} unless {}", doc, pred_comment);
+
+        self.docs.push((self.words.len(), doc));
+        self.words.push(entry);
+    }
+
+    /// Add an instruction predicate entry.
+    fn inst_predicate(&mut self, pred: InstructionPredicateNumber, skip: usize) {
+        let number = pred.index();
+        let pred_comment = format!("inst_predicate_{}", number);
+        self.pred(pred_comment, skip, number);
+    }
+
+    /// Add an ISA predicate entry.
+    fn isa_predicate(&mut self, pred: SettingPredicateNumber, skip: usize) {
+        // ISA predicates follow the instruction predicates.
+        let n = self.num_instruction_predicates + (pred as usize);
+        let pred_comment = format!("PredicateView({})", pred);
+        self.pred(pred_comment, skip, n);
+    }
+}
+
+/// List of instructions for encoding a given type + opcode pair.
+///
+/// An encoding list contains a sequence of predicates and encoding recipes, all encoded as u16
+/// values.
+struct EncodingList {
+    inst: Instruction,
+    typ: Option<ValueType>,
+    encodings: Vec<Encoding>,
+    offset: Option<usize>,
+}
+
+impl EncodingList {
+    fn new(inst: &Instruction, typ: Option<ValueType>) -> Self {
+        Self {
+            inst: inst.clone(),
+            typ,
+            encodings: Default::default(),
+            offset: None,
+        }
+    }
+
+    /// Encode this list as a sequence of u16 numbers.
+    ///
+    /// Adds the sequence to `enc_lists` and records the returned offset as
+    /// `self.offset`.
+    ///
+    /// Adds comment lines to `enc_lists_doc` keyed by enc_lists offsets.
+    fn encode(
+        &mut self,
+        isa: &TargetIsa,
+        cpu_mode: &CpuMode,
+        enc_lists: &mut UniqueSeqTable<u16>,
+        enc_lists_doc: &mut HashMap<usize, Vec<String>>,
+    ) {
+        assert!(!self.encodings.is_empty());
+
+        let mut encoder = Encoder::new(isa.encodings_predicates.len());
+
+        let mut index = 0;
+        while index < self.encodings.len() {
+            let encoding = &self.encodings[index];
+
+            // Check how many of the following encodings have the same ISA predicate and
+            // instruction predicate, so as to reduce the number of tests carried out by the
+            // encoding list interpreter.
+            //
+            // Encodings with similar tests are hereby called a group. The group includes the
+            // current encoding we're looking at.
+            let (isa_predicate, inst_predicate) =
+                (&encoding.isa_predicate, &encoding.inst_predicate);
+
+            let group_size = {
+                let mut group_size = 1;
+                while index + group_size < self.encodings.len() {
+                    let next_encoding = &self.encodings[index + group_size];
+                    if &next_encoding.inst_predicate != inst_predicate
+                        || &next_encoding.isa_predicate != isa_predicate
+                    {
+                        break;
+                    }
+                    group_size += 1;
+                }
+                group_size
+            };
+
+            let is_last_group = index + group_size == self.encodings.len();
+
+            // The number of entries to skip when a predicate isn't satisfied is the size of both
+            // predicates + the size of the group, minus one (for this predicate). Each recipe
+            // entry has a size of two u16 (recipe index + bits).
+            let mut skip = if is_last_group {
+                0
+            } else {
+                let isap_size = match isa_predicate {
+                    Some(_) => 1,
+                    None => 0,
+                };
+                let instp_size = match inst_predicate {
+                    Some(_) => 1,
+                    None => 0,
+                };
+                isap_size + instp_size + group_size * 2 - 1
+            };
+
+            if let Some(pred) = isa_predicate {
+                encoder.isa_predicate(*pred, skip);
+                if !is_last_group {
+                    skip -= 1;
+                }
+            }
+
+            if let Some(pred) = inst_predicate {
+                encoder.inst_predicate(*pred, skip);
+                // No need to update skip, it's dead after this point.
+            }
+
+            for i in 0..group_size {
+                let encoding = &self.encodings[index + i];
+                let is_last_encoding = index + i == self.encodings.len() - 1;
+                encoder.recipe(&isa.recipes, encoding, is_last_encoding);
+            }
+
+            index += group_size;
+        }
+
+        assert!(self.offset.is_none());
+        let offset = enc_lists.add(&encoder.words);
+        self.offset = Some(offset);
+
+        // Doc comments.
+        let recipe_typ_mode_name = format!(
+            "{}{} ({})",
+            self.inst.name,
+            if let Some(typ) = &self.typ {
+                format!(".{}", typ.to_string())
+            } else {
+                "".into()
+            },
+            cpu_mode.name
+        );
+
+        enc_lists_doc
+            .get_or_default(offset)
+            .push(format!("{:06x}: {}", offset, recipe_typ_mode_name));
+        for (pos, doc) in encoder.docs {
+            enc_lists_doc.get_or_default(offset + pos).push(doc);
+        }
+        enc_lists_doc
+            .get_or_default(offset + encoder.words.len())
+            .insert(0, format!("end of {}", recipe_typ_mode_name));
+    }
+}
+
+fn make_tables(cpu_mode: &CpuMode) -> Level1Table {
+    let mut table = Level1Table::new(cpu_mode);
+
+    for encoding in &cpu_mode.encodings {
+        table
+            .l2table_for(encoding.bound_type.clone())
+            .enclist_for(encoding.inst())
+            .encodings
+            .push(encoding.clone());
+    }
+
+    // Ensure there are level 1 table entries for all types with a custom legalize action.
+    for value_type in cpu_mode.get_legalized_types() {
+        table.l2table_for(Some(value_type.clone()));
+    }
+    // ... and also for monomorphic instructions.
+    table.l2table_for(None);
+
+    table
+}
+
+/// Compute encodings and doc comments for encoding lists in `level1`.
+fn encode_enclists(
+    isa: &TargetIsa,
+    cpu_mode: &CpuMode,
+    level1: &mut Level1Table,
+    enc_lists: &mut UniqueSeqTable<u16>,
+    enc_lists_doc: &mut HashMap<usize, Vec<String>>,
+) {
+    for level2 in level1.l2tables() {
+        for enclist in level2.enclists() {
+            enclist.encode(isa, cpu_mode, enc_lists, enc_lists_doc);
+        }
+    }
+}
+
+fn encode_level2_hashtables<'a>(
+    level1: &'a mut Level1Table,
+    level2_hashtables: &mut Vec<Option<Level2HashTableEntry>>,
+    level2_doc: &mut HashMap<usize, Vec<String>>,
+) {
+    for level2 in level1.l2tables() {
+        level2.layout_hashtable(level2_hashtables, level2_doc);
+    }
+}
+
+fn emit_encoding_tables(defs: &SharedDefinitions, isa: &TargetIsa, fmt: &mut Formatter) {
+    // Level 1 tables, one per CPU mode.
+    let mut level1_tables: HashMap<&'static str, Level1Table> = HashMap::new();
+
+    // Single table containing all the level2 hash tables.
+    let mut level2_hashtables = Vec::new();
+    let mut level2_doc: HashMap<usize, Vec<String>> = HashMap::new();
+
+    // Tables for encoding lists with comments.
+    let mut enc_lists = UniqueSeqTable::new();
+    let mut enc_lists_doc = HashMap::new();
+
+    for cpu_mode in &isa.cpu_modes {
+        level2_doc
+            .get_or_default(level2_hashtables.len())
+            .push(cpu_mode.name.into());
+
+        let mut level1 = make_tables(cpu_mode);
+
+        encode_enclists(
+            isa,
+            cpu_mode,
+            &mut level1,
+            &mut enc_lists,
+            &mut enc_lists_doc,
+        );
+        encode_level2_hashtables(&mut level1, &mut level2_hashtables, &mut level2_doc);
+
+        level1_tables.insert(cpu_mode.name, level1);
+    }
+
+    // Compute an appropriate Rust integer type to use for offsets into a table of the given
+    // length.
+    let offset_type = |length: usize| {
+        if length <= 0x10000 {
+            "u16"
+        } else {
+            assert!(u32::try_from(length).is_ok(), "table too big!");
+            "u32"
+        }
+    };
+
+    let level1_offset_type = offset_type(level2_hashtables.len());
+    let level2_offset_type = offset_type(enc_lists.len());
+
+    // Emit encoding lists.
+    fmt.doc_comment(
+        format!(r#"{} encoding lists.
+
+        This contains the entire encodings bytecode for every single instruction; the encodings
+        interpreter knows where to start from thanks to the initial lookup in the level 1 and
+        level 2 table entries below."#, isa.name)
+    );
+    fmtln!(fmt, "pub static ENCLISTS: [u16; {}] = [", enc_lists.len());
+    fmt.indent(|fmt| {
+        let mut line = Vec::new();
+        for (index, entry) in enc_lists.iter().enumerate() {
+            if let Some(comments) = enc_lists_doc.get(&index) {
+                if !line.is_empty() {
+                    fmtln!(fmt, "{},", line.join(", "));
+                    line.clear();
+                }
+                for comment in comments {
+                    fmt.comment(comment);
+                }
+            }
+            line.push(format!("{:#06x}", entry));
+        }
+        if !line.is_empty() {
+            fmtln!(fmt, "{},", line.join(", "));
+        }
+    });
+    fmtln!(fmt, "];");
+    fmt.empty_line();
+
+    // Emit the full concatenation of level 2 hash tables.
+    fmt.doc_comment(format!(
+        r#"{} level 2 hash tables.
+
+        This hash table, keyed by instruction opcode, contains all the starting offsets for the
+        encodings interpreter, for all the CPU modes. It is jumped to after a lookup on the
+        instruction's controlling type in the level 1 hash table."#,
+        isa.name
+    ));
+    fmtln!(
+        fmt,
+        "pub static LEVEL2: [Level2Entry<{}>; {}] = [",
+        level2_offset_type,
+        level2_hashtables.len()
+    );
+    fmt.indent(|fmt| {
+        for (offset, entry) in level2_hashtables.iter().enumerate() {
+            if let Some(comments) = level2_doc.get(&offset) {
+                for comment in comments {
+                    fmt.comment(comment);
+                }
+            }
+            if let Some(entry) = entry {
+                fmtln!(
+                    fmt,
+                    "Level2Entry {{ opcode: Some(crate::ir::Opcode::{}), offset: {:#08x} }},",
+                    entry.inst_name,
+                    entry.offset
+                );
+            } else {
+                fmt.line("Level2Entry { opcode: None, offset: 0 },");
+            }
+        }
+    });
+    fmtln!(fmt, "];");
+    fmt.empty_line();
+
+    // Emit a level 1 hash table for each CPU mode.
+    for cpu_mode in &isa.cpu_modes {
+        let level1 = &level1_tables.get(cpu_mode.name).unwrap();
+        let hash_table = generate_table(
+            level1.table_vec.iter(),
+            level1.table_vec.len(),
+            |level2_table| {
+                if let Some(typ) = &level2_table.typ {
+                    typ.number().expect("type without a number") as usize
+                } else {
+                    0
+                }
+            },
+        );
+
+        fmt.doc_comment(format!(
+            r#"{} level 1 hash table for the CPU mode {}.
+
+            This hash table, keyed by instruction controlling type, contains all the level 2
+            hash-tables offsets for the given CPU mode, as well as a legalization identifier
+            indicating which legalization scheme to apply when the instruction doesn't have any
+            valid encoding for this CPU mode.
+            "#,
+            isa.name, cpu_mode.name
+        ));
+        fmtln!(
+            fmt,
+            "pub static LEVEL1_{}: [Level1Entry<{}>; {}] = [",
+            cpu_mode.name.to_uppercase(),
+            level1_offset_type,
+            hash_table.len()
+        );
+        fmt.indent(|fmt| {
+            for opt_level2 in hash_table {
+                let level2 = match opt_level2 {
+                    None => {
+                        // Empty hash table entry. Include the default legalization action.
+                        fmtln!(fmt, "Level1Entry {{ ty: ir::types::INVALID, log2len: !0, offset: 0, legalize: {} }},",
+                            isa.translate_group_index(level1.legalize_code));
+                        continue;
+                    }
+                    Some(level2) => level2,
+                };
+
+                let legalize_comment = defs.transform_groups.get(level2.legalize_code).name;
+                let legalize_code = isa.translate_group_index(level2.legalize_code);
+
+                let typ_name = if let Some(typ) = &level2.typ {
+                    typ.rust_name()
+                } else {
+                    "ir::types::INVALID".into()
+                };
+
+                if level2.is_empty() {
+                    // Empty level 2 table: Only a specialized legalization action, no actual
+                    // table.
+                    // Set an offset that is out of bounds, but make sure it doesn't overflow its
+                    // type when adding `1<<log2len`.
+                    fmtln!(fmt, "Level1Entry {{ ty: {}, log2len: 0, offset: !0 - 1, legalize: {} }}, // {}",
+                        typ_name, legalize_code, legalize_comment);
+                    continue;
+                }
+
+                // Proper level 2 hash table.
+                let l2l = (level2.hash_table_len.unwrap() as f64).log2() as usize;
+                assert!(l2l > 0, "Level2 hash table was too small.");
+                fmtln!(fmt, "Level1Entry {{ ty: {}, log2len: {}, offset: {:#08x}, legalize: {} }}, // {}",
+                    typ_name, l2l, level2.hash_table_offset.unwrap(), legalize_code, legalize_comment);
+            }
+        });
+        fmtln!(fmt, "];");
+        fmt.empty_line();
+    }
+}
+
+fn gen_isa(defs: &SharedDefinitions, isa: &TargetIsa, fmt: &mut Formatter) {
+    // Make the `RECIPE_PREDICATES` table.
+    emit_recipe_predicates(isa, fmt);
+
+    // Make the `INST_PREDICATES` table.
+    emit_inst_predicates(isa, fmt);
+
+    emit_encoding_tables(defs, isa, fmt);
+
+    emit_recipe_names(isa, fmt);
+    emit_recipe_constraints(isa, fmt);
+    emit_recipe_sizing(isa, fmt);
+
+    // Finally, tie it all together in an `EncInfo`.
+ fmt.line("pub static INFO: isa::EncInfo = isa::EncInfo {"); + fmt.indent(|fmt| { + fmt.line("constraints: &RECIPE_CONSTRAINTS,"); + fmt.line("sizing: &RECIPE_SIZING,"); + fmt.line("names: &RECIPE_NAMES,"); + }); + fmt.line("};"); +} + +pub(crate) fn generate( + defs: &SharedDefinitions, + isa: &TargetIsa, + filename: &str, + out_dir: &str, +) -> Result<(), error::Error> { + let mut fmt = Formatter::new(); + gen_isa(defs, isa, &mut fmt); + fmt.update_file(filename, out_dir)?; + Ok(()) +} diff --git a/cranelift/codegen/meta/src/gen_inst.rs b/cranelift/codegen/meta/src/gen_inst.rs new file mode 100644 index 0000000000..af54257fea --- /dev/null +++ b/cranelift/codegen/meta/src/gen_inst.rs @@ -0,0 +1,1137 @@ +//! Generate instruction data (including opcodes, formats, builders, etc.). +use std::fmt; + +use cranelift_codegen_shared::constant_hash; +use cranelift_entity::EntityRef; + +use crate::cdsl::camel_case; +use crate::cdsl::formats::InstructionFormat; +use crate::cdsl::instructions::{AllInstructions, Instruction}; +use crate::cdsl::operands::Operand; +use crate::cdsl::typevar::{TypeSet, TypeVar}; + +use crate::error; +use crate::srcgen::{Formatter, Match}; +use crate::unique_table::{UniqueSeqTable, UniqueTable}; + +// TypeSet indexes are encoded in 8 bits, with `0xff` reserved. +const TYPESET_LIMIT: usize = 0xff; + +/// Generate an instruction format enumeration. +fn gen_formats(formats: &[&InstructionFormat], fmt: &mut Formatter) { + fmt.doc_comment( + r#" + An instruction format + + Every opcode has a corresponding instruction format + which is represented by both the `InstructionFormat` + and the `InstructionData` enums. + "#, + ); + fmt.line("#[derive(Copy, Clone, PartialEq, Eq, Debug)]"); + fmt.line("pub enum InstructionFormat {"); + fmt.indent(|fmt| { + for format in formats { + fmt.doc_comment(format.to_string()); + fmtln!(fmt, "{},", format.name); + } + }); + fmt.line("}"); + fmt.empty_line(); + + // Emit a From which also serves to verify that + // InstructionFormat and InstructionData are in sync. + fmt.line("impl<'a> From<&'a InstructionData> for InstructionFormat {"); + fmt.indent(|fmt| { + fmt.line("fn from(inst: &'a InstructionData) -> Self {"); + fmt.indent(|fmt| { + let mut m = Match::new("*inst"); + for format in formats { + m.arm( + format!("InstructionData::{}", format.name), + vec![".."], + format!("Self::{}", format.name), + ); + } + fmt.add_match(m); + }); + fmt.line("}"); + }); + fmt.line("}"); + fmt.empty_line(); +} + +/// Generate the InstructionData enum. +/// +/// Every variant must contain an `opcode` field. The size of `InstructionData` should be kept at +/// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a +/// `ValueList` to store the additional information out of line. 
+fn gen_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) {
+    fmt.line("#[derive(Clone, Debug)]");
+    fmt.line("#[allow(missing_docs)]");
+    fmt.line("pub enum InstructionData {");
+    fmt.indent(|fmt| {
+        for format in formats {
+            fmtln!(fmt, "{} {{", format.name);
+            fmt.indent(|fmt| {
+                fmt.line("opcode: Opcode,");
+                if format.typevar_operand.is_some() {
+                    if format.has_value_list {
+                        fmt.line("args: ValueList,");
+                    } else if format.num_value_operands == 1 {
+                        fmt.line("arg: Value,");
+                    } else {
+                        fmtln!(fmt, "args: [Value; {}],", format.num_value_operands);
+                    }
+                }
+                for field in &format.imm_fields {
+                    fmtln!(fmt, "{}: {},", field.member, field.kind.rust_type);
+                }
+            });
+            fmtln!(fmt, "},");
+        }
+    });
+    fmt.line("}");
+}
+
+fn gen_arguments_method(formats: &[&InstructionFormat], fmt: &mut Formatter, is_mut: bool) {
+    let (method, mut_, rslice, as_slice) = if is_mut {
+        (
+            "arguments_mut",
+            "mut ",
+            "core::slice::from_mut",
+            "as_mut_slice",
+        )
+    } else {
+        ("arguments", "", "core::slice::from_ref", "as_slice")
+    };
+
+    fmtln!(
+        fmt,
+        "pub fn {}<'a>(&'a {}self, pool: &'a {}ir::ValueListPool) -> &{}[Value] {{",
+        method,
+        mut_,
+        mut_,
+        mut_
+    );
+    fmt.indent(|fmt| {
+        let mut m = Match::new("*self");
+        for format in formats {
+            let name = format!("Self::{}", format.name);
+
+            // Formats with a value list put all of their arguments in the list. We don't split
+            // them up, just return it all as variable arguments. (I expect the distinction to go
+            // away).
+            if format.has_value_list {
+                m.arm(
+                    name,
+                    vec![format!("ref {}args", mut_), "..".to_string()],
+                    format!("args.{}(pool)", as_slice),
+                );
+                continue;
+            }
+
+            // Fixed args.
+            let mut fields = Vec::new();
+            let arg = if format.num_value_operands == 0 {
+                format!("&{}[]", mut_)
+            } else if format.num_value_operands == 1 {
+                fields.push(format!("ref {}arg", mut_));
+                format!("{}(arg)", rslice)
+            } else {
+                let arg = format!("args_arity{}", format.num_value_operands);
+                fields.push(format!("args: ref {}{}", mut_, arg));
+                arg
+            };
+            fields.push("..".into());
+
+            m.arm(name, fields, arg);
+        }
+        fmt.add_match(m);
+    });
+    fmtln!(fmt, "}");
+}
+
+/// Generate the boring parts of the InstructionData implementation.
+///
+/// These methods in `impl InstructionData` can be generated automatically from the instruction
+/// formats:
+///
+/// - `pub fn opcode(&self) -> Opcode`
+/// - `pub fn arguments(&self, &pool) -> &[Value]`
+/// - `pub fn arguments_mut(&mut self, &pool) -> &mut [Value]`
+/// - `pub fn take_value_list(&mut self) -> Option<ir::ValueList>`
+/// - `pub fn put_value_list(&mut self, args: ir::ValueList)`
+/// - `pub fn eq(&self, &other: Self, &pool) -> bool`
+/// - `pub fn hash<H: Hasher>(&self, state: &mut H, &pool)`
+fn gen_instruction_data_impl(formats: &[&InstructionFormat], fmt: &mut Formatter) {
+    fmt.line("impl InstructionData {");
+    fmt.indent(|fmt| {
+        fmt.doc_comment("Get the opcode of this instruction.");
+        fmt.line("pub fn opcode(&self) -> Opcode {");
+        fmt.indent(|fmt| {
+            let mut m = Match::new("*self");
+            for format in formats {
+                m.arm(
+                    format!("Self::{}", format.name),
+                    vec!["opcode", ".."],
+                    "opcode".to_string(),
+                );
+            }
+            fmt.add_match(m);
+        });
+        fmt.line("}");
+        fmt.empty_line();
+
+        fmt.doc_comment("Get the controlling type variable operand.");
+        fmt.line("pub fn typevar_operand(&self, pool: &ir::ValueListPool) -> Option<Value> {");
+        fmt.indent(|fmt| {
+            let mut m = Match::new("*self");
+            for format in formats {
+                let name = format!("Self::{}", format.name);
+                if format.typevar_operand.is_none() {
+                    m.arm(name, vec![".."], "None".to_string());
+                } else if format.has_value_list {
+                    // We keep all arguments in a value list.
+                    m.arm(
+                        name,
+                        vec!["ref args", ".."],
+                        format!("args.get({}, pool)", format.typevar_operand.unwrap()),
+                    );
+                } else if format.num_value_operands == 1 {
+                    m.arm(name, vec!["arg", ".."], "Some(arg)".to_string());
+                } else {
+                    // We have multiple value operands and an array `args`.
+                    // Which `args` index to use?
+                    let args = format!("args_arity{}", format.num_value_operands);
+                    m.arm(
+                        name,
+                        vec![format!("args: ref {}", args), "..".to_string()],
+                        format!("Some({}[{}])", args, format.typevar_operand.unwrap()),
+                    );
+                }
+            }
+            fmt.add_match(m);
+        });
+        fmt.line("}");
+        fmt.empty_line();
+
+        fmt.doc_comment("Get the value arguments to this instruction.");
+        gen_arguments_method(formats, fmt, false);
+        fmt.empty_line();
+
+        fmt.doc_comment(
+            r#"Get mutable references to the value arguments to this
+            instruction."#,
+        );
+        gen_arguments_method(formats, fmt, true);
+        fmt.empty_line();
+
+        fmt.doc_comment(
+            r#"
+            Take out the value list with all the value arguments and return
+            it.
+
+            This leaves the value list in the instruction empty. Use
+            `put_value_list` to put the value list back.
+        "#,
+        );
+        fmt.line("pub fn take_value_list(&mut self) -> Option<ir::ValueList> {");
+        fmt.indent(|fmt| {
+            let mut m = Match::new("*self");
+
+            for format in formats {
+                if format.has_value_list {
+                    m.arm(
+                        format!("Self::{}", format.name),
+                        vec!["ref mut args", ".."],
+                        "Some(args.take())".to_string(),
+                    );
+                }
+            }
+
+            m.arm_no_fields("_", "None");
+
+            fmt.add_match(m);
+        });
+        fmt.line("}");
+        fmt.empty_line();
+
+        fmt.doc_comment(
+            r#"
+            Put back a value list.
+
+            After removing a value list with `take_value_list()`, use this
+            method to put it back. It is required that this instruction has
+            a format that accepts a value list, and that the existing value
+            list is empty. This avoids leaking list pool memory.
+        "#,
+        );
+        fmt.line("pub fn put_value_list(&mut self, vlist: ir::ValueList) {");
+        fmt.indent(|fmt| {
+            fmt.line("let args = match *self {");
+            fmt.indent(|fmt| {
+                for format in formats {
+                    if format.has_value_list {
+                        fmtln!(fmt, "Self::{} {{ ref mut args, .. }} => args,", format.name);
+                    }
+                }
+                fmt.line("_ => panic!(\"No value list: {:?}\", self),");
+            });
+            fmt.line("};");
+            fmt.line("debug_assert!(args.is_empty(), \"Value list already in use\");");
+            fmt.line("*args = vlist;");
+        });
+        fmt.line("}");
+        fmt.empty_line();
+
+        fmt.doc_comment(
+            r#"
+            Compare two `InstructionData` for equality.
+
+            This operation requires a reference to a `ValueListPool` to
+            determine if the contents of any `ValueLists` are equal.
+        "#,
+        );
+        fmt.line("pub fn eq(&self, other: &Self, pool: &ir::ValueListPool) -> bool {");
+        fmt.indent(|fmt| {
+            fmt.line("if ::core::mem::discriminant(self) != ::core::mem::discriminant(other) {");
+            fmt.indent(|fmt| {
+                fmt.line("return false;");
+            });
+            fmt.line("}");
+
+            fmt.line("match (self, other) {");
+            fmt.indent(|fmt| {
+                for format in formats {
+                    let name = format!("&Self::{}", format.name);
+                    let mut members = vec!["opcode"];
+
+                    let args_eq = if format.typevar_operand.is_none() {
+                        None
+                    } else if format.has_value_list {
+                        members.push("args");
+                        Some("args1.as_slice(pool) == args2.as_slice(pool)")
+                    } else if format.num_value_operands == 1 {
+                        members.push("arg");
+                        Some("arg1 == arg2")
+                    } else {
+                        members.push("args");
+                        Some("args1 == args2")
+                    };
+
+                    for field in &format.imm_fields {
+                        members.push(field.member);
+                    }
+
+                    let pat1 = members
+                        .iter()
+                        .map(|x| format!("{}: ref {}1", x, x))
+                        .collect::<Vec<_>>()
+                        .join(", ");
+                    let pat2 = members
+                        .iter()
+                        .map(|x| format!("{}: ref {}2", x, x))
+                        .collect::<Vec<_>>()
+                        .join(", ");
+                    fmtln!(fmt, "({} {{ {} }}, {} {{ {} }}) => {{", name, pat1, name, pat2);
+                    fmt.indent(|fmt| {
+                        fmt.line("opcode1 == opcode2");
+                        for field in &format.imm_fields {
+                            fmtln!(fmt, "&& {}1 == {}2", field.member, field.member);
+                        }
+                        if let Some(args_eq) = args_eq {
+                            fmtln!(fmt, "&& {}", args_eq);
+                        }
+                    });
+                    fmtln!(fmt, "}");
+                }
+                fmt.line("_ => unreachable!()");
+            });
+            fmt.line("}");
+        });
+        fmt.line("}");
+        fmt.empty_line();
+
+        fmt.doc_comment(
+            r#"
+            Hash an `InstructionData`.
+
+            This operation requires a reference to a `ValueListPool` to
+            hash the contents of any `ValueLists`.
+        "#,
+        );
+        fmt.line("pub fn hash<H: ::core::hash::Hasher>(&self, state: &mut H, pool: &ir::ValueListPool) {");
+        fmt.indent(|fmt| {
+            fmt.line("match *self {");
+            fmt.indent(|fmt| {
+                for format in formats {
+                    let name = format!("Self::{}", format.name);
+                    let mut members = vec!["opcode"];
+
+                    let args = if format.typevar_operand.is_none() {
+                        "&()"
+                    } else if format.has_value_list {
+                        members.push("ref args");
+                        "args.as_slice(pool)"
+                    } else if format.num_value_operands == 1 {
+                        members.push("ref arg");
+                        "arg"
+                    } else {
+                        members.push("ref args");
+                        "args"
+                    };
+
+                    for field in &format.imm_fields {
+                        members.push(field.member);
+                    }
+                    let members = members.join(", ");
+
+                    fmtln!(fmt, "{}{{{}}} => {{", name, members); // beware the moustaches
+                    fmt.indent(|fmt| {
+                        fmt.line("::core::hash::Hash::hash( &::core::mem::discriminant(self), state);");
+                        fmt.line("::core::hash::Hash::hash(&opcode, state);");
+                        for field in &format.imm_fields {
+                            fmtln!(fmt, "::core::hash::Hash::hash(&{}, state);", field.member);
+                        }
+                        fmtln!(fmt, "::core::hash::Hash::hash({}, state);", args);
+                    });
+                    fmtln!(fmt, "}");
+                }
+            });
+            fmt.line("}");
+        });
+        fmt.line("}");
+    });
+    fmt.line("}");
+}
+
+fn gen_bool_accessor<T: Fn(&Instruction) -> bool>(
+    all_inst: &AllInstructions,
+    get_attr: T,
+    name: &'static str,
+    doc: &'static str,
+    fmt: &mut Formatter,
+) {
+    fmt.doc_comment(doc);
+    fmtln!(fmt, "pub fn {}(self) -> bool {{", name);
+    fmt.indent(|fmt| {
+        let mut m = Match::new("self");
+        for inst in all_inst.values() {
+            if get_attr(inst) {
+                m.arm_no_fields(format!("Self::{}", inst.camel_name), "true");
+            }
+        }
+        m.arm_no_fields("_", "false");
+        fmt.add_match(m);
+    });
+    fmtln!(fmt, "}");
+    fmt.empty_line();
+}
+
+fn gen_opcodes(all_inst: &AllInstructions, fmt: &mut Formatter) {
+    fmt.doc_comment(
+        r#"
+        An instruction opcode.
+
+        All instructions from all supported ISAs are present.
+    "#,
+    );
+    fmt.line("#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]");
+
+    // We explicitly set the discriminant of the first variant to 1, which allows us to take
+    // advantage of the NonZero optimization, meaning that wrapping enums can use the 0
+    // discriminant instead of increasing the size of the whole type, and so the size of
+    // `Option<Opcode>` is the same as `Opcode`'s.
+    fmt.line("pub enum Opcode {");
+    fmt.indent(|fmt| {
+        let mut is_first_opcode = true;
+        for inst in all_inst.values() {
+            fmt.doc_comment(format!("`{}`. ({})", inst, inst.format.name));
+
+            // Document polymorphism.
+            if let Some(poly) = &inst.polymorphic_info {
+                if poly.use_typevar_operand {
+                    let op_num = inst.value_opnums[inst.format.typevar_operand.unwrap()];
+                    fmt.doc_comment(format!(
+                        "Type inferred from `{}`.",
+                        inst.operands_in[op_num].name
+                    ));
+                }
+            }
+
+            // Enum variant itself.
+            if is_first_opcode {
+                assert!(inst.opcode_number.index() == 0);
+                // TODO the python crate requires opcode numbers to start from one.
+                fmtln!(fmt, "{} = 1,", inst.camel_name);
+                is_first_opcode = false;
+            } else {
+                fmtln!(fmt, "{},", inst.camel_name);
+            }
+        }
+    });
+    fmt.line("}");
+    fmt.empty_line();
+
+    fmt.line("impl Opcode {");
+    fmt.indent(|fmt| {
+        gen_bool_accessor(
+            all_inst,
+            |inst| inst.is_terminator,
+            "is_terminator",
+            "True for instructions that terminate the block.",
+            fmt,
+        );
+        gen_bool_accessor(
+            all_inst,
+            |inst| inst.is_branch,
+            "is_branch",
+            "True for all branch or jump instructions.",
+            fmt,
+        );
+        gen_bool_accessor(
+            all_inst,
+            |inst| inst.is_indirect_branch,
+            "is_indirect_branch",
+            "True for all indirect branch or jump instructions.",
+            fmt,
+        );
+        gen_bool_accessor(
+            all_inst,
+            |inst| inst.is_call,
+            "is_call",
+            "Is this a call instruction?",
+            fmt,
+        );
+        gen_bool_accessor(
+            all_inst,
+            |inst| inst.is_return,
+            "is_return",
+            "Is this a return instruction?",
+            fmt,
+        );
+        gen_bool_accessor(
+            all_inst,
+            |inst| inst.is_ghost,
+            "is_ghost",
+            "Is this a ghost instruction?",
+            fmt,
+        );
+        gen_bool_accessor(
+            all_inst,
+            |inst| inst.can_load,
+            "can_load",
+            "Can this instruction read from memory?",
+            fmt,
+        );
+        gen_bool_accessor(
+            all_inst,
+            |inst| inst.can_store,
+            "can_store",
+            "Can this instruction write to memory?",
+            fmt,
+        );
+        gen_bool_accessor(
+            all_inst,
+            |inst| inst.can_trap,
+            "can_trap",
+            "Can this instruction cause a trap?",
+            fmt,
+        );
+        gen_bool_accessor(
+            all_inst,
+            |inst| inst.other_side_effects,
+            "other_side_effects",
+            "Does this instruction have other side effects besides can_* flags?",
+            fmt,
+        );
+        gen_bool_accessor(
+            all_inst,
+            |inst| inst.writes_cpu_flags,
+            "writes_cpu_flags",
+            "Does this instruction write to CPU flags?",
+            fmt,
+        );
+    });
+    fmt.line("}");
+    fmt.empty_line();
+
+    // Generate a private opcode_format table.
+    fmtln!(
+        fmt,
+        "const OPCODE_FORMAT: [InstructionFormat; {}] = [",
+        all_inst.len()
+    );
+    fmt.indent(|fmt| {
+        for inst in all_inst.values() {
+            fmtln!(
+                fmt,
+                "InstructionFormat::{}, // {}",
+                inst.format.name,
+                inst.name
+            );
+        }
+    });
+    fmtln!(fmt, "];");
+    fmt.empty_line();
+
+    // Generate a private opcode_name function.
+    fmt.line("fn opcode_name(opc: Opcode) -> &\'static str {");
+    fmt.indent(|fmt| {
+        let mut m = Match::new("opc");
+        for inst in all_inst.values() {
+            m.arm_no_fields(
+                format!("Opcode::{}", inst.camel_name),
+                format!("\"{}\"", inst.name),
+            );
+        }
+        fmt.add_match(m);
+    });
+    fmt.line("}");
+    fmt.empty_line();
+
+    // Generate an opcode hash table for looking up opcodes by name.
+    let hash_table = constant_hash::generate_table(all_inst.values(), all_inst.len(), |inst| {
+        constant_hash::simple_hash(&inst.name)
+    });
+    fmtln!(
+        fmt,
+        "const OPCODE_HASH_TABLE: [Option<Opcode>; {}] = [",
+        hash_table.len()
+    );
+    fmt.indent(|fmt| {
+        for i in hash_table {
+            match i {
+                Some(i) => fmtln!(fmt, "Some(Opcode::{}),", i.camel_name),
+                None => fmtln!(fmt, "None,"),
+            }
+        }
+    });
+    fmtln!(fmt, "];");
+    fmt.empty_line();
+}
+
+/// Get the value type constraint for an SSA value operand, where
+/// `ctrl_typevar` is the controlling type variable.
+///
+/// Each operand constraint is represented as a string, one of:
+/// - `Concrete(vt)`, where `vt` is a value type name.
+/// - `Free(idx)` where `idx` is an index into `type_sets`.
+/// - `Same`, `Lane`, `AsBool` for controlling typevar-derived constraints.
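+///
+/// For instance (illustrative): a result that must have the same type as the controlling type
+/// variable yields `"Same"`, while an operand fixed to `i32` yields
+/// `"Concrete(ir::types::I32)"`.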
+fn get_constraint<'entries, 'table>(
+    operand: &'entries Operand,
+    ctrl_typevar: Option<&TypeVar>,
+    type_sets: &'table mut UniqueTable<'entries, TypeSet>,
+) -> String {
+    assert!(operand.is_value());
+    let type_var = operand.type_var().unwrap();
+
+    if let Some(typ) = type_var.singleton_type() {
+        return format!("Concrete({})", typ.rust_name());
+    }
+
+    if let Some(free_typevar) = type_var.free_typevar() {
+        if ctrl_typevar.is_some() && free_typevar != *ctrl_typevar.unwrap() {
+            assert!(type_var.base.is_none());
+            return format!("Free({})", type_sets.add(&type_var.get_raw_typeset()));
+        }
+    }
+
+    if let Some(base) = &type_var.base {
+        assert!(base.type_var == *ctrl_typevar.unwrap());
+        return camel_case(base.derived_func.name());
+    }
+
+    assert!(type_var == ctrl_typevar.unwrap());
+    "Same".into()
+}
+
+fn gen_bitset<'a, T: IntoIterator<Item = &'a u16>>(
+    iterable: T,
+    name: &'static str,
+    field_size: u8,
+    fmt: &mut Formatter,
+) {
+    let bits = iterable.into_iter().fold(0, |acc, x| {
+        assert!(x.is_power_of_two());
+        assert!(u32::from(*x) < (1 << u32::from(field_size)));
+        acc | x
+    });
+    fmtln!(fmt, "{}: BitSet::<u{}>({}),", name, field_size, bits);
+}
+
+fn iterable_to_string<I: fmt::Display, T: IntoIterator<Item = I>>(iterable: T) -> String {
+    let elems = iterable
+        .into_iter()
+        .map(|x| x.to_string())
+        .collect::<Vec<_>>()
+        .join(", ");
+    format!("{{{}}}", elems)
+}
+
+fn typeset_to_string(ts: &TypeSet) -> String {
+    let mut result = format!("TypeSet(lanes={}", iterable_to_string(&ts.lanes));
+    if !ts.ints.is_empty() {
+        result += &format!(", ints={}", iterable_to_string(&ts.ints));
+    }
+    if !ts.floats.is_empty() {
+        result += &format!(", floats={}", iterable_to_string(&ts.floats));
+    }
+    if !ts.bools.is_empty() {
+        result += &format!(", bools={}", iterable_to_string(&ts.bools));
+    }
+    if !ts.specials.is_empty() {
+        result += &format!(", specials=[{}]", iterable_to_string(&ts.specials));
+    }
+    if !ts.refs.is_empty() {
+        result += &format!(", refs={}", iterable_to_string(&ts.refs));
+    }
+    result += ")";
+    result
+}
+
+/// Generate the table of ValueTypeSets described by type_sets.
+pub(crate) fn gen_typesets_table(type_sets: &UniqueTable<TypeSet>, fmt: &mut Formatter) {
+    if type_sets.len() == 0 {
+        return;
+    }
+
+    fmt.comment("Table of value type sets.");
+    assert!(type_sets.len() <= TYPESET_LIMIT, "Too many type sets!");
+    fmtln!(
+        fmt,
+        "const TYPE_SETS: [ir::instructions::ValueTypeSet; {}] = [",
+        type_sets.len()
+    );
+    fmt.indent(|fmt| {
+        for ts in type_sets.iter() {
+            fmt.line("ir::instructions::ValueTypeSet {");
+            fmt.indent(|fmt| {
+                fmt.comment(typeset_to_string(ts));
+                gen_bitset(&ts.lanes, "lanes", 16, fmt);
+                gen_bitset(&ts.ints, "ints", 8, fmt);
+                gen_bitset(&ts.floats, "floats", 8, fmt);
+                gen_bitset(&ts.bools, "bools", 8, fmt);
+                gen_bitset(&ts.refs, "refs", 8, fmt);
+            });
+            fmt.line("},");
+        }
+    });
+    fmtln!(fmt, "];");
+}
+
+/// Generate value type constraints for all instructions.
+/// - Emit a compact constant table of ValueTypeSet objects.
+/// - Emit a compact constant table of OperandConstraint objects.
+/// - Emit an opcode-indexed table of instruction constraints.
+fn gen_type_constraints(all_inst: &AllInstructions, fmt: &mut Formatter) {
+    // Table of TypeSet instances.
+    let mut type_sets = UniqueTable::new();
+
+    // Table of operand constraint sequences (as tuples). Each operand
+    // constraint is represented as a string, one of:
+    // - `Concrete(vt)`, where `vt` is a value type name.
+    // - `Free(idx)` where `idx` is an index into `type_sets`.
+    // - `Same`, `Lane`, `AsBool` for controlling typevar-derived constraints.
+    let mut operand_seqs = UniqueSeqTable::new();
+
+    // Preload table with constraints for typical binops.
+    #[allow(clippy::useless_vec)]
+    operand_seqs.add(&vec!["Same".to_string(); 3]);
+
+    fmt.comment("Table of opcode constraints.");
+    fmtln!(
+        fmt,
+        "const OPCODE_CONSTRAINTS: [OpcodeConstraints; {}] = [",
+        all_inst.len()
+    );
+    fmt.indent(|fmt| {
+        for inst in all_inst.values() {
+            let (ctrl_typevar, ctrl_typeset) = if let Some(poly) = &inst.polymorphic_info {
+                let index = type_sets.add(&*poly.ctrl_typevar.get_raw_typeset());
+                (Some(&poly.ctrl_typevar), index)
+            } else {
+                (None, TYPESET_LIMIT)
+            };
+
+            // Collect constraints for the value results, not including `variable_args` results
+            // which are always special cased.
+            let mut constraints = Vec::new();
+            for &index in &inst.value_results {
+                constraints.push(get_constraint(&inst.operands_out[index], ctrl_typevar, &mut type_sets));
+            }
+            for &index in &inst.value_opnums {
+                constraints.push(get_constraint(&inst.operands_in[index], ctrl_typevar, &mut type_sets));
+            }
+
+            let constraint_offset = operand_seqs.add(&constraints);
+
+            let fixed_results = inst.value_results.len();
+            let fixed_values = inst.value_opnums.len();
+
+            // Can the controlling type variable be inferred from the designated operand?
+            let use_typevar_operand = if let Some(poly) = &inst.polymorphic_info {
+                poly.use_typevar_operand
+            } else {
+                false
+            };
+
+            // Can the controlling type variable be inferred from the result?
+            let use_result = fixed_results > 0 && inst.operands_out[inst.value_results[0]].type_var() == ctrl_typevar;
+
+            // Are we required to use the designated operand instead of the result?
+            let requires_typevar_operand = use_typevar_operand && !use_result;
+
+            fmt.comment(
+                format!("{}: fixed_results={}, use_typevar_operand={}, requires_typevar_operand={}, fixed_values={}",
+                    inst.camel_name,
+                    fixed_results,
+                    use_typevar_operand,
+                    requires_typevar_operand,
+                    fixed_values)
+            );
+            fmt.comment(format!("Constraints=[{}]", constraints
+                .iter()
+                .map(|x| format!("'{}'", x))
+                .collect::<Vec<_>>()
+                .join(", ")));
+            if let Some(poly) = &inst.polymorphic_info {
+                fmt.comment(format!("Polymorphic over {}", typeset_to_string(&poly.ctrl_typevar.get_raw_typeset())));
+            }
+
+            // Compute the bit field encoding, c.f. instructions.rs.
+            assert!(fixed_results < 8 && fixed_values < 8, "Bit field encoding too tight");
+            let mut flags = fixed_results; // 3 bits
+            if use_typevar_operand {
+                flags |= 1 << 3; // 4th bit
+            }
+            if requires_typevar_operand {
+                flags |= 1 << 4; // 5th bit
+            }
+            flags |= fixed_values << 5; // 6th bit and more
+
+            fmt.line("OpcodeConstraints {");
+            fmt.indent(|fmt| {
+                fmtln!(fmt, "flags: {:#04x},", flags);
+                fmtln!(fmt, "typeset_offset: {},", ctrl_typeset);
+                fmtln!(fmt, "constraint_offset: {},", constraint_offset);
+            });
+            fmt.line("},");
+        }
+    });
+    fmtln!(fmt, "];");
+    fmt.empty_line();
+
+    gen_typesets_table(&type_sets, fmt);
+    fmt.empty_line();
+
+    fmt.comment("Table of operand constraint sequences.");
+    fmtln!(
+        fmt,
+        "const OPERAND_CONSTRAINTS: [OperandConstraint; {}] = [",
+        operand_seqs.len()
+    );
+    fmt.indent(|fmt| {
+        for constraint in operand_seqs.iter() {
+            fmtln!(fmt, "OperandConstraint::{},", constraint);
+        }
+    });
+    fmtln!(fmt, "];");
+}
+
+/// Emit member initializers for an instruction format.
+fn gen_member_inits(format: &InstructionFormat, fmt: &mut Formatter) {
+    // Immediate operands.
+    // We have local variables with the same names as the members.
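+    //
+    // For a hypothetical format with one immediate member `imm` and two fixed
+    // value operands, the initializers emitted below would read:
+    //
+    //   imm,
+    //   args: [arg0, arg1],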
+ for f in &format.imm_fields { + fmtln!(fmt, "{},", f.member); + } + + // Value operands. + if format.has_value_list { + fmt.line("args,"); + } else if format.num_value_operands == 1 { + fmt.line("arg: arg0,"); + } else if format.num_value_operands > 1 { + let mut args = Vec::new(); + for i in 0..format.num_value_operands { + args.push(format!("arg{}", i)); + } + fmtln!(fmt, "args: [{}],", args.join(", ")); + } +} + +/// Emit a method for creating and inserting an instruction format. +/// +/// All instruction formats take an `opcode` argument and a `ctrl_typevar` argument for deducing +/// the result types. +fn gen_format_constructor(format: &InstructionFormat, fmt: &mut Formatter) { + // Construct method arguments. + let mut args = vec![ + "self".to_string(), + "opcode: Opcode".into(), + "ctrl_typevar: Type".into(), + ]; + + // Normal operand arguments. Start with the immediate operands. + for f in &format.imm_fields { + args.push(format!("{}: {}", f.member, f.kind.rust_type)); + } + + // Then the value operands. + if format.has_value_list { + // Take all value arguments as a finished value list. The value lists + // are created by the individual instruction constructors. + args.push("args: ir::ValueList".into()); + } else { + // Take a fixed number of value operands. + for i in 0..format.num_value_operands { + args.push(format!("arg{}: Value", i)); + } + } + + let proto = format!( + "{}({}) -> (Inst, &'f mut ir::DataFlowGraph)", + format.name, + args.join(", ") + ); + + fmt.doc_comment(format.to_string()); + fmt.line("#[allow(non_snake_case)]"); + fmtln!(fmt, "fn {} {{", proto); + fmt.indent(|fmt| { + // Generate the instruction data. + fmtln!(fmt, "let data = ir::InstructionData::{} {{", format.name); + fmt.indent(|fmt| { + fmt.line("opcode,"); + gen_member_inits(format, fmt); + }); + fmtln!(fmt, "};"); + fmt.line("self.build(data, ctrl_typevar)"); + }); + fmtln!(fmt, "}"); +} + +/// Emit a method for generating the instruction `inst`. +/// +/// The method will create and insert an instruction, then return the result values, or the +/// instruction reference itself for instructions that don't have results. +fn gen_inst_builder(inst: &Instruction, format: &InstructionFormat, fmt: &mut Formatter) { + // Construct method arguments. + let mut args = vec![if format.has_value_list { + "mut self" + } else { + "self" + } + .to_string()]; + + let mut args_doc = Vec::new(); + let mut rets_doc = Vec::new(); + + // The controlling type variable will be inferred from the input values if + // possible. Otherwise, it is the first method argument. 
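+    //
+    // As a sketch (names illustrative), a polymorphic instruction that cannot
+    // infer its controlling type variable from its inputs gets a builder
+    // method shaped like:
+    //
+    //   fn example_inst(self, ctrl_typevar: crate::ir::Type, arg0: Value) -> Value;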
+ if let Some(poly) = &inst.polymorphic_info { + if !poly.use_typevar_operand { + args.push(format!("{}: crate::ir::Type", poly.ctrl_typevar.name)); + args_doc.push(format!( + "- {} (controlling type variable): {}", + poly.ctrl_typevar.name, poly.ctrl_typevar.doc + )); + } + } + + let mut tmpl_types = Vec::new(); + let mut into_args = Vec::new(); + for op in &inst.operands_in { + let t = if op.is_immediate() { + let t = format!("T{}", tmpl_types.len() + 1); + tmpl_types.push(format!("{}: Into<{}>", t, op.kind.rust_type)); + into_args.push(op.name); + t + } else { + op.kind.rust_type.to_string() + }; + args.push(format!("{}: {}", op.name, t)); + args_doc.push(format!( + "- {}: {}", + op.name, + op.doc() + .expect("every instruction's input operand must be documented") + )); + } + + for op in &inst.operands_out { + rets_doc.push(format!( + "- {}: {}", + op.name, + op.doc() + .expect("every instruction's output operand must be documented") + )); + } + + let rtype = match inst.value_results.len() { + 0 => "Inst".into(), + 1 => "Value".into(), + _ => format!("({})", vec!["Value"; inst.value_results.len()].join(", ")), + }; + + let tmpl = if !tmpl_types.is_empty() { + format!("<{}>", tmpl_types.join(", ")) + } else { + "".into() + }; + + let proto = format!( + "{}{}({}) -> {}", + inst.snake_name(), + tmpl, + args.join(", "), + rtype + ); + + fmt.doc_comment(&inst.doc); + if !args_doc.is_empty() { + fmt.line("///"); + fmt.doc_comment("Inputs:"); + fmt.line("///"); + for doc_line in args_doc { + fmt.doc_comment(doc_line); + } + } + if !rets_doc.is_empty() { + fmt.line("///"); + fmt.doc_comment("Outputs:"); + fmt.line("///"); + for doc_line in rets_doc { + fmt.doc_comment(doc_line); + } + } + + fmt.line("#[allow(non_snake_case)]"); + fmtln!(fmt, "fn {} {{", proto); + fmt.indent(|fmt| { + // Convert all of the `Into<>` arguments. + for arg in &into_args { + fmtln!(fmt, "let {} = {}.into();", arg, arg); + } + + // Arguments for instruction constructor. + let first_arg = format!("Opcode::{}", inst.camel_name); + let mut args = vec![first_arg.as_str()]; + if let Some(poly) = &inst.polymorphic_info { + if poly.use_typevar_operand { + // Infer the controlling type variable from the input operands. + let op_num = inst.value_opnums[format.typevar_operand.unwrap()]; + fmtln!( + fmt, + "let ctrl_typevar = self.data_flow_graph().value_type({});", + inst.operands_in[op_num].name + ); + + // The format constructor will resolve the result types from the type var. + args.push("ctrl_typevar"); + } else { + // This was an explicit method argument. + args.push(&poly.ctrl_typevar.name); + } + } else { + // No controlling type variable needed. + args.push("types::INVALID"); + } + + // Now add all of the immediate operands to the constructor arguments. + for &op_num in &inst.imm_opnums { + args.push(inst.operands_in[op_num].name); + } + + // Finally, the value operands. + if format.has_value_list { + // We need to build a value list with all the arguments. + fmt.line("let mut vlist = ir::ValueList::default();"); + args.push("vlist"); + fmt.line("{"); + fmt.indent(|fmt| { + fmt.line("let pool = &mut self.data_flow_graph_mut().value_lists;"); + for op in &inst.operands_in { + if op.is_value() { + fmtln!(fmt, "vlist.push({}, pool);", op.name); + } else if op.is_varargs() { + fmtln!(fmt, "vlist.extend({}.iter().cloned(), pool);", op.name); + } + } + }); + fmt.line("}"); + } else { + // With no value list, we're guaranteed to just have a set of fixed value operands. 
+            for &op_num in &inst.value_opnums {
+                args.push(inst.operands_in[op_num].name);
+            }
+        }
+
+        // Call to the format constructor.
+        let fcall = format!("self.{}({})", format.name, args.join(", "));
+
+        if inst.value_results.is_empty() {
+            fmtln!(fmt, "{}.0", fcall);
+            return;
+        }
+
+        fmtln!(fmt, "let (inst, dfg) = {};", fcall);
+        if inst.value_results.len() == 1 {
+            fmt.line("dfg.first_result(inst)");
+        } else {
+            fmtln!(
+                fmt,
+                "let results = &dfg.inst_results(inst)[0..{}];",
+                inst.value_results.len()
+            );
+            fmtln!(
+                fmt,
+                "({})",
+                inst.value_results
+                    .iter()
+                    .enumerate()
+                    .map(|(i, _)| format!("results[{}]", i))
+                    .collect::<Vec<_>>()
+                    .join(", ")
+            );
+        }
+    });
+    fmtln!(fmt, "}")
+}
+
+/// Generate a Builder trait with methods for all instructions.
+fn gen_builder(
+    instructions: &AllInstructions,
+    formats: &[&InstructionFormat],
+    fmt: &mut Formatter,
+) {
+    fmt.doc_comment(
+        r#"
+        Convenience methods for building instructions.
+
+        The `InstBuilder` trait has one method per instruction opcode for
+        conveniently constructing the instruction with minimum arguments.
+        Polymorphic instructions infer their result types from the input
+        arguments when possible. In some cases, an explicit `ctrl_typevar`
+        argument is required.
+
+        The opcode methods return the new instruction's result values, or
+        the `Inst` itself for instructions that don't have any results.
+
+        There is also a method per instruction format. These methods all
+        return an `Inst`.
+        "#,
+    );
+    fmt.line("pub trait InstBuilder<'f>: InstBuilderBase<'f> {");
+    fmt.indent(|fmt| {
+        for inst in instructions.values() {
+            gen_inst_builder(inst, &*inst.format, fmt);
+            fmt.empty_line();
+        }
+        for (i, format) in formats.iter().enumerate() {
+            gen_format_constructor(format, fmt);
+            if i + 1 != formats.len() {
+                fmt.empty_line();
+            }
+        }
+    });
+    fmt.line("}");
+}
+
+pub(crate) fn generate(
+    formats: Vec<&InstructionFormat>,
+    all_inst: &AllInstructions,
+    opcode_filename: &str,
+    inst_builder_filename: &str,
+    out_dir: &str,
+) -> Result<(), error::Error> {
+    // Opcodes.
+    let mut fmt = Formatter::new();
+    gen_formats(&formats, &mut fmt);
+    gen_instruction_data(&formats, &mut fmt);
+    fmt.empty_line();
+    gen_instruction_data_impl(&formats, &mut fmt);
+    fmt.empty_line();
+    gen_opcodes(all_inst, &mut fmt);
+    gen_type_constraints(all_inst, &mut fmt);
+    fmt.update_file(opcode_filename, out_dir)?;
+
+    // Instruction builder.
+    let mut fmt = Formatter::new();
+    gen_builder(all_inst, &formats, &mut fmt);
+    fmt.update_file(inst_builder_filename, out_dir)?;
+
+    Ok(())
+}
diff --git a/cranelift/codegen/meta/src/gen_legalizer.rs b/cranelift/codegen/meta/src/gen_legalizer.rs
new file mode 100644
index 0000000000..759121894f
--- /dev/null
+++ b/cranelift/codegen/meta/src/gen_legalizer.rs
@@ -0,0 +1,727 @@
+//! Generate transformations to legalize instructions without encodings.
+use crate::cdsl::ast::{Def, DefPool, Expr, VarPool};
+use crate::cdsl::isa::TargetIsa;
+use crate::cdsl::operands::Operand;
+use crate::cdsl::type_inference::Constraint;
+use crate::cdsl::typevar::{TypeSet, TypeVar};
+use crate::cdsl::xform::{Transform, TransformGroup, TransformGroups};
+
+use crate::error;
+use crate::gen_inst::gen_typesets_table;
+use crate::srcgen::Formatter;
+use crate::unique_table::UniqueTable;
+
+use std::collections::{HashMap, HashSet};
+use std::iter::FromIterator;
+
+/// Given a `Def` node, emit code that extracts all the instruction fields from
+/// `pos.func.dfg[iref]`.
+///
+/// Create local variables named after the `Var` instances in `node`.
+///
+/// Also create a local variable named `predicate` with the value of the evaluated instruction
+/// predicate, or `true` if the node has no predicate.
+fn unwrap_inst(transform: &Transform, fmt: &mut Formatter) -> bool {
+    let var_pool = &transform.var_pool;
+    let def_pool = &transform.def_pool;
+
+    let def = def_pool.get(transform.src);
+    let apply = &def.apply;
+    let inst = &apply.inst;
+    let iform = &inst.format;
+
+    fmt.comment(format!(
+        "Unwrap fields from instruction format {}",
+        def.to_comment_string(&transform.var_pool)
+    ));
+
+    // Extract the Var arguments.
+    let arg_names = apply
+        .args
+        .iter()
+        .enumerate()
+        .filter(|(arg_num, _)| {
+            // Variable args are specially handled after extracting args.
+            !inst.operands_in[*arg_num].is_varargs()
+        })
+        .map(|(arg_num, arg)| match &arg {
+            Expr::Var(var_index) => var_pool.get(*var_index).name.as_ref(),
+            Expr::Literal(_) => {
+                let n = inst.imm_opnums.iter().position(|&i| i == arg_num).unwrap();
+                iform.imm_fields[n].member
+            }
+        })
+        .collect::<Vec<_>>()
+        .join(", ");
+
+    // Do we need an `args` binding for the values consumed by predicates?
+    let emit_args = iform.num_value_operands >= 1 || iform.has_value_list;
+
+    // We need a tuple:
+    // - if there's at least one value operand, then we emit a variable for the value, and the
+    //   value list as args.
+    // - otherwise, if the number of immediate operands, plus one if there is a value list,
+    //   exceeds one.
+    let need_tuple = if iform.num_value_operands >= 1 {
+        true
+    } else {
+        let mut imm_and_varargs = inst
+            .operands_in
+            .iter()
+            .filter(|op| op.is_immediate_or_entityref())
+            .count();
+        if iform.has_value_list {
+            imm_and_varargs += 1;
+        }
+        imm_and_varargs > 1
+    };
+
+    let maybe_args = if emit_args { ", args" } else { "" };
+    let defined_values = format!("{}{}", arg_names, maybe_args);
+
+    let tuple_or_value = if need_tuple {
+        format!("({})", defined_values)
+    } else {
+        defined_values
+    };
+
+    fmtln!(
+        fmt,
+        "let {} = if let ir::InstructionData::{} {{",
+        tuple_or_value,
+        iform.name
+    );
+
+    fmt.indent(|fmt| {
+        // Fields are encoded directly.
+        for field in &iform.imm_fields {
+            fmtln!(fmt, "{},", field.member);
+        }
+
+        if iform.has_value_list || iform.num_value_operands > 1 {
+            fmt.line("ref args,");
+        } else if iform.num_value_operands == 1 {
+            fmt.line("arg,");
+        }
+
+        fmt.line("..");
+        fmt.outdented_line("} = pos.func.dfg[inst] {");
+
+        if iform.has_value_list {
+            fmt.line("let args = args.as_slice(&pos.func.dfg.value_lists);");
+        } else if iform.num_value_operands == 1 {
+            fmt.line("let args = [arg];")
+        }
+
+        // Generate the values for the tuple.
+        let emit_one_value =
+            |fmt: &mut Formatter, needs_comma: bool, op_num: usize, op: &Operand| {
+                let comma = if needs_comma { "," } else { "" };
+                if op.is_immediate_or_entityref() {
+                    let n = inst.imm_opnums.iter().position(|&i| i == op_num).unwrap();
+                    fmtln!(fmt, "{}{}", iform.imm_fields[n].member, comma);
+                } else if op.is_value() {
+                    let n = inst.value_opnums.iter().position(|&i| i == op_num).unwrap();
+                    fmtln!(fmt, "pos.func.dfg.resolve_aliases(args[{}]),", n);
+                } else {
+                    // This is a value list argument or a varargs.
+ assert!(iform.has_value_list || op.is_varargs()); + } + }; + + if need_tuple { + fmt.line("("); + fmt.indent(|fmt| { + for (op_num, op) in inst.operands_in.iter().enumerate() { + let needs_comma = emit_args || op_num + 1 < inst.operands_in.len(); + emit_one_value(fmt, needs_comma, op_num, op); + } + if emit_args { + fmt.line("args"); + } + }); + fmt.line(")"); + } else { + // Only one of these can be true at the same time, otherwise we'd need a tuple. + emit_one_value(fmt, false, 0, &inst.operands_in[0]); + if emit_args { + fmt.line("args"); + } + } + + fmt.outdented_line("} else {"); + fmt.line(r#"unreachable!("bad instruction format")"#); + }); + fmtln!(fmt, "};"); + fmt.empty_line(); + + assert_eq!(inst.operands_in.len(), apply.args.len()); + for (i, op) in inst.operands_in.iter().enumerate() { + if op.is_varargs() { + let name = &var_pool + .get(apply.args[i].maybe_var().expect("vararg without name")) + .name; + let n = inst + .imm_opnums + .iter() + .chain(inst.value_opnums.iter()) + .max() + .copied() + .unwrap_or(0); + fmtln!(fmt, "let {} = &Vec::from(&args[{}..]);", name, n); + } + } + + for &op_num in &inst.value_opnums { + let arg = &apply.args[op_num]; + if let Some(var_index) = arg.maybe_var() { + let var = var_pool.get(var_index); + if var.has_free_typevar() { + fmtln!( + fmt, + "let typeof_{} = pos.func.dfg.value_type({});", + var.name, + var.name + ); + } + } + } + + // If the definition creates results, detach the values and place them in locals. + let mut replace_inst = false; + if !def.defined_vars.is_empty() { + if def.defined_vars + == def_pool + .get(var_pool.get(def.defined_vars[0]).dst_def.unwrap()) + .defined_vars + { + // Special case: The instruction replacing node defines the exact same values. + fmt.comment(format!( + "Results handled by {}.", + def_pool + .get(var_pool.get(def.defined_vars[0]).dst_def.unwrap()) + .to_comment_string(var_pool) + )); + + fmt.line("let r = pos.func.dfg.inst_results(inst);"); + for (i, &var_index) in def.defined_vars.iter().enumerate() { + let var = var_pool.get(var_index); + fmtln!(fmt, "let {} = &r[{}];", var.name, i); + fmtln!( + fmt, + "let typeof_{} = pos.func.dfg.value_type(*{});", + var.name, + var.name + ); + } + + replace_inst = true; + } else { + // Boring case: Detach the result values, capture them in locals. + for &var_index in &def.defined_vars { + fmtln!(fmt, "let {};", var_pool.get(var_index).name); + } + + fmt.line("{"); + fmt.indent(|fmt| { + fmt.line("let r = pos.func.dfg.inst_results(inst);"); + for i in 0..def.defined_vars.len() { + let var = var_pool.get(def.defined_vars[i]); + fmtln!(fmt, "{} = r[{}];", var.name, i); + } + }); + fmt.line("}"); + + for &var_index in &def.defined_vars { + let var = var_pool.get(var_index); + if var.has_free_typevar() { + fmtln!( + fmt, + "let typeof_{} = pos.func.dfg.value_type({});", + var.name, + var.name + ); + } + } + } + } + replace_inst +} + +fn build_derived_expr(tv: &TypeVar) -> String { + let base = match &tv.base { + Some(base) => base, + None => { + assert!(tv.name.starts_with("typeof_")); + return format!("Some({})", tv.name); + } + }; + let base_expr = build_derived_expr(&base.type_var); + format!( + "{}.map(|t: crate::ir::Type| t.{}())", + base_expr, + base.derived_func.name() + ) +} + +/// Emit rust code for the given check. +/// +/// The emitted code is a statement redefining the `predicate` variable like this: +/// let predicate = predicate && ... 
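+///
+/// For instance, an `InTypeset` constraint comes out roughly as (index and
+/// variable name illustrative):
+///
+/// ```text
+/// let predicate = predicate && TYPE_SETS[0].contains(typeof_x);
+/// ```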
+fn emit_runtime_typecheck<'a>(
+    constraint: &'a Constraint,
+    type_sets: &mut UniqueTable<'a, TypeSet>,
+    fmt: &mut Formatter,
+) {
+    match constraint {
+        Constraint::InTypeset(tv, ts) => {
+            let ts_index = type_sets.add(&ts);
+            fmt.comment(format!(
+                "{} must belong to {:?}",
+                tv.name,
+                type_sets.get(ts_index)
+            ));
+            fmtln!(
+                fmt,
+                "let predicate = predicate && TYPE_SETS[{}].contains({});",
+                ts_index,
+                tv.name
+            );
+        }
+        Constraint::Eq(tv1, tv2) => {
+            fmtln!(
+                fmt,
+                "let predicate = predicate && match ({}, {}) {{",
+                build_derived_expr(tv1),
+                build_derived_expr(tv2)
+            );
+            fmt.indent(|fmt| {
+                fmt.line("(Some(a), Some(b)) => a == b,");
+                fmt.comment("On overflow, constraint doesn\'t apply");
+                fmt.line("_ => false,");
+            });
+            fmtln!(fmt, "};");
+        }
+        Constraint::WiderOrEq(tv1, tv2) => {
+            fmtln!(
+                fmt,
+                "let predicate = predicate && match ({}, {}) {{",
+                build_derived_expr(tv1),
+                build_derived_expr(tv2)
+            );
+            fmt.indent(|fmt| {
+                fmt.line("(Some(a), Some(b)) => a.wider_or_equal(b),");
+                fmt.comment("On overflow, constraint doesn\'t apply");
+                fmt.line("_ => false,");
+            });
+            fmtln!(fmt, "};");
+        }
+    }
+}
+
+/// Determine if `def` represents one of the value splitting instructions: `isplit` or `vsplit`.
+/// These instructions are lowered specially by the `legalize::split` module.
+fn is_value_split(def: &Def) -> bool {
+    let name = &def.apply.inst.name;
+    name == "isplit" || name == "vsplit"
+}
+
+fn emit_dst_inst(def: &Def, def_pool: &DefPool, var_pool: &VarPool, fmt: &mut Formatter) {
+    let defined_vars = {
+        let vars = def
+            .defined_vars
+            .iter()
+            .map(|&var_index| var_pool.get(var_index).name.as_ref())
+            .collect::<Vec<_>>();
+        if vars.len() == 1 {
+            vars[0].to_string()
+        } else {
+            format!("({})", vars.join(", "))
+        }
+    };
+
+    if is_value_split(def) {
+        // Split instructions are not emitted with the builder, but by calling special functions in
+        // the `legalizer::split` module. These functions will eliminate concat-split patterns.
+        fmt.line("let curpos = pos.position();");
+        fmt.line("let srcloc = pos.srcloc();");
+        fmtln!(
+            fmt,
+            "let {} = split::{}(pos.func, cfg, curpos, srcloc, {});",
+            defined_vars,
+            def.apply.inst.snake_name(),
+            def.apply.args[0].to_rust_code(var_pool)
+        );
+        return;
+    }
+
+    if def.defined_vars.is_empty() {
+        // This node doesn't define any values, so just insert the new instruction.
+        fmtln!(
+            fmt,
+            "pos.ins().{};",
+            def.apply.rust_builder(&def.defined_vars, var_pool)
+        );
+        return;
+    }
+
+    if let Some(src_def0) = var_pool.get(def.defined_vars[0]).src_def {
+        if def.defined_vars == def_pool.get(src_def0).defined_vars {
+            // The replacement instruction defines the exact same values as the source pattern.
+            // Unwrapping would have left the results intact. Replace the whole instruction.
+            fmtln!(
+                fmt,
+                "let {} = pos.func.dfg.replace(inst).{};",
+                defined_vars,
+                def.apply.rust_builder(&def.defined_vars, var_pool)
+            );
+
+            // We need to bump the cursor so following instructions are inserted *after* the
+            // replaced instruction.
+            fmt.line("if pos.current_inst() == Some(inst) {");
+            fmt.indent(|fmt| {
+                fmt.line("pos.next_inst();");
+            });
+            fmt.line("}");
+            return;
+        }
+    }
+
+    // Insert a new instruction.
+    let mut builder = format!("let {} = pos.ins()", defined_vars);
+
+    if def.defined_vars.len() == 1 && var_pool.get(def.defined_vars[0]).is_output() {
+        // Reuse the single source result value.
+        builder = format!(
+            "{}.with_result({})",
+            builder,
+            var_pool.get(def.defined_vars[0]).to_rust_code()
+        );
+    } else if def
+        .defined_vars
+        .iter()
+        .any(|&var_index| var_pool.get(var_index).is_output())
+    {
+        // There is more than one output value that can be reused.
+        let array = def
+            .defined_vars
+            .iter()
+            .map(|&var_index| {
+                let var = var_pool.get(var_index);
+                if var.is_output() {
+                    format!("Some({})", var.name)
+                } else {
+                    "None".into()
+                }
+            })
+            .collect::<Vec<_>>()
+            .join(", ");
+        builder = format!("{}.with_results([{}])", builder, array);
+    }
+
+    fmtln!(
+        fmt,
+        "{}.{};",
+        builder,
+        def.apply.rust_builder(&def.defined_vars, var_pool)
+    );
+}
+
+/// Emit code for `transform`, assuming that the opcode of transform's root instruction
+/// has already been matched.
+///
+/// `inst: Inst` is the variable to be replaced. It is pointed to by `pos: Cursor`.
+/// `dfg: DataFlowGraph` is available and mutable.
+fn gen_transform<'a>(
+    replace_inst: bool,
+    transform: &'a Transform,
+    type_sets: &mut UniqueTable<'a, TypeSet>,
+    fmt: &mut Formatter,
+) {
+    // Evaluate the instruction predicate if any.
+    let apply = &transform.def_pool.get(transform.src).apply;
+
+    let inst_predicate = apply
+        .inst_predicate_with_ctrl_typevar(&transform.var_pool)
+        .rust_predicate("pos.func");
+
+    let has_extra_constraints = !transform.type_env.constraints.is_empty();
+    if has_extra_constraints {
+        // Extra constraints rely on the predicate being a variable that we can rebind as we add
+        // more constraint predicates.
+        if let Some(pred) = &inst_predicate {
+            fmt.multi_line(&format!("let predicate = {};", pred));
+        } else {
+            fmt.line("let predicate = true;");
+        }
+    }
+
+    // Emit any runtime checks; these will rebind `predicate` emitted right above.
+    for constraint in &transform.type_env.constraints {
+        emit_runtime_typecheck(constraint, type_sets, fmt);
+    }
+
+    let do_expand = |fmt: &mut Formatter| {
+        // Emit any constants that must be created before use.
+        for (name, value) in transform.const_pool.iter() {
+            fmtln!(
+                fmt,
+                "let {} = pos.func.dfg.constants.insert(vec!{:?}.into());",
+                name,
+                value
+            );
+        }
+
+        // If we are adding some blocks, we need to recall the original block, such that we can
+        // recompute it.
+        if !transform.block_pool.is_empty() {
+            fmt.line("let orig_block = pos.current_block().unwrap();");
+        }
+
+        // If we're going to delete `inst`, we need to detach its results first so they can be
+        // reattached during pattern expansion.
+        if !replace_inst {
+            fmt.line("pos.func.dfg.clear_results(inst);");
+        }
+
+        // Emit new block creation.
+        for block in &transform.block_pool {
+            let var = transform.var_pool.get(block.name);
+            fmtln!(fmt, "let {} = pos.func.dfg.make_block();", var.name);
+        }
+
+        // Emit the destination pattern.
+        for &def_index in &transform.dst {
+            if let Some(block) = transform.block_pool.get(def_index) {
+                let var = transform.var_pool.get(block.name);
+                fmtln!(fmt, "pos.insert_block({});", var.name);
+            }
+            emit_dst_inst(
+                transform.def_pool.get(def_index),
+                &transform.def_pool,
+                &transform.var_pool,
+                fmt,
+            );
+        }
+
+        // Insert a new block after the last instruction, if needed.
+        let def_next_index = transform.def_pool.next_index();
+        if let Some(block) = transform.block_pool.get(def_next_index) {
+            let var = transform.var_pool.get(block.name);
+            fmtln!(fmt, "pos.insert_block({});", var.name);
+        }
+
+        // Delete the original instruction if we didn't have an opportunity to replace it.
+ if !replace_inst { + fmt.line("let removed = pos.remove_inst();"); + fmt.line("debug_assert_eq!(removed, inst);"); + } + + if transform.block_pool.is_empty() { + if transform.def_pool.get(transform.src).apply.inst.is_branch { + // A branch might have been legalized into multiple branches, so we need to recompute + // the cfg. + fmt.line("cfg.recompute_block(pos.func, pos.current_block().unwrap());"); + } + } else { + // Update CFG for the new blocks. + fmt.line("cfg.recompute_block(pos.func, orig_block);"); + for block in &transform.block_pool { + let var = transform.var_pool.get(block.name); + fmtln!(fmt, "cfg.recompute_block(pos.func, {});", var.name); + } + } + + fmt.line("return true;"); + }; + + // Guard the actual expansion by `predicate`. + if has_extra_constraints { + fmt.line("if predicate {"); + fmt.indent(|fmt| { + do_expand(fmt); + }); + fmt.line("}"); + } else if let Some(pred) = &inst_predicate { + fmt.multi_line(&format!("if {} {{", pred)); + fmt.indent(|fmt| { + do_expand(fmt); + }); + fmt.line("}"); + } else { + // Unconditional transform (there was no predicate), just emit it. + do_expand(fmt); + } +} + +fn gen_transform_group<'a>( + group: &'a TransformGroup, + transform_groups: &TransformGroups, + type_sets: &mut UniqueTable<'a, TypeSet>, + fmt: &mut Formatter, +) { + fmt.doc_comment(group.doc); + fmt.line("#[allow(unused_variables,unused_assignments,unused_imports,non_snake_case)]"); + + // Function arguments. + fmtln!(fmt, "pub fn {}(", group.name); + fmt.indent(|fmt| { + fmt.line("inst: crate::ir::Inst,"); + fmt.line("func: &mut crate::ir::Function,"); + fmt.line("cfg: &mut crate::flowgraph::ControlFlowGraph,"); + fmt.line("isa: &dyn crate::isa::TargetIsa,"); + }); + fmtln!(fmt, ") -> bool {"); + + // Function body. + fmt.indent(|fmt| { + fmt.line("use crate::ir::InstBuilder;"); + fmt.line("use crate::cursor::{Cursor, FuncCursor};"); + fmt.line("let mut pos = FuncCursor::new(func).at_inst(inst);"); + fmt.line("pos.use_srcloc(inst);"); + + // Group the transforms by opcode so we can generate a big switch. + // Preserve ordering. + let mut inst_to_transforms = HashMap::new(); + for transform in &group.transforms { + let def_index = transform.src; + let inst = &transform.def_pool.get(def_index).apply.inst; + inst_to_transforms + .entry(inst.camel_name.clone()) + .or_insert_with(Vec::new) + .push(transform); + } + + let mut sorted_inst_names = Vec::from_iter(inst_to_transforms.keys()); + sorted_inst_names.sort(); + + fmt.line("{"); + fmt.indent(|fmt| { + fmt.line("match pos.func.dfg[inst].opcode() {"); + fmt.indent(|fmt| { + for camel_name in sorted_inst_names { + fmtln!(fmt, "ir::Opcode::{} => {{", camel_name); + fmt.indent(|fmt| { + let transforms = inst_to_transforms.get(camel_name).unwrap(); + + // Unwrap the source instruction, create local variables for the input variables. + let replace_inst = unwrap_inst(&transforms[0], fmt); + fmt.empty_line(); + + for (i, transform) in transforms.iter().enumerate() { + if i > 0 { + fmt.empty_line(); + } + gen_transform(replace_inst, transform, type_sets, fmt); + } + }); + fmtln!(fmt, "}"); + fmt.empty_line(); + } + + // Emit the custom transforms. The Rust compiler will complain about any overlap with + // the normal transforms. 
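+                // Such an arm delegates to a user-provided function, roughly
+                // (hypothetical `expand_trap` custom legalization):
+                //
+                //   ir::Opcode::Trap => {
+                //       expand_trap(inst, func, cfg, isa);
+                //       return true;
+                //   }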
+ let mut sorted_custom_legalizes = Vec::from_iter(&group.custom_legalizes); + sorted_custom_legalizes.sort(); + for (inst_camel_name, func_name) in sorted_custom_legalizes { + fmtln!(fmt, "ir::Opcode::{} => {{", inst_camel_name); + fmt.indent(|fmt| { + fmtln!(fmt, "{}(inst, func, cfg, isa);", func_name); + fmt.line("return true;"); + }); + fmtln!(fmt, "}"); + fmt.empty_line(); + } + + // We'll assume there are uncovered opcodes. + fmt.line("_ => {},"); + }); + fmt.line("}"); + }); + fmt.line("}"); + + // If we fall through, nothing was expanded; call the chain if any. + match &group.chain_with { + Some(group_id) => fmtln!( + fmt, + "{}(inst, func, cfg, isa)", + transform_groups.get(*group_id).rust_name() + ), + None => fmt.line("false"), + }; + }); + fmtln!(fmt, "}"); + fmt.empty_line(); +} + +/// Generate legalization functions for `isa` and add any shared `TransformGroup`s +/// encountered to `shared_groups`. +/// +/// Generate `TYPE_SETS` and `LEGALIZE_ACTIONS` tables. +fn gen_isa( + isa: &TargetIsa, + transform_groups: &TransformGroups, + shared_group_names: &mut HashSet<&'static str>, + fmt: &mut Formatter, +) { + let mut type_sets = UniqueTable::new(); + for group_index in isa.transitive_transform_groups(transform_groups) { + let group = transform_groups.get(group_index); + match group.isa_name { + Some(isa_name) => { + assert!( + isa_name == isa.name, + "ISA-specific legalizations must be used by the same ISA" + ); + gen_transform_group(group, transform_groups, &mut type_sets, fmt); + } + None => { + shared_group_names.insert(group.name); + } + } + } + + gen_typesets_table(&type_sets, fmt); + + let direct_groups = isa.direct_transform_groups(); + fmtln!( + fmt, + "pub static LEGALIZE_ACTIONS: [isa::Legalize; {}] = [", + direct_groups.len() + ); + fmt.indent(|fmt| { + for &group_index in direct_groups { + fmtln!(fmt, "{},", transform_groups.get(group_index).rust_name()); + } + }); + fmtln!(fmt, "];"); +} + +/// Generate the legalizer files. +pub(crate) fn generate( + isas: &[TargetIsa], + transform_groups: &TransformGroups, + filename_prefix: &str, + out_dir: &str, +) -> Result<(), error::Error> { + let mut shared_group_names = HashSet::new(); + + for isa in isas { + let mut fmt = Formatter::new(); + gen_isa(isa, transform_groups, &mut shared_group_names, &mut fmt); + fmt.update_file(format!("{}-{}.rs", filename_prefix, isa.name), out_dir)?; + } + + // Generate shared legalize groups. + let mut fmt = Formatter::new(); + let mut type_sets = UniqueTable::new(); + let mut sorted_shared_group_names = Vec::from_iter(shared_group_names); + sorted_shared_group_names.sort(); + for group_name in &sorted_shared_group_names { + let group = transform_groups.by_name(group_name); + gen_transform_group(group, transform_groups, &mut type_sets, &mut fmt); + } + gen_typesets_table(&type_sets, &mut fmt); + fmt.update_file(format!("{}r.rs", filename_prefix), out_dir)?; + + Ok(()) +} diff --git a/cranelift/codegen/meta/src/gen_registers.rs b/cranelift/codegen/meta/src/gen_registers.rs new file mode 100644 index 0000000000..fbb61beb37 --- /dev/null +++ b/cranelift/codegen/meta/src/gen_registers.rs @@ -0,0 +1,142 @@ +//! Generate the ISA-specific registers. 
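+//!
+//! The emitted file defines a static `RegInfo` listing every register bank,
+//! one `RegClassData` per register class, and an `RU` enum naming each
+//! register unit.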
+use crate::cdsl::isa::TargetIsa;
+use crate::cdsl::regs::{RegBank, RegClass};
+use crate::error;
+use crate::srcgen::Formatter;
+use cranelift_entity::EntityRef;
+
+fn gen_regbank(fmt: &mut Formatter, reg_bank: &RegBank) {
+    let names = if !reg_bank.names.is_empty() {
+        format!(r#""{}""#, reg_bank.names.join(r#"", ""#))
+    } else {
+        "".to_string()
+    };
+    fmtln!(fmt, "RegBank {");
+    fmt.indent(|fmt| {
+        fmtln!(fmt, r#"name: "{}","#, reg_bank.name);
+        fmtln!(fmt, "first_unit: {},", reg_bank.first_unit);
+        fmtln!(fmt, "units: {},", reg_bank.units);
+        fmtln!(fmt, "names: &[{}],", names);
+        fmtln!(fmt, r#"prefix: "{}","#, reg_bank.prefix);
+        fmtln!(fmt, "first_toprc: {},", reg_bank.toprcs[0].index());
+        fmtln!(fmt, "num_toprcs: {},", reg_bank.toprcs.len());
+        fmtln!(
+            fmt,
+            "pressure_tracking: {},",
+            if reg_bank.pressure_tracking {
+                "true"
+            } else {
+                "false"
+            }
+        );
+    });
+    fmtln!(fmt, "},");
+}
+
+fn gen_regclass(isa: &TargetIsa, reg_class: &RegClass, fmt: &mut Formatter) {
+    let reg_bank = isa.regs.banks.get(reg_class.bank).unwrap();
+
+    let mask: Vec<String> = reg_class
+        .mask(reg_bank.first_unit)
+        .iter()
+        .map(|x| format!("0x{:08x}", x))
+        .collect();
+    let mask = mask.join(", ");
+
+    fmtln!(
+        fmt,
+        "pub static {}_DATA: RegClassData = RegClassData {{",
+        reg_class.name
+    );
+    fmt.indent(|fmt| {
+        fmtln!(fmt, r#"name: "{}","#, reg_class.name);
+        fmtln!(fmt, "index: {},", reg_class.index.index());
+        fmtln!(fmt, "width: {},", reg_class.width);
+        fmtln!(fmt, "bank: {},", reg_class.bank.index());
+        fmtln!(fmt, "toprc: {},", reg_class.toprc.index());
+        fmtln!(fmt, "first: {},", reg_bank.first_unit + reg_class.start);
+        fmtln!(fmt, "subclasses: {:#x},", reg_class.subclass_mask());
+        fmtln!(fmt, "mask: [{}],", mask);
+        fmtln!(fmt, "pinned_reg: {:?},", reg_bank.pinned_reg);
+        fmtln!(fmt, "info: &INFO,");
+    });
+    fmtln!(fmt, "};");
+
+    fmtln!(fmt, "#[allow(dead_code)]");
+    fmtln!(
+        fmt,
+        "pub static {}: RegClass = &{}_DATA;",
+        reg_class.name,
+        reg_class.name
+    );
+}
+
+fn gen_regbank_units(reg_bank: &RegBank, fmt: &mut Formatter) {
+    for unit in 0..reg_bank.units {
+        let v = unit + reg_bank.first_unit;
+        if (unit as usize) < reg_bank.names.len() {
+            fmtln!(fmt, "{} = {},", reg_bank.names[unit as usize], v);
+            continue;
+        }
+        fmtln!(fmt, "{}{} = {},", reg_bank.prefix, unit, v);
+    }
+}
+
+fn gen_isa(isa: &TargetIsa, fmt: &mut Formatter) {
+    // Emit RegInfo.
+    fmtln!(fmt, "pub static INFO: RegInfo = RegInfo {");
+
+    fmt.indent(|fmt| {
+        fmtln!(fmt, "banks: &[");
+        // Bank descriptors.
+        fmt.indent(|fmt| {
+            for reg_bank in isa.regs.banks.values() {
+                gen_regbank(fmt, &reg_bank);
+            }
+        });
+        fmtln!(fmt, "],");
+        // References to register classes.
+        fmtln!(fmt, "classes: &[");
+        fmt.indent(|fmt| {
+            for reg_class in isa.regs.classes.values() {
+                fmtln!(fmt, "&{}_DATA,", reg_class.name);
+            }
+        });
+        fmtln!(fmt, "],");
+    });
+    fmtln!(fmt, "};");
+
+    // Register class descriptors.
+    for rc in isa.regs.classes.values() {
+        gen_regclass(&isa, rc, fmt);
+    }
+
+    // Emit constants for all the register units.
+    fmtln!(fmt, "#[allow(dead_code, non_camel_case_types)]");
+    fmtln!(fmt, "#[derive(Clone, Copy)]");
+    fmtln!(fmt, "pub enum RU {");
+    fmt.indent(|fmt| {
+        for reg_bank in isa.regs.banks.values() {
+            gen_regbank_units(reg_bank, fmt);
+        }
+    });
+    fmtln!(fmt, "}");
+
+    // Emit Into conversion for the RU class.
+    fmtln!(fmt, "impl Into<RegUnit> for RU {");
+    fmt.indent(|fmt| {
+        fmtln!(fmt, "fn into(self) -> RegUnit {");
+        fmt.indent(|fmt| {
+            fmtln!(fmt, "self as RegUnit");
+        });
+        fmtln!(fmt, "}");
+    });
+    fmtln!(fmt, "}");
+}
+
+pub(crate) fn generate(isa: &TargetIsa, filename: &str, out_dir: &str) -> Result<(), error::Error> {
+    let mut fmt = Formatter::new();
+    gen_isa(&isa, &mut fmt);
+    fmt.update_file(filename, out_dir)?;
+    Ok(())
+}
diff --git a/cranelift/codegen/meta/src/gen_settings.rs b/cranelift/codegen/meta/src/gen_settings.rs
new file mode 100644
index 0000000000..2ed5941b80
--- /dev/null
+++ b/cranelift/codegen/meta/src/gen_settings.rs
@@ -0,0 +1,447 @@
+//! Generate the ISA-specific settings.
+use std::collections::HashMap;
+
+use cranelift_codegen_shared::constant_hash::{generate_table, simple_hash};
+
+use crate::cdsl::camel_case;
+use crate::cdsl::settings::{
+    BoolSetting, Predicate, Preset, Setting, SettingGroup, SpecificSetting,
+};
+use crate::error;
+use crate::srcgen::{Formatter, Match};
+use crate::unique_table::UniqueSeqTable;
+
+pub(crate) enum ParentGroup {
+    None,
+    Shared,
+}
+
+/// Emits the constructor of the Flags structure.
+fn gen_constructor(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatter) {
+    let args = match parent {
+        ParentGroup::None => "builder: Builder",
+        ParentGroup::Shared => "shared: &settings::Flags, builder: Builder",
+    };
+    fmtln!(fmt, "impl Flags {");
+    fmt.indent(|fmt| {
+        fmt.doc_comment(format!("Create flags {} settings group.", group.name));
+        fmtln!(fmt, "#[allow(unused_variables)]");
+        fmtln!(fmt, "pub fn new({}) -> Self {{", args);
+        fmt.indent(|fmt| {
+            fmtln!(fmt, "let bvec = builder.state_for(\"{}\");", group.name);
+            fmtln!(
+                fmt,
+                "let mut {} = Self {{ bytes: [0; {}] }};",
+                group.name,
+                group.byte_size()
+            );
+            fmtln!(
+                fmt,
+                "debug_assert_eq!(bvec.len(), {});",
+                group.settings_size
+            );
+            fmtln!(
+                fmt,
+                "{}.bytes[0..{}].copy_from_slice(&bvec);",
+                group.name,
+                group.settings_size
+            );
+
+            // Now compute the predicates.
+            for p in &group.predicates {
+                fmt.comment(format!("Precompute #{}.", p.number));
+                fmtln!(fmt, "if {} {{", p.render(group));
+                fmt.indent(|fmt| {
+                    fmtln!(
+                        fmt,
+                        "{}.bytes[{}] |= 1 << {};",
+                        group.name,
+                        group.bool_start_byte_offset + p.number / 8,
+                        p.number % 8
+                    );
+                });
+                fmtln!(fmt, "}");
+            }
+
+            fmtln!(fmt, group.name);
+        });
+        fmtln!(fmt, "}");
+    });
+    fmtln!(fmt, "}");
+}
+
+/// Emit Display and FromStr implementations for enum settings.
+fn gen_to_and_from_str(name: &str, values: &[&'static str], fmt: &mut Formatter) {
+    fmtln!(fmt, "impl fmt::Display for {} {{", name);
+    fmt.indent(|fmt| {
+        fmtln!(
+            fmt,
+            "fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {"
+        );
+        fmt.indent(|fmt| {
+            fmtln!(fmt, "f.write_str(match *self {");
+            fmt.indent(|fmt| {
+                for v in values.iter() {
+                    fmtln!(fmt, "Self::{} => \"{}\",", camel_case(v), v);
+                }
+            });
+            fmtln!(fmt, "})");
+        });
+        fmtln!(fmt, "}");
+    });
+    fmtln!(fmt, "}");
+
+    fmtln!(fmt, "impl str::FromStr for {} {{", name);
+    fmt.indent(|fmt| {
+        fmtln!(fmt, "type Err = ();");
+        fmtln!(fmt, "fn from_str(s: &str) -> Result<Self, Self::Err> {");
+        fmt.indent(|fmt| {
+            fmtln!(fmt, "match s {");
+            fmt.indent(|fmt| {
+                for v in values.iter() {
+                    fmtln!(fmt, "\"{}\" => Ok(Self::{}),", v, camel_case(v));
+                }
+                fmtln!(fmt, "_ => Err(()),");
+            });
+            fmtln!(fmt, "}");
+        });
+        fmtln!(fmt, "}");
+    });
+    fmtln!(fmt, "}");
+}
+
+/// Emit real enum for the Enum settings.
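+///
+/// For a hypothetical enum setting `opt_level` with values `none|speed`, the
+/// emitted enum would look roughly like:
+///
+/// ```text
+/// pub enum OptLevel { None, Speed }
+/// ```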
+fn gen_enum_types(group: &SettingGroup, fmt: &mut Formatter) { + for setting in group.settings.iter() { + let values = match setting.specific { + SpecificSetting::Bool(_) | SpecificSetting::Num(_) => continue, + SpecificSetting::Enum(ref values) => values, + }; + let name = camel_case(setting.name); + + fmt.doc_comment(format!("Values for `{}.{}`.", group.name, setting.name)); + fmtln!(fmt, "#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]"); + fmtln!(fmt, "pub enum {} {{", name); + fmt.indent(|fmt| { + for v in values.iter() { + fmt.doc_comment(format!("`{}`.", v)); + fmtln!(fmt, "{},", camel_case(v)); + } + }); + fmtln!(fmt, "}"); + + gen_to_and_from_str(&name, values, fmt); + } +} + +/// Emit a getter function for `setting`. +fn gen_getter(setting: &Setting, fmt: &mut Formatter) { + fmt.doc_comment(setting.comment); + match setting.specific { + SpecificSetting::Bool(BoolSetting { + predicate_number, .. + }) => { + fmtln!(fmt, "pub fn {}(&self) -> bool {{", setting.name); + fmt.indent(|fmt| { + fmtln!(fmt, "self.numbered_predicate({})", predicate_number); + }); + fmtln!(fmt, "}"); + } + SpecificSetting::Enum(ref values) => { + let ty = camel_case(setting.name); + fmtln!(fmt, "pub fn {}(&self) -> {} {{", setting.name, ty); + fmt.indent(|fmt| { + let mut m = Match::new(format!("self.bytes[{}]", setting.byte_offset)); + for (i, v) in values.iter().enumerate() { + m.arm_no_fields(format!("{}", i), format!("{}::{}", ty, camel_case(v))); + } + m.arm_no_fields("_", "panic!(\"Invalid enum value\")"); + fmt.add_match(m); + }); + fmtln!(fmt, "}"); + } + SpecificSetting::Num(_) => { + fmtln!(fmt, "pub fn {}(&self) -> u8 {{", setting.name); + fmt.indent(|fmt| { + fmtln!(fmt, "self.bytes[{}]", setting.byte_offset); + }); + fmtln!(fmt, "}"); + } + } +} + +fn gen_pred_getter(predicate: &Predicate, group: &SettingGroup, fmt: &mut Formatter) { + fmt.doc_comment(format!("Computed predicate `{}`.", predicate.render(group))); + fmtln!(fmt, "pub fn {}(&self) -> bool {{", predicate.name); + fmt.indent(|fmt| { + fmtln!(fmt, "self.numbered_predicate({})", predicate.number); + }); + fmtln!(fmt, "}"); +} + +/// Emits getters for each setting value. +fn gen_getters(group: &SettingGroup, fmt: &mut Formatter) { + fmt.doc_comment("User-defined settings."); + fmtln!(fmt, "#[allow(dead_code)]"); + fmtln!(fmt, "impl Flags {"); + fmt.indent(|fmt| { + fmt.doc_comment("Get a view of the boolean predicates."); + fmtln!( + fmt, + "pub fn predicate_view(&self) -> crate::settings::PredicateView {" + ); + fmt.indent(|fmt| { + fmtln!( + fmt, + "crate::settings::PredicateView::new(&self.bytes[{}..])", + group.bool_start_byte_offset + ); + }); + fmtln!(fmt, "}"); + + if !group.settings.is_empty() { + fmt.doc_comment("Dynamic numbered predicate getter."); + fmtln!(fmt, "fn numbered_predicate(&self, p: usize) -> bool {"); + fmt.indent(|fmt| { + fmtln!( + fmt, + "self.bytes[{} + p / 8] & (1 << (p % 8)) != 0", + group.bool_start_byte_offset + ); + }); + fmtln!(fmt, "}"); + } + + for setting in &group.settings { + gen_getter(&setting, fmt); + } + for predicate in &group.predicates { + gen_pred_getter(&predicate, &group, fmt); + } + }); + fmtln!(fmt, "}"); +} + +#[derive(Hash, PartialEq, Eq)] +enum SettingOrPreset<'a> { + Setting(&'a Setting), + Preset(&'a Preset), +} + +impl<'a> SettingOrPreset<'a> { + fn name(&self) -> &str { + match *self { + SettingOrPreset::Setting(s) => s.name, + SettingOrPreset::Preset(p) => p.name, + } + } +} + +/// Emits DESCRIPTORS, ENUMERATORS, HASH_TABLE and PRESETS. 
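+///
+/// `HASH_TABLE` maps `simple_hash(name)` probes to indices into
+/// `DESCRIPTORS`; empty slots hold the sentinel `0xffff`.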
+fn gen_descriptors(group: &SettingGroup, fmt: &mut Formatter) {
+    let mut enum_table = UniqueSeqTable::new();
+
+    let mut descriptor_index_map: HashMap<SettingOrPreset, usize> = HashMap::new();
+
+    // Generate descriptors.
+    fmtln!(
+        fmt,
+        "static DESCRIPTORS: [detail::Descriptor; {}] = [",
+        group.settings.len() + group.presets.len()
+    );
+    fmt.indent(|fmt| {
+        for (idx, setting) in group.settings.iter().enumerate() {
+            fmtln!(fmt, "detail::Descriptor {");
+            fmt.indent(|fmt| {
+                fmtln!(fmt, "name: \"{}\",", setting.name);
+                fmtln!(fmt, "offset: {},", setting.byte_offset);
+                match setting.specific {
+                    SpecificSetting::Bool(BoolSetting { bit_offset, .. }) => {
+                        fmtln!(
+                            fmt,
+                            "detail: detail::Detail::Bool {{ bit: {} }},",
+                            bit_offset
+                        );
+                    }
+                    SpecificSetting::Enum(ref values) => {
+                        let offset = enum_table.add(values);
+                        fmtln!(
+                            fmt,
+                            "detail: detail::Detail::Enum {{ last: {}, enumerators: {} }},",
+                            values.len() - 1,
+                            offset
+                        );
+                    }
+                    SpecificSetting::Num(_) => {
+                        fmtln!(fmt, "detail: detail::Detail::Num,");
+                    }
+                }
+
+                descriptor_index_map.insert(SettingOrPreset::Setting(setting), idx);
+            });
+            fmtln!(fmt, "},");
+        }
+
+        for (idx, preset) in group.presets.iter().enumerate() {
+            fmtln!(fmt, "detail::Descriptor {");
+            fmt.indent(|fmt| {
+                fmtln!(fmt, "name: \"{}\",", preset.name);
+                fmtln!(fmt, "offset: {},", (idx as u8) * group.settings_size);
+                fmtln!(fmt, "detail: detail::Detail::Preset,");
+            });
+            fmtln!(fmt, "},");
+
+            let whole_idx = idx + group.settings.len();
+            descriptor_index_map.insert(SettingOrPreset::Preset(preset), whole_idx);
+        }
+    });
+    fmtln!(fmt, "];");
+
+    // Generate enumerators.
+    fmtln!(fmt, "static ENUMERATORS: [&str; {}] = [", enum_table.len());
+    fmt.indent(|fmt| {
+        for enum_val in enum_table.iter() {
+            fmtln!(fmt, "\"{}\",", enum_val);
+        }
+    });
+    fmtln!(fmt, "];");
+
+    // Generate hash table.
+    let mut hash_entries: Vec<SettingOrPreset> = Vec::new();
+    hash_entries.extend(group.settings.iter().map(|x| SettingOrPreset::Setting(x)));
+    hash_entries.extend(group.presets.iter().map(|x| SettingOrPreset::Preset(x)));
+
+    let hash_table = generate_table(hash_entries.iter(), hash_entries.len(), |entry| {
+        simple_hash(entry.name())
+    });
+    fmtln!(fmt, "static HASH_TABLE: [u16; {}] = [", hash_table.len());
+    fmt.indent(|fmt| {
+        for h in &hash_table {
+            match *h {
+                Some(setting_or_preset) => fmtln!(
+                    fmt,
+                    "{},",
+                    &descriptor_index_map
+                        .get(setting_or_preset)
+                        .unwrap()
+                        .to_string()
+                ),
+                None => fmtln!(fmt, "0xffff,"),
+            }
+        }
+    });
+    fmtln!(fmt, "];");
+
+    // Generate presets.
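+    // Each preset contributes one `(mask, value)` pair per settings byte. The
+    // intended semantics (a sketch, see `Preset::layout`) are that applying a
+    // preset computes `byte = (byte & !mask) | value` for each byte.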
+    fmtln!(
+        fmt,
+        "static PRESETS: [(u8, u8); {}] = [",
+        group.presets.len() * (group.settings_size as usize)
+    );
+    fmt.indent(|fmt| {
+        for preset in &group.presets {
+            fmt.comment(preset.name);
+            for (mask, value) in preset.layout(&group) {
+                fmtln!(fmt, "(0b{:08b}, 0b{:08b}),", mask, value);
+            }
+        }
+    });
+    fmtln!(fmt, "];");
+}
+
+fn gen_template(group: &SettingGroup, fmt: &mut Formatter) {
+    let mut default_bytes: Vec<u8> = vec![0; group.settings_size as usize];
+    for setting in &group.settings {
+        *default_bytes.get_mut(setting.byte_offset as usize).unwrap() |= setting.default_byte();
+    }
+
+    let default_bytes: Vec<String> = default_bytes
+        .iter()
+        .map(|x| format!("{:#04x}", x))
+        .collect();
+    let default_bytes_str = default_bytes.join(", ");
+
+    fmtln!(
+        fmt,
+        "static TEMPLATE: detail::Template = detail::Template {"
+    );
+    fmt.indent(|fmt| {
+        fmtln!(fmt, "name: \"{}\",", group.name);
+        fmtln!(fmt, "descriptors: &DESCRIPTORS,");
+        fmtln!(fmt, "enumerators: &ENUMERATORS,");
+        fmtln!(fmt, "hash_table: &HASH_TABLE,");
+        fmtln!(fmt, "defaults: &[{}],", default_bytes_str);
+        fmtln!(fmt, "presets: &PRESETS,");
+    });
+    fmtln!(fmt, "};");
+
+    fmt.doc_comment(format!(
+        "Create a `settings::Builder` for the {} settings group.",
+        group.name
+    ));
+    fmtln!(fmt, "pub fn builder() -> Builder {");
+    fmt.indent(|fmt| {
+        fmtln!(fmt, "Builder::new(&TEMPLATE)");
+    });
+    fmtln!(fmt, "}");
+}
+
+fn gen_display(group: &SettingGroup, fmt: &mut Formatter) {
+    fmtln!(fmt, "impl fmt::Display for Flags {");
+    fmt.indent(|fmt| {
+        fmtln!(
+            fmt,
+            "fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {"
+        );
+        fmt.indent(|fmt| {
+            fmtln!(fmt, "writeln!(f, \"[{}]\")?;", group.name);
+            fmtln!(fmt, "for d in &DESCRIPTORS {");
+            fmt.indent(|fmt| {
+                fmtln!(fmt, "if !d.detail.is_preset() {");
+                fmt.indent(|fmt| {
+                    fmtln!(fmt, "write!(f, \"{} = \", d.name)?;");
+                    fmtln!(
+                        fmt,
+                        "TEMPLATE.format_toml_value(d.detail, self.bytes[d.offset as usize], f)?;",
+                    );
+                    fmtln!(fmt, "writeln!(f)?;");
+                });
+                fmtln!(fmt, "}");
+            });
+            fmtln!(fmt, "}");
+            fmtln!(fmt, "Ok(())");
+        });
+        fmtln!(fmt, "}")
+    });
+    fmtln!(fmt, "}");
+}
+
+fn gen_group(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatter) {
+    // Generate struct.
+    fmtln!(fmt, "#[derive(Clone)]");
+    fmt.doc_comment(format!("Flags group `{}`.", group.name));
+    fmtln!(fmt, "pub struct Flags {");
+    fmt.indent(|fmt| {
+        fmtln!(fmt, "bytes: [u8; {}],", group.byte_size());
+    });
+    fmtln!(fmt, "}");
+
+    gen_constructor(group, parent, fmt);
+    gen_enum_types(group, fmt);
+    gen_getters(group, fmt);
+    gen_descriptors(group, fmt);
+    gen_template(group, fmt);
+    gen_display(group, fmt);
+}
+
+pub(crate) fn generate(
+    settings: &SettingGroup,
+    parent_group: ParentGroup,
+    filename: &str,
+    out_dir: &str,
+) -> Result<(), error::Error> {
+    let mut fmt = Formatter::new();
+    gen_group(&settings, parent_group, &mut fmt);
+    fmt.update_file(filename, out_dir)?;
+    Ok(())
+}
diff --git a/cranelift/codegen/meta/src/gen_types.rs b/cranelift/codegen/meta/src/gen_types.rs
new file mode 100644
index 0000000000..6ced212b8d
--- /dev/null
+++ b/cranelift/codegen/meta/src/gen_types.rs
@@ -0,0 +1,76 @@
+//! Generate sources with type info.
+//!
+//! This generates a `types.rs` file which is included in
+//! `cranelift-codegen/ir/types.rs`. The file provides constant definitions for the
+//! most commonly used types, including all of the scalar types.
+//!
+//! This ensures that the metaprogram and the generated program see the same
+//! type numbering.
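+//!
+//! Each emitted definition has the shape (the number shown is illustrative):
+//!
+//! ```text
+//! pub const I32: Type = Type(0x76);
+//! ```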
+
+use crate::cdsl::types as cdsl_types;
+use crate::error;
+use crate::srcgen;
+
+/// Emit a constant definition of a single value type.
+fn emit_type(ty: &cdsl_types::ValueType, fmt: &mut srcgen::Formatter) -> Result<(), error::Error> {
+    let name = ty.to_string().to_uppercase();
+    let number = ty.number().ok_or_else(|| {
+        error::Error::with_msg(format!(
+            "Could not emit type `{}` which has no number.",
+            name
+        ))
+    })?;
+
+    fmt.doc_comment(&ty.doc());
+    fmtln!(fmt, "pub const {}: Type = Type({:#x});\n", name, number);
+
+    Ok(())
+}
+
+/// Emit definition for all vector types with `bits` total size.
+fn emit_vectors(bits: u64, fmt: &mut srcgen::Formatter) -> Result<(), error::Error> {
+    let vec_size: u64 = bits / 8;
+    for vec in cdsl_types::ValueType::all_lane_types()
+        .map(|ty| (ty, cdsl_types::ValueType::from(ty).membytes()))
+        .filter(|&(_, lane_size)| lane_size != 0 && lane_size < vec_size)
+        .map(|(ty, lane_size)| (ty, vec_size / lane_size))
+        .map(|(ty, lanes)| cdsl_types::VectorType::new(ty, lanes))
+    {
+        emit_type(&cdsl_types::ValueType::from(vec), fmt)?;
+    }
+
+    Ok(())
+}
+
+/// Emit types using the given formatter object.
+fn emit_types(fmt: &mut srcgen::Formatter) -> Result<(), error::Error> {
+    // Emit all of the special types, such as types for CPU flags.
+    for spec in cdsl_types::ValueType::all_special_types().map(cdsl_types::ValueType::from) {
+        emit_type(&spec, fmt)?;
+    }
+
+    // Emit all of the lane types, such as integers, floats, and booleans.
+    for ty in cdsl_types::ValueType::all_lane_types().map(cdsl_types::ValueType::from) {
+        emit_type(&ty, fmt)?;
+    }
+
+    // Emit all reference types.
+    for ty in cdsl_types::ValueType::all_reference_types().map(cdsl_types::ValueType::from) {
+        emit_type(&ty, fmt)?;
+    }
+
+    // Emit vector definitions for common SIMD sizes.
+    for vec_size in &[64_u64, 128, 256, 512] {
+        emit_vectors(*vec_size, fmt)?;
+    }
+
+    Ok(())
+}
+
+/// Generate the types file.
+pub(crate) fn generate(filename: &str, out_dir: &str) -> Result<(), error::Error> { + let mut fmt = srcgen::Formatter::new(); + emit_types(&mut fmt)?; + fmt.update_file(filename, out_dir)?; + Ok(()) +} diff --git a/cranelift/codegen/meta/src/isa/arm32/mod.rs b/cranelift/codegen/meta/src/isa/arm32/mod.rs new file mode 100644 index 0000000000..5cb1761a7e --- /dev/null +++ b/cranelift/codegen/meta/src/isa/arm32/mod.rs @@ -0,0 +1,84 @@ +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap}; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::recipes::Recipes; +use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; +use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; + +use crate::shared::Definitions as SharedDefinitions; + +fn define_settings(_shared: &SettingGroup) -> SettingGroup { + let setting = SettingGroupBuilder::new("arm32"); + setting.build() +} + +fn define_regs() -> IsaRegs { + let mut regs = IsaRegsBuilder::new(); + + let builder = RegBankBuilder::new("FloatRegs", "s") + .units(64) + .track_pressure(true); + let float_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("IntRegs", "r") + .units(16) + .track_pressure(true); + let int_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("FlagRegs", "") + .units(1) + .names(vec!["nzcv"]) + .track_pressure(false); + let flag_reg = regs.add_bank(builder); + + let builder = RegClassBuilder::new_toplevel("S", float_regs).count(32); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("D", float_regs).width(2); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("Q", float_regs).width(4); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("GPR", int_regs); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg); + regs.add_class(builder); + + regs.build() +} + +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { + let settings = define_settings(&shared_defs.settings); + let regs = define_regs(); + + let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build(); + + // CPU modes for 32-bit ARM and Thumb2. + let mut a32 = CpuMode::new("A32"); + let mut t32 = CpuMode::new("T32"); + + // TODO refine these. + let narrow_flags = shared_defs.transform_groups.by_name("narrow_flags"); + a32.legalize_default(narrow_flags); + t32.legalize_default(narrow_flags); + + let cpu_modes = vec![a32, t32]; + + // TODO implement arm32 recipes. + let recipes = Recipes::new(); + + // TODO implement arm32 encodings and predicates. 
+    let encodings_predicates = InstructionPredicateMap::new();
+
+    TargetIsa::new(
+        "arm32",
+        inst_group,
+        settings,
+        regs,
+        recipes,
+        cpu_modes,
+        encodings_predicates,
+    )
+}
diff --git a/cranelift/codegen/meta/src/isa/arm64/mod.rs b/cranelift/codegen/meta/src/isa/arm64/mod.rs
new file mode 100644
index 0000000000..3440c8af82
--- /dev/null
+++ b/cranelift/codegen/meta/src/isa/arm64/mod.rs
@@ -0,0 +1,77 @@
+use crate::cdsl::cpu_modes::CpuMode;
+use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap};
+use crate::cdsl::isa::TargetIsa;
+use crate::cdsl::recipes::Recipes;
+use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder};
+use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder};
+
+use crate::shared::Definitions as SharedDefinitions;
+
+fn define_settings(_shared: &SettingGroup) -> SettingGroup {
+    let setting = SettingGroupBuilder::new("arm64");
+    setting.build()
+}
+
+fn define_registers() -> IsaRegs {
+    let mut regs = IsaRegsBuilder::new();
+
+    // The `x31` regunit serves as the stack pointer / zero register depending on context. We
+    // reserve it and don't model the difference.
+    let builder = RegBankBuilder::new("IntRegs", "x")
+        .units(32)
+        .track_pressure(true);
+    let int_regs = regs.add_bank(builder);
+
+    let builder = RegBankBuilder::new("FloatRegs", "v")
+        .units(32)
+        .track_pressure(true);
+    let float_regs = regs.add_bank(builder);
+
+    let builder = RegBankBuilder::new("FlagRegs", "")
+        .units(1)
+        .names(vec!["nzcv"])
+        .track_pressure(false);
+    let flag_reg = regs.add_bank(builder);
+
+    let builder = RegClassBuilder::new_toplevel("GPR", int_regs);
+    regs.add_class(builder);
+
+    let builder = RegClassBuilder::new_toplevel("FPR", float_regs);
+    regs.add_class(builder);
+
+    let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg);
+    regs.add_class(builder);
+
+    regs.build()
+}
+
+pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
+    let settings = define_settings(&shared_defs.settings);
+    let regs = define_registers();
+
+    let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build();
+
+    let mut a64 = CpuMode::new("A64");
+
+    // TODO refine these.
+    let narrow_flags = shared_defs.transform_groups.by_name("narrow_flags");
+    a64.legalize_default(narrow_flags);
+
+    let cpu_modes = vec![a64];
+
+    // TODO implement arm64 recipes.
+    let recipes = Recipes::new();
+
+    // TODO implement arm64 encodings and predicates.
+    let encodings_predicates = InstructionPredicateMap::new();
+
+    TargetIsa::new(
+        "arm64",
+        inst_group,
+        settings,
+        regs,
+        recipes,
+        cpu_modes,
+        encodings_predicates,
+    )
+}
diff --git a/cranelift/codegen/meta/src/isa/mod.rs b/cranelift/codegen/meta/src/isa/mod.rs
new file mode 100644
index 0000000000..39cd913300
--- /dev/null
+++ b/cranelift/codegen/meta/src/isa/mod.rs
@@ -0,0 +1,67 @@
+//! Define supported ISAs; includes ISA-specific instructions, encodings, registers, settings, etc.
+use crate::cdsl::isa::TargetIsa;
+use crate::shared::Definitions as SharedDefinitions;
+use std::fmt;
+
+mod arm32;
+mod arm64;
+mod riscv;
+mod x86;
+
+/// Represents a known ISA target.
+#[derive(Copy, Clone)]
+pub enum Isa {
+    Riscv,
+    X86,
+    Arm32,
+    Arm64,
+}
+
+impl Isa {
+    /// Creates an ISA target from its name.
+    pub fn from_name(name: &str) -> Option<Self> {
+        Isa::all()
+            .iter()
+            .cloned()
+            .find(|isa| isa.to_string() == name)
+    }
+
+    /// Creates an ISA target from an architecture string.
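+    ///
+    /// For example, `Isa::from_arch("x86_64")` returns `Some(Isa::X86)`.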
+    pub fn from_arch(arch: &str) -> Option<Isa> {
+        match arch {
+            "riscv" => Some(Isa::Riscv),
+            "aarch64" => Some(Isa::Arm64),
+            x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86),
+            x if x.starts_with("arm") || x.starts_with("thumb") => Some(Isa::Arm32),
+            _ => None,
+        }
+    }
+
+    /// Returns all supported ISA targets.
+    pub fn all() -> &'static [Isa] {
+        &[Isa::Riscv, Isa::X86, Isa::Arm32, Isa::Arm64]
+    }
+}
+
+impl fmt::Display for Isa {
+    // These names should be kept in sync with the crate features.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Isa::Riscv => write!(f, "riscv"),
+            Isa::X86 => write!(f, "x86"),
+            Isa::Arm32 => write!(f, "arm32"),
+            Isa::Arm64 => write!(f, "arm64"),
+        }
+    }
+}
+
+pub(crate) fn define(isas: &[Isa], shared_defs: &mut SharedDefinitions) -> Vec<TargetIsa> {
+    isas.iter()
+        .map(|isa| match isa {
+            Isa::Riscv => riscv::define(shared_defs),
+            Isa::X86 => x86::define(shared_defs),
+            Isa::Arm32 => arm32::define(shared_defs),
+            Isa::Arm64 => arm64::define(shared_defs),
+        })
+        .collect()
+}
diff --git a/cranelift/codegen/meta/src/isa/riscv/encodings.rs b/cranelift/codegen/meta/src/isa/riscv/encodings.rs
new file mode 100644
index 0000000000..c255ddb483
--- /dev/null
+++ b/cranelift/codegen/meta/src/isa/riscv/encodings.rs
@@ -0,0 +1,431 @@
+use crate::cdsl::ast::{Apply, Expr, Literal, VarPool};
+use crate::cdsl::encodings::{Encoding, EncodingBuilder};
+use crate::cdsl::instructions::{
+    Bindable, BoundInstruction, InstSpec, InstructionPredicateNode, InstructionPredicateRegistry,
+};
+use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes};
+use crate::cdsl::settings::SettingGroup;
+
+use crate::shared::types::Bool::B1;
+use crate::shared::types::Float::{F32, F64};
+use crate::shared::types::Int::{I16, I32, I64, I8};
+use crate::shared::types::Reference::{R32, R64};
+use crate::shared::Definitions as SharedDefinitions;
+
+use super::recipes::RecipeGroup;
+
+pub(crate) struct PerCpuModeEncodings<'defs> {
+    pub inst_pred_reg: InstructionPredicateRegistry,
+    pub enc32: Vec<Encoding>,
+    pub enc64: Vec<Encoding>,
+    recipes: &'defs Recipes,
+}
+
+impl<'defs> PerCpuModeEncodings<'defs> {
+    fn new(recipes: &'defs Recipes) -> Self {
+        Self {
+            inst_pred_reg: InstructionPredicateRegistry::new(),
+            enc32: Vec::new(),
+            enc64: Vec::new(),
+            recipes,
+        }
+    }
+    fn enc(
+        &self,
+        inst: impl Into<InstSpec>,
+        recipe: EncodingRecipeNumber,
+        bits: u16,
+    ) -> EncodingBuilder {
+        EncodingBuilder::new(inst.into(), recipe, bits)
+    }
+    fn add32(&mut self, encoding: EncodingBuilder) {
+        self.enc32
+            .push(encoding.build(self.recipes, &mut self.inst_pred_reg));
+    }
+    fn add64(&mut self, encoding: EncodingBuilder) {
+        self.enc64
+            .push(encoding.build(self.recipes, &mut self.inst_pred_reg));
+    }
+}
+
+// The low 7 bits of a RISC-V instruction are the base opcode. All 32-bit instructions have 11 as
+// the two low bits, with bits 6:2 determining the base opcode.
+//
+// Encbits for the 32-bit recipes are `opcode[6:2] | (funct3 << 5) | ...`.
+// The functions below encode the encbits.
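+//
+// For example (an illustrative reading of the helpers below): the RV32I ADD
+// instruction has base opcode 0b0110011, i.e. opcode[6:2] = 0b01100, with
+// funct3 = 0b000 and funct7 = 0b0000000, so `op_bits(0b000, 0b000_0000)`
+// yields encbits 0b01100.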
+ +fn load_bits(funct3: u16) -> u16 { + assert!(funct3 <= 0b111); + funct3 << 5 +} + +fn store_bits(funct3: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b01000 | (funct3 << 5) +} + +fn branch_bits(funct3: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b11000 | (funct3 << 5) +} + +fn jalr_bits() -> u16 { + // This was previously accepting an argument funct3 of 3 bits and used the following formula: + //0b11001 | (funct3 << 5) + 0b11001 +} + +fn jal_bits() -> u16 { + 0b11011 +} + +fn opimm_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b00100 | (funct3 << 5) | (funct7 << 8) +} + +fn opimm32_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b00110 | (funct3 << 5) | (funct7 << 8) +} + +fn op_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + assert!(funct7 <= 0b111_1111); + 0b01100 | (funct3 << 5) | (funct7 << 8) +} + +fn op32_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + assert!(funct7 <= 0b111_1111); + 0b01110 | (funct3 << 5) | (funct7 << 8) +} + +fn lui_bits() -> u16 { + 0b01101 +} + +pub(crate) fn define<'defs>( + shared_defs: &'defs SharedDefinitions, + isa_settings: &SettingGroup, + recipes: &'defs RecipeGroup, +) -> PerCpuModeEncodings<'defs> { + // Instructions shorthands. + let shared = &shared_defs.instructions; + + let band = shared.by_name("band"); + let band_imm = shared.by_name("band_imm"); + let bor = shared.by_name("bor"); + let bor_imm = shared.by_name("bor_imm"); + let br_icmp = shared.by_name("br_icmp"); + let brz = shared.by_name("brz"); + let brnz = shared.by_name("brnz"); + let bxor = shared.by_name("bxor"); + let bxor_imm = shared.by_name("bxor_imm"); + let call = shared.by_name("call"); + let call_indirect = shared.by_name("call_indirect"); + let copy = shared.by_name("copy"); + let copy_nop = shared.by_name("copy_nop"); + let copy_to_ssa = shared.by_name("copy_to_ssa"); + let fill = shared.by_name("fill"); + let fill_nop = shared.by_name("fill_nop"); + let iadd = shared.by_name("iadd"); + let iadd_imm = shared.by_name("iadd_imm"); + let iconst = shared.by_name("iconst"); + let icmp = shared.by_name("icmp"); + let icmp_imm = shared.by_name("icmp_imm"); + let imul = shared.by_name("imul"); + let ishl = shared.by_name("ishl"); + let ishl_imm = shared.by_name("ishl_imm"); + let isub = shared.by_name("isub"); + let jump = shared.by_name("jump"); + let regmove = shared.by_name("regmove"); + let spill = shared.by_name("spill"); + let sshr = shared.by_name("sshr"); + let sshr_imm = shared.by_name("sshr_imm"); + let ushr = shared.by_name("ushr"); + let ushr_imm = shared.by_name("ushr_imm"); + let return_ = shared.by_name("return"); + + // Recipes shorthands, prefixed with r_. 
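+    // (Each shorthand resolves to an `EncodingRecipeNumber` through
+    // `RecipeGroup::by_name`, which panics on unknown names; see recipes.rs.)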
+ let r_copytossa = recipes.by_name("copytossa"); + let r_fillnull = recipes.by_name("fillnull"); + let r_icall = recipes.by_name("Icall"); + let r_icopy = recipes.by_name("Icopy"); + let r_ii = recipes.by_name("Ii"); + let r_iicmp = recipes.by_name("Iicmp"); + let r_iret = recipes.by_name("Iret"); + let r_irmov = recipes.by_name("Irmov"); + let r_iz = recipes.by_name("Iz"); + let r_gp_sp = recipes.by_name("GPsp"); + let r_gp_fi = recipes.by_name("GPfi"); + let r_r = recipes.by_name("R"); + let r_ricmp = recipes.by_name("Ricmp"); + let r_rshamt = recipes.by_name("Rshamt"); + let r_sb = recipes.by_name("SB"); + let r_sb_zero = recipes.by_name("SBzero"); + let r_stacknull = recipes.by_name("stacknull"); + let r_u = recipes.by_name("U"); + let r_uj = recipes.by_name("UJ"); + let r_uj_call = recipes.by_name("UJcall"); + + // Predicates shorthands. + let use_m = isa_settings.predicate_by_name("use_m"); + + // Definitions. + let mut e = PerCpuModeEncodings::new(&recipes.recipes); + + // Basic arithmetic binary instructions are encoded in an R-type instruction. + for &(inst, inst_imm, f3, f7) in &[ + (iadd, Some(iadd_imm), 0b000, 0b000_0000), + (isub, None, 0b000, 0b010_0000), + (bxor, Some(bxor_imm), 0b100, 0b000_0000), + (bor, Some(bor_imm), 0b110, 0b000_0000), + (band, Some(band_imm), 0b111, 0b000_0000), + ] { + e.add32(e.enc(inst.bind(I32), r_r, op_bits(f3, f7))); + e.add64(e.enc(inst.bind(I64), r_r, op_bits(f3, f7))); + + // Immediate versions for add/xor/or/and. + if let Some(inst_imm) = inst_imm { + e.add32(e.enc(inst_imm.bind(I32), r_ii, opimm_bits(f3, 0))); + e.add64(e.enc(inst_imm.bind(I64), r_ii, opimm_bits(f3, 0))); + } + } + + // 32-bit ops in RV64. + e.add64(e.enc(iadd.bind(I32), r_r, op32_bits(0b000, 0b000_0000))); + e.add64(e.enc(isub.bind(I32), r_r, op32_bits(0b000, 0b010_0000))); + // There are no andiw/oriw/xoriw variations. + e.add64(e.enc(iadd_imm.bind(I32), r_ii, opimm32_bits(0b000, 0))); + + // Use iadd_imm with %x0 to materialize constants. + e.add32(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0))); + e.add64(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0))); + e.add64(e.enc(iconst.bind(I64), r_iz, opimm_bits(0b0, 0))); + + // Dynamic shifts have the same masking semantics as the clif base instructions. + for &(inst, inst_imm, f3, f7) in &[ + (ishl, ishl_imm, 0b1, 0b0), + (ushr, ushr_imm, 0b101, 0b0), + (sshr, sshr_imm, 0b101, 0b10_0000), + ] { + e.add32(e.enc(inst.bind(I32).bind(I32), r_r, op_bits(f3, f7))); + e.add64(e.enc(inst.bind(I64).bind(I64), r_r, op_bits(f3, f7))); + e.add64(e.enc(inst.bind(I32).bind(I32), r_r, op32_bits(f3, f7))); + // Allow i32 shift amounts in 64-bit shifts. + e.add64(e.enc(inst.bind(I64).bind(I32), r_r, op_bits(f3, f7))); + e.add64(e.enc(inst.bind(I32).bind(I64), r_r, op32_bits(f3, f7))); + + // Immediate shifts. + e.add32(e.enc(inst_imm.bind(I32), r_rshamt, opimm_bits(f3, f7))); + e.add64(e.enc(inst_imm.bind(I64), r_rshamt, opimm_bits(f3, f7))); + e.add64(e.enc(inst_imm.bind(I32), r_rshamt, opimm32_bits(f3, f7))); + } + + // Signed and unsigned integer 'less than'. There are no 'w' variants for comparing 32-bit + // numbers in RV64. + { + let mut var_pool = VarPool::new(); + + // Helper that creates an instruction predicate for an instruction in the icmp family. 
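+        // The predicate inspects the condition-code immediate, so that e.g.
+        // `icmp.i32 slt` and `icmp.i32 ult` can be routed to the distinct
+        // SLT/SLTU encodings added below; the value operands are left as free
+        // variables (a sketch of the matched shape: `icmp(cc, x, y)` with `x`,
+        // `y` unconstrained).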
+ let mut icmp_instp = |bound_inst: &BoundInstruction, + intcc_field: &'static str| + -> InstructionPredicateNode { + let x = var_pool.create("x"); + let y = var_pool.create("y"); + let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field); + Apply::new( + bound_inst.clone().into(), + vec![Expr::Literal(cc), Expr::Var(x), Expr::Var(y)], + ) + .inst_predicate(&var_pool) + .unwrap() + }; + + let icmp_i32 = icmp.bind(I32); + let icmp_i64 = icmp.bind(I64); + e.add32( + e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b010, 0b000_0000)) + .inst_predicate(icmp_instp(&icmp_i32, "slt")), + ); + e.add64( + e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b010, 0b000_0000)) + .inst_predicate(icmp_instp(&icmp_i64, "slt")), + ); + + e.add32( + e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b011, 0b000_0000)) + .inst_predicate(icmp_instp(&icmp_i32, "ult")), + ); + e.add64( + e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b011, 0b000_0000)) + .inst_predicate(icmp_instp(&icmp_i64, "ult")), + ); + + // Immediate variants. + let icmp_i32 = icmp_imm.bind(I32); + let icmp_i64 = icmp_imm.bind(I64); + e.add32( + e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b010, 0)) + .inst_predicate(icmp_instp(&icmp_i32, "slt")), + ); + e.add64( + e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b010, 0)) + .inst_predicate(icmp_instp(&icmp_i64, "slt")), + ); + + e.add32( + e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b011, 0)) + .inst_predicate(icmp_instp(&icmp_i32, "ult")), + ); + e.add64( + e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b011, 0)) + .inst_predicate(icmp_instp(&icmp_i64, "ult")), + ); + } + + // Integer constants with the low 12 bits clear are materialized by lui. + e.add32(e.enc(iconst.bind(I32), r_u, lui_bits())); + e.add64(e.enc(iconst.bind(I32), r_u, lui_bits())); + e.add64(e.enc(iconst.bind(I64), r_u, lui_bits())); + + // "M" Standard Extension for Integer Multiplication and Division. + // Gated by the `use_m` flag. + e.add32( + e.enc(imul.bind(I32), r_r, op_bits(0b000, 0b0000_0001)) + .isa_predicate(use_m), + ); + e.add64( + e.enc(imul.bind(I64), r_r, op_bits(0b000, 0b0000_0001)) + .isa_predicate(use_m), + ); + e.add64( + e.enc(imul.bind(I32), r_r, op32_bits(0b000, 0b0000_0001)) + .isa_predicate(use_m), + ); + + // Control flow. + + // Unconditional branches. + e.add32(e.enc(jump, r_uj, jal_bits())); + e.add64(e.enc(jump, r_uj, jal_bits())); + e.add32(e.enc(call, r_uj_call, jal_bits())); + e.add64(e.enc(call, r_uj_call, jal_bits())); + + // Conditional branches. + { + let mut var_pool = VarPool::new(); + + // Helper that creates an instruction predicate for an instruction in the icmp family. 
+ let mut br_icmp_instp = |bound_inst: &BoundInstruction, + intcc_field: &'static str| + -> InstructionPredicateNode { + let x = var_pool.create("x"); + let y = var_pool.create("y"); + let dest = var_pool.create("dest"); + let args = var_pool.create("args"); + let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field); + Apply::new( + bound_inst.clone().into(), + vec![ + Expr::Literal(cc), + Expr::Var(x), + Expr::Var(y), + Expr::Var(dest), + Expr::Var(args), + ], + ) + .inst_predicate(&var_pool) + .unwrap() + }; + + let br_icmp_i32 = br_icmp.bind(I32); + let br_icmp_i64 = br_icmp.bind(I64); + for &(cond, f3) in &[ + ("eq", 0b000), + ("ne", 0b001), + ("slt", 0b100), + ("sge", 0b101), + ("ult", 0b110), + ("uge", 0b111), + ] { + e.add32( + e.enc(br_icmp_i32.clone(), r_sb, branch_bits(f3)) + .inst_predicate(br_icmp_instp(&br_icmp_i32, cond)), + ); + e.add64( + e.enc(br_icmp_i64.clone(), r_sb, branch_bits(f3)) + .inst_predicate(br_icmp_instp(&br_icmp_i64, cond)), + ); + } + } + + for &(inst, f3) in &[(brz, 0b000), (brnz, 0b001)] { + e.add32(e.enc(inst.bind(I32), r_sb_zero, branch_bits(f3))); + e.add64(e.enc(inst.bind(I64), r_sb_zero, branch_bits(f3))); + e.add32(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3))); + e.add64(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3))); + } + + // Returns are a special case of jalr_bits using %x1 to hold the return address. + // The return address is provided by a special-purpose `link` return value that + // is added by legalize_signature(). + e.add32(e.enc(return_, r_iret, jalr_bits())); + e.add64(e.enc(return_, r_iret, jalr_bits())); + e.add32(e.enc(call_indirect.bind(I32), r_icall, jalr_bits())); + e.add64(e.enc(call_indirect.bind(I64), r_icall, jalr_bits())); + + // Spill and fill. + e.add32(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010))); + e.add64(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010))); + e.add64(e.enc(spill.bind(I64), r_gp_sp, store_bits(0b011))); + e.add32(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010))); + e.add64(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010))); + e.add64(e.enc(fill.bind(I64), r_gp_fi, load_bits(0b011))); + + // No-op fills, created by late-stage redundant-fill removal. + for &ty in &[I64, I32] { + e.add64(e.enc(fill_nop.bind(ty), r_fillnull, 0)); + e.add32(e.enc(fill_nop.bind(ty), r_fillnull, 0)); + } + e.add64(e.enc(fill_nop.bind(B1), r_fillnull, 0)); + e.add32(e.enc(fill_nop.bind(B1), r_fillnull, 0)); + + // Register copies. + e.add32(e.enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0))); + e.add64(e.enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0))); + e.add64(e.enc(copy.bind(I32), r_icopy, opimm32_bits(0b000, 0))); + + e.add32(e.enc(regmove.bind(I32), r_irmov, opimm_bits(0b000, 0))); + e.add64(e.enc(regmove.bind(I64), r_irmov, opimm_bits(0b000, 0))); + e.add64(e.enc(regmove.bind(I32), r_irmov, opimm32_bits(0b000, 0))); + + e.add32(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0))); + e.add64(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0))); + e.add32(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0))); + e.add64(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0))); + + // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn + // into a no-op. + // The same encoding is generated for both the 64- and 32-bit architectures. 
+ for &ty in &[I64, I32, I16, I8] { + e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0)); + e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0)); + } + for &ty in &[F64, F32] { + e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0)); + e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0)); + } + + // Copy-to-SSA + e.add32(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm_bits(0b000, 0))); + e.add64(e.enc(copy_to_ssa.bind(I64), r_copytossa, opimm_bits(0b000, 0))); + e.add64(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm32_bits(0b000, 0))); + e.add32(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0))); + e.add64(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0))); + e.add32(e.enc(copy_to_ssa.bind(R32), r_copytossa, opimm_bits(0b000, 0))); + e.add64(e.enc(copy_to_ssa.bind(R64), r_copytossa, opimm_bits(0b000, 0))); + + e +} diff --git a/cranelift/codegen/meta/src/isa/riscv/mod.rs b/cranelift/codegen/meta/src/isa/riscv/mod.rs new file mode 100644 index 0000000000..801e61a3d2 --- /dev/null +++ b/cranelift/codegen/meta/src/isa/riscv/mod.rs @@ -0,0 +1,134 @@ +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::instructions::InstructionGroupBuilder; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; +use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder}; + +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I32, I64}; +use crate::shared::Definitions as SharedDefinitions; + +mod encodings; +mod recipes; + +fn define_settings(shared: &SettingGroup) -> SettingGroup { + let mut setting = SettingGroupBuilder::new("riscv"); + + let supports_m = setting.add_bool( + "supports_m", + "CPU supports the 'M' extension (mul/div)", + false, + ); + let supports_a = setting.add_bool( + "supports_a", + "CPU supports the 'A' extension (atomics)", + false, + ); + let supports_f = setting.add_bool( + "supports_f", + "CPU supports the 'F' extension (float)", + false, + ); + let supports_d = setting.add_bool( + "supports_d", + "CPU supports the 'D' extension (double)", + false, + ); + + let enable_m = setting.add_bool( + "enable_m", + "Enable the use of 'M' instructions if available", + true, + ); + + setting.add_bool( + "enable_e", + "Enable the 'RV32E' instruction set with only 16 registers", + false, + ); + + let shared_enable_atomics = shared.get_bool("enable_atomics"); + let shared_enable_float = shared.get_bool("enable_float"); + let shared_enable_simd = shared.get_bool("enable_simd"); + + setting.add_predicate("use_m", predicate!(supports_m && enable_m)); + setting.add_predicate("use_a", predicate!(supports_a && shared_enable_atomics)); + setting.add_predicate("use_f", predicate!(supports_f && shared_enable_float)); + setting.add_predicate("use_d", predicate!(supports_d && shared_enable_float)); + setting.add_predicate( + "full_float", + predicate!(shared_enable_simd && supports_f && supports_d), + ); + + setting.build() +} + +fn define_registers() -> IsaRegs { + let mut regs = IsaRegsBuilder::new(); + + let builder = RegBankBuilder::new("IntRegs", "x") + .units(32) + .track_pressure(true); + let int_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("FloatRegs", "f") + .units(32) + .track_pressure(true); + let float_regs = regs.add_bank(builder); + + let builder = RegClassBuilder::new_toplevel("GPR", int_regs); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("FPR", float_regs); + regs.add_class(builder); + + regs.build() +} + +pub(crate) 
fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
+    let settings = define_settings(&shared_defs.settings);
+    let regs = define_registers();
+
+    let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build();
+
+    // CPU modes for 32-bit and 64-bit operation.
+    let mut rv_32 = CpuMode::new("RV32");
+    let mut rv_64 = CpuMode::new("RV64");
+
+    let expand = shared_defs.transform_groups.by_name("expand");
+    let narrow_no_flags = shared_defs.transform_groups.by_name("narrow_no_flags");
+
+    rv_32.legalize_monomorphic(expand);
+    rv_32.legalize_default(narrow_no_flags);
+    rv_32.legalize_type(I32, expand);
+    rv_32.legalize_type(F32, expand);
+    rv_32.legalize_type(F64, expand);
+
+    rv_64.legalize_monomorphic(expand);
+    rv_64.legalize_default(narrow_no_flags);
+    rv_64.legalize_type(I32, expand);
+    rv_64.legalize_type(I64, expand);
+    rv_64.legalize_type(F32, expand);
+    rv_64.legalize_type(F64, expand);
+
+    let recipes = recipes::define(shared_defs, &regs);
+
+    let encodings = encodings::define(shared_defs, &settings, &recipes);
+    rv_32.set_encodings(encodings.enc32);
+    rv_64.set_encodings(encodings.enc64);
+    let encodings_predicates = encodings.inst_pred_reg.extract();
+
+    let recipes = recipes.collect();
+
+    let cpu_modes = vec![rv_32, rv_64];
+
+    TargetIsa::new(
+        "riscv",
+        inst_group,
+        settings,
+        regs,
+        recipes,
+        cpu_modes,
+        encodings_predicates,
+    )
+}
diff --git a/cranelift/codegen/meta/src/isa/riscv/recipes.rs b/cranelift/codegen/meta/src/isa/riscv/recipes.rs
new file mode 100644
index 0000000000..a75e42c236
--- /dev/null
+++ b/cranelift/codegen/meta/src/isa/riscv/recipes.rs
@@ -0,0 +1,278 @@
+use std::collections::HashMap;
+
+use crate::cdsl::instructions::InstructionPredicate;
+use crate::cdsl::recipes::{EncodingRecipeBuilder, EncodingRecipeNumber, Recipes, Stack};
+use crate::cdsl::regs::IsaRegs;
+use crate::shared::Definitions as SharedDefinitions;
+
+/// A helper to create recipes and use them when defining the RISC-V encodings.
+pub(crate) struct RecipeGroup {
+    /// The actual list of recipes explicitly created in this file.
+    pub recipes: Recipes,
+
+    /// Provides fast lookup from a name to an encoding recipe.
+    name_to_recipe: HashMap<String, EncodingRecipeNumber>,
+}
+
+impl RecipeGroup {
+    fn new() -> Self {
+        Self {
+            recipes: Recipes::new(),
+            name_to_recipe: HashMap::new(),
+        }
+    }
+
+    fn push(&mut self, builder: EncodingRecipeBuilder) {
+        assert!(
+            self.name_to_recipe.get(&builder.name).is_none(),
+            format!("riscv recipe '{}' created twice", builder.name)
+        );
+        let name = builder.name.clone();
+        let number = self.recipes.push(builder.build());
+        self.name_to_recipe.insert(name, number);
+    }
+
+    pub fn by_name(&self, name: &str) -> EncodingRecipeNumber {
+        *self
+            .name_to_recipe
+            .get(name)
+            .unwrap_or_else(|| panic!("unknown riscv recipe name {}", name))
+    }
+
+    pub fn collect(self) -> Recipes {
+        self.recipes
+    }
+}
+
+pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeGroup {
+    let formats = &shared_defs.formats;
+
+    // Register class shorthands.
+    let gpr = regs.class_by_name("GPR");
+
+    // Definitions.
+    let mut recipes = RecipeGroup::new();
+
+    // R-type 32-bit instructions: These are mostly binary arithmetic instructions.
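+    // A recipe pairs an instruction format with operand constraints and an `emit`
+    // snippet; the per-encoding encbits computed in encodings.rs reach that snippet
+    // as its `bits` argument (see the `put_r` calls below).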
+ // The encbits are `opcode[6:2] | (funct3 << 5) | (funct7 << 8) + recipes.push( + EncodingRecipeBuilder::new("R", &formats.binary, 4) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .emit("put_r(bits, in_reg0, in_reg1, out_reg0, sink);"), + ); + + // R-type with an immediate shift amount instead of rs2. + recipes.push( + EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm, 4) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .emit("put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);"), + ); + + // R-type encoding of an integer comparison. + recipes.push( + EncodingRecipeBuilder::new("Ricmp", &formats.int_compare, 4) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .emit("put_r(bits, in_reg0, in_reg1, out_reg0, sink);"), + ); + + recipes.push( + EncodingRecipeBuilder::new("Ii", &formats.binary_imm, 4) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.binary_imm, + "imm", + 12, + 0, + )) + .emit("put_i(bits, in_reg0, imm.into(), out_reg0, sink);"), + ); + + // I-type instruction with a hardcoded %x0 rs1. + recipes.push( + EncodingRecipeBuilder::new("Iz", &formats.unary_imm, 4) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &formats.unary_imm, + "imm", + 12, + 0, + )) + .emit("put_i(bits, 0, imm.into(), out_reg0, sink);"), + ); + + // I-type encoding of an integer comparison. + recipes.push( + EncodingRecipeBuilder::new("Iicmp", &formats.int_compare_imm, 4) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &formats.int_compare_imm, + "imm", + 12, + 0, + )) + .emit("put_i(bits, in_reg0, imm.into(), out_reg0, sink);"), + ); + + // I-type encoding for `jalr` as a return instruction. We won't use the immediate offset. The + // variable return values are not encoded. + recipes.push( + EncodingRecipeBuilder::new("Iret", &formats.multiary, 4).emit( + r#" + // Return instructions are always a jalr to %x1. + // The return address is provided as a special-purpose link argument. + put_i( + bits, + 1, // rs1 = %x1 + 0, // no offset. + 0, // rd = %x0: no address written. + sink, + ); + "#, + ), + ); + + // I-type encoding for `jalr` as a call_indirect. + recipes.push( + EncodingRecipeBuilder::new("Icall", &formats.call_indirect, 4) + .operands_in(vec![gpr]) + .emit( + r#" + // call_indirect instructions are jalr with rd=%x1. + put_i( + bits, + in_reg0, + 0, // no offset. + 1, // rd = %x1: link register. + sink, + ); + "#, + ), + ); + + // Copy of a GPR is implemented as addi x, 0. + recipes.push( + EncodingRecipeBuilder::new("Icopy", &formats.unary, 4) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .emit("put_i(bits, in_reg0, 0, out_reg0, sink);"), + ); + + // Same for a GPR regmove. + recipes.push( + EncodingRecipeBuilder::new("Irmov", &formats.reg_move, 4) + .operands_in(vec![gpr]) + .emit("put_i(bits, src, 0, dst, sink);"), + ); + + // Same for copy-to-SSA -- GPR regmove. + recipes.push( + EncodingRecipeBuilder::new("copytossa", &formats.copy_to_ssa, 4) + // No operands_in to mention, because a source register is specified directly. + .operands_out(vec![gpr]) + .emit("put_i(bits, src, 0, out_reg0, sink);"), + ); + + // U-type instructions have a 20-bit immediate that targets bits 12-31. 
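+    // For example (illustrative): `lui x5, 0x12345` writes 0x12345000 to x5, which
+    // is why the predicate below only accepts immediates that fit in signed 32 bits
+    // with the low 12 bits clear.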
+ recipes.push( + EncodingRecipeBuilder::new("U", &formats.unary_imm, 4) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &formats.unary_imm, + "imm", + 32, + 12, + )) + .emit("put_u(bits, imm.into(), out_reg0, sink);"), + ); + + // UJ-type unconditional branch instructions. + recipes.push( + EncodingRecipeBuilder::new("UJ", &formats.jump, 4) + .branch_range((0, 21)) + .emit( + r#" + let dest = i64::from(func.offsets[destination]); + let disp = dest - i64::from(sink.offset()); + put_uj(bits, disp, 0, sink); + "#, + ), + ); + + recipes.push(EncodingRecipeBuilder::new("UJcall", &formats.call, 4).emit( + r#" + sink.reloc_external(Reloc::RiscvCall, + &func.dfg.ext_funcs[func_ref].name, + 0); + // rd=%x1 is the standard link register. + put_uj(bits, 0, 1, sink); + "#, + )); + + // SB-type branch instructions. + recipes.push( + EncodingRecipeBuilder::new("SB", &formats.branch_icmp, 4) + .operands_in(vec![gpr, gpr]) + .branch_range((0, 13)) + .emit( + r#" + let dest = i64::from(func.offsets[destination]); + let disp = dest - i64::from(sink.offset()); + put_sb(bits, disp, in_reg0, in_reg1, sink); + "#, + ), + ); + + // SB-type branch instruction with rs2 fixed to zero. + recipes.push( + EncodingRecipeBuilder::new("SBzero", &formats.branch, 4) + .operands_in(vec![gpr]) + .branch_range((0, 13)) + .emit( + r#" + let dest = i64::from(func.offsets[destination]); + let disp = dest - i64::from(sink.offset()); + put_sb(bits, disp, in_reg0, 0, sink); + "#, + ), + ); + + // Spill of a GPR. + recipes.push( + EncodingRecipeBuilder::new("GPsp", &formats.unary, 4) + .operands_in(vec![gpr]) + .operands_out(vec![Stack::new(gpr)]) + .emit("unimplemented!();"), + ); + + // Fill of a GPR. + recipes.push( + EncodingRecipeBuilder::new("GPfi", &formats.unary, 4) + .operands_in(vec![Stack::new(gpr)]) + .operands_out(vec![gpr]) + .emit("unimplemented!();"), + ); + + // Stack-slot to same stack-slot copy, which is guaranteed to turn into a no-op. + recipes.push( + EncodingRecipeBuilder::new("stacknull", &formats.unary, 0) + .operands_in(vec![Stack::new(gpr)]) + .operands_out(vec![Stack::new(gpr)]) + .emit(""), + ); + + // No-op fills, created by late-stage redundant-fill removal. 
+    recipes.push(
+        EncodingRecipeBuilder::new("fillnull", &formats.unary, 0)
+            .operands_in(vec![Stack::new(gpr)])
+            .operands_out(vec![gpr])
+            .clobbers_flags(false)
+            .emit(""),
+    );
+
+    recipes
+}
diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
new file mode 100644
index 0000000000..5dd6edc380
--- /dev/null
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -0,0 +1,2420 @@
+#![allow(non_snake_case)]
+
+use cranelift_codegen_shared::condcodes::IntCC;
+use std::collections::HashMap;
+
+use crate::cdsl::encodings::{Encoding, EncodingBuilder};
+use crate::cdsl::instructions::{
+    vector, Bindable, Immediate, InstSpec, Instruction, InstructionGroup, InstructionPredicate,
+    InstructionPredicateNode, InstructionPredicateRegistry,
+};
+use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
+use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};
+use crate::cdsl::types::{LaneType, ValueType};
+use crate::shared::types::Bool::{B1, B16, B32, B64, B8};
+use crate::shared::types::Float::{F32, F64};
+use crate::shared::types::Int::{I16, I32, I64, I8};
+use crate::shared::types::Reference::{R32, R64};
+use crate::shared::Definitions as SharedDefinitions;
+
+use crate::isa::x86::opcodes::*;
+
+use super::recipes::{RecipeGroup, Template};
+use crate::cdsl::instructions::BindParameter::Any;
+
+pub(crate) struct PerCpuModeEncodings {
+    pub enc32: Vec<Encoding>,
+    pub enc64: Vec<Encoding>,
+    pub recipes: Recipes,
+    recipes_by_name: HashMap<String, EncodingRecipeNumber>,
+    pub inst_pred_reg: InstructionPredicateRegistry,
+}
+
+impl PerCpuModeEncodings {
+    fn new() -> Self {
+        Self {
+            enc32: Vec::new(),
+            enc64: Vec::new(),
+            recipes: Recipes::new(),
+            recipes_by_name: HashMap::new(),
+            inst_pred_reg: InstructionPredicateRegistry::new(),
+        }
+    }
+
+    fn add_recipe(&mut self, recipe: EncodingRecipe) -> EncodingRecipeNumber {
+        if let Some(found_index) = self.recipes_by_name.get(&recipe.name) {
+            assert!(
+                self.recipes[*found_index] == recipe,
+                format!(
+                    "trying to insert different recipes with the same name ({})",
+                    recipe.name
+                )
+            );
+            *found_index
+        } else {
+            let recipe_name = recipe.name.clone();
+            let index = self.recipes.push(recipe);
+            self.recipes_by_name.insert(recipe_name, index);
+            index
+        }
+    }
+
+    fn make_encoding<T>(
+        &mut self,
+        inst: InstSpec,
+        template: Template,
+        builder_closure: T,
+    ) -> Encoding
+    where
+        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
+    {
+        let (recipe, bits) = template.build();
+        let recipe_number = self.add_recipe(recipe);
+        let builder = EncodingBuilder::new(inst, recipe_number, bits);
+        builder_closure(builder).build(&self.recipes, &mut self.inst_pred_reg)
+    }
+
+    fn enc32_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
+    where
+        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
+    {
+        let encoding = self.make_encoding(inst.into(), template, builder_closure);
+        self.enc32.push(encoding);
+    }
+    fn enc32(&mut self, inst: impl Into<InstSpec>, template: Template) {
+        self.enc32_func(inst, template, |x| x);
+    }
+    fn enc32_isap(
+        &mut self,
+        inst: impl Into<InstSpec>,
+        template: Template,
+        isap: SettingPredicateNumber,
+    ) {
+        self.enc32_func(inst, template, |encoding| encoding.isa_predicate(isap));
+    }
+    fn enc32_instp(
+        &mut self,
+        inst: impl Into<InstSpec>,
+        template: Template,
+        instp: InstructionPredicateNode,
+    ) {
+        self.enc32_func(inst, template, |encoding| encoding.inst_predicate(instp));
+    }
+    fn enc32_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
+        let recipe_number =
self.add_recipe(recipe.clone());
+        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
+        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
+        self.enc32.push(encoding);
+    }
+
+    fn enc64_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
+    where
+        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
+    {
+        let encoding = self.make_encoding(inst.into(), template, builder_closure);
+        self.enc64.push(encoding);
+    }
+    fn enc64(&mut self, inst: impl Into<InstSpec>, template: Template) {
+        self.enc64_func(inst, template, |x| x);
+    }
+    fn enc64_isap(
+        &mut self,
+        inst: impl Into<InstSpec>,
+        template: Template,
+        isap: SettingPredicateNumber,
+    ) {
+        self.enc64_func(inst, template, |encoding| encoding.isa_predicate(isap));
+    }
+    fn enc64_instp(
+        &mut self,
+        inst: impl Into<InstSpec>,
+        template: Template,
+        instp: InstructionPredicateNode,
+    ) {
+        self.enc64_func(inst, template, |encoding| encoding.inst_predicate(instp));
+    }
+    fn enc64_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
+        let recipe_number = self.add_recipe(recipe.clone());
+        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
+        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
+        self.enc64.push(encoding);
+    }
+
+    /// Adds I32/I64 encodings as appropriate for a typed instruction.
+    /// The REX prefix is always inferred at runtime.
+    ///
+    /// Add encodings for `inst.i32` to X86_32.
+    /// Add encodings for `inst.i32` to X86_64 with optional, inferred REX.
+    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
+    fn enc_i32_i64(&mut self, inst: impl Into<InstSpec>, template: Template) {
+        let inst: InstSpec = inst.into();
+
+        // I32 on x86: no REX prefix.
+        self.enc32(inst.bind(I32), template.infer_rex());
+
+        // I32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers.
+        self.enc64(inst.bind(I32), template.infer_rex());
+
+        // I64 on x86_64: REX.W set; REX.RXB determined at runtime from registers.
+        self.enc64(inst.bind(I64), template.infer_rex().w());
+    }
+
+    /// Adds I32/I64 encodings as appropriate for a typed instruction.
+    /// All variants of the REX prefix are explicitly emitted, not inferred.
+    ///
+    /// Add encodings for `inst.i32` to X86_32.
+    /// Add encodings for `inst.i32` to X86_64 with and without REX.
+    /// Add encodings for `inst.i64` to X86_64 with and without REX.
+    fn enc_i32_i64_explicit_rex(&mut self, inst: impl Into<InstSpec>, template: Template) {
+        let inst: InstSpec = inst.into();
+        self.enc32(inst.bind(I32), template.nonrex());
+
+        // REX-less encoding must come after REX encoding so we don't use it by default.
+        // Otherwise reg-alloc would never use r8 and up.
+        self.enc64(inst.bind(I32), template.rex());
+        self.enc64(inst.bind(I32), template.nonrex());
+        self.enc64(inst.bind(I64), template.rex().w());
+    }
+
+    /// Adds B32/B64 encodings as appropriate for a typed instruction.
+    /// The REX prefix is always inferred at runtime.
+    ///
+    /// Adds encoding for `inst.b32` to X86_32.
+    /// Adds encoding for `inst.b32` to X86_64 with optional, inferred REX.
+    /// Adds encoding for `inst.b64` to X86_64 with a REX.W prefix.
+    fn enc_b32_b64(&mut self, inst: impl Into<InstSpec>, template: Template) {
+        let inst: InstSpec = inst.into();
+
+        // B32 on x86: no REX prefix.
+        self.enc32(inst.bind(B32), template.infer_rex());
+
+        // B32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers.
+        self.enc64(inst.bind(B32), template.infer_rex());
+
+        // B64 on x86_64: REX.W set; REX.RXB determined at runtime from registers.
+        self.enc64(inst.bind(B64), template.infer_rex().w());
+    }
+
+    /// Add encodings for `inst.i32` to X86_32.
+    /// Add encodings for `inst.i32` to X86_64 with a REX prefix.
+    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
+    fn enc_i32_i64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
+        let inst: InstSpec = inst.into();
+        self.enc32(inst.bind(I32), template.nonrex());
+        self.enc64(inst.bind(I32), template.rex());
+        self.enc64(inst.bind(I64), template.rex().w());
+    }
+
+    /// Add encodings for `inst.i32` to X86_32.
+    /// Add encodings for `inst.i32` to X86_64 with and without REX.
+    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
+    fn enc_i32_i64_instp(
+        &mut self,
+        inst: &Instruction,
+        template: Template,
+        instp: InstructionPredicateNode,
+    ) {
+        self.enc32_func(inst.bind(I32), template.nonrex(), |builder| {
+            builder.inst_predicate(instp.clone())
+        });
+
+        // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
+        // reg-alloc would never use r8 and up.
+        self.enc64_func(inst.bind(I32), template.rex(), |builder| {
+            builder.inst_predicate(instp.clone())
+        });
+        self.enc64_func(inst.bind(I32), template.nonrex(), |builder| {
+            builder.inst_predicate(instp.clone())
+        });
+        self.enc64_func(inst.bind(I64), template.rex().w(), |builder| {
+            builder.inst_predicate(instp)
+        });
+    }
+
+    /// Add encodings for `inst.r32` to X86_32.
+    /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix.
+    fn enc_r32_r64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
+        let inst: InstSpec = inst.into();
+        self.enc32(inst.bind(R32), template.nonrex());
+        self.enc64(inst.bind(R64), template.rex().w());
+    }
+
+    fn enc_r32_r64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) {
+        self.enc32(inst.clone().bind(R32).bind(Any), template.clone());
+
+        // REX-less encoding must come after REX encoding so we don't use it by
+        // default. Otherwise reg-alloc would never use r8 and up.
+        self.enc64(inst.clone().bind(R32).bind(Any), template.clone().rex());
+        self.enc64(inst.clone().bind(R32).bind(Any), template.clone());
+
+        if w_bit {
+            self.enc64(inst.clone().bind(R64).bind(Any), template.rex().w());
+        } else {
+            self.enc64(inst.clone().bind(R64).bind(Any), template.clone().rex());
+            self.enc64(inst.clone().bind(R64).bind(Any), template);
+        }
+    }
+
+    /// Add encodings for `inst` to X86_64 with and without a REX prefix.
+    fn enc_x86_64(&mut self, inst: impl Into<InstSpec> + Clone, template: Template) {
+        // See above comment about the ordering of rex vs non-rex encodings.
+        self.enc64(inst.clone(), template.rex());
+        self.enc64(inst, template);
+    }
+
+    /// Add encodings for `inst` to X86_64 with and without a REX prefix.
+    fn enc_x86_64_instp(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        template: Template,
+        instp: InstructionPredicateNode,
+    ) {
+        // See above comment about the ordering of rex vs non-rex encodings.
+        self.enc64_func(inst.clone(), template.rex(), |builder| {
+            builder.inst_predicate(instp.clone())
+        });
+        self.enc64_func(inst, template, |builder| builder.inst_predicate(instp));
+    }
+    fn enc_x86_64_isap(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        template: Template,
+        isap: SettingPredicateNumber,
+    ) {
+        // See above comment about the ordering of rex vs non-rex encodings.
+        self.enc64_isap(inst.clone(), template.rex(), isap);
+        self.enc64_isap(inst, template, isap);
+    }
+
+    /// Add all three encodings for `inst`:
+    /// - X86_32
+    /// - X86_64 with and without the REX prefix.
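+    ///
+    /// For example, `enc_both(copy.bind(B1), rec_umr.opcodes(&MOV_STORE))` (used
+    /// below) registers the `b1` copy once for X86_32 and twice for X86_64, with
+    /// and without REX.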
+    fn enc_both(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
+        self.enc32(inst.clone(), template.clone());
+        self.enc_x86_64(inst, template);
+    }
+    fn enc_both_isap(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        template: Template,
+        isap: SettingPredicateNumber,
+    ) {
+        self.enc32_isap(inst.clone(), template.clone(), isap);
+        self.enc_x86_64_isap(inst, template, isap);
+    }
+    fn enc_both_instp(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        template: Template,
+        instp: InstructionPredicateNode,
+    ) {
+        self.enc32_instp(inst.clone(), template.clone(), instp.clone());
+        self.enc_x86_64_instp(inst, template, instp);
+    }
+
+    /// Add two encodings for `inst`:
+    /// - X86_32
+    /// - X86_64 with the REX prefix.
+    fn enc_both_rex_only(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
+        self.enc32(inst.clone(), template.clone());
+        self.enc64(inst, template.rex());
+    }
+
+    /// Add encodings for `inst.i32` to X86_32.
+    /// Add encodings for `inst.i32` to X86_64 with and without REX.
+    /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit`
+    /// argument to determine whether or not to set the REX.W bit.
+    fn enc_i32_i64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) {
+        self.enc32(inst.clone().bind(I32).bind(Any), template.clone());
+
+        // REX-less encoding must come after REX encoding so we don't use it by
+        // default. Otherwise reg-alloc would never use r8 and up.
+        self.enc64(inst.clone().bind(I32).bind(Any), template.clone().rex());
+        self.enc64(inst.clone().bind(I32).bind(Any), template.clone());
+
+        if w_bit {
+            self.enc64(inst.clone().bind(I64).bind(Any), template.rex().w());
+        } else {
+            self.enc64(inst.clone().bind(I64).bind(Any), template.clone().rex());
+            self.enc64(inst.clone().bind(I64).bind(Any), template);
+        }
+    }
+
+    /// Add the same encoding/template pairing to both X86_32 and X86_64.
+    fn enc_32_64(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
+        self.enc32(inst.clone(), template.clone());
+        self.enc64(inst, template);
+    }
+
+    /// Add the same encoding/recipe pairing to both X86_32 and X86_64.
+    fn enc_32_64_rec(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        recipe: &EncodingRecipe,
+        bits: u16,
+    ) {
+        self.enc32_rec(inst.clone(), recipe, bits);
+        self.enc64_rec(inst, recipe, bits);
+    }
+
+    /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX,
+    /// operand binding) has already happened.
+    fn enc_32_64_func<T>(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        template: Template,
+        builder_closure: T,
+    ) where
+        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
+    {
+        let encoding = self.make_encoding(inst.into(), template, builder_closure);
+        self.enc32.push(encoding.clone());
+        self.enc64.push(encoding);
+    }
+
+    /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand
+    /// binding) has already happened.
+    fn enc_32_64_maybe_isap(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        template: Template,
+        isap: Option<SettingPredicateNumber>,
+    ) {
+        self.enc32_maybe_isap(inst.clone(), template.clone(), isap);
+        self.enc64_maybe_isap(inst, template, isap);
+    }
+
+    fn enc32_maybe_isap(
+        &mut self,
+        inst: impl Into<InstSpec>,
+        template: Template,
+        isap: Option<SettingPredicateNumber>,
+    ) {
+        match isap {
+            None => self.enc32(inst, template),
+            Some(isap) => self.enc32_isap(inst, template, isap),
+        }
+    }
+
+    fn enc64_maybe_isap(
+        &mut self,
+        inst: impl Into<InstSpec>,
+        template: Template,
+        isap: Option<SettingPredicateNumber>,
+    ) {
+        match isap {
+            None => self.enc64(inst, template),
+            Some(isap) => self.enc64_isap(inst, template, isap),
+        }
+    }
+}
+
+// Definitions.
+
+#[inline(never)]
+fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) {
+    let shared = &shared_defs.instructions;
+    let formats = &shared_defs.formats;
+
+    // Shorthands for instructions.
+    let bconst = shared.by_name("bconst");
+    let bint = shared.by_name("bint");
+    let copy = shared.by_name("copy");
+    let copy_special = shared.by_name("copy_special");
+    let copy_to_ssa = shared.by_name("copy_to_ssa");
+    let get_pinned_reg = shared.by_name("get_pinned_reg");
+    let iconst = shared.by_name("iconst");
+    let ireduce = shared.by_name("ireduce");
+    let regmove = shared.by_name("regmove");
+    let sextend = shared.by_name("sextend");
+    let set_pinned_reg = shared.by_name("set_pinned_reg");
+    let uextend = shared.by_name("uextend");
+
+    // Shorthands for recipes.
+    let rec_copysp = r.template("copysp");
+    let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa");
+    let rec_get_pinned_reg = r.recipe("get_pinned_reg");
+    let rec_null = r.recipe("null");
+    let rec_pu_id = r.template("pu_id");
+    let rec_pu_id_bool = r.template("pu_id_bool");
+    let rec_pu_iq = r.template("pu_iq");
+    let rec_rmov = r.template("rmov");
+    let rec_set_pinned_reg = r.template("set_pinned_reg");
+    let rec_u_id = r.template("u_id");
+    let rec_u_id_z = r.template("u_id_z");
+    let rec_umr = r.template("umr");
+    let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa");
+    let rec_urm_noflags = r.template("urm_noflags");
+    let rec_urm_noflags_abcd = r.template("urm_noflags_abcd");
+
+    // The pinned register is fixed to a value that is entirely user-controlled, so reading it
+    // generates no code at all!
+    e.enc64_rec(get_pinned_reg.bind(I64), rec_get_pinned_reg, 0);
+    e.enc_x86_64(
+        set_pinned_reg.bind(I64),
+        rec_set_pinned_reg.opcodes(&MOV_STORE).rex().w(),
+    );
+
+    e.enc_i32_i64(copy, rec_umr.opcodes(&MOV_STORE));
+    e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(&MOV_STORE));
+    e.enc_both(copy.bind(B1), rec_umr.opcodes(&MOV_STORE));
+    e.enc_both(copy.bind(I8), rec_umr.opcodes(&MOV_STORE));
+    e.enc_both(copy.bind(I16), rec_umr.opcodes(&MOV_STORE));
+
+    // TODO For x86-64, only define REX forms for now, since we can't describe the
+    // special regunit immediate operands with the current constraint language.
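+    // (A `regmove` names its source and destination register units directly in the
+    // instruction rather than through operand constraints, so the runtime REX.RXB
+    // inference cannot see them; emitting an explicit REX form keeps r8-r15 usable.)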
+ for &ty in &[I8, I16, I32] { + e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE)); + e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex()); + } + for &ty in &[B8, B16, B32] { + e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE)); + e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex()); + } + e.enc64(regmove.bind(I64), rec_rmov.opcodes(&MOV_STORE).rex().w()); + e.enc_both(regmove.bind(B1), rec_rmov.opcodes(&MOV_STORE)); + e.enc_both(regmove.bind(I8), rec_rmov.opcodes(&MOV_STORE)); + e.enc32(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE)); + e.enc64(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE).rex()); + e.enc64(regmove.bind(R64), rec_rmov.opcodes(&MOV_STORE).rex().w()); + + // Immediate constants. + e.enc32(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM)); + + e.enc64(iconst.bind(I32), rec_pu_id.rex().opcodes(&MOV_IMM)); + e.enc64(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM)); + + // The 32-bit immediate movl also zero-extends to 64 bits. + let is_unsigned_int32 = + InstructionPredicate::new_is_unsigned_int(&*formats.unary_imm, "imm", 32, 0); + + e.enc64_func( + iconst.bind(I64), + rec_pu_id.opcodes(&MOV_IMM).rex(), + |encoding| encoding.inst_predicate(is_unsigned_int32.clone()), + ); + e.enc64_func(iconst.bind(I64), rec_pu_id.opcodes(&MOV_IMM), |encoding| { + encoding.inst_predicate(is_unsigned_int32) + }); + + // Sign-extended 32-bit immediate. + e.enc64( + iconst.bind(I64), + rec_u_id.rex().opcodes(&MOV_IMM_SIGNEXTEND).rrr(0).w(), + ); + + // Finally, the MOV_IMM opcode takes an 8-byte immediate with a REX.W prefix. + e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(&MOV_IMM).rex().w()); + + // Bool constants (uses MOV) + for &ty in &[B1, B8, B16, B32] { + e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(&MOV_IMM)); + } + e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(&MOV_IMM).rex()); + + let is_zero_int = InstructionPredicate::new_is_zero_int(&formats.unary_imm, "imm"); + e.enc_both_instp( + iconst.bind(I8), + rec_u_id_z.opcodes(&XORB), + is_zero_int.clone(), + ); + + // You may expect that i16 encodings would have an 0x66 prefix on the opcode to indicate that + // encodings should be on 16-bit operands (f.ex, "xor %ax, %ax"). Cranelift currently does not + // know that it can drop the 0x66 prefix and clear the upper half of a 32-bit register in these + // scenarios, so we explicitly select a wider but permissible opcode. + // + // This effectively formalizes the i16->i32 widening that Cranelift performs when there isn't + // an appropriate i16 encoding available. + e.enc_both_instp( + iconst.bind(I16), + rec_u_id_z.opcodes(&XOR), + is_zero_int.clone(), + ); + e.enc_both_instp( + iconst.bind(I32), + rec_u_id_z.opcodes(&XOR), + is_zero_int.clone(), + ); + e.enc_x86_64_instp(iconst.bind(I64), rec_u_id_z.opcodes(&XOR), is_zero_int); + + // Numerical conversions. + + // Reducing an integer is a no-op. + e.enc32_rec(ireduce.bind(I8).bind(I16), rec_null, 0); + e.enc32_rec(ireduce.bind(I8).bind(I32), rec_null, 0); + e.enc32_rec(ireduce.bind(I16).bind(I32), rec_null, 0); + + e.enc64_rec(ireduce.bind(I8).bind(I16), rec_null, 0); + e.enc64_rec(ireduce.bind(I8).bind(I32), rec_null, 0); + e.enc64_rec(ireduce.bind(I16).bind(I32), rec_null, 0); + e.enc64_rec(ireduce.bind(I8).bind(I64), rec_null, 0); + e.enc64_rec(ireduce.bind(I16).bind(I64), rec_null, 0); + e.enc64_rec(ireduce.bind(I32).bind(I64), rec_null, 0); + + // TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending + // instructions for %al/%ax/%eax to %ax/%eax/%rax. 
+ + // movsbl + e.enc32( + sextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), + ); + e.enc64( + sextend.bind(I32).bind(I8), + rec_urm_noflags.opcodes(&MOVSX_BYTE).rex(), + ); + e.enc64( + sextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), + ); + + // movswl + e.enc32( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD), + ); + e.enc64( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD).rex(), + ); + e.enc64( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD), + ); + + // movsbq + e.enc64( + sextend.bind(I64).bind(I8), + rec_urm_noflags.opcodes(&MOVSX_BYTE).rex().w(), + ); + + // movswq + e.enc64( + sextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD).rex().w(), + ); + + // movslq + e.enc64( + sextend.bind(I64).bind(I32), + rec_urm_noflags.opcodes(&MOVSXD).rex().w(), + ); + + // movzbl + e.enc32( + uextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + e.enc64( + uextend.bind(I32).bind(I8), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), + ); + e.enc64( + uextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + + // movzwl + e.enc32( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD), + ); + e.enc64( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), + ); + e.enc64( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD), + ); + + // movzbq, encoded as movzbl because it's equivalent and shorter. + e.enc64( + uextend.bind(I64).bind(I8), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), + ); + e.enc64( + uextend.bind(I64).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + + // movzwq, encoded as movzwl because it's equivalent and shorter + e.enc64( + uextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), + ); + e.enc64( + uextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD), + ); + + // A 32-bit register copy clears the high 32 bits. + e.enc64( + uextend.bind(I64).bind(I32), + rec_umr.opcodes(&MOV_STORE).rex(), + ); + e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(&MOV_STORE)); + + // Convert bool to int. + // + // This assumes that b1 is represented as an 8-bit low register with the value 0 + // or 1. + // + // Encode movzbq as movzbl, because it's equivalent and shorter. + for &to in &[I8, I16, I32, I64] { + for &from in &[B1, B8] { + e.enc64( + bint.bind(to).bind(from), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), + ); + e.enc64( + bint.bind(to).bind(from), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + if to != I64 { + e.enc32( + bint.bind(to).bind(from), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + } + } + } + + // Copy Special + // For x86-64, only define REX forms for now, since we can't describe the + // special regunit immediate operands with the current constraint language. + e.enc64(copy_special, rec_copysp.opcodes(&MOV_STORE).rex().w()); + e.enc32(copy_special, rec_copysp.opcodes(&MOV_STORE)); + + // Copy to SSA. These have to be done with special _rex_only encoders, because the standard + // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account + // the source register, which is specified directly in the instruction. 
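+    // (Always emitting REX is conservative but safe here: the prefix can address any
+    // of the sixteen general-purpose registers as the directly-specified source.)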
+ e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_both_rex_only( + copy_to_ssa.bind(I16), + rec_umr_reg_to_ssa.opcodes(&MOV_STORE), + ); + e.enc_both_rex_only( + copy_to_ssa.bind(F64), + rec_furm_reg_to_ssa.opcodes(&MOVSD_LOAD), + ); + e.enc_both_rex_only( + copy_to_ssa.bind(F32), + rec_furm_reg_to_ssa.opcodes(&MOVSS_LOAD), + ); +} + +#[inline(never)] +fn define_memory( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + x86: &InstructionGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions. + let adjust_sp_down = shared.by_name("adjust_sp_down"); + let adjust_sp_down_imm = shared.by_name("adjust_sp_down_imm"); + let adjust_sp_up_imm = shared.by_name("adjust_sp_up_imm"); + let copy_nop = shared.by_name("copy_nop"); + let fill = shared.by_name("fill"); + let fill_nop = shared.by_name("fill_nop"); + let istore16 = shared.by_name("istore16"); + let istore16_complex = shared.by_name("istore16_complex"); + let istore32 = shared.by_name("istore32"); + let istore32_complex = shared.by_name("istore32_complex"); + let istore8 = shared.by_name("istore8"); + let istore8_complex = shared.by_name("istore8_complex"); + let load = shared.by_name("load"); + let load_complex = shared.by_name("load_complex"); + let regfill = shared.by_name("regfill"); + let regspill = shared.by_name("regspill"); + let sload16 = shared.by_name("sload16"); + let sload16_complex = shared.by_name("sload16_complex"); + let sload32 = shared.by_name("sload32"); + let sload32_complex = shared.by_name("sload32_complex"); + let sload8 = shared.by_name("sload8"); + let sload8_complex = shared.by_name("sload8_complex"); + let spill = shared.by_name("spill"); + let store = shared.by_name("store"); + let store_complex = shared.by_name("store_complex"); + let uload16 = shared.by_name("uload16"); + let uload16_complex = shared.by_name("uload16_complex"); + let uload32 = shared.by_name("uload32"); + let uload32_complex = shared.by_name("uload32_complex"); + let uload8 = shared.by_name("uload8"); + let uload8_complex = shared.by_name("uload8_complex"); + let x86_pop = x86.by_name("x86_pop"); + let x86_push = x86.by_name("x86_push"); + + // Shorthands for recipes. 
+ let rec_adjustsp = r.template("adjustsp"); + let rec_adjustsp_ib = r.template("adjustsp_ib"); + let rec_adjustsp_id = r.template("adjustsp_id"); + let rec_ffillnull = r.recipe("ffillnull"); + let rec_fillnull = r.recipe("fillnull"); + let rec_fillSib32 = r.template("fillSib32"); + let rec_ld = r.template("ld"); + let rec_ldDisp32 = r.template("ldDisp32"); + let rec_ldDisp8 = r.template("ldDisp8"); + let rec_ldWithIndex = r.template("ldWithIndex"); + let rec_ldWithIndexDisp32 = r.template("ldWithIndexDisp32"); + let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8"); + let rec_popq = r.template("popq"); + let rec_pushq = r.template("pushq"); + let rec_regfill32 = r.template("regfill32"); + let rec_regspill32 = r.template("regspill32"); + let rec_spillSib32 = r.template("spillSib32"); + let rec_st = r.template("st"); + let rec_stacknull = r.recipe("stacknull"); + let rec_stDisp32 = r.template("stDisp32"); + let rec_stDisp32_abcd = r.template("stDisp32_abcd"); + let rec_stDisp8 = r.template("stDisp8"); + let rec_stDisp8_abcd = r.template("stDisp8_abcd"); + let rec_stWithIndex = r.template("stWithIndex"); + let rec_stWithIndexDisp32 = r.template("stWithIndexDisp32"); + let rec_stWithIndexDisp32_abcd = r.template("stWithIndexDisp32_abcd"); + let rec_stWithIndexDisp8 = r.template("stWithIndexDisp8"); + let rec_stWithIndexDisp8_abcd = r.template("stWithIndexDisp8_abcd"); + let rec_stWithIndex_abcd = r.template("stWithIndex_abcd"); + let rec_st_abcd = r.template("st_abcd"); + + // Loads and stores. + let is_load_complex_length_two = + InstructionPredicate::new_length_equals(&*formats.load_complex, 2); + + for recipe in &[rec_ldWithIndex, rec_ldWithIndexDisp8, rec_ldWithIndexDisp32] { + e.enc_i32_i64_instp( + load_complex, + recipe.opcodes(&MOV_LOAD), + is_load_complex_length_two.clone(), + ); + e.enc_x86_64_instp( + uload32_complex, + recipe.opcodes(&MOV_LOAD), + is_load_complex_length_two.clone(), + ); + + e.enc64_instp( + sload32_complex, + recipe.opcodes(&MOVSXD).rex().w(), + is_load_complex_length_two.clone(), + ); + + e.enc_i32_i64_instp( + uload16_complex, + recipe.opcodes(&MOVZX_WORD), + is_load_complex_length_two.clone(), + ); + e.enc_i32_i64_instp( + sload16_complex, + recipe.opcodes(&MOVSX_WORD), + is_load_complex_length_two.clone(), + ); + + e.enc_i32_i64_instp( + uload8_complex, + recipe.opcodes(&MOVZX_BYTE), + is_load_complex_length_two.clone(), + ); + + e.enc_i32_i64_instp( + sload8_complex, + recipe.opcodes(&MOVSX_BYTE), + is_load_complex_length_two.clone(), + ); + } + + let is_store_complex_length_three = + InstructionPredicate::new_length_equals(&*formats.store_complex, 3); + + for recipe in &[rec_stWithIndex, rec_stWithIndexDisp8, rec_stWithIndexDisp32] { + e.enc_i32_i64_instp( + store_complex, + recipe.opcodes(&MOV_STORE), + is_store_complex_length_three.clone(), + ); + e.enc_x86_64_instp( + istore32_complex, + recipe.opcodes(&MOV_STORE), + is_store_complex_length_three.clone(), + ); + e.enc_both_instp( + istore16_complex.bind(I32), + recipe.opcodes(&MOV_STORE_16), + is_store_complex_length_three.clone(), + ); + e.enc_x86_64_instp( + istore16_complex.bind(I64), + recipe.opcodes(&MOV_STORE_16), + is_store_complex_length_three.clone(), + ); + } + + for recipe in &[ + rec_stWithIndex_abcd, + rec_stWithIndexDisp8_abcd, + rec_stWithIndexDisp32_abcd, + ] { + e.enc_both_instp( + istore8_complex.bind(I32), + recipe.opcodes(&MOV_BYTE_STORE), + is_store_complex_length_three.clone(), + ); + e.enc_x86_64_instp( + istore8_complex.bind(I64), + recipe.opcodes(&MOV_BYTE_STORE), + 
is_store_complex_length_three.clone(), + ); + } + + for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] { + e.enc_i32_i64_ld_st(store, true, recipe.opcodes(&MOV_STORE)); + e.enc_r32_r64_ld_st(store, true, recipe.opcodes(&MOV_STORE)); + e.enc_x86_64(istore32.bind(I64).bind(Any), recipe.opcodes(&MOV_STORE)); + e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(&MOV_STORE_16)); + } + + // Byte stores are more complicated because the registers they can address + // depends of the presence of a REX prefix. The st*_abcd recipes fall back to + // the corresponding st* recipes when a REX prefix is applied. + + for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] { + e.enc_both(istore8.bind(I32).bind(Any), recipe.opcodes(&MOV_BYTE_STORE)); + e.enc_x86_64(istore8.bind(I64).bind(Any), recipe.opcodes(&MOV_BYTE_STORE)); + } + + e.enc_i32_i64_explicit_rex(spill, rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_i32_i64_explicit_rex(regspill, rec_regspill32.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(&MOV_STORE)); + + // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid + // constraining the permitted registers. + // See MIN_SPILL_SLOT_SIZE which makes this safe. + + e.enc_both(spill.bind(B1), rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_both(regspill.bind(B1), rec_regspill32.opcodes(&MOV_STORE)); + for &ty in &[I8, I16] { + e.enc_both(spill.bind(ty), rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_both(regspill.bind(ty), rec_regspill32.opcodes(&MOV_STORE)); + } + + for recipe in &[rec_ld, rec_ldDisp8, rec_ldDisp32] { + e.enc_i32_i64_ld_st(load, true, recipe.opcodes(&MOV_LOAD)); + e.enc_r32_r64_ld_st(load, true, recipe.opcodes(&MOV_LOAD)); + e.enc_x86_64(uload32.bind(I64), recipe.opcodes(&MOV_LOAD)); + e.enc64(sload32.bind(I64), recipe.opcodes(&MOVSXD).rex().w()); + e.enc_i32_i64_ld_st(uload16, true, recipe.opcodes(&MOVZX_WORD)); + e.enc_i32_i64_ld_st(sload16, true, recipe.opcodes(&MOVSX_WORD)); + e.enc_i32_i64_ld_st(uload8, true, recipe.opcodes(&MOVZX_BYTE)); + e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(&MOVSX_BYTE)); + } + + e.enc_i32_i64_explicit_rex(fill, rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_i32_i64_explicit_rex(regfill, rec_regfill32.opcodes(&MOV_LOAD)); + e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(&MOV_LOAD)); + + // No-op fills, created by late-stage redundant-fill removal. + for &ty in &[I64, I32, I16, I8] { + e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0); + e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0); + } + e.enc64_rec(fill_nop.bind(B1), rec_fillnull, 0); + e.enc32_rec(fill_nop.bind(B1), rec_fillnull, 0); + for &ty in &[F64, F32] { + e.enc64_rec(fill_nop.bind(ty), rec_ffillnull, 0); + e.enc32_rec(fill_nop.bind(ty), rec_ffillnull, 0); + } + + // Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above. + + e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_both(regfill.bind(B1), rec_regfill32.opcodes(&MOV_LOAD)); + for &ty in &[I8, I16] { + e.enc_both(fill.bind(ty), rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_both(regfill.bind(ty), rec_regfill32.opcodes(&MOV_LOAD)); + } + + // Push and Pop. 
+ e.enc32(x86_push.bind(I32), rec_pushq.opcodes(&PUSH_REG)); + e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(&PUSH_REG)); + + e.enc32(x86_pop.bind(I32), rec_popq.opcodes(&POP_REG)); + e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(&POP_REG)); + + // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn + // into a no-op. + // The same encoding is generated for both the 64- and 32-bit architectures. + for &ty in &[I64, I32, I16, I8] { + e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); + e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); + } + for &ty in &[F64, F32] { + e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); + e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); + } + + // Adjust SP down by a dynamic value (or up, with a negative operand). + e.enc32(adjust_sp_down.bind(I32), rec_adjustsp.opcodes(&SUB)); + e.enc64( + adjust_sp_down.bind(I64), + rec_adjustsp.opcodes(&SUB).rex().w(), + ); + + // Adjust SP up by an immediate (or down, with a negative immediate). + e.enc32(adjust_sp_up_imm, rec_adjustsp_ib.opcodes(&CMP_IMM8)); + e.enc32(adjust_sp_up_imm, rec_adjustsp_id.opcodes(&CMP_IMM)); + e.enc64( + adjust_sp_up_imm, + rec_adjustsp_ib.opcodes(&CMP_IMM8).rex().w(), + ); + e.enc64( + adjust_sp_up_imm, + rec_adjustsp_id.opcodes(&CMP_IMM).rex().w(), + ); + + // Adjust SP down by an immediate (or up, with a negative immediate). + e.enc32( + adjust_sp_down_imm, + rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5), + ); + e.enc32(adjust_sp_down_imm, rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5)); + e.enc64( + adjust_sp_down_imm, + rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5).rex().w(), + ); + e.enc64( + adjust_sp_down_imm, + rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5).rex().w(), + ); +} + +#[inline(never)] +fn define_fpu_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { + let shared = &shared_defs.instructions; + + // Shorthands for instructions. + let bitcast = shared.by_name("bitcast"); + let copy = shared.by_name("copy"); + let regmove = shared.by_name("regmove"); + + // Shorthands for recipes. + let rec_frmov = r.template("frmov"); + let rec_frurm = r.template("frurm"); + let rec_furm = r.template("furm"); + let rec_rfumr = r.template("rfumr"); + + // Floating-point moves. + // movd + e.enc_both( + bitcast.bind(F32).bind(I32), + rec_frurm.opcodes(&MOVD_LOAD_XMM), + ); + e.enc_both( + bitcast.bind(I32).bind(F32), + rec_rfumr.opcodes(&MOVD_STORE_XMM), + ); + + // movq + e.enc64( + bitcast.bind(F64).bind(I64), + rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(), + ); + e.enc64( + bitcast.bind(I64).bind(F64), + rec_rfumr.opcodes(&MOVD_STORE_XMM).rex().w(), + ); + + // movaps + e.enc_both(copy.bind(F32), rec_furm.opcodes(&MOVAPS_LOAD)); + e.enc_both(copy.bind(F64), rec_furm.opcodes(&MOVAPS_LOAD)); + + // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit + // immediate operands with the current constraint language. + e.enc32(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD)); + e.enc64(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); + + // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit + // immediate operands with the current constraint language. 
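+    // (regmove is a register-allocator directive rather than a value computation,
+    // so the MOVAPS emitted by `frmov` merely relocates the value between XMM
+    // registers.)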
+ e.enc32(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD)); + e.enc64(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); +} + +#[inline(never)] +fn define_fpu_memory( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + + // Shorthands for instructions. + let fill = shared.by_name("fill"); + let load = shared.by_name("load"); + let load_complex = shared.by_name("load_complex"); + let regfill = shared.by_name("regfill"); + let regspill = shared.by_name("regspill"); + let spill = shared.by_name("spill"); + let store = shared.by_name("store"); + let store_complex = shared.by_name("store_complex"); + + // Shorthands for recipes. + let rec_ffillSib32 = r.template("ffillSib32"); + let rec_fld = r.template("fld"); + let rec_fldDisp32 = r.template("fldDisp32"); + let rec_fldDisp8 = r.template("fldDisp8"); + let rec_fldWithIndex = r.template("fldWithIndex"); + let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); + let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); + let rec_fregfill32 = r.template("fregfill32"); + let rec_fregspill32 = r.template("fregspill32"); + let rec_fspillSib32 = r.template("fspillSib32"); + let rec_fst = r.template("fst"); + let rec_fstDisp32 = r.template("fstDisp32"); + let rec_fstDisp8 = r.template("fstDisp8"); + let rec_fstWithIndex = r.template("fstWithIndex"); + let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); + let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); + + // Float loads and stores. + e.enc_both(load.bind(F32).bind(Any), rec_fld.opcodes(&MOVSS_LOAD)); + e.enc_both(load.bind(F32).bind(Any), rec_fldDisp8.opcodes(&MOVSS_LOAD)); + e.enc_both(load.bind(F32).bind(Any), rec_fldDisp32.opcodes(&MOVSS_LOAD)); + + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndex.opcodes(&MOVSS_LOAD), + ); + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndexDisp8.opcodes(&MOVSS_LOAD), + ); + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndexDisp32.opcodes(&MOVSS_LOAD), + ); + + e.enc_both(load.bind(F64).bind(Any), rec_fld.opcodes(&MOVSD_LOAD)); + e.enc_both(load.bind(F64).bind(Any), rec_fldDisp8.opcodes(&MOVSD_LOAD)); + e.enc_both(load.bind(F64).bind(Any), rec_fldDisp32.opcodes(&MOVSD_LOAD)); + + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndex.opcodes(&MOVSD_LOAD), + ); + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndexDisp8.opcodes(&MOVSD_LOAD), + ); + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndexDisp32.opcodes(&MOVSD_LOAD), + ); + + e.enc_both(store.bind(F32).bind(Any), rec_fst.opcodes(&MOVSS_STORE)); + e.enc_both( + store.bind(F32).bind(Any), + rec_fstDisp8.opcodes(&MOVSS_STORE), + ); + e.enc_both( + store.bind(F32).bind(Any), + rec_fstDisp32.opcodes(&MOVSS_STORE), + ); + + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndex.opcodes(&MOVSS_STORE), + ); + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndexDisp8.opcodes(&MOVSS_STORE), + ); + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndexDisp32.opcodes(&MOVSS_STORE), + ); + + e.enc_both(store.bind(F64).bind(Any), rec_fst.opcodes(&MOVSD_STORE)); + e.enc_both( + store.bind(F64).bind(Any), + rec_fstDisp8.opcodes(&MOVSD_STORE), + ); + e.enc_both( + store.bind(F64).bind(Any), + rec_fstDisp32.opcodes(&MOVSD_STORE), + ); + + e.enc_both( + store_complex.bind(F64), + rec_fstWithIndex.opcodes(&MOVSD_STORE), + ); + e.enc_both( + store_complex.bind(F64), + rec_fstWithIndexDisp8.opcodes(&MOVSD_STORE), + ); + e.enc_both( + store_complex.bind(F64), + 
rec_fstWithIndexDisp32.opcodes(&MOVSD_STORE), + ); + + e.enc_both(fill.bind(F32), rec_ffillSib32.opcodes(&MOVSS_LOAD)); + e.enc_both(regfill.bind(F32), rec_fregfill32.opcodes(&MOVSS_LOAD)); + e.enc_both(fill.bind(F64), rec_ffillSib32.opcodes(&MOVSD_LOAD)); + e.enc_both(regfill.bind(F64), rec_fregfill32.opcodes(&MOVSD_LOAD)); + + e.enc_both(spill.bind(F32), rec_fspillSib32.opcodes(&MOVSS_STORE)); + e.enc_both(regspill.bind(F32), rec_fregspill32.opcodes(&MOVSS_STORE)); + e.enc_both(spill.bind(F64), rec_fspillSib32.opcodes(&MOVSD_STORE)); + e.enc_both(regspill.bind(F64), rec_fregspill32.opcodes(&MOVSD_STORE)); +} + +#[inline(never)] +fn define_fpu_ops( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + x86: &InstructionGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions. + let ceil = shared.by_name("ceil"); + let f32const = shared.by_name("f32const"); + let f64const = shared.by_name("f64const"); + let fadd = shared.by_name("fadd"); + let fcmp = shared.by_name("fcmp"); + let fcvt_from_sint = shared.by_name("fcvt_from_sint"); + let fdemote = shared.by_name("fdemote"); + let fdiv = shared.by_name("fdiv"); + let ffcmp = shared.by_name("ffcmp"); + let floor = shared.by_name("floor"); + let fmul = shared.by_name("fmul"); + let fpromote = shared.by_name("fpromote"); + let fsub = shared.by_name("fsub"); + let nearest = shared.by_name("nearest"); + let sqrt = shared.by_name("sqrt"); + let trunc = shared.by_name("trunc"); + let x86_cvtt2si = x86.by_name("x86_cvtt2si"); + let x86_fmax = x86.by_name("x86_fmax"); + let x86_fmin = x86.by_name("x86_fmin"); + + // Shorthands for recipes. + let rec_f32imm_z = r.template("f32imm_z"); + let rec_f64imm_z = r.template("f64imm_z"); + let rec_fa = r.template("fa"); + let rec_fcmp = r.template("fcmp"); + let rec_fcscc = r.template("fcscc"); + let rec_frurm = r.template("frurm"); + let rec_furm = r.template("furm"); + let rec_furmi_rnd = r.template("furmi_rnd"); + let rec_rfurm = r.template("rfurm"); + + // Predicates shorthands. + let use_sse41 = settings.predicate_by_name("use_sse41"); + + // Floating-point constants equal to 0.0 can be encoded using either `xorps` or `xorpd`, for + // 32-bit and 64-bit floats respectively. 
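+    // (XORing a register with itself is the canonical x86 zeroing idiom; the
+    // instruction predicates just below check that the IEEE immediate really is
+    // bitwise zero before these encodings can be selected.)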
+    let is_zero_32_bit_float =
+        InstructionPredicate::new_is_zero_32bit_float(&*formats.unary_ieee32, "imm");
+    e.enc32_instp(
+        f32const,
+        rec_f32imm_z.opcodes(&XORPS),
+        is_zero_32_bit_float.clone(),
+    );
+
+    let is_zero_64_bit_float =
+        InstructionPredicate::new_is_zero_64bit_float(&*formats.unary_ieee64, "imm");
+    e.enc32_instp(
+        f64const,
+        rec_f64imm_z.opcodes(&XORPD),
+        is_zero_64_bit_float.clone(),
+    );
+
+    e.enc_x86_64_instp(f32const, rec_f32imm_z.opcodes(&XORPS), is_zero_32_bit_float);
+    e.enc_x86_64_instp(f64const, rec_f64imm_z.opcodes(&XORPD), is_zero_64_bit_float);
+
+    // cvtsi2ss
+    e.enc_i32_i64(fcvt_from_sint.bind(F32), rec_frurm.opcodes(&CVTSI2SS));
+
+    // cvtsi2sd
+    e.enc_i32_i64(fcvt_from_sint.bind(F64), rec_frurm.opcodes(&CVTSI2SD));
+
+    // cvtss2sd
+    e.enc_both(fpromote.bind(F64).bind(F32), rec_furm.opcodes(&CVTSS2SD));
+
+    // cvtsd2ss
+    e.enc_both(fdemote.bind(F32).bind(F64), rec_furm.opcodes(&CVTSD2SS));
+
+    // cvttss2si
+    e.enc_both(
+        x86_cvtt2si.bind(I32).bind(F32),
+        rec_rfurm.opcodes(&CVTTSS2SI),
+    );
+    e.enc64(
+        x86_cvtt2si.bind(I64).bind(F32),
+        rec_rfurm.opcodes(&CVTTSS2SI).rex().w(),
+    );
+
+    // cvttsd2si
+    e.enc_both(
+        x86_cvtt2si.bind(I32).bind(F64),
+        rec_rfurm.opcodes(&CVTTSD2SI),
+    );
+    e.enc64(
+        x86_cvtt2si.bind(I64).bind(F64),
+        rec_rfurm.opcodes(&CVTTSD2SI).rex().w(),
+    );
+
+    // Exact square roots.
+    e.enc_both(sqrt.bind(F32), rec_furm.opcodes(&SQRTSS));
+    e.enc_both(sqrt.bind(F64), rec_furm.opcodes(&SQRTSD));
+
+    // Rounding. The recipe looks at the opcode to pick an immediate.
+    for inst in &[nearest, floor, ceil, trunc] {
+        e.enc_both_isap(inst.bind(F32), rec_furmi_rnd.opcodes(&ROUNDSS), use_sse41);
+        e.enc_both_isap(inst.bind(F64), rec_furmi_rnd.opcodes(&ROUNDSD), use_sse41);
+    }
+
+    // Binary arithmetic ops.
+    e.enc_both(fadd.bind(F32), rec_fa.opcodes(&ADDSS));
+    e.enc_both(fadd.bind(F64), rec_fa.opcodes(&ADDSD));
+
+    e.enc_both(fsub.bind(F32), rec_fa.opcodes(&SUBSS));
+    e.enc_both(fsub.bind(F64), rec_fa.opcodes(&SUBSD));
+
+    e.enc_both(fmul.bind(F32), rec_fa.opcodes(&MULSS));
+    e.enc_both(fmul.bind(F64), rec_fa.opcodes(&MULSD));
+
+    e.enc_both(fdiv.bind(F32), rec_fa.opcodes(&DIVSS));
+    e.enc_both(fdiv.bind(F64), rec_fa.opcodes(&DIVSD));
+
+    e.enc_both(x86_fmin.bind(F32), rec_fa.opcodes(&MINSS));
+    e.enc_both(x86_fmin.bind(F64), rec_fa.opcodes(&MINSD));
+
+    e.enc_both(x86_fmax.bind(F32), rec_fa.opcodes(&MAXSS));
+    e.enc_both(x86_fmax.bind(F64), rec_fa.opcodes(&MAXSD));
+
+    // Comparisons.
+    //
+    // This only covers the condition codes in `supported_floatccs`; the rest are
+    // handled by legalization patterns.
+    e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(&UCOMISS));
+    e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(&UCOMISD));
+    e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(&UCOMISS));
+    e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(&UCOMISD));
+}
+
+#[inline(never)]
+fn define_alu(
+    e: &mut PerCpuModeEncodings,
+    shared_defs: &SharedDefinitions,
+    settings: &SettingGroup,
+    x86: &InstructionGroup,
+    r: &RecipeGroup,
+) {
+    let shared = &shared_defs.instructions;
+
+    // Shorthands for instructions.
+ let clz = shared.by_name("clz"); + let ctz = shared.by_name("ctz"); + let icmp = shared.by_name("icmp"); + let icmp_imm = shared.by_name("icmp_imm"); + let ifcmp = shared.by_name("ifcmp"); + let ifcmp_imm = shared.by_name("ifcmp_imm"); + let ifcmp_sp = shared.by_name("ifcmp_sp"); + let ishl = shared.by_name("ishl"); + let ishl_imm = shared.by_name("ishl_imm"); + let popcnt = shared.by_name("popcnt"); + let rotl = shared.by_name("rotl"); + let rotl_imm = shared.by_name("rotl_imm"); + let rotr = shared.by_name("rotr"); + let rotr_imm = shared.by_name("rotr_imm"); + let selectif = shared.by_name("selectif"); + let sshr = shared.by_name("sshr"); + let sshr_imm = shared.by_name("sshr_imm"); + let trueff = shared.by_name("trueff"); + let trueif = shared.by_name("trueif"); + let ushr = shared.by_name("ushr"); + let ushr_imm = shared.by_name("ushr_imm"); + let x86_bsf = x86.by_name("x86_bsf"); + let x86_bsr = x86.by_name("x86_bsr"); + + // Shorthands for recipes. + let rec_bsf_and_bsr = r.template("bsf_and_bsr"); + let rec_cmov = r.template("cmov"); + let rec_icscc = r.template("icscc"); + let rec_icscc_ib = r.template("icscc_ib"); + let rec_icscc_id = r.template("icscc_id"); + let rec_rcmp = r.template("rcmp"); + let rec_rcmp_ib = r.template("rcmp_ib"); + let rec_rcmp_id = r.template("rcmp_id"); + let rec_rcmp_sp = r.template("rcmp_sp"); + let rec_rc = r.template("rc"); + let rec_setf_abcd = r.template("setf_abcd"); + let rec_seti_abcd = r.template("seti_abcd"); + let rec_urm = r.template("urm"); + + // Predicates shorthands. + let use_popcnt = settings.predicate_by_name("use_popcnt"); + let use_lzcnt = settings.predicate_by_name("use_lzcnt"); + let use_bmi1 = settings.predicate_by_name("use_bmi1"); + + let band = shared.by_name("band"); + let band_imm = shared.by_name("band_imm"); + let band_not = shared.by_name("band_not"); + let bnot = shared.by_name("bnot"); + let bor = shared.by_name("bor"); + let bor_imm = shared.by_name("bor_imm"); + let bxor = shared.by_name("bxor"); + let bxor_imm = shared.by_name("bxor_imm"); + let iadd = shared.by_name("iadd"); + let iadd_ifcarry = shared.by_name("iadd_ifcarry"); + let iadd_ifcin = shared.by_name("iadd_ifcin"); + let iadd_ifcout = shared.by_name("iadd_ifcout"); + let iadd_imm = shared.by_name("iadd_imm"); + let imul = shared.by_name("imul"); + let isub = shared.by_name("isub"); + let isub_ifbin = shared.by_name("isub_ifbin"); + let isub_ifborrow = shared.by_name("isub_ifborrow"); + let isub_ifbout = shared.by_name("isub_ifbout"); + let x86_sdivmodx = x86.by_name("x86_sdivmodx"); + let x86_smulx = x86.by_name("x86_smulx"); + let x86_udivmodx = x86.by_name("x86_udivmodx"); + let x86_umulx = x86.by_name("x86_umulx"); + + let rec_div = r.template("div"); + let rec_fa = r.template("fa"); + let rec_fax = r.template("fax"); + let rec_mulx = r.template("mulx"); + let rec_r_ib = r.template("r_ib"); + let rec_r_id = r.template("r_id"); + let rec_rin = r.template("rin"); + let rec_rio = r.template("rio"); + let rec_rout = r.template("rout"); + let rec_rr = r.template("rr"); + let rec_rrx = r.template("rrx"); + let rec_ur = r.template("ur"); + + e.enc_i32_i64(iadd, rec_rr.opcodes(&ADD)); + e.enc_i32_i64(iadd_ifcout, rec_rout.opcodes(&ADD)); + e.enc_i32_i64(iadd_ifcin, rec_rin.opcodes(&ADC)); + e.enc_i32_i64(iadd_ifcarry, rec_rio.opcodes(&ADC)); + e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(&ADD_IMM8_SIGN_EXTEND).rrr(0)); + e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(&ADD_IMM).rrr(0)); + + e.enc_i32_i64(isub, rec_rr.opcodes(&SUB)); + e.enc_i32_i64(isub_ifbout, 
rec_rout.opcodes(&SUB));
+    e.enc_i32_i64(isub_ifbin, rec_rin.opcodes(&SBB));
+    e.enc_i32_i64(isub_ifborrow, rec_rio.opcodes(&SBB));
+
+    e.enc_i32_i64(band, rec_rr.opcodes(&AND));
+    e.enc_b32_b64(band, rec_rr.opcodes(&AND));
+
+    // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can
+    // even use the single-byte immediate for 0xffff_ffXX masks.
+
+    e.enc_i32_i64(band_imm, rec_r_ib.opcodes(&AND_IMM8_SIGN_EXTEND).rrr(4));
+    e.enc_i32_i64(band_imm, rec_r_id.opcodes(&AND_IMM).rrr(4));
+
+    e.enc_i32_i64(bor, rec_rr.opcodes(&OR));
+    e.enc_b32_b64(bor, rec_rr.opcodes(&OR));
+    e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(&OR_IMM8_SIGN_EXTEND).rrr(1));
+    e.enc_i32_i64(bor_imm, rec_r_id.opcodes(&OR_IMM).rrr(1));
+
+    e.enc_i32_i64(bxor, rec_rr.opcodes(&XOR));
+    e.enc_b32_b64(bxor, rec_rr.opcodes(&XOR));
+    e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(&XOR_IMM8_SIGN_EXTEND).rrr(6));
+    e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(&XOR_IMM).rrr(6));
+
+    // x86 has a bitwise not instruction NOT.
+    e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2));
+    e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2));
+
+    // Also add `b1` encodings for the logic instructions.
+    // TODO: Should this be done with 8-bit instructions? It would improve partial register
+    // dependencies.
+    e.enc_both(band.bind(B1), rec_rr.opcodes(&AND));
+    e.enc_both(bor.bind(B1), rec_rr.opcodes(&OR));
+    e.enc_both(bxor.bind(B1), rec_rr.opcodes(&XOR));
+
+    e.enc_i32_i64(imul, rec_rrx.opcodes(&IMUL));
+    e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(&IDIV).rrr(7));
+    e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(&DIV).rrr(6));
+
+    e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(&IMUL_RDX_RAX).rrr(5));
+    e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(&MUL).rrr(4));
+
+    // Binary bitwise ops.
+    //
+    // The F64 version is intentionally encoded using the single-precision opcode:
+    // the operation is identical and the encoding is one byte shorter.
+    e.enc_both(band.bind(F32), rec_fa.opcodes(&ANDPS));
+    e.enc_both(band.bind(F64), rec_fa.opcodes(&ANDPS));
+
+    e.enc_both(bor.bind(F32), rec_fa.opcodes(&ORPS));
+    e.enc_both(bor.bind(F64), rec_fa.opcodes(&ORPS));
+
+    e.enc_both(bxor.bind(F32), rec_fa.opcodes(&XORPS));
+    e.enc_both(bxor.bind(F64), rec_fa.opcodes(&XORPS));
+
+    // The `andnps(x,y)` instruction computes `~x&y`, while `band_not(x,y)` is `x&~y`.
+    e.enc_both(band_not.bind(F32), rec_fax.opcodes(&ANDNPS));
+    e.enc_both(band_not.bind(F64), rec_fax.opcodes(&ANDNPS));
+
+    // Shifts and rotates.
+    // Note that the dynamic shift amount is only masked to 5 or 6 bits; the 8-bit
+    // and 16-bit shifts would need explicit masking.
+
+    for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] {
+        // Cannot use enc_i32_i64 for this pattern because the instructions must
+        // bind `Any`.
+        e.enc32(
+            inst.bind(I32).bind(Any),
+            rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
+        );
+        e.enc64(
+            inst.bind(I64).bind(Any),
+            rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex().w(),
+        );
+        e.enc64(
+            inst.bind(I32).bind(Any),
+            rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex(),
+        );
+        e.enc64(
+            inst.bind(I32).bind(Any),
+            rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
+        );
+    }
+
+    e.enc_i32_i64(rotl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(0));
+    e.enc_i32_i64(rotr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(1));
+    e.enc_i32_i64(ishl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(4));
+    e.enc_i32_i64(ushr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(5));
+    e.enc_i32_i64(sshr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(7));
+
+    // Population count.
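+    // (POPCNT has its own CPUID feature flag, hence the `use_popcnt` predicate on
+    // every encoding that follows; when it is false, `popcnt` simply has no
+    // encoding here and must be handled by legalization instead.)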
+    e.enc32_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt);
+    e.enc64_isap(
+        popcnt.bind(I64),
+        rec_urm.opcodes(&POPCNT).rex().w(),
+        use_popcnt,
+    );
+    e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT).rex(), use_popcnt);
+    e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt);
+
+    // Count leading zero bits.
+    e.enc32_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt);
+    e.enc64_isap(clz.bind(I64), rec_urm.opcodes(&LZCNT).rex().w(), use_lzcnt);
+    e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT).rex(), use_lzcnt);
+    e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt);
+
+    // Count trailing zero bits.
+    e.enc32_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1);
+    e.enc64_isap(ctz.bind(I64), rec_urm.opcodes(&TZCNT).rex().w(), use_bmi1);
+    e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT).rex(), use_bmi1);
+    e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1);
+
+    // Bit scan forward and reverse.
+    e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(&BIT_SCAN_FORWARD));
+    e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(&BIT_SCAN_REVERSE));
+
+    // Comparisons.
+    e.enc_i32_i64(icmp, rec_icscc.opcodes(&CMP_REG));
+    e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(&CMP_IMM8).rrr(7));
+    e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(&CMP_IMM).rrr(7));
+    e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(&CMP_REG));
+    e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(&CMP_IMM8).rrr(7));
+    e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(&CMP_IMM).rrr(7));
+    // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x).
+
+    e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(&CMP_REG));
+    e.enc64(ifcmp_sp.bind(I64), rec_rcmp_sp.opcodes(&CMP_REG).rex().w());
+
+    // Convert flags to bool.
+    // This encodes `b1` as an 8-bit low register with the value 0 or 1.
+    e.enc_both(trueif, rec_seti_abcd.opcodes(&SET_BYTE_IF_OVERFLOW));
+    e.enc_both(trueff, rec_setf_abcd.opcodes(&SET_BYTE_IF_OVERFLOW));
+
+    // Conditional move (a.k.a. integer select).
+    e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW));
+}
+
+#[inline(never)]
+#[allow(clippy::cognitive_complexity)]
+fn define_simd(
+    e: &mut PerCpuModeEncodings,
+    shared_defs: &SharedDefinitions,
+    settings: &SettingGroup,
+    x86: &InstructionGroup,
+    r: &RecipeGroup,
+) {
+    let shared = &shared_defs.instructions;
+    let formats = &shared_defs.formats;
+
+    // Shorthands for instructions.
+ let avg_round = shared.by_name("avg_round"); + let bitcast = shared.by_name("bitcast"); + let bor = shared.by_name("bor"); + let bxor = shared.by_name("bxor"); + let copy = shared.by_name("copy"); + let copy_nop = shared.by_name("copy_nop"); + let fadd = shared.by_name("fadd"); + let fcmp = shared.by_name("fcmp"); + let fdiv = shared.by_name("fdiv"); + let fill = shared.by_name("fill"); + let fill_nop = shared.by_name("fill_nop"); + let fmax = shared.by_name("fmax"); + let fmin = shared.by_name("fmin"); + let fmul = shared.by_name("fmul"); + let fsub = shared.by_name("fsub"); + let iadd = shared.by_name("iadd"); + let icmp = shared.by_name("icmp"); + let imul = shared.by_name("imul"); + let ishl_imm = shared.by_name("ishl_imm"); + let load = shared.by_name("load"); + let raw_bitcast = shared.by_name("raw_bitcast"); + let regfill = shared.by_name("regfill"); + let regmove = shared.by_name("regmove"); + let regspill = shared.by_name("regspill"); + let sadd_sat = shared.by_name("sadd_sat"); + let scalar_to_vector = shared.by_name("scalar_to_vector"); + let spill = shared.by_name("spill"); + let sqrt = shared.by_name("sqrt"); + let sshr_imm = shared.by_name("sshr_imm"); + let ssub_sat = shared.by_name("ssub_sat"); + let store = shared.by_name("store"); + let uadd_sat = shared.by_name("uadd_sat"); + let ushr_imm = shared.by_name("ushr_imm"); + let usub_sat = shared.by_name("usub_sat"); + let vconst = shared.by_name("vconst"); + let x86_insertps = x86.by_name("x86_insertps"); + let x86_movlhps = x86.by_name("x86_movlhps"); + let x86_movsd = x86.by_name("x86_movsd"); + let x86_pextr = x86.by_name("x86_pextr"); + let x86_pinsr = x86.by_name("x86_pinsr"); + let x86_pmaxs = x86.by_name("x86_pmaxs"); + let x86_pmaxu = x86.by_name("x86_pmaxu"); + let x86_pmins = x86.by_name("x86_pmins"); + let x86_pminu = x86.by_name("x86_pminu"); + let x86_pshufb = x86.by_name("x86_pshufb"); + let x86_pshufd = x86.by_name("x86_pshufd"); + let x86_psll = x86.by_name("x86_psll"); + let x86_psra = x86.by_name("x86_psra"); + let x86_psrl = x86.by_name("x86_psrl"); + let x86_ptest = x86.by_name("x86_ptest"); + + // Shorthands for recipes. + let rec_f_ib = r.template("f_ib"); + let rec_fa = r.template("fa"); + let rec_fa_ib = r.template("fa_ib"); + let rec_fax = r.template("fax"); + let rec_fcmp = r.template("fcmp"); + let rec_ffillSib32 = r.template("ffillSib32"); + let rec_ffillnull = r.recipe("ffillnull"); + let rec_fld = r.template("fld"); + let rec_fldDisp32 = r.template("fldDisp32"); + let rec_fldDisp8 = r.template("fldDisp8"); + let rec_fregfill32 = r.template("fregfill32"); + let rec_fregspill32 = r.template("fregspill32"); + let rec_frmov = r.template("frmov"); + let rec_frurm = r.template("frurm"); + let rec_fspillSib32 = r.template("fspillSib32"); + let rec_fst = r.template("fst"); + let rec_fstDisp32 = r.template("fstDisp32"); + let rec_fstDisp8 = r.template("fstDisp8"); + let rec_furm = r.template("furm"); + let rec_icscc_fpr = r.template("icscc_fpr"); + let rec_null_fpr = r.recipe("null_fpr"); + let rec_pfcmp = r.template("pfcmp"); + let rec_r_ib_unsigned_fpr = r.template("r_ib_unsigned_fpr"); + let rec_r_ib_unsigned_gpr = r.template("r_ib_unsigned_gpr"); + let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r"); + let rec_stacknull = r.recipe("stacknull"); + let rec_vconst = r.template("vconst"); + let rec_vconst_optimized = r.template("vconst_optimized"); + + // Predicates shorthands. 
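+    // (The two bare `predicate_by_name` calls below discard their results; their
+    // apparent purpose is just to assert at generation time that these settings
+    // predicates exist.)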
+    settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic");
+    settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic");
+    let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd");
+    let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
+    let use_sse42_simd = settings.predicate_by_name("use_sse42_simd");
+
+    // SIMD vector size: eventually multiple vector sizes may be supported but for now only
+    // SSE-sized vectors are available.
+    let sse_vector_size: u64 = 128;
+
+    // SIMD splat: before x86 can use vector data, it must be moved to XMM registers; see
+    // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the
+    // value across the register.
+
+    let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128;
+
+    // PSHUFB, 8-bit shuffle using two XMM registers.
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let instruction = x86_pshufb.bind(vector(ty, sse_vector_size));
+        let template = rec_fa.nonrex().opcodes(&PSHUFB);
+        e.enc32_isap(instruction.clone(), template.clone(), use_ssse3_simd);
+        e.enc64_isap(instruction, template, use_ssse3_simd);
+    }
+
+    // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate.
+    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
+        let instruction = x86_pshufd.bind(vector(ty, sse_vector_size));
+        let template = rec_r_ib_unsigned_fpr.nonrex().opcodes(&PSHUFD);
+        e.enc32(instruction.clone(), template.clone());
+        e.enc64(instruction, template);
+    }
+
+    // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
+    // to the Intel manual: "When the destination operand is an XMM register, the source operand is
+    // written to the low doubleword of the register and the register is zero-extended to 128 bits."
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let instruction = scalar_to_vector.bind(vector(ty, sse_vector_size));
+        if ty.is_float() {
+            e.enc_32_64_rec(instruction, rec_null_fpr, 0);
+        } else {
+            let template = rec_frurm.opcodes(&MOVD_LOAD_XMM);
+            if ty.lane_bits() < 64 {
+                e.enc32(instruction.clone(), template.clone());
+                e.enc_x86_64(instruction, template);
+            } else {
+                // No 32-bit encodings for 64-bit widths.
+                assert_eq!(ty.lane_bits(), 64);
+                e.enc64(instruction, template.rex().w());
+            }
+        }
+    }
+
+    // SIMD insertlane
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let (opcode, isap): (&[_], _) = match ty.lane_bits() {
+            8 => (&PINSRB, Some(use_sse41_simd)),
+            16 => (&PINSRW, None),
+            32 | 64 => (&PINSR, Some(use_sse41_simd)),
+            _ => panic!("invalid size for SIMD insertlane"),
+        };
+
+        let instruction = x86_pinsr.bind(vector(ty, sse_vector_size));
+        let template = rec_r_ib_unsigned_r.opcodes(opcode);
+        if ty.lane_bits() < 64 {
+            e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap);
+        } else {
+            // It turns out the 64-bit widths have REX/W encodings and are only
+            // available on x86_64.
+            e.enc64_maybe_isap(instruction, template.rex().w(), isap);
+        }
+    }
+
+    // For legalizing insertlane with floats, INSERTPS from SSE4.1.
+    {
+        let instruction = x86_insertps.bind(vector(F32, sse_vector_size));
+        let template = rec_fa_ib.nonrex().opcodes(&INSERTPS);
+        e.enc_32_64_maybe_isap(instruction, template, Some(use_sse41_simd));
+    }
+
+    // For legalizing insertlane with floats, MOVSD from SSE2.
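+    // (Register-to-register MOVSD copies only the low 64 bits and leaves the upper
+    // lane of the destination intact, which is what lets it act as a lane
+    // insertion.)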
+    {
+        let instruction = x86_movsd.bind(vector(F64, sse_vector_size));
+        let template = rec_fa.nonrex().opcodes(&MOVSD_LOAD);
+        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE2
+    }
+
+    // For legalizing insertlane with floats, MOVLHPS from SSE.
+    {
+        let instruction = x86_movlhps.bind(vector(F64, sse_vector_size));
+        let template = rec_fa.nonrex().opcodes(&MOVLHPS);
+        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
+    }
+
+    // SIMD extractlane
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let opcode = match ty.lane_bits() {
+            8 => &PEXTRB,
+            16 => &PEXTRW,
+            32 | 64 => &PEXTR,
+            _ => panic!("invalid size for SIMD extractlane"),
+        };
+
+        let instruction = x86_pextr.bind(vector(ty, sse_vector_size));
+        let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
+        if ty.lane_bits() < 64 {
+            e.enc_32_64_maybe_isap(instruction, template.nonrex(), Some(use_sse41_simd));
+        } else {
+            // It turns out the 64-bit widths have REX/W encodings and are only
+            // available on x86_64.
+            e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd));
+        }
+    }
+
+    // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
+    for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
+        for to_type in
+            ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type)
+        {
+            let instruction = raw_bitcast
+                .bind(vector(to_type, sse_vector_size))
+                .bind(vector(from_type, sse_vector_size));
+            e.enc_32_64_rec(instruction, rec_null_fpr, 0);
+        }
+    }
+
+    // SIMD raw bitcast floats to vector (and back); assumes that floats are already stored in an
+    // XMM register.
+    for float_type in &[F32, F64] {
+        for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) {
+            e.enc_32_64_rec(
+                raw_bitcast
+                    .bind(vector(lane_type, sse_vector_size))
+                    .bind(*float_type),
+                rec_null_fpr,
+                0,
+            );
+            e.enc_32_64_rec(
+                raw_bitcast
+                    .bind(*float_type)
+                    .bind(vector(lane_type, sse_vector_size)),
+                rec_null_fpr,
+                0,
+            );
+        }
+    }
+
+    // SIMD vconst for special cases (all zeroes, all ones). This must be encoded
+    // prior to the MOVUPS implementation (below) so the compiler sees this
+    // encoding first.
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let instruction = vconst.bind(vector(ty, sse_vector_size));
+
+        let is_zero_128bit =
+            InstructionPredicate::new_is_all_zeroes(&*formats.unary_const, "constant_handle");
+        let template = rec_vconst_optimized.opcodes(&PXOR).infer_rex();
+        e.enc_32_64_func(instruction.clone(), template, |builder| {
+            builder.inst_predicate(is_zero_128bit)
+        });
+
+        let is_ones_128bit =
+            InstructionPredicate::new_is_all_ones(&*formats.unary_const, "constant_handle");
+        let template = rec_vconst_optimized.opcodes(&PCMPEQB).infer_rex();
+        e.enc_32_64_func(instruction, template, |builder| {
+            builder.inst_predicate(is_ones_128bit)
+        });
+    }
+
+    // SIMD vconst using MOVUPS
+    // TODO it would be ideal if eventually this became the more efficient MOVAPS but we would have
+    // to guarantee that the constants are aligned when emitted and there is currently no mechanism
+    // for that; alternatively, constants could be loaded into XMM registers using a sequence like:
+    // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored
+    // in memory) but some performance measurements are needed.
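+    // (MOVUPS is the unaligned form, so these encodings stay correct even though
+    // nothing currently guarantees 16-byte alignment for the emitted constants.)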
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let instruction = vconst.bind(vector(ty, sse_vector_size)); + let template = rec_vconst.opcodes(&MOVUPS_LOAD).infer_rex(); + e.enc_32_64_maybe_isap(instruction, template, None); // from SSE + } + + // SIMD register movement: store, load, spill, fill, regmove. All of these use encodings of + // MOVUPS and MOVAPS from SSE (TODO ideally all of these would either use MOVAPS when we have + // alignment or type-specific encodings, see https://github.com/bytecodealliance/cranelift/issues/1039). + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + // Store + let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any); + e.enc_32_64( + bound_store.clone(), + rec_fst.opcodes(&MOVUPS_STORE).infer_rex(), + ); + e.enc_32_64(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE)); + e.enc_32_64(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE)); + + // Load + let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any); + e.enc_32_64( + bound_load.clone(), + rec_fld.opcodes(&MOVUPS_LOAD).infer_rex(), + ); + e.enc_32_64(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD)); + e.enc_32_64(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD)); + + // Spill + let bound_spill = spill.bind(vector(ty, sse_vector_size)); + e.enc_32_64(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE)); + let bound_regspill = regspill.bind(vector(ty, sse_vector_size)); + e.enc_32_64(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE)); + + // Fill + let bound_fill = fill.bind(vector(ty, sse_vector_size)); + e.enc_32_64(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD)); + let bound_regfill = regfill.bind(vector(ty, sse_vector_size)); + e.enc_32_64(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD)); + let bound_fill_nop = fill_nop.bind(vector(ty, sse_vector_size)); + e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0); + + // Regmove + let bound_regmove = regmove.bind(vector(ty, sse_vector_size)); + e.enc_32_64(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD)); + + // Copy + let bound_copy = copy.bind(vector(ty, sse_vector_size)); + e.enc_32_64(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD)); + let bound_copy_nop = copy_nop.bind(vector(ty, sse_vector_size)); + e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0); + } + + // SIMD integer addition + for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] { + let iadd = iadd.bind(vector(*ty, sse_vector_size)); + e.enc_32_64(iadd, rec_fa.opcodes(*opcodes)); + } + + // SIMD integer saturating addition + e.enc_32_64( + sadd_sat.bind(vector(I8, sse_vector_size)), + rec_fa.opcodes(&PADDSB), + ); + e.enc_32_64( + sadd_sat.bind(vector(I16, sse_vector_size)), + rec_fa.opcodes(&PADDSW), + ); + e.enc_32_64( + uadd_sat.bind(vector(I8, sse_vector_size)), + rec_fa.opcodes(&PADDUSB), + ); + e.enc_32_64( + uadd_sat.bind(vector(I16, sse_vector_size)), + rec_fa.opcodes(&PADDUSW), + ); + + // SIMD integer subtraction + let isub = shared.by_name("isub"); + for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] { + let isub = isub.bind(vector(*ty, sse_vector_size)); + e.enc_32_64(isub, rec_fa.opcodes(*opcodes)); + } + + // SIMD integer saturating subtraction + e.enc_32_64( + ssub_sat.bind(vector(I8, sse_vector_size)), + rec_fa.opcodes(&PSUBSB), + ); + e.enc_32_64( + ssub_sat.bind(vector(I16, sse_vector_size)), + rec_fa.opcodes(&PSUBSW), + ); + e.enc_32_64( + usub_sat.bind(vector(I8, sse_vector_size)), + rec_fa.opcodes(&PSUBUSB), + ); + e.enc_32_64( + 
usub_sat.bind(vector(I16, sse_vector_size)),
+        rec_fa.opcodes(&PSUBUSW),
+    );
+
+    // SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16
+    // and I64x2 and these are (at the time of writing) not necessary for WASM SIMD.
+    for (ty, opcodes, isap) in &[
+        (I16, &PMULLW[..], None),
+        (I32, &PMULLD[..], Some(use_sse41_simd)),
+    ] {
+        let imul = imul.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
+    }
+
+    // SIMD integer average with rounding.
+    for (ty, opcodes) in &[(I8, &PAVGB[..]), (I16, &PAVGW[..])] {
+        let avgr = avg_round.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64(avgr, rec_fa.opcodes(opcodes));
+    }
+
+    // SIMD logical operations
+    let band = shared.by_name("band");
+    let band_not = shared.by_name("band_not");
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        // and
+        let band = band.bind(vector(ty, sse_vector_size));
+        e.enc_32_64(band, rec_fa.opcodes(&PAND));
+
+        // and not (note flipped recipe operands to match band_not order)
+        let band_not = band_not.bind(vector(ty, sse_vector_size));
+        e.enc_32_64(band_not, rec_fax.opcodes(&PANDN));
+
+        // or
+        let bor = bor.bind(vector(ty, sse_vector_size));
+        e.enc_32_64(bor, rec_fa.opcodes(&POR));
+
+        // xor
+        let bxor = bxor.bind(vector(ty, sse_vector_size));
+        e.enc_32_64(bxor, rec_fa.opcodes(&PXOR));
+
+        // ptest
+        let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size));
+        e.enc_32_64_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd));
+    }
+
+    // SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement
+    // allows SIMD shifts to be legalized more easily. TODO ideally this would be typed as an
+    // I128x1 but restrictions on the type builder prevent this; the general idea here is that
+    // the upper bits are all zeroed and do not form part of any separate lane. See
+    // https://github.com/bytecodealliance/cranelift/issues/1146.
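+    // (MOVD/MOVQ into an XMM register zero-extend the value through bit 127, which
+    // is what establishes the zeroed upper bits described above.)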
+    e.enc_both(
+        bitcast.bind(vector(I64, sse_vector_size)).bind(I32),
+        rec_frurm.opcodes(&MOVD_LOAD_XMM),
+    );
+    e.enc64(
+        bitcast.bind(vector(I64, sse_vector_size)).bind(I64),
+        rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(),
+    );
+
+    // SIMD shift left
+    for (ty, opcodes) in &[(I16, &PSLLW), (I32, &PSLLD), (I64, &PSLLQ)] {
+        let x86_psll = x86_psll.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64(x86_psll, rec_fa.opcodes(*opcodes));
+    }
+
+    // SIMD shift right (logical)
+    for (ty, opcodes) in &[(I16, &PSRLW), (I32, &PSRLD), (I64, &PSRLQ)] {
+        let x86_psrl = x86_psrl.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64(x86_psrl, rec_fa.opcodes(*opcodes));
+    }
+
+    // SIMD shift right (arithmetic)
+    for (ty, opcodes) in &[(I16, &PSRAW), (I32, &PSRAD)] {
+        let x86_psra = x86_psra.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64(x86_psra, rec_fa.opcodes(*opcodes));
+    }
+
+    // SIMD immediate shift
+    for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM), (I64, &PS_Q_IMM)] {
+        let ishl_imm = ishl_imm.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6));
+
+        let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));
+
+        let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
+    }
+
+    // SIMD integer comparisons
+    {
+        use IntCC::*;
+        for (ty, cc, opcodes, isa_predicate) in &[
+            (I8, Equal, &PCMPEQB[..], None),
+            (I16, Equal, &PCMPEQW[..], None),
+            (I32, Equal, &PCMPEQD[..], None),
+            (I64, Equal, &PCMPEQQ[..], Some(use_sse41_simd)),
+            (I8, SignedGreaterThan, &PCMPGTB[..], None),
+            (I16, SignedGreaterThan, &PCMPGTW[..], None),
+            (I32, SignedGreaterThan, &PCMPGTD[..], None),
+            (I64, SignedGreaterThan, &PCMPGTQ[..], Some(use_sse42_simd)),
+        ] {
+            let instruction = icmp
+                .bind(Immediate::IntCC(*cc))
+                .bind(vector(*ty, sse_vector_size));
+            let template = rec_icscc_fpr.nonrex().opcodes(opcodes);
+            e.enc_32_64_maybe_isap(instruction, template, *isa_predicate);
+        }
+    }
+
+    // SIMD min/max
+    for (ty, inst, opcodes, isa_predicate) in &[
+        (I8, x86_pmaxs, &PMAXSB[..], Some(use_sse41_simd)),
+        (I16, x86_pmaxs, &PMAXSW[..], None),
+        (I32, x86_pmaxs, &PMAXSD[..], Some(use_sse41_simd)),
+        (I8, x86_pmaxu, &PMAXUB[..], None),
+        (I16, x86_pmaxu, &PMAXUW[..], Some(use_sse41_simd)),
+        (I32, x86_pmaxu, &PMAXUD[..], Some(use_sse41_simd)),
+        (I8, x86_pmins, &PMINSB[..], Some(use_sse41_simd)),
+        (I16, x86_pmins, &PMINSW[..], None),
+        (I32, x86_pmins, &PMINSD[..], Some(use_sse41_simd)),
+        (I8, x86_pminu, &PMINUB[..], None),
+        (I16, x86_pminu, &PMINUW[..], Some(use_sse41_simd)),
+        (I32, x86_pminu, &PMINUD[..], Some(use_sse41_simd)),
+    ] {
+        let inst = inst.bind(vector(*ty, sse_vector_size));
+        e.enc_32_64_maybe_isap(inst, rec_fa.opcodes(opcodes), *isa_predicate);
+    }
+
+    // SIMD float comparisons
+    e.enc_both(
+        fcmp.bind(vector(F32, sse_vector_size)),
+        rec_pfcmp.opcodes(&CMPPS),
+    );
+    e.enc_both(
+        fcmp.bind(vector(F64, sse_vector_size)),
+        rec_pfcmp.opcodes(&CMPPD),
+    );
+
+    // SIMD float arithmetic
+    for (ty, inst, opcodes) in &[
+        (F32, fadd, &ADDPS[..]),
+        (F64, fadd, &ADDPD[..]),
+        (F32, fsub, &SUBPS[..]),
+        (F64, fsub, &SUBPD[..]),
+        (F32, fmul, &MULPS[..]),
+        (F64, fmul, &MULPD[..]),
+        (F32, fdiv, &DIVPS[..]),
+        (F64, fdiv, &DIVPD[..]),
+        (F32, fmin, &MINPS[..]),
+        (F64, fmin, &MINPD[..]),
+        (F32, fmax, &MAXPS[..]),
+        (F64, fmax, &MAXPD[..]),
+    ] {
+        let inst = inst.bind(vector(*ty, sse_vector_size));
+        e.enc_both(inst, rec_fa.opcodes(opcodes));
+    }
+
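+    // SIMD float square root: SQRTPS/SQRTPD take a single input operand, so they
+    // use the unary `furm` recipe below rather than the two-operand `fa` recipe
+    // used for the arithmetic ops above.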
+    for (ty, inst, opcodes) in &[(F32, sqrt, &SQRTPS[..]), (F64, sqrt, &SQRTPD[..])] {
+        let inst = inst.bind(vector(*ty, sse_vector_size));
+        e.enc_both(inst, rec_furm.opcodes(opcodes));
+    }
+}
+
+#[inline(never)]
+fn define_entity_ref(
+    e: &mut PerCpuModeEncodings,
+    shared_defs: &SharedDefinitions,
+    settings: &SettingGroup,
+    r: &RecipeGroup,
+) {
+    let shared = &shared_defs.instructions;
+    let formats = &shared_defs.formats;
+
+    // Shorthands for instructions.
+    let func_addr = shared.by_name("func_addr");
+    let stack_addr = shared.by_name("stack_addr");
+    let symbol_value = shared.by_name("symbol_value");
+
+    // Shorthands for recipes.
+    let rec_allones_fnaddr4 = r.template("allones_fnaddr4");
+    let rec_allones_fnaddr8 = r.template("allones_fnaddr8");
+    let rec_fnaddr4 = r.template("fnaddr4");
+    let rec_fnaddr8 = r.template("fnaddr8");
+    let rec_got_fnaddr8 = r.template("got_fnaddr8");
+    let rec_got_gvaddr8 = r.template("got_gvaddr8");
+    let rec_gvaddr4 = r.template("gvaddr4");
+    let rec_gvaddr8 = r.template("gvaddr8");
+    let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8");
+    let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8");
+    let rec_spaddr4_id = r.template("spaddr4_id");
+    let rec_spaddr8_id = r.template("spaddr8_id");
+
+    // Predicates shorthands.
+    let all_ones_funcaddrs_and_not_is_pic =
+        settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic");
+    let is_pic = settings.predicate_by_name("is_pic");
+    let not_all_ones_funcaddrs_and_not_is_pic =
+        settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic");
+    let not_is_pic = settings.predicate_by_name("not_is_pic");
+
+    // Function addresses.
+
+    // Non-PIC, without all-ones funcaddresses.
+    e.enc32_isap(
+        func_addr.bind(I32),
+        rec_fnaddr4.opcodes(&MOV_IMM),
+        not_all_ones_funcaddrs_and_not_is_pic,
+    );
+    e.enc64_isap(
+        func_addr.bind(I64),
+        rec_fnaddr8.opcodes(&MOV_IMM).rex().w(),
+        not_all_ones_funcaddrs_and_not_is_pic,
+    );
+
+    // Non-PIC, with all-ones funcaddresses.
+    e.enc32_isap(
+        func_addr.bind(I32),
+        rec_allones_fnaddr4.opcodes(&MOV_IMM),
+        all_ones_funcaddrs_and_not_is_pic,
+    );
+    e.enc64_isap(
+        func_addr.bind(I64),
+        rec_allones_fnaddr8.opcodes(&MOV_IMM).rex().w(),
+        all_ones_funcaddrs_and_not_is_pic,
+    );
+
+    // 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's pc-relative field.
+    let is_colocated_func =
+        InstructionPredicate::new_is_colocated_func(&*formats.func_addr, "func_ref");
+    e.enc64_instp(
+        func_addr.bind(I64),
+        rec_pcrel_fnaddr8.opcodes(&LEA).rex().w(),
+        is_colocated_func,
+    );
+
+    // 64-bit, non-colocated, PIC.
+    e.enc64_isap(
+        func_addr.bind(I64),
+        rec_got_fnaddr8.opcodes(&MOV_LOAD).rex().w(),
+        is_pic,
+    );
+
+    // Global addresses.
+
+    // Non-PIC.
+    e.enc32_isap(
+        symbol_value.bind(I32),
+        rec_gvaddr4.opcodes(&MOV_IMM),
+        not_is_pic,
+    );
+    e.enc64_isap(
+        symbol_value.bind(I64),
+        rec_gvaddr8.opcodes(&MOV_IMM).rex().w(),
+        not_is_pic,
+    );
+
+    // PIC, colocated.
+    e.enc64_func(
+        symbol_value.bind(I64),
+        rec_pcrel_gvaddr8.opcodes(&LEA).rex().w(),
+        |encoding| {
+            encoding
+                .isa_predicate(is_pic)
+                .inst_predicate(InstructionPredicate::new_is_colocated_data(formats))
+        },
+    );
+
+    // PIC, non-colocated.
+    e.enc64_isap(
+        symbol_value.bind(I64),
+        rec_got_gvaddr8.opcodes(&MOV_LOAD).rex().w(),
+        is_pic,
+    );
+
+    // Stack addresses.
+    //
+    // TODO: Add encoding rules for stack_load and stack_store, so that they
+    // don't get legalized to stack_addr + load/store.
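+    // (Both encodings below use LEA, so the base-plus-offset address is computed
+    // without clobbering the flags; e.g. a hypothetical slot at offset 16 would
+    // come out roughly as `lea rax, [rsp + 16]`.)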
+ e.enc32(stack_addr.bind(I32), rec_spaddr4_id.opcodes(&LEA)); + e.enc64(stack_addr.bind(I64), rec_spaddr8_id.opcodes(&LEA).rex().w()); +} + +/// Control flow opcodes. +#[inline(never)] +fn define_control_flow( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions. + let brff = shared.by_name("brff"); + let brif = shared.by_name("brif"); + let brnz = shared.by_name("brnz"); + let brz = shared.by_name("brz"); + let call = shared.by_name("call"); + let call_indirect = shared.by_name("call_indirect"); + let debugtrap = shared.by_name("debugtrap"); + let indirect_jump_table_br = shared.by_name("indirect_jump_table_br"); + let jump = shared.by_name("jump"); + let jump_table_base = shared.by_name("jump_table_base"); + let jump_table_entry = shared.by_name("jump_table_entry"); + let return_ = shared.by_name("return"); + let trap = shared.by_name("trap"); + let trapff = shared.by_name("trapff"); + let trapif = shared.by_name("trapif"); + let resumable_trap = shared.by_name("resumable_trap"); + + // Shorthands for recipes. + let rec_brfb = r.template("brfb"); + let rec_brfd = r.template("brfd"); + let rec_brib = r.template("brib"); + let rec_brid = r.template("brid"); + let rec_call_id = r.template("call_id"); + let rec_call_plt_id = r.template("call_plt_id"); + let rec_call_r = r.template("call_r"); + let rec_debugtrap = r.recipe("debugtrap"); + let rec_indirect_jmp = r.template("indirect_jmp"); + let rec_jmpb = r.template("jmpb"); + let rec_jmpd = r.template("jmpd"); + let rec_jt_base = r.template("jt_base"); + let rec_jt_entry = r.template("jt_entry"); + let rec_ret = r.template("ret"); + let rec_t8jccb_abcd = r.template("t8jccb_abcd"); + let rec_t8jccd_abcd = r.template("t8jccd_abcd"); + let rec_t8jccd_long = r.template("t8jccd_long"); + let rec_tjccb = r.template("tjccb"); + let rec_tjccd = r.template("tjccd"); + let rec_trap = r.template("trap"); + let rec_trapif = r.recipe("trapif"); + let rec_trapff = r.recipe("trapff"); + + // Predicates shorthands. + let is_pic = settings.predicate_by_name("is_pic"); + + // Call/return + + // 32-bit, both PIC and non-PIC. + e.enc32(call, rec_call_id.opcodes(&CALL_RELATIVE)); + + // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field. + let is_colocated_func = InstructionPredicate::new_is_colocated_func(&*formats.call, "func_ref"); + e.enc64_instp(call, rec_call_id.opcodes(&CALL_RELATIVE), is_colocated_func); + + // 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, since non-PIC + // is currently using the large model, which requires calls be lowered to + // func_addr+call_indirect. + e.enc64_isap(call, rec_call_plt_id.opcodes(&CALL_RELATIVE), is_pic); + + e.enc32( + call_indirect.bind(I32), + rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), + ); + e.enc64( + call_indirect.bind(I64), + rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2).rex(), + ); + e.enc64( + call_indirect.bind(I64), + rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), + ); + + e.enc32(return_, rec_ret.opcodes(&RET_NEAR)); + e.enc64(return_, rec_ret.opcodes(&RET_NEAR)); + + // Branches. 
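+    // (Each branch below gets both a short rel8 form and a near rel32 form; the
+    // later branch-relaxation pass is expected to pick the smallest encoding that
+    // reaches its target.)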
+ e.enc32(jump, rec_jmpb.opcodes(&JUMP_SHORT)); + e.enc64(jump, rec_jmpb.opcodes(&JUMP_SHORT)); + e.enc32(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); + e.enc64(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); + + e.enc_both(brif, rec_brib.opcodes(&JUMP_SHORT_IF_OVERFLOW)); + e.enc_both(brif, rec_brid.opcodes(&JUMP_NEAR_IF_OVERFLOW)); + + // Not all float condition codes are legal, see `supported_floatccs`. + e.enc_both(brff, rec_brfb.opcodes(&JUMP_SHORT_IF_OVERFLOW)); + e.enc_both(brff, rec_brfd.opcodes(&JUMP_NEAR_IF_OVERFLOW)); + + // Note that the tjccd opcode will be prefixed with 0x0f. + e.enc_i32_i64_explicit_rex(brz, rec_tjccb.opcodes(&JUMP_SHORT_IF_EQUAL)); + e.enc_i32_i64_explicit_rex(brz, rec_tjccd.opcodes(&TEST_BYTE_REG)); + e.enc_i32_i64_explicit_rex(brnz, rec_tjccb.opcodes(&JUMP_SHORT_IF_NOT_EQUAL)); + e.enc_i32_i64_explicit_rex(brnz, rec_tjccd.opcodes(&TEST_REG)); + + // Branch on a b1 value in a register only looks at the low 8 bits. See also + // bint encodings below. + // + // Start with the worst-case encoding for X86_32 only. The register allocator + // can't handle a branch with an ABCD-constrained operand. + e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(&TEST_BYTE_REG)); + e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(&TEST_REG)); + + e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_EQUAL)); + e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_BYTE_REG)); + e.enc_both( + brnz.bind(B1), + rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_NOT_EQUAL), + ); + e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_REG)); + + // Jump tables. + e.enc64( + jump_table_entry.bind(I64), + rec_jt_entry.opcodes(&MOVSXD).rex().w(), + ); + e.enc32(jump_table_entry.bind(I32), rec_jt_entry.opcodes(&MOV_LOAD)); + + e.enc64( + jump_table_base.bind(I64), + rec_jt_base.opcodes(&LEA).rex().w(), + ); + e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(&LEA)); + + e.enc_x86_64( + indirect_jump_table_br.bind(I64), + rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), + ); + e.enc32( + indirect_jump_table_br.bind(I32), + rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), + ); + + // Trap as ud2 + e.enc32(trap, rec_trap.opcodes(&UNDEFINED2)); + e.enc64(trap, rec_trap.opcodes(&UNDEFINED2)); + e.enc32(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); + e.enc64(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); + + // Debug trap as int3 + e.enc32_rec(debugtrap, rec_debugtrap, 0); + e.enc64_rec(debugtrap, rec_debugtrap, 0); + + e.enc32_rec(trapif, rec_trapif, 0); + e.enc64_rec(trapif, rec_trapif, 0); + e.enc32_rec(trapff, rec_trapff, 0); + e.enc64_rec(trapff, rec_trapff, 0); +} + +/// Reference type instructions. +#[inline(never)] +fn define_reftypes(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { + let shared = &shared_defs.instructions; + + let is_null = shared.by_name("is_null"); + let is_invalid = shared.by_name("is_invalid"); + let null = shared.by_name("null"); + let safepoint = shared.by_name("safepoint"); + + let rec_is_zero = r.template("is_zero"); + let rec_is_invalid = r.template("is_invalid"); + let rec_pu_id_ref = r.template("pu_id_ref"); + let rec_safepoint = r.recipe("safepoint"); + + // Null references implemented as iconst 0. + e.enc32(null.bind(R32), rec_pu_id_ref.opcodes(&MOV_IMM)); + + e.enc64(null.bind(R64), rec_pu_id_ref.rex().opcodes(&MOV_IMM)); + e.enc64(null.bind(R64), rec_pu_id_ref.opcodes(&MOV_IMM)); + + // is_null, implemented by testing whether the value is 0. 
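+    // (TEST r, r sets ZF exactly when the register is zero, so the recipe can
+    // materialize the boolean result from the flags.)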
+ e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(&TEST_REG)); + + // is_invalid, implemented by testing whether the value is -1. + e.enc_r32_r64_rex_only(is_invalid, rec_is_invalid.opcodes(&CMP_IMM8).rrr(7)); + + // safepoint instruction calls sink, no actual encoding. + e.enc32_rec(safepoint, rec_safepoint, 0); + e.enc64_rec(safepoint, rec_safepoint, 0); +} + +#[allow(clippy::cognitive_complexity)] +pub(crate) fn define( + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + x86: &InstructionGroup, + r: &RecipeGroup, +) -> PerCpuModeEncodings { + // Definitions. + let mut e = PerCpuModeEncodings::new(); + + define_moves(&mut e, shared_defs, r); + define_memory(&mut e, shared_defs, x86, r); + define_fpu_moves(&mut e, shared_defs, r); + define_fpu_memory(&mut e, shared_defs, r); + define_fpu_ops(&mut e, shared_defs, settings, x86, r); + define_alu(&mut e, shared_defs, settings, x86, r); + define_simd(&mut e, shared_defs, settings, x86, r); + define_entity_ref(&mut e, shared_defs, settings, r); + define_control_flow(&mut e, shared_defs, settings, r); + define_reftypes(&mut e, shared_defs, r); + + let x86_elf_tls_get_addr = x86.by_name("x86_elf_tls_get_addr"); + let x86_macho_tls_get_addr = x86.by_name("x86_macho_tls_get_addr"); + + let rec_elf_tls_get_addr = r.recipe("elf_tls_get_addr"); + let rec_macho_tls_get_addr = r.recipe("macho_tls_get_addr"); + + e.enc64_rec(x86_elf_tls_get_addr, rec_elf_tls_get_addr, 0); + e.enc64_rec(x86_macho_tls_get_addr, rec_macho_tls_get_addr, 0); + + e +} diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs new file mode 100644 index 0000000000..3be23cea9d --- /dev/null +++ b/cranelift/codegen/meta/src/isa/x86/instructions.rs @@ -0,0 +1,582 @@ +#![allow(non_snake_case)] + +use crate::cdsl::instructions::{ + AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder, +}; +use crate::cdsl::operands::Operand; +use crate::cdsl::types::ValueType; +use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar}; + +use crate::shared::entities::EntityRefs; +use crate::shared::formats::Formats; +use crate::shared::immediates::Immediates; +use crate::shared::types; + +#[allow(clippy::many_single_char_names)] +pub(crate) fn define( + mut all_instructions: &mut AllInstructions, + formats: &Formats, + immediates: &Immediates, + entities: &EntityRefs, +) -> InstructionGroup { + let mut ig = InstructionGroupBuilder::new(&mut all_instructions); + + let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into(); + + let iWord = &TypeVar::new( + "iWord", + "A scalar integer machine word", + TypeSetBuilder::new().ints(32..64).build(), + ); + let nlo = &Operand::new("nlo", iWord).with_doc("Low part of numerator"); + let nhi = &Operand::new("nhi", iWord).with_doc("High part of numerator"); + let d = &Operand::new("d", iWord).with_doc("Denominator"); + let q = &Operand::new("q", iWord).with_doc("Quotient"); + let r = &Operand::new("r", iWord).with_doc("Remainder"); + + ig.push( + Inst::new( + "x86_udivmodx", + r#" + Extended unsigned division. + + Concatenate the bits in `nhi` and `nlo` to form the numerator. + Interpret the bits as an unsigned number and divide by the unsigned + denominator `d`. Trap when `d` is zero or if the quotient is larger + than the range of the output. + + Return both quotient and remainder. 
+            "#,
+            &formats.ternary,
+        )
+        .operands_in(vec![nlo, nhi, d])
+        .operands_out(vec![q, r])
+        .can_trap(true),
+    );
+
+    ig.push(
+        Inst::new(
+            "x86_sdivmodx",
+            r#"
+            Extended signed division.
+
+            Concatenate the bits in `nhi` and `nlo` to form the numerator.
+            Interpret the bits as a signed number and divide by the signed
+            denominator `d`. Trap when `d` is zero or if the quotient is outside
+            the range of the output.
+
+            Return both quotient and remainder.
+            "#,
+            &formats.ternary,
+        )
+        .operands_in(vec![nlo, nhi, d])
+        .operands_out(vec![q, r])
+        .can_trap(true),
+    );
+
+    let argL = &Operand::new("argL", iWord);
+    let argR = &Operand::new("argR", iWord);
+    let resLo = &Operand::new("resLo", iWord);
+    let resHi = &Operand::new("resHi", iWord);
+
+    ig.push(
+        Inst::new(
+            "x86_umulx",
+            r#"
+            Unsigned integer multiplication, producing a double-length result.
+
+            Polymorphic over all scalar integer types, but does not support vector
+            types.
+            "#,
+            &formats.binary,
+        )
+        .operands_in(vec![argL, argR])
+        .operands_out(vec![resLo, resHi]),
+    );
+
+    ig.push(
+        Inst::new(
+            "x86_smulx",
+            r#"
+            Signed integer multiplication, producing a double-length result.
+
+            Polymorphic over all scalar integer types, but does not support vector
+            types.
+            "#,
+            &formats.binary,
+        )
+        .operands_in(vec![argL, argR])
+        .operands_out(vec![resLo, resHi]),
+    );
+
+    let Float = &TypeVar::new(
+        "Float",
+        "A scalar or vector floating point number",
+        TypeSetBuilder::new()
+            .floats(Interval::All)
+            .simd_lanes(Interval::All)
+            .build(),
+    );
+    let IntTo = &TypeVar::new(
+        "IntTo",
+        "An integer type with the same number of lanes",
+        TypeSetBuilder::new()
+            .ints(32..64)
+            .simd_lanes(Interval::All)
+            .build(),
+    );
+    let x = &Operand::new("x", Float);
+    let a = &Operand::new("a", IntTo);
+
+    ig.push(
+        Inst::new(
+            "x86_cvtt2si",
+            r#"
+            Convert floating point to signed integer with truncation.
+
+            The source floating point operand is converted to a signed integer by
+            rounding towards zero. If the result can't be represented in the output
+            type, returns the smallest signed value the output type can represent.
+
+            This instruction does not trap.
+            "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    let x = &Operand::new("x", Float);
+    let a = &Operand::new("a", Float);
+    let y = &Operand::new("y", Float);
+
+    ig.push(
+        Inst::new(
+            "x86_fmin",
+            r#"
+            Floating point minimum with x86 semantics.
+
+            This is equivalent to the C ternary operator `x < y ? x : y` which
+            differs from `fmin` when either operand is NaN or when comparing
+            +0.0 to -0.0.
+
+            When the two operands don't compare as LT, `y` is returned unchanged,
+            even if it is a signalling NaN.
+            "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "x86_fmax",
+            r#"
+            Floating point maximum with x86 semantics.
+
+            This is equivalent to the C ternary operator `x > y ? x : y` which
+            differs from `fmax` when either operand is NaN or when comparing
+            +0.0 to -0.0.
+
+            When the two operands don't compare as GT, `y` is returned unchanged,
+            even if it is a signalling NaN.
+            "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    let x = &Operand::new("x", iWord);
+
+    ig.push(
+        Inst::new(
+            "x86_push",
+            r#"
+            Pushes a value onto the stack.
+
+            Decrements the stack pointer and stores the specified value at the top
+            of the stack.
+
+            This is polymorphic in i32 and i64. However, it is only implemented for i64
+            in 64-bit mode, and only for i32 in 32-bit mode.
However, it is only implemented for i64 + in 64-bit mode, and only for i32 in 32-bit mode. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .other_side_effects(true) + .can_store(true), + ); + + ig.push( + Inst::new( + "x86_pop", + r#" + Pops a value from the stack. + + Loads a value from the top of the stack and then increments the stack + pointer. + + This is polymorphic in i32 and i64. However, it is only implemented for i64 + in 64-bit mode, and only for i32 in 32-bit mode. + "#, + &formats.nullary, + ) + .operands_out(vec![x]) + .other_side_effects(true) + .can_load(true), + ); + + let y = &Operand::new("y", iWord); + let rflags = &Operand::new("rflags", iflags); + + ig.push( + Inst::new( + "x86_bsr", + r#" + Bit Scan Reverse -- returns the bit-index of the most significant 1 + in the word. Result is undefined if the argument is zero. However, it + sets the Z flag depending on the argument, so it is at least easy to + detect and handle that case. + + This is polymorphic in i32 and i64. It is implemented for both i64 and + i32 in 64-bit mode, and only for i32 in 32-bit mode. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![y, rflags]), + ); + + ig.push( + Inst::new( + "x86_bsf", + r#" + Bit Scan Forwards -- returns the bit-index of the least significant 1 + in the word. Is otherwise identical to 'bsr', just above. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![y, rflags]), + ); + + let uimm8 = &immediates.uimm8; + let TxN = &TypeVar::new( + "TxN", + "A SIMD vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let a = &Operand::new("a", TxN).with_doc("A vector value (i.e. held in an XMM register)"); + let b = &Operand::new("b", TxN).with_doc("A vector value (i.e. held in an XMM register)"); + let i = &Operand::new("i", uimm8,).with_doc( "An ordering operand controlling the copying of data from the source to the destination; see PSHUFD in Intel manual for details"); + + ig.push( + Inst::new( + "x86_pshufd", + r#" + Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended + register and re-orders the data according to the passed immediate byte. + "#, + &formats.extract_lane, + ) + .operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pshufb", + r#" + Packed Shuffle Bytes -- re-orders data in an extended register using a shuffle + mask from either memory or another extended register + "#, + &formats.binary, + ) + .operands_in(vec![a, b]) // TODO allow re-ordering from memory here (need more permissive type than TxN) + .operands_out(vec![a]), + ); + + let Idx = &Operand::new("Idx", uimm8).with_doc("Lane index"); + let x = &Operand::new("x", TxN); + let a = &Operand::new("a", &TxN.lane_of()); + + ig.push( + Inst::new( + "x86_pextr", + r#" + Extract lane ``Idx`` from ``x``. + The lane index, ``Idx``, is an immediate value, not an SSA value. It + must indicate a valid lane index for the type of ``x``. 
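+        For example, if ``x`` has type ``i32x4``, the valid values of ``Idx``
+        are 0 through 3.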
+ "#, + &formats.extract_lane, + ) + .operands_in(vec![x, Idx]) + .operands_out(vec![a]), + ); + + let IBxN = &TypeVar::new( + "IBxN", + "A SIMD vector type containing only booleans and integers", + TypeSetBuilder::new() + .ints(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let x = &Operand::new("x", IBxN); + let y = &Operand::new("y", &IBxN.lane_of()).with_doc("New lane value"); + let a = &Operand::new("a", IBxN); + + ig.push( + Inst::new( + "x86_pinsr", + r#" + Insert ``y`` into ``x`` at lane ``Idx``. + The lane index, ``Idx``, is an immediate value, not an SSA value. It + must indicate a valid lane index for the type of ``x``. + "#, + &formats.insert_lane, + ) + .operands_in(vec![x, Idx, y]) + .operands_out(vec![a]), + ); + + let FxN = &TypeVar::new( + "FxN", + "A SIMD vector type containing floats", + TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let x = &Operand::new("x", FxN); + let y = &Operand::new("y", &FxN.lane_of()).with_doc("New lane value"); + let a = &Operand::new("a", FxN); + + ig.push( + Inst::new( + "x86_insertps", + r#" + Insert a lane of ``y`` into ``x`` at using ``Idx`` to encode both which lane the value is + extracted from and which it is inserted to. This is similar to x86_pinsr but inserts + floats, which are already stored in an XMM register. + "#, + &formats.insert_lane, + ) + .operands_in(vec![x, Idx, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", FxN); + let y = &Operand::new("y", FxN); + let a = &Operand::new("a", FxN); + + ig.push( + Inst::new( + "x86_movsd", + r#" + Move the low 64 bits of the float vector ``y`` to the low 64 bits of float vector ``x`` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_movlhps", + r#" + Move the low 64 bits of the float vector ``y`` to the high 64 bits of float vector ``x`` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let IxN = &TypeVar::new( + "IxN", + "A SIMD vector type containing integers", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let I64x2 = &TypeVar::new( + "I64x2", + "A SIMD vector type containing one large integer (the upper lane is concatenated with \ + the lower lane to form the integer)", + TypeSetBuilder::new() + .ints(64..64) + .simd_lanes(2..2) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", IxN).with_doc("Vector value to shift"); + let y = &Operand::new("y", I64x2).with_doc("Number of bits to shift"); + let a = &Operand::new("a", IxN); + + ig.push( + Inst::new( + "x86_psll", + r#" + Shift Packed Data Left Logical -- This implements the behavior of the shared instruction + ``ishl`` but alters the shift operand to live in an XMM register as expected by the PSLL* + family of instructions. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_psrl", + r#" + Shift Packed Data Right Logical -- This implements the behavior of the shared instruction + ``ushr`` but alters the shift operand to live in an XMM register as expected by the PSRL* + family of instructions. 
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_psra", + r#" + Shift Packed Data Right Arithmetic -- This implements the behavior of the shared + instruction ``sshr`` but alters the shift operand to live in an XMM register as expected by + the PSRA* family of instructions. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", TxN); + let y = &Operand::new("y", TxN); + let f = &Operand::new("f", iflags); + ig.push( + Inst::new( + "x86_ptest", + r#" + Logical Compare -- PTEST will set the ZF flag if all bits in the result are 0 of the + bitwise AND of the first source operand (first operand) and the second source operand + (second operand). PTEST sets the CF flag if all bits in the result are 0 of the bitwise + AND of the second source operand (second operand) and the logical NOT of the destination + operand (first operand). + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![f]), + ); + + let x = &Operand::new("x", IxN); + let y = &Operand::new("y", IxN); + let a = &Operand::new("a", IxN); + ig.push( + Inst::new( + "x86_pmaxs", + r#" + Maximum of Packed Signed Integers -- Compare signed integers in the first and second + operand and return the maximum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pmaxu", + r#" + Maximum of Packed Unsigned Integers -- Compare unsigned integers in the first and second + operand and return the maximum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pmins", + r#" + Minimum of Packed Signed Integers -- Compare signed integers in the first and second + operand and return the minimum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pminu", + r#" + Minimum of Packed Unsigned Integers -- Compare unsigned integers in the first and second + operand and return the minimum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let i64_t = &TypeVar::new( + "i64_t", + "A scalar 64bit integer", + TypeSetBuilder::new().ints(64..64).build(), + ); + + let GV = &Operand::new("GV", &entities.global_value); + let addr = &Operand::new("addr", i64_t); + + ig.push( + Inst::new( + "x86_elf_tls_get_addr", + r#" + Elf tls get addr -- This implements the GD TLS model for ELF. The clobber output should + not be used. + "#, + &formats.unary_global_value, + ) + .operands_in(vec![GV]) + .operands_out(vec![addr]), + ); + ig.push( + Inst::new( + "x86_macho_tls_get_addr", + r#" + Mach-O tls get addr -- This implements TLS access for Mach-O. The clobber output should + not be used. 
+ "#, + &formats.unary_global_value, + ) + .operands_in(vec![GV]) + .operands_out(vec![addr]), + ); + + ig.build() +} diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs new file mode 100644 index 0000000000..de748a0bae --- /dev/null +++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs @@ -0,0 +1,650 @@ +use crate::cdsl::ast::{constant, var, ExprBuilder, Literal}; +use crate::cdsl::instructions::{vector, Bindable, InstructionGroup}; +use crate::cdsl::types::{LaneType, ValueType}; +use crate::cdsl::xform::TransformGroupBuilder; +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I16, I32, I64, I8}; +use crate::shared::Definitions as SharedDefinitions; + +#[allow(clippy::many_single_char_names)] +pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) { + let mut group = TransformGroupBuilder::new( + "x86_expand", + r#" + Legalize instructions by expansion. + + Use x86-specific instructions if needed."#, + ) + .isa("x86") + .chain_with(shared.transform_groups.by_name("expand_flags").id); + + // List of instructions. + let insts = &shared.instructions; + let band = insts.by_name("band"); + let band_not = insts.by_name("band_not"); + let bitcast = insts.by_name("bitcast"); + let bitselect = insts.by_name("bitselect"); + let bor = insts.by_name("bor"); + let bnot = insts.by_name("bnot"); + let bxor = insts.by_name("bxor"); + let clz = insts.by_name("clz"); + let ctz = insts.by_name("ctz"); + let extractlane = insts.by_name("extractlane"); + let fcmp = insts.by_name("fcmp"); + let fcvt_from_uint = insts.by_name("fcvt_from_uint"); + let fcvt_to_sint = insts.by_name("fcvt_to_sint"); + let fcvt_to_uint = insts.by_name("fcvt_to_uint"); + let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat"); + let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat"); + let fabs = insts.by_name("fabs"); + let fmax = insts.by_name("fmax"); + let fmin = insts.by_name("fmin"); + let fneg = insts.by_name("fneg"); + let iadd = insts.by_name("iadd"); + let icmp = insts.by_name("icmp"); + let iconst = insts.by_name("iconst"); + let imax = insts.by_name("imax"); + let imin = insts.by_name("imin"); + let imul = insts.by_name("imul"); + let ineg = insts.by_name("ineg"); + let insertlane = insts.by_name("insertlane"); + let ishl = insts.by_name("ishl"); + let ishl_imm = insts.by_name("ishl_imm"); + let isub = insts.by_name("isub"); + let popcnt = insts.by_name("popcnt"); + let raw_bitcast = insts.by_name("raw_bitcast"); + let scalar_to_vector = insts.by_name("scalar_to_vector"); + let sdiv = insts.by_name("sdiv"); + let selectif = insts.by_name("selectif"); + let smulhi = insts.by_name("smulhi"); + let splat = insts.by_name("splat"); + let shuffle = insts.by_name("shuffle"); + let srem = insts.by_name("srem"); + let sshr = insts.by_name("sshr"); + let tls_value = insts.by_name("tls_value"); + let trueif = insts.by_name("trueif"); + let udiv = insts.by_name("udiv"); + let umax = insts.by_name("umax"); + let umin = insts.by_name("umin"); + let umulhi = insts.by_name("umulhi"); + let ushr_imm = insts.by_name("ushr_imm"); + let urem = insts.by_name("urem"); + let ushr = insts.by_name("ushr"); + let vconst = insts.by_name("vconst"); + let vall_true = insts.by_name("vall_true"); + let vany_true = insts.by_name("vany_true"); + + let x86_bsf = x86_instructions.by_name("x86_bsf"); + let x86_bsr = x86_instructions.by_name("x86_bsr"); + let x86_pmaxs = x86_instructions.by_name("x86_pmaxs"); + let x86_pmaxu = 
x86_instructions.by_name("x86_pmaxu"); + let x86_pmins = x86_instructions.by_name("x86_pmins"); + let x86_pminu = x86_instructions.by_name("x86_pminu"); + let x86_pshufb = x86_instructions.by_name("x86_pshufb"); + let x86_pshufd = x86_instructions.by_name("x86_pshufd"); + let x86_psll = x86_instructions.by_name("x86_psll"); + let x86_psra = x86_instructions.by_name("x86_psra"); + let x86_psrl = x86_instructions.by_name("x86_psrl"); + let x86_ptest = x86_instructions.by_name("x86_ptest"); + let x86_umulx = x86_instructions.by_name("x86_umulx"); + let x86_smulx = x86_instructions.by_name("x86_smulx"); + + let imm = &shared.imm; + + // Division and remainder. + // + // The srem expansion requires custom code because srem INT_MIN, -1 is not + // allowed to trap. The other ops need to check avoid_div_traps. + group.custom_legalize(sdiv, "expand_sdivrem"); + group.custom_legalize(srem, "expand_sdivrem"); + group.custom_legalize(udiv, "expand_udivrem"); + group.custom_legalize(urem, "expand_udivrem"); + + // Double length (widening) multiplication. + let a = var("a"); + let x = var("x"); + let y = var("y"); + let a1 = var("a1"); + let a2 = var("a2"); + let res_lo = var("res_lo"); + let res_hi = var("res_hi"); + + group.legalize( + def!(res_hi = umulhi(x, y)), + vec![def!((res_lo, res_hi) = x86_umulx(x, y))], + ); + + group.legalize( + def!(res_hi = smulhi(x, y)), + vec![def!((res_lo, res_hi) = x86_smulx(x, y))], + ); + + // Floating point condition codes. + // + // The 8 condition codes in `supported_floatccs` are directly supported by a + // `ucomiss` or `ucomisd` instruction. The remaining codes need legalization + // patterns. + + let floatcc_eq = Literal::enumerator_for(&imm.floatcc, "eq"); + let floatcc_ord = Literal::enumerator_for(&imm.floatcc, "ord"); + let floatcc_ueq = Literal::enumerator_for(&imm.floatcc, "ueq"); + let floatcc_ne = Literal::enumerator_for(&imm.floatcc, "ne"); + let floatcc_uno = Literal::enumerator_for(&imm.floatcc, "uno"); + let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one"); + + // Equality needs an explicit `ord` test which checks the parity bit. + group.legalize( + def!(a = fcmp(floatcc_eq, x, y)), + vec![ + def!(a1 = fcmp(floatcc_ord, x, y)), + def!(a2 = fcmp(floatcc_ueq, x, y)), + def!(a = band(a1, a2)), + ], + ); + group.legalize( + def!(a = fcmp(floatcc_ne, x, y)), + vec![ + def!(a1 = fcmp(floatcc_uno, x, y)), + def!(a2 = fcmp(floatcc_one, x, y)), + def!(a = bor(a1, a2)), + ], + ); + + let floatcc_lt = &Literal::enumerator_for(&imm.floatcc, "lt"); + let floatcc_gt = &Literal::enumerator_for(&imm.floatcc, "gt"); + let floatcc_le = &Literal::enumerator_for(&imm.floatcc, "le"); + let floatcc_ge = &Literal::enumerator_for(&imm.floatcc, "ge"); + let floatcc_ugt = &Literal::enumerator_for(&imm.floatcc, "ugt"); + let floatcc_ult = &Literal::enumerator_for(&imm.floatcc, "ult"); + let floatcc_uge = &Literal::enumerator_for(&imm.floatcc, "uge"); + let floatcc_ule = &Literal::enumerator_for(&imm.floatcc, "ule"); + + // Inequalities that need to be reversed. + for &(cc, rev_cc) in &[ + (floatcc_lt, floatcc_gt), + (floatcc_le, floatcc_ge), + (floatcc_ugt, floatcc_ult), + (floatcc_uge, floatcc_ule), + ] { + group.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]); + } + + // We need to modify the CFG for min/max legalization. + group.custom_legalize(fmin, "expand_minmax"); + group.custom_legalize(fmax, "expand_minmax"); + + // Conversions from unsigned need special handling. 
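+    // (Note: x86 itself has no unsigned-integer-to-float conversion; CVTSI2SS
+    // and CVTSI2SD interpret their source as signed, so inputs with the sign
+    // bit set need the extra fix-up sequence provided by these expansions.)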
+ group.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint"); + // Conversions from float to int can trap and modify the control flow graph. + group.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint"); + group.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint"); + group.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat"); + group.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat"); + + // Count leading and trailing zeroes, for baseline x86_64 + let c_minus_one = var("c_minus_one"); + let c_thirty_one = var("c_thirty_one"); + let c_thirty_two = var("c_thirty_two"); + let c_sixty_three = var("c_sixty_three"); + let c_sixty_four = var("c_sixty_four"); + let index1 = var("index1"); + let r2flags = var("r2flags"); + let index2 = var("index2"); + + let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); + let imm64_minus_one = Literal::constant(&imm.imm64, -1); + let imm64_63 = Literal::constant(&imm.imm64, 63); + group.legalize( + def!(a = clz.I64(x)), + vec![ + def!(c_minus_one = iconst(imm64_minus_one)), + def!(c_sixty_three = iconst(imm64_63)), + def!((index1, r2flags) = x86_bsr(x)), + def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)), + def!(a = isub(c_sixty_three, index2)), + ], + ); + + let imm64_31 = Literal::constant(&imm.imm64, 31); + group.legalize( + def!(a = clz.I32(x)), + vec![ + def!(c_minus_one = iconst(imm64_minus_one)), + def!(c_thirty_one = iconst(imm64_31)), + def!((index1, r2flags) = x86_bsr(x)), + def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)), + def!(a = isub(c_thirty_one, index2)), + ], + ); + + let imm64_64 = Literal::constant(&imm.imm64, 64); + group.legalize( + def!(a = ctz.I64(x)), + vec![ + def!(c_sixty_four = iconst(imm64_64)), + def!((index1, r2flags) = x86_bsf(x)), + def!(a = selectif(intcc_eq, r2flags, c_sixty_four, index1)), + ], + ); + + let imm64_32 = Literal::constant(&imm.imm64, 32); + group.legalize( + def!(a = ctz.I32(x)), + vec![ + def!(c_thirty_two = iconst(imm64_32)), + def!((index1, r2flags) = x86_bsf(x)), + def!(a = selectif(intcc_eq, r2flags, c_thirty_two, index1)), + ], + ); + + // Population count for baseline x86_64 + let x = var("x"); + let r = var("r"); + + let qv3 = var("qv3"); + let qv4 = var("qv4"); + let qv5 = var("qv5"); + let qv6 = var("qv6"); + let qv7 = var("qv7"); + let qv8 = var("qv8"); + let qv9 = var("qv9"); + let qv10 = var("qv10"); + let qv11 = var("qv11"); + let qv12 = var("qv12"); + let qv13 = var("qv13"); + let qv14 = var("qv14"); + let qv15 = var("qv15"); + let qc77 = var("qc77"); + #[allow(non_snake_case)] + let qc0F = var("qc0F"); + let qc01 = var("qc01"); + + let imm64_1 = Literal::constant(&imm.imm64, 1); + let imm64_4 = Literal::constant(&imm.imm64, 4); + group.legalize( + def!(r = popcnt.I64(x)), + vec![ + def!(qv3 = ushr_imm(x, imm64_1)), + def!(qc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777_7777_7777))), + def!(qv4 = band(qv3, qc77)), + def!(qv5 = isub(x, qv4)), + def!(qv6 = ushr_imm(qv4, imm64_1)), + def!(qv7 = band(qv6, qc77)), + def!(qv8 = isub(qv5, qv7)), + def!(qv9 = ushr_imm(qv7, imm64_1)), + def!(qv10 = band(qv9, qc77)), + def!(qv11 = isub(qv8, qv10)), + def!(qv12 = ushr_imm(qv11, imm64_4)), + def!(qv13 = iadd(qv11, qv12)), + def!(qc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F_0F0F_0F0F))), + def!(qv14 = band(qv13, qc0F)), + def!(qc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101_0101_0101))), + def!(qv15 = imul(qv14, qc01)), + def!(r = ushr_imm(qv15, Literal::constant(&imm.imm64, 56))), + ], + ); + + let lv3 = var("lv3"); + let lv4 = 
var("lv4"); + let lv5 = var("lv5"); + let lv6 = var("lv6"); + let lv7 = var("lv7"); + let lv8 = var("lv8"); + let lv9 = var("lv9"); + let lv10 = var("lv10"); + let lv11 = var("lv11"); + let lv12 = var("lv12"); + let lv13 = var("lv13"); + let lv14 = var("lv14"); + let lv15 = var("lv15"); + let lc77 = var("lc77"); + #[allow(non_snake_case)] + let lc0F = var("lc0F"); + let lc01 = var("lc01"); + + group.legalize( + def!(r = popcnt.I32(x)), + vec![ + def!(lv3 = ushr_imm(x, imm64_1)), + def!(lc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777))), + def!(lv4 = band(lv3, lc77)), + def!(lv5 = isub(x, lv4)), + def!(lv6 = ushr_imm(lv4, imm64_1)), + def!(lv7 = band(lv6, lc77)), + def!(lv8 = isub(lv5, lv7)), + def!(lv9 = ushr_imm(lv7, imm64_1)), + def!(lv10 = band(lv9, lc77)), + def!(lv11 = isub(lv8, lv10)), + def!(lv12 = ushr_imm(lv11, imm64_4)), + def!(lv13 = iadd(lv11, lv12)), + def!(lc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F))), + def!(lv14 = band(lv13, lc0F)), + def!(lc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101))), + def!(lv15 = imul(lv14, lc01)), + def!(r = ushr_imm(lv15, Literal::constant(&imm.imm64, 24))), + ], + ); + + group.custom_legalize(ineg, "convert_ineg"); + + group.custom_legalize(tls_value, "expand_tls_value"); + + group.build_and_add_to(&mut shared.transform_groups); + + let mut narrow = TransformGroupBuilder::new( + "x86_narrow", + r#" + Legalize instructions by narrowing. + + Use x86-specific instructions if needed."#, + ) + .isa("x86") + .chain_with(shared.transform_groups.by_name("narrow_flags").id); + + // SIMD + let uimm8_zero = Literal::constant(&imm.uimm8, 0x00); + let uimm8_one = Literal::constant(&imm.uimm8, 0x01); + let u128_zeroes = constant(vec![0x00; 16]); + let u128_ones = constant(vec![0xff; 16]); + let b = var("b"); + let c = var("c"); + let d = var("d"); + let e = var("e"); + + // SIMD vector size: eventually multiple vector sizes may be supported but for now only SSE-sized vectors are available + let sse_vector_size: u64 = 128; + let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128; + + // SIMD splat: 8-bits + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) { + let splat_any8x16 = splat.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(y = splat_any8x16(x)), + vec![ + def!(a = scalar_to_vector(x)), // move into the lowest 8 bits of an XMM register + def!(b = vconst(u128_zeroes)), // zero out a different XMM register; the shuffle mask + // for moving the lowest byte to all other byte lanes is 0x0 + def!(y = x86_pshufb(a, b)), // PSHUFB takes two XMM operands, one of which is a + // shuffle mask (i.e. 
b)
+            ],
+        );
+    }
+
+    // SIMD splat: 16-bits
+    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) {
+        let splat_x16x8 = splat.bind(vector(ty, sse_vector_size));
+        let raw_bitcast_any16x8_to_i32x4 = raw_bitcast
+            .bind(vector(I32, sse_vector_size))
+            .bind(vector(ty, sse_vector_size));
+        let raw_bitcast_i32x4_to_any16x8 = raw_bitcast
+            .bind(vector(ty, sse_vector_size))
+            .bind(vector(I32, sse_vector_size));
+        narrow.legalize(
+            def!(y = splat_x16x8(x)),
+            vec![
+                def!(a = scalar_to_vector(x)), // move into the lowest 16 bits of an XMM register
+                def!(b = insertlane(a, uimm8_one, x)), // insert the value again but in the next lowest 16 bits
+                def!(c = raw_bitcast_any16x8_to_i32x4(b)), // no instruction emitted; pretend this is an I32x4 so we can use PSHUFD
+                def!(d = x86_pshufd(c, uimm8_zero)), // broadcast the bytes in the XMM register with PSHUFD
+                def!(y = raw_bitcast_i32x4_to_any16x8(d)), // no instruction emitted; pretend this is an X16x8 again
+            ],
+        );
+    }
+
+    // SIMD splat: 32-bits
+    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
+        let splat_any32x4 = splat.bind(vector(ty, sse_vector_size));
+        narrow.legalize(
+            def!(y = splat_any32x4(x)),
+            vec![
+                def!(a = scalar_to_vector(x)), // translate to an x86 MOV to get the value in an XMM register
+                def!(y = x86_pshufd(a, uimm8_zero)), // broadcast the bytes in the XMM register with PSHUFD
+            ],
+        );
+    }
+
+    // SIMD splat: 64-bits
+    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) {
+        let splat_any64x2 = splat.bind(vector(ty, sse_vector_size));
+        narrow.legalize(
+            def!(y = splat_any64x2(x)),
+            vec![
+                def!(a = scalar_to_vector(x)), // move into the lowest 64 bits of an XMM register
+                def!(y = insertlane(a, uimm8_one, x)), // move into the highest 64 bits of the same XMM register
+            ],
+        );
+    }
+
+    // SIMD bnot
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let bnot = bnot.bind(vector(ty, sse_vector_size));
+        narrow.legalize(
+            def!(y = bnot(x)),
+            vec![def!(a = vconst(u128_ones)), def!(y = bxor(a, x))],
+        );
+    }
+
+    // SIMD shift left (logical)
+    for ty in &[I16, I32, I64] {
+        let ishl = ishl.bind(vector(*ty, sse_vector_size));
+        let bitcast = bitcast.bind(vector(I64, sse_vector_size));
+        narrow.legalize(
+            def!(a = ishl(x, y)),
+            vec![def!(b = bitcast(y)), def!(a = x86_psll(x, b))],
+        );
+    }
+
+    // SIMD shift right (logical)
+    for ty in &[I16, I32, I64] {
+        let ushr = ushr.bind(vector(*ty, sse_vector_size));
+        let bitcast = bitcast.bind(vector(I64, sse_vector_size));
+        narrow.legalize(
+            def!(a = ushr(x, y)),
+            vec![def!(b = bitcast(y)), def!(a = x86_psrl(x, b))],
+        );
+    }
+
+    // SIMD shift right (arithmetic)
+    for ty in &[I16, I32, I64] {
+        let sshr = sshr.bind(vector(*ty, sse_vector_size));
+        let bitcast = bitcast.bind(vector(I64, sse_vector_size));
+        narrow.legalize(
+            def!(a = sshr(x, y)),
+            vec![def!(b = bitcast(y)), def!(a = x86_psra(x, b))],
+        );
+    }
+
+    // SIMD select
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let bitselect = bitselect.bind(vector(ty, sse_vector_size)); // must bind both x/y and c
+        narrow.legalize(
+            def!(d = bitselect(c, x, y)),
+            vec![
+                def!(a = band(x, c)),
+                def!(b = band_not(y, c)),
+                def!(d = bor(a, b)),
+            ],
+        );
+    }
+
+    // SIMD vany_true
+    let ne = Literal::enumerator_for(&imm.intcc, "ne");
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let vany_true = vany_true.bind(vector(ty, sse_vector_size));
+        narrow.legalize(
+            def!(y = vany_true(x)),
+            vec![def!(a = x86_ptest(x,
x)), def!(y = trueif(ne, a))], + ); + } + + // SIMD vall_true + let eq = Literal::enumerator_for(&imm.intcc, "eq"); + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let vall_true = vall_true.bind(vector(ty, sse_vector_size)); + if ty.is_int() { + // In the common case (Wasm's integer-only all_true), we do not require a bitcast. + narrow.legalize( + def!(y = vall_true(x)), + vec![ + def!(a = vconst(u128_zeroes)), + def!(c = icmp(eq, x, a)), + def!(d = x86_ptest(c, c)), + def!(y = trueif(eq, d)), + ], + ); + } else { + // However, to support other types we must bitcast them to an integer vector to use + // icmp. + let lane_type_as_int = LaneType::int_from_bits(ty.lane_bits() as u16); + let raw_bitcast_to_int = raw_bitcast.bind(vector(lane_type_as_int, sse_vector_size)); + narrow.legalize( + def!(y = vall_true(x)), + vec![ + def!(a = vconst(u128_zeroes)), + def!(b = raw_bitcast_to_int(x)), + def!(c = icmp(eq, b, a)), + def!(d = x86_ptest(c, c)), + def!(y = trueif(eq, d)), + ], + ); + } + } + + // SIMD icmp ne + let ne = Literal::enumerator_for(&imm.intcc, "ne"); + for ty in ValueType::all_lane_types().filter(|ty| allowed_simd_type(ty) && ty.is_int()) { + let icmp_ = icmp.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(c = icmp_(ne, a, b)), + vec![def!(x = icmp(eq, a, b)), def!(c = bnot(x))], + ); + } + + // SIMD icmp greater-/less-than + let sgt = Literal::enumerator_for(&imm.intcc, "sgt"); + let ugt = Literal::enumerator_for(&imm.intcc, "ugt"); + let sge = Literal::enumerator_for(&imm.intcc, "sge"); + let uge = Literal::enumerator_for(&imm.intcc, "uge"); + let slt = Literal::enumerator_for(&imm.intcc, "slt"); + let ult = Literal::enumerator_for(&imm.intcc, "ult"); + let sle = Literal::enumerator_for(&imm.intcc, "sle"); + let ule = Literal::enumerator_for(&imm.intcc, "ule"); + for ty in &[I8, I16, I32] { + // greater-than + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(c = icmp_(ugt, a, b)), + vec![ + def!(x = x86_pmaxu(a, b)), + def!(y = icmp(eq, x, b)), + def!(c = bnot(y)), + ], + ); + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(c = icmp_(sge, a, b)), + vec![def!(x = x86_pmins(a, b)), def!(c = icmp(eq, x, b))], + ); + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(c = icmp_(uge, a, b)), + vec![def!(x = x86_pminu(a, b)), def!(c = icmp(eq, x, b))], + ); + + // less-than + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = icmp_(slt, a, b)), vec![def!(c = icmp(sgt, b, a))]); + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = icmp_(ult, a, b)), vec![def!(c = icmp(ugt, b, a))]); + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = icmp_(sle, a, b)), vec![def!(c = icmp(sge, b, a))]); + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = icmp_(ule, a, b)), vec![def!(c = icmp(uge, b, a))]); + } + + // SIMD integer min/max + for ty in &[I8, I16, I32] { + let imin = imin.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = imin(a, b)), vec![def!(c = x86_pmins(a, b))]); + let umin = umin.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = umin(a, b)), vec![def!(c = x86_pminu(a, b))]); + let imax = imax.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = imax(a, b)), vec![def!(c = x86_pmaxs(a, b))]); + let umax = umax.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = umax(a, b)), vec![def!(c = x86_pmaxu(a, 
b))]); + } + + // SIMD fcmp greater-/less-than + let gt = Literal::enumerator_for(&imm.floatcc, "gt"); + let lt = Literal::enumerator_for(&imm.floatcc, "lt"); + let ge = Literal::enumerator_for(&imm.floatcc, "ge"); + let le = Literal::enumerator_for(&imm.floatcc, "le"); + let ugt = Literal::enumerator_for(&imm.floatcc, "ugt"); + let ult = Literal::enumerator_for(&imm.floatcc, "ult"); + let uge = Literal::enumerator_for(&imm.floatcc, "uge"); + let ule = Literal::enumerator_for(&imm.floatcc, "ule"); + for ty in &[F32, F64] { + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(gt, a, b)), vec![def!(c = fcmp(lt, b, a))]); + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(ge, a, b)), vec![def!(c = fcmp(le, b, a))]); + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(ult, a, b)), vec![def!(c = fcmp(ugt, b, a))]); + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(ule, a, b)), vec![def!(c = fcmp(uge, b, a))]); + } + + for ty in &[F32, F64] { + let fneg = fneg.bind(vector(*ty, sse_vector_size)); + let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16); + let uimm8_shift = Literal::constant(&imm.uimm8, lane_type_as_int.lane_bits() as i64 - 1); + let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size)); + let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(b = fneg(a)), + vec![ + def!(c = vconst(u128_ones)), + def!(d = ishl_imm(c, uimm8_shift)), // Create a mask of all 0s except the MSB. + def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type. + def!(b = bxor(a, e)), // Flip the MSB. + ], + ); + } + + // SIMD fabs + for ty in &[F32, F64] { + let fabs = fabs.bind(vector(*ty, sse_vector_size)); + let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16); + let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size)); + let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(b = fabs(a)), + vec![ + def!(c = vconst(u128_ones)), + def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB. + def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type. + def!(b = band(a, e)), // Unset the MSB. + ], + ); + } + + narrow.custom_legalize(shuffle, "convert_shuffle"); + narrow.custom_legalize(extractlane, "convert_extractlane"); + narrow.custom_legalize(insertlane, "convert_insertlane"); + narrow.custom_legalize(ineg, "convert_ineg"); + + narrow.build_and_add_to(&mut shared.transform_groups); + + let mut widen = TransformGroupBuilder::new( + "x86_widen", + r#" + Legalize instructions by widening. 
+ + Use x86-specific instructions if needed."#, + ) + .isa("x86") + .chain_with(shared.transform_groups.by_name("widen").id); + + widen.custom_legalize(ineg, "convert_ineg"); + widen.build_and_add_to(&mut shared.transform_groups); +} diff --git a/cranelift/codegen/meta/src/isa/x86/mod.rs b/cranelift/codegen/meta/src/isa/x86/mod.rs new file mode 100644 index 0000000000..3b4848b166 --- /dev/null +++ b/cranelift/codegen/meta/src/isa/x86/mod.rs @@ -0,0 +1,81 @@ +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::types::ReferenceType; + +use crate::shared::types::Bool::B1; +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I16, I32, I64, I8}; +use crate::shared::types::Reference::{R32, R64}; +use crate::shared::Definitions as SharedDefinitions; + +mod encodings; +mod instructions; +mod legalize; +mod opcodes; +mod recipes; +mod registers; +mod settings; + +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { + let settings = settings::define(&shared_defs.settings); + let regs = registers::define(); + + let inst_group = instructions::define( + &mut shared_defs.all_instructions, + &shared_defs.formats, + &shared_defs.imm, + &shared_defs.entities, + ); + legalize::define(shared_defs, &inst_group); + + // CPU modes for 32-bit and 64-bit operations. + let mut x86_64 = CpuMode::new("I64"); + let mut x86_32 = CpuMode::new("I32"); + + let expand_flags = shared_defs.transform_groups.by_name("expand_flags"); + let x86_widen = shared_defs.transform_groups.by_name("x86_widen"); + let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow"); + let x86_expand = shared_defs.transform_groups.by_name("x86_expand"); + + x86_32.legalize_monomorphic(expand_flags); + x86_32.legalize_default(x86_narrow); + x86_32.legalize_type(B1, expand_flags); + x86_32.legalize_type(I8, x86_widen); + x86_32.legalize_type(I16, x86_widen); + x86_32.legalize_type(I32, x86_expand); + x86_32.legalize_value_type(ReferenceType(R32), x86_expand); + x86_32.legalize_type(F32, x86_expand); + x86_32.legalize_type(F64, x86_expand); + + x86_64.legalize_monomorphic(expand_flags); + x86_64.legalize_default(x86_narrow); + x86_64.legalize_type(B1, expand_flags); + x86_64.legalize_type(I8, x86_widen); + x86_64.legalize_type(I16, x86_widen); + x86_64.legalize_type(I32, x86_expand); + x86_64.legalize_type(I64, x86_expand); + x86_64.legalize_value_type(ReferenceType(R64), x86_expand); + x86_64.legalize_type(F32, x86_expand); + x86_64.legalize_type(F64, x86_expand); + + let recipes = recipes::define(shared_defs, &settings, ®s); + + let encodings = encodings::define(shared_defs, &settings, &inst_group, &recipes); + x86_32.set_encodings(encodings.enc32); + x86_64.set_encodings(encodings.enc64); + let encodings_predicates = encodings.inst_pred_reg.extract(); + + let recipes = encodings.recipes; + + let cpu_modes = vec![x86_64, x86_32]; + + TargetIsa::new( + "x86", + inst_group, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + ) +} diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs new file mode 100644 index 0000000000..5bd4153414 --- /dev/null +++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs @@ -0,0 +1,604 @@ +//! Static, named definitions of instruction opcodes. + +/// Empty opcode for use as a default. +pub static EMPTY: [u8; 0] = []; + +/// Add with carry flag r{16,32,64} to r/m of the same size. +pub static ADC: [u8; 1] = [0x11]; + +/// Add r{16,32,64} to r/m of the same size. 
+pub static ADD: [u8; 1] = [0x01]; + +/// Add imm{16,32} to r/m{16,32,64}, possibly sign-extended. +pub static ADD_IMM: [u8; 1] = [0x81]; + +/// Add sign-extended imm8 to r/m{16,32,64}. +pub static ADD_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; + +/// Add packed double-precision floating-point values from xmm2/mem to xmm1 and store result in +/// xmm1 (SSE2). +pub static ADDPD: [u8; 3] = [0x66, 0x0f, 0x58]; + +/// Add packed single-precision floating-point values from xmm2/mem to xmm1 and store result in +/// xmm1 (SSE). +pub static ADDPS: [u8; 2] = [0x0f, 0x58]; + +/// Add the low double-precision floating-point value from xmm2/mem to xmm1 +/// and store the result in xmm1. +pub static ADDSD: [u8; 3] = [0xf2, 0x0f, 0x58]; + +/// Add the low single-precision floating-point value from xmm2/mem to xmm1 +/// and store the result in xmm1. +pub static ADDSS: [u8; 3] = [0xf3, 0x0f, 0x58]; + +/// r/m{16,32,64} AND register of the same size (Intel docs have a typo). +pub static AND: [u8; 1] = [0x21]; + +/// imm{16,32} AND r/m{16,32,64}, possibly sign-extended. +pub static AND_IMM: [u8; 1] = [0x81]; + +/// r/m{16,32,64} AND sign-extended imm8. +pub static AND_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; + +/// Return the bitwise logical AND NOT of packed single-precision floating-point +/// values in xmm1 and xmm2/mem. +pub static ANDNPS: [u8; 2] = [0x0f, 0x55]; + +/// Return the bitwise logical AND of packed single-precision floating-point values +/// in xmm1 and xmm2/mem. +pub static ANDPS: [u8; 2] = [0x0f, 0x54]; + +/// Bit scan forward (stores index of first encountered 1 from the front). +pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc]; + +/// Bit scan reverse (stores index of first encountered 1 from the back). +pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd]; + +/// Call near, relative, displacement relative to next instruction (sign-extended). +pub static CALL_RELATIVE: [u8; 1] = [0xe8]; + +/// Move r/m{16,32,64} if overflow (OF=1). +pub static CMOV_OVERFLOW: [u8; 2] = [0x0f, 0x40]; + +/// Compare imm{16,32} with r/m{16,32,64} (sign-extended if 64). +pub static CMP_IMM: [u8; 1] = [0x81]; + +/// Compare imm8 with r/m{16,32,64}. +pub static CMP_IMM8: [u8; 1] = [0x83]; + +/// Compare r{16,32,64} with r/m of the same size. +pub static CMP_REG: [u8; 1] = [0x39]; + +/// Compare packed double-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of +/// imm8 as comparison predicate (SSE2). +pub static CMPPD: [u8; 3] = [0x66, 0x0f, 0xc2]; + +/// Compare packed single-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of +/// imm8 as comparison predicate (SSE). +pub static CMPPS: [u8; 2] = [0x0f, 0xc2]; + +/// Convert scalar double-precision floating-point value to scalar single-precision +/// floating-point value. +pub static CVTSD2SS: [u8; 3] = [0xf2, 0x0f, 0x5a]; + +/// Convert doubleword integer to scalar double-precision floating-point value. +pub static CVTSI2SD: [u8; 3] = [0xf2, 0x0f, 0x2a]; + +/// Convert doubleword integer to scalar single-precision floating-point value. +pub static CVTSI2SS: [u8; 3] = [0xf3, 0x0f, 0x2a]; + +/// Convert scalar single-precision floating-point value to scalar double-precision +/// float-point value. +pub static CVTSS2SD: [u8; 3] = [0xf3, 0x0f, 0x5a]; + +/// Convert with truncation scalar double-precision floating-point value to signed +/// integer. +pub static CVTTSD2SI: [u8; 3] = [0xf2, 0x0f, 0x2c]; + +/// Convert with truncation scalar single-precision floating-point value to integer. 
+pub static CVTTSS2SI: [u8; 3] = [0xf3, 0x0f, 0x2c]; + +/// Unsigned divide for {16,32,64}-bit. +pub static DIV: [u8; 1] = [0xf7]; + +/// Divide packed double-precision floating-point values in xmm1 by packed double-precision +/// floating-point values in xmm2/mem (SSE2). +pub static DIVPD: [u8; 3] = [0x66, 0x0f, 0x5e]; + +/// Divide packed single-precision floating-point values in xmm1 by packed single-precision +/// floating-point values in xmm2/mem (SSE). +pub static DIVPS: [u8; 2] = [0x0f, 0x5e]; + +/// Divide low double-precision floating-point value in xmm1 by low double-precision +/// floating-point value in xmm2/m64. +pub static DIVSD: [u8; 3] = [0xf2, 0x0f, 0x5e]; + +/// Divide low single-precision floating-point value in xmm1 by low single-precision +/// floating-point value in xmm2/m32. +pub static DIVSS: [u8; 3] = [0xf3, 0x0f, 0x5e]; + +/// Signed divide for {16,32,64}-bit. +pub static IDIV: [u8; 1] = [0xf7]; + +/// Signed multiply for {16,32,64}-bit, generic registers. +pub static IMUL: [u8; 2] = [0x0f, 0xaf]; + +/// Signed multiply for {16,32,64}-bit, storing into RDX:RAX. +pub static IMUL_RDX_RAX: [u8; 1] = [0xf7]; + +/// Insert scalar single-precision floating-point value. +pub static INSERTPS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x21]; + +/// Either: +/// 1. Jump near, absolute indirect, RIP = 64-bit offset from register or memory. +/// 2. Jump far, absolute indirect, address given in m16:64. +pub static JUMP_ABSOLUTE: [u8; 1] = [0xff]; + +/// Jump near, relative, RIP = RIP + 32-bit displacement sign extended to 64 bits. +pub static JUMP_NEAR_RELATIVE: [u8; 1] = [0xe9]; + +/// Jump near (rel32) if overflow (OF=1). +pub static JUMP_NEAR_IF_OVERFLOW: [u8; 2] = [0x0f, 0x80]; + +/// Jump short, relative, RIP = RIP + 8-bit displacement sign extended to 64 bits. +pub static JUMP_SHORT: [u8; 1] = [0xeb]; + +/// Jump short (rel8) if equal (ZF=1). +pub static JUMP_SHORT_IF_EQUAL: [u8; 1] = [0x74]; + +/// Jump short (rel8) if not equal (ZF=0). +pub static JUMP_SHORT_IF_NOT_EQUAL: [u8; 1] = [0x75]; + +/// Jump short (rel8) if overflow (OF=1). +pub static JUMP_SHORT_IF_OVERFLOW: [u8; 1] = [0x70]; + +/// Store effective address for m in register r{16,32,64}. +pub static LEA: [u8; 1] = [0x8d]; + +/// Count the number of leading zero bits. +pub static LZCNT: [u8; 3] = [0xf3, 0x0f, 0xbd]; + +/// Return the maximum packed double-precision floating-point values between xmm1 and xmm2/m128 +/// (SSE2). +pub static MAXPD: [u8; 3] = [0x66, 0x0f, 0x5f]; + +/// Return the maximum packed single-precision floating-point values between xmm1 and xmm2/m128 +/// (SSE). +pub static MAXPS: [u8; 2] = [0x0f, 0x5f]; + +/// Return the maximum scalar double-precision floating-point value between +/// xmm2/m64 and xmm1. +pub static MAXSD: [u8; 3] = [0xf2, 0x0f, 0x5f]; + +/// Return the maximum scalar single-precision floating-point value between +/// xmm2/m32 and xmm1. +pub static MAXSS: [u8; 3] = [0xf3, 0x0f, 0x5f]; + +/// Return the minimum packed double-precision floating-point values between xmm1 and xmm2/m128 +/// (SSE2). +pub static MINPD: [u8; 3] = [0x66, 0x0f, 0x5d]; + +/// Return the minimum packed single-precision floating-point values between xmm1 and xmm2/m128 +/// (SSE). +pub static MINPS: [u8; 2] = [0x0f, 0x5d]; + +/// Return the minimum scalar double-precision floating-point value between +/// xmm2/m64 and xmm1. +pub static MINSD: [u8; 3] = [0xf2, 0x0f, 0x5d]; + +/// Return the minimum scalar single-precision floating-point value between +/// xmm2/m32 and xmm1. 
+pub static MINSS: [u8; 3] = [0xf3, 0x0f, 0x5d]; + +/// Move r8 to r/m8. +pub static MOV_BYTE_STORE: [u8; 1] = [0x88]; + +/// Move imm{16,32,64} to same-sized register. +pub static MOV_IMM: [u8; 1] = [0xb8]; + +/// Move imm{16,32} to r{16,32,64}, sign-extended if 64-bit target. +pub static MOV_IMM_SIGNEXTEND: [u8; 1] = [0xc7]; + +/// Move {r/m16, r/m32, r/m64} to same-sized register. +pub static MOV_LOAD: [u8; 1] = [0x8b]; + +/// Move r16 to r/m16. +pub static MOV_STORE_16: [u8; 2] = [0x66, 0x89]; + +/// Move {r16, r32, r64} to same-sized register or memory. +pub static MOV_STORE: [u8; 1] = [0x89]; + +/// Move aligned packed single-precision floating-point values from x/m to xmm (SSE). +pub static MOVAPS_LOAD: [u8; 2] = [0x0f, 0x28]; + +/// Move doubleword from r/m32 to xmm (SSE2). Quadword with REX prefix. +pub static MOVD_LOAD_XMM: [u8; 3] = [0x66, 0x0f, 0x6e]; + +/// Move doubleword from xmm to r/m32 (SSE2). Quadword with REX prefix. +pub static MOVD_STORE_XMM: [u8; 3] = [0x66, 0x0f, 0x7e]; + +/// Move packed single-precision floating-point values low to high (SSE). +pub static MOVLHPS: [u8; 2] = [0x0f, 0x16]; + +/// Move scalar double-precision floating-point value (from reg/mem to reg). +pub static MOVSD_LOAD: [u8; 3] = [0xf2, 0x0f, 0x10]; + +/// Move scalar double-precision floating-point value (from reg to reg/mem). +pub static MOVSD_STORE: [u8; 3] = [0xf2, 0x0f, 0x11]; + +/// Move scalar single-precision floating-point value (from reg to reg/mem). +pub static MOVSS_STORE: [u8; 3] = [0xf3, 0x0f, 0x11]; + +/// Move scalar single-precision floating-point-value (from reg/mem to reg). +pub static MOVSS_LOAD: [u8; 3] = [0xf3, 0x0f, 0x10]; + +/// Move byte to register with sign-extension. +pub static MOVSX_BYTE: [u8; 2] = [0x0f, 0xbe]; + +/// Move word to register with sign-extension. +pub static MOVSX_WORD: [u8; 2] = [0x0f, 0xbf]; + +/// Move doubleword to register with sign-extension. +pub static MOVSXD: [u8; 1] = [0x63]; + +/// Move unaligned packed single-precision floating-point from x/m to xmm (SSE). +pub static MOVUPS_LOAD: [u8; 2] = [0x0f, 0x10]; + +/// Move unaligned packed single-precision floating-point value from xmm to x/m (SSE). +pub static MOVUPS_STORE: [u8; 2] = [0x0f, 0x11]; + +/// Move byte to register with zero-extension. +pub static MOVZX_BYTE: [u8; 2] = [0x0f, 0xb6]; + +/// Move word to register with zero-extension. +pub static MOVZX_WORD: [u8; 2] = [0x0f, 0xb7]; + +/// Unsigned multiply for {16,32,64}-bit. +pub static MUL: [u8; 1] = [0xf7]; + +/// Multiply packed double-precision floating-point values from xmm2/mem to xmm1 and store result +/// in xmm1 (SSE2). +pub static MULPD: [u8; 3] = [0x66, 0x0f, 0x59]; + +/// Multiply packed single-precision floating-point values from xmm2/mem to xmm1 and store result +/// in xmm1 (SSE). +pub static MULPS: [u8; 2] = [0x0f, 0x59]; + +/// Multiply the low double-precision floating-point value in xmm2/m64 by the +/// low double-precision floating-point value in xmm1. +pub static MULSD: [u8; 3] = [0xf2, 0x0f, 0x59]; + +/// Multiply the low single-precision floating-point value in xmm2/m32 by the +/// low single-precision floating-point value in xmm1. +pub static MULSS: [u8; 3] = [0xf3, 0x0f, 0x59]; + +/// Reverse each bit of r/m{16,32,64}. +pub static NOT: [u8; 1] = [0xf7]; + +/// r{16,32,64} OR register of same size. +pub static OR: [u8; 1] = [0x09]; + +/// imm{16,32} OR r/m{16,32,64}, possibly sign-extended. +pub static OR_IMM: [u8; 1] = [0x81]; + +/// r/m{16,32,64} OR sign-extended imm8. 
+pub static OR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; + +/// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE). +pub static ORPS: [u8; 2] = [0x0f, 0x56]; + +/// Add packed byte integers from xmm2/m128 and xmm1 (SSE2). +pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc]; + +/// Add packed doubleword integers from xmm2/m128 and xmm1 (SSE2). +pub static PADDD: [u8; 3] = [0x66, 0x0f, 0xfe]; + +/// Add packed quadword integers from xmm2/m128 and xmm1 (SSE2). +pub static PADDQ: [u8; 3] = [0x66, 0x0f, 0xd4]; + +/// Add packed word integers from xmm2/m128 and xmm1 (SSE2). +pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd]; + +/// Add packed signed byte integers from xmm2/m128 and xmm1 saturate the results (SSE). +pub static PADDSB: [u8; 3] = [0x66, 0x0f, 0xec]; + +/// Add packed signed word integers from xmm2/m128 and xmm1 saturate the results (SSE). +pub static PADDSW: [u8; 3] = [0x66, 0x0f, 0xed]; + +/// Add packed unsigned byte integers from xmm2/m128 and xmm1 saturate the results (SSE). +pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc]; + +/// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE). +pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd]; + +/// Bitwise AND of xmm2/m128 and xmm1 (SSE2). +pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb]; + +/// Bitwise AND NOT of xmm2/m128 and xmm1 (SSE2). +pub static PANDN: [u8; 3] = [0x66, 0x0f, 0xdf]; + +/// Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding (SSE2). +pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0]; + +/// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2). +pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3]; + +/// Compare packed data for equal (SSE2). +pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74]; + +/// Compare packed data for equal (SSE2). +pub static PCMPEQD: [u8; 3] = [0x66, 0x0f, 0x76]; + +/// Compare packed data for equal (SSE4.1). +pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29]; + +/// Compare packed data for equal (SSE2). +pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75]; + +/// Compare packed signed byte integers for greater than (SSE2). +pub static PCMPGTB: [u8; 3] = [0x66, 0x0f, 0x64]; + +/// Compare packed signed doubleword integers for greater than (SSE2). +pub static PCMPGTD: [u8; 3] = [0x66, 0x0f, 0x66]; + +/// Compare packed signed quadword integers for greater than (SSE4.2). +pub static PCMPGTQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x37]; + +/// Compare packed signed word integers for greater than (SSE2). +pub static PCMPGTW: [u8; 3] = [0x66, 0x0f, 0x65]; + +/// Extract doubleword or quadword, depending on REX.W (SSE4.1). +pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16]; + +/// Extract byte (SSE4.1). +pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14]; + +/// Extract word (SSE4.1). There is a 3-byte SSE2 variant that can also move to m/16. +pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15]; + +/// Insert doubleword or quadword, depending on REX.W (SSE4.1). +pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22]; + +/// Insert byte (SSE4.1). +pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20]; + +/// Insert word (SSE2). +pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4]; + +/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in +/// xmm1 (SSE4.1). +pub static PMAXSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x3c]; + +/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed maximum +/// values in xmm1 (SSE4.1). 
+pub static PMAXSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3d];
+
+/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed maximum values in
+/// xmm1 (SSE2).
+pub static PMAXSW: [u8; 3] = [0x66, 0x0f, 0xee];
+
+/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in
+/// xmm1 (SSE2).
+pub static PMAXUB: [u8; 3] = [0x66, 0x0f, 0xde];
+
+/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed maximum
+/// values in xmm1 (SSE4.1).
+pub static PMAXUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3f];
+
+/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed maximum values in
+/// xmm1 (SSE4.1).
+pub static PMAXUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3e];
+
+/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in
+/// xmm1 (SSE4.1).
+pub static PMINSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x38];
+
+/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed minimum
+/// values in xmm1 (SSE4.1).
+pub static PMINSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x39];
+
+/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed minimum values in
+/// xmm1 (SSE2).
+pub static PMINSW: [u8; 3] = [0x66, 0x0f, 0xea];
+
+/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in
+/// xmm1 (SSE2).
+pub static PMINUB: [u8; 3] = [0x66, 0x0f, 0xda];
+
+/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed minimum
+/// values in xmm1 (SSE4.1).
+pub static PMINUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3b];
+
+/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed minimum values in
+/// xmm1 (SSE4.1).
+pub static PMINUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3a];
+
+/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
+/// the results in xmm1 (SSE2).
+pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5];
+
+/// Multiply the packed doubleword signed integers in xmm1 and xmm2/m128 and store the low 32
+/// bits of each product in xmm1 (SSE4.1).
+pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
+
+/// Pop top of stack into r{16,32,64}; increment stack pointer.
+pub static POP_REG: [u8; 1] = [0x58];
+
+/// Return the count of bits set to 1.
+pub static POPCNT: [u8; 3] = [0xf3, 0x0f, 0xb8];
+
+/// Bitwise OR of xmm2/m128 and xmm1 (SSE2).
+pub static POR: [u8; 3] = [0x66, 0x0f, 0xeb];
+
+/// Shuffle bytes in xmm1 according to contents of xmm2/m128 (SSSE3).
+pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00];
+
+/// Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and
+/// store the result in xmm1 (SSE2).
+pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70];
+
+/// Shift words in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
+/// digit used in the ModR/M byte (SSE2).
+pub static PS_W_IMM: [u8; 3] = [0x66, 0x0f, 0x71];
+
+/// Shift doublewords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
+/// digit used in the ModR/M byte (SSE2).
+pub static PS_D_IMM: [u8; 3] = [0x66, 0x0f, 0x72];
+
+/// Shift quadwords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
+/// digit used in the ModR/M byte (SSE2).
+pub static PS_Q_IMM: [u8; 3] = [0x66, 0x0f, 0x73];
+
+/// Shift words in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
+pub static PSLLW: [u8; 3] = [0x66, 0x0f, 0xf1];
+
+/// Shift doublewords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
+pub static PSLLD: [u8; 3] = [0x66, 0x0f, 0xf2];
+
+/// Shift quadwords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
+pub static PSLLQ: [u8; 3] = [0x66, 0x0f, 0xf3];
+
+/// Shift words in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
+pub static PSRLW: [u8; 3] = [0x66, 0x0f, 0xd1];
+
+/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
+pub static PSRLD: [u8; 3] = [0x66, 0x0f, 0xd2];
+
+/// Shift quadwords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
+pub static PSRLQ: [u8; 3] = [0x66, 0x0f, 0xd3];
+
+/// Shift words in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2).
+pub static PSRAW: [u8; 3] = [0x66, 0x0f, 0xe1];
+
+/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2).
+pub static PSRAD: [u8; 3] = [0x66, 0x0f, 0xe2];
+
+/// Subtract packed byte integers in xmm2/m128 from packed byte integers in xmm1 (SSE2).
+pub static PSUBB: [u8; 3] = [0x66, 0x0f, 0xf8];
+
+/// Subtract packed word integers in xmm2/m128 from packed word integers in xmm1 (SSE2).
+pub static PSUBW: [u8; 3] = [0x66, 0x0f, 0xf9];
+
+/// Subtract packed doubleword integers in xmm2/m128 from packed doubleword integers in xmm1 (SSE2).
+pub static PSUBD: [u8; 3] = [0x66, 0x0f, 0xfa];
+
+/// Subtract packed quadword integers in xmm2/m128 from xmm1 (SSE2).
+pub static PSUBQ: [u8; 3] = [0x66, 0x0f, 0xfb];
+
+/// Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1
+/// and saturate results (SSE2).
+pub static PSUBSB: [u8; 3] = [0x66, 0x0f, 0xe8];
+
+/// Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1
+/// and saturate results (SSE2).
+pub static PSUBSW: [u8; 3] = [0x66, 0x0f, 0xe9];
+
+/// Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1
+/// and saturate results (SSE2).
+pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8];
+
+/// Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1
+/// and saturate results (SSE2).
+pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9];
+
+/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all
+/// 0s (SSE4.1).
+pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17];
+
+/// Push r{16,32,64}.
+pub static PUSH_REG: [u8; 1] = [0x50];
+
+/// Logical exclusive OR (SSE2).
+pub static PXOR: [u8; 3] = [0x66, 0x0f, 0xef];
+
+/// Near return to calling procedure.
+pub static RET_NEAR: [u8; 1] = [0xc3];
+
+/// General rotation opcode. The kind of rotation depends on the encoding.
+pub static ROTATE_CL: [u8; 1] = [0xd3];
+
+/// General rotation opcode. The kind of rotation depends on the encoding.
+pub static ROTATE_IMM8: [u8; 1] = [0xc1];
+
+/// Round scalar double-precision floating-point values.
+pub static ROUNDSD: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0b];
+
+/// Round scalar single-precision floating-point values.
+pub static ROUNDSS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0a];
+
+/// Subtract with borrow r{16,32,64} from r/m of the same size.
+pub static SBB: [u8; 1] = [0x19];
+
+/// Set byte if overflow (OF=1).
+pub static SET_BYTE_IF_OVERFLOW: [u8; 2] = [0x0f, 0x90];
+
+/// Compute the square root of the packed double-precision floating-point values and store the
+/// result in xmm1 (SSE2).
+pub static SQRTPD: [u8; 3] = [0x66, 0x0f, 0x51];
+
+/// Compute the square root of the packed single-precision floating-point values and store the
+/// result in xmm1 (SSE).
+pub static SQRTPS: [u8; 2] = [0x0f, 0x51];
+
+/// Compute square root of scalar double-precision floating-point value.
+pub static SQRTSD: [u8; 3] = [0xf2, 0x0f, 0x51];
+
+/// Compute square root of scalar single-precision floating-point value.
+pub static SQRTSS: [u8; 3] = [0xf3, 0x0f, 0x51];
+
+/// Subtract r{16,32,64} from r/m of same size.
+pub static SUB: [u8; 1] = [0x29];
+
+/// Subtract packed double-precision floating-point values in xmm2/mem from xmm1 and store result
+/// in xmm1 (SSE2).
+pub static SUBPD: [u8; 3] = [0x66, 0x0f, 0x5c];
+
+/// Subtract packed single-precision floating-point values in xmm2/mem from xmm1 and store result
+/// in xmm1 (SSE).
+pub static SUBPS: [u8; 2] = [0x0f, 0x5c];
+
+/// Subtract the low double-precision floating-point value in xmm2/m64 from xmm1
+/// and store the result in xmm1.
+pub static SUBSD: [u8; 3] = [0xf2, 0x0f, 0x5c];
+
+/// Subtract the low single-precision floating-point value in xmm2/m32 from xmm1
+/// and store the result in xmm1.
+pub static SUBSS: [u8; 3] = [0xf3, 0x0f, 0x5c];
+
+/// AND r8 with r/m8; set SF, ZF, PF according to result.
+pub static TEST_BYTE_REG: [u8; 1] = [0x84];
+
+/// AND {r16, r32, r64} with r/m of the same size; set SF, ZF, PF according to result.
+pub static TEST_REG: [u8; 1] = [0x85];
+
+/// Count the number of trailing zero bits.
+pub static TZCNT: [u8; 3] = [0xf3, 0x0f, 0xbc];
+
+/// Compare low double-precision floating-point values in xmm1 and xmm2/mem64
+/// and set the EFLAGS flags accordingly.
+pub static UCOMISD: [u8; 3] = [0x66, 0x0f, 0x2e];
+
+/// Compare low single-precision floating-point values in xmm1 and xmm2/mem32
+/// and set the EFLAGS flags accordingly.
+pub static UCOMISS: [u8; 2] = [0x0f, 0x2e];
+
+/// Raise invalid opcode instruction.
+pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b];
+
+/// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended.
+pub static XOR_IMM: [u8; 1] = [0x81];
+
+/// r/m{16,32,64} XOR sign-extended imm8.
+pub static XOR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
+
+/// r/m{16,32,64} XOR register of the same size.
+pub static XOR: [u8; 1] = [0x31];
+
+/// r/m8 XOR r8.
+pub static XORB: [u8; 1] = [0x30];
+
+/// Bitwise logical XOR of packed double-precision floating-point values.
+pub static XORPD: [u8; 3] = [0x66, 0x0f, 0x57];
+
+/// Bitwise logical XOR of packed single-precision floating-point values.
+pub static XORPS: [u8; 2] = [0x0f, 0x57];
diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs
new file mode 100644
index 0000000000..4bab09c306
--- /dev/null
+++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs
@@ -0,0 +1,3331 @@
+//! Encoding recipes for x86/x86_64.
+use std::rc::Rc;
+
+use cranelift_codegen_shared::isa::x86::EncodingBits;
+
+use crate::cdsl::ast::Literal;
+use crate::cdsl::formats::InstructionFormat;
+use crate::cdsl::instructions::InstructionPredicate;
+use crate::cdsl::recipes::{
+    EncodingRecipe, EncodingRecipeBuilder, OperandConstraint, Register, Stack,
+};
+use crate::cdsl::regs::IsaRegs;
+use crate::cdsl::settings::SettingGroup;
+use crate::shared::Definitions as SharedDefinitions;
+
+use crate::isa::x86::opcodes;
+
+/// Helper data structure to create recipes and template recipes.
+/// It contains all the recipes and recipe templates that might be used in the `encodings` module
+/// of this same directory.
+pub(crate) struct RecipeGroup<'builder> {
+    /// Memoized registers description, to pass it to builders later.
+    regs: &'builder IsaRegs,
+
+    /// All the recipes explicitly created in this file. This is different from the final set of
+    /// recipes, which is definitive only once encodings have generated new recipes on the fly.
+    recipes: Vec<EncodingRecipe>,
+
+    /// All the recipe templates created in this file.
+    templates: Vec<Rc<Template<'builder>>>,
+}
+
+impl<'builder> RecipeGroup<'builder> {
+    fn new(regs: &'builder IsaRegs) -> Self {
+        Self {
+            regs,
+            recipes: Vec::new(),
+            templates: Vec::new(),
+        }
+    }
+    fn add_recipe(&mut self, recipe: EncodingRecipeBuilder) {
+        self.recipes.push(recipe.build());
+    }
+    fn add_template_recipe(&mut self, recipe: EncodingRecipeBuilder) -> Rc<Template<'builder>> {
+        let template = Rc::new(Template::new(recipe, self.regs));
+        self.templates.push(template.clone());
+        template
+    }
+    fn add_template_inferred(
+        &mut self,
+        recipe: EncodingRecipeBuilder,
+        infer_function: &'static str,
+    ) -> Rc<Template<'builder>> {
+        let template =
+            Rc::new(Template::new(recipe, self.regs).inferred_rex_compute_size(infer_function));
+        self.templates.push(template.clone());
+        template
+    }
+    fn add_template(&mut self, template: Template<'builder>) -> Rc<Template<'builder>> {
+        let template = Rc::new(template);
+        self.templates.push(template.clone());
+        template
+    }
+    pub fn recipe(&self, name: &str) -> &EncodingRecipe {
+        self.recipes
+            .iter()
+            .find(|recipe| recipe.name == name)
+            .unwrap_or_else(|| panic!("unknown recipe name: {}. Try template?", name))
+    }
+    pub fn template(&self, name: &str) -> &Template {
+        self.templates
+            .iter()
+            .find(|recipe| recipe.name() == name)
+            .unwrap_or_else(|| panic!("unknown template name: {}. Try recipe?", name))
+    }
+}
+
+// Opcode representation.
+//
+// Cranelift requires each recipe to have a single encoding size in bytes, and x86 opcodes are
+// variable length, so we use separate recipes for different styles of opcodes and prefixes. The
+// opcode format is indicated by the recipe name prefix.
+//
+// The match case below does not include the REX prefix, which goes after the mandatory prefix.
+// VEX/XOP and EVEX prefixes are not yet supported. Encodings using any of these prefixes are
+// represented by separate recipes.
+//
+// The encoding bits are:
+//
+// 0-7:   The opcode byte <op>.
+// 8-9:   pp, mandatory prefix:
+//        00  none       (Op*)
+//        01  66         (Mp*)
+//        10  F3         (Mp*)
+//        11  F2         (Mp*)
+// 10-11: mm, opcode map:
+//        00  <op>       (Op1/Mp1)
+//        01  0F <op>    (Op2/Mp2)
+//        10  0F 38 <op> (Op3/Mp3)
+//        11  0F 3A <op> (Op3/Mp3)
+// 12-14: rrr, opcode bits for the ModR/M byte for certain opcodes.
+// 15:    REX.W bit (or VEX.W/E).
+//
+// There is some redundancy between bits 8-11 and the recipe names, but we have enough bits, and
+// the pp+mm format is ready for supporting VEX prefixes.
+//
+// TODO Cranelift doesn't actually require recipes to have different encoding sizes anymore, so
+// this could be simplified.
+
+/// Given a sequence of opcode bytes, compute the recipe name prefix and encoding bits.
+fn decode_opcodes(op_bytes: &[u8], rrr: u16, w: u16) -> (&'static str, u16) {
+    let enc = EncodingBits::new(op_bytes, rrr, w);
+    (enc.prefix().recipe_name_prefix(), enc.bits())
+}
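+
+// As a worked example of the bit layout above (a minimal sketch for
+// illustration; the "Mp3" name and the exact packed value are assumed from
+// the pp/mm table, and this test is not referenced elsewhere): PMAXSD is
+// [0x66, 0x0f, 0x38, 0x3d], i.e. mandatory prefix 0x66 (pp = 0b01), opcode
+// map 0F 38 (mm = 0b10) and opcode byte 0x3d, so with rrr = 0 and w = 0 the
+// encoding bits come out as 0x3d | (0b01 << 8) | (0b10 << 10) = 0x093d.
+#[cfg(test)]
+mod decode_opcodes_example {
+    use super::*;
+
+    #[test]
+    fn pmaxsd_packs_as_mp3() {
+        let (prefix, bits) = decode_opcodes(&opcodes::PMAXSD, 0, 0);
+        assert_eq!(prefix, "Mp3");
+        assert_eq!(bits, 0x093d);
+    }
+}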
+
+/// Given a snippet of Rust code (or None), replace the `PUT_OP` macro with the
+/// corresponding `put_*` function from the `binemit.rs` module.
+fn replace_put_op(code: Option<String>, prefix: &str) -> Option<String> {
+    code.map(|code| code.replace("{{PUT_OP}}", &format!("put_{}", prefix.to_lowercase())))
+}
+
+/// Replaces constraints to a REX-prefixed register class by the equivalent non-REX register class.
+fn replace_nonrex_constraints(
+    regs: &IsaRegs,
+    constraints: Vec<OperandConstraint>,
+) -> Vec<OperandConstraint> {
+    constraints
+        .into_iter()
+        .map(|constraint| match constraint {
+            OperandConstraint::RegClass(rc_index) => {
+                let new_rc_index = if rc_index == regs.class_by_name("GPR") {
+                    regs.class_by_name("GPR8")
+                } else if rc_index == regs.class_by_name("FPR") {
+                    regs.class_by_name("FPR8")
+                } else {
+                    rc_index
+                };
+                OperandConstraint::RegClass(new_rc_index)
+            }
+            _ => constraint,
+        })
+        .collect()
+}
+
+/// Specifies how the REX prefix is emitted by a Recipe.
+#[derive(Copy, Clone, PartialEq)]
+pub enum RexRecipeKind {
+    /// The REX emission behavior is not hardcoded for the Recipe
+    /// and may be overridden when using the Template.
+    Unspecified,
+
+    /// The Recipe must hardcode the non-emission of the REX prefix.
+    NeverEmitRex,
+
+    /// The Recipe must hardcode the emission of the REX prefix.
+    AlwaysEmitRex,
+
+    /// The Recipe should infer the emission of the REX.RXB bits from registers,
+    /// and the REX.W bit from the EncodingBits.
+    ///
+    /// Because such a Recipe has a non-constant instruction size, it must have
+    /// a special `compute_size` handler for the inferrable-REX case.
+    InferRex,
+}
+
+impl Default for RexRecipeKind {
+    fn default() -> Self {
+        Self::Unspecified
+    }
+}
+
+/// Previously called a TailRecipe in the Python meta language, this allows creating multiple
+/// variants of a single base EncodingRecipe (REX prefix, specialized w/rrr bits, different
+/// opcodes). It serves as a prototype of an EncodingRecipe, which is then used when actually
+/// creating Encodings, in encodings.rs. This is an idiosyncrasy of the x86 meta-language, and
+/// could be reconsidered later.
+#[derive(Clone)]
+pub(crate) struct Template<'builder> {
+    /// Description of registers, used in the build() method.
+    regs: &'builder IsaRegs,
+
+    /// The recipe template, which is to be specialized (by copy).
+    recipe: EncodingRecipeBuilder,
+
+    /// How is the REX prefix emitted?
+    rex_kind: RexRecipeKind,
+
+    /// Function for `compute_size()` when REX is inferrable.
+    inferred_rex_compute_size: Option<&'static str>,
+
+    /// Other recipe to use when REX-prefixed.
+    when_prefixed: Option<Rc<Template<'builder>>>,
+
+    // Parameters passed in the EncodingBits.
+    /// Value of the W bit (0 or 1), stored in the EncodingBits.
+    w_bit: u16,
+    /// Value of the RRR bits (between 0 and 0b111).
+    rrr_bits: u16,
+    /// Opcode bytes.
+    op_bytes: &'static [u8],
+}
+
+impl<'builder> Template<'builder> {
+    fn new(recipe: EncodingRecipeBuilder, regs: &'builder IsaRegs) -> Self {
+        Self {
+            regs,
+            recipe,
+            rex_kind: RexRecipeKind::default(),
+            inferred_rex_compute_size: None,
+            when_prefixed: None,
+            w_bit: 0,
+            rrr_bits: 0,
+            op_bytes: &opcodes::EMPTY,
+        }
+    }
+
+    fn name(&self) -> &str {
+        &self.recipe.name
+    }
+    fn rex_kind(self, kind: RexRecipeKind) -> Self {
+        Self {
+            rex_kind: kind,
+            ..self
+        }
+    }
+    fn inferred_rex_compute_size(self, function: &'static str) -> Self {
+        Self {
+            inferred_rex_compute_size: Some(function),
+            ..self
+        }
+    }
+    fn when_prefixed(self, template: Rc<Template<'builder>>) -> Self {
+        assert!(self.when_prefixed.is_none());
+        Self {
+            when_prefixed: Some(template),
+            ..self
+        }
+    }
+
+    // Copy setters.
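+    //
+    // For illustration (a sketch; the binding names here are hypothetical and
+    // the real call sites live in encodings.rs), a template for a 66 0F 38
+    // opcode could be specialized as:
+    //
+    //     let fa = recipes.template("fa");
+    //     let (recipe, bits) = fa.opcodes(&opcodes::PMAXSD).rex().w().build();
+    //
+    // Each setter clones the template, so a single base recipe can yield REX,
+    // non-REX and inferred-REX variants without duplicating the emit code.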
+ pub fn opcodes(&self, op_bytes: &'static [u8]) -> Self { + assert!(!op_bytes.is_empty()); + let mut copy = self.clone(); + copy.op_bytes = op_bytes; + copy + } + pub fn w(&self) -> Self { + let mut copy = self.clone(); + copy.w_bit = 1; + copy + } + pub fn rrr(&self, value: u16) -> Self { + assert!(value <= 0b111); + let mut copy = self.clone(); + copy.rrr_bits = value; + copy + } + pub fn nonrex(&self) -> Self { + assert!( + self.rex_kind != RexRecipeKind::AlwaysEmitRex, + "Template requires REX prefix." + ); + let mut copy = self.clone(); + copy.rex_kind = RexRecipeKind::NeverEmitRex; + copy + } + pub fn rex(&self) -> Self { + assert!( + self.rex_kind != RexRecipeKind::NeverEmitRex, + "Template requires no REX prefix." + ); + if let Some(prefixed) = &self.when_prefixed { + let mut ret = prefixed.rex(); + // Forward specialized parameters. + ret.op_bytes = self.op_bytes; + ret.w_bit = self.w_bit; + ret.rrr_bits = self.rrr_bits; + return ret; + } + let mut copy = self.clone(); + copy.rex_kind = RexRecipeKind::AlwaysEmitRex; + copy + } + pub fn infer_rex(&self) -> Self { + assert!( + self.rex_kind != RexRecipeKind::NeverEmitRex, + "Template requires no REX prefix." + ); + assert!( + self.when_prefixed.is_none(), + "infer_rex used with when_prefixed()." + ); + let mut copy = self.clone(); + copy.rex_kind = RexRecipeKind::InferRex; + copy + } + + pub fn build(mut self) -> (EncodingRecipe, u16) { + let (opcode, bits) = decode_opcodes(&self.op_bytes, self.rrr_bits, self.w_bit); + + let (recipe_name, rex_prefix_size) = match self.rex_kind { + RexRecipeKind::Unspecified | RexRecipeKind::NeverEmitRex => { + // Ensure the operands are limited to non-REX constraints. + let operands_in = self.recipe.operands_in.unwrap_or_default(); + self.recipe.operands_in = Some(replace_nonrex_constraints(self.regs, operands_in)); + let operands_out = self.recipe.operands_out.unwrap_or_default(); + self.recipe.operands_out = + Some(replace_nonrex_constraints(self.regs, operands_out)); + + (opcode.into(), 0) + } + RexRecipeKind::AlwaysEmitRex => ("Rex".to_string() + opcode, 1), + RexRecipeKind::InferRex => { + // Hook up the right function for inferred compute_size(). + assert!( + self.inferred_rex_compute_size.is_some(), + "InferRex recipe '{}' needs an inferred_rex_compute_size function.", + &self.recipe.name + ); + self.recipe.compute_size = self.inferred_rex_compute_size; + + ("DynRex".to_string() + opcode, 0) + } + }; + + let size_addendum = self.op_bytes.len() as u64 + rex_prefix_size; + self.recipe.base_size += size_addendum; + + // Branch ranges are relative to the end of the instruction. + // For InferRex, the range should be the minimum, assuming no REX. + if let Some(range) = self.recipe.branch_range.as_mut() { + range.inst_size += size_addendum; + } + + self.recipe.emit = replace_put_op(self.recipe.emit, &recipe_name); + self.recipe.name = recipe_name + &self.recipe.name; + + (self.recipe.build(), bits) + } +} + +/// Returns a predicate checking that the "cond" field of the instruction contains one of the +/// directly supported floating point condition codes. +fn supported_floatccs_predicate( + supported_cc: &[Literal], + format: &InstructionFormat, +) -> InstructionPredicate { + supported_cc + .iter() + .fold(InstructionPredicate::new(), |pred, literal| { + pred.or(InstructionPredicate::new_is_field_equal( + format, + "cond", + literal.to_rust_code(), + )) + }) +} + +/// Return an instruction predicate that checks if `iform.imm` is a valid `scale` for a SIB byte. 
+fn valid_scale(format: &InstructionFormat) -> InstructionPredicate {
+    ["1", "2", "4", "8"]
+        .iter()
+        .fold(InstructionPredicate::new(), |pred, &literal| {
+            pred.or(InstructionPredicate::new_is_field_equal(
+                format,
+                "imm",
+                literal.into(),
+            ))
+        })
+}
+
+pub(crate) fn define<'shared>(
+    shared_defs: &'shared SharedDefinitions,
+    settings: &'shared SettingGroup,
+    regs: &'shared IsaRegs,
+) -> RecipeGroup<'shared> {
+    // The set of floating point condition codes that are directly supported.
+    // Other condition codes need to be reversed or expressed as two tests.
+    let floatcc = &shared_defs.imm.floatcc;
+    let supported_floatccs: Vec<Literal> = ["ord", "uno", "one", "ueq", "gt", "ge", "ult", "ule"]
+        .iter()
+        .map(|name| Literal::enumerator_for(floatcc, name))
+        .collect();
+
+    // Register classes shorthands.
+    let abcd = regs.class_by_name("ABCD");
+    let gpr = regs.class_by_name("GPR");
+    let fpr = regs.class_by_name("FPR");
+    let flag = regs.class_by_name("FLAG");
+
+    // Operand constraints shorthands.
+    let reg_rflags = Register::new(flag, regs.regunit_by_name(flag, "rflags"));
+    let reg_rax = Register::new(gpr, regs.regunit_by_name(gpr, "rax"));
+    let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx"));
+    let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx"));
+    let reg_r15 = Register::new(gpr, regs.regunit_by_name(gpr, "r15"));
+
+    // Stack operand with a 32-bit signed displacement from either RBP or RSP.
+    let stack_gpr32 = Stack::new(gpr);
+    let stack_fpr32 = Stack::new(fpr);
+
+    let formats = &shared_defs.formats;
+
+    // Predicates shorthands.
+    let use_sse41 = settings.predicate_by_name("use_sse41");
+
+    // Definitions.
+    let mut recipes = RecipeGroup::new(regs);
+
+    // A null unary instruction that takes a GPR register. Can be used for identity copies and
+    // no-op conversions.
+    recipes.add_recipe(
+        EncodingRecipeBuilder::new("null", &formats.unary, 0)
+            .operands_in(vec![gpr])
+            .operands_out(vec![0])
+            .emit(""),
+    );
+    recipes.add_recipe(
+        EncodingRecipeBuilder::new("null_fpr", &formats.unary, 0)
+            .operands_in(vec![fpr])
+            .operands_out(vec![0])
+            .emit(""),
+    );
+    recipes.add_recipe(
+        EncodingRecipeBuilder::new("stacknull", &formats.unary, 0)
+            .operands_in(vec![stack_gpr32])
+            .operands_out(vec![stack_gpr32])
+            .emit(""),
+    );
+
+    recipes.add_recipe(
+        EncodingRecipeBuilder::new("get_pinned_reg", &formats.nullary, 0)
+            .operands_out(vec![reg_r15])
+            .emit(""),
+    );
+    // umr with a fixed register output that's r15.
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("set_pinned_reg", &formats.unary, 1)
+            .operands_in(vec![gpr])
+            .clobbers_flags(false)
+            .emit(
+                r#"
+                    let r15 = RU::r15.into();
+                    {{PUT_OP}}(bits, rex2(r15, in_reg0), sink);
+                    modrm_rr(r15, in_reg0, sink);
+                "#,
+            ),
+    );
+
+    // No-op fills, created by late-stage redundant-fill removal.
+    recipes.add_recipe(
+        EncodingRecipeBuilder::new("fillnull", &formats.unary, 0)
+            .operands_in(vec![stack_gpr32])
+            .operands_out(vec![gpr])
+            .clobbers_flags(false)
+            .emit(""),
+    );
+    recipes.add_recipe(
+        EncodingRecipeBuilder::new("ffillnull", &formats.unary, 0)
+            .operands_in(vec![stack_gpr32])
+            .operands_out(vec![fpr])
+            .clobbers_flags(false)
+            .emit(""),
+    );
+
+    recipes.add_recipe(
+        EncodingRecipeBuilder::new("debugtrap", &formats.nullary, 1).emit("sink.put1(0xcc);"),
+    );
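+
+    // A legend for the Intel-manual style notation used in the recipe comments
+    // below (standard SDM conventions): "XX /r" is opcode byte(s) plus a ModR/M
+    // byte whose reg field names a register operand; "XX /n" fixes the ModR/M
+    // reg field to the digit n (the rrr bits described above); "ib", "id" and
+    // "iq" are byte, doubleword and quadword immediates; and "XX+rd" encodes
+    // the register in the low three bits of the opcode byte itself.
+
+    // XX opcode, no ModR/M.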
+ recipes.add_template_recipe(EncodingRecipeBuilder::new("trap", &formats.trap, 0).emit( + r#" + sink.trap(code, func.srclocs[inst]); + {{PUT_OP}}(bits, BASE_REX, sink); + "#, + )); + + // Macro: conditional jump over a ud2. + recipes.add_recipe( + EncodingRecipeBuilder::new("trapif", &formats.int_cond_trap, 4) + .operands_in(vec![reg_rflags]) + .clobbers_flags(false) + .emit( + r#" + // Jump over a 2-byte ud2. + sink.put1(0x70 | (icc2opc(cond.inverse()) as u8)); + sink.put1(2); + // ud2. + sink.trap(code, func.srclocs[inst]); + sink.put1(0x0f); + sink.put1(0x0b); + "#, + ), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("trapff", &formats.float_cond_trap, 4) + .operands_in(vec![reg_rflags]) + .clobbers_flags(false) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs, + &*formats.float_cond_trap, + )) + .emit( + r#" + // Jump over a 2-byte ud2. + sink.put1(0x70 | (fcc2opc(cond.inverse()) as u8)); + sink.put1(2); + // ud2. + sink.trap(code, func.srclocs[inst]); + sink.put1(0x0f); + sink.put1(0x0b); + "#, + ), + ); + + // XX /r + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rr", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), + ); + + // XX /r with operands swapped. (RM form). + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rrx", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), + ); + + // XX /r with FPR ins and outs. A form. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fa", &formats.binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + ); + + // XX /r with FPR ins and outs. A form with input operands swapped. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fax", &formats.binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![1]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + ); + + // XX /r with FPR ins and outs. A form with a byte immediate. + { + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fa_ib", &formats.insert_lane, 2) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + &*formats.insert_lane, + "lane", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + let imm:i64 = lane.into(); + sink.put1(imm as u8); + "#, + ), + ); + } + + // XX /n for a unary operation with extension bits. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("ur", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + + // XX /r, but for a unary operator with separate input/output register, like + // copies. MR form, preserving flags. 
+ recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("umr", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"), + ); + + // Same as umr, but with FPR -> GPR registers. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rfumr", &formats.unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); + "#, + ), + ); + + // Same as umr, but with the source register specified directly. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("umr_reg_to_ssa", &formats.copy_to_ssa, 1) + // No operands_in to mention, because a source register is specified directly. + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, src), sink); + modrm_rr(out_reg0, src, sink); + "#, + ), + ); + + // XX /r, but for a unary operator with separate input/output register. + // RM form. Clobbers FLAGS. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("urm", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r. Same as urm, but doesn't clobber FLAGS. + let urm_noflags = recipes.add_template_recipe( + EncodingRecipeBuilder::new("urm_noflags", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r. Same as urm_noflags, but input limited to ABCD. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("urm_noflags_abcd", &formats.unary, 1) + .operands_in(vec![abcd]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + regs, + ) + .when_prefixed(urm_noflags), + ); + + // XX /r, RM form, FPR -> FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("furm", &formats.unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // Same as furm, but with the source register specified directly. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("furm_reg_to_ssa", &formats.copy_to_ssa, 1) + // No operands_in to mention, because a source register is specified directly. + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(src, out_reg0), sink); + modrm_rr(src, out_reg0, sink); + "#, + ), + ); + + // XX /r, RM form, GPR -> FPR. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("frurm", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"), + ); + + // XX /r, RM form, FPR -> GPR. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("rfurm", &formats.unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r, RMI form for one of the roundXX SSE 4.1 instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("furmi_rnd", &formats.unary, 2) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .isa_predicate(use_sse41) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + sink.put1(match opcode { + Opcode::Nearest => 0b00, + Opcode::Floor => 0b01, + Opcode::Ceil => 0b10, + Opcode::Trunc => 0b11, + x => panic!("{} unexpected for furmi_rnd", opcode), + }); + "#, + ), + ); + + // XX /r, for regmove instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rmov", &formats.reg_move, 1) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(dst, src), sink); + modrm_rr(dst, src, sink); + "#, + ), + ); + + // XX /r, for regmove instructions (FPR version, RM encoded). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("frmov", &formats.reg_move, 1) + .operands_in(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(src, dst), sink); + modrm_rr(src, dst, sink); + "#, + ), + ); + + // XX /n with one arg in %rcx, for shifts. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rc", &formats.binary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rcx), + ]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + ); + + // XX /n for division: inputs in %rax, %rdx, r. Outputs in %rax, %rdx. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("div", &formats.ternary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::FixedReg(reg_rdx), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![reg_rax, reg_rdx]) + .emit( + r#" + sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]); + {{PUT_OP}}(bits, rex1(in_reg2), sink); + modrm_r_bits(in_reg2, bits, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg2"), + ); + + // XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo) + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("mulx", &formats.binary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::FixedReg(reg_rdx), + ]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg1), sink); + modrm_r_bits(in_reg1, bits, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg1"), + ); + + // XX /n ib with 8-bit immediate sign-extended. 
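+    // The `_ib` recipes carry an 8-bit sign-extended immediate and the `_id`
+    // recipes a 32-bit one; the `new_is_signed_int` predicates let the
+    // encoding tables pick the narrowest form that can represent a given
+    // constant. Roughly (a sketch, assuming the usual ordering of the
+    // encodings defined in encodings.rs):
+    //
+    //     v2 = iadd_imm v1, 100      ; fits in i8  -> an `_ib` encoding
+    //     v2 = iadd_imm v1, 100_000  ; needs i32   -> an `_id` encoding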
+ { + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("r_ib", &formats.binary_imm, 2) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.binary_imm, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("f_ib", &formats.binary_imm, 2) + .operands_in(vec![fpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.binary_imm, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + ); + + // XX /n id with 32-bit immediate sign-extended. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("r_id", &formats.binary_imm, 5) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.binary_imm, + "imm", + 32, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + } + + // XX /r ib with 8-bit unsigned immediate (e.g. for pshufd) + { + recipes.add_template_recipe( + EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.extract_lane, 2) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + &*formats.extract_lane, + "lane", + 8, + 0, + )) // TODO if the format name is changed then "lane" should be renamed to something more appropriate--ordering mask? broadcast immediate? + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + let imm:i64 = lane.into(); + sink.put1(imm as u8); + "#, + ), + ); + } + + // XX /r ib with 8-bit unsigned immediate (e.g. for extractlane) + { + recipes.add_template_recipe( + EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.extract_lane, 2) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + &*formats.extract_lane, "lane", 8, 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte + let imm:i64 = lane.into(); + sink.put1(imm as u8); + "#, + ), + ); + } + + // XX /r ib with 8-bit unsigned immediate (e.g. for insertlane) + { + recipes.add_template_recipe( + EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.insert_lane, 2) + .operands_in(vec![fpr, gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + &*formats.insert_lane, + "lane", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + let imm:i64 = lane.into(); + sink.put1(imm as u8); + "#, + ), + ); + } + + { + // XX /n id with 32-bit immediate sign-extended. UnaryImm version. 
+        recipes.add_template_recipe(
+            EncodingRecipeBuilder::new("u_id", &formats.unary_imm, 5)
+                .operands_out(vec![gpr])
+                .inst_predicate(InstructionPredicate::new_is_signed_int(
+                    &*formats.unary_imm,
+                    "imm",
+                    32,
+                    0,
+                ))
+                .emit(
+                    r#"
+                        {{PUT_OP}}(bits, rex1(out_reg0), sink);
+                        modrm_r_bits(out_reg0, bits, sink);
+                        let imm: i64 = imm.into();
+                        sink.put4(imm as u32);
+                    "#,
+                ),
+        );
+    }
+
+    // XX+rd id unary with 32-bit immediate. Note no recipe predicate.
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("pu_id", &formats.unary_imm, 4)
+            .operands_out(vec![gpr])
+            .emit(
+                r#"
+                    // The destination register is encoded in the low bits of the opcode.
+                    // No ModR/M.
+                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+                    let imm: i64 = imm.into();
+                    sink.put4(imm as u32);
+                "#,
+            ),
+    );
+
+    // XX+rd id unary with bool immediate. Note no recipe predicate.
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("pu_id_bool", &formats.unary_bool, 4)
+            .operands_out(vec![gpr])
+            .emit(
+                r#"
+                    // The destination register is encoded in the low bits of the opcode.
+                    // No ModR/M.
+                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+                    let imm: u32 = if imm { 1 } else { 0 };
+                    sink.put4(imm);
+                "#,
+            ),
+    );
+
+    // XX+rd id nullary with 0 as 32-bit immediate. Note no recipe predicate.
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("pu_id_ref", &formats.nullary, 4)
+            .operands_out(vec![gpr])
+            .emit(
+                r#"
+                    // The destination register is encoded in the low bits of the opcode.
+                    // No ModR/M.
+                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+                    sink.put4(0);
+                "#,
+            ),
+    );
+
+    // XX+rd iq unary with 64-bit immediate.
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("pu_iq", &formats.unary_imm, 8)
+            .operands_out(vec![gpr])
+            .emit(
+                r#"
+                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+                    let imm: i64 = imm.into();
+                    sink.put8(imm as u64);
+                "#,
+            ),
+    );
+
+    // XX /r unary with zero immediate; emits only a reg/reg ModR/M byte.
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("u_id_z", &formats.unary_imm, 1)
+            .operands_out(vec![gpr])
+            .emit(
+                r#"
+                    {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
+                    modrm_rr(out_reg0, out_reg0, sink);
+                "#,
+            ),
+    );
+
+    // XX /r unary with floating point 32-bit immediate equal to zero.
+    {
+        recipes.add_template_recipe(
+            EncodingRecipeBuilder::new("f32imm_z", &formats.unary_ieee32, 1)
+                .operands_out(vec![fpr])
+                .inst_predicate(InstructionPredicate::new_is_zero_32bit_float(
+                    &*formats.unary_ieee32,
+                    "imm",
+                ))
+                .emit(
+                    r#"
+                        {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
+                        modrm_rr(out_reg0, out_reg0, sink);
+                    "#,
+                ),
+        );
+    }
+
+    // XX /r unary with floating point 64-bit immediate equal to zero.
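+    // Like f32imm_z above, this emits only a ModR/M byte with reg == rm, i.e.
+    // an `xorpd xmm, xmm`-style self-XOR (presumably paired with XORPS/XORPD
+    // when the encodings are defined): XORing a register with itself is the
+    // standard x86 zeroing idiom, so no immediate bytes are emitted at all.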
+ { + recipes.add_template_recipe( + EncodingRecipeBuilder::new("f64imm_z", &formats.unary_ieee64, 1) + .operands_out(vec![fpr]) + .inst_predicate(InstructionPredicate::new_is_zero_64bit_float( + &*formats.unary_ieee64, + "imm", + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + ); + } + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pushq", &formats.unary, 0) + .operands_in(vec![gpr]) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits | (in_reg0 & 7), rex1(in_reg0), sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("popq", &formats.nullary, 0) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + "#, + ), + ); + + // XX /r, for regmove instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("copysp", &formats.copy_special, 1) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(dst, src), sink); + modrm_rr(dst, src, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp", &formats.unary, 1) + .operands_in(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(RU::rsp.into(), in_reg0), sink); + modrm_rr(RU::rsp.into(), in_reg0, sink); + "#, + ), + ); + + { + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp_ib", &formats.unary_imm, 2) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.unary_imm, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); + modrm_r_bits(RU::rsp.into(), bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp_id", &formats.unary_imm, 5) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.unary_imm, + "imm", + 32, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); + modrm_r_bits(RU::rsp.into(), bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + } + + // XX+rd id with Abs4 function relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fnaddr4", &formats.func_addr, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs4, + &func.dfg.ext_funcs[func_ref].name, + 0); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 function relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fnaddr8", &formats.func_addr, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs8, + &func.dfg.ext_funcs[func_ref].name, + 0); + sink.put8(0); + "#, + ), + ); + + // Similar to fnaddr4, but writes !0 (this is used by BaldrMonkey). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("allones_fnaddr4", &formats.func_addr, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs4, + &func.dfg.ext_funcs[func_ref].name, + 0); + // Write the immediate as `!0` for the benefit of BaldrMonkey. + sink.put4(!0); + "#, + ), + ); + + // Similar to fnaddr8, but writes !0 (this is used by BaldrMonkey). 
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("allones_fnaddr8", &formats.func_addr, 8)
+            .operands_out(vec![gpr])
+            .emit(
+                r#"
+                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+                    sink.reloc_external(Reloc::Abs8,
+                                        &func.dfg.ext_funcs[func_ref].name,
+                                        0);
+                    // Write the immediate as `!0` for the benefit of BaldrMonkey.
+                    sink.put8(!0);
+                "#,
+            ),
+    );
+
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("pcrel_fnaddr8", &formats.func_addr, 5)
+            .operands_out(vec![gpr])
+            // rex2 gets passed 0 for r/m register because the upper bit of
+            // r/m doesn't get decoded when in rip-relative addressing mode.
+            .emit(
+                r#"
+                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
+                    modrm_riprel(out_reg0, sink);
+                    // The addend adjusts for the difference between the end of the
+                    // instruction and the beginning of the immediate field.
+                    sink.reloc_external(Reloc::X86PCRel4,
+                                        &func.dfg.ext_funcs[func_ref].name,
+                                        -4);
+                    sink.put4(0);
+                "#,
+            ),
+    );
+
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("got_fnaddr8", &formats.func_addr, 5)
+            .operands_out(vec![gpr])
+            // rex2 gets passed 0 for r/m register because the upper bit of
+            // r/m doesn't get decoded when in rip-relative addressing mode.
+            .emit(
+                r#"
+                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
+                    modrm_riprel(out_reg0, sink);
+                    // The addend adjusts for the difference between the end of the
+                    // instruction and the beginning of the immediate field.
+                    sink.reloc_external(Reloc::X86GOTPCRel4,
+                                        &func.dfg.ext_funcs[func_ref].name,
+                                        -4);
+                    sink.put4(0);
+                "#,
+            ),
+    );
+
+    // XX+rd id with Abs4 globalsym relocation.
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("gvaddr4", &formats.unary_global_value, 4)
+            .operands_out(vec![gpr])
+            .emit(
+                r#"
+                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+                    sink.reloc_external(Reloc::Abs4,
+                                        &func.global_values[global_value].symbol_name(),
+                                        0);
+                    sink.put4(0);
+                "#,
+            ),
+    );
+
+    // XX+rd iq with Abs8 globalsym relocation.
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("gvaddr8", &formats.unary_global_value, 8)
+            .operands_out(vec![gpr])
+            .emit(
+                r#"
+                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+                    sink.reloc_external(Reloc::Abs8,
+                                        &func.global_values[global_value].symbol_name(),
+                                        0);
+                    sink.put8(0);
+                "#,
+            ),
+    );
+
+    // XX /r id with PCRel4 globalsym relocation (RIP-relative).
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("pcrel_gvaddr8", &formats.unary_global_value, 5)
+            .operands_out(vec![gpr])
+            .emit(
+                r#"
+                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
+                    modrm_rm(5, out_reg0, sink);
+                    // The addend adjusts for the difference between the end of the
+                    // instruction and the beginning of the immediate field.
+                    sink.reloc_external(Reloc::X86PCRel4,
+                                        &func.global_values[global_value].symbol_name(),
+                                        -4);
+                    sink.put4(0);
+                "#,
+            ),
+    );
+
+    // XX /r id with GOTPCRel4 globalsym relocation (RIP-relative, via the GOT).
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("got_gvaddr8", &formats.unary_global_value, 5)
+            .operands_out(vec![gpr])
+            .emit(
+                r#"
+                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
+                    modrm_rm(5, out_reg0, sink);
+                    // The addend adjusts for the difference between the end of the
+                    // instruction and the beginning of the immediate field.
+                    sink.reloc_external(Reloc::X86GOTPCRel4,
+                                        &func.global_values[global_value].symbol_name(),
+                                        -4);
+                    sink.put4(0);
+                "#,
+            ),
+    );
+
+    // Stack addresses.
+    //
+    // TODO Alternative forms for 8-bit immediates, when applicable.
+
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("spaddr4_id", &formats.stack_load, 6)
+            .operands_out(vec![gpr])
+            .emit(
+                r#"
+                    let sp = StackRef::sp(stack_slot, &func.stack_slots);
+                    let base = stk_base(sp.base);
+                    {{PUT_OP}}(bits, rex2(base, out_reg0), sink);
+                    modrm_sib_disp32(out_reg0, sink);
+                    sib_noindex(base, sink);
+                    let imm : i32 = offset.into();
+                    sink.put4(sp.offset.checked_add(imm).unwrap() as u32);
+                "#,
+            ),
+    );
+
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("spaddr8_id", &formats.stack_load, 6)
+            .operands_out(vec![gpr])
+            .emit(
+                r#"
+                    let sp = StackRef::sp(stack_slot, &func.stack_slots);
+                    let base = stk_base(sp.base);
+                    {{PUT_OP}}(bits, rex2(base, out_reg0), sink);
+                    modrm_sib_disp32(out_reg0, sink);
+                    sib_noindex(base, sink);
+                    let imm : i32 = offset.into();
+                    sink.put4(sp.offset.checked_add(imm).unwrap() as u32);
+                "#,
+            ),
+    );
+
+    // Store recipes.
+
+    {
+        // Simple stores.
+
+        // A predicate asking if the offset is zero.
+        let has_no_offset =
+            InstructionPredicate::new_is_field_equal(&*formats.store, "offset", "0".into());
+
+        // XX /r register-indirect store with no offset.
+        let st = recipes.add_template_recipe(
+            EncodingRecipeBuilder::new("st", &formats.store, 1)
+                .operands_in(vec![gpr, gpr])
+                .inst_predicate(has_no_offset.clone())
+                .clobbers_flags(false)
+                .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1")
+                .emit(
+                    r#"
+                        if !flags.notrap() {
+                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+                        }
+                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+                        if needs_sib_byte(in_reg1) {
+                            modrm_sib(in_reg0, sink);
+                            sib_noindex(in_reg1, sink);
+                        } else if needs_offset(in_reg1) {
+                            modrm_disp8(in_reg1, in_reg0, sink);
+                            sink.put1(0);
+                        } else {
+                            modrm_rm(in_reg1, in_reg0, sink);
+                        }
+                    "#,
+                ),
+        );
+
+        // XX /r register-indirect store with no offset.
+        // Only ABCD allowed for stored value. This is for byte stores with no REX.
+        recipes.add_template(
+            Template::new(
+                EncodingRecipeBuilder::new("st_abcd", &formats.store, 1)
+                    .operands_in(vec![abcd, gpr])
+                    .inst_predicate(has_no_offset.clone())
+                    .clobbers_flags(false)
+                    .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1")
+                    .emit(
+                        r#"
+                            if !flags.notrap() {
+                                sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+                            }
+                            {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+                            if needs_sib_byte(in_reg1) {
+                                modrm_sib(in_reg0, sink);
+                                sib_noindex(in_reg1, sink);
+                            } else if needs_offset(in_reg1) {
+                                modrm_disp8(in_reg1, in_reg0, sink);
+                                sink.put1(0);
+                            } else {
+                                modrm_rm(in_reg1, in_reg0, sink);
+                            }
+                        "#,
+                    ),
+                regs,
+            )
+            .when_prefixed(st),
+        );
+
+        // XX /r register-indirect store of FPR with no offset.
+        recipes.add_template_inferred(
+            EncodingRecipeBuilder::new("fst", &formats.store, 1)
+                .operands_in(vec![fpr, gpr])
+                .inst_predicate(has_no_offset)
+                .clobbers_flags(false)
+                .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1")
+                .emit(
+                    r#"
+                        if !flags.notrap() {
+                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+                        }
+                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+                        if needs_sib_byte(in_reg1) {
+                            modrm_sib(in_reg0, sink);
+                            sib_noindex(in_reg1, sink);
+                        } else if needs_offset(in_reg1) {
+                            modrm_disp8(in_reg1, in_reg0, sink);
+                            sink.put1(0);
+                        } else {
+                            modrm_rm(in_reg1, in_reg0, sink);
+                        }
+                    "#,
+                ),
+            "size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1",
+        );
+
+        let has_small_offset =
+            InstructionPredicate::new_is_signed_int(&*formats.store, "offset", 8, 0);
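+
+        // A note on the `needs_sib_byte` / `needs_offset` special cases used by
+        // these store recipes (and mirrored by the load recipes below); both
+        // follow from x86 addressing-mode encoding: a base of RSP or R12 (low
+        // bits 0b100) cannot be named directly in the ModR/M byte and needs an
+        // SIB byte, while a base of RBP or R13 (low bits 0b101) with mod=00
+        // would mean RIP-relative/absolute addressing instead, so a zero disp8
+        // is emitted for it.
+
+        // XX /r register-indirect store with 8-bit offset.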
+ let st_disp8 = recipes.add_template_recipe( + EncodingRecipeBuilder::new("stDisp8", &formats.store, 2) + .operands_in(vec![gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with 8-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("stDisp8_abcd", &formats.store, 2) + .operands_in(vec![abcd, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + regs, + ) + .when_prefixed(st_disp8), + ); + + // XX /r register-indirect store with 8-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstDisp8", &formats.store, 2) + .operands_in(vec![fpr, gpr]) + .inst_predicate(has_small_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with 32-bit offset. + let st_disp32 = recipes.add_template_recipe( + EncodingRecipeBuilder::new("stDisp32", &formats.store, 5) + .operands_in(vec![gpr, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with 32-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. 
+ recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("stDisp32_abcd", &formats.store, 5) + .operands_in(vec![abcd, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + regs, + ) + .when_prefixed(st_disp32), + ); + + // XX /r register-indirect store with 32-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstDisp32", &formats.store, 5) + .operands_in(vec![fpr, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + { + // Complex stores. + + // A predicate asking if the offset is zero. + let has_no_offset = + InstructionPredicate::new_is_field_equal(&*formats.store_complex, "offset", "0".into()); + + // XX /r register-indirect store with index and no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndex", &formats.store_complex, 2) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with index and no offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndex_abcd", &formats.store_complex, 2) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with index and no offset of FPR. 
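+        // (All of these indexed forms emit an SIB byte unconditionally, since
+        // ModR/M alone cannot encode a base+index address; only the RBP/R13
+        // base case keeps its zero-disp8 workaround, hence the `needs_offset`
+        // branch in the no-offset recipes.)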
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndex", &formats.store_complex, 2) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_no_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + let has_small_offset = + InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 8, 0); + + // XX /r register-indirect store with index and 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp8", &formats.store_complex, 3) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with index and 8-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp8_abcd", &formats.store_complex, 3) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with index and 8-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndexDisp8", &formats.store_complex, 3) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_small_offset) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + let has_big_offset = + InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 32, 0); + + // XX /r register-indirect store with index and 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp32", &formats.store_complex, 6) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with index and 32-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp32_abcd", &formats.store_complex, 6) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with index and 32-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndexDisp32", &formats.store_complex, 6) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_big_offset) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + // Unary spill with SIB and 32-bit displacement. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("spillSib32", &formats.unary, 6) + .operands_in(vec![gpr]) + .operands_out(vec![stack_gpr32]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let base = stk_base(out_stk0.base); + {{PUT_OP}}(bits, rex2(base, in_reg0), sink); + modrm_sib_disp32(in_reg0, sink); + sib_noindex(base, sink); + sink.put4(out_stk0.offset as u32); + "#, + ), + ); + + // Like spillSib32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fspillSib32", &formats.unary, 6) + .operands_in(vec![fpr]) + .operands_out(vec![stack_fpr32]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let base = stk_base(out_stk0.base); + {{PUT_OP}}(bits, rex2(base, in_reg0), sink); + modrm_sib_disp32(in_reg0, sink); + sib_noindex(base, sink); + sink.put4(out_stk0.offset as u32); + "#, + ), + ); + + // Regspill using RSP-relative addressing. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("regspill32", &formats.reg_spill, 6) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let dst = StackRef::sp(dst, &func.stack_slots); + let base = stk_base(dst.base); + {{PUT_OP}}(bits, rex2(base, src), sink); + modrm_sib_disp32(src, sink); + sib_noindex(base, sink); + sink.put4(dst.offset as u32); + "#, + ), + ); + + // Like regspill32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fregspill32", &formats.reg_spill, 6) + .operands_in(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let dst = StackRef::sp(dst, &func.stack_slots); + let base = stk_base(dst.base); + {{PUT_OP}}(bits, rex2(base, src), sink); + modrm_sib_disp32(src, sink); + sib_noindex(base, sink); + sink.put4(dst.offset as u32); + "#, + ), + ); + + // Load recipes. + + { + // Simple loads. + + // A predicate asking if the offset is zero. + let has_no_offset = + InstructionPredicate::new_is_field_equal(&*formats.load, "offset", "0".into()); + + // XX /r load with no offset. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("ld", &formats.load, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else if needs_offset(in_reg0) { + modrm_disp8(in_reg0, out_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg0, out_reg0, sink); + } + "#, + ), + ); + + // XX /r float load with no offset. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("fld", &formats.load, 1) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_no_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else if needs_offset(in_reg0) { + modrm_disp8(in_reg0, out_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg0, out_reg0, sink); + } + "#, + ), + "size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", + ); + + let has_small_offset = + InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 8, 0); + + // XX /r load with 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldDisp8", &formats.load, 2) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp8(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r float load with 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldDisp8", &formats.load, 2) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_small_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp8(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + let has_big_offset = + InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 32, 0); + + // XX /r load with 32-bit offset. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldDisp32", &formats.load, 5) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp32(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp32(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r float load with 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldDisp32", &formats.load, 5) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_big_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp32(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp32(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + { + // Complex loads. + + // A predicate asking if the offset is zero. + let has_no_offset = + InstructionPredicate::new_is_field_equal(&*formats.load_complex, "offset", "0".into()); + + // XX /r load with index and no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldWithIndex", &formats.load_complex, 2) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_sib(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + } + "#, + ), + ); + + // XX /r float load with index and no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldWithIndex", &formats.load_complex, 2) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_no_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_sib(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + } + "#, + ), + ); + + let has_small_offset = + InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 8, 0); + + // XX /r load with index and 8-bit offset. 
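+ // For example (illustrative only): with opcode 0x8b this shape encodes
+ // `mov eax, [rcx+rdx*1+0x10]` as 8b 44 11 10 (ModRM, SIB, disp8).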
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldWithIndexDisp8", &formats.load_complex, 3) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r float load with 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldWithIndexDisp8", &formats.load_complex, 3) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_small_offset) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + let has_big_offset = + InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 32, 0); + + // XX /r load with index and 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldWithIndexDisp32", &formats.load_complex, 6) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp32(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r float load with index and 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldWithIndexDisp32", &formats.load_complex, 6) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_big_offset) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp32(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + // Unary fill with SIB and 32-bit displacement. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fillSib32", &formats.unary, 6) + .operands_in(vec![stack_gpr32]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + let base = stk_base(in_stk0.base); + {{PUT_OP}}(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + sink.put4(in_stk0.offset as u32); + "#, + ), + ); + + // Like fillSib32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ffillSib32", &formats.unary, 6) + .operands_in(vec![stack_fpr32]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + let base = stk_base(in_stk0.base); + {{PUT_OP}}(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + sink.put4(in_stk0.offset as u32); + "#, + ), + ); + + // Regfill with RSP-relative 32-bit displacement. 
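+ // For example (illustrative only): with opcode 0x8b this shape encodes
+ // `mov eax, [rsp+0x20]` as 8b 84 24 20 00 00 00; RSP-relative addressing
+ // always requires an SIB byte.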
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("regfill32", &formats.reg_fill, 6) + .operands_in(vec![stack_gpr32]) + .clobbers_flags(false) + .emit( + r#" + let src = StackRef::sp(src, &func.stack_slots); + let base = stk_base(src.base); + {{PUT_OP}}(bits, rex2(base, dst), sink); + modrm_sib_disp32(dst, sink); + sib_noindex(base, sink); + sink.put4(src.offset as u32); + "#, + ), + ); + + // Like regfill32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fregfill32", &formats.reg_fill, 6) + .operands_in(vec![stack_fpr32]) + .clobbers_flags(false) + .emit( + r#" + let src = StackRef::sp(src, &func.stack_slots); + let base = stk_base(src.base); + {{PUT_OP}}(bits, rex2(base, dst), sink); + modrm_sib_disp32(dst, sink); + sib_noindex(base, sink); + sink.put4(src.offset as u32); + "#, + ), + ); + + // Call/return. + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("call_id", &formats.call, 4).emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits, BASE_REX, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(Reloc::X86CallPCRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("call_plt_id", &formats.call, 4).emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits, BASE_REX, sink); + sink.reloc_external(Reloc::X86CallPLTRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("call_r", &formats.call_indirect, 1) + .operands_in(vec![gpr]) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ret", &formats.multiary, 0) + .emit("{{PUT_OP}}(bits, BASE_REX, sink);"), + ); + + // Branches. 
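+ //
+ // Each branch shape below comes in a short variant (1-byte displacement,
+ // `b` suffix) and a near variant (4-byte displacement, `d` suffix); the
+ // `branch_range` declarations are what let the branch relaxation pass pick
+ // between them.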
+ + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jmpb", &formats.jump, 1) + .branch_range((1, 8)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, BASE_REX, sink); + disp1(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jmpd", &formats.jump, 4) + .branch_range((4, 32)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, BASE_REX, sink); + disp4(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brib", &formats.branch_int, 1) + .operands_in(vec![reg_rflags]) + .branch_range((1, 8)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink); + disp1(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brid", &formats.branch_int, 4) + .operands_in(vec![reg_rflags]) + .branch_range((4, 32)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink); + disp4(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brfb", &formats.branch_float, 1) + .operands_in(vec![reg_rflags]) + .branch_range((1, 8)) + .clobbers_flags(false) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs, + &*formats.branch_float, + )) + .emit( + r#" + {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink); + disp1(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brfd", &formats.branch_float, 4) + .operands_in(vec![reg_rflags]) + .branch_range((4, 32)) + .clobbers_flags(false) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs, + &*formats.branch_float, + )) + .emit( + r#" + {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink); + disp4(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("indirect_jmp", &formats.indirect_jump, 1) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jt_entry", &formats.branch_table_entry, 2) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .inst_predicate(valid_scale(&*formats.branch_table_entry)) + .compute_size("size_plus_maybe_offset_for_inreg_1") + .emit( + r#" + {{PUT_OP}}(bits, rex3(in_reg1, out_reg0, in_reg0), sink); + if needs_offset(in_reg1) { + modrm_sib_disp8(out_reg0, sink); + sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(out_reg0, sink); + sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); + } + "#, + ), + ); + + recipes.add_template_inferred( + EncodingRecipeBuilder::new("vconst", &formats.unary_const, 5) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + const_disp4(constant_handle, func, sink); + "#, + ), + "size_with_inferred_rex_for_outreg0", + ); + + recipes.add_template_inferred( + EncodingRecipeBuilder::new("vconst_optimized", &formats.unary_const, 1) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + "size_with_inferred_rex_for_outreg0", + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jt_base", &formats.branch_table_base, 5) + 
.operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
+ modrm_riprel(out_reg0, sink);
+
+ // No reloc is needed here as the jump table is emitted directly after
+ // the function body.
+ jt_disp4(table, func, sink);
+ "#,
+ ),
+ );
+
+ // Test flags and set a register.
+ //
+ // These setCC instructions only set the low 8 bits, and they can only write ABCD registers
+ // without a REX prefix.
+ //
+ // Other instruction encodings accepting `b1` inputs have the same constraints and only look at
+ // the low 8 bits of the input register.
+
+ let seti = recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("seti", &formats.int_cond, 1)
+ .operands_in(vec![reg_rflags])
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
+ modrm_r_bits(out_reg0, bits, sink);
+ "#,
+ ),
+ regs,
+ )
+ .rex_kind(RexRecipeKind::AlwaysEmitRex),
+ );
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("seti_abcd", &formats.int_cond, 1)
+ .operands_in(vec![reg_rflags])
+ .operands_out(vec![abcd])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
+ modrm_r_bits(out_reg0, bits, sink);
+ "#,
+ ),
+ regs,
+ )
+ .when_prefixed(seti),
+ );
+
+ let setf = recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("setf", &formats.float_cond, 1)
+ .operands_in(vec![reg_rflags])
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
+ modrm_r_bits(out_reg0, bits, sink);
+ "#,
+ ),
+ regs,
+ )
+ .rex_kind(RexRecipeKind::AlwaysEmitRex),
+ );
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("setf_abcd", &formats.float_cond, 1)
+ .operands_in(vec![reg_rflags])
+ .operands_out(vec![abcd])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
+ modrm_r_bits(out_reg0, bits, sink);
+ "#,
+ ),
+ regs,
+ )
+ .when_prefixed(setf),
+ );
+
+ // Conditional move (a.k.a. integer select):
+ // (maybe-REX.W) 0F 4x modrm(r,r)
+ // The 1-byte modrm(r,r) follows the opcode.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("cmov", &formats.int_select, 1)
+ .operands_in(vec![
+ OperandConstraint::FixedReg(reg_rflags),
+ OperandConstraint::RegClass(gpr),
+ OperandConstraint::RegClass(gpr),
+ ])
+ .operands_out(vec![2])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink);
+ modrm_rr(in_reg1, in_reg2, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_cmov"),
+ );
+
+ // Bit scan forward and reverse.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("bsf_and_bsr", &formats.unary, 1)
+ .operands_in(vec![gpr])
+ .operands_out(vec![
+ OperandConstraint::RegClass(gpr),
+ OperandConstraint::FixedReg(reg_rflags),
+ ])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ modrm_rr(in_reg0, out_reg0, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"),
+ );
+
+ // Arithmetic with flag I/O.
+
+ // XX /r, MR form. Add two GPR registers and set carry flag.
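+ // For example (illustrative only): with opcode 0x01 this shape encodes
+ // `add ecx, eax` as 01 c1 (MR form: ModRM.reg is the source, ModRM.rm the
+ // destination).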
+ recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rout", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![ + OperandConstraint::TiedInput(0), + OperandConstraint::FixedReg(reg_rflags), + ]) + .clobbers_flags(true) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), + ); + + // XX /r, MR form. Add two GPR registers and get carry flag. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rin", &formats.ternary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rflags), + ]) + .operands_out(vec![0]) + .clobbers_flags(true) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), + ); + + // XX /r, MR form. Add two GPR registers with carry flag. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rio", &formats.ternary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rflags), + ]) + .operands_out(vec![ + OperandConstraint::TiedInput(0), + OperandConstraint::FixedReg(reg_rflags), + ]) + .clobbers_flags(true) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), + ); + + // Compare and set flags. + + // XX /r, MR form. Compare two GPR registers and set flags. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rcmp", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), + ); + + // Same as rcmp, but second operand is the stack pointer. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rcmp_sp", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, RU::rsp.into()), sink); + modrm_rr(in_reg0, RU::rsp.into(), sink); + "#, + ), + ); + + // XX /r, RM form. Compare two FPR registers and set flags. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fcmp", &formats.binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + ); + + { + let has_small_offset = + InstructionPredicate::new_is_signed_int(&*formats.binary_imm, "imm", 8, 0); + + // XX /n, MI form with imm8. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm, 2) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .inst_predicate(has_small_offset) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + + let has_big_offset = + InstructionPredicate::new_is_signed_int(&*formats.binary_imm, "imm", 32, 0); + + // XX /n, MI form with imm32. 
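+ // For example (illustrative only): with opcode 0x81 and /7 this shape
+ // encodes `cmp ecx, 0x12345678` as 81 f9 78 56 34 12.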
+ recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm, 5) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .inst_predicate(has_big_offset) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + } + + // Test-and-branch. + // + // This recipe represents the macro fusion of a test and a conditional branch. + // This serves two purposes: + // + // 1. Guarantee that the test and branch get scheduled next to each other so + // macro fusion is guaranteed to be possible. + // 2. Hide the status flags from Cranelift which doesn't currently model flags. + // + // The encoding bits affect both the test and the branch instruction: + // + // Bits 0-7 are the Jcc opcode. + // Bits 8-15 control the test instruction which always has opcode byte 0x85. + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("tjccb", &formats.branch, 1 + 2) + .operands_in(vec![gpr]) + .branch_range((3, 8)) + .emit( + r#" + // test r, r. + {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(bits as u8); + disp1(destination, func, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("tjccd", &formats.branch, 1 + 6) + .operands_in(vec![gpr]) + .branch_range((7, 32)) + .emit( + r#" + // test r, r. + {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(0x0f); + sink.put1(bits as u8); + disp4(destination, func, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + + // 8-bit test-and-branch. + + let t8jccb = recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("t8jccb", &formats.branch, 1 + 2) + .operands_in(vec![gpr]) + .branch_range((3, 8)) + .emit( + r#" + // test8 r, r. + {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(bits as u8); + disp1(destination, func, sink); + "#, + ), + regs, + ) + .rex_kind(RexRecipeKind::AlwaysEmitRex), + ); + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("t8jccb_abcd", &formats.branch, 1 + 2) + .operands_in(vec![abcd]) + .branch_range((3, 8)) + .emit( + r#" + // test8 r, r. + {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(bits as u8); + disp1(destination, func, sink); + "#, + ), + regs, + ) + .when_prefixed(t8jccb), + ); + + let t8jccd = recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("t8jccd", &formats.branch, 1 + 6) + .operands_in(vec![gpr]) + .branch_range((7, 32)) + .emit( + r#" + // test8 r, r. + {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(0x0f); + sink.put1(bits as u8); + disp4(destination, func, sink); + "#, + ), + regs, + ) + .rex_kind(RexRecipeKind::AlwaysEmitRex), + ); + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("t8jccd_abcd", &formats.branch, 1 + 6) + .operands_in(vec![abcd]) + .branch_range((7, 32)) + .emit( + r#" + // test8 r, r. 
+ {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
+ modrm_rr(in_reg0, in_reg0, sink);
+ // Jcc instruction.
+ sink.put1(0x0f);
+ sink.put1(bits as u8);
+ disp4(destination, func, sink);
+ "#,
+ ),
+ regs,
+ )
+ .when_prefixed(t8jccd),
+ );
+
+ // Worst case test-and-branch recipe for brz.b1 and brnz.b1 in 32-bit mode.
+ // The register allocator can't handle a branch instruction with constrained
+ // operands like the t8jccd_abcd above. This variant can accept the b1 operand in
+ // any register, but it is larger because it uses a 32-bit test instruction with
+ // a 0xff immediate.
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("t8jccd_long", &formats.branch, 5 + 6)
+ .operands_in(vec![gpr])
+ .branch_range((11, 32))
+ .emit(
+ r#"
+ // test32 r, 0xff.
+ {{PUT_OP}}((bits & 0xff00) | 0xf7, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ sink.put4(0xff);
+ // Jcc instruction.
+ sink.put1(0x0f);
+ sink.put1(bits as u8);
+ disp4(destination, func, sink);
+ "#,
+ ),
+ );
+
+ // Comparison that produces a `b1` result in a GPR.
+ //
+ // This is a macro of a `cmp` instruction followed by a `setCC` instruction.
+ //
+ // TODO This is not a great solution because:
+ //
+ // - The cmp+setcc combination is not recognized by the CPU's macro fusion.
+ // - The 64-bit encoding has issues with REX prefixes. The `cmp` and `setCC`
+ //   instructions may need a REX independently.
+ // - Modeling CPU flags in the type system would be better.
+ //
+ // Since the `setCC` instructions only write an 8-bit register, we use that as
+ // our `b1` representation: a `b1` value is represented as a GPR where the low 8
+ // bits are known to be 0 or 1. The high bits are undefined.
+ //
+ // This bandaid macro doesn't support a REX prefix for the final `setCC`
+ // instruction, so it is limited to the `ABCD` register class for booleans.
+ // The omission of a `when_prefixed` alternative is deliberate here.
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("icscc", &formats.int_compare, 1 + 3)
+ .operands_in(vec![gpr, gpr])
+ .operands_out(vec![abcd])
+ .emit(
+ r#"
+ // Comparison instruction.
+ {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
+ modrm_rr(in_reg0, in_reg1, sink);
+ // `setCC` instruction, no REX.
+ let setcc = 0x90 | icc2opc(cond);
+ sink.put1(0x0f);
+ sink.put1(setcc as u8);
+ modrm_rr(out_reg0, 0, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("icscc_fpr", &formats.int_compare, 1)
+ .operands_in(vec![fpr, fpr])
+ .operands_out(vec![0])
+ .emit(
+ r#"
+ // Comparison instruction.
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ modrm_rr(in_reg1, in_reg0, sink);
+ "#,
+ ),
+ );
+
+ {
+ let is_small_imm =
+ InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 8, 0);
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("icscc_ib", &formats.int_compare_imm, 2 + 3)
+ .operands_in(vec![gpr])
+ .operands_out(vec![abcd])
+ .inst_predicate(is_small_imm)
+ .emit(
+ r#"
+ // Comparison instruction.
+ {{PUT_OP}}(bits, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ let imm: i64 = imm.into();
+ sink.put1(imm as u8);
+ // `setCC` instruction, no REX.
+ let setcc = 0x90 | icc2opc(cond);
+ sink.put1(0x0f);
+ sink.put1(setcc as u8);
+ modrm_rr(out_reg0, 0, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
+ );
+
+ let is_big_imm =
+ InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 32, 0);
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("icscc_id", &formats.int_compare_imm, 5 + 3)
+ .operands_in(vec![gpr])
+ .operands_out(vec![abcd])
+ .inst_predicate(is_big_imm)
+ .emit(
+ r#"
+ // Comparison instruction.
+ {{PUT_OP}}(bits, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ let imm: i64 = imm.into();
+ sink.put4(imm as u32);
+ // `setCC` instruction, no REX.
+ let setcc = 0x90 | icc2opc(cond);
+ sink.put1(0x0f);
+ sink.put1(setcc as u8);
+ modrm_rr(out_reg0, 0, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
+ );
+ }
+
+ // Same thing as `icscc`, but for floating point: a comparison followed by a
+ // `setCC`, guarded by a FloatCompare instruction predicate that only accepts
+ // the supported condition codes.
+ //
+ // The ucomiss/ucomisd instructions set the FLAGS bits ZF/PF/CF like this:
+ //
+ //    ZPC OSA
+ // UN 111 000
+ // GT 000 000
+ // LT 001 000
+ // EQ 100 000
+ //
+ // Not all floating point condition codes are supported.
+ // The omission of a `when_prefixed` alternative is deliberate here.
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("fcscc", &formats.float_compare, 1 + 3)
+ .operands_in(vec![fpr, fpr])
+ .operands_out(vec![abcd])
+ .inst_predicate(supported_floatccs_predicate(
+ &supported_floatccs,
+ &*formats.float_compare,
+ ))
+ .emit(
+ r#"
+ // Comparison instruction.
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ modrm_rr(in_reg1, in_reg0, sink);
+ // `setCC` instruction, no REX.
+ use crate::ir::condcodes::FloatCC::*;
+ let setcc = match cond {
+ Ordered => 0x9b, // EQ|LT|GT => setnp (P=0)
+ Unordered => 0x9a, // UN => setp (P=1)
+ OrderedNotEqual => 0x95, // LT|GT => setne (Z=0)
+ UnorderedOrEqual => 0x94, // UN|EQ => sete (Z=1)
+ GreaterThan => 0x97, // GT => seta (C=0&Z=0)
+ GreaterThanOrEqual => 0x93, // GT|EQ => setae (C=0)
+ UnorderedOrLessThan => 0x92, // UN|LT => setb (C=1)
+ UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1)
+ Equal | // EQ
+ NotEqual | // UN|LT|GT
+ LessThan | // LT
+ LessThanOrEqual | // LT|EQ
+ UnorderedOrGreaterThan | // UN|GT
+ UnorderedOrGreaterThanOrEqual // UN|GT|EQ
+ => panic!("{} not supported by fcscc", cond),
+ };
+ sink.put1(0x0f);
+ sink.put1(setcc);
+ modrm_rr(out_reg0, 0, sink);
+ "#,
+ ),
+ );
+
+ {
+ let supported_floatccs: Vec<Literal> = ["eq", "lt", "le", "uno", "ne", "uge", "ugt", "ord"]
+ .iter()
+ .map(|name| Literal::enumerator_for(floatcc, name))
+ .collect();
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("pfcmp", &formats.float_compare, 2)
+ .operands_in(vec![fpr, fpr])
+ .operands_out(vec![0])
+ .inst_predicate(supported_floatccs_predicate(
+ &supported_floatccs[..],
+ &*formats.float_compare,
+ ))
+ .emit(
+ r#"
+ // Comparison instruction.
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ modrm_rr(in_reg1, in_reg0, sink);
+ // Add immediate byte indicating what type of comparison.
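+ // (These values are the CMPPS/CMPPD immediate predicate encodings,
+ // 0x00 = EQ through 0x07 = ORD.)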
+ use crate::ir::condcodes::FloatCC::*; + let imm = match cond { + Equal => 0x00, + LessThan => 0x01, + LessThanOrEqual => 0x02, + Unordered => 0x03, + NotEqual => 0x04, + UnorderedOrGreaterThanOrEqual => 0x05, + UnorderedOrGreaterThan => 0x06, + Ordered => 0x07, + _ => panic!("{} not supported by pfcmp", cond), + }; + sink.put1(imm); + "#, + ), + ); + } + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("is_zero", &formats.unary, 2 + 2) + .operands_in(vec![gpr]) + .operands_out(vec![abcd]) + .emit( + r#" + // Test instruction. + {{PUT_OP}}(bits, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Check ZF = 1 flag to see if register holds 0. + sink.put1(0x0f); + sink.put1(0x94); + modrm_rr(out_reg0, 0, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("is_invalid", &formats.unary, 2 + 3) + .operands_in(vec![gpr]) + .operands_out(vec![abcd]) + .emit( + r#" + // Comparison instruction. + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + sink.put1(0xff); + // `setCC` instruction, no REX. + use crate::ir::condcodes::IntCC::*; + let setcc = 0x90 | icc2opc(Equal); + sink.put1(0x0f); + sink.put1(setcc as u8); + modrm_rr(out_reg0, 0, sink); + "#, + ), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("safepoint", &formats.multiary, 0).emit( + r#" + sink.add_stackmap(args, func, isa); + "#, + ), + ); + + // Both `elf_tls_get_addr` and `macho_tls_get_addr` require all caller-saved registers to be spilled. + // This is currently special cased in `regalloc/spilling.rs` in the `visit_inst` function. + + recipes.add_recipe( + EncodingRecipeBuilder::new("elf_tls_get_addr", &formats.unary_global_value, 16) + // FIXME Correct encoding for non rax registers + .operands_out(vec![reg_rax]) + .emit( + r#" + // output %rax + // clobbers %rdi + + // Those data16 prefixes are necessary to pad to 16 bytes. 
+ + // data16 lea gv@tlsgd(%rip),%rdi + sink.put1(0x66); // data16 + sink.put1(0b01001000); // rex.w + const LEA: u8 = 0x8d; + sink.put1(LEA); // lea + modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d + sink.reloc_external(Reloc::ElfX86_64TlsGd, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + + // data16 data16 callq __tls_get_addr-4 + sink.put1(0x66); // data16 + sink.put1(0x66); // data16 + sink.put1(0b01001000); // rex.w + sink.put1(0xe8); // call + sink.reloc_external(Reloc::X86CallPLTRel4, + &ExternalName::LibCall(LibCall::ElfTlsGetAddr), + -4); + sink.put4(0); + "#, + ), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("macho_tls_get_addr", &formats.unary_global_value, 9) + // FIXME Correct encoding for non rax registers + .operands_out(vec![reg_rax]) + .emit( + r#" + // output %rax + // clobbers %rdi + + // movq gv@tlv(%rip), %rdi + sink.put1(0x48); // rex + sink.put1(0x8b); // mov + modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d + sink.reloc_external(Reloc::MachOX86_64Tlv, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + + // callq *(%rdi) + sink.put1(0xff); + sink.put1(0x17); + "#, + ), + ); + + recipes +} diff --git a/cranelift/codegen/meta/src/isa/x86/registers.rs b/cranelift/codegen/meta/src/isa/x86/registers.rs new file mode 100644 index 0000000000..5c31401a3c --- /dev/null +++ b/cranelift/codegen/meta/src/isa/x86/registers.rs @@ -0,0 +1,43 @@ +use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; + +pub(crate) fn define() -> IsaRegs { + let mut regs = IsaRegsBuilder::new(); + + let builder = RegBankBuilder::new("IntRegs", "r") + .units(16) + .names(vec!["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"]) + .track_pressure(true) + .pinned_reg(15); + let int_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("FloatRegs", "xmm") + .units(16) + .track_pressure(true); + let float_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("FlagRegs", "") + .units(1) + .names(vec!["rflags"]) + .track_pressure(false); + let flag_reg = regs.add_bank(builder); + + let builder = RegClassBuilder::new_toplevel("GPR", int_regs); + let gpr = regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("FPR", float_regs); + let fpr = regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg); + regs.add_class(builder); + + let builder = RegClassBuilder::subclass_of("GPR8", gpr, 0, 8); + let gpr8 = regs.add_class(builder); + + let builder = RegClassBuilder::subclass_of("ABCD", gpr8, 0, 4); + regs.add_class(builder); + + let builder = RegClassBuilder::subclass_of("FPR8", fpr, 0, 8); + regs.add_class(builder); + + regs.build() +} diff --git a/cranelift/codegen/meta/src/isa/x86/settings.rs b/cranelift/codegen/meta/src/isa/x86/settings.rs new file mode 100644 index 0000000000..e39384a5c3 --- /dev/null +++ b/cranelift/codegen/meta/src/isa/x86/settings.rs @@ -0,0 +1,104 @@ +use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder}; + +pub(crate) fn define(shared: &SettingGroup) -> SettingGroup { + let mut settings = SettingGroupBuilder::new("x86"); + + // CPUID.01H:ECX + let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false); + let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false); + let has_sse41 = settings.add_bool("has_sse41", "SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]", false); + let has_sse42 = settings.add_bool("has_sse42", "SSE4.2: 
CPUID.01H:ECX.SSE4_2[bit 20]", false);
+ let has_popcnt = settings.add_bool("has_popcnt", "POPCNT: CPUID.01H:ECX.POPCNT[bit 23]", false);
+ settings.add_bool("has_avx", "AVX: CPUID.01H:ECX.AVX[bit 28]", false);
+
+ // CPUID.(EAX=07H, ECX=0H):EBX
+ let has_bmi1 = settings.add_bool(
+ "has_bmi1",
+ "BMI1: CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]",
+ false,
+ );
+ let has_bmi2 = settings.add_bool(
+ "has_bmi2",
+ "BMI2: CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]",
+ false,
+ );
+
+ // CPUID.EAX=80000001H:ECX
+ let has_lzcnt = settings.add_bool(
+ "has_lzcnt",
+ "LZCNT: CPUID.EAX=80000001H:ECX.LZCNT[bit 5]",
+ false,
+ );
+
+ let shared_enable_simd = shared.get_bool("enable_simd");
+
+ settings.add_predicate("use_ssse3", predicate!(has_ssse3));
+ settings.add_predicate("use_sse41", predicate!(has_sse41));
+ settings.add_predicate("use_sse42", predicate!(has_sse41 && has_sse42));
+
+ settings.add_predicate(
+ "use_ssse3_simd",
+ predicate!(shared_enable_simd && has_ssse3),
+ );
+ settings.add_predicate(
+ "use_sse41_simd",
+ predicate!(shared_enable_simd && has_sse41),
+ );
+ settings.add_predicate(
+ "use_sse42_simd",
+ predicate!(shared_enable_simd && has_sse41 && has_sse42),
+ );
+
+ settings.add_predicate("use_popcnt", predicate!(has_popcnt && has_sse42));
+ settings.add_predicate("use_bmi1", predicate!(has_bmi1));
+ settings.add_predicate("use_lzcnt", predicate!(has_lzcnt));
+
+ // Some shared boolean values are used in x86 instruction predicates, so we need to group them
+ // in the same TargetIsa, for compatibility with code generated by meta-python.
+ // TODO Once all the meta generation code has been migrated from Python to Rust, we can put it
+ // back in the shared SettingGroup, and use it in x86 instruction predicates.
+
+ let is_pic = shared.get_bool("is_pic");
+ let emit_all_ones_funcaddrs = shared.get_bool("emit_all_ones_funcaddrs");
+ settings.add_predicate("is_pic", predicate!(is_pic));
+ settings.add_predicate("not_is_pic", predicate!(!is_pic));
+ settings.add_predicate(
+ "all_ones_funcaddrs_and_not_is_pic",
+ predicate!(emit_all_ones_funcaddrs && !is_pic),
+ );
+ settings.add_predicate(
+ "not_all_ones_funcaddrs_and_not_is_pic",
+ predicate!(!emit_all_ones_funcaddrs && !is_pic),
+ );
+
+ // Presets corresponding to x86 CPUs.
+
+ settings.add_preset("baseline", preset!());
+ let nehalem = settings.add_preset(
+ "nehalem",
+ preset!(has_sse3 && has_ssse3 && has_sse41 && has_sse42 && has_popcnt),
+ );
+ let haswell = settings.add_preset(
+ "haswell",
+ preset!(nehalem && has_bmi1 && has_bmi2 && has_lzcnt),
+ );
+ let broadwell = settings.add_preset("broadwell", preset!(haswell));
+ let skylake = settings.add_preset("skylake", preset!(broadwell));
+ let cannonlake = settings.add_preset("cannonlake", preset!(skylake));
+ settings.add_preset("icelake", preset!(cannonlake));
+ settings.add_preset(
+ "znver1",
+ preset!(
+ has_sse3
+ && has_ssse3
+ && has_sse41
+ && has_sse42
+ && has_popcnt
+ && has_bmi1
+ && has_bmi2
+ && has_lzcnt
+ ),
+ );
+
+ settings.build()
+}
diff --git a/cranelift/codegen/meta/src/lib.rs b/cranelift/codegen/meta/src/lib.rs
new file mode 100644
index 0000000000..796e2a110d
--- /dev/null
+++ b/cranelift/codegen/meta/src/lib.rs
@@ -0,0 +1,84 @@
+//! This crate generates Rust sources for use by
+//! [`cranelift_codegen`](../cranelift_codegen/index.html).
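+//!
+//! As a rough sketch (editor's illustration; the exact wiring lives in
+//! cranelift-codegen's build script), a build script could drive this crate
+//! like so, assuming Cargo provides `OUT_DIR`:
+//!
+//! ```ignore
+//! let out_dir = std::env::var("OUT_DIR").unwrap();
+//! let isa = cranelift_codegen_meta::isa_from_arch("x86_64").unwrap();
+//! cranelift_codegen_meta::generate(&[isa], &out_dir).unwrap();
+//! ```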
+#[macro_use]
+mod cdsl;
+mod srcgen;
+
+pub mod error;
+pub mod isa;
+
+mod gen_binemit;
+mod gen_encodings;
+mod gen_inst;
+mod gen_legalizer;
+mod gen_registers;
+mod gen_settings;
+mod gen_types;
+
+mod default_map;
+mod shared;
+mod unique_table;
+
+/// Generate an ISA from an architecture string (e.g. "x86_64").
+pub fn isa_from_arch(arch: &str) -> Result<isa::Isa, String> {
+ isa::Isa::from_arch(arch).ok_or_else(|| format!("no supported isa found for arch `{}`", arch))
+}
+
+/// Generates all the Rust source files used in Cranelift from the meta-language.
+pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> {
+ // Create all the definitions:
+ // - common definitions.
+ let mut shared_defs = shared::define();
+
+ gen_settings::generate(
+ &shared_defs.settings,
+ gen_settings::ParentGroup::None,
+ "settings.rs",
+ &out_dir,
+ )?;
+ gen_types::generate("types.rs", &out_dir)?;
+
+ // - per ISA definitions.
+ let isas = isa::define(isas, &mut shared_defs);
+
+ // At this point, all definitions are done.
+ let all_formats = shared_defs.verify_instruction_formats();
+
+ // Generate all the code.
+ gen_inst::generate(
+ all_formats,
+ &shared_defs.all_instructions,
+ "opcodes.rs",
+ "inst_builder.rs",
+ &out_dir,
+ )?;
+
+ gen_legalizer::generate(&isas, &shared_defs.transform_groups, "legalize", &out_dir)?;
+
+ for isa in isas {
+ gen_registers::generate(&isa, &format!("registers-{}.rs", isa.name), &out_dir)?;
+
+ gen_settings::generate(
+ &isa.settings,
+ gen_settings::ParentGroup::Shared,
+ &format!("settings-{}.rs", isa.name),
+ &out_dir,
+ )?;
+
+ gen_encodings::generate(
+ &shared_defs,
+ &isa,
+ &format!("encoding-{}.rs", isa.name),
+ &out_dir,
+ )?;
+
+ gen_binemit::generate(
+ &isa.name,
+ &isa.recipes,
+ &format!("binemit-{}.rs", isa.name),
+ &out_dir,
+ )?;
+ }
+
+ Ok(())
+}
diff --git a/cranelift/codegen/meta/src/shared/entities.rs b/cranelift/codegen/meta/src/shared/entities.rs
new file mode 100644
index 0000000000..c3f2bc0387
--- /dev/null
+++ b/cranelift/codegen/meta/src/shared/entities.rs
@@ -0,0 +1,73 @@
+use crate::cdsl::operands::{OperandKind, OperandKindFields};
+
+/// Small helper to initialize an entity-reference `OperandKind`, for a given name and doc.
+fn new(format_field_name: &'static str, rust_type: &'static str, doc: &'static str) -> OperandKind {
+ OperandKind::new(format_field_name, rust_type, OperandKindFields::EntityRef).with_doc(doc)
+}
+
+pub(crate) struct EntityRefs {
+ /// A reference to a basic block in the same function.
+ /// This is primarily used in control flow instructions.
+ pub(crate) block: OperandKind,
+
+ /// A reference to a stack slot declared in the function preamble.
+ pub(crate) stack_slot: OperandKind,
+
+ /// A reference to a global value.
+ pub(crate) global_value: OperandKind,
+
+ /// A reference to a function signature declared in the function preamble.
+ /// This is used to provide the call signature in a call_indirect instruction.
+ pub(crate) sig_ref: OperandKind,
+
+ /// A reference to an external function declared in the function preamble.
+ /// This is used to provide the callee and signature in a call instruction.
+ pub(crate) func_ref: OperandKind,
+
+ /// A reference to a jump table declared in the function preamble.
+ pub(crate) jump_table: OperandKind,
+
+ /// A reference to a heap declared in the function preamble.
+ pub(crate) heap: OperandKind,
+
+ /// A reference to a table declared in the function preamble.
+ pub(crate) table: OperandKind,
+
+ /// A variable-sized list of value operands.
Used for Block and function call arguments.
+ pub(crate) varargs: OperandKind,
+}
+
+impl EntityRefs {
+ pub fn new() -> Self {
+ Self {
+ block: new(
+ "destination",
+ "ir::Block",
+ "a basic block in the same function.",
+ ),
+ stack_slot: new("stack_slot", "ir::StackSlot", "A stack slot"),
+
+ global_value: new("global_value", "ir::GlobalValue", "A global value."),
+
+ sig_ref: new("sig_ref", "ir::SigRef", "A function signature."),
+
+ func_ref: new("func_ref", "ir::FuncRef", "An external function."),
+
+ jump_table: new("table", "ir::JumpTable", "A jump table."),
+
+ heap: new("heap", "ir::Heap", "A heap."),
+
+ table: new("table", "ir::Table", "A table."),
+
+ varargs: OperandKind::new("", "&[Value]", OperandKindFields::VariableArgs).with_doc(
+ r#"
+ A variable-sized list of `value` operands.
+
+ Use this to represent arguments passed to a function call, arguments
+ passed to a basic block, or a variable number of results
+ returned from an instruction.
+ "#,
+ ),
+ }
+ }
+}
diff --git a/cranelift/codegen/meta/src/shared/formats.rs b/cranelift/codegen/meta/src/shared/formats.rs
new file mode 100644
index 0000000000..03c09e2e2b
--- /dev/null
+++ b/cranelift/codegen/meta/src/shared/formats.rs
@@ -0,0 +1,303 @@
+use crate::cdsl::formats::{InstructionFormat, InstructionFormatBuilder as Builder};
+use crate::shared::{entities::EntityRefs, immediates::Immediates};
+use std::rc::Rc;
+
+pub(crate) struct Formats {
+ pub(crate) binary: Rc<InstructionFormat>,
+ pub(crate) binary_imm: Rc<InstructionFormat>,
+ pub(crate) branch: Rc<InstructionFormat>,
+ pub(crate) branch_float: Rc<InstructionFormat>,
+ pub(crate) branch_icmp: Rc<InstructionFormat>,
+ pub(crate) branch_int: Rc<InstructionFormat>,
+ pub(crate) branch_table: Rc<InstructionFormat>,
+ pub(crate) branch_table_base: Rc<InstructionFormat>,
+ pub(crate) branch_table_entry: Rc<InstructionFormat>,
+ pub(crate) call: Rc<InstructionFormat>,
+ pub(crate) call_indirect: Rc<InstructionFormat>,
+ pub(crate) cond_trap: Rc<InstructionFormat>,
+ pub(crate) copy_special: Rc<InstructionFormat>,
+ pub(crate) copy_to_ssa: Rc<InstructionFormat>,
+ pub(crate) extract_lane: Rc<InstructionFormat>,
+ pub(crate) float_compare: Rc<InstructionFormat>,
+ pub(crate) float_cond: Rc<InstructionFormat>,
+ pub(crate) float_cond_trap: Rc<InstructionFormat>,
+ pub(crate) func_addr: Rc<InstructionFormat>,
+ pub(crate) heap_addr: Rc<InstructionFormat>,
+ pub(crate) indirect_jump: Rc<InstructionFormat>,
+ pub(crate) insert_lane: Rc<InstructionFormat>,
+ pub(crate) int_compare: Rc<InstructionFormat>,
+ pub(crate) int_compare_imm: Rc<InstructionFormat>,
+ pub(crate) int_cond: Rc<InstructionFormat>,
+ pub(crate) int_cond_trap: Rc<InstructionFormat>,
+ pub(crate) int_select: Rc<InstructionFormat>,
+ pub(crate) jump: Rc<InstructionFormat>,
+ pub(crate) load: Rc<InstructionFormat>,
+ pub(crate) load_complex: Rc<InstructionFormat>,
+ pub(crate) multiary: Rc<InstructionFormat>,
+ pub(crate) nullary: Rc<InstructionFormat>,
+ pub(crate) reg_fill: Rc<InstructionFormat>,
+ pub(crate) reg_move: Rc<InstructionFormat>,
+ pub(crate) reg_spill: Rc<InstructionFormat>,
+ pub(crate) shuffle: Rc<InstructionFormat>,
+ pub(crate) stack_load: Rc<InstructionFormat>,
+ pub(crate) stack_store: Rc<InstructionFormat>,
+ pub(crate) store: Rc<InstructionFormat>,
+ pub(crate) store_complex: Rc<InstructionFormat>,
+ pub(crate) table_addr: Rc<InstructionFormat>,
+ pub(crate) ternary: Rc<InstructionFormat>,
+ pub(crate) trap: Rc<InstructionFormat>,
+ pub(crate) unary: Rc<InstructionFormat>,
+ pub(crate) unary_bool: Rc<InstructionFormat>,
+ pub(crate) unary_const: Rc<InstructionFormat>,
+ pub(crate) unary_global_value: Rc<InstructionFormat>,
+ pub(crate) unary_ieee32: Rc<InstructionFormat>,
+ pub(crate) unary_ieee64: Rc<InstructionFormat>,
+ pub(crate) unary_imm: Rc<InstructionFormat>,
+}
+
+impl Formats {
+ pub fn new(imm: &Immediates, entities: &EntityRefs) -> Self {
+ Self {
+ unary: Builder::new("Unary").value().build(),
+
+ unary_imm: Builder::new("UnaryImm").imm(&imm.imm64).build(),
+
+ unary_ieee32: Builder::new("UnaryIeee32").imm(&imm.ieee32).build(),
+
+ unary_ieee64: Builder::new("UnaryIeee64").imm(&imm.ieee64).build(),
+
+ unary_bool: Builder::new("UnaryBool").imm(&imm.boolean).build(),
+
+ unary_const: Builder::new("UnaryConst").imm(&imm.pool_constant).build(),
+
+ unary_global_value: Builder::new("UnaryGlobalValue")
+ .imm(&entities.global_value)
+ .build(),
+
+ binary: Builder::new("Binary").value().value().build(),
+
+
binary_imm: Builder::new("BinaryImm").value().imm(&imm.imm64).build(), + + // The select instructions are controlled by the second VALUE operand. + // The first VALUE operand is the controlling flag which has a derived type. + // The fma instruction has the same constraint on all inputs. + ternary: Builder::new("Ternary") + .value() + .value() + .value() + .typevar_operand(1) + .build(), + + // Catch-all for instructions with many outputs and inputs and no immediate + // operands. + multiary: Builder::new("MultiAry").varargs().build(), + + nullary: Builder::new("NullAry").build(), + + insert_lane: Builder::new("InsertLane") + .value() + .imm_with_name("lane", &imm.uimm8) + .value() + .build(), + + extract_lane: Builder::new("ExtractLane") + .value() + .imm_with_name("lane", &imm.uimm8) + .build(), + + shuffle: Builder::new("Shuffle") + .value() + .value() + .imm_with_name("mask", &imm.uimm128) + .build(), + + int_compare: Builder::new("IntCompare") + .imm(&imm.intcc) + .value() + .value() + .build(), + + int_compare_imm: Builder::new("IntCompareImm") + .imm(&imm.intcc) + .value() + .imm(&imm.imm64) + .build(), + + int_cond: Builder::new("IntCond").imm(&imm.intcc).value().build(), + + float_compare: Builder::new("FloatCompare") + .imm(&imm.floatcc) + .value() + .value() + .build(), + + float_cond: Builder::new("FloatCond").imm(&imm.floatcc).value().build(), + + int_select: Builder::new("IntSelect") + .imm(&imm.intcc) + .value() + .value() + .value() + .build(), + + jump: Builder::new("Jump").imm(&entities.block).varargs().build(), + + branch: Builder::new("Branch") + .value() + .imm(&entities.block) + .varargs() + .build(), + + branch_int: Builder::new("BranchInt") + .imm(&imm.intcc) + .value() + .imm(&entities.block) + .varargs() + .build(), + + branch_float: Builder::new("BranchFloat") + .imm(&imm.floatcc) + .value() + .imm(&entities.block) + .varargs() + .build(), + + branch_icmp: Builder::new("BranchIcmp") + .imm(&imm.intcc) + .value() + .value() + .imm(&entities.block) + .varargs() + .build(), + + branch_table: Builder::new("BranchTable") + .value() + .imm(&entities.block) + .imm(&entities.jump_table) + .build(), + + branch_table_entry: Builder::new("BranchTableEntry") + .value() + .value() + .imm(&imm.uimm8) + .imm(&entities.jump_table) + .build(), + + branch_table_base: Builder::new("BranchTableBase") + .imm(&entities.jump_table) + .build(), + + indirect_jump: Builder::new("IndirectJump") + .value() + .imm(&entities.jump_table) + .build(), + + call: Builder::new("Call") + .imm(&entities.func_ref) + .varargs() + .build(), + + call_indirect: Builder::new("CallIndirect") + .imm(&entities.sig_ref) + .value() + .varargs() + .build(), + + func_addr: Builder::new("FuncAddr").imm(&entities.func_ref).build(), + + load: Builder::new("Load") + .imm(&imm.memflags) + .value() + .imm(&imm.offset32) + .build(), + + load_complex: Builder::new("LoadComplex") + .imm(&imm.memflags) + .varargs() + .imm(&imm.offset32) + .build(), + + store: Builder::new("Store") + .imm(&imm.memflags) + .value() + .value() + .imm(&imm.offset32) + .build(), + + store_complex: Builder::new("StoreComplex") + .imm(&imm.memflags) + .value() + .varargs() + .imm(&imm.offset32) + .build(), + + stack_load: Builder::new("StackLoad") + .imm(&entities.stack_slot) + .imm(&imm.offset32) + .build(), + + stack_store: Builder::new("StackStore") + .value() + .imm(&entities.stack_slot) + .imm(&imm.offset32) + .build(), + + // Accessing a WebAssembly heap. 
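+ // (The `HeapAddr` format backs `heap_addr`, which bounds-checks an access
+ // and materializes the absolute address; the uimm32 immediate is assumed
+ // here to be the access size in bytes.)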
+ heap_addr: Builder::new("HeapAddr")
+ .imm(&entities.heap)
+ .value()
+ .imm(&imm.uimm32)
+ .build(),
+
+ // Accessing a WebAssembly table.
+ table_addr: Builder::new("TableAddr")
+ .imm(&entities.table)
+ .value()
+ .imm(&imm.offset32)
+ .build(),
+
+ reg_move: Builder::new("RegMove")
+ .value()
+ .imm_with_name("src", &imm.regunit)
+ .imm_with_name("dst", &imm.regunit)
+ .build(),
+
+ copy_special: Builder::new("CopySpecial")
+ .imm_with_name("src", &imm.regunit)
+ .imm_with_name("dst", &imm.regunit)
+ .build(),
+
+ copy_to_ssa: Builder::new("CopyToSsa")
+ .imm_with_name("src", &imm.regunit)
+ .build(),
+
+ reg_spill: Builder::new("RegSpill")
+ .value()
+ .imm_with_name("src", &imm.regunit)
+ .imm_with_name("dst", &entities.stack_slot)
+ .build(),
+
+ reg_fill: Builder::new("RegFill")
+ .value()
+ .imm_with_name("src", &entities.stack_slot)
+ .imm_with_name("dst", &imm.regunit)
+ .build(),
+
+ trap: Builder::new("Trap").imm(&imm.trapcode).build(),
+
+ cond_trap: Builder::new("CondTrap").value().imm(&imm.trapcode).build(),
+
+ int_cond_trap: Builder::new("IntCondTrap")
+ .imm(&imm.intcc)
+ .value()
+ .imm(&imm.trapcode)
+ .build(),
+
+ float_cond_trap: Builder::new("FloatCondTrap")
+ .imm(&imm.floatcc)
+ .value()
+ .imm(&imm.trapcode)
+ .build(),
+ }
+ }
+}
diff --git a/cranelift/codegen/meta/src/shared/immediates.rs b/cranelift/codegen/meta/src/shared/immediates.rs
new file mode 100644
index 0000000000..d8382e4067
--- /dev/null
+++ b/cranelift/codegen/meta/src/shared/immediates.rs
@@ -0,0 +1,161 @@
+use crate::cdsl::operands::{EnumValues, OperandKind, OperandKindFields};
+
+use std::collections::HashMap;
+
+pub(crate) struct Immediates {
+ /// A 64-bit immediate integer operand.
+ ///
+ /// This type of immediate integer can interact with SSA values with any IntType type.
+ pub imm64: OperandKind,
+
+ /// An unsigned 8-bit immediate integer operand.
+ ///
+ /// This small operand is used to indicate lane indexes in SIMD vectors and immediate bit
+ /// counts on shift instructions.
+ pub uimm8: OperandKind,
+
+ /// An unsigned 32-bit immediate integer operand.
+ pub uimm32: OperandKind,
+
+ /// An unsigned 128-bit immediate integer operand.
+ ///
+ /// This operand is used to pass entire 128-bit vectors as immediates to instructions like
+ /// const.
+ pub uimm128: OperandKind,
+
+ /// A constant stored in the constant pool.
+ ///
+ /// This operand is used to pass constants to instructions like vconst while storing the
+ /// actual bytes in the constant pool.
+ pub pool_constant: OperandKind,
+
+ /// A 32-bit immediate signed offset.
+ ///
+ /// This is used to represent an immediate address offset in load/store instructions.
+ pub offset32: OperandKind,
+
+ /// A 32-bit immediate floating point operand.
+ ///
+ /// IEEE 754-2008 binary32 interchange format.
+ pub ieee32: OperandKind,
+
+ /// A 64-bit immediate floating point operand.
+ ///
+ /// IEEE 754-2008 binary64 interchange format.
+ pub ieee64: OperandKind,
+
+ /// An immediate boolean operand.
+ ///
+ /// This type of immediate boolean can interact with SSA values with any BoolType type.
+ pub boolean: OperandKind,
+
+ /// A condition code for comparing integer values.
+ ///
+ /// This enumerated operand kind is used for the `icmp` instruction and corresponds to the
+ /// `condcodes::IntCC` Rust type.
+ pub intcc: OperandKind,
+
+ /// A condition code for comparing floating point values.
+ ///
+ /// This enumerated operand kind is used for the `fcmp` instruction and corresponds to the
+ /// `condcodes::FloatCC` Rust type.
+ pub floatcc: OperandKind, + + /// Flags for memory operations like `load` and `store`. + pub memflags: OperandKind, + + /// A register unit in the current target ISA. + pub regunit: OperandKind, + + /// A trap code indicating the reason for trapping. + /// + /// The Rust enum type also has a `User(u16)` variant for user-provided trap codes. + pub trapcode: OperandKind, +} + +fn new_imm(format_field_name: &'static str, rust_type: &'static str) -> OperandKind { + OperandKind::new(format_field_name, rust_type, OperandKindFields::ImmValue) +} +fn new_enum( + format_field_name: &'static str, + rust_type: &'static str, + values: EnumValues, +) -> OperandKind { + OperandKind::new( + format_field_name, + rust_type, + OperandKindFields::ImmEnum(values), + ) +} + +impl Immediates { + pub fn new() -> Self { + Self { + imm64: new_imm("imm", "ir::immediates::Imm64").with_doc("A 64-bit immediate integer."), + uimm8: new_imm("imm", "ir::immediates::Uimm8") + .with_doc("An 8-bit immediate unsigned integer."), + uimm32: new_imm("imm", "ir::immediates::Uimm32") + .with_doc("A 32-bit immediate unsigned integer."), + uimm128: new_imm("imm", "ir::Immediate") + .with_doc("A 128-bit immediate unsigned integer."), + pool_constant: new_imm("constant_handle", "ir::Constant") + .with_doc("A constant stored in the constant pool."), + offset32: new_imm("offset", "ir::immediates::Offset32") + .with_doc("A 32-bit immediate signed offset."), + ieee32: new_imm("imm", "ir::immediates::Ieee32") + .with_doc("A 32-bit immediate floating point number."), + ieee64: new_imm("imm", "ir::immediates::Ieee64") + .with_doc("A 64-bit immediate floating point number."), + boolean: new_imm("imm", "bool").with_doc("An immediate boolean."), + intcc: { + let mut intcc_values = HashMap::new(); + intcc_values.insert("eq", "Equal"); + intcc_values.insert("ne", "NotEqual"); + intcc_values.insert("sge", "SignedGreaterThanOrEqual"); + intcc_values.insert("sgt", "SignedGreaterThan"); + intcc_values.insert("sle", "SignedLessThanOrEqual"); + intcc_values.insert("slt", "SignedLessThan"); + intcc_values.insert("uge", "UnsignedGreaterThanOrEqual"); + intcc_values.insert("ugt", "UnsignedGreaterThan"); + intcc_values.insert("ule", "UnsignedLessThanOrEqual"); + intcc_values.insert("ult", "UnsignedLessThan"); + intcc_values.insert("of", "Overflow"); + intcc_values.insert("nof", "NotOverflow"); + new_enum("cond", "ir::condcodes::IntCC", intcc_values) + .with_doc("An integer comparison condition code.") + }, + + floatcc: { + let mut floatcc_values = HashMap::new(); + floatcc_values.insert("ord", "Ordered"); + floatcc_values.insert("uno", "Unordered"); + floatcc_values.insert("eq", "Equal"); + floatcc_values.insert("ne", "NotEqual"); + floatcc_values.insert("one", "OrderedNotEqual"); + floatcc_values.insert("ueq", "UnorderedOrEqual"); + floatcc_values.insert("lt", "LessThan"); + floatcc_values.insert("le", "LessThanOrEqual"); + floatcc_values.insert("gt", "GreaterThan"); + floatcc_values.insert("ge", "GreaterThanOrEqual"); + floatcc_values.insert("ult", "UnorderedOrLessThan"); + floatcc_values.insert("ule", "UnorderedOrLessThanOrEqual"); + floatcc_values.insert("ugt", "UnorderedOrGreaterThan"); + floatcc_values.insert("uge", "UnorderedOrGreaterThanOrEqual"); + new_enum("cond", "ir::condcodes::FloatCC", floatcc_values) + .with_doc("A floating point comparison condition code") + }, + + memflags: new_imm("flags", "ir::MemFlags").with_doc("Memory operation flags"), + regunit: new_imm("regunit", "isa::RegUnit") + .with_doc("A register unit in the target ISA"), 
+ trapcode: { + let mut trapcode_values = HashMap::new(); + trapcode_values.insert("stk_ovf", "StackOverflow"); + trapcode_values.insert("heap_oob", "HeapOutOfBounds"); + trapcode_values.insert("int_ovf", "IntegerOverflow"); + trapcode_values.insert("int_divz", "IntegerDivisionByZero"); + new_enum("code", "ir::TrapCode", trapcode_values).with_doc("A trap reason code.") + }, + } + } +} diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs new file mode 100644 index 0000000000..3e74da1b00 --- /dev/null +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -0,0 +1,3843 @@ +#![allow(non_snake_case)] + +use crate::cdsl::instructions::{ + AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder, +}; +use crate::cdsl::operands::Operand; +use crate::cdsl::type_inference::Constraint::WiderOrEq; +use crate::cdsl::types::{LaneType, ValueType}; +use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar}; +use crate::shared::formats::Formats; +use crate::shared::types; +use crate::shared::{entities::EntityRefs, immediates::Immediates}; + +#[inline(never)] +fn define_control_flow( + ig: &mut InstructionGroupBuilder, + formats: &Formats, + imm: &Immediates, + entities: &EntityRefs, +) { + let block = &Operand::new("block", &entities.block).with_doc("Destination basic block"); + let args = &Operand::new("args", &entities.varargs).with_doc("block arguments"); + + ig.push( + Inst::new( + "jump", + r#" + Jump. + + Unconditionally jump to a basic block, passing the specified + block arguments. The number and types of arguments must match the + destination block. + "#, + &formats.jump, + ) + .operands_in(vec![block, args]) + .is_terminator(true) + .is_branch(true), + ); + + ig.push( + Inst::new( + "fallthrough", + r#" + Fall through to the next block. + + This is the same as `jump`, except the destination block must be + the next one in the layout. + + Jumps are turned into fall-through instructions by the branch + relaxation pass. There is no reason to use this instruction outside + that pass. + "#, + &formats.jump, + ) + .operands_in(vec![block, args]) + .is_terminator(true) + .is_branch(true), + ); + + let Testable = &TypeVar::new( + "Testable", + "A scalar boolean or integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .bools(Interval::All) + .build(), + ); + + { + let c = &Operand::new("c", Testable).with_doc("Controlling value to test"); + + ig.push( + Inst::new( + "brz", + r#" + Branch when zero. + + If ``c`` is a `b1` value, take the branch when ``c`` is false. If + ``c`` is an integer value, take the branch when ``c = 0``. + "#, + &formats.branch, + ) + .operands_in(vec![c, block, args]) + .is_branch(true), + ); + + ig.push( + Inst::new( + "brnz", + r#" + Branch when non-zero. + + If ``c`` is a `b1` value, take the branch when ``c`` is true. If + ``c`` is an integer value, take the branch when ``c != 0``. + "#, + &formats.branch, + ) + .operands_in(vec![c, block, args]) + .is_branch(true), + ); + } + + let iB = &TypeVar::new( + "iB", + "A scalar integer type", + TypeSetBuilder::new().ints(Interval::All).build(), + ); + let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into(); + let fflags: &TypeVar = &ValueType::Special(types::Flag::FFlags.into()).into(); + + { + let Cond = &Operand::new("Cond", &imm.intcc); + let x = &Operand::new("x", iB); + let y = &Operand::new("y", iB); + + ig.push( + Inst::new( + "br_icmp", + r#" + Compare scalar integers and branch. 
+ + Compare ``x`` and ``y`` in the same way as the `icmp` instruction + and take the branch if the condition is true: + + ```text + br_icmp ugt v1, v2, block4(v5, v6) + ``` + + is semantically equivalent to: + + ```text + v10 = icmp ugt, v1, v2 + brnz v10, block4(v5, v6) + ``` + + Some RISC architectures like MIPS and RISC-V provide instructions that + implement all or some of the condition codes. The instruction can also + be used to represent *macro-op fusion* on architectures like Intel's. + "#, + &formats.branch_icmp, + ) + .operands_in(vec![Cond, x, y, block, args]) + .is_branch(true), + ); + + let f = &Operand::new("f", iflags); + + ig.push( + Inst::new( + "brif", + r#" + Branch when condition is true in integer CPU flags. + "#, + &formats.branch_int, + ) + .operands_in(vec![Cond, f, block, args]) + .is_branch(true), + ); + } + + { + let Cond = &Operand::new("Cond", &imm.floatcc); + + let f = &Operand::new("f", fflags); + + ig.push( + Inst::new( + "brff", + r#" + Branch when condition is true in floating point CPU flags. + "#, + &formats.branch_float, + ) + .operands_in(vec![Cond, f, block, args]) + .is_branch(true), + ); + } + + { + let x = &Operand::new("x", iB).with_doc("index into jump table"); + let JT = &Operand::new("JT", &entities.jump_table); + + ig.push( + Inst::new( + "br_table", + r#" + Indirect branch via jump table. + + Use ``x`` as an unsigned index into the jump table ``JT``. If a jump + table entry is found, branch to the corresponding block. If no entry was + found or the index is out-of-bounds, branch to the given default block. + + Note that this branch instruction can't pass arguments to the targeted + blocks. Split critical edges as needed to work around this. + + Do not confuse this with "tables" in WebAssembly. ``br_table`` is for + jump tables with destinations within the current function only -- think + of a ``match`` in Rust or a ``switch`` in C. If you want to call a + function in a dynamic library, that will typically use + ``call_indirect``. + "#, + &formats.branch_table, + ) + .operands_in(vec![x, block, JT]) + .is_terminator(true) + .is_branch(true), + ); + } + + let iAddr = &TypeVar::new( + "iAddr", + "An integer address type", + TypeSetBuilder::new().ints(32..64).build(), + ); + + { + let x = &Operand::new("x", iAddr).with_doc("index into jump table"); + let addr = &Operand::new("addr", iAddr); + let Size = &Operand::new("Size", &imm.uimm8).with_doc("Size in bytes"); + let JT = &Operand::new("JT", &entities.jump_table); + let entry = &Operand::new("entry", iAddr).with_doc("entry of jump table"); + + ig.push( + Inst::new( + "jump_table_entry", + r#" + Get an entry from a jump table. + + Load a serialized ``entry`` from a jump table ``JT`` at a given index + ``addr`` with a specific ``Size``. The retrieved entry may need to be + decoded after loading, depending upon the jump table type used. + + Currently, the only type supported is entries which are relative to the + base of the jump table. + "#, + &formats.branch_table_entry, + ) + .operands_in(vec![x, addr, Size, JT]) + .operands_out(vec![entry]) + .can_load(true), + ); + + ig.push( + Inst::new( + "jump_table_base", + r#" + Get the absolute base address of a jump table. + + This is used for jump tables wherein the entries are stored relative to + the base of jump table. In order to use these, generated code should first + load an entry using ``jump_table_entry``, then use this instruction to add + the relative base back to it. 
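+
+        A sketch of how the jump-table instructions are meant to compose
+        (illustrative syntax only; the operand order follows the definitions
+        in this file):
+
+        ```text
+        v1 = jump_table_base.i64 jt0
+        v2 = jump_table_entry.i64 v0, v1, 4, jt0
+        v3 = iadd v1, v2
+        indirect_jump_table_br v3, jt0
+        ```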
+ "#, + &formats.branch_table_base, + ) + .operands_in(vec![JT]) + .operands_out(vec![addr]), + ); + + ig.push( + Inst::new( + "indirect_jump_table_br", + r#" + Branch indirectly via a jump table entry. + + Unconditionally jump via a jump table entry that was previously loaded + with the ``jump_table_entry`` instruction. + "#, + &formats.indirect_jump, + ) + .operands_in(vec![addr, JT]) + .is_indirect_branch(true) + .is_terminator(true) + .is_branch(true), + ); + } + + ig.push( + Inst::new( + "debugtrap", + r#" + Encodes an assembly debug trap. + "#, + &formats.nullary, + ) + .other_side_effects(true) + .can_load(true) + .can_store(true), + ); + + { + let code = &Operand::new("code", &imm.trapcode); + ig.push( + Inst::new( + "trap", + r#" + Terminate execution unconditionally. + "#, + &formats.trap, + ) + .operands_in(vec![code]) + .can_trap(true) + .is_terminator(true), + ); + + let c = &Operand::new("c", Testable).with_doc("Controlling value to test"); + ig.push( + Inst::new( + "trapz", + r#" + Trap when zero. + + if ``c`` is non-zero, execution continues at the following instruction. + "#, + &formats.cond_trap, + ) + .operands_in(vec![c, code]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "resumable_trap", + r#" + A resumable trap. + + This instruction allows non-conditional traps to be used as non-terminal instructions. + "#, + &formats.trap, + ) + .operands_in(vec![code]) + .can_trap(true), + ); + + let c = &Operand::new("c", Testable).with_doc("Controlling value to test"); + ig.push( + Inst::new( + "trapnz", + r#" + Trap when non-zero. + + if ``c`` is zero, execution continues at the following instruction. + "#, + &formats.cond_trap, + ) + .operands_in(vec![c, code]) + .can_trap(true), + ); + + let Cond = &Operand::new("Cond", &imm.intcc); + let f = &Operand::new("f", iflags); + ig.push( + Inst::new( + "trapif", + r#" + Trap when condition is true in integer CPU flags. + "#, + &formats.int_cond_trap, + ) + .operands_in(vec![Cond, f, code]) + .can_trap(true), + ); + + let Cond = &Operand::new("Cond", &imm.floatcc); + let f = &Operand::new("f", fflags); + let code = &Operand::new("code", &imm.trapcode); + ig.push( + Inst::new( + "trapff", + r#" + Trap when condition is true in floating point CPU flags. + "#, + &formats.float_cond_trap, + ) + .operands_in(vec![Cond, f, code]) + .can_trap(true), + ); + } + + let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); + ig.push( + Inst::new( + "return", + r#" + Return from the function. + + Unconditionally transfer control to the calling function, passing the + provided return values. The list of return values must match the + function signature's return types. + "#, + &formats.multiary, + ) + .operands_in(vec![rvals]) + .is_return(true) + .is_terminator(true), + ); + + let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); + ig.push( + Inst::new( + "fallthrough_return", + r#" + Return from the function by fallthrough. + + This is a specialized instruction for use where one wants to append + a custom epilogue, which will then perform the real return. This + instruction has no encoding. 
+ "#, + &formats.multiary, + ) + .operands_in(vec![rvals]) + .is_return(true) + .is_terminator(true), + ); + + let FN = &Operand::new("FN", &entities.func_ref) + .with_doc("function to call, declared by `function`"); + let args = &Operand::new("args", &entities.varargs).with_doc("call arguments"); + let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); + ig.push( + Inst::new( + "call", + r#" + Direct function call. + + Call a function which has been declared in the preamble. The argument + types must match the function's signature. + "#, + &formats.call, + ) + .operands_in(vec![FN, args]) + .operands_out(vec![rvals]) + .is_call(true), + ); + + let SIG = &Operand::new("SIG", &entities.sig_ref).with_doc("function signature"); + let callee = &Operand::new("callee", iAddr).with_doc("address of function to call"); + let args = &Operand::new("args", &entities.varargs).with_doc("call arguments"); + let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); + ig.push( + Inst::new( + "call_indirect", + r#" + Indirect function call. + + Call the function pointed to by `callee` with the given arguments. The + called function must match the specified signature. + + Note that this is different from WebAssembly's ``call_indirect``; the + callee is a native address, rather than a table index. For WebAssembly, + `table_addr` and `load` are used to obtain a native address + from a table. + "#, + &formats.call_indirect, + ) + .operands_in(vec![SIG, callee, args]) + .operands_out(vec![rvals]) + .is_call(true), + ); + + let FN = &Operand::new("FN", &entities.func_ref) + .with_doc("function to call, declared by `function`"); + let addr = &Operand::new("addr", iAddr); + ig.push( + Inst::new( + "func_addr", + r#" + Get the address of a function. + + Compute the absolute address of a function declared in the preamble. + The returned address can be used as a ``callee`` argument to + `call_indirect`. This is also a method for calling functions that + are too far away to be addressable by a direct `call` + instruction. + "#, + &formats.func_addr, + ) + .operands_in(vec![FN]) + .operands_out(vec![addr]), + ); +} + +#[inline(never)] +fn define_simd_arithmetic( + ig: &mut InstructionGroupBuilder, + formats: &Formats, + _: &Immediates, + _: &EntityRefs, +) { + let Int = &TypeVar::new( + "Int", + "A scalar or vector integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let a = &Operand::new("a", Int); + let x = &Operand::new("x", Int); + let y = &Operand::new("y", Int); + + ig.push( + Inst::new( + "imin", + r#" + Signed integer minimum. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "umin", + r#" + Unsigned integer minimum. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "imax", + r#" + Signed integer maximum. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "umax", + r#" + Unsigned integer maximum. 
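+
+        For example, `umax` of the i8 values `0x80` and `0x01` is `0x80`,
+        since 128 > 1 when both bit patterns are read as unsigned.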
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let IxN = &TypeVar::new( + "IxN", + "A SIMD vector type containing integers", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + + let a = &Operand::new("a", IxN); + let x = &Operand::new("x", IxN); + let y = &Operand::new("y", IxN); + + ig.push( + Inst::new( + "avg_round", + r#" + Unsigned average with rounding: `a := (x + y + 1) // 2` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); +} + +#[allow(clippy::many_single_char_names)] +pub(crate) fn define( + all_instructions: &mut AllInstructions, + formats: &Formats, + imm: &Immediates, + entities: &EntityRefs, +) -> InstructionGroup { + let mut ig = InstructionGroupBuilder::new(all_instructions); + + define_control_flow(&mut ig, formats, imm, entities); + define_simd_arithmetic(&mut ig, formats, imm, entities); + + // Operand kind shorthands. + let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into(); + let fflags: &TypeVar = &ValueType::Special(types::Flag::FFlags.into()).into(); + + let b1: &TypeVar = &ValueType::from(LaneType::from(types::Bool::B1)).into(); + let f32_: &TypeVar = &ValueType::from(LaneType::from(types::Float::F32)).into(); + let f64_: &TypeVar = &ValueType::from(LaneType::from(types::Float::F64)).into(); + + // Starting definitions. + let Int = &TypeVar::new( + "Int", + "A scalar or vector integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let Bool = &TypeVar::new( + "Bool", + "A scalar or vector boolean type", + TypeSetBuilder::new() + .bools(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let iB = &TypeVar::new( + "iB", + "A scalar integer type", + TypeSetBuilder::new().ints(Interval::All).build(), + ); + + let iAddr = &TypeVar::new( + "iAddr", + "An integer address type", + TypeSetBuilder::new().ints(32..64).build(), + ); + + let Ref = &TypeVar::new( + "Ref", + "A scalar reference type", + TypeSetBuilder::new().refs(Interval::All).build(), + ); + + let Testable = &TypeVar::new( + "Testable", + "A scalar boolean or integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .bools(Interval::All) + .build(), + ); + + let TxN = &TypeVar::new( + "TxN", + "A SIMD vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let Any = &TypeVar::new( + "Any", + "Any integer, float, boolean, or reference scalar or vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .refs(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(true) + .build(), + ); + + let AnyTo = &TypeVar::copy_from(Any, "AnyTo".to_string()); + + let Mem = &TypeVar::new( + "Mem", + "Any type that can be stored in memory", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .simd_lanes(Interval::All) + .refs(Interval::All) + .build(), + ); + + let MemTo = &TypeVar::copy_from(Mem, "MemTo".to_string()); + + let addr = &Operand::new("addr", iAddr); + + let SS = &Operand::new("SS", &entities.stack_slot); + let Offset = &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from base address"); + let x = &Operand::new("x", Mem).with_doc("Value to be stored"); + let a = &Operand::new("a", Mem).with_doc("Value loaded"); + let p = &Operand::new("p", 
iAddr); + let MemFlags = &Operand::new("MemFlags", &imm.memflags); + let args = &Operand::new("args", &entities.varargs).with_doc("Address arguments"); + + ig.push( + Inst::new( + "load", + r#" + Load from memory at ``p + Offset``. + + This is a polymorphic instruction that can load any value type which + has a memory representation. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "load_complex", + r#" + Load from memory at ``sum(args) + Offset``. + + This is a polymorphic instruction that can load any value type which + has a memory representation. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "store", + r#" + Store ``x`` to memory at ``p + Offset``. + + This is a polymorphic instruction that can store any value type with a + memory representation. + "#, + &formats.store, + ) + .operands_in(vec![MemFlags, x, p, Offset]) + .can_store(true), + ); + + ig.push( + Inst::new( + "store_complex", + r#" + Store ``x`` to memory at ``sum(args) + Offset``. + + This is a polymorphic instruction that can store any value type with a + memory representation. + "#, + &formats.store_complex, + ) + .operands_in(vec![MemFlags, x, args, Offset]) + .can_store(true), + ); + + let iExt8 = &TypeVar::new( + "iExt8", + "An integer type with more than 8 bits", + TypeSetBuilder::new().ints(16..64).build(), + ); + let x = &Operand::new("x", iExt8); + let a = &Operand::new("a", iExt8); + + ig.push( + Inst::new( + "uload8", + r#" + Load 8 bits from memory at ``p + Offset`` and zero-extend. + + This is equivalent to ``load.i8`` followed by ``uextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "uload8_complex", + r#" + Load 8 bits from memory at ``sum(args) + Offset`` and zero-extend. + + This is equivalent to ``load.i8`` followed by ``uextend``. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload8", + r#" + Load 8 bits from memory at ``p + Offset`` and sign-extend. + + This is equivalent to ``load.i8`` followed by ``sextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload8_complex", + r#" + Load 8 bits from memory at ``sum(args) + Offset`` and sign-extend. + + This is equivalent to ``load.i8`` followed by ``sextend``. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "istore8", + r#" + Store the low 8 bits of ``x`` to memory at ``p + Offset``. + + This is equivalent to ``ireduce.i8`` followed by ``store.i8``. + "#, + &formats.store, + ) + .operands_in(vec![MemFlags, x, p, Offset]) + .can_store(true), + ); + + ig.push( + Inst::new( + "istore8_complex", + r#" + Store the low 8 bits of ``x`` to memory at ``sum(args) + Offset``. + + This is equivalent to ``ireduce.i8`` followed by ``store.i8``. 
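+
+        A pseudo-CLIF sketch of that equivalence (flags and offsets elided;
+        syntax illustrative):
+
+        ```text
+        istore8 v1, v2          ; behaves like:
+        v3 = ireduce.i8 v1
+        store v3, v2
+        ```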
+ "#, + &formats.store_complex, + ) + .operands_in(vec![MemFlags, x, args, Offset]) + .can_store(true), + ); + + let iExt16 = &TypeVar::new( + "iExt16", + "An integer type with more than 16 bits", + TypeSetBuilder::new().ints(32..64).build(), + ); + let x = &Operand::new("x", iExt16); + let a = &Operand::new("a", iExt16); + + ig.push( + Inst::new( + "uload16", + r#" + Load 16 bits from memory at ``p + Offset`` and zero-extend. + + This is equivalent to ``load.i16`` followed by ``uextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "uload16_complex", + r#" + Load 16 bits from memory at ``sum(args) + Offset`` and zero-extend. + + This is equivalent to ``load.i16`` followed by ``uextend``. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload16", + r#" + Load 16 bits from memory at ``p + Offset`` and sign-extend. + + This is equivalent to ``load.i16`` followed by ``sextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload16_complex", + r#" + Load 16 bits from memory at ``sum(args) + Offset`` and sign-extend. + + This is equivalent to ``load.i16`` followed by ``sextend``. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "istore16", + r#" + Store the low 16 bits of ``x`` to memory at ``p + Offset``. + + This is equivalent to ``ireduce.i16`` followed by ``store.i16``. + "#, + &formats.store, + ) + .operands_in(vec![MemFlags, x, p, Offset]) + .can_store(true), + ); + + ig.push( + Inst::new( + "istore16_complex", + r#" + Store the low 16 bits of ``x`` to memory at ``sum(args) + Offset``. + + This is equivalent to ``ireduce.i16`` followed by ``store.i16``. + "#, + &formats.store_complex, + ) + .operands_in(vec![MemFlags, x, args, Offset]) + .can_store(true), + ); + + let iExt32 = &TypeVar::new( + "iExt32", + "An integer type with more than 32 bits", + TypeSetBuilder::new().ints(64..64).build(), + ); + let x = &Operand::new("x", iExt32); + let a = &Operand::new("a", iExt32); + + ig.push( + Inst::new( + "uload32", + r#" + Load 32 bits from memory at ``p + Offset`` and zero-extend. + + This is equivalent to ``load.i32`` followed by ``uextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "uload32_complex", + r#" + Load 32 bits from memory at ``sum(args) + Offset`` and zero-extend. + + This is equivalent to ``load.i32`` followed by ``uextend``. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload32", + r#" + Load 32 bits from memory at ``p + Offset`` and sign-extend. + + This is equivalent to ``load.i32`` followed by ``sextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload32_complex", + r#" + Load 32 bits from memory at ``sum(args) + Offset`` and sign-extend. + + This is equivalent to ``load.i32`` followed by ``sextend``. 
+ "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "istore32", + r#" + Store the low 32 bits of ``x`` to memory at ``p + Offset``. + + This is equivalent to ``ireduce.i32`` followed by ``store.i32``. + "#, + &formats.store, + ) + .operands_in(vec![MemFlags, x, p, Offset]) + .can_store(true), + ); + + ig.push( + Inst::new( + "istore32_complex", + r#" + Store the low 32 bits of ``x`` to memory at ``sum(args) + Offset``. + + This is equivalent to ``ireduce.i32`` followed by ``store.i32``. + "#, + &formats.store_complex, + ) + .operands_in(vec![MemFlags, x, args, Offset]) + .can_store(true), + ); + + let x = &Operand::new("x", Mem).with_doc("Value to be stored"); + let a = &Operand::new("a", Mem).with_doc("Value loaded"); + let Offset = + &Operand::new("Offset", &imm.offset32).with_doc("In-bounds offset into stack slot"); + + ig.push( + Inst::new( + "stack_load", + r#" + Load a value from a stack slot at the constant offset. + + This is a polymorphic instruction that can load any value type which + has a memory representation. + + The offset is an immediate constant, not an SSA value. The memory + access cannot go out of bounds, i.e. + `sizeof(a) + Offset <= sizeof(SS)`. + "#, + &formats.stack_load, + ) + .operands_in(vec![SS, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "stack_store", + r#" + Store a value to a stack slot at a constant offset. + + This is a polymorphic instruction that can store any value type with a + memory representation. + + The offset is an immediate constant, not an SSA value. The memory + access cannot go out of bounds, i.e. + `sizeof(a) + Offset <= sizeof(SS)`. + "#, + &formats.stack_store, + ) + .operands_in(vec![x, SS, Offset]) + .can_store(true), + ); + + ig.push( + Inst::new( + "stack_addr", + r#" + Get the address of a stack slot. + + Compute the absolute address of a byte in a stack slot. The offset must + refer to a byte inside the stack slot: + `0 <= Offset < sizeof(SS)`. + "#, + &formats.stack_load, + ) + .operands_in(vec![SS, Offset]) + .operands_out(vec![addr]), + ); + + let GV = &Operand::new("GV", &entities.global_value); + + ig.push( + Inst::new( + "global_value", + r#" + Compute the value of global GV. + "#, + &formats.unary_global_value, + ) + .operands_in(vec![GV]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "symbol_value", + r#" + Compute the value of global GV, which is a symbolic value. + "#, + &formats.unary_global_value, + ) + .operands_in(vec![GV]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "tls_value", + r#" + Compute the value of global GV, which is a TLS (thread local storage) value. + "#, + &formats.unary_global_value, + ) + .operands_in(vec![GV]) + .operands_out(vec![a]), + ); + + let HeapOffset = &TypeVar::new( + "HeapOffset", + "An unsigned heap offset", + TypeSetBuilder::new().ints(32..64).build(), + ); + + let H = &Operand::new("H", &entities.heap); + let p = &Operand::new("p", HeapOffset); + let Size = &Operand::new("Size", &imm.uimm32).with_doc("Size in bytes"); + + ig.push( + Inst::new( + "heap_addr", + r#" + Bounds check and compute absolute address of heap memory. + + Verify that the offset range ``p .. p + Size - 1`` is in bounds for the + heap H, and generate an absolute address that is safe to dereference. + + 1. 
If ``p + Size`` is not greater than the heap bound, return an + absolute address corresponding to a byte offset of ``p`` from the + heap's base address. + 2. If ``p + Size`` is greater than the heap bound, generate a trap. + "#, + &formats.heap_addr, + ) + .operands_in(vec![H, p, Size]) + .operands_out(vec![addr]), + ); + + // Note this instruction is marked as having other side-effects, so GVN won't try to hoist it, + // which would result in it being subject to spilling. While not hoisting would generally hurt + // performance, since a computed value used many times may need to be regenerated before each + // use, it is not the case here: this instruction doesn't generate any code. That's because, + // by definition the pinned register is never used by the register allocator, but is written to + // and read explicitly and exclusively by set_pinned_reg and get_pinned_reg. + ig.push( + Inst::new( + "get_pinned_reg", + r#" + Gets the content of the pinned register, when it's enabled. + "#, + &formats.nullary, + ) + .operands_out(vec![addr]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "set_pinned_reg", + r#" + Sets the content of the pinned register, when it's enabled. + "#, + &formats.unary, + ) + .operands_in(vec![addr]) + .other_side_effects(true), + ); + + let TableOffset = &TypeVar::new( + "TableOffset", + "An unsigned table offset", + TypeSetBuilder::new().ints(32..64).build(), + ); + let T = &Operand::new("T", &entities.table); + let p = &Operand::new("p", TableOffset); + let Offset = + &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from element address"); + + ig.push( + Inst::new( + "table_addr", + r#" + Bounds check and compute absolute address of a table entry. + + Verify that the offset ``p`` is in bounds for the table T, and generate + an absolute address that is safe to dereference. + + ``Offset`` must be less than the size of a table element. + + 1. If ``p`` is not greater than the table bound, return an absolute + address corresponding to a byte offset of ``p`` from the table's + base address. + 2. If ``p`` is greater than the table bound, generate a trap. + "#, + &formats.table_addr, + ) + .operands_in(vec![T, p, Offset]) + .operands_out(vec![addr]), + ); + + let N = &Operand::new("N", &imm.imm64); + let a = &Operand::new("a", Int).with_doc("A constant integer scalar or vector value"); + + ig.push( + Inst::new( + "iconst", + r#" + Integer constant. + + Create a scalar integer SSA value with an immediate constant value, or + an integer vector where all the lanes have the same value. + "#, + &formats.unary_imm, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let N = &Operand::new("N", &imm.ieee32); + let a = &Operand::new("a", f32_).with_doc("A constant f32 scalar value"); + + ig.push( + Inst::new( + "f32const", + r#" + Floating point constant. + + Create a `f32` SSA value with an immediate constant value. + "#, + &formats.unary_ieee32, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let N = &Operand::new("N", &imm.ieee64); + let a = &Operand::new("a", f64_).with_doc("A constant f64 scalar value"); + + ig.push( + Inst::new( + "f64const", + r#" + Floating point constant. + + Create a `f64` SSA value with an immediate constant value. + "#, + &formats.unary_ieee64, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let N = &Operand::new("N", &imm.boolean); + let a = &Operand::new("a", Bool).with_doc("A constant boolean scalar or vector value"); + + ig.push( + Inst::new( + "bconst", + r#" + Boolean constant. 
+ + Create a scalar boolean SSA value with an immediate constant value, or + a boolean vector where all the lanes have the same value. + "#, + &formats.unary_bool, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let N = &Operand::new("N", &imm.pool_constant) + .with_doc("The 16 immediate bytes of a 128-bit vector"); + let a = &Operand::new("a", TxN).with_doc("A constant vector value"); + + ig.push( + Inst::new( + "vconst", + r#" + SIMD vector constant. + + Construct a vector with the given immediate bytes. + "#, + &formats.unary_const, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let mask = &Operand::new("mask", &imm.uimm128) + .with_doc("The 16 immediate bytes used for selecting the elements to shuffle"); + let Tx16 = &TypeVar::new( + "Tx16", + "A SIMD vector with exactly 16 lanes of 8-bit values; eventually this may support other \ + lane counts and widths", + TypeSetBuilder::new() + .ints(8..8) + .bools(8..8) + .simd_lanes(16..16) + .includes_scalars(false) + .build(), + ); + let a = &Operand::new("a", Tx16).with_doc("A vector value"); + let b = &Operand::new("b", Tx16).with_doc("A vector value"); + + ig.push( + Inst::new( + "shuffle", + r#" + SIMD vector shuffle. + + Shuffle two vectors using the given immediate bytes. For each of the 16 bytes of the + immediate, a value i of 0-15 selects the i-th element of the first vector and a value i of + 16-31 selects the (i-16)th element of the second vector. Immediate values outside of the + 0-31 range place a 0 in the resulting vector lane. + "#, + &formats.shuffle, + ) + .operands_in(vec![a, b, mask]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", Ref).with_doc("A constant reference null value"); + + ig.push( + Inst::new( + "null", + r#" + Null constant value for reference types. + + Create a scalar reference SSA value with a constant null value. + "#, + &formats.nullary, + ) + .operands_out(vec![a]), + ); + + ig.push(Inst::new( + "nop", + r#" + Just a dummy instruction. + + Note: this doesn't compile to a machine code nop. + "#, + &formats.nullary, + )); + + let c = &Operand::new("c", Testable).with_doc("Controlling value to test"); + let x = &Operand::new("x", Any).with_doc("Value to use when `c` is true"); + let y = &Operand::new("y", Any).with_doc("Value to use when `c` is false"); + let a = &Operand::new("a", Any); + + ig.push( + Inst::new( + "select", + r#" + Conditional select. + + This instruction selects whole values. Use `vselect` for + lane-wise selection. + "#, + &formats.ternary, + ) + .operands_in(vec![c, x, y]) + .operands_out(vec![a]), + ); + + let cc = &Operand::new("cc", &imm.intcc).with_doc("Controlling condition code"); + let flags = &Operand::new("flags", iflags).with_doc("The machine's flag register"); + + ig.push( + Inst::new( + "selectif", + r#" + Conditional select, dependent on integer condition codes. + "#, + &formats.int_select, + ) + .operands_in(vec![cc, flags, x, y]) + .operands_out(vec![a]), + ); + + let c = &Operand::new("c", Any).with_doc("Controlling value to test"); + ig.push( + Inst::new( + "bitselect", + r#" + Conditional select of bits. + + For each bit in `c`, this instruction selects the corresponding bit from `x` if the bit + in `c` is 1 and the corresponding bit from `y` if the bit in `c` is 0. See also: + `select`, `vselect`. + "#, + &formats.ternary, + ) + .operands_in(vec![c, x, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Any); + + ig.push( + Inst::new( + "copy", + r#" + Register-register copy. 
+ + This instruction copies its input, preserving the value type. + + A pure SSA-form program does not need to copy values, but this + instruction is useful for representing intermediate stages during + instruction transformations, and the register allocator needs a way of + representing register copies. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "spill", + r#" + Spill a register value to a stack slot. + + This instruction behaves exactly like `copy`, but the result + value is assigned to a spill slot. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_store(true), + ); + + ig.push( + Inst::new( + "fill", + r#" + Load a register value from a stack slot. + + This instruction behaves exactly like `copy`, but creates a new + SSA value for the spilled input value. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "fill_nop", + r#" + This is identical to `fill`, except it has no encoding, since it is a no-op. + + This instruction is created only during late-stage redundant-reload removal, after all + registers and stack slots have been assigned. It is used to replace `fill`s that have + been identified as redundant. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_load(true), + ); + + let src = &Operand::new("src", &imm.regunit); + let dst = &Operand::new("dst", &imm.regunit); + + ig.push( + Inst::new( + "regmove", + r#" + Temporarily divert ``x`` from ``src`` to ``dst``. + + This instruction moves the location of a value from one register to + another without creating a new SSA value. It is used by the register + allocator to temporarily rearrange register assignments in order to + satisfy instruction constraints. + + The register diversions created by this instruction must be undone + before the value leaves the block. At the entry to a new block, all live + values must be in their originally assigned registers. + "#, + &formats.reg_move, + ) + .operands_in(vec![x, src, dst]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "copy_special", + r#" + Copies the contents of ''src'' register to ''dst'' register. + + This instructions copies the contents of one register to another + register without involving any SSA values. This is used for copying + special registers, e.g. copying the stack register to the frame + register in a function prologue. + "#, + &formats.copy_special, + ) + .operands_in(vec![src, dst]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "copy_to_ssa", + r#" + Copies the contents of ''src'' register to ''a'' SSA name. + + This instruction copies the contents of one register, regardless of its SSA name, to + another register, creating a new SSA name. In that sense it is a one-sided version + of ''copy_special''. This instruction is internal and should not be created by + Cranelift users. + "#, + &formats.copy_to_ssa, + ) + .operands_in(vec![src]) + .operands_out(vec![a]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "copy_nop", + r#" + Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn + into a no-op. This instruction is for use only within Cranelift itself. + + This instruction copies its input, preserving the value type. 
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let delta = &Operand::new("delta", Int); + + ig.push( + Inst::new( + "adjust_sp_down", + r#" + Subtracts ``delta`` offset value from the stack pointer register. + + This instruction is used to adjust the stack pointer by a dynamic amount. + "#, + &formats.unary, + ) + .operands_in(vec![delta]) + .other_side_effects(true), + ); + + let Offset = &Operand::new("Offset", &imm.imm64).with_doc("Offset from current stack pointer"); + + ig.push( + Inst::new( + "adjust_sp_up_imm", + r#" + Adds ``Offset`` immediate offset value to the stack pointer register. + + This instruction is used to adjust the stack pointer, primarily in function + prologues and epilogues. ``Offset`` is constrained to the size of a signed + 32-bit integer. + "#, + &formats.unary_imm, + ) + .operands_in(vec![Offset]) + .other_side_effects(true), + ); + + let Offset = &Operand::new("Offset", &imm.imm64).with_doc("Offset from current stack pointer"); + + ig.push( + Inst::new( + "adjust_sp_down_imm", + r#" + Subtracts ``Offset`` immediate offset value from the stack pointer + register. + + This instruction is used to adjust the stack pointer, primarily in function + prologues and epilogues. ``Offset`` is constrained to the size of a signed + 32-bit integer. + "#, + &formats.unary_imm, + ) + .operands_in(vec![Offset]) + .other_side_effects(true), + ); + + let f = &Operand::new("f", iflags); + + ig.push( + Inst::new( + "ifcmp_sp", + r#" + Compare ``addr`` with the stack pointer and set the CPU flags. + + This is like `ifcmp` where ``addr`` is the LHS operand and the stack + pointer is the RHS. + "#, + &formats.unary, + ) + .operands_in(vec![addr]) + .operands_out(vec![f]), + ); + + ig.push( + Inst::new( + "regspill", + r#" + Temporarily divert ``x`` from ``src`` to ``SS``. + + This instruction moves the location of a value from a register to a + stack slot without creating a new SSA value. It is used by the register + allocator to temporarily rearrange register assignments in order to + satisfy instruction constraints. + + See also `regmove`. + "#, + &formats.reg_spill, + ) + .operands_in(vec![x, src, SS]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "regfill", + r#" + Temporarily divert ``x`` from ``SS`` to ``dst``. + + This instruction moves the location of a value from a stack slot to a + register without creating a new SSA value. It is used by the register + allocator to temporarily rearrange register assignments in order to + satisfy instruction constraints. + + See also `regmove`. + "#, + &formats.reg_fill, + ) + .operands_in(vec![x, SS, dst]) + .other_side_effects(true), + ); + + let N = + &Operand::new("args", &entities.varargs).with_doc("Variable number of args for Stackmap"); + + ig.push( + Inst::new( + "safepoint", + r#" + This instruction will provide live reference values at a point in + the function. It can only be used by the compiler. + "#, + &formats.multiary, + ) + .operands_in(vec![N]) + .other_side_effects(true), + ); + + let x = &Operand::new("x", TxN).with_doc("Vector to split"); + let lo = &Operand::new("lo", &TxN.half_vector()).with_doc("Low-numbered lanes of `x`"); + let hi = &Operand::new("hi", &TxN.half_vector()).with_doc("High-numbered lanes of `x`"); + + ig.push( + Inst::new( + "vsplit", + r#" + Split a vector into two halves. + + Split the vector `x` into two separate values, each containing half of + the lanes from ``x``. The result may be two scalars if ``x`` only had + two lanes. 
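+
+        For example, splitting an `i32x4` produces two `i32x2` values
+        (illustrative syntax):
+
+        ```text
+        v1, v2 = vsplit v0
+        ```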
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![lo, hi])
+        .is_ghost(true),
+    );
+
+    let Any128 = &TypeVar::new(
+        "Any128",
+        "Any scalar or vector type with at most 128 lanes",
+        TypeSetBuilder::new()
+            .ints(Interval::All)
+            .floats(Interval::All)
+            .bools(Interval::All)
+            .simd_lanes(1..128)
+            .includes_scalars(true)
+            .build(),
+    );
+
+    let x = &Operand::new("x", Any128).with_doc("Low-numbered lanes");
+    let y = &Operand::new("y", Any128).with_doc("High-numbered lanes");
+    let a = &Operand::new("a", &Any128.double_vector()).with_doc("Concatenation of `x` and `y`");
+
+    ig.push(
+        Inst::new(
+            "vconcat",
+            r#"
+        Vector concatenation.
+
+        Return a vector formed by concatenating ``x`` and ``y``. The resulting
+        vector type has twice as many lanes as each of the inputs. The lanes of
+        ``x`` appear as the low-numbered lanes, and the lanes of ``y`` become
+        the high-numbered lanes of ``a``.
+
+        It is possible to form a vector by concatenating two scalars.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a])
+        .is_ghost(true),
+    );
+
+    let c = &Operand::new("c", &TxN.as_bool()).with_doc("Controlling vector");
+    let x = &Operand::new("x", TxN).with_doc("Value to use where `c` is true");
+    let y = &Operand::new("y", TxN).with_doc("Value to use where `c` is false");
+    let a = &Operand::new("a", TxN);
+
+    ig.push(
+        Inst::new(
+            "vselect",
+            r#"
+        Vector lane select.
+
+        Select lanes from ``x`` or ``y`` controlled by the lanes of the boolean
+        vector ``c``.
+        "#,
+            &formats.ternary,
+        )
+        .operands_in(vec![c, x, y])
+        .operands_out(vec![a]),
+    );
+
+    let s = &Operand::new("s", b1);
+
+    ig.push(
+        Inst::new(
+            "vany_true",
+            r#"
+        Reduce a vector to a scalar boolean.
+
+        Return a scalar boolean true if any lane in ``a`` is non-zero, false otherwise.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![a])
+        .operands_out(vec![s]),
+    );
+
+    ig.push(
+        Inst::new(
+            "vall_true",
+            r#"
+        Reduce a vector to a scalar boolean.
+
+        Return a scalar boolean true if all lanes in ``a`` are non-zero, false otherwise.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![a])
+        .operands_out(vec![s]),
+    );
+
+    let x = &Operand::new("x", &TxN.lane_of());
+
+    ig.push(
+        Inst::new(
+            "splat",
+            r#"
+        Vector splat.
+
+        Return a vector whose lanes are all ``x``.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    let x = &Operand::new("x", TxN).with_doc("SIMD vector to modify");
+    let y = &Operand::new("y", &TxN.lane_of()).with_doc("New lane value");
+    let Idx = &Operand::new("Idx", &imm.uimm8).with_doc("Lane index");
+
+    ig.push(
+        Inst::new(
+            "insertlane",
+            r#"
+        Insert ``y`` as lane ``Idx`` in ``x``.
+
+        The lane index, ``Idx``, is an immediate value, not an SSA value. It
+        must indicate a valid lane index for the type of ``x``.
+        "#,
+            &formats.insert_lane,
+        )
+        .operands_in(vec![x, Idx, y])
+        .operands_out(vec![a]),
+    );
+
+    let x = &Operand::new("x", TxN);
+    let a = &Operand::new("a", &TxN.lane_of());
+
+    ig.push(
+        Inst::new(
+            "extractlane",
+            r#"
+        Extract lane ``Idx`` from ``x``.
+
+        The lane index, ``Idx``, is an immediate value, not an SSA value. It
+        must indicate a valid lane index for the type of ``x``. Note that the upper bits of ``a``
+        may or may not be zeroed depending on the ISA but the type system should prevent using
+        ``a`` as anything other than the extracted value.
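+
+        For example, reading the highest-numbered lane of an `i32x4`
+        (illustrative syntax):
+
+        ```text
+        v1 = extractlane v0, 3
+        ```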
+        "#,
+            &formats.extract_lane,
+        )
+        .operands_in(vec![x, Idx])
+        .operands_out(vec![a]),
+    );
+
+    let a = &Operand::new("a", &Int.as_bool());
+    let Cond = &Operand::new("Cond", &imm.intcc);
+    let x = &Operand::new("x", Int);
+    let y = &Operand::new("y", Int);
+
+    ig.push(
+        Inst::new(
+            "icmp",
+            r#"
+        Integer comparison.
+
+        The condition code determines if the operands are interpreted as signed
+        or unsigned integers.
+
+        | Signed | Unsigned | Condition             |
+        |--------|----------|-----------------------|
+        | eq     | eq       | Equal                 |
+        | ne     | ne       | Not equal             |
+        | slt    | ult      | Less than             |
+        | sge    | uge      | Greater than or equal |
+        | sgt    | ugt      | Greater than          |
+        | sle    | ule      | Less than or equal    |
+        | of     | *        | Overflow              |
+        | nof    | *        | No overflow           |
+
+        \* The unsigned versions of the overflow conditions have ISA-specific
+        semantics and thus have been kept as methods on the TargetIsa trait as
+        [unsigned_add_overflow_condition][isa::TargetIsa::unsigned_add_overflow_condition] and
+        [unsigned_sub_overflow_condition][isa::TargetIsa::unsigned_sub_overflow_condition].
+
+        When this instruction compares integer vectors, it returns a boolean
+        vector of lane-wise comparisons.
+        "#,
+            &formats.int_compare,
+        )
+        .operands_in(vec![Cond, x, y])
+        .operands_out(vec![a]),
+    );
+
+    let a = &Operand::new("a", b1);
+    let x = &Operand::new("x", iB);
+    let Y = &Operand::new("Y", &imm.imm64);
+
+    ig.push(
+        Inst::new(
+            "icmp_imm",
+            r#"
+        Compare scalar integer to a constant.
+
+        This is the same as the `icmp` instruction, except one operand is
+        an immediate constant.
+
+        This instruction can only compare scalars. Use `icmp` for
+        lane-wise vector comparisons.
+        "#,
+            &formats.int_compare_imm,
+        )
+        .operands_in(vec![Cond, x, Y])
+        .operands_out(vec![a]),
+    );
+
+    let f = &Operand::new("f", iflags);
+    let x = &Operand::new("x", iB);
+    let y = &Operand::new("y", iB);
+
+    ig.push(
+        Inst::new(
+            "ifcmp",
+            r#"
+        Compare scalar integers and return flags.
+
+        Compare two scalar integer values and return integer CPU flags
+        representing the result.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![f]),
+    );
+
+    ig.push(
+        Inst::new(
+            "ifcmp_imm",
+            r#"
+        Compare scalar integer to a constant and return flags.
+
+        Like `icmp_imm`, but returns integer CPU flags instead of testing
+        a specific condition code.
+        "#,
+            &formats.binary_imm,
+        )
+        .operands_in(vec![x, Y])
+        .operands_out(vec![f]),
+    );
+
+    let a = &Operand::new("a", Int);
+    let x = &Operand::new("x", Int);
+    let y = &Operand::new("y", Int);
+
+    ig.push(
+        Inst::new(
+            "iadd",
+            r#"
+        Wrapping integer addition: `a := x + y \pmod{2^B}`.
+
+        This instruction does not depend on the signed/unsigned interpretation
+        of the operands.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "uadd_sat",
+            r#"
+        Add with unsigned saturation.
+
+        This is similar to `iadd` but the operands are interpreted as unsigned integers and their
+        summed result, instead of wrapping, will be saturated to the highest unsigned integer for
+        the controlling type (e.g. `0xFF` for i8).
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "sadd_sat",
+            r#"
+        Add with signed saturation.
+
+        This is similar to `iadd` but the operands are interpreted as signed integers and their
+        summed result, instead of wrapping, will be saturated to the lowest or highest
+        signed integer for the controlling type (e.g. `0x80` or `0x7F` for i8).
+        For example, since an `sadd_sat.i8` of `0x70` and `0x70` is greater
+        than `0x7F`, the result will be clamped to `0x7F`.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "isub",
+            r#"
+        Wrapping integer subtraction: `a := x - y \pmod{2^B}`.
+
+        This instruction does not depend on the signed/unsigned interpretation
+        of the operands.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "usub_sat",
+            r#"
+        Subtract with unsigned saturation.
+
+        This is similar to `isub` but the operands are interpreted as unsigned integers and their
+        difference, instead of wrapping, will be saturated to the lowest unsigned integer for
+        the controlling type (e.g. `0x00` for i8).
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "ssub_sat",
+            r#"
+        Subtract with signed saturation.
+
+        This is similar to `isub` but the operands are interpreted as signed integers and their
+        difference, instead of wrapping, will be saturated to the lowest or highest
+        signed integer for the controlling type (e.g. `0x80` or `0x7F` for i8).
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "ineg",
+            r#"
+        Integer negation: `a := -x \pmod{2^B}`.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "imul",
+            r#"
+        Wrapping integer multiplication: `a := x y \pmod{2^B}`.
+
+        This instruction does not depend on the signed/unsigned interpretation
+        of the operands.
+
+        Polymorphic over all integer types (vector and scalar).
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "umulhi",
+            r#"
+        Unsigned integer multiplication, producing the high half of a
+        double-length result.
+
+        Polymorphic over all scalar integer types, but does not support vector
+        types.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "smulhi",
+            r#"
+        Signed integer multiplication, producing the high half of a
+        double-length result.
+
+        Polymorphic over all scalar integer types, but does not support vector
+        types.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "udiv",
+            r#"
+        Unsigned integer division: `a := \lfloor {x \over y} \rfloor`.
+
+        This operation traps if the divisor is zero.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a])
+        .can_trap(true),
+    );
+
+    ig.push(
+        Inst::new(
+            "sdiv",
+            r#"
+        Signed integer division rounded toward zero: `a := sign(xy)
+        \lfloor {|x| \over |y|}\rfloor`.
+
+        This operation traps if the divisor is zero, or if the result is not
+        representable in `B` bits two's complement. This only happens
+        when `x = -2^{B-1}, y = -1`.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a])
+        .can_trap(true),
+    );
+
+    ig.push(
+        Inst::new(
+            "urem",
+            r#"
+        Unsigned integer remainder.
+
+        This operation traps if the divisor is zero.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a])
+        .can_trap(true),
+    );
+
+    ig.push(
+        Inst::new(
+            "srem",
+            r#"
+        Signed integer remainder. The result has the sign of the dividend.
+
+        This operation traps if the divisor is zero.
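+
+        For example, `-7 srem 2` is `-1` and `7 srem -2` is `1`; the sign of
+        the result always follows the dividend.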
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]) + .can_trap(true), + ); + + let a = &Operand::new("a", iB); + let x = &Operand::new("x", iB); + let Y = &Operand::new("Y", &imm.imm64); + + ig.push( + Inst::new( + "iadd_imm", + r#" + Add immediate integer. + + Same as `iadd`, but one operand is an immediate constant. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "imul_imm", + r#" + Integer multiplication by immediate constant. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "udiv_imm", + r#" + Unsigned integer division by an immediate constant. + + This operation traps if the divisor is zero. + "#, + &formats.binary_imm, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "sdiv_imm", + r#" + Signed integer division by an immediate constant. + + This operation traps if the divisor is zero, or if the result is not + representable in `B` bits two's complement. This only happens + when `x = -2^{B-1}, Y = -1`. + "#, + &formats.binary_imm, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "urem_imm", + r#" + Unsigned integer remainder with immediate divisor. + + This operation traps if the divisor is zero. + "#, + &formats.binary_imm, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "srem_imm", + r#" + Signed integer remainder with immediate divisor. + + This operation traps if the divisor is zero. + "#, + &formats.binary_imm, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "irsub_imm", + r#" + Immediate reverse wrapping subtraction: `a := Y - x \pmod{2^B}`. + + Also works as integer negation when `Y = 0`. Use `iadd_imm` + with a negative immediate operand for the reverse immediate + subtraction. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", iB); + let x = &Operand::new("x", iB); + let y = &Operand::new("y", iB); + + let c_in = &Operand::new("c_in", b1).with_doc("Input carry flag"); + let c_out = &Operand::new("c_out", b1).with_doc("Output carry flag"); + let b_in = &Operand::new("b_in", b1).with_doc("Input borrow flag"); + let b_out = &Operand::new("b_out", b1).with_doc("Output borrow flag"); + + let c_if_in = &Operand::new("c_in", iflags); + let c_if_out = &Operand::new("c_out", iflags); + let b_if_in = &Operand::new("b_in", iflags); + let b_if_out = &Operand::new("b_out", iflags); + + ig.push( + Inst::new( + "iadd_cin", + r#" + Add integers with carry in. + + Same as `iadd` with an additional carry input. Computes: + + ```text + a = x + y + c_{in} \pmod 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, c_in]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "iadd_ifcin", + r#" + Add integers with carry in. + + Same as `iadd` with an additional carry flag input. Computes: + + ```text + a = x + y + c_{in} \pmod 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. 
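+
+        For example, with i8 operands and the carry flag set, adding `0xFE`
+        and `0x01` yields `0x00`, since `0xFE + 0x01 + 1` wraps modulo `2^8`.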
+ "#, + &formats.ternary, + ) + .operands_in(vec![x, y, c_if_in]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "iadd_cout", + r#" + Add integers with carry out. + + Same as `iadd` with an additional carry output. + + ```text + a &= x + y \pmod 2^B \\ + c_{out} &= x+y >= 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a, c_out]), + ); + + ig.push( + Inst::new( + "iadd_ifcout", + r#" + Add integers with carry out. + + Same as `iadd` with an additional carry flag output. + + ```text + a &= x + y \pmod 2^B \\ + c_{out} &= x+y >= 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a, c_if_out]), + ); + + ig.push( + Inst::new( + "iadd_carry", + r#" + Add integers with carry in and out. + + Same as `iadd` with an additional carry input and output. + + ```text + a &= x + y + c_{in} \pmod 2^B \\ + c_{out} &= x + y + c_{in} >= 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, c_in]) + .operands_out(vec![a, c_out]), + ); + + ig.push( + Inst::new( + "iadd_ifcarry", + r#" + Add integers with carry in and out. + + Same as `iadd` with an additional carry flag input and output. + + ```text + a &= x + y + c_{in} \pmod 2^B \\ + c_{out} &= x + y + c_{in} >= 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, c_if_in]) + .operands_out(vec![a, c_if_out]), + ); + + ig.push( + Inst::new( + "isub_bin", + r#" + Subtract integers with borrow in. + + Same as `isub` with an additional borrow flag input. Computes: + + ```text + a = x - (y + b_{in}) \pmod 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, b_in]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "isub_ifbin", + r#" + Subtract integers with borrow in. + + Same as `isub` with an additional borrow flag input. Computes: + + ```text + a = x - (y + b_{in}) \pmod 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, b_if_in]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "isub_bout", + r#" + Subtract integers with borrow out. + + Same as `isub` with an additional borrow flag output. + + ```text + a &= x - y \pmod 2^B \\ + b_{out} &= x < y + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a, b_out]), + ); + + ig.push( + Inst::new( + "isub_ifbout", + r#" + Subtract integers with borrow out. + + Same as `isub` with an additional borrow flag output. + + ```text + a &= x - y \pmod 2^B \\ + b_{out} &= x < y + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a, b_if_out]), + ); + + ig.push( + Inst::new( + "isub_borrow", + r#" + Subtract integers with borrow in and out. + + Same as `isub` with an additional borrow flag input and output. 
+ + ```text + a &= x - (y + b_{in}) \pmod 2^B \\ + b_{out} &= x < y + b_{in} + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, b_in]) + .operands_out(vec![a, b_out]), + ); + + ig.push( + Inst::new( + "isub_ifborrow", + r#" + Subtract integers with borrow in and out. + + Same as `isub` with an additional borrow flag input and output. + + ```text + a &= x - (y + b_{in}) \pmod 2^B \\ + b_{out} &= x < y + b_{in} + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, b_if_in]) + .operands_out(vec![a, b_if_out]), + ); + + let bits = &TypeVar::new( + "bits", + "Any integer, float, or boolean scalar or vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(true) + .build(), + ); + let x = &Operand::new("x", bits); + let y = &Operand::new("y", bits); + let a = &Operand::new("a", bits); + + ig.push( + Inst::new( + "band", + r#" + Bitwise and. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bor", + r#" + Bitwise or. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bxor", + r#" + Bitwise xor. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bnot", + r#" + Bitwise not. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "band_not", + r#" + Bitwise and not. + + Computes `x & ~y`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bor_not", + r#" + Bitwise or not. + + Computes `x | ~y`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bxor_not", + r#" + Bitwise xor not. + + Computes `x ^ ~y`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", iB); + let Y = &Operand::new("Y", &imm.imm64); + let a = &Operand::new("a", iB); + + ig.push( + Inst::new( + "band_imm", + r#" + Bitwise and with immediate. + + Same as `band`, but one operand is an immediate constant. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bor_imm", + r#" + Bitwise or with immediate. + + Same as `bor`, but one operand is an immediate constant. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bxor_imm", + r#" + Bitwise xor with immediate. + + Same as `bxor`, but one operand is an immediate constant. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Int).with_doc("Scalar or vector value to shift"); + let y = &Operand::new("y", iB).with_doc("Number of bits to shift"); + let Y = &Operand::new("Y", &imm.imm64); + let a = &Operand::new("a", Int); + + ig.push( + Inst::new( + "rotl", + r#" + Rotate left. + + Rotate the bits in ``x`` by ``y`` places. 
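+
+        For example, rotating the i8 value `0b1000_0001` left by one place
+        yields `0b0000_0011`.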
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "rotr", + r#" + Rotate right. + + Rotate the bits in ``x`` by ``y`` places. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "rotl_imm", + r#" + Rotate left by immediate. + "#, + &formats.binary_imm, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "rotr_imm", + r#" + Rotate right by immediate. + "#, + &formats.binary_imm, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ishl", + r#" + Integer shift left. Shift the bits in ``x`` towards the MSB by ``y`` + places. Shift in zero bits to the LSB. + + The shift amount is masked to the size of ``x``. + + When shifting a B-bits integer type, this instruction computes: + + ```text + s &:= y \pmod B, + a &:= x \cdot 2^s \pmod{2^B}. + ``` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ushr", + r#" + Unsigned shift right. Shift bits in ``x`` towards the LSB by ``y`` + places, shifting in zero bits to the MSB. Also called a *logical + shift*. + + The shift amount is masked to the size of the register. + + When shifting a B-bits integer type, this instruction computes: + + ```text + s &:= y \pmod B, + a &:= \lfloor x \cdot 2^{-s} \rfloor. + ``` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "sshr", + r#" + Signed shift right. Shift bits in ``x`` towards the LSB by ``y`` + places, shifting in sign bits to the MSB. Also called an *arithmetic + shift*. + + The shift amount is masked to the size of the register. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ishl_imm", + r#" + Integer shift left by immediate. + + The shift amount is masked to the size of ``x``. + "#, + &formats.binary_imm, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ushr_imm", + r#" + Unsigned shift right by immediate. + + The shift amount is masked to the size of the register. + "#, + &formats.binary_imm, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "sshr_imm", + r#" + Signed shift right by immediate. + + The shift amount is masked to the size of the register. + "#, + &formats.binary_imm, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", iB); + let a = &Operand::new("a", iB); + + ig.push( + Inst::new( + "bitrev", + r#" + Reverse the bits of a integer. + + Reverses the bits in ``x``. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "clz", + r#" + Count leading zero bits. + + Starting from the MSB in ``x``, count the number of zero bits before + reaching the first one bit. When ``x`` is zero, returns the size of x + in bits. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "cls", + r#" + Count leading sign bits. + + Starting from the MSB after the sign bit in ``x``, count the number of + consecutive bits identical to the sign bit. When ``x`` is 0 or -1, + returns one less than the size of x in bits. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ctz", + r#" + Count trailing zeros. 
+ + Starting from the LSB in ``x``, count the number of zero bits before + reaching the first one bit. When ``x`` is zero, returns the size of x + in bits. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "popcnt", + r#" + Population count + + Count the number of one bits in ``x``. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let Float = &TypeVar::new( + "Float", + "A scalar or vector floating point number", + TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let Cond = &Operand::new("Cond", &imm.floatcc); + let x = &Operand::new("x", Float); + let y = &Operand::new("y", Float); + let a = &Operand::new("a", &Float.as_bool()); + + ig.push( + Inst::new( + "fcmp", + r#" + Floating point comparison. + + Two IEEE 754-2008 floating point numbers, `x` and `y`, relate to each + other in exactly one of four ways: + + == ========================================== + UN Unordered when one or both numbers is NaN. + EQ When `x = y`. (And `0.0 = -0.0`). + LT When `x < y`. + GT When `x > y`. + == ========================================== + + The 14 `floatcc` condition codes each correspond to a subset of + the four relations, except for the empty set which would always be + false, and the full set which would always be true. + + The condition codes are divided into 7 'ordered' conditions which don't + include UN, and 7 unordered conditions which all include UN. + + +-------+------------+---------+------------+-------------------------+ + |Ordered |Unordered |Condition | + +=======+============+=========+============+=========================+ + |ord |EQ | LT | GT|uno |UN |NaNs absent / present. | + +-------+------------+---------+------------+-------------------------+ + |eq |EQ |ueq |UN | EQ |Equal | + +-------+------------+---------+------------+-------------------------+ + |one |LT | GT |ne |UN | LT | GT|Not equal | + +-------+------------+---------+------------+-------------------------+ + |lt |LT |ult |UN | LT |Less than | + +-------+------------+---------+------------+-------------------------+ + |le |LT | EQ |ule |UN | LT | EQ|Less than or equal | + +-------+------------+---------+------------+-------------------------+ + |gt |GT |ugt |UN | GT |Greater than | + +-------+------------+---------+------------+-------------------------+ + |ge |GT | EQ |uge |UN | GT | EQ|Greater than or equal | + +-------+------------+---------+------------+-------------------------+ + + The standard C comparison operators, `<, <=, >, >=`, are all ordered, + so they are false if either operand is NaN. The C equality operator, + `==`, is ordered, and since inequality is defined as the logical + inverse it is *unordered*. They map to the `floatcc` condition + codes as follows: + + ==== ====== ============ + C `Cond` Subset + ==== ====== ============ + `==` eq EQ + `!=` ne UN | LT | GT + `<` lt LT + `<=` le LT | EQ + `>` gt GT + `>=` ge GT | EQ + ==== ====== ============ + + This subset of condition codes also corresponds to the WebAssembly + floating point comparisons of the same name. + + When this instruction compares floating point vectors, it returns a + boolean vector with the results of lane-wise comparisons. + "#, + &formats.float_compare, + ) + .operands_in(vec![Cond, x, y]) + .operands_out(vec![a]), + ); + + let f = &Operand::new("f", fflags); + + ig.push( + Inst::new( + "ffcmp", + r#" + Floating point comparison returning flags. 
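The bit-counting semantics above map directly onto Rust's integer intrinsics; `cls` has no direct equivalent, so this sketch (ours, not the patch's; `leading_ones` needs Rust 1.46+) derives it from `leading_zeros`/`leading_ones`:

```rust
// Count leading sign bits, per the `cls` doc: bits after the sign bit that
// equal it; 0 and -1 give one less than the bit width.
fn cls32(x: i32) -> u32 {
    if x < 0 {
        x.leading_ones() - 1
    } else {
        x.leading_zeros() - 1
    }
}

fn main() {
    assert_eq!(0_i32.leading_zeros(), 32); // clz(0) = bit width, as documented
    assert_eq!(8_i32.trailing_zeros(), 3); // ctz
    assert_eq!(cls32(0), 31);
    assert_eq!(cls32(-1), 31);
    assert_eq!(0xF0_i32.count_ones(), 4);  // popcnt
}
```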
+
+            Compares two numbers like `fcmp`, but returns floating point CPU
+            flags instead of testing a specific condition.
+            "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![f]),
+    );
+
+    let x = &Operand::new("x", Float);
+    let y = &Operand::new("y", Float);
+    let z = &Operand::new("z", Float);
+    let a = &Operand::new("a", Float).with_doc("Result of applying operator to each lane");
+
+    ig.push(
+        Inst::new(
+            "fadd",
+            r#"
+            Floating point addition.
+            "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "fsub",
+            r#"
+            Floating point subtraction.
+            "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "fmul",
+            r#"
+            Floating point multiplication.
+            "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "fdiv",
+            r#"
+            Floating point division.
+
+            Unlike the integer division instructions `sdiv` and
+            `udiv`, this can't trap. Division by zero is infinity or
+            NaN, depending on the dividend.
+            "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "sqrt",
+            r#"
+            Floating point square root.
+            "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "fma",
+            r#"
+            Floating point fused multiply-and-add.
+
+            Computes `a := xy+z` without any intermediate rounding of the
+            product.
+            "#,
+            &formats.ternary,
+        )
+        .operands_in(vec![x, y, z])
+        .operands_out(vec![a]),
+    );
+
+    let a = &Operand::new("a", Float).with_doc("``x`` with its sign bit inverted");
+
+    ig.push(
+        Inst::new(
+            "fneg",
+            r#"
+            Floating point negation.
+
+            Note that this is a pure bitwise operation.
+            "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    let a = &Operand::new("a", Float).with_doc("``x`` with its sign bit cleared");
+
+    ig.push(
+        Inst::new(
+            "fabs",
+            r#"
+            Floating point absolute value.
+
+            Note that this is a pure bitwise operation.
+            "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    let a = &Operand::new("a", Float).with_doc("``x`` with its sign bit changed to that of ``y``");
+
+    ig.push(
+        Inst::new(
+            "fcopysign",
+            r#"
+            Floating point copy sign.
+
+            Note that this is a pure bitwise operation. The sign bit from ``y`` is
+            copied to the sign bit of ``x``.
+            "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    let a = &Operand::new("a", Float).with_doc("The smaller of ``x`` and ``y``");
+
+    ig.push(
+        Inst::new(
+            "fmin",
+            r#"
+            Floating point minimum, propagating NaNs.
+
+            If either operand is NaN, this returns a NaN.
+            "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    let a = &Operand::new("a", Float).with_doc("The larger of ``x`` and ``y``");
+
+    ig.push(
+        Inst::new(
+            "fmax",
+            r#"
+            Floating point maximum, propagating NaNs.
+
+            If either operand is NaN, this returns a NaN.
+            "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
+    let a = &Operand::new("a", Float).with_doc("``x`` rounded to integral value");
+
+    ig.push(
+        Inst::new(
+            "ceil",
+            r#"
+            Round floating point to an integral value, towards positive infinity.
+            "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    ig.push(
+        Inst::new(
+            "floor",
+            r#"
+            Round floating point to an integral value, towards negative infinity.
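Since `fneg`, `fabs`, and `fcopysign` are specified above as pure sign-bit manipulations, the following sketch (not part of the patch) shows the equivalent bit twiddling on `f32`; it mirrors the `band_not`/`bxor`/`bor` expansion that legalize.rs applies later in this patch:

```rust
fn main() {
    let minus_zero = 0x8000_0000_u32; // only the sign bit set
    let x = 1.5_f32;

    // fneg: flip the sign bit (bxor with -0.0).
    assert_eq!(f32::from_bits(x.to_bits() ^ minus_zero), -1.5);

    // fabs: clear the sign bit (band_not with -0.0).
    assert_eq!(f32::from_bits((-x).to_bits() & !minus_zero), 1.5);

    // fcopysign: clear x's sign bit, then OR in y's.
    let y = -2.0_f32;
    let cs = (x.to_bits() & !minus_zero) | (y.to_bits() & minus_zero);
    assert_eq!(f32::from_bits(cs), -1.5);
}
```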
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "trunc", + r#" + Round floating point round to integral, towards zero. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "nearest", + r#" + Round floating point round to integral, towards nearest with ties to + even. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", b1); + let x = &Operand::new("x", Ref); + + ig.push( + Inst::new( + "is_null", + r#" + Reference verification. + + The condition code determines if the reference type in question is + null or not. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", b1); + let x = &Operand::new("x", Ref); + + ig.push( + Inst::new( + "is_invalid", + r#" + Reference verification. + + The condition code determines if the reference type in question is + invalid or not. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let Cond = &Operand::new("Cond", &imm.intcc); + let f = &Operand::new("f", iflags); + let a = &Operand::new("a", b1); + + ig.push( + Inst::new( + "trueif", + r#" + Test integer CPU flags for a specific condition. + + Check the CPU flags in ``f`` against the ``Cond`` condition code and + return true when the condition code is satisfied. + "#, + &formats.int_cond, + ) + .operands_in(vec![Cond, f]) + .operands_out(vec![a]), + ); + + let Cond = &Operand::new("Cond", &imm.floatcc); + let f = &Operand::new("f", fflags); + + ig.push( + Inst::new( + "trueff", + r#" + Test floating point CPU flags for a specific condition. + + Check the CPU flags in ``f`` against the ``Cond`` condition code and + return true when the condition code is satisfied. + "#, + &formats.float_cond, + ) + .operands_in(vec![Cond, f]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Mem); + let a = &Operand::new("a", MemTo).with_doc("Bits of `x` reinterpreted"); + + ig.push( + Inst::new( + "bitcast", + r#" + Reinterpret the bits in `x` as a different type. + + The input and output types must be storable to memory and of the same + size. A bitcast is equivalent to storing one type and loading the other + type from the same address. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Any); + let a = &Operand::new("a", AnyTo).with_doc("Bits of `x` reinterpreted"); + + ig.push( + Inst::new( + "raw_bitcast", + r#" + Cast the bits in `x` as a different type of the same bit width. + + This instruction does not change the data's representation but allows + data in registers to be used as different types, e.g. an i32x4 as a + b8x16. The only constraint on the result `a` is that it can be + `raw_bitcast` back to the original type. Also, in a raw_bitcast between + vector types with the same number of lanes, the value of each result + lane is a raw_bitcast of the corresponding operand lane. TODO there is + currently no mechanism for enforcing the bit width constraint. 
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", TxN).with_doc("A vector value"); + let s = &Operand::new("s", &TxN.lane_of()).with_doc("A scalar value"); + + ig.push( + Inst::new( + "scalar_to_vector", + r#" + Scalar To Vector -- move a value out of a scalar register and into a vector register; the + scalar will be moved to the lowest-order bits of the vector register. Note that this + instruction is intended as a low-level legalization instruction and frontends should prefer + insertlane; on certain architectures, scalar_to_vector may zero the highest-order bits for some + types (e.g. integers) but not for others (e.g. floats). + "#, + &formats.unary, + ) + .operands_in(vec![s]) + .operands_out(vec![a]), + ); + + let Bool = &TypeVar::new( + "Bool", + "A scalar or vector boolean type", + TypeSetBuilder::new() + .bools(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let BoolTo = &TypeVar::new( + "BoolTo", + "A smaller boolean type with the same number of lanes", + TypeSetBuilder::new() + .bools(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let x = &Operand::new("x", Bool); + let a = &Operand::new("a", BoolTo); + + ig.push( + Inst::new( + "breduce", + r#" + Convert `x` to a smaller boolean type in the platform-defined way. + + The result type must have the same number of vector lanes as the input, + and each lane must not have more bits that the input lanes. If the + input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(Bool.clone(), BoolTo.clone())]), + ); + + let BoolTo = &TypeVar::new( + "BoolTo", + "A larger boolean type with the same number of lanes", + TypeSetBuilder::new() + .bools(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Bool); + let a = &Operand::new("a", BoolTo); + + ig.push( + Inst::new( + "bextend", + r#" + Convert `x` to a larger boolean type in the platform-defined way. + + The result type must have the same number of vector lanes as the input, + and each lane must not have fewer bits that the input lanes. If the + input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(BoolTo.clone(), Bool.clone())]), + ); + + let IntTo = &TypeVar::new( + "IntTo", + "An integer type with the same number of lanes", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Bool); + let a = &Operand::new("a", IntTo); + + ig.push( + Inst::new( + "bint", + r#" + Convert `x` to an integer. + + True maps to 1 and false maps to 0. The result type must have the same + number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bmask", + r#" + Convert `x` to an integer mask. + + True maps to all 1s and false maps to all 0s. The result type must have + the same number of vector lanes as the input. 
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let Int = &TypeVar::new( + "Int", + "A scalar or vector integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let IntTo = &TypeVar::new( + "IntTo", + "A smaller integer type with the same number of lanes", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Int); + let a = &Operand::new("a", IntTo); + + ig.push( + Inst::new( + "ireduce", + r#" + Convert `x` to a smaller integer type by dropping high bits. + + Each lane in `x` is converted to a smaller integer type by discarding + the most significant bits. This is the same as reducing modulo + `2^n`. + + The result type must have the same number of vector lanes as the input, + and each lane must not have more bits that the input lanes. If the + input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]), + ); + + let IntTo = &TypeVar::new( + "IntTo", + "A larger integer type with the same number of lanes", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Int); + let a = &Operand::new("a", IntTo); + + ig.push( + Inst::new( + "uextend", + r#" + Convert `x` to a larger integer type by zero-extending. + + Each lane in `x` is converted to a larger integer type by adding + zeroes. The result has the same numerical value as `x` when both are + interpreted as unsigned integers. + + The result type must have the same number of vector lanes as the input, + and each lane must not have fewer bits that the input lanes. If the + input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(IntTo.clone(), Int.clone())]), + ); + + ig.push( + Inst::new( + "sextend", + r#" + Convert `x` to a larger integer type by sign-extending. + + Each lane in `x` is converted to a larger integer type by replicating + the sign bit. The result has the same numerical value as `x` when both + are interpreted as signed integers. + + The result type must have the same number of vector lanes as the input, + and each lane must not have fewer bits that the input lanes. If the + input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(IntTo.clone(), Int.clone())]), + ); + + let FloatTo = &TypeVar::new( + "FloatTo", + "A scalar or vector floating point number", + TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Float); + let a = &Operand::new("a", FloatTo); + + ig.push( + Inst::new( + "fpromote", + r#" + Convert `x` to a larger floating point format. + + Each lane in `x` is converted to the destination floating point format. + This is an exact operation. + + Cranelift currently only supports two floating point formats + - `f32` and `f64`. This may change in the future. + + The result type must have the same number of vector lanes as the input, + and the result lanes must not have fewer bits than the input lanes. If + the input and output types are the same, this is a no-op. 
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(FloatTo.clone(), Float.clone())]), + ); + + ig.push( + Inst::new( + "fdemote", + r#" + Convert `x` to a smaller floating point format. + + Each lane in `x` is converted to the destination floating point format + by rounding to nearest, ties to even. + + Cranelift currently only supports two floating point formats + - `f32` and `f64`. This may change in the future. + + The result type must have the same number of vector lanes as the input, + and the result lanes must not have more bits than the input lanes. If + the input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(Float.clone(), FloatTo.clone())]), + ); + + let x = &Operand::new("x", Float); + let a = &Operand::new("a", IntTo); + + ig.push( + Inst::new( + "fcvt_to_uint", + r#" + Convert floating point to unsigned integer. + + Each lane in `x` is converted to an unsigned integer by rounding + towards zero. If `x` is NaN or if the unsigned integral value cannot be + represented in the result type, this instruction traps. + + The result type must have the same number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "fcvt_to_uint_sat", + r#" + Convert floating point to unsigned integer as fcvt_to_uint does, but + saturates the input instead of trapping. NaN and negative values are + converted to 0. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fcvt_to_sint", + r#" + Convert floating point to signed integer. + + Each lane in `x` is converted to a signed integer by rounding towards + zero. If `x` is NaN or if the signed integral value cannot be + represented in the result type, this instruction traps. + + The result type must have the same number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "fcvt_to_sint_sat", + r#" + Convert floating point to signed integer as fcvt_to_sint does, but + saturates the input instead of trapping. NaN values are converted to 0. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Int); + let a = &Operand::new("a", FloatTo); + + ig.push( + Inst::new( + "fcvt_from_uint", + r#" + Convert unsigned integer to floating point. + + Each lane in `x` is interpreted as an unsigned integer and converted to + floating point using round to nearest, ties to even. + + The result type must have the same number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fcvt_from_sint", + r#" + Convert signed integer to floating point. + + Each lane in `x` is interpreted as a signed integer and converted to + floating point using round to nearest, ties to even. + + The result type must have the same number of vector lanes as the input. 
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let WideInt = &TypeVar::new( + "WideInt", + "An integer type with lanes from `i16` upwards", + TypeSetBuilder::new() + .ints(16..128) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", WideInt); + let lo = &Operand::new("lo", &WideInt.half_width()).with_doc("The low bits of `x`"); + let hi = &Operand::new("hi", &WideInt.half_width()).with_doc("The high bits of `x`"); + + ig.push( + Inst::new( + "isplit", + r#" + Split an integer into low and high parts. + + Vectors of integers are split lane-wise, so the results have the same + number of lanes as the input, but the lanes are half the size. + + Returns the low half of `x` and the high half of `x` as two independent + values. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![lo, hi]) + .is_ghost(true), + ); + + let NarrowInt = &TypeVar::new( + "NarrowInt", + "An integer type with lanes type to `i64`", + TypeSetBuilder::new() + .ints(8..64) + .simd_lanes(Interval::All) + .build(), + ); + + let lo = &Operand::new("lo", NarrowInt); + let hi = &Operand::new("hi", NarrowInt); + let a = &Operand::new("a", &NarrowInt.double_width()) + .with_doc("The concatenation of `lo` and `hi`"); + + ig.push( + Inst::new( + "iconcat", + r#" + Concatenate low and high bits to form a larger integer type. + + Vectors of integers are concatenated lane-wise such that the result has + the same number of lanes as the inputs, but the lanes are twice the + size. + "#, + &formats.binary, + ) + .operands_in(vec![lo, hi]) + .operands_out(vec![a]) + .is_ghost(true), + ); + + ig.build() +} diff --git a/cranelift/codegen/meta/src/shared/legalize.rs b/cranelift/codegen/meta/src/shared/legalize.rs new file mode 100644 index 0000000000..1b37f9661b --- /dev/null +++ b/cranelift/codegen/meta/src/shared/legalize.rs @@ -0,0 +1,1061 @@ +use crate::cdsl::ast::{var, ExprBuilder, Literal}; +use crate::cdsl::instructions::{Bindable, Instruction, InstructionGroup}; +use crate::cdsl::xform::{TransformGroupBuilder, TransformGroups}; + +use crate::shared::immediates::Immediates; +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I128, I16, I32, I64, I8}; +use cranelift_codegen_shared::condcodes::{CondCode, IntCC}; + +#[allow(clippy::many_single_char_names, clippy::cognitive_complexity)] +pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGroups { + let mut narrow = TransformGroupBuilder::new( + "narrow", + r#" + Legalize instructions by narrowing. + + The transformations in the 'narrow' group work by expressing + instructions in terms of smaller types. Operations on vector types are + expressed in terms of vector types with fewer lanes, and integer + operations are expressed in terms of smaller integer types. + "#, + ); + + let mut widen = TransformGroupBuilder::new( + "widen", + r#" + Legalize instructions by widening. + + The transformations in the 'widen' group work by expressing + instructions in terms of larger types. + "#, + ); + + let mut expand = TransformGroupBuilder::new( + "expand", + r#" + Legalize instructions by expansion. + + Rewrite instructions in terms of other instructions, generally + operating on the same types as the original instructions. + "#, + ); + + // List of instructions. 
+ let band = insts.by_name("band"); + let band_imm = insts.by_name("band_imm"); + let band_not = insts.by_name("band_not"); + let bint = insts.by_name("bint"); + let bitrev = insts.by_name("bitrev"); + let bnot = insts.by_name("bnot"); + let bor = insts.by_name("bor"); + let bor_imm = insts.by_name("bor_imm"); + let bor_not = insts.by_name("bor_not"); + let brnz = insts.by_name("brnz"); + let brz = insts.by_name("brz"); + let br_icmp = insts.by_name("br_icmp"); + let br_table = insts.by_name("br_table"); + let bxor = insts.by_name("bxor"); + let bxor_imm = insts.by_name("bxor_imm"); + let bxor_not = insts.by_name("bxor_not"); + let cls = insts.by_name("cls"); + let clz = insts.by_name("clz"); + let ctz = insts.by_name("ctz"); + let fabs = insts.by_name("fabs"); + let f32const = insts.by_name("f32const"); + let f64const = insts.by_name("f64const"); + let fcopysign = insts.by_name("fcopysign"); + let fcvt_from_sint = insts.by_name("fcvt_from_sint"); + let fneg = insts.by_name("fneg"); + let iadd = insts.by_name("iadd"); + let iadd_cin = insts.by_name("iadd_cin"); + let iadd_cout = insts.by_name("iadd_cout"); + let iadd_carry = insts.by_name("iadd_carry"); + let iadd_ifcin = insts.by_name("iadd_ifcin"); + let iadd_ifcout = insts.by_name("iadd_ifcout"); + let iadd_imm = insts.by_name("iadd_imm"); + let icmp = insts.by_name("icmp"); + let icmp_imm = insts.by_name("icmp_imm"); + let iconcat = insts.by_name("iconcat"); + let iconst = insts.by_name("iconst"); + let ifcmp = insts.by_name("ifcmp"); + let ifcmp_imm = insts.by_name("ifcmp_imm"); + let imul = insts.by_name("imul"); + let imul_imm = insts.by_name("imul_imm"); + let ireduce = insts.by_name("ireduce"); + let irsub_imm = insts.by_name("irsub_imm"); + let ishl = insts.by_name("ishl"); + let ishl_imm = insts.by_name("ishl_imm"); + let isplit = insts.by_name("isplit"); + let istore8 = insts.by_name("istore8"); + let istore16 = insts.by_name("istore16"); + let isub = insts.by_name("isub"); + let isub_bin = insts.by_name("isub_bin"); + let isub_bout = insts.by_name("isub_bout"); + let isub_borrow = insts.by_name("isub_borrow"); + let isub_ifbin = insts.by_name("isub_ifbin"); + let isub_ifbout = insts.by_name("isub_ifbout"); + let jump = insts.by_name("jump"); + let load = insts.by_name("load"); + let popcnt = insts.by_name("popcnt"); + let rotl = insts.by_name("rotl"); + let rotl_imm = insts.by_name("rotl_imm"); + let rotr = insts.by_name("rotr"); + let rotr_imm = insts.by_name("rotr_imm"); + let sdiv = insts.by_name("sdiv"); + let sdiv_imm = insts.by_name("sdiv_imm"); + let select = insts.by_name("select"); + let sextend = insts.by_name("sextend"); + let sshr = insts.by_name("sshr"); + let sshr_imm = insts.by_name("sshr_imm"); + let srem = insts.by_name("srem"); + let srem_imm = insts.by_name("srem_imm"); + let store = insts.by_name("store"); + let udiv = insts.by_name("udiv"); + let udiv_imm = insts.by_name("udiv_imm"); + let uextend = insts.by_name("uextend"); + let uload8 = insts.by_name("uload8"); + let uload16 = insts.by_name("uload16"); + let umulhi = insts.by_name("umulhi"); + let ushr = insts.by_name("ushr"); + let ushr_imm = insts.by_name("ushr_imm"); + let urem = insts.by_name("urem"); + let urem_imm = insts.by_name("urem_imm"); + let trapif = insts.by_name("trapif"); + let trapnz = insts.by_name("trapnz"); + let trapz = insts.by_name("trapz"); + + // Custom expansions for memory objects. 
+ expand.custom_legalize(insts.by_name("global_value"), "expand_global_value"); + expand.custom_legalize(insts.by_name("heap_addr"), "expand_heap_addr"); + expand.custom_legalize(insts.by_name("table_addr"), "expand_table_addr"); + + // Custom expansions for calls. + expand.custom_legalize(insts.by_name("call"), "expand_call"); + + // Custom expansions that need to change the CFG. + // TODO: Add sufficient XForm syntax that we don't need to hand-code these. + expand.custom_legalize(trapz, "expand_cond_trap"); + expand.custom_legalize(trapnz, "expand_cond_trap"); + expand.custom_legalize(br_table, "expand_br_table"); + expand.custom_legalize(select, "expand_select"); + widen.custom_legalize(select, "expand_select"); // small ints + + // Custom expansions for floating point constants. + // These expansions require bit-casting or creating constant pool entries. + expand.custom_legalize(f32const, "expand_fconst"); + expand.custom_legalize(f64const, "expand_fconst"); + + // Custom expansions for stack memory accesses. + expand.custom_legalize(insts.by_name("stack_load"), "expand_stack_load"); + expand.custom_legalize(insts.by_name("stack_store"), "expand_stack_store"); + + // Custom expansions for small stack memory acccess. + widen.custom_legalize(insts.by_name("stack_load"), "expand_stack_load"); + widen.custom_legalize(insts.by_name("stack_store"), "expand_stack_store"); + + // List of variables to reuse in patterns. + let x = var("x"); + let y = var("y"); + let z = var("z"); + let a = var("a"); + let a1 = var("a1"); + let a2 = var("a2"); + let a3 = var("a3"); + let a4 = var("a4"); + let b = var("b"); + let b1 = var("b1"); + let b2 = var("b2"); + let b3 = var("b3"); + let b4 = var("b4"); + let b_in = var("b_in"); + let b_int = var("b_int"); + let c = var("c"); + let c1 = var("c1"); + let c2 = var("c2"); + let c3 = var("c3"); + let c4 = var("c4"); + let c_in = var("c_in"); + let c_int = var("c_int"); + let d = var("d"); + let d1 = var("d1"); + let d2 = var("d2"); + let d3 = var("d3"); + let d4 = var("d4"); + let e = var("e"); + let e1 = var("e1"); + let e2 = var("e2"); + let e3 = var("e3"); + let e4 = var("e4"); + let f = var("f"); + let f1 = var("f1"); + let f2 = var("f2"); + let xl = var("xl"); + let xh = var("xh"); + let yl = var("yl"); + let yh = var("yh"); + let al = var("al"); + let ah = var("ah"); + let cc = var("cc"); + let block = var("block"); + let block1 = var("block1"); + let block2 = var("block2"); + let ptr = var("ptr"); + let flags = var("flags"); + let offset = var("off"); + let vararg = var("vararg"); + + narrow.custom_legalize(load, "narrow_load"); + narrow.custom_legalize(store, "narrow_store"); + + // iconst.i64 can't be legalized in the meta langage (because integer literals can't be + // embedded as part of arguments), so use a custom legalization for now. 
+ narrow.custom_legalize(iconst, "narrow_iconst"); + + { + let inst = uextend.bind(I128).bind(I64); + narrow.legalize( + def!(a = inst(x)), + vec![ + def!(ah = iconst(Literal::constant(&imm.imm64, 0))), + def!(a = iconcat(x, ah)), + ], + ); + } + + { + let inst = sextend.bind(I128).bind(I64); + narrow.legalize( + def!(a = inst(x)), + vec![ + def!(ah = sshr_imm(x, Literal::constant(&imm.imm64, 63))), // splat sign bit to whole number + def!(a = iconcat(x, ah)), + ], + ); + } + + for &bin_op in &[band, bor, bxor, band_not, bor_not, bxor_not] { + narrow.legalize( + def!(a = bin_op(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(al = bin_op(xl, yl)), + def!(ah = bin_op(xh, yh)), + def!(a = iconcat(al, ah)), + ], + ); + } + + narrow.legalize( + def!(a = bnot(x)), + vec![ + def!((xl, xh) = isplit(x)), + def!(al = bnot(xl)), + def!(ah = bnot(xh)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow.legalize( + def!(a = select(c, x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(al = select(c, xl, yl)), + def!(ah = select(c, xh, yh)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow.legalize( + def!(brz.I128(x, block, vararg)), + vec![ + def!((xl, xh) = isplit(x)), + def!( + a = icmp_imm( + Literal::enumerator_for(&imm.intcc, "eq"), + xl, + Literal::constant(&imm.imm64, 0) + ) + ), + def!( + b = icmp_imm( + Literal::enumerator_for(&imm.intcc, "eq"), + xh, + Literal::constant(&imm.imm64, 0) + ) + ), + def!(c = band(a, b)), + def!(brnz(c, block, vararg)), + ], + ); + + narrow.legalize( + def!(brnz.I128(x, block1, vararg)), + vec![ + def!((xl, xh) = isplit(x)), + def!(brnz(xl, block1, vararg)), + def!(jump(block2, Literal::empty_vararg())), + block!(block2), + def!(brnz(xh, block1, vararg)), + ], + ); + + narrow.legalize( + def!(a = popcnt.I128(x)), + vec![ + def!((xl, xh) = isplit(x)), + def!(e1 = popcnt(xl)), + def!(e2 = popcnt(xh)), + def!(e3 = iadd(e1, e2)), + def!(a = uextend(e3)), + ], + ); + + // TODO(ryzokuken): benchmark this and decide if branching is a faster + // approach than evaluating boolean expressions. 
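The `popcnt.I128` narrowing above is simply popcnt-per-half plus an add; a direct check (not part of the patch):

```rust
// Models the narrow legalization: popcnt(x) = popcnt(lo) + popcnt(hi).
fn popcnt128(x: u128) -> u32 {
    (x as u64).count_ones() + ((x >> 64) as u64).count_ones()
}

fn main() {
    assert_eq!(popcnt128(u128::MAX), 128);
    assert_eq!(popcnt128(1 << 100), 1);
}
```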
+ + narrow.custom_legalize(icmp_imm, "narrow_icmp_imm"); + + let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); + let intcc_ne = Literal::enumerator_for(&imm.intcc, "ne"); + for &(int_ty, int_ty_half) in &[(I64, I32), (I128, I64)] { + narrow.legalize( + def!(b = icmp.int_ty(intcc_eq, x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(b1 = icmp.int_ty_half(intcc_eq, xl, yl)), + def!(b2 = icmp.int_ty_half(intcc_eq, xh, yh)), + def!(b = band(b1, b2)), + ], + ); + + narrow.legalize( + def!(b = icmp.int_ty(intcc_ne, x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(b1 = icmp.int_ty_half(intcc_ne, xl, yl)), + def!(b2 = icmp.int_ty_half(intcc_ne, xh, yh)), + def!(b = bor(b1, b2)), + ], + ); + + use IntCC::*; + for cc in &[ + SignedGreaterThan, + SignedGreaterThanOrEqual, + SignedLessThan, + SignedLessThanOrEqual, + UnsignedGreaterThan, + UnsignedGreaterThanOrEqual, + UnsignedLessThan, + UnsignedLessThanOrEqual, + ] { + let intcc_cc = Literal::enumerator_for(&imm.intcc, cc.to_static_str()); + let cc1 = Literal::enumerator_for(&imm.intcc, cc.without_equal().to_static_str()); + let cc2 = + Literal::enumerator_for(&imm.intcc, cc.inverse().without_equal().to_static_str()); + let cc3 = Literal::enumerator_for(&imm.intcc, cc.unsigned().to_static_str()); + narrow.legalize( + def!(b = icmp.int_ty(intcc_cc, x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + // X = cc1 || (!cc2 && cc3) + def!(b1 = icmp.int_ty_half(cc1, xh, yh)), + def!(b2 = icmp.int_ty_half(cc2, xh, yh)), + def!(b3 = icmp.int_ty_half(cc3, xl, yl)), + def!(c1 = bnot(b2)), + def!(c2 = band(c1, b3)), + def!(b = bor(b1, c2)), + ], + ); + } + } + + // TODO(ryzokuken): explore the perf diff w/ x86_umulx and consider have a + // separate legalization for x86. + for &ty in &[I64, I128] { + narrow.legalize( + def!(a = imul.ty(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(a1 = imul(xh, yl)), + def!(a2 = imul(xl, yh)), + def!(a3 = iadd(a1, a2)), + def!(a4 = umulhi(xl, yl)), + def!(ah = iadd(a3, a4)), + def!(al = imul(xl, yl)), + def!(a = iconcat(al, ah)), + ], + ); + } + + // Widen instructions with one input operand. + for &op in &[bnot, popcnt] { + for &int_ty in &[I8, I16] { + widen.legalize( + def!(a = op.int_ty(b)), + vec![ + def!(x = uextend.I32(b)), + def!(z = op.I32(x)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + } + + // Widen instructions with two input operands. + let mut widen_two_arg = |signed: bool, op: &Instruction| { + for &int_ty in &[I8, I16] { + let sign_ext_op = if signed { sextend } else { uextend }; + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = sign_ext_op.I32(b)), + def!(y = sign_ext_op.I32(c)), + def!(z = op.I32(x, y)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + }; + + for bin_op in &[ + iadd, isub, imul, udiv, urem, band, bor, bxor, band_not, bor_not, bxor_not, + ] { + widen_two_arg(false, bin_op); + } + for bin_op in &[sdiv, srem] { + widen_two_arg(true, bin_op); + } + + // Widen instructions using immediate operands. 
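The signed/unsigned comparison narrowing above uses the decomposition `b = cc1(xh, yh) | (!cc2(xh, yh) & cc3(xl, yl))`, where `cc1` strips equality from the condition, `cc2` is the inverse without equality, and `cc3` is the unsigned variant on the low halves. A sketch verifying it for `slt` on i128 (ours, not the patch's):

```rust
// slt: b = (xh <s yh) | (!(xh >s yh) & (xl <u yl))
fn slt128(x: i128, y: i128) -> bool {
    let (xl, xh) = (x as u64, (x >> 64) as i64); // unsigned low, signed high
    let (yl, yh) = (y as u64, (y >> 64) as i64);
    (xh < yh) | (!(yh < xh) & (xl < yl))
}

fn main() {
    for &a in &[-2_i128, -1, 0, 1, i128::MIN, i128::MAX] {
        for &b in &[-2_i128, -1, 0, 1, i128::MIN, i128::MAX] {
            assert_eq!(slt128(a, b), a < b);
        }
    }
}
```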
+ let mut widen_imm = |signed: bool, op: &Instruction| { + for &int_ty in &[I8, I16] { + let sign_ext_op = if signed { sextend } else { uextend }; + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = sign_ext_op.I32(b)), + def!(z = op.I32(x, c)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + }; + + for bin_op in &[ + iadd_imm, imul_imm, udiv_imm, urem_imm, band_imm, bor_imm, bxor_imm, irsub_imm, + ] { + widen_imm(false, bin_op); + } + for bin_op in &[sdiv_imm, srem_imm] { + widen_imm(true, bin_op); + } + + for &(int_ty, num) in &[(I8, 24), (I16, 16)] { + let imm = Literal::constant(&imm.imm64, -num); + + widen.legalize( + def!(a = clz.int_ty(b)), + vec![ + def!(c = uextend.I32(b)), + def!(d = clz.I32(c)), + def!(e = iadd_imm(d, imm)), + def!(a = ireduce.int_ty(e)), + ], + ); + + widen.legalize( + def!(a = cls.int_ty(b)), + vec![ + def!(c = sextend.I32(b)), + def!(d = cls.I32(c)), + def!(e = iadd_imm(d, imm)), + def!(a = ireduce.int_ty(e)), + ], + ); + } + + for &(int_ty, num) in &[(I8, 1 << 8), (I16, 1 << 16)] { + let num = Literal::constant(&imm.imm64, num); + widen.legalize( + def!(a = ctz.int_ty(b)), + vec![ + def!(c = uextend.I32(b)), + // When `b` is zero, returns the size of x in bits. + def!(d = bor_imm(c, num)), + def!(e = ctz.I32(d)), + def!(a = ireduce.int_ty(e)), + ], + ); + } + + // iconst + for &int_ty in &[I8, I16] { + widen.legalize( + def!(a = iconst.int_ty(b)), + vec![def!(c = iconst.I32(b)), def!(a = ireduce.int_ty(c))], + ); + } + + for &extend_op in &[uextend, sextend] { + // The sign extension operators have two typevars: the result has one and controls the + // instruction, then the input has one. + let bound = extend_op.bind(I16).bind(I8); + widen.legalize( + def!(a = bound(b)), + vec![def!(c = extend_op.I32(b)), def!(a = ireduce(c))], + ); + } + + widen.legalize( + def!(store.I8(flags, a, ptr, offset)), + vec![ + def!(b = uextend.I32(a)), + def!(istore8(flags, b, ptr, offset)), + ], + ); + + widen.legalize( + def!(store.I16(flags, a, ptr, offset)), + vec![ + def!(b = uextend.I32(a)), + def!(istore16(flags, b, ptr, offset)), + ], + ); + + widen.legalize( + def!(a = load.I8(flags, ptr, offset)), + vec![ + def!(b = uload8.I32(flags, ptr, offset)), + def!(a = ireduce(b)), + ], + ); + + widen.legalize( + def!(a = load.I16(flags, ptr, offset)), + vec![ + def!(b = uload16.I32(flags, ptr, offset)), + def!(a = ireduce(b)), + ], + ); + + for &int_ty in &[I8, I16] { + widen.legalize( + def!(br_table.int_ty(x, y, z)), + vec![def!(b = uextend.I32(x)), def!(br_table(b, y, z))], + ); + } + + for &int_ty in &[I8, I16] { + widen.legalize( + def!(a = bint.int_ty(b)), + vec![def!(x = bint.I32(b)), def!(a = ireduce.int_ty(x))], + ); + } + + for &int_ty in &[I8, I16] { + for &op in &[ishl, ishl_imm, ushr, ushr_imm] { + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = uextend.I32(b)), + def!(z = op.I32(x, c)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + + for &op in &[sshr, sshr_imm] { + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = sextend.I32(b)), + def!(z = op.I32(x, c)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + + for cc in &["eq", "ne", "ugt", "ult", "uge", "ule"] { + let w_cc = Literal::enumerator_for(&imm.intcc, cc); + widen.legalize( + def!(a = icmp_imm.int_ty(w_cc, b, c)), + vec![def!(x = uextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))], + ); + widen.legalize( + def!(a = icmp.int_ty(w_cc, b, c)), + vec![ + def!(x = uextend.I32(b)), + def!(y = uextend.I32(c)), + def!(a = icmp.I32(w_cc, x, y)), + ], + ); + } + + for cc in 
&["sgt", "slt", "sge", "sle"] { + let w_cc = Literal::enumerator_for(&imm.intcc, cc); + widen.legalize( + def!(a = icmp_imm.int_ty(w_cc, b, c)), + vec![def!(x = sextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))], + ); + + widen.legalize( + def!(a = icmp.int_ty(w_cc, b, c)), + vec![ + def!(x = sextend.I32(b)), + def!(y = sextend.I32(c)), + def!(a = icmp(w_cc, x, y)), + ], + ); + } + } + + for &ty in &[I8, I16] { + widen.legalize( + def!(brz.ty(x, block, vararg)), + vec![def!(a = uextend.I32(x)), def!(brz(a, block, vararg))], + ); + + widen.legalize( + def!(brnz.ty(x, block, vararg)), + vec![def!(a = uextend.I32(x)), def!(brnz(a, block, vararg))], + ); + } + + // Expand integer operations with carry for RISC architectures that don't have + // the flags. + let intcc_ult = Literal::enumerator_for(&imm.intcc, "ult"); + expand.legalize( + def!((a, c) = iadd_cout(x, y)), + vec![def!(a = iadd(x, y)), def!(c = icmp(intcc_ult, a, x))], + ); + + let intcc_ugt = Literal::enumerator_for(&imm.intcc, "ugt"); + expand.legalize( + def!((a, b) = isub_bout(x, y)), + vec![def!(a = isub(x, y)), def!(b = icmp(intcc_ugt, a, x))], + ); + + expand.legalize( + def!(a = iadd_cin(x, y, c)), + vec![ + def!(a1 = iadd(x, y)), + def!(c_int = bint(c)), + def!(a = iadd(a1, c_int)), + ], + ); + + expand.legalize( + def!(a = isub_bin(x, y, b)), + vec![ + def!(a1 = isub(x, y)), + def!(b_int = bint(b)), + def!(a = isub(a1, b_int)), + ], + ); + + expand.legalize( + def!((a, c) = iadd_carry(x, y, c_in)), + vec![ + def!((a1, c1) = iadd_cout(x, y)), + def!(c_int = bint(c_in)), + def!((a, c2) = iadd_cout(a1, c_int)), + def!(c = bor(c1, c2)), + ], + ); + + expand.legalize( + def!((a, b) = isub_borrow(x, y, b_in)), + vec![ + def!((a1, b1) = isub_bout(x, y)), + def!(b_int = bint(b_in)), + def!((a, b2) = isub_bout(a1, b_int)), + def!(b = bor(b1, b2)), + ], + ); + + // Expansion for fcvt_from_sint for smaller integer types. + // This uses expand and not widen because the controlling type variable for + // this instruction is f32/f64, which is legalized as part of the expand + // group. + for &dest_ty in &[F32, F64] { + for &src_ty in &[I8, I16] { + let bound_inst = fcvt_from_sint.bind(dest_ty).bind(src_ty); + expand.legalize( + def!(a = bound_inst(b)), + vec![ + def!(x = sextend.I32(b)), + def!(a = fcvt_from_sint.dest_ty(x)), + ], + ); + } + } + + // Expansions for immediate operands that are out of range. + for &(inst_imm, inst) in &[ + (iadd_imm, iadd), + (imul_imm, imul), + (sdiv_imm, sdiv), + (udiv_imm, udiv), + (srem_imm, srem), + (urem_imm, urem), + (band_imm, band), + (bor_imm, bor), + (bxor_imm, bxor), + (ifcmp_imm, ifcmp), + ] { + expand.legalize( + def!(a = inst_imm(x, y)), + vec![def!(a1 = iconst(y)), def!(a = inst(x, a1))], + ); + } + + expand.legalize( + def!(a = irsub_imm(y, x)), + vec![def!(a1 = iconst(x)), def!(a = isub(a1, y))], + ); + + // Rotates and shifts. + for &(inst_imm, inst) in &[ + (rotl_imm, rotl), + (rotr_imm, rotr), + (ishl_imm, ishl), + (sshr_imm, sshr), + (ushr_imm, ushr), + ] { + expand.legalize( + def!(a = inst_imm(x, y)), + vec![def!(a1 = iconst.I32(y)), def!(a = inst(x, a1))], + ); + } + + expand.legalize( + def!(a = icmp_imm(cc, x, y)), + vec![def!(a1 = iconst(y)), def!(a = icmp(cc, x, a1))], + ); + + //# Expansions for *_not variants of bitwise ops. + for &(inst_not, inst) in &[(band_not, band), (bor_not, bor), (bxor_not, bxor)] { + expand.legalize( + def!(a = inst_not(x, y)), + vec![def!(a1 = bnot(y)), def!(a = inst(x, a1))], + ); + } + + //# Expand bnot using xor. 
+ let minus_one = Literal::constant(&imm.imm64, -1); + expand.legalize( + def!(a = bnot(x)), + vec![def!(y = iconst(minus_one)), def!(a = bxor(x, y))], + ); + + //# Expand bitrev + //# Adapted from Stack Overflow. + //# https://stackoverflow.com/questions/746171/most-efficient-algorithm-for-bit-reversal-from-msb-lsb-to-lsb-msb-in-c + let imm64_1 = Literal::constant(&imm.imm64, 1); + let imm64_2 = Literal::constant(&imm.imm64, 2); + let imm64_4 = Literal::constant(&imm.imm64, 4); + + widen.legalize( + def!(a = bitrev.I8(x)), + vec![ + def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaa))), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x55))), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcc))), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x33))), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0))), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f))), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(a = bor(c2, c4)), + ], + ); + + let imm64_8 = Literal::constant(&imm.imm64, 8); + + widen.legalize( + def!(a = bitrev.I16(x)), + vec![ + def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaaaa))), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x5555))), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcccc))), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x3333))), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0f0))), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f0f))), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(d = bor(c2, c4)), + def!(d1 = band_imm(d, Literal::constant(&imm.imm64, 0xff00))), + def!(d2 = ushr_imm(d1, imm64_8)), + def!(d3 = band_imm(d, Literal::constant(&imm.imm64, 0x00ff))), + def!(d4 = ishl_imm(d3, imm64_8)), + def!(a = bor(d2, d4)), + ], + ); + + let imm64_16 = Literal::constant(&imm.imm64, 16); + + expand.legalize( + def!(a = bitrev.I32(x)), + vec![ + def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaaaa_aaaa))), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x5555_5555))), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcccc_cccc))), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x3333_3333))), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0f0_f0f0))), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f0f_0f0f))), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(d = bor(c2, c4)), + def!(d1 = band_imm(d, Literal::constant(&imm.imm64, 0xff00_ff00))), + def!(d2 = ushr_imm(d1, imm64_8)), + def!(d3 = band_imm(d, Literal::constant(&imm.imm64, 0x00ff_00ff))), + def!(d4 = ishl_imm(d3, imm64_8)), + def!(e = bor(d2, d4)), + def!(e1 = ushr_imm(e, imm64_16)), + def!(e2 = ishl_imm(e, imm64_16)), + def!(a = bor(e1, e2)), + ], + ); + + #[allow(overflowing_literals)] + let imm64_0xaaaaaaaaaaaaaaaa = Literal::constant(&imm.imm64, 0xaaaa_aaaa_aaaa_aaaa); + let 
imm64_0x5555555555555555 = Literal::constant(&imm.imm64, 0x5555_5555_5555_5555); + #[allow(overflowing_literals)] + let imm64_0xcccccccccccccccc = Literal::constant(&imm.imm64, 0xcccc_cccc_cccc_cccc); + let imm64_0x3333333333333333 = Literal::constant(&imm.imm64, 0x3333_3333_3333_3333); + #[allow(overflowing_literals)] + let imm64_0xf0f0f0f0f0f0f0f0 = Literal::constant(&imm.imm64, 0xf0f0_f0f0_f0f0_f0f0); + let imm64_0x0f0f0f0f0f0f0f0f = Literal::constant(&imm.imm64, 0x0f0f_0f0f_0f0f_0f0f); + #[allow(overflowing_literals)] + let imm64_0xff00ff00ff00ff00 = Literal::constant(&imm.imm64, 0xff00_ff00_ff00_ff00); + let imm64_0x00ff00ff00ff00ff = Literal::constant(&imm.imm64, 0x00ff_00ff_00ff_00ff); + #[allow(overflowing_literals)] + let imm64_0xffff0000ffff0000 = Literal::constant(&imm.imm64, 0xffff_0000_ffff_0000); + let imm64_0x0000ffff0000ffff = Literal::constant(&imm.imm64, 0x0000_ffff_0000_ffff); + let imm64_32 = Literal::constant(&imm.imm64, 32); + + expand.legalize( + def!(a = bitrev.I64(x)), + vec![ + def!(a1 = band_imm(x, imm64_0xaaaaaaaaaaaaaaaa)), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, imm64_0x5555555555555555)), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, imm64_0xcccccccccccccccc)), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, imm64_0x3333333333333333)), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, imm64_0xf0f0f0f0f0f0f0f0)), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, imm64_0x0f0f0f0f0f0f0f0f)), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(d = bor(c2, c4)), + def!(d1 = band_imm(d, imm64_0xff00ff00ff00ff00)), + def!(d2 = ushr_imm(d1, imm64_8)), + def!(d3 = band_imm(d, imm64_0x00ff00ff00ff00ff)), + def!(d4 = ishl_imm(d3, imm64_8)), + def!(e = bor(d2, d4)), + def!(e1 = band_imm(e, imm64_0xffff0000ffff0000)), + def!(e2 = ushr_imm(e1, imm64_16)), + def!(e3 = band_imm(e, imm64_0x0000ffff0000ffff)), + def!(e4 = ishl_imm(e3, imm64_16)), + def!(f = bor(e2, e4)), + def!(f1 = ushr_imm(f, imm64_32)), + def!(f2 = ishl_imm(f, imm64_32)), + def!(a = bor(f1, f2)), + ], + ); + + narrow.legalize( + def!(a = bitrev.I128(x)), + vec![ + def!((xl, xh) = isplit(x)), + def!(yh = bitrev(xl)), + def!(yl = bitrev(xh)), + def!(a = iconcat(yl, yh)), + ], + ); + + // Floating-point sign manipulations. + for &(ty, const_inst, minus_zero) in &[ + (F32, f32const, &Literal::bits(&imm.ieee32, 0x8000_0000)), + ( + F64, + f64const, + &Literal::bits(&imm.ieee64, 0x8000_0000_0000_0000), + ), + ] { + expand.legalize( + def!(a = fabs.ty(x)), + vec![def!(b = const_inst(minus_zero)), def!(a = band_not(x, b))], + ); + + expand.legalize( + def!(a = fneg.ty(x)), + vec![def!(b = const_inst(minus_zero)), def!(a = bxor(x, b))], + ); + + expand.legalize( + def!(a = fcopysign.ty(x, y)), + vec![ + def!(b = const_inst(minus_zero)), + def!(a1 = band_not(x, b)), + def!(a2 = band(y, b)), + def!(a = bor(a1, a2)), + ], + ); + } + + expand.custom_legalize(br_icmp, "expand_br_icmp"); + + let mut groups = TransformGroups::new(); + + let narrow_id = narrow.build_and_add_to(&mut groups); + let expand_id = expand.build_and_add_to(&mut groups); + + // Expansions using CPU flags. + let mut expand_flags = TransformGroupBuilder::new( + "expand_flags", + r#" + Instruction expansions for architectures with flags. + + Expand some instructions using CPU flags, then fall back to the normal + expansions. Not all architectures support CPU flags, so these patterns + are kept separate. 
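The `bitrev` expansions above are the classic mask-shift-or swizzle: swap adjacent bits, then bit pairs, then nibbles (and, for wider types, bytes and half-words). The i8 case written out directly, with the same masks as the patch (not part of the patch itself):

```rust
fn bitrev8(x: u8) -> u8 {
    let x = ((x & 0xaa) >> 1) | ((x & 0x55) << 1); // swap adjacent bits
    let x = ((x & 0xcc) >> 2) | ((x & 0x33) << 2); // swap bit pairs
    ((x & 0xf0) >> 4) | ((x & 0x0f) << 4)          // swap nibbles
}

fn main() {
    assert_eq!(bitrev8(0x01), 0x80);
    assert_eq!(bitrev8(0x12), 0x48);
    assert_eq!(bitrev8(0b1000_0001), 0b1000_0001); // palindromes are fixed points
}
```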
+ "#, + ) + .chain_with(expand_id); + + let imm64_0 = Literal::constant(&imm.imm64, 0); + let intcc_ne = Literal::enumerator_for(&imm.intcc, "ne"); + let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); + + expand_flags.legalize( + def!(trapnz(x, c)), + vec![ + def!(a = ifcmp_imm(x, imm64_0)), + def!(trapif(intcc_ne, a, c)), + ], + ); + + expand_flags.legalize( + def!(trapz(x, c)), + vec![ + def!(a = ifcmp_imm(x, imm64_0)), + def!(trapif(intcc_eq, a, c)), + ], + ); + + expand_flags.build_and_add_to(&mut groups); + + // Narrow legalizations using CPU flags. + let mut narrow_flags = TransformGroupBuilder::new( + "narrow_flags", + r#" + Narrow instructions for architectures with flags. + + Narrow some instructions using CPU flags, then fall back to the normal + legalizations. Not all architectures support CPU flags, so these + patterns are kept separate. + "#, + ) + .chain_with(narrow_id); + + narrow_flags.legalize( + def!(a = iadd(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, c) = iadd_ifcout(xl, yl)), + def!(ah = iadd_ifcin(xh, yh, c)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_flags.legalize( + def!(a = isub(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, b) = isub_ifbout(xl, yl)), + def!(ah = isub_ifbin(xh, yh, b)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_flags.build_and_add_to(&mut groups); + + // TODO(ryzokuken): figure out a way to legalize iadd_c* to iadd_ifc* (and + // similarly isub_b* to isub_ifb*) on expand_flags so that this isn't required. + // Narrow legalizations for ISAs that don't have CPU flags. + let mut narrow_no_flags = TransformGroupBuilder::new( + "narrow_no_flags", + r#" + Narrow instructions for architectures without flags. + + Narrow some instructions avoiding the use of CPU flags, then fall back + to the normal legalizations. Not all architectures support CPU flags, + so these patterns are kept separate. + "#, + ) + .chain_with(narrow_id); + + narrow_no_flags.legalize( + def!(a = iadd(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, c) = iadd_cout(xl, yl)), + def!(ah = iadd_cin(xh, yh, c)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_no_flags.legalize( + def!(a = isub(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, b) = isub_bout(xl, yl)), + def!(ah = isub_bin(xh, yh, b)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_no_flags.build_and_add_to(&mut groups); + + // TODO The order of declarations unfortunately matters to be compatible with the Python code. + // When it's all migrated, we can put this next to the narrow/expand build_and_add_to calls + // above. + widen.build_and_add_to(&mut groups); + + groups +} diff --git a/cranelift/codegen/meta/src/shared/mod.rs b/cranelift/codegen/meta/src/shared/mod.rs new file mode 100644 index 0000000000..b185262ccd --- /dev/null +++ b/cranelift/codegen/meta/src/shared/mod.rs @@ -0,0 +1,101 @@ +//! Shared definitions for the Cranelift intermediate language. 
+
+pub mod entities;
+pub mod formats;
+pub mod immediates;
+pub mod instructions;
+pub mod legalize;
+pub mod settings;
+pub mod types;
+
+use crate::cdsl::formats::{FormatStructure, InstructionFormat};
+use crate::cdsl::instructions::{AllInstructions, InstructionGroup};
+use crate::cdsl::settings::SettingGroup;
+use crate::cdsl::xform::TransformGroups;
+
+use crate::shared::entities::EntityRefs;
+use crate::shared::formats::Formats;
+use crate::shared::immediates::Immediates;
+
+use std::collections::HashMap;
+use std::iter::FromIterator;
+use std::rc::Rc;
+
+pub(crate) struct Definitions {
+    pub settings: SettingGroup,
+    pub all_instructions: AllInstructions,
+    pub instructions: InstructionGroup,
+    pub imm: Immediates,
+    pub formats: Formats,
+    pub transform_groups: TransformGroups,
+    pub entities: EntityRefs,
+}
+
+pub(crate) fn define() -> Definitions {
+    let mut all_instructions = AllInstructions::new();
+
+    let immediates = Immediates::new();
+    let entities = EntityRefs::new();
+    let formats = Formats::new(&immediates, &entities);
+    let instructions =
+        instructions::define(&mut all_instructions, &formats, &immediates, &entities);
+    let transform_groups = legalize::define(&instructions, &immediates);
+
+    Definitions {
+        settings: settings::define(),
+        all_instructions,
+        instructions,
+        imm: immediates,
+        formats,
+        transform_groups,
+        entities,
+    }
+}
+
+impl Definitions {
+    /// Verifies certain properties of formats.
+    ///
+    /// - Formats must be uniquely named: if two formats have the same name, they must refer to the
+    ///   same data. Otherwise, two format variants in the codegen crate would have the same name.
+    /// - Formats must be structurally different from each other. Otherwise, this would lead to
+    ///   code duplication in the codegen crate.
+    ///
+    /// Returns a list of all the instruction formats effectively used.
+    pub fn verify_instruction_formats(&self) -> Vec<&InstructionFormat> {
+        let mut format_names: HashMap<&'static str, &Rc<InstructionFormat>> = HashMap::new();
+
+        // A structure is: number of input value operands / whether there's varargs or not / names
+        // of immediate fields.
+        let mut format_structures: HashMap<FormatStructure, &Rc<InstructionFormat>> =
+            HashMap::new();
+
+        for inst in self.all_instructions.values() {
+            // Check name.
+            if let Some(existing_format) = format_names.get(&inst.format.name) {
+                assert!(
+                    Rc::ptr_eq(&existing_format, &inst.format),
+                    "formats must be uniquely named; there's a \
+                     conflict on the name '{}', please make sure it is used only once.",
+                    existing_format.name
+                );
+            } else {
+                format_names.insert(inst.format.name, &inst.format);
+            }
+
+            // Check structure.
+ let key = inst.format.structure(); + if let Some(existing_format) = format_structures.get(&key) { + assert_eq!( + existing_format.name, inst.format.name, + "duplicate instruction formats {} and {}; please remove one.", + existing_format.name, inst.format.name + ); + } else { + format_structures.insert(key, &inst.format); + } + } + + let mut result = Vec::from_iter(format_structures.into_iter().map(|(_, v)| v)); + result.sort_by_key(|format| format.name); + result + } +} diff --git a/cranelift/codegen/meta/src/shared/settings.rs b/cranelift/codegen/meta/src/shared/settings.rs new file mode 100644 index 0000000000..fd6063e852 --- /dev/null +++ b/cranelift/codegen/meta/src/shared/settings.rs @@ -0,0 +1,235 @@ +use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; + +pub(crate) fn define() -> SettingGroup { + let mut settings = SettingGroupBuilder::new("shared"); + + settings.add_enum( + "opt_level", + r#" + Optimization level: + + - none: Minimise compile time by disabling most optimizations. + - speed: Generate the fastest possible code. + - speed_and_size: Like "speed", but also perform transformations + aimed at reducing code size. + "#, + vec!["none", "speed", "speed_and_size"], + ); + + settings.add_bool( + "enable_verifier", + r#" + Run the Cranelift IR verifier at strategic times during compilation. + + This makes compilation slower but catches many bugs. The verifier is enabled by + default, which is useful during development. + "#, + true, + ); + + // Note that Cranelift doesn't currently need an is_pie flag, because PIE is + // just PIC where symbols can't be pre-empted, which can be expressed with the + // `colocated` flag on external functions and global values. + settings.add_bool( + "is_pic", + "Enable Position-Independent Code generation.", + false, + ); + + settings.add_bool( + "use_colocated_libcalls", + r#" + Use colocated libcalls. + + Generate code that assumes that libcalls can be declared "colocated", + meaning they will be defined along with the current function, such that + they can use more efficient addressing. + "#, + false, + ); + + settings.add_bool( + "avoid_div_traps", + r#" + Generate explicit checks around native division instructions to avoid + their trapping. + + This is primarily used by SpiderMonkey, which doesn't install a signal + handler for SIGFPE, but expects a SIGILL trap for division by zero. + + On ISAs like ARM where the native division instructions don't trap, + this setting has no effect - explicit checks are always inserted. + "#, + false, + ); + + settings.add_bool( + "enable_float", + r#" + Enable the use of floating-point instructions. + + Disabling use of floating-point instructions is not yet implemented. + "#, + true, + ); + + settings.add_bool( + "enable_nan_canonicalization", + r#" + Enable NaN canonicalization. + + This replaces NaNs with a single canonical value, for users requiring + entirely deterministic WebAssembly computation. This is not required + by the WebAssembly spec, so it is not enabled by default. + "#, + false, + ); + + settings.add_bool( + "enable_pinned_reg", + r#"Enable the use of the pinned register. + + This register is excluded from register allocation, and is completely under the control of + the end-user. It is possible to read it via the get_pinned_reg instruction, and to set it + with the set_pinned_reg instruction. + "#, + false, + ); + + settings.add_bool( + "use_pinned_reg_as_heap_base", + r#"Use the pinned register as the heap base.
+ + Enabling this requires the enable_pinned_reg setting to be set to true. It enables a custom + legalization of the `heap_addr` instruction so it will use the pinned register as the heap + base, instead of fetching it from a global value. + + Warning! Enabling this means that the pinned register *must* be maintained to contain the + heap base address at all times, during the lifetime of a function. Using the pinned + register for other purposes when this is set is very likely to cause crashes. + "#, + false, + ); + + settings.add_bool("enable_simd", "Enable the use of SIMD instructions.", false); + + settings.add_bool( + "enable_atomics", + "Enable the use of atomic instructions.", + true, + ); + + settings.add_bool( + "enable_safepoints", + r#" + Enable safepoint instruction insertions. + + This will allow the emit_stackmaps() function to insert the safepoint + instruction on top of calls and interrupt traps in order to record the + live reference values at that point in the program. + "#, + false, + ); + + settings.add_enum( + "tls_model", + r#" + Defines the model used to perform TLS accesses. + "#, + vec!["none", "elf_gd", "macho", "coff"], + ); + + // Settings specific to the `baldrdash` calling convention. + + settings.add_enum( + "libcall_call_conv", + r#" + Defines the calling convention to use for LibCalls call expansion, + since it may be different from the ISA default calling convention. + + The default value is to use the same calling convention as the ISA + default calling convention. + + This list should be kept in sync with the list of calling + conventions available in isa/call_conv.rs. + "#, + vec![ + "isa_default", + "fast", + "cold", + "system_v", + "windows_fastcall", + "baldrdash_system_v", + "baldrdash_windows", + "probestack", + ], + ); + + settings.add_num( + "baldrdash_prologue_words", + r#" + Number of pointer-sized words pushed by the baldrdash prologue. + + Functions with the `baldrdash` calling convention don't generate their + own prologue and epilogue. They depend on externally generated code + that pushes a fixed number of words in the prologue and restores them + in the epilogue. + + This setting configures the number of pointer-sized words pushed on the + stack when the Cranelift-generated code is entered. This includes the + pushed return address on x86. + "#, + 0, + ); + + // BaldrMonkey requires that not-yet-relocated function addresses be encoded + // as all-ones bitpatterns. + settings.add_bool( + "emit_all_ones_funcaddrs", + "Emit not-yet-relocated function addresses as all-ones bit patterns.", + false, + ); + + // Stack probing options. + + settings.add_bool( + "enable_probestack", + r#" + Enable the use of stack probes, for calling conventions which support this + functionality. + "#, + true, + ); + + settings.add_bool( + "probestack_func_adjusts_sp", + r#" + Set this to true if the stack probe function modifies the stack pointer + itself. + "#, + false, + ); + + settings.add_num( + "probestack_size_log2", + r#" + The log2 of the size of the stack guard region. + + Stack frames larger than this size will have stack overflow checked + by calling the probestack function. + + The default is 12, which translates to a size of 4096. + "#, + 12, + );
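+ + // Illustrative sketch (consumer side, not part of this file): each setting + // declared above becomes an accessor on the generated `Flags` struct in + // cranelift-codegen, along these lines: + // + // use cranelift_codegen::settings::{self, Configurable}; + // let mut b = settings::builder(); + // b.set("enable_probestack", "false").unwrap(); + // let flags = settings::Flags::new(b); + // assert!(!flags.enable_probestack());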
+ + // Jump table options. + + settings.add_bool( + "enable_jump_tables", + "Enable the use of jump tables in generated machine code.", + true, + ); + + settings.build() +} diff --git a/cranelift/codegen/meta/src/shared/types.rs b/cranelift/codegen/meta/src/shared/types.rs new file mode 100644 index 0000000000..631e5433e9 --- /dev/null +++ b/cranelift/codegen/meta/src/shared/types.rs @@ -0,0 +1,236 @@ +//! This module predefines all the Cranelift scalar types. + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub(crate) enum Bool { + /// 1-bit bool. + B1 = 1, + /// 8-bit bool. + B8 = 8, + /// 16-bit bool. + B16 = 16, + /// 32-bit bool. + B32 = 32, + /// 64-bit bool. + B64 = 64, + /// 128-bit bool. + B128 = 128, +} + +/// This provides an iterator through all of the supported bool variants. +pub(crate) struct BoolIterator { + index: u8, +} + +impl BoolIterator { + pub fn new() -> Self { + Self { index: 0 } + } +} + +impl Iterator for BoolIterator { + type Item = Bool; + fn next(&mut self) -> Option<Self::Item> { + let res = match self.index { + 0 => Some(Bool::B1), + 1 => Some(Bool::B8), + 2 => Some(Bool::B16), + 3 => Some(Bool::B32), + 4 => Some(Bool::B64), + 5 => Some(Bool::B128), + _ => return None, + }; + self.index += 1; + res + } +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub(crate) enum Int { + /// 8-bit int. + I8 = 8, + /// 16-bit int. + I16 = 16, + /// 32-bit int. + I32 = 32, + /// 64-bit int. + I64 = 64, + /// 128-bit int. + I128 = 128, +} + +/// This provides an iterator through all of the supported int variants. +pub(crate) struct IntIterator { + index: u8, +} + +impl IntIterator { + pub fn new() -> Self { + Self { index: 0 } + } +} + +impl Iterator for IntIterator { + type Item = Int; + fn next(&mut self) -> Option<Self::Item> { + let res = match self.index { + 0 => Some(Int::I8), + 1 => Some(Int::I16), + 2 => Some(Int::I32), + 3 => Some(Int::I64), + 4 => Some(Int::I128), + _ => return None, + }; + self.index += 1; + res + } +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub(crate) enum Float { + F32 = 32, + F64 = 64, +} + +/// This provides an iterator through all of the supported float variants. +pub(crate) struct FloatIterator { + index: u8, +} + +impl FloatIterator { + pub fn new() -> Self { + Self { index: 0 } + } +} + +impl Iterator for FloatIterator { + type Item = Float; + fn next(&mut self) -> Option<Self::Item> { + let res = match self.index { + 0 => Some(Float::F32), + 1 => Some(Float::F64), + _ => return None, + }; + self.index += 1; + res + } +} + +/// A type representing CPU flags. +/// +/// Flags can't be stored in memory. +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub(crate) enum Flag { + /// CPU flags from an integer comparison. + IFlags, + /// CPU flags from a floating point comparison. + FFlags, +} + +/// Iterator through the variants of the Flag enum. +pub(crate) struct FlagIterator { + index: u8, +} + +impl FlagIterator { + pub fn new() -> Self { + Self { index: 0 } + } +} + +impl Iterator for FlagIterator { + type Item = Flag; + fn next(&mut self) -> Option<Self::Item> { + let res = match self.index { + 0 => Some(Flag::IFlags), + 1 => Some(Flag::FFlags), + _ => return None, + }; + self.index += 1; + res + } +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub(crate) enum Reference { + /// 32-bit reference. + R32 = 32, + /// 64-bit reference. + R64 = 64, +} + +/// This provides an iterator through all of the supported reference variants.
+pub(crate) struct ReferenceIterator { + index: u8, +} + +impl ReferenceIterator { + pub fn new() -> Self { + Self { index: 0 } + } +} + +impl Iterator for ReferenceIterator { + type Item = Reference; + fn next(&mut self) -> Option<Self::Item> { + let res = match self.index { + 0 => Some(Reference::R32), + 1 => Some(Reference::R64), + _ => return None, + }; + self.index += 1; + res + } +} + +#[cfg(test)] +mod iter_tests { + use super::*; + + #[test] + fn bool_iter_works() { + let mut bool_iter = BoolIterator::new(); + assert_eq!(bool_iter.next(), Some(Bool::B1)); + assert_eq!(bool_iter.next(), Some(Bool::B8)); + assert_eq!(bool_iter.next(), Some(Bool::B16)); + assert_eq!(bool_iter.next(), Some(Bool::B32)); + assert_eq!(bool_iter.next(), Some(Bool::B64)); + assert_eq!(bool_iter.next(), Some(Bool::B128)); + assert_eq!(bool_iter.next(), None); + } + + #[test] + fn int_iter_works() { + let mut int_iter = IntIterator::new(); + assert_eq!(int_iter.next(), Some(Int::I8)); + assert_eq!(int_iter.next(), Some(Int::I16)); + assert_eq!(int_iter.next(), Some(Int::I32)); + assert_eq!(int_iter.next(), Some(Int::I64)); + assert_eq!(int_iter.next(), Some(Int::I128)); + assert_eq!(int_iter.next(), None); + } + + #[test] + fn float_iter_works() { + let mut float_iter = FloatIterator::new(); + assert_eq!(float_iter.next(), Some(Float::F32)); + assert_eq!(float_iter.next(), Some(Float::F64)); + assert_eq!(float_iter.next(), None); + } + + #[test] + fn flag_iter_works() { + let mut flag_iter = FlagIterator::new(); + assert_eq!(flag_iter.next(), Some(Flag::IFlags)); + assert_eq!(flag_iter.next(), Some(Flag::FFlags)); + assert_eq!(flag_iter.next(), None); + } + + #[test] + fn reference_iter_works() { + let mut reference_iter = ReferenceIterator::new(); + assert_eq!(reference_iter.next(), Some(Reference::R32)); + assert_eq!(reference_iter.next(), Some(Reference::R64)); + assert_eq!(reference_iter.next(), None); + } +} diff --git a/cranelift/codegen/meta/src/srcgen.rs b/cranelift/codegen/meta/src/srcgen.rs new file mode 100644 index 0000000000..ad8db175d7 --- /dev/null +++ b/cranelift/codegen/meta/src/srcgen.rs @@ -0,0 +1,484 @@ +//! Source code generator. +//! +//! The `srcgen` module contains generic helper routines and classes for +//! generating source code. + +#![macro_use] + +use std::cmp; +use std::collections::{BTreeMap, BTreeSet}; +use std::fs; +use std::io::Write; +use std::path; + +use crate::error; + +static SHIFTWIDTH: usize = 4; + +/// A macro that simplifies the usage of the Formatter by allowing format +/// strings. +macro_rules! fmtln { + ($fmt:ident, $fmtstring:expr, $($fmtargs:expr),*) => { + $fmt.line(format!($fmtstring, $($fmtargs),*)); + }; + + ($fmt:ident, $arg:expr) => { + $fmt.line($arg); + }; + + ($_:tt, $($args:expr),+) => { + compile_error!("This macro requires at least two arguments: the Formatter instance and a format string."); + }; + + ($_:tt) => { + compile_error!("This macro requires at least two arguments: the Formatter instance and a format string."); + }; +} + +pub(crate) struct Formatter { + indent: usize, + lines: Vec<String>, +} + +impl Formatter { + /// Source code formatter class. Used to collect source code to be written + /// to a file, and keep track of indentation. + pub fn new() -> Self { + Self { + indent: 0, + lines: Vec::new(), + } + } + + /// Increase current indentation level by one. + pub fn indent_push(&mut self) { + self.indent += 1; + } + + /// Decrease indentation by one level.
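+ /// + /// (Illustration: `indent_push()` followed by `indent_pop()` restores the + /// previous level; popping at the top level panics via the assert below.)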
+ pub fn indent_pop(&mut self) { + assert!(self.indent > 0, "Already at top level indentation"); + self.indent -= 1; + } + + pub fn indent<T, F: FnOnce(&mut Formatter) -> T>(&mut self, f: F) -> T { + self.indent_push(); + let ret = f(self); + self.indent_pop(); + ret + } + + /// Get the current whitespace indentation in the form of a String. + fn get_indent(&self) -> String { + if self.indent == 0 { + String::new() + } else { + format!("{:-1$}", " ", self.indent * SHIFTWIDTH) + } + } + + /// Get a string containing whitespace outdented one level. Used for + /// lines of code that are inside a single indented block. + fn get_outdent(&mut self) -> String { + self.indent_pop(); + let s = self.get_indent(); + self.indent_push(); + s + } + + /// Add an indented line. + pub fn line(&mut self, contents: impl AsRef<str>) { + let indented_line = format!("{}{}\n", self.get_indent(), contents.as_ref()); + self.lines.push(indented_line); + } + + /// Pushes an empty line. + pub fn empty_line(&mut self) { + self.lines.push("\n".to_string()); + } + + /// Emit a line outdented one level. + pub fn outdented_line(&mut self, s: &str) { + let new_line = format!("{}{}\n", self.get_outdent(), s); + self.lines.push(new_line); + } + + /// Write `self.lines` to a file. + pub fn update_file( + &self, + filename: impl AsRef<str>, + directory: &str, + ) -> Result<(), error::Error> { + #[cfg(target_family = "windows")] + let path_str = format!("{}\\{}", directory, filename.as_ref()); + #[cfg(not(target_family = "windows"))] + let path_str = format!("{}/{}", directory, filename.as_ref()); + + let path = path::Path::new(&path_str); + let mut f = fs::File::create(path)?; + + for l in self.lines.iter().map(|l| l.as_bytes()) { + f.write_all(l)?; + } + + Ok(()) + } + + /// Add one or more lines after stripping common indentation. + pub fn multi_line(&mut self, s: &str) { + parse_multiline(s).into_iter().for_each(|l| self.line(&l)); + } + + /// Add a comment line. + pub fn comment(&mut self, s: impl AsRef<str>) { + fmtln!(self, "// {}", s.as_ref()); + } + + /// Add a (multi-line) documentation comment. + pub fn doc_comment(&mut self, contents: impl AsRef<str>) { + parse_multiline(contents.as_ref()) + .iter() + .map(|l| { + if l.is_empty() { + "///".into() + } else { + format!("/// {}", l) + } + }) + .for_each(|s| self.line(s.as_str())); + } + + /// Add a match expression. + pub fn add_match(&mut self, m: Match) { + fmtln!(self, "match {} {{", m.expr); + self.indent(|fmt| { + for (&(ref fields, ref body), ref names) in m.arms.iter() { + // name { fields } | name { fields } => { body } + let conditions = names + .iter() + .map(|name| { + if !fields.is_empty() { + format!("{} {{ {} }}", name, fields.join(", ")) + } else { + name.clone() + } + }) + .collect::<Vec<_>>() + .join(" |\n") + + " => {"; + + fmt.multi_line(&conditions); + fmt.indent(|fmt| { + fmt.line(body); + }); + fmt.line("}"); + } + + // Make sure to include the catch all clause last. + if let Some(body) = m.catch_all { + fmt.line("_ => {"); + fmt.indent(|fmt| { + fmt.line(body); + }); + fmt.line("}"); + } + }); + self.line("}"); + } +} + +/// Compute the indentation of s, or None for an empty line. +fn _indent(s: &str) -> Option<usize> { + if s.is_empty() { + None + } else { + let t = s.trim_start(); + Some(s.len() - t.len()) + } +} + +/// Given a multi-line string, split it into a sequence of lines after +/// stripping a common indentation. This is useful for strings defined with +/// doc strings. +fn parse_multiline(s: &str) -> Vec<String> {
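+ // Sketch of the behavior (mirrors `parse_multiline_works` in the tests + // below): an input like "\n    hello\n    world\n" comes out as + // vec!["hello", "world"] - blank lines at the edges are dropped and the + // common indentation is stripped.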
+ // Convert tabs into spaces. + let expanded_tab = format!("{:-1$}", " ", SHIFTWIDTH); + let lines: Vec<String> = s.lines().map(|l| l.replace("\t", &expanded_tab)).collect(); + + // Determine minimum indentation, ignoring the first line and empty lines. + let indent = lines + .iter() + .skip(1) + .filter(|l| !l.trim().is_empty()) + .map(|l| l.len() - l.trim_start().len()) + .min(); + + // Strip off leading blank lines. + let mut lines_iter = lines.iter().skip_while(|l| l.is_empty()); + let mut trimmed = Vec::with_capacity(lines.len()); + + // Remove indentation (first line is special) + if let Some(s) = lines_iter.next().map(|l| l.trim()).map(|l| l.to_string()) { + trimmed.push(s); + } + + // Remove trailing whitespace from other lines. + let mut other_lines = if let Some(indent) = indent { + // Note that empty lines may have fewer than `indent` chars. + lines_iter + .map(|l| &l[cmp::min(indent, l.len())..]) + .map(|l| l.trim_end()) + .map(|l| l.to_string()) + .collect::<Vec<_>>() + } else { + lines_iter + .map(|l| l.trim_end()) + .map(|l| l.to_string()) + .collect::<Vec<_>>() + }; + + trimmed.append(&mut other_lines); + + // Strip off trailing blank lines. + while let Some(s) = trimmed.pop() { + if s.is_empty() { + continue; + } else { + trimmed.push(s); + break; + } + } + + trimmed +} + +/// Match formatting class. +/// +/// Match objects collect all the information needed to emit a Rust `match` +/// expression, automatically deduplicating overlapping identical arms. +/// +/// Note that this class is ignorant of Rust types, and considers two fields +/// with the same name to be equivalent. BTreeMap/BTreeSet are used to +/// represent the arms in order to make the order deterministic. +pub(crate) struct Match { + expr: String, + arms: BTreeMap<(Vec<String>, String), BTreeSet<String>>, + /// The clause for the placeholder pattern _. + catch_all: Option<String>, +} + +impl Match { + /// Create a new match statement on `expr`. + pub fn new(expr: impl Into<String>) -> Self { + Self { + expr: expr.into(), + arms: BTreeMap::new(), + catch_all: None, + } + } + + fn set_catch_all(&mut self, clause: String) { + assert!(self.catch_all.is_none()); + self.catch_all = Some(clause); + } + + /// Add an arm that reads fields to the Match statement. + pub fn arm<T: Into<String>, S: Into<String>>(&mut self, name: T, fields: Vec<S>, body: T) { + let name = name.into(); + assert!( + name != "_", + "catch all clause can't extract fields, use arm_no_fields instead." + ); + + let body = body.into(); + let fields = fields.into_iter().map(|x| x.into()).collect(); + let match_arm = self + .arms + .entry((fields, body)) + .or_insert_with(BTreeSet::new); + match_arm.insert(name); + } + + /// Adds an arm that doesn't read anything from the fields to the Match statement.
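+ /// (For illustration: `m.arm_no_fields("_", "unreachable!()")` installs the + /// catch-all clause, which `add_match` emits after all the named arms.)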
+ pub fn arm_no_fields(&mut self, name: impl Into<String>, body: impl Into<String>) { + let body = body.into(); + + let name = name.into(); + if name == "_" { + self.set_catch_all(body); + return; + } + + let match_arm = self + .arms + .entry((Vec::new(), body)) + .or_insert_with(BTreeSet::new); + match_arm.insert(name); + } +} + +#[cfg(test)] +mod srcgen_tests { + use super::parse_multiline; + use super::Formatter; + use super::Match; + + fn from_raw_string<S: Into<String>>(s: S) -> Vec<String> { + s.into() + .trim() + .split("\n") + .map(|x| format!("{}\n", x)) + .collect() + } + + #[test] + fn adding_arms_works() { + let mut m = Match::new("x"); + m.arm("Orange", vec!["a", "b"], "some body"); + m.arm("Yellow", vec!["a", "b"], "some body"); + m.arm("Green", vec!["a", "b"], "different body"); + m.arm("Blue", vec!["x", "y"], "some body"); + assert_eq!(m.arms.len(), 3); + + let mut fmt = Formatter::new(); + fmt.add_match(m); + + let expected_lines = from_raw_string( + r#" +match x { +    Green { a, b } => { +        different body +    } +    Orange { a, b } | +    Yellow { a, b } => { +        some body +    } +    Blue { x, y } => { +        some body +    } +} + "#, + ); + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn match_with_catchall_order() { + // The catchall placeholder must be placed after other clauses. + let mut m = Match::new("x"); + m.arm("Orange", vec!["a", "b"], "some body"); + m.arm("Green", vec!["a", "b"], "different body"); + m.arm_no_fields("_", "unreachable!()"); + assert_eq!(m.arms.len(), 2); // catchall is not counted + + let mut fmt = Formatter::new(); + fmt.add_match(m); + + let expected_lines = from_raw_string( + r#" +match x { +    Green { a, b } => { +        different body +    } +    Orange { a, b } => { +        some body +    } +    _ => { +        unreachable!() +    } +} + "#, + ); + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn parse_multiline_works() { + let input = "\n    hello\n    world\n"; + let expected = vec!["hello", "world"]; + let output = parse_multiline(input); + assert_eq!(output, expected); + } + + #[test] + fn formatter_basic_example_works() { + let mut fmt = Formatter::new(); + fmt.line("Hello line 1"); + fmt.indent_push(); + fmt.comment("Nested comment"); + fmt.indent_pop(); + fmt.line("Back home again"); + let expected_lines = vec![ + "Hello line 1\n", + "    // Nested comment\n", + "Back home again\n", + ]; + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn get_indent_works() { + let mut fmt = Formatter::new(); + let expected_results = vec!["", "    ", "        ", ""]; + + // Record the indentation at levels 0, 1 and 2, then back at level 0. + let mut actual_results = Vec::with_capacity(4); + (0..3).for_each(|_| { + actual_results.push(fmt.get_indent()); + fmt.indent_push(); + }); + (0..3).for_each(|_| fmt.indent_pop()); + actual_results.push(fmt.get_indent()); + + actual_results + .into_iter() + .zip(expected_results.into_iter()) + .for_each(|(actual, expected): (String, &str)| assert_eq!(&actual, expected)); + } + + #[test] + fn fmt_can_add_type_to_lines() { + let mut fmt = Formatter::new(); + fmt.line(format!("pub const {}: Type = Type({:#x});", "example", 0,)); + let expected_lines = vec!["pub const example: Type = Type(0x0);\n"]; + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn fmt_can_add_indented_line() { + let mut fmt = Formatter::new(); + fmt.line("hello"); + fmt.indent_push(); + fmt.line("world"); + let expected_lines = vec!["hello\n", "    world\n"]; + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn fmt_can_add_doc_comments() { + let mut fmt = Formatter::new(); + fmt.doc_comment("documentation\nis\ngood"); + let expected_lines = vec!["/// documentation\n", "/// is\n", "/// good\n"]; +
assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn fmt_can_add_doc_comments_with_empty_lines() { + let mut fmt = Formatter::new(); + fmt.doc_comment( + r#"documentation +    can be really good. + +    If you stick to writing it. +"#, + ); + let expected_lines = from_raw_string( + r#" +/// documentation +/// can be really good. +/// +/// If you stick to writing it."#, + ); + assert_eq!(fmt.lines, expected_lines); + } +} diff --git a/cranelift/codegen/meta/src/unique_table.rs b/cranelift/codegen/meta/src/unique_table.rs new file mode 100644 index 0000000000..65ef7e8b4a --- /dev/null +++ b/cranelift/codegen/meta/src/unique_table.rs @@ -0,0 +1,141 @@ +//! An index-accessed table implementation that avoids duplicate entries. +use std::collections::HashMap; +use std::hash::Hash; +use std::slice; + +/// Collect items into the `table` list, removing duplicates. +pub(crate) struct UniqueTable<'entries, T: Eq + Hash> { + table: Vec<&'entries T>, + map: HashMap<&'entries T, usize>, +} + +impl<'entries, T: Eq + Hash> UniqueTable<'entries, T> { + pub fn new() -> Self { + Self { + table: Vec::new(), + map: HashMap::new(), + } + } + + pub fn add(&mut self, entry: &'entries T) -> usize { + match self.map.get(&entry) { + None => { + let i = self.table.len(); + self.table.push(entry); + self.map.insert(entry, i); + i + } + Some(&i) => i, + } + } + + pub fn len(&self) -> usize { + self.table.len() + } + pub fn get(&self, index: usize) -> &T { + self.table[index] + } + pub fn iter(&self) -> slice::Iter<&'entries T> { + self.table.iter() + } +} + +/// A table of sequences which tries to avoid common subsequences. +pub(crate) struct UniqueSeqTable<T: PartialEq + Clone> { + table: Vec<T>, +} + +impl<T: PartialEq + Clone> UniqueSeqTable<T> { + pub fn new() -> Self { + Self { table: Vec::new() } + } + pub fn add(&mut self, values: &[T]) -> usize { + if values.is_empty() { + return 0; + } + if let Some(offset) = find_subsequence(values, &self.table) { + offset + } else { + let table_len = self.table.len(); + + // Try to put in common the last elements of the table if they're a prefix of the new + // sequence. + // + // We know there wasn't a full match, so the best prefix we can hope to find contains + // all the values but the last one. + let mut start_from = usize::min(table_len, values.len() - 1); + while start_from != 0 { + // Loop invariant: start_from <= table_len, so table_len - start_from >= 0. + if values[0..start_from] == self.table[table_len - start_from..table_len] { + break; + } + start_from -= 1; + } + + self.table + .extend(values[start_from..values.len()].iter().cloned()); + table_len - start_from + } + } + pub fn len(&self) -> usize { + self.table.len() + } + pub fn iter(&self) -> slice::Iter<T> { + self.table.iter() + } +} + +/// Try to find the subsequence `sub` in the `whole` sequence. Returns None if +/// it's not been found, or Some(index) if it has been. Naive implementation +/// until proven we need something better. +fn find_subsequence<T: PartialEq>(sub: &[T], whole: &[T]) -> Option<usize> { + assert!(!sub.is_empty()); + // We want i + sub.len() <= whole.len(), i.e. i < whole.len() + 1 - sub.len(). + if whole.len() < sub.len() { + return None; + } + let max = whole.len() - sub.len(); + for i in 0..=max { + if whole[i..i + sub.len()] == sub[..]
{ + return Some(i); + } + } + None +} + +#[test] +fn test_find_subsequence() { + assert_eq!(find_subsequence(&vec![1], &vec![4]), None); + assert_eq!(find_subsequence(&vec![1], &vec![1]), Some(0)); + assert_eq!(find_subsequence(&vec![1, 2], &vec![1]), None); + assert_eq!(find_subsequence(&vec![1, 2], &vec![1, 2]), Some(0)); + assert_eq!(find_subsequence(&vec![1, 2], &vec![1, 3]), None); + assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 2]), Some(1)); + assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 3, 1]), None); + assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 3, 1, 2]), Some(3)); + assert_eq!( + find_subsequence(&vec![1, 1, 3], &vec![1, 1, 1, 3, 3]), + Some(1) + ); +} + +#[test] +fn test_optimal_add() { + let mut seq_table = UniqueSeqTable::new(); + // [0, 1, 2, 3] + assert_eq!(seq_table.add(&vec![0, 1, 2, 3]), 0); + assert_eq!(seq_table.add(&vec![0, 1, 2, 3]), 0); + assert_eq!(seq_table.add(&vec![1, 2, 3]), 1); + assert_eq!(seq_table.add(&vec![2, 3]), 2); + assert_eq!(seq_table.len(), 4); + // [0, 1, 2, 3, 4] + assert_eq!(seq_table.add(&vec![2, 3, 4]), 2); + assert_eq!(seq_table.len(), 5); + // [0, 1, 2, 3, 4, 6, 5, 7] + assert_eq!(seq_table.add(&vec![4, 6, 5, 7]), 4); + assert_eq!(seq_table.len(), 8); + // [0, 1, 2, 3, 4, 6, 5, 7, 8, 2, 3, 4] + assert_eq!(seq_table.add(&vec![8, 2, 3, 4]), 8); + assert_eq!(seq_table.add(&vec![8]), 8); + assert_eq!(seq_table.len(), 12); +} diff --git a/cranelift/codegen/shared/Cargo.toml b/cranelift/codegen/shared/Cargo.toml new file mode 100644 index 0000000000..42207d427a --- /dev/null +++ b/cranelift/codegen/shared/Cargo.toml @@ -0,0 +1,11 @@ +[package] +authors = ["The Cranelift Project Developers"] +name = "cranelift-codegen-shared" +version = "0.59.0" +description = "For code shared between cranelift-codegen-meta and cranelift-codegen" +license = "Apache-2.0 WITH LLVM-exception" +repository = "https://github.com/bytecodealliance/cranelift" +readme = "README.md" +edition = "2018" + +# Since this is a shared dependency of several packages, please strive to keep this dependency-free. diff --git a/cranelift/codegen/shared/LICENSE b/cranelift/codegen/shared/LICENSE new file mode 100644 index 0000000000..f9d81955f4 --- /dev/null +++ b/cranelift/codegen/shared/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
+ diff --git a/cranelift/codegen/shared/README.md b/cranelift/codegen/shared/README.md new file mode 100644 index 0000000000..54f9f5d6d2 --- /dev/null +++ b/cranelift/codegen/shared/README.md @@ -0,0 +1,2 @@ +This crate contains shared definitions for use in both `cranelift-codegen-meta` and +`cranelift-codegen`. diff --git a/cranelift/codegen/shared/src/condcodes.rs b/cranelift/codegen/shared/src/condcodes.rs new file mode 100644 index 0000000000..03ae865ce4 --- /dev/null +++ b/cranelift/codegen/shared/src/condcodes.rs @@ -0,0 +1,405 @@ +//! Condition codes for the Cranelift code generator. +//! +//! A condition code here is an enumerated type that determines how to compare two numbers. There +//! are different rules for comparing integers and floating point numbers, so they use different +//! condition codes. + +use core::fmt::{self, Display, Formatter}; +use core::str::FromStr; + +/// Common traits of condition codes. +pub trait CondCode: Copy { + /// Get the inverse condition code of `self`. + /// + /// The inverse condition code produces the opposite result for all comparisons. + /// That is, `cmp CC, x, y` is true if and only if `cmp CC.inverse(), x, y` is false. + #[must_use] + fn inverse(self) -> Self; + + /// Get the reversed condition code for `self`. + /// + /// The reversed condition code produces the same result as swapping `x` and `y` in the + /// comparison. That is, `cmp CC, x, y` is the same as `cmp CC.reverse(), y, x`. + #[must_use] + fn reverse(self) -> Self; +} + +/// Condition code for comparing integers. +/// +/// This condition code is used by the `icmp` instruction to compare integer values. There are +/// separate codes for comparing the integers as signed or unsigned numbers where it makes a +/// difference. +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub enum IntCC { + /// `==`. + Equal, + /// `!=`. + NotEqual, + /// Signed `<`. + SignedLessThan, + /// Signed `>=`. + SignedGreaterThanOrEqual, + /// Signed `>`. + SignedGreaterThan, + /// Signed `<=`. + SignedLessThanOrEqual, + /// Unsigned `<`. + UnsignedLessThan, + /// Unsigned `>=`. + UnsignedGreaterThanOrEqual, + /// Unsigned `>`. + UnsignedGreaterThan, + /// Unsigned `<=`. + UnsignedLessThanOrEqual, + /// Signed Overflow. + Overflow, + /// Signed No Overflow. + NotOverflow, +} + +impl CondCode for IntCC { + fn inverse(self) -> Self { + use self::IntCC::*; + match self { + Equal => NotEqual, + NotEqual => Equal, + SignedLessThan => SignedGreaterThanOrEqual, + SignedGreaterThanOrEqual => SignedLessThan, + SignedGreaterThan => SignedLessThanOrEqual, + SignedLessThanOrEqual => SignedGreaterThan, + UnsignedLessThan => UnsignedGreaterThanOrEqual, + UnsignedGreaterThanOrEqual => UnsignedLessThan, + UnsignedGreaterThan => UnsignedLessThanOrEqual, + UnsignedLessThanOrEqual => UnsignedGreaterThan, + Overflow => NotOverflow, + NotOverflow => Overflow, + } + } + + fn reverse(self) -> Self { + use self::IntCC::*; + match self { + Equal => Equal, + NotEqual => NotEqual, + SignedGreaterThan => SignedLessThan, + SignedGreaterThanOrEqual => SignedLessThanOrEqual, + SignedLessThan => SignedGreaterThan, + SignedLessThanOrEqual => SignedGreaterThanOrEqual, + UnsignedGreaterThan => UnsignedLessThan, + UnsignedGreaterThanOrEqual => UnsignedLessThanOrEqual, + UnsignedLessThan => UnsignedGreaterThan, + UnsignedLessThanOrEqual => UnsignedGreaterThanOrEqual, + Overflow => Overflow, + NotOverflow => NotOverflow, + } + } +} + +impl IntCC { + /// Get the corresponding IntCC with the equal component removed.
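+ /// (For instance, `SignedGreaterThanOrEqual.without_equal()` is + /// `SignedGreaterThan`.)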
+ /// For conditions without an equal component, this is a no-op. + pub fn without_equal(self) -> Self { + use self::IntCC::*; + match self { + SignedGreaterThan | SignedGreaterThanOrEqual => SignedGreaterThan, + SignedLessThan | SignedLessThanOrEqual => SignedLessThan, + UnsignedGreaterThan | UnsignedGreaterThanOrEqual => UnsignedGreaterThan, + UnsignedLessThan | UnsignedLessThanOrEqual => UnsignedLessThan, + _ => self, + } + } + + /// Get the corresponding IntCC with the signed component removed. + /// For conditions without a signed component, this is a no-op. + pub fn unsigned(self) -> Self { + use self::IntCC::*; + match self { + SignedGreaterThan | UnsignedGreaterThan => UnsignedGreaterThan, + SignedGreaterThanOrEqual | UnsignedGreaterThanOrEqual => UnsignedGreaterThanOrEqual, + SignedLessThan | UnsignedLessThan => UnsignedLessThan, + SignedLessThanOrEqual | UnsignedLessThanOrEqual => UnsignedLessThanOrEqual, + _ => self, + } + } + + /// Get the corresponding string condition code for the IntCC object. + pub fn to_static_str(self) -> &'static str { + use self::IntCC::*; + match self { + Equal => "eq", + NotEqual => "ne", + SignedGreaterThan => "sgt", + SignedGreaterThanOrEqual => "sge", + SignedLessThan => "slt", + SignedLessThanOrEqual => "sle", + UnsignedGreaterThan => "ugt", + UnsignedGreaterThanOrEqual => "uge", + UnsignedLessThan => "ult", + UnsignedLessThanOrEqual => "ule", + Overflow => "of", + NotOverflow => "nof", + } + } +} + +impl Display for IntCC { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.write_str(self.to_static_str()) + } +} + +impl FromStr for IntCC { + type Err = (); + + fn from_str(s: &str) -> Result<Self, Self::Err> { + use self::IntCC::*; + match s { + "eq" => Ok(Equal), + "ne" => Ok(NotEqual), + "sge" => Ok(SignedGreaterThanOrEqual), + "sgt" => Ok(SignedGreaterThan), + "sle" => Ok(SignedLessThanOrEqual), + "slt" => Ok(SignedLessThan), + "uge" => Ok(UnsignedGreaterThanOrEqual), + "ugt" => Ok(UnsignedGreaterThan), + "ule" => Ok(UnsignedLessThanOrEqual), + "ult" => Ok(UnsignedLessThan), + "of" => Ok(Overflow), + "nof" => Ok(NotOverflow), + _ => Err(()), + } + } +} + +/// Condition code for comparing floating point numbers. +/// +/// This condition code is used by the `fcmp` instruction to compare floating point values. Two +/// IEEE floating point values relate in exactly one of four ways: +/// +/// 1. `UN` - unordered when either value is NaN. +/// 2. `EQ` - equal numerical value. +/// 3. `LT` - `x` is less than `y`. +/// 4. `GT` - `x` is greater than `y`. +/// +/// Note that `0.0` and `-0.0` relate as `EQ` because they both represent the number 0. +/// +/// The condition codes described here are used to produce a single boolean value from the +/// comparison. The 14 condition codes here cover every possible combination of the relation above +/// except the impossible `!UN & !EQ & !LT & !GT` and the always true `UN | EQ | LT | GT`. +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub enum FloatCC { + /// EQ | LT | GT + Ordered, + /// UN + Unordered, + + /// EQ + Equal, + /// The C '!=' operator is the inverse of '==': `NotEqual`.
+ /// UN | LT | GT + NotEqual, + /// LT | GT + OrderedNotEqual, + /// UN | EQ + UnorderedOrEqual, + + /// LT + LessThan, + /// LT | EQ + LessThanOrEqual, + /// GT + GreaterThan, + /// GT | EQ + GreaterThanOrEqual, + + /// UN | LT + UnorderedOrLessThan, + /// UN | LT | EQ + UnorderedOrLessThanOrEqual, + /// UN | GT + UnorderedOrGreaterThan, + /// UN | GT | EQ + UnorderedOrGreaterThanOrEqual, +} + +impl CondCode for FloatCC { + fn inverse(self) -> Self { + use self::FloatCC::*; + match self { + Ordered => Unordered, + Unordered => Ordered, + Equal => NotEqual, + NotEqual => Equal, + OrderedNotEqual => UnorderedOrEqual, + UnorderedOrEqual => OrderedNotEqual, + LessThan => UnorderedOrGreaterThanOrEqual, + LessThanOrEqual => UnorderedOrGreaterThan, + GreaterThan => UnorderedOrLessThanOrEqual, + GreaterThanOrEqual => UnorderedOrLessThan, + UnorderedOrLessThan => GreaterThanOrEqual, + UnorderedOrLessThanOrEqual => GreaterThan, + UnorderedOrGreaterThan => LessThanOrEqual, + UnorderedOrGreaterThanOrEqual => LessThan, + } + } + fn reverse(self) -> Self { + use self::FloatCC::*; + match self { + Ordered => Ordered, + Unordered => Unordered, + Equal => Equal, + NotEqual => NotEqual, + OrderedNotEqual => OrderedNotEqual, + UnorderedOrEqual => UnorderedOrEqual, + LessThan => GreaterThan, + LessThanOrEqual => GreaterThanOrEqual, + GreaterThan => LessThan, + GreaterThanOrEqual => LessThanOrEqual, + UnorderedOrLessThan => UnorderedOrGreaterThan, + UnorderedOrLessThanOrEqual => UnorderedOrGreaterThanOrEqual, + UnorderedOrGreaterThan => UnorderedOrLessThan, + UnorderedOrGreaterThanOrEqual => UnorderedOrLessThanOrEqual, + } + } +} + +impl Display for FloatCC { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + use self::FloatCC::*; + f.write_str(match *self { + Ordered => "ord", + Unordered => "uno", + Equal => "eq", + NotEqual => "ne", + OrderedNotEqual => "one", + UnorderedOrEqual => "ueq", + LessThan => "lt", + LessThanOrEqual => "le", + GreaterThan => "gt", + GreaterThanOrEqual => "ge", + UnorderedOrLessThan => "ult", + UnorderedOrLessThanOrEqual => "ule", + UnorderedOrGreaterThan => "ugt", + UnorderedOrGreaterThanOrEqual => "uge", + }) + } +} + +impl FromStr for FloatCC { + type Err = (); + + fn from_str(s: &str) -> Result<Self, Self::Err> { + use self::FloatCC::*; + match s { + "ord" => Ok(Ordered), + "uno" => Ok(Unordered), + "eq" => Ok(Equal), + "ne" => Ok(NotEqual), + "one" => Ok(OrderedNotEqual), + "ueq" => Ok(UnorderedOrEqual), + "lt" => Ok(LessThan), + "le" => Ok(LessThanOrEqual), + "gt" => Ok(GreaterThan), + "ge" => Ok(GreaterThanOrEqual), + "ult" => Ok(UnorderedOrLessThan), + "ule" => Ok(UnorderedOrLessThanOrEqual), + "ugt" => Ok(UnorderedOrGreaterThan), + "uge" => Ok(UnorderedOrGreaterThanOrEqual), + _ => Err(()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::string::ToString; + + static INT_ALL: [IntCC; 12] = [ + IntCC::Equal, + IntCC::NotEqual, + IntCC::SignedLessThan, + IntCC::SignedGreaterThanOrEqual, + IntCC::SignedGreaterThan, + IntCC::SignedLessThanOrEqual, + IntCC::UnsignedLessThan, + IntCC::UnsignedGreaterThanOrEqual, + IntCC::UnsignedGreaterThan, + IntCC::UnsignedLessThanOrEqual, + IntCC::Overflow, + IntCC::NotOverflow, + ]; + + #[test] + fn int_inverse() { + for r in &INT_ALL { + let cc = *r; + let inv = cc.inverse(); + assert!(cc != inv); + assert_eq!(inv.inverse(), cc); + } + } + + #[test] + fn int_reverse() { + for r in &INT_ALL { + let cc = *r; + let rev = cc.reverse(); + assert_eq!(rev.reverse(), cc); + } + } + + #[test] + fn int_display() { + for r in &INT_ALL { + let cc
= *r; + assert_eq!(cc.to_string().parse(), Ok(cc)); + } + assert_eq!("bogus".parse::<IntCC>(), Err(())); + } + + static FLOAT_ALL: [FloatCC; 14] = [ + FloatCC::Ordered, + FloatCC::Unordered, + FloatCC::Equal, + FloatCC::NotEqual, + FloatCC::OrderedNotEqual, + FloatCC::UnorderedOrEqual, + FloatCC::LessThan, + FloatCC::LessThanOrEqual, + FloatCC::GreaterThan, + FloatCC::GreaterThanOrEqual, + FloatCC::UnorderedOrLessThan, + FloatCC::UnorderedOrLessThanOrEqual, + FloatCC::UnorderedOrGreaterThan, + FloatCC::UnorderedOrGreaterThanOrEqual, + ]; + + #[test] + fn float_inverse() { + for r in &FLOAT_ALL { + let cc = *r; + let inv = cc.inverse(); + assert!(cc != inv); + assert_eq!(inv.inverse(), cc); + } + } + + #[test] + fn float_reverse() { + for r in &FLOAT_ALL { + let cc = *r; + let rev = cc.reverse(); + assert_eq!(rev.reverse(), cc); + } + } + + #[test] + fn float_display() { + for r in &FLOAT_ALL { + let cc = *r; + assert_eq!(cc.to_string().parse(), Ok(cc)); + } + assert_eq!("bogus".parse::<FloatCC>(), Err(())); + } +} diff --git a/cranelift/codegen/shared/src/constant_hash.rs b/cranelift/codegen/shared/src/constant_hash.rs new file mode 100644 index 0000000000..ceac8e2722 --- /dev/null +++ b/cranelift/codegen/shared/src/constant_hash.rs @@ -0,0 +1,81 @@ +//! Build support for precomputed constant hash tables. +//! +//! This module can generate constant hash tables using open addressing and quadratic probing. +//! +//! The hash tables are arrays that are guaranteed to: +//! +//! - Have a power-of-two size. +//! - Contain at least one empty slot. +//! +//! This module provides build meta support for lookups in these tables, as well as the shared hash +//! function used for probing. + +use std::iter; + +/// A primitive hash function for matching opcodes. +pub fn simple_hash(s: &str) -> usize { + let mut h: u32 = 5381; + for c in s.chars() { + h = (h ^ c as u32).wrapping_add(h.rotate_right(6)); + } + h as usize +} + +/// Compute an open addressed, quadratically probed hash table containing +/// `items`. The returned table is a list containing the elements of the +/// iterable `items` and `None` in unused slots. +#[allow(clippy::float_arithmetic)] +pub fn generate_table<'cont, T, I: iter::Iterator<Item = &'cont T>, H: Fn(&T) -> usize>( + items: I, + num_items: usize, + hash_function: H, +) -> Vec<Option<&'cont T>> { + let size = (1.20 * num_items as f64) as usize; + + // Probing code's stop condition relies on the table having one vacant entry at least. + let size = if size.is_power_of_two() { + size * 2 + } else { + size.next_power_of_two() + }; + + let mut table = vec![None; size]; + + for i in items { + let mut h = hash_function(&i) % size; + let mut s = 0; + while table[h].is_some() { + s += 1; + h = (h + s) % size; + } + table[h] = Some(i); + } + + table +} + +#[cfg(test)] +mod tests { + use super::{generate_table, simple_hash}; + + #[test] + fn basic() { + assert_eq!(simple_hash("Hello"), 0x2fa70c01); + assert_eq!(simple_hash("world"), 0x5b0c31d5); + } + + #[test] + fn test_generate_table() { + let v = vec!["Hello".to_string(), "world".to_string()]; + let table = generate_table(v.iter(), v.len(), |s| simple_hash(&s)); + assert_eq!( + table, + vec![ + None, + Some(&"Hello".to_string()), + Some(&"world".to_string()), + None + ] + ); + } +} diff --git a/cranelift/codegen/shared/src/constants.rs b/cranelift/codegen/shared/src/constants.rs new file mode 100644 index 0000000000..b3f1377856 --- /dev/null +++ b/cranelift/codegen/shared/src/constants.rs @@ -0,0 +1,30 @@ +//!
This module contains constants that are shared between the codegen and the meta crate, so they +//! are kept in sync. + +// Numbering scheme for value types: +// +// 0: Void +// 0x01-0x6f: Special types +// 0x70-0x7d: Lane types +// 0x7e-0x7f: Reference types +// 0x80-0xff: Vector types +// +// Vector types are encoded with the lane type in the low 4 bits and log2(lanes) +// in the high 4 bits, giving a range of 2-256 lanes. + +/// Start of the lane types. +pub const LANE_BASE: u8 = 0x70; + +/// Base for reference types. +pub const REFERENCE_BASE: u8 = 0x7E; + +/// Start of the 2-lane vector types. +pub const VECTOR_BASE: u8 = 0x80; + +// Some constants about register classes and types. + +/// Guaranteed maximum number of top-level register classes with pressure tracking in any ISA. +pub const MAX_TRACKED_TOP_RCS: usize = 4; + +/// Guaranteed maximum number of register classes in any ISA. +pub const MAX_NUM_REG_CLASSES: usize = 32; diff --git a/cranelift/codegen/shared/src/isa/mod.rs b/cranelift/codegen/shared/src/isa/mod.rs new file mode 100644 index 0000000000..4d8e485f6c --- /dev/null +++ b/cranelift/codegen/shared/src/isa/mod.rs @@ -0,0 +1,3 @@ +//! Shared ISA-specific definitions. + +pub mod x86; diff --git a/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs b/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs new file mode 100644 index 0000000000..9edb2a6e6f --- /dev/null +++ b/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs @@ -0,0 +1,419 @@ +//! Provides a named interface to the `u16` Encoding bits. + +use std::ops::RangeInclusive; + +/// Named interface to the `u16` Encoding bits, representing an opcode. +/// +/// Cranelift requires each recipe to have a single encoding size in bytes. +/// X86 opcodes are variable length, so we use separate recipes for different +/// styles of opcodes and prefixes. The opcode format is indicated by the +/// recipe name prefix. +/// +/// VEX/XOP and EVEX prefixes are not yet supported. +/// Encodings using any of these prefixes are represented by separate recipes. +/// +/// The encoding bits are: +/// +/// 0-7: The opcode byte <op>. +/// 8-9: pp, mandatory prefix: +/// 00: none (Op*) +/// 01: 66 (Mp*) +/// 10: F3 (Mp*) +/// 11: F2 (Mp*) +/// 10-11: mm, opcode map: +/// 00: (Op1/Mp1) +/// 01: 0F (Op2/Mp2) +/// 10: 0F 38 (Op3/Mp3) +/// 11: 0F 3A (Op3/Mp3) +/// 12-14: rrr, opcode bits for the ModR/M byte for certain opcodes. +/// 15: REX.W bit (or VEX.W/E) +#[derive(Copy, Clone, PartialEq)] +pub struct EncodingBits(u16); +const OPCODE: RangeInclusive<u16> = 0..=7; +const OPCODE_PREFIX: RangeInclusive<u16> = 8..=11; // Includes pp and mm. +const RRR: RangeInclusive<u16> = 12..=14; +const REX_W: RangeInclusive<u16> = 15..=15; + +impl From<u16> for EncodingBits { + fn from(bits: u16) -> Self { + Self(bits) + } +} + +impl EncodingBits { + /// Constructs a new EncodingBits from parts. + pub fn new(op_bytes: &[u8], rrr: u16, rex_w: u16) -> Self { + assert!( + !op_bytes.is_empty(), + "op_bytes must include at least one opcode byte" + ); + let mut new = Self::from(0); + let last_byte = op_bytes[op_bytes.len() - 1]; + new.write(OPCODE, last_byte as u16); + let prefix: u8 = OpcodePrefix::from_opcode(op_bytes).into(); + new.write(OPCODE_PREFIX, prefix as u16); + new.write(RRR, rrr); + new.write(REX_W, rex_w); + new + } + + /// Returns a copy of the EncodingBits with the RRR bits set.
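+ /// (Illustration: starting from bits whose `rrr()` is 0, `.with_rrr(0b010)` + /// writes `0b010` into bits 12-14.)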
+ #[inline] + pub fn with_rrr(mut self, rrr: u8) -> Self { + debug_assert_eq!(self.rrr(), 0); + self.write(RRR, rrr.into()); + self + } + + /// Returns a copy of the EncodingBits with the REX.W bit set. + #[inline] + pub fn with_rex_w(mut self) -> Self { + debug_assert_eq!(self.rex_w(), 0); + self.write(REX_W, 1); + self + } + + /// Returns the raw bits. + #[inline] + pub fn bits(self) -> u16 { + self.0 + } + + /// Convenience method for writing bits to a specific range. + #[inline] + fn write(&mut self, range: RangeInclusive<u16>, value: u16) { + assert!(ExactSizeIterator::len(&range) > 0); + let size = range.end() - range.start() + 1; // Calculate the number of bits in the range. + let mask = (1 << size) - 1; // Generate a bit mask. + debug_assert!( + value <= mask, + "The written value must fit in {} bits.", + size + ); + let mask_complement = !(mask << *range.start()); // Create the bitwise complement for the clear mask. + self.0 &= mask_complement; // Clear the bits in `range`. + let value = (value & mask) << *range.start(); // Place the value in the correct location. + self.0 |= value; // Modify the bits in `range`. + } + + /// Convenience method for reading bits from a specific range. + #[inline] + fn read(self, range: RangeInclusive<u16>) -> u8 { + assert!(ExactSizeIterator::len(&range) > 0); + let size = range.end() - range.start() + 1; // Calculate the number of bits in the range. + debug_assert!(size <= 8, "This structure expects ranges of at most 8 bits"); + let mask = (1 << size) - 1; // Generate a bit mask. + ((self.0 >> *range.start()) & mask) as u8 + } + + /// Instruction opcode byte, without the prefix. + #[inline] + pub fn opcode_byte(self) -> u8 { + self.read(OPCODE) + } + + /// Prefix kind for the instruction, as an enum. + #[inline] + pub fn prefix(self) -> OpcodePrefix { + OpcodePrefix::from(self.read(OPCODE_PREFIX)) + } + + /// Extracts the PP bits of the OpcodePrefix. + #[inline] + pub fn pp(self) -> u8 { + self.prefix().to_primitive() & 0x3 + } + + /// Extracts the MM bits of the OpcodePrefix. + #[inline] + pub fn mm(self) -> u8 { + (self.prefix().to_primitive() >> 2) & 0x3 + } + + /// Bits for the ModR/M byte for certain opcodes. + #[inline] + pub fn rrr(self) -> u8 { + self.read(RRR) + } + + /// REX.W bit (or VEX.W/E). + #[inline] + pub fn rex_w(self) -> u8 { + self.read(REX_W) + } +} + +/// Opcode prefix representation. +/// +/// The prefix type occupies four of the EncodingBits.
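+/// +/// For example, the prefix value `0b0101` decodes as pp = `01` (a mandatory `66` prefix) and +/// mm = `01` (the `0F` opcode map), which is the `Mp2_66_0f` variant below.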
+#[allow(non_camel_case_types)] +#[allow(missing_docs)] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum OpcodePrefix { + Op1, + Mp1_66, + Mp1_f3, + Mp1_f2, + Op2_0f, + Mp2_66_0f, + Mp2_f3_0f, + Mp2_f2_0f, + Op3_0f_38, + Mp3_66_0f_38, + Mp3_f3_0f_38, + Mp3_f2_0f_38, + Op3_0f_3a, + Mp3_66_0f_3a, + Mp3_f3_0f_3a, + Mp3_f2_0f_3a, +} + +impl From<u8> for OpcodePrefix { + fn from(n: u8) -> Self { + use OpcodePrefix::*; + match n { + 0b0000 => Op1, + 0b0001 => Mp1_66, + 0b0010 => Mp1_f3, + 0b0011 => Mp1_f2, + 0b0100 => Op2_0f, + 0b0101 => Mp2_66_0f, + 0b0110 => Mp2_f3_0f, + 0b0111 => Mp2_f2_0f, + 0b1000 => Op3_0f_38, + 0b1001 => Mp3_66_0f_38, + 0b1010 => Mp3_f3_0f_38, + 0b1011 => Mp3_f2_0f_38, + 0b1100 => Op3_0f_3a, + 0b1101 => Mp3_66_0f_3a, + 0b1110 => Mp3_f3_0f_3a, + 0b1111 => Mp3_f2_0f_3a, + _ => panic!("invalid opcode prefix"), + } + } +} + +impl Into<u8> for OpcodePrefix { + fn into(self) -> u8 { + use OpcodePrefix::*; + match self { + Op1 => 0b0000, + Mp1_66 => 0b0001, + Mp1_f3 => 0b0010, + Mp1_f2 => 0b0011, + Op2_0f => 0b0100, + Mp2_66_0f => 0b0101, + Mp2_f3_0f => 0b0110, + Mp2_f2_0f => 0b0111, + Op3_0f_38 => 0b1000, + Mp3_66_0f_38 => 0b1001, + Mp3_f3_0f_38 => 0b1010, + Mp3_f2_0f_38 => 0b1011, + Op3_0f_3a => 0b1100, + Mp3_66_0f_3a => 0b1101, + Mp3_f3_0f_3a => 0b1110, + Mp3_f2_0f_3a => 0b1111, + } + } +} + +impl OpcodePrefix { + /// Convert an opcode prefix to a `u8`; this is a convenience proxy for `Into<u8>`. + fn to_primitive(self) -> u8 { + self.into() + } + + /// Extracts the OpcodePrefix from the opcode. + pub fn from_opcode(op_bytes: &[u8]) -> Self { + assert!(!op_bytes.is_empty(), "at least one opcode byte"); + + let prefix_bytes = &op_bytes[..op_bytes.len() - 1]; + match prefix_bytes { + [] => Self::Op1, + [0x66] => Self::Mp1_66, + [0xf3] => Self::Mp1_f3, + [0xf2] => Self::Mp1_f2, + [0x0f] => Self::Op2_0f, + [0x66, 0x0f] => Self::Mp2_66_0f, + [0xf3, 0x0f] => Self::Mp2_f3_0f, + [0xf2, 0x0f] => Self::Mp2_f2_0f, + [0x0f, 0x38] => Self::Op3_0f_38, + [0x66, 0x0f, 0x38] => Self::Mp3_66_0f_38, + [0xf3, 0x0f, 0x38] => Self::Mp3_f3_0f_38, + [0xf2, 0x0f, 0x38] => Self::Mp3_f2_0f_38, + [0x0f, 0x3a] => Self::Op3_0f_3a, + [0x66, 0x0f, 0x3a] => Self::Mp3_66_0f_3a, + [0xf3, 0x0f, 0x3a] => Self::Mp3_f3_0f_3a, + [0xf2, 0x0f, 0x3a] => Self::Mp3_f2_0f_3a, + _ => { + panic!("unexpected opcode sequence: {:?}", op_bytes); + } + } + } + + /// Returns the recipe name prefix. + /// + /// At the moment, each similar OpcodePrefix group is given its own Recipe. + /// In order to distinguish them, this string is prefixed. + pub fn recipe_name_prefix(self) -> &'static str { + use OpcodePrefix::*; + match self { + Op1 => "Op1", + Op2_0f => "Op2", + Op3_0f_38 | Op3_0f_3a => "Op3", + Mp1_66 | Mp1_f3 | Mp1_f2 => "Mp1", + Mp2_66_0f | Mp2_f3_0f | Mp2_f2_0f => "Mp2", + Mp3_66_0f_38 | Mp3_f3_0f_38 | Mp3_f2_0f_38 => "Mp3", + Mp3_66_0f_3a | Mp3_f3_0f_3a | Mp3_f2_0f_3a => "Mp3", + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Helper function for prefix_roundtrip() to avoid long lines. + fn test_roundtrip(p: OpcodePrefix) { + assert_eq!(p, OpcodePrefix::from(p.to_primitive())); + } + + /// Tests that each opcode prefix round-trips to and from its `u8` encoding.
+ #[test] + fn prefix_roundtrip() { + test_roundtrip(OpcodePrefix::Op1); + test_roundtrip(OpcodePrefix::Mp1_66); + test_roundtrip(OpcodePrefix::Mp1_f3); + test_roundtrip(OpcodePrefix::Mp1_f2); + test_roundtrip(OpcodePrefix::Op2_0f); + test_roundtrip(OpcodePrefix::Mp2_66_0f); + test_roundtrip(OpcodePrefix::Mp2_f3_0f); + test_roundtrip(OpcodePrefix::Mp2_f2_0f); + test_roundtrip(OpcodePrefix::Op3_0f_38); + test_roundtrip(OpcodePrefix::Mp3_66_0f_38); + test_roundtrip(OpcodePrefix::Mp3_f3_0f_38); + test_roundtrip(OpcodePrefix::Mp3_f2_0f_38); + test_roundtrip(OpcodePrefix::Op3_0f_3a); + test_roundtrip(OpcodePrefix::Mp3_66_0f_3a); + test_roundtrip(OpcodePrefix::Mp3_f3_0f_3a); + test_roundtrip(OpcodePrefix::Mp3_f2_0f_3a); + } + + #[test] + fn prefix_to_name() { + assert_eq!(OpcodePrefix::Op1.recipe_name_prefix(), "Op1"); + assert_eq!(OpcodePrefix::Op2_0f.recipe_name_prefix(), "Op2"); + assert_eq!(OpcodePrefix::Op3_0f_38.recipe_name_prefix(), "Op3"); + assert_eq!(OpcodePrefix::Mp1_66.recipe_name_prefix(), "Mp1"); + assert_eq!(OpcodePrefix::Mp2_66_0f.recipe_name_prefix(), "Mp2"); + assert_eq!(OpcodePrefix::Mp3_66_0f_3a.recipe_name_prefix(), "Mp3"); + } + + /// Tests that the opcode_byte is the low byte of the EncodingBits. + #[test] + fn encodingbits_opcode_byte() { + let enc = EncodingBits::from(0x00ff); + assert_eq!(enc.opcode_byte(), 0xff); + assert_eq!(enc.prefix().to_primitive(), 0x0); + assert_eq!(enc.rrr(), 0x0); + assert_eq!(enc.rex_w(), 0x0); + + let enc = EncodingBits::from(0x00cd); + assert_eq!(enc.opcode_byte(), 0xcd); + } + + /// Tests that the OpcodePrefix is encoded correctly. + #[test] + fn encodingbits_prefix() { + let enc = EncodingBits::from(0x0c00); + assert_eq!(enc.opcode_byte(), 0x00); + assert_eq!(enc.prefix().to_primitive(), 0xc); + assert_eq!(enc.prefix(), OpcodePrefix::Op3_0f_3a); + assert_eq!(enc.rrr(), 0x0); + assert_eq!(enc.rex_w(), 0x0); + } + + /// Tests that the PP bits are encoded correctly. + #[test] + fn encodingbits_pp() { + let enc = EncodingBits::from(0x0300); + assert_eq!(enc.opcode_byte(), 0x0); + assert_eq!(enc.pp(), 0x3); + assert_eq!(enc.mm(), 0x0); + assert_eq!(enc.rrr(), 0x0); + assert_eq!(enc.rex_w(), 0x0); + } + + /// Tests that the MM bits are encoded correctly. + #[test] + fn encodingbits_mm() { + let enc = EncodingBits::from(0x0c00); + assert_eq!(enc.opcode_byte(), 0x0); + assert_eq!(enc.pp(), 0x00); + assert_eq!(enc.mm(), 0x3); + assert_eq!(enc.rrr(), 0x0); + assert_eq!(enc.rex_w(), 0x0); + } + + /// Tests that the ModR/M bits are encoded correctly. + #[test] + fn encodingbits_rrr() { + let enc = EncodingBits::from(0x5000); + assert_eq!(enc.opcode_byte(), 0x0); + assert_eq!(enc.prefix().to_primitive(), 0x0); + assert_eq!(enc.rrr(), 0x5); + assert_eq!(enc.rex_w(), 0x0); + } + + /// Tests that the REX.W bit is encoded correctly. + #[test] + fn encodingbits_rex_w() { + let enc = EncodingBits::from(0x8000); + assert_eq!(enc.opcode_byte(), 0x00); + assert_eq!(enc.prefix().to_primitive(), 0x0); + assert_eq!(enc.rrr(), 0x0); + assert_eq!(enc.rex_w(), 0x1); + } + + /// Tests setting and unsetting a bit using EncodingBits::write. + #[test] + fn encodingbits_flip() { + let mut bits = EncodingBits::from(0); + let range = 2..=2; + + bits.write(range.clone(), 1); + assert_eq!(bits.bits(), 0b100); + + bits.write(range, 0); + assert_eq!(bits.bits(), 0b000); + } + + /// Tests a round-trip of EncodingBits from/to a u16 (hardcoded endianness).
+ #[test] + fn encodingbits_roundtrip() { + let bits: u16 = 0x1234; + assert_eq!(EncodingBits::from(bits).bits(), bits); + } + + #[test] + // I purposely want to divide the bits using the ranges defined above. + #[allow(clippy::inconsistent_digit_grouping)] + fn encodingbits_construction() { + assert_eq!( + EncodingBits::new(&[0x66, 0x40], 5, 1).bits(), + 0b1_101_0001_01000000 // 1 = rex_w, 101 = rrr, 0001 = prefix, 01000000 = opcode + ); + } + + #[test] + #[should_panic] + fn encodingbits_panics_at_write_to_invalid_range() { + EncodingBits::from(0).write(1..=0, 42); + } + + #[test] + #[should_panic] + fn encodingbits_panics_at_read_to_invalid_range() { + EncodingBits::from(0).read(1..=0); + } +} diff --git a/cranelift/codegen/shared/src/isa/x86/mod.rs b/cranelift/codegen/shared/src/isa/x86/mod.rs new file mode 100644 index 0000000000..fb45ae56c3 --- /dev/null +++ b/cranelift/codegen/shared/src/isa/x86/mod.rs @@ -0,0 +1,4 @@ +//! Shared x86-specific definitions. + +mod encoding_bits; +pub use encoding_bits::*; diff --git a/cranelift/codegen/shared/src/lib.rs b/cranelift/codegen/shared/src/lib.rs new file mode 100644 index 0000000000..4a0cf24873 --- /dev/null +++ b/cranelift/codegen/shared/src/lib.rs @@ -0,0 +1,29 @@ +//! This library contains code that is common to both the `cranelift-codegen` and +//! `cranelift-codegen-meta` libraries. + +#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)] +#![warn(unused_import_braces)] +#![cfg_attr(feature = "std", deny(unstable_features))] +#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))] +#![cfg_attr( + feature = "cargo-clippy", + warn( + clippy::float_arithmetic, + clippy::mut_mut, + clippy::nonminimal_bool, + clippy::option_map_unwrap_or, + clippy::option_map_unwrap_or_else, + clippy::print_stdout, + clippy::unicode_not_nfc, + clippy::use_self + ) +)] + +pub mod condcodes; +pub mod constant_hash; +pub mod constants; +pub mod isa; + +/// Version number of this crate. +pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/cranelift/codegen/src/abi.rs b/cranelift/codegen/src/abi.rs new file mode 100644 index 0000000000..3a3ed7a53b --- /dev/null +++ b/cranelift/codegen/src/abi.rs @@ -0,0 +1,230 @@ +//! Common helper code for ABI lowering. +//! +//! This module provides functions and data structures that are useful for implementing the +//! `TargetIsa::legalize_signature()` method. + +use crate::ir::{AbiParam, ArgumentExtension, ArgumentLoc, Type}; +use alloc::borrow::Cow; +use alloc::vec::Vec; +use core::cmp::Ordering; + +/// Legalization action to perform on a single argument or return value when converting a +/// signature. +/// +/// An argument may go through a sequence of legalization steps before it reaches the final +/// `Assign` action. +#[derive(Clone, Copy, Debug)] +pub enum ArgAction { + /// Assign the argument to the given location. + Assign(ArgumentLoc), + + /// Convert the argument, then call again. + /// + /// This action can split an integer type into two smaller integer arguments, or it can split a + /// SIMD vector into halves. + Convert(ValueConversion), +} + +impl From<ArgumentLoc> for ArgAction { + fn from(x: ArgumentLoc) -> Self { + Self::Assign(x) + } +} + +impl From<ValueConversion> for ArgAction { + fn from(x: ValueConversion) -> Self { + Self::Convert(x) + } +} + +/// Legalization action to be applied to a value that is being passed to or from a legalized ABI.
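+/// +/// For example (exercised by the `legalize` test at the bottom of this file), passing an `i64` +/// where the ABI expects `i32` arguments calls for an `IntSplit`, while passing an `i16` into a +/// sign-extended `i32` slot calls for a `Sext(types::I32)`.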
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ValueConversion { + /// Split an integer type into low and high parts, using `isplit`. + IntSplit, + + /// Split a vector type into halves with identical lane types, using `vsplit`. + VectorSplit, + + /// Bit-cast to an integer type of the same size. + IntBits, + + /// Sign-extend integer value to the required type. + Sext(Type), + + /// Unsigned zero-extend value to the required type. + Uext(Type), +} + +impl ValueConversion { + /// Apply this conversion to a type, returning the converted type. + pub fn apply(self, ty: Type) -> Type { + match self { + Self::IntSplit => ty.half_width().expect("Integer type too small to split"), + Self::VectorSplit => ty.half_vector().expect("Not a vector"), + Self::IntBits => Type::int(ty.bits()).expect("Bad integer size"), + Self::Sext(nty) | Self::Uext(nty) => nty, + } + } + + /// Is this a split conversion that results in two arguments? + pub fn is_split(self) -> bool { + match self { + Self::IntSplit | Self::VectorSplit => true, + _ => false, + } + } +} + +/// Common trait for assigning arguments to registers or stack locations. +/// +/// This will be implemented by individual ISAs. +pub trait ArgAssigner { + /// Pick an assignment action for function argument (or return value) `arg`. + fn assign(&mut self, arg: &AbiParam) -> ArgAction; +} + +/// Legalize the arguments in `args` using the given argument assigner. +/// +/// This function can be used for both arguments and return values. +pub fn legalize_args<AA: ArgAssigner>(args: &[AbiParam], aa: &mut AA) -> Option<Vec<AbiParam>> { + let mut args = Cow::Borrowed(args); + + // Iterate over the arguments. + // We may need to mutate the vector in place, so don't use a normal iterator, and clone the + // argument to avoid holding a reference. + let mut argno = 0; + while let Some(arg) = args.get(argno).cloned() { + // Leave the pre-assigned arguments alone. + // We'll assume that they don't interfere with our assignments. + if arg.location.is_assigned() { + argno += 1; + continue; + } + + match aa.assign(&arg) { + // Assign argument to a location and move on to the next one. + ArgAction::Assign(loc) => { + args.to_mut()[argno].location = loc; + argno += 1; + } + // Split this argument into two smaller ones. Then revisit both. + ArgAction::Convert(conv) => { + let value_type = conv.apply(arg.value_type); + let new_arg = AbiParam { value_type, ..arg }; + args.to_mut()[argno].value_type = value_type; + if conv.is_split() { + args.to_mut().insert(argno + 1, new_arg); + } + } + } + } + + match args { + Cow::Borrowed(_) => None, + Cow::Owned(a) => Some(a), + } +} + +/// Determine the right action to take when passing a `have` value type to a call signature where +/// the next argument is `arg` which has a different value type. +/// +/// The signature legalization process in `legalize_args` above can replace a single argument value +/// with multiple arguments of smaller types. It can also change the type of an integer argument to +/// a larger integer type, requiring the smaller value to be sign- or zero-extended. +/// +/// The legalizer needs to repair the values at all ABI boundaries: +/// +/// - Incoming function arguments to the entry block. +/// - Function arguments passed to a call. +/// - Return values from a call. +/// - Return values passed to a return instruction. +/// +/// The `legalize_abi_value` function helps the legalizer with the process.
When the legalizer +/// needs to pass a pre-legalized `have` argument, but the ABI argument `arg` has a different value +/// type, `legalize_abi_value(have, arg)` tells the legalizer how to create the needed value type +/// for the argument. +/// +/// It may be necessary to call `legalize_abi_value` more than once for a given argument before the +/// desired argument type appears. This will happen when a vector or integer type needs to be split +/// more than once, for example. +pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion { + let have_bits = have.bits(); + let arg_bits = arg.value_type.bits(); + + match have_bits.cmp(&arg_bits) { + // We have fewer bits than the ABI argument. + Ordering::Less => { + debug_assert!( + have.is_int() && arg.value_type.is_int(), + "Can only extend integer values" + ); + match arg.extension { + ArgumentExtension::Uext => ValueConversion::Uext(arg.value_type), + ArgumentExtension::Sext => ValueConversion::Sext(arg.value_type), + _ => panic!("No argument extension specified"), + } + } + // We have the same number of bits as the argument. + Ordering::Equal => { + // This must be an integer vector that is split and then extended. + debug_assert!(arg.value_type.is_int()); + debug_assert!(have.is_vector(), "expected vector type, got {}", have); + ValueConversion::VectorSplit + } + // We have more bits than the argument. + Ordering::Greater => { + if have.is_vector() { + ValueConversion::VectorSplit + } else if have.is_float() { + // Convert a float to int so it can be split the next time. + // ARM would do this to pass an `f64` in two registers. + ValueConversion::IntBits + } else { + ValueConversion::IntSplit + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ir::types; + use crate::ir::AbiParam; + + #[test] + fn legalize() { + let mut arg = AbiParam::new(types::I32); + + assert_eq!( + legalize_abi_value(types::I64X2, &arg), + ValueConversion::VectorSplit + ); + assert_eq!( + legalize_abi_value(types::I64, &arg), + ValueConversion::IntSplit + ); + + // Vector of integers is broken down, then sign-extended. + arg.extension = ArgumentExtension::Sext; + assert_eq!( + legalize_abi_value(types::I16X4, &arg), + ValueConversion::VectorSplit + ); + assert_eq!( + legalize_abi_value(types::I16.by(2).unwrap(), &arg), + ValueConversion::VectorSplit + ); + assert_eq!( + legalize_abi_value(types::I16, &arg), + ValueConversion::Sext(types::I32) + ); + + // 64-bit float is split as an integer. + assert_eq!( + legalize_abi_value(types::F64, &arg), + ValueConversion::IntBits + ); + } +} diff --git a/cranelift/codegen/src/binemit/memorysink.rs b/cranelift/codegen/src/binemit/memorysink.rs new file mode 100644 index 0000000000..60c7fe251e --- /dev/null +++ b/cranelift/codegen/src/binemit/memorysink.rs @@ -0,0 +1,205 @@ +//! Code sink that writes binary machine code into contiguous memory. +//! +//! The `CodeSink` trait is the most general way of extracting binary machine code from Cranelift, +//! and it is implemented by things like the `test binemit` file test driver to generate +//! hexadecimal machine code. The `CodeSink` has some undesirable performance properties because of +//! the dual abstraction: `TargetIsa` is a trait object implemented by each supported ISA, so it +//! can't have any generic functions that could be specialized for each `CodeSink` implementation. +//! This results in many virtual function callbacks (one per `put*` call) when +//! `TargetIsa::emit_inst()` is used. +//! +//! 
The `MemoryCodeSink` type fixes the performance problem because it is a type known to +//! `TargetIsa` so it can specialize its machine code generation for the type. The trade-off is +//! that a `MemoryCodeSink` will always write binary machine code to raw memory. It forwards any +//! relocations to a `RelocSink` trait object. Relocations are less frequent than the +//! `CodeSink::put*` methods, so the performance impact of the virtual callbacks is less severe. +use super::{Addend, CodeInfo, CodeOffset, CodeSink, Reloc}; +use crate::binemit::stackmap::Stackmap; +use crate::ir::entities::Value; +use crate::ir::{ConstantOffset, ExternalName, Function, JumpTable, SourceLoc, TrapCode}; +use crate::isa::TargetIsa; +use core::ptr::write_unaligned; + +/// A `CodeSink` that writes binary machine code directly into memory. +/// +/// A `MemoryCodeSink` object should be used when emitting a Cranelift IR function into executable +/// memory. It writes machine code directly to a raw pointer without any bounds checking, so make +/// sure to allocate enough memory for the whole function. The number of bytes required is returned +/// by the `Context::compile()` function. +/// +/// Any relocations in the function are forwarded to the `RelocSink` trait object. +/// +/// Note that `MemoryCodeSink` writes multi-byte values in the native byte order of the host. This +/// is not the right thing to do for cross compilation. +pub struct MemoryCodeSink<'a> { + /// Pointer to start of sink's preallocated memory. + data: *mut u8, + /// Offset is isize because its major consumer needs it in that form. + offset: isize, + relocs: &'a mut dyn RelocSink, + traps: &'a mut dyn TrapSink, + stackmaps: &'a mut dyn StackmapSink, + /// Information about the generated code and read-only data. + pub info: CodeInfo, +} + +impl<'a> MemoryCodeSink<'a> { + /// Create a new memory code sink that writes a function to the memory pointed to by `data`. + /// + /// # Safety + /// + /// This function is unsafe since `MemoryCodeSink` does not perform bounds checking on the + /// memory buffer, and it can't guarantee that the `data` pointer is valid. + pub unsafe fn new( + data: *mut u8, + relocs: &'a mut dyn RelocSink, + traps: &'a mut dyn TrapSink, + stackmaps: &'a mut dyn StackmapSink, + ) -> Self { + Self { + data, + offset: 0, + info: CodeInfo { + code_size: 0, + jumptables_size: 0, + rodata_size: 0, + total_size: 0, + }, + relocs, + traps, + stackmaps, + } + } +} + +/// A trait for receiving relocations for code that is emitted directly into memory. +pub trait RelocSink { + /// Add a relocation referencing a block at the current offset. + fn reloc_block(&mut self, _: CodeOffset, _: Reloc, _: CodeOffset); + + /// Add a relocation referencing an external symbol at the current offset. + fn reloc_external(&mut self, _: CodeOffset, _: Reloc, _: &ExternalName, _: Addend); + + /// Add a relocation referencing a constant. + fn reloc_constant(&mut self, _: CodeOffset, _: Reloc, _: ConstantOffset); + + /// Add a relocation referencing a jump table. + fn reloc_jt(&mut self, _: CodeOffset, _: Reloc, _: JumpTable); +} + +/// A trait for receiving trap codes and offsets. +/// +/// If you don't need information about possible traps, you can use the +/// [`NullTrapSink`](NullTrapSink) implementation. +pub trait TrapSink { + /// Add trap information for a specific offset.
+ fn trap(&mut self, _: CodeOffset, _: SourceLoc, _: TrapCode); +} + +impl<'a> MemoryCodeSink<'a> { + fn write<T>(&mut self, x: T) { + unsafe { + #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] + write_unaligned(self.data.offset(self.offset) as *mut T, x); + self.offset += core::mem::size_of::<T>() as isize; + } + } +} + +impl<'a> CodeSink for MemoryCodeSink<'a> { + fn offset(&self) -> CodeOffset { + self.offset as CodeOffset + } + + fn put1(&mut self, x: u8) { + self.write(x); + } + + fn put2(&mut self, x: u16) { + self.write(x); + } + + fn put4(&mut self, x: u32) { + self.write(x); + } + + fn put8(&mut self, x: u64) { + self.write(x); + } + + fn reloc_block(&mut self, rel: Reloc, block_offset: CodeOffset) { + let ofs = self.offset(); + self.relocs.reloc_block(ofs, rel, block_offset); + } + + fn reloc_external(&mut self, rel: Reloc, name: &ExternalName, addend: Addend) { + let ofs = self.offset(); + self.relocs.reloc_external(ofs, rel, name, addend); + } + + fn reloc_constant(&mut self, rel: Reloc, constant_offset: ConstantOffset) { + let ofs = self.offset(); + self.relocs.reloc_constant(ofs, rel, constant_offset); + } + + fn reloc_jt(&mut self, rel: Reloc, jt: JumpTable) { + let ofs = self.offset(); + self.relocs.reloc_jt(ofs, rel, jt); + } + + fn trap(&mut self, code: TrapCode, srcloc: SourceLoc) { + let ofs = self.offset(); + self.traps.trap(ofs, srcloc, code); + } + + fn begin_jumptables(&mut self) { + self.info.code_size = self.offset(); + } + + fn begin_rodata(&mut self) { + self.info.jumptables_size = self.offset() - self.info.code_size; + } + + fn end_codegen(&mut self) { + self.info.rodata_size = self.offset() - (self.info.jumptables_size + self.info.code_size); + self.info.total_size = self.offset(); + } + + fn add_stackmap(&mut self, val_list: &[Value], func: &Function, isa: &dyn TargetIsa) { + let ofs = self.offset(); + let stackmap = Stackmap::from_values(&val_list, func, isa); + self.stackmaps.add_stackmap(ofs, stackmap); + } +} + +/// A `RelocSink` implementation that does nothing, which is convenient when +/// compiling code that does not relocate anything. +pub struct NullRelocSink {} + +impl RelocSink for NullRelocSink { + fn reloc_block(&mut self, _: u32, _: Reloc, _: u32) {} + fn reloc_external(&mut self, _: u32, _: Reloc, _: &ExternalName, _: i64) {} + fn reloc_constant(&mut self, _: CodeOffset, _: Reloc, _: ConstantOffset) {} + fn reloc_jt(&mut self, _: u32, _: Reloc, _: JumpTable) {} +} + +/// A `TrapSink` implementation that does nothing, which is convenient when +/// compiling code that does not rely on trapping semantics. +pub struct NullTrapSink {} + +impl TrapSink for NullTrapSink { + fn trap(&mut self, _offset: CodeOffset, _srcloc: SourceLoc, _code: TrapCode) {} +} + +/// A trait for emitting stackmaps. +pub trait StackmapSink { + /// Output a bitmap of the stack representing the live reference variables at this code offset. + fn add_stackmap(&mut self, _: CodeOffset, _: Stackmap); +} + +/// Placeholder StackmapSink that does nothing. +pub struct NullStackmapSink {} + +impl StackmapSink for NullStackmapSink { + fn add_stackmap(&mut self, _: CodeOffset, _: Stackmap) {} +} diff --git a/cranelift/codegen/src/binemit/mod.rs b/cranelift/codegen/src/binemit/mod.rs new file mode 100644 index 0000000000..b52025e887 --- /dev/null +++ b/cranelift/codegen/src/binemit/mod.rs @@ -0,0 +1,246 @@ +//! Binary machine code emission. +//! +//! The `binemit` module contains code for translating Cranelift's intermediate representation into +//!
binary machine code. + +mod memorysink; +mod relaxation; +mod shrink; +mod stackmap; + +pub use self::memorysink::{ + MemoryCodeSink, NullRelocSink, NullStackmapSink, NullTrapSink, RelocSink, StackmapSink, + TrapSink, +}; +pub use self::relaxation::relax_branches; +pub use self::shrink::shrink_instructions; +pub use self::stackmap::Stackmap; +use crate::ir::entities::Value; +use crate::ir::{ConstantOffset, ExternalName, Function, Inst, JumpTable, SourceLoc, TrapCode}; +use crate::isa::TargetIsa; +pub use crate::regalloc::RegDiversions; +use core::fmt; +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; + +/// Offset in bytes from the beginning of the function. +/// +/// Cranelift can be used as a cross compiler, so we don't want to use a type like `usize` which +/// depends on the *host* platform, not the *target* platform. +pub type CodeOffset = u32; + +/// Addend to add to the symbol value. +pub type Addend = i64; + +/// Relocation kinds for every ISA +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum Reloc { + /// absolute 4-byte + Abs4, + /// absolute 8-byte + Abs8, + /// x86 PC-relative 4-byte + X86PCRel4, + /// x86 PC-relative 4-byte offset to trailing rodata + X86PCRelRodata4, + /// x86 call to PC-relative 4-byte + X86CallPCRel4, + /// x86 call to PLT-relative 4-byte + X86CallPLTRel4, + /// x86 GOT PC-relative 4-byte + X86GOTPCRel4, + /// Arm32 call target + Arm32Call, + /// Arm64 call target + Arm64Call, + /// RISC-V call target + RiscvCall, + + /// Elf x86_64 32 bit signed PC relative offset to two GOT entries for GD symbol. + ElfX86_64TlsGd, + + /// Mach-O x86_64 32 bit signed PC relative offset to a `__thread_vars` entry. + MachOX86_64Tlv, +} + +impl fmt::Display for Reloc { + /// Display trait implementation drops the arch, since it's used in contexts where the arch is + /// already unambiguous, e.g. clif syntax with isa specified. In other contexts, use Debug. + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Self::Abs4 => write!(f, "Abs4"), + Self::Abs8 => write!(f, "Abs8"), + Self::X86PCRel4 => write!(f, "PCRel4"), + Self::X86PCRelRodata4 => write!(f, "PCRelRodata4"), + Self::X86CallPCRel4 => write!(f, "CallPCRel4"), + Self::X86CallPLTRel4 => write!(f, "CallPLTRel4"), + Self::X86GOTPCRel4 => write!(f, "GOTPCRel4"), + Self::Arm32Call | Self::Arm64Call | Self::RiscvCall => write!(f, "Call"), + + Self::ElfX86_64TlsGd => write!(f, "ElfX86_64TlsGd"), + Self::MachOX86_64Tlv => write!(f, "MachOX86_64Tlv"), + } + } +} + +/// Container for information about a vector of compiled code and its supporting read-only data. +/// +/// The code starts at offset 0 and is followed optionally by relocatable jump tables and copyable +/// (raw binary) read-only data. Any padding between sections is always part of the section that +/// precedes the boundary between the sections. +#[derive(PartialEq)] +pub struct CodeInfo { + /// Number of bytes of machine code (the code starts at offset 0). + pub code_size: CodeOffset, + + /// Number of bytes of jumptables. + pub jumptables_size: CodeOffset, + + /// Number of bytes of rodata. + pub rodata_size: CodeOffset, + + /// Number of bytes in total. + pub total_size: CodeOffset, +} + +impl CodeInfo { + /// Offset of any relocatable jump tables, or equal to rodata if there are no jump tables.
+ pub fn jumptables(&self) -> CodeOffset { + self.code_size + } + + /// Offset of any copyable read-only data, or equal to total_size if there is no rodata. + pub fn rodata(&self) -> CodeOffset { + self.code_size + self.jumptables_size + } +} + +/// Abstract interface for adding bytes to the code segment. +/// +/// A `CodeSink` will receive all of the machine code for a function. It also accepts relocations +/// which are locations in the code section that need to be fixed up when linking. +pub trait CodeSink { + /// Get the current position. + fn offset(&self) -> CodeOffset; + + /// Add 1 byte to the code section. + fn put1(&mut self, _: u8); + + /// Add 2 bytes to the code section. + fn put2(&mut self, _: u16); + + /// Add 4 bytes to the code section. + fn put4(&mut self, _: u32); + + /// Add 8 bytes to the code section. + fn put8(&mut self, _: u64); + + /// Add a relocation referencing a block at the current offset. + fn reloc_block(&mut self, _: Reloc, _: CodeOffset); + + /// Add a relocation referencing an external symbol plus the addend at the current offset. + fn reloc_external(&mut self, _: Reloc, _: &ExternalName, _: Addend); + + /// Add a relocation referencing a constant. + fn reloc_constant(&mut self, _: Reloc, _: ConstantOffset); + + /// Add a relocation referencing a jump table. + fn reloc_jt(&mut self, _: Reloc, _: JumpTable); + + /// Add trap information for the current offset. + fn trap(&mut self, _: TrapCode, _: SourceLoc); + + /// Machine code output is complete, jump table data may follow. + fn begin_jumptables(&mut self); + + /// Jump table output is complete, raw read-only data may follow. + fn begin_rodata(&mut self); + + /// Read-only data output is complete, we're done. + fn end_codegen(&mut self); + + /// Add a stackmap at the current code offset. + fn add_stackmap(&mut self, _: &[Value], _: &Function, _: &dyn TargetIsa); +} + +/// Type of the frame unwind information. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum FrameUnwindKind { + /// Windows fastcall unwinding (as in .pdata). + Fastcall, + /// FDE entry for libunwind (similar to .eh_frame format). + Libunwind, +} + +/// Offset in frame unwind information buffer. +pub type FrameUnwindOffset = usize; + +/// Sink for frame unwind information. +pub trait FrameUnwindSink { + /// Get the current position. + fn len(&self) -> FrameUnwindOffset; + + /// Add bytes to the code section. + fn bytes(&mut self, _: &[u8]); + + /// Reserves bytes in the buffer. + fn reserve(&mut self, _len: usize) {} + + /// Add a relocation entry. + fn reloc(&mut self, _: Reloc, _: FrameUnwindOffset); + + /// Specified offset to main structure. + fn set_entry_offset(&mut self, _: FrameUnwindOffset); +} + +/// Report a bad encoding error. +#[cold] +pub fn bad_encoding(func: &Function, inst: Inst) -> ! { + panic!( + "Bad encoding {} for {}", + func.encodings[inst], + func.dfg.display_inst(inst, None) + ); +} + +/// Emit a function to `sink`, given an instruction emitter function. +/// +/// This function is called from the `TargetIsa::emit_function()` implementations with the +/// appropriate instruction emitter.
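+/// +/// Emission proceeds in the three phases reflected by `CodeInfo`: machine code for each block in +/// layout order, then jump tables as 4-byte block-relative offsets, then raw constant pool data.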
+pub fn emit_function<CS, EI>(func: &Function, emit_inst: EI, sink: &mut CS, isa: &dyn TargetIsa) +where + CS: CodeSink, + EI: Fn(&Function, Inst, &mut RegDiversions, &mut CS, &dyn TargetIsa), +{ + let mut divert = RegDiversions::new(); + for block in func.layout.blocks() { + divert.at_block(&func.entry_diversions, block); + debug_assert_eq!(func.offsets[block], sink.offset()); + for inst in func.layout.block_insts(block) { + emit_inst(func, inst, &mut divert, sink, isa); + } + } + + sink.begin_jumptables(); + + // Output jump tables. + for (jt, jt_data) in func.jump_tables.iter() { + let jt_offset = func.jt_offsets[jt]; + for block in jt_data.iter() { + let rel_offset: i32 = func.offsets[*block] as i32 - jt_offset as i32; + sink.put4(rel_offset as u32) + } + } + + sink.begin_rodata(); + + // Output constants. + for (_, constant_data) in func.dfg.constants.iter() { + for byte in constant_data.iter() { + sink.put1(*byte) + } + } + + sink.end_codegen(); +} diff --git a/cranelift/codegen/src/binemit/relaxation.rs b/cranelift/codegen/src/binemit/relaxation.rs new file mode 100644 index 0000000000..abdd778aaf --- /dev/null +++ b/cranelift/codegen/src/binemit/relaxation.rs @@ -0,0 +1,393 @@ +//! Branch relaxation and offset computation. +//! +//! # Block header offsets +//! +//! Before we can generate binary machine code for branch instructions, we need to know the final +//! offsets of all the block headers in the function. This information is encoded in the +//! `func.offsets` table. +//! +//! # Branch relaxation +//! +//! Branch relaxation is the process of ensuring that all branches in the function have enough +//! range to encode their destination. It is common to have multiple branch encodings in an ISA. +//! For example, x86 branches can have either an 8-bit or a 32-bit displacement. +//! +//! On RISC architectures, it can happen that conditional branches have a shorter range than +//! unconditional branches: +//! +//! ```clif +//! brz v1, block17 +//! ``` +//! +//! can be transformed into: +//! +//! ```clif +//! brnz v1, block23 +//! jump block17 +//! block23: +//! ``` + +use crate::binemit::{CodeInfo, CodeOffset}; +use crate::cursor::{Cursor, FuncCursor}; +use crate::dominator_tree::DominatorTree; +use crate::flowgraph::ControlFlowGraph; +use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueList}; +use crate::isa::{EncInfo, TargetIsa}; +use crate::iterators::IteratorExtras; +use crate::regalloc::RegDiversions; +use crate::timing; +use crate::CodegenResult; +use core::convert::TryFrom; +use log::debug; + +/// Relax branches and compute the final layout of block headers in `func`. +/// +/// Fill in the `func.offsets` table so the function is ready for binary emission. +pub fn relax_branches( + func: &mut Function, + _cfg: &mut ControlFlowGraph, + _domtree: &mut DominatorTree, + isa: &dyn TargetIsa, +) -> CodegenResult<CodeInfo> { + let _tt = timing::relax_branches(); + + let encinfo = isa.encoding_info(); + + // Clear all offsets so we can recognize blocks that haven't been visited yet. + func.offsets.clear(); + func.offsets.resize(func.dfg.num_blocks()); + + // Start by removing redundant jumps. + fold_redundant_jumps(func, _cfg, _domtree); + + // Convert jumps to fallthrough instructions where possible. + fallthroughs(func); + + let mut offset = 0; + let mut divert = RegDiversions::new(); + + // First, compute initial offsets for every block.
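+ // These initial offsets are only a first approximation: the relaxation loop below can + // replace encodings and grow instructions, so it iterates until the offsets stabilize.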
+ { + let mut cur = FuncCursor::new(func); + while let Some(block) = cur.next_block() { + divert.at_block(&cur.func.entry_diversions, block); + cur.func.offsets[block] = offset; + while let Some(inst) = cur.next_inst() { + divert.apply(&cur.func.dfg[inst]); + let enc = cur.func.encodings[inst]; + offset += encinfo.byte_size(enc, inst, &divert, &cur.func); + } + } + } + + // Then, run the relaxation algorithm until it converges. + let mut go_again = true; + while go_again { + go_again = false; + offset = 0; + + // Visit all instructions in layout order. + let mut cur = FuncCursor::new(func); + while let Some(block) = cur.next_block() { + divert.at_block(&cur.func.entry_diversions, block); + + // Record the offset for `block` and make sure we iterate until offsets are stable. + if cur.func.offsets[block] != offset { + cur.func.offsets[block] = offset; + go_again = true; + } + + while let Some(inst) = cur.next_inst() { + divert.apply(&cur.func.dfg[inst]); + + let enc = cur.func.encodings[inst]; + + // See if this branch has a range and a destination, and if the target is in + // range. + if let Some(range) = encinfo.branch_range(enc) { + if let Some(dest) = cur.func.dfg[inst].branch_destination() { + let dest_offset = cur.func.offsets[dest]; + if !range.contains(offset, dest_offset) { + offset += + relax_branch(&mut cur, &divert, offset, dest_offset, &encinfo, isa); + continue; + } + } + } + + offset += encinfo.byte_size(enc, inst, &divert, &cur.func); + } + } + } + + let code_size = offset; + let jumptables = offset; + + for (jt, jt_data) in func.jump_tables.iter() { + func.jt_offsets[jt] = offset; + // TODO: this should be computed based on the min size needed to hold the furthest branch. + offset += jt_data.len() as u32 * 4; + } + + let jumptables_size = offset - jumptables; + let rodata = offset; + + for constant in func.dfg.constants.entries_mut() { + constant.set_offset(offset); + offset += + u32::try_from(constant.len()).expect("Constants must have a length that fits in a u32") + } + + let rodata_size = offset - rodata; + + Ok(CodeInfo { + code_size, + jumptables_size, + rodata_size, + total_size: offset, + }) +} + +/// Folds an instruction if it is a redundant jump. +/// Returns whether folding was performed (which invalidates the CFG). +fn try_fold_redundant_jump( + func: &mut Function, + cfg: &mut ControlFlowGraph, + block: Block, + first_inst: Inst, +) -> bool { + let first_dest = match func.dfg[first_inst].branch_destination() { + Some(block) => block, // The instruction was a single-target branch. + None => { + return false; // The instruction was either multi-target or not a branch. + } + }; + + // For the moment, only attempt to fold a branch to a block that is parameterless. + // These blocks are mainly produced by critical edge splitting. + // + // TODO: Allow folding blocks that define SSA values and function as phi nodes. + if func.dfg.num_block_params(first_dest) != 0 { + return false; + } + + // Look at the first instruction of the first branch's destination. + // If it is an unconditional branch, maybe the second jump can be bypassed. + let second_inst = func.layout.first_inst(first_dest).expect("Instructions"); + if func.dfg[second_inst].opcode() != Opcode::Jump { + return false; + } + + // Now we need to fix up first_inst's block parameters to match second_inst's, + // without changing the branch-specific arguments.
+ // + // The intermediary block is allowed to reference any SSA value that dominates it, + // but that SSA value may not necessarily also dominate the instruction that's + // being patched. + + // Get the arguments and parameters passed by the first branch. + let num_fixed = func.dfg[first_inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + let (first_args, first_params) = func.dfg[first_inst] + .arguments(&func.dfg.value_lists) + .split_at(num_fixed); + + // Get the parameters passed by the second jump. + let num_fixed = func.dfg[second_inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + let (_, second_params) = func.dfg[second_inst] + .arguments(&func.dfg.value_lists) + .split_at(num_fixed); + let mut second_params = second_params.to_vec(); // Clone for rewriting below. + + // For each parameter passed by the second jump, if any of those parameters + // was a block parameter, rewrite it to refer to the value that the first jump + // passed in its parameters. Otherwise, make sure it dominates first_inst. + // + // For example: if `block0` ends with `jump block1(v1)` and `block1(v2)` ends with + // `jump block2(v2)`, we want to rewrite the original jump to `jump block2(v1)`. + let block_params: &[Value] = func.dfg.block_params(first_dest); + debug_assert!(block_params.len() == first_params.len()); + + for value in second_params.iter_mut() { + if let Some((n, _)) = block_params.iter().enumerate().find(|(_, &p)| p == *value) { + // This value was the Nth parameter passed to the second_inst's block. + // Rewrite it as the Nth parameter passed by first_inst. + *value = first_params[n]; + } + } + + // Build a value list of first_args (unchanged) followed by second_params (rewritten). + let arguments_vec: alloc::vec::Vec<_> = first_args + .iter() + .chain(second_params.iter()) + .copied() + .collect(); + let value_list = ValueList::from_slice(&arguments_vec, &mut func.dfg.value_lists); + + func.dfg[first_inst].take_value_list(); // Drop the current list. + func.dfg[first_inst].put_value_list(value_list); // Put the new list. + + // Bypass the second jump. + // This can disconnect the Block containing `second_inst`, to be cleaned up later. + let second_dest = func.dfg[second_inst].branch_destination().expect("Dest"); + func.change_branch_destination(first_inst, second_dest); + cfg.recompute_block(func, block); + + // The previously-intermediary Block may now be unreachable. Update CFG. + if cfg.pred_iter(first_dest).count() == 0 { + // Remove all instructions from that block. + while let Some(inst) = func.layout.first_inst(first_dest) { + func.layout.remove_inst(inst); + } + + // Remove the block... + cfg.recompute_block(func, first_dest); // ...from predecessor lists. + func.layout.remove_block(first_dest); // ...from the layout. + } + + true +} + +/// Redirects `jump` instructions that point to other `jump` instructions to the final destination. +/// This transformation may orphan some blocks. +fn fold_redundant_jumps( + func: &mut Function, + cfg: &mut ControlFlowGraph, + domtree: &mut DominatorTree, +) { + let mut folded = false; + + // Postorder iteration guarantees that a chain of jumps is visited from + // the end of the chain to the start of the chain. + for &block in domtree.cfg_postorder() { + // Only proceed if the first terminator instruction is a single-target branch. + let first_inst = func + .layout + .last_inst(block) + .expect("Block has no terminator"); + folded |= try_fold_redundant_jump(func, cfg, block, first_inst); + + // Also try the previous instruction.
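+ // (A block may end with a conditional branch just before its terminating jump, so the + // instruction preceding the terminator can be a foldable single-target branch as well.)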
+ if let Some(prev_inst) = func.layout.prev_inst(first_inst) { + folded |= try_fold_redundant_jump(func, cfg, block, prev_inst); + } + } + + // Folding jumps invalidates the dominator tree. + if folded { + domtree.compute(func, cfg); + } +} + +/// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any +/// existing `fallthrough` instructions are correct. +fn fallthroughs(func: &mut Function) { + for (block, succ) in func.layout.blocks().adjacent_pairs() { + let term = func + .layout + .last_inst(block) + .expect("block has no terminator."); + if let InstructionData::Jump { + ref mut opcode, + destination, + .. + } = func.dfg[term] + { + match *opcode { + Opcode::Fallthrough => { + // Somebody used a fall-through instruction before the branch relaxation pass. + // Make sure it is correct, i.e. the destination is the layout successor. + debug_assert_eq!(destination, succ, "Illegal fall-through in {}", block) + } + Opcode::Jump => { + // If this is a jump to the successor block, change it to a fall-through. + if destination == succ { + *opcode = Opcode::Fallthrough; + func.encodings[term] = Default::default(); + } + } + _ => {} + } + } + } +} + +/// Relax the branch instruction at `cur` so it can cover the range `offset - dest_offset`. +/// +/// Return the size of the replacement instructions up to and including the location where `cur` is +/// left. +fn relax_branch( + cur: &mut FuncCursor, + divert: &RegDiversions, + offset: CodeOffset, + dest_offset: CodeOffset, + encinfo: &EncInfo, + isa: &dyn TargetIsa, +) -> CodeOffset { + let inst = cur.current_inst().unwrap(); + debug!( + "Relaxing [{}] {} for {:#x}-{:#x} range", + encinfo.display(cur.func.encodings[inst]), + cur.func.dfg.display_inst(inst, isa), + offset, + dest_offset + ); + + // Pick the smallest encoding that can handle the branch range. + let dfg = &cur.func.dfg; + let ctrl_type = dfg.ctrl_typevar(inst); + if let Some(enc) = isa + .legal_encodings(cur.func, &dfg[inst], ctrl_type) + .filter(|&enc| { + let range = encinfo.branch_range(enc).expect("Branch with no range"); + if !range.contains(offset, dest_offset) { + debug!(" trying [{}]: out of range", encinfo.display(enc)); + false + } else if encinfo.operand_constraints(enc) + != encinfo.operand_constraints(cur.func.encodings[inst]) + { + // Conservatively give up if the encoding has different constraints + // than the original, so that we don't risk picking a new encoding + // which the existing operands don't satisfy. We can't check for + // validity directly because we don't have a RegDiversions active so + // we don't know which registers are actually in use. + debug!(" trying [{}]: constraints differ", encinfo.display(enc)); + false + } else { + debug!(" trying [{}]: OK", encinfo.display(enc)); + true + } + }) + .min_by_key(|&enc| encinfo.byte_size(enc, inst, &divert, &cur.func)) + { + debug_assert!(enc != cur.func.encodings[inst]); + cur.func.encodings[inst] = enc; + return encinfo.byte_size(enc, inst, &divert, &cur.func); + } + + // Note: On some RISC ISAs, conditional branches have shorter range than unconditional + // branches, so one way of extending the range of a conditional branch is to invert its + // condition and make it branch over an unconditional jump which has the larger range. + // + // Splitting the block is problematic this late because there may be register diversions in + // effect across the conditional branch, and they can't survive the control flow edge to a new + // block. 
We have two options for handling that: + // + // 1. Set a flag on the new block that indicates it wants to preserve the register diversions of + // its layout predecessor, or + // 2. Use an encoding macro for the branch-over-jump pattern so we don't need to split the block. + // + // It seems that 1. would allow us to share code among RISC ISAs that need this. + // + // We can't allow register diversions to survive from the layout predecessor because the layout + // predecessor could contain kill points for some values that are live in this block, and + // diversions are not automatically cancelled when the live range of a value ends. + + // This assumes solution 2. above: + panic!("No branch in range for {:#x}-{:#x}", offset, dest_offset); +} diff --git a/cranelift/codegen/src/binemit/shrink.rs b/cranelift/codegen/src/binemit/shrink.rs new file mode 100644 index 0000000000..f6fa43e062 --- /dev/null +++ b/cranelift/codegen/src/binemit/shrink.rs @@ -0,0 +1,73 @@ +//! Instruction shrinking. +//! +//! Sometimes there are multiple valid encodings for a given instruction. Cranelift often initially +//! chooses the largest one, because this typically provides the register allocator the most +//! flexibility. However, once register allocation is done, this is no longer important, and we +//! can switch to smaller encodings when possible. + +use crate::ir::instructions::InstructionData; +use crate::ir::Function; +use crate::isa::TargetIsa; +use crate::regalloc::RegDiversions; +use crate::timing; +use log::debug; + +/// Pick the smallest valid encodings for instructions. +pub fn shrink_instructions(func: &mut Function, isa: &dyn TargetIsa) { + let _tt = timing::shrink_instructions(); + + let encinfo = isa.encoding_info(); + let mut divert = RegDiversions::new(); + + for block in func.layout.blocks() { + // Load diversions from predecessors. + divert.at_block(&func.entry_diversions, block); + + for inst in func.layout.block_insts(block) { + let enc = func.encodings[inst]; + if enc.is_legal() { + // regmove/regfill/regspill are special instructions with register immediates + // that are represented as normal operands, so the normal predicates below don't + // handle them correctly. + // + // Also, they need to be presented to the `RegDiversions` to update the + // location tracking. + // + // TODO: Eventually, we want the register allocator to avoid leaving these special + // instructions behind, but for now, just temporarily avoid trying to shrink them. + let inst_data = &func.dfg[inst]; + match inst_data { + InstructionData::RegMove { .. } + | InstructionData::RegFill { .. } + | InstructionData::RegSpill { .. } => { + divert.apply(inst_data); + continue; + } + _ => (), + } + + let ctrl_type = func.dfg.ctrl_typevar(inst); + + // Pick the smallest encoding whose constraints are satisfied.
+ let best_enc = isa + .legal_encodings(func, &func.dfg[inst], ctrl_type) + .filter(|e| encinfo.constraints[e.recipe()].satisfied(inst, &divert, &func)) + .min_by_key(|e| encinfo.byte_size(*e, inst, &divert, &func)) + .unwrap(); + + if best_enc != enc { + func.encodings[inst] = best_enc; + + debug!( + "Shrunk [{}] to [{}] in {}, reducing the size from {} to {}", + encinfo.display(enc), + encinfo.display(best_enc), + func.dfg.display_inst(inst, isa), + encinfo.byte_size(enc, inst, &divert, &func), + encinfo.byte_size(best_enc, inst, &divert, &func) + ); + } + } + } + } +} diff --git a/cranelift/codegen/src/binemit/stackmap.rs b/cranelift/codegen/src/binemit/stackmap.rs new file mode 100644 index 0000000000..10ae96a7cb --- /dev/null +++ b/cranelift/codegen/src/binemit/stackmap.rs @@ -0,0 +1,150 @@ +use crate::bitset::BitSet; +use crate::ir; +use crate::isa::TargetIsa; +use alloc::vec::Vec; + +type Num = u32; +const NUM_BITS: usize = core::mem::size_of::<Num>() * 8; + +/// A stack map is a bitmap with one bit per machine word on the stack. Stack +/// maps are created at `safepoint` instructions and record all live reference +/// values that are on the stack. All slot kinds except `OutgoingArg` are +/// captured in a stack map; `OutgoingArg`s will be captured in the callee +/// function as `IncomingArg`s. +/// +/// The first bit in the bitmap corresponds to the lowest addressed slot on the +/// stack. As all stacks in ISAs supported by Cranelift grow down, this means the +/// first bit corresponds to the top of the stack, and later bits proceed down +/// the stack. +#[derive(Clone, Debug)] +pub struct Stackmap { + bitmap: Vec<BitSet<Num>>, + mapped_words: u32, +} + +impl Stackmap { + /// Create a stackmap based on where references are located on a function's stack. + pub fn from_values( + args: &[ir::entities::Value], + func: &ir::Function, + isa: &dyn TargetIsa, + ) -> Self { + let loc = &func.locations; + let mut live_ref_in_stack_slot = crate::HashSet::new(); + // References can be in registers, and live register values are pushed onto the stack before calls and traps. + // TODO: Implement register maps. If a register containing a reference is spilled and reused after a safepoint, + // it could contain a stale reference value if the garbage collector relocated the value. + for val in args { + if let Some(value_loc) = loc.get(*val) { + match *value_loc { + ir::ValueLoc::Stack(stack_slot) => { + live_ref_in_stack_slot.insert(stack_slot); + } + _ => {} + } + } + } + + let stack = &func.stack_slots; + let info = func.stack_slots.layout_info.unwrap(); + + // Refer to the doc comment for `Stackmap` above to understand the + // bitmap representation used here. + let map_size = (info.frame_size + info.inbound_args_size) as usize; + let word_size = isa.pointer_bytes() as usize; + let num_words = map_size / word_size; + + let mut vec = alloc::vec::Vec::with_capacity(num_words); + vec.resize(num_words, false); + + for (ss, ssd) in stack.iter() { + if !live_ref_in_stack_slot.contains(&ss) + || ssd.kind == ir::stackslot::StackSlotKind::OutgoingArg + { + continue; + } + + debug_assert!(ssd.size as usize == word_size); + let bytes_from_bottom = info.frame_size as i32 + ssd.offset.unwrap(); + let words_from_bottom = (bytes_from_bottom as usize) / word_size; + vec[words_from_bottom] = true; + } + + Self::from_slice(&vec) + } + + /// Create a vec of `BitSet`s from a slice of bools.
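+ /// + /// For example, `from_slice(&[true, false, true])` packs the bools into a single 32-bit + /// word with bits 0 and 2 set, and records `mapped_words == 3`.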
+ pub fn from_slice(vec: &[bool]) -> Self { + let len = vec.len(); + let num_word = len / NUM_BITS + (len % NUM_BITS != 0) as usize; + let mut bitmap = Vec::with_capacity(num_word); + + for segment in vec.chunks(NUM_BITS) { + let mut curr_word = 0; + for (i, set) in segment.iter().enumerate() { + if *set { + curr_word |= 1 << i; + } + } + bitmap.push(BitSet(curr_word)); + } + Self { + mapped_words: len as u32, + bitmap, + } + } + + /// Returns a specified bit. + pub fn get_bit(&self, bit_index: usize) -> bool { + assert!(bit_index < NUM_BITS * self.bitmap.len()); + let word_index = bit_index / NUM_BITS; + let word_offset = (bit_index % NUM_BITS) as u8; + self.bitmap[word_index].contains(word_offset) + } + + /// Returns the raw bitmap that represents this stack map. + pub fn as_slice(&self) -> &[BitSet<Num>] { + &self.bitmap + } + + /// Returns the number of words represented by this stack map. + pub fn mapped_words(&self) -> u32 { + self.mapped_words + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn stackmaps() { + let vec: Vec<bool> = Vec::new(); + assert!(Stackmap::from_slice(&vec).bitmap.is_empty()); + + let mut vec: [bool; NUM_BITS] = Default::default(); + let set_true_idx = [5, 7, 24, 31]; + + for &idx in &set_true_idx { + vec[idx] = true; + } + + let mut vec = vec.to_vec(); + assert_eq!( + vec![BitSet::<Num>(2164261024)], + Stackmap::from_slice(&vec).bitmap + ); + + vec.push(false); + vec.push(true); + let res = Stackmap::from_slice(&vec); + assert_eq!( + vec![BitSet::<Num>(2164261024), BitSet::<Num>(2)], + res.bitmap + ); + + assert!(res.get_bit(5)); + assert!(res.get_bit(31)); + assert!(res.get_bit(33)); + assert!(!res.get_bit(1)); + } +} diff --git a/cranelift/codegen/src/bitset.rs b/cranelift/codegen/src/bitset.rs new file mode 100644 index 0000000000..8035d80b96 --- /dev/null +++ b/cranelift/codegen/src/bitset.rs @@ -0,0 +1,161 @@ +//! Small Bitset +//! +//! This module defines a struct `BitSet<T>` encapsulating a bitset built over the type `T`. +//! `T` is intended to be a primitive unsigned type; currently it can be any type from `u8` to `u32`. +//! +//! If you would like to add support for larger bitsets in the future, you need to change the trait +//! bound `Into<u32>` and the `u32` in the implementation of `max_bits()`. +use core::convert::{From, Into}; +use core::mem::size_of; +use core::ops::{Add, BitOr, Shl, Sub}; + +/// A small bitset built on a single primitive integer type +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct BitSet<T>(pub T); + +impl<T> BitSet<T> +where + T: Into<u32> + + From<u8> + + BitOr<T, Output = T> + + Shl<u8, Output = T> + + Sub<T, Output = T> + + Add<T, Output = T> + + PartialEq + + Copy, +{ + /// Maximum number of bits supported by this BitSet instance + pub fn bits() -> usize { + size_of::<T>() * 8 + } + + /// Maximum number of bits supported by any bitset instance at the moment.
+ pub fn max_bits() -> usize { + size_of::<u32>() * 8 + } + + /// Check if this BitSet contains the number num + pub fn contains(&self, num: u8) -> bool { + debug_assert!((num as usize) < Self::bits()); + debug_assert!((num as usize) < Self::max_bits()); + self.0.into() & (1 << num) != 0 + } + + /// Return the smallest number contained in the bitset or None if empty + pub fn min(&self) -> Option<u8> { + if self.0.into() == 0 { + None + } else { + Some(self.0.into().trailing_zeros() as u8) + } + } + + /// Return the largest number contained in the bitset or None if empty + pub fn max(&self) -> Option<u8> { + if self.0.into() == 0 { + None + } else { + let leading_zeroes = self.0.into().leading_zeros() as usize; + Some((Self::max_bits() - leading_zeroes - 1) as u8) + } + } + + /// Construct a BitSet with the half-open range [lo,hi) filled in + pub fn from_range(lo: u8, hi: u8) -> Self { + debug_assert!(lo <= hi); + debug_assert!((hi as usize) <= Self::bits()); + let one: T = T::from(1); + // I can't just do (one << hi) - one here as the shift may overflow + let hi_rng = if hi >= 1 { + (one << (hi - 1)) + ((one << (hi - 1)) - one) + } else { + T::from(0) + }; + + let lo_rng = (one << lo) - one; + + Self(hi_rng - lo_rng) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn contains() { + let s = BitSet::<u8>(255); + for i in 0..7 { + assert!(s.contains(i)); + } + + let s1 = BitSet::<u8>(0); + for i in 0..7 { + assert!(!s1.contains(i)); + } + + let s2 = BitSet::<u8>(127); + for i in 0..6 { + assert!(s2.contains(i)); + } + assert!(!s2.contains(7)); + + let s3 = BitSet::<u16>(2 | 4 | 64); + assert!(!s3.contains(0) && !s3.contains(3) && !s3.contains(4)); + assert!(!s3.contains(5) && !s3.contains(7)); + assert!(s3.contains(1) && s3.contains(2) && s3.contains(6)); + + let s4 = BitSet::<u16>(4 | 8 | 256 | 1024); + assert!( + !s4.contains(0) + && !s4.contains(1) + && !s4.contains(4) + && !s4.contains(5) + && !s4.contains(6) + && !s4.contains(7) + && !s4.contains(9) + && !s4.contains(11) + ); + assert!(s4.contains(2) && s4.contains(3) && s4.contains(8) && s4.contains(10)); + } + + #[test] + fn minmax() { + let s = BitSet::<u8>(255); + assert_eq!(s.min(), Some(0)); + assert_eq!(s.max(), Some(7)); + assert!(s.min() == Some(0) && s.max() == Some(7)); + let s1 = BitSet::<u8>(0); + assert!(s1.min() == None && s1.max() == None); + let s2 = BitSet::<u8>(127); + assert!(s2.min() == Some(0) && s2.max() == Some(6)); + let s3 = BitSet::<u16>(2 | 4 | 64); + assert!(s3.min() == Some(1) && s3.max() == Some(6)); + let s4 = BitSet::<u16>(4 | 8 | 256 | 1024); + assert!(s4.min() == Some(2) && s4.max() == Some(10)); + } + + #[test] + fn from_range() { + let s = BitSet::<u8>::from_range(5, 5); + assert!(s.0 == 0); + + let s = BitSet::<u8>::from_range(0, 8); + assert!(s.0 == 255); + + let s = BitSet::<u16>::from_range(0, 8); + assert!(s.0 == 255u16); + + let s = BitSet::<u16>::from_range(0, 16); + assert!(s.0 == 65535u16); + + let s = BitSet::<u8>::from_range(5, 6); + assert!(s.0 == 32u8); + + let s = BitSet::<u8>::from_range(3, 7); + assert!(s.0 == 8 | 16 | 32 | 64); + + let s = BitSet::<u16>::from_range(5, 11); + assert!(s.0 == 32 | 64 | 128 | 256 | 512 | 1024); + } +} diff --git a/cranelift/codegen/src/cfg_printer.rs b/cranelift/codegen/src/cfg_printer.rs new file mode 100644 index 0000000000..364b2985fe --- /dev/null +++ b/cranelift/codegen/src/cfg_printer.rs @@ -0,0 +1,83 @@ +//! The `CFGPrinter` utility.
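+//! +//! The output is Graphviz `dot` source: one record-shaped node per block (its header plus its +//! branch instructions) and one `parent:inst -> child` edge per CFG predecessor, along these +//! lines (the function, block, and instruction names are illustrative): +//! +//! ```text +//! digraph "%foo" { +//!     {rank=min; block0} +//!     block0 [shape=record, label="{...}"] +//!     block0:inst3 -> block1 +//! } +//! ```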
+
+use alloc::vec::Vec;
+use core::fmt::{Display, Formatter, Result, Write};
+
+use crate::entity::SecondaryMap;
+use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
+use crate::ir::Function;
+use crate::write::{FuncWriter, PlainWriter};
+
+/// A utility for pretty-printing the CFG of a `Function`.
+pub struct CFGPrinter<'a> {
+    func: &'a Function,
+    cfg: ControlFlowGraph,
+}
+
+/// A utility for pretty-printing the CFG of a `Function`.
+impl<'a> CFGPrinter<'a> {
+    /// Create a new CFGPrinter.
+    pub fn new(func: &'a Function) -> Self {
+        Self {
+            func,
+            cfg: ControlFlowGraph::with_function(func),
+        }
+    }
+
+    /// Write the CFG for this function to `w`.
+    pub fn write(&self, w: &mut dyn Write) -> Result {
+        self.header(w)?;
+        self.block_nodes(w)?;
+        self.cfg_connections(w)?;
+        writeln!(w, "}}")
+    }
+
+    fn header(&self, w: &mut dyn Write) -> Result {
+        writeln!(w, "digraph \"{}\" {{", self.func.name)?;
+        if let Some(entry) = self.func.layout.entry_block() {
+            writeln!(w, "    {{rank=min; {}}}", entry)?;
+        }
+        Ok(())
+    }
+
+    fn block_nodes(&self, w: &mut dyn Write) -> Result {
+        let mut aliases = SecondaryMap::<_, Vec<_>>::new();
+        for v in self.func.dfg.values() {
+            // VADFS returns the immediate target of an alias
+            if let Some(k) = self.func.dfg.value_alias_dest_for_serialization(v) {
+                aliases[k].push(v);
+            }
+        }
+
+        for block in &self.func.layout {
+            write!(w, "    {} [shape=record, label=\"{{", block)?;
+            crate::write::write_block_header(w, self.func, None, block, 4)?;
+            // Add all outgoing branch instructions to the label.
+            for inst in self.func.layout.block_insts(block) {
+                write!(w, " | <{}>", inst)?;
+                PlainWriter.write_instruction(w, self.func, &aliases, None, inst, 0)?;
+            }
+            writeln!(w, "}}\"]")?
+        }
+        Ok(())
+    }
+
+    fn cfg_connections(&self, w: &mut dyn Write) -> Result {
+        for block in &self.func.layout {
+            for BlockPredecessor {
+                block: parent,
+                inst,
+            } in self.cfg.pred_iter(block)
+            {
+                writeln!(w, "    {}:{} -> {}", parent, inst, block)?;
+            }
+        }
+        Ok(())
+    }
+}
+
+impl<'a> Display for CFGPrinter<'a> {
+    fn fmt(&self, f: &mut Formatter) -> Result {
+        self.write(f)
+    }
+}
diff --git a/cranelift/codegen/src/constant_hash.rs b/cranelift/codegen/src/constant_hash.rs
new file mode 100644
index 0000000000..1de2a2edb4
--- /dev/null
+++ b/cranelift/codegen/src/constant_hash.rs
@@ -0,0 +1,62 @@
+//! Runtime support for precomputed constant hash tables.
+//!
+//! The shared module with the same name can generate constant hash tables using open addressing
+//! and quadratic probing.
+//!
+//! The hash tables are arrays that are guaranteed to:
+//!
+//! - Have a power-of-two size.
+//! - Contain at least one empty slot.
+//!
+//! This module provides runtime support for lookups in these tables.
+
+// Re-export entities from constant_hash for simplicity of use.
+pub use cranelift_codegen_shared::constant_hash::*;
+
+/// Trait that must be implemented by the entries in a constant hash table.
+pub trait Table<K: Copy + Eq> {
+    /// Get the number of entries in this table which must be a power of two.
+    fn len(&self) -> usize;
+
+    /// Get the key corresponding to the entry at `idx`, or `None` if the entry is empty.
+    /// The `idx` must be in range.
+    fn key(&self, idx: usize) -> Option<K>;
+}
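+
+// A hand-rolled sketch of how `Table` and `probe` fit together. The struct,
+// keys, and hash are illustrative only; real tables are generated ahead of
+// time by the shared `constant_hash` module.
+#[cfg(test)]
+mod example {
+    use super::{probe, Table};
+
+    struct Toy([Option<u8>; 4]);
+
+    impl Table<u8> for Toy {
+        fn len(&self) -> usize {
+            self.0.len()
+        }
+        fn key(&self, idx: usize) -> Option<u8> {
+            self.0[idx]
+        }
+    }
+
+    #[test]
+    fn toy_probe() {
+        // Slot 0 is the required empty slot; keys were placed at `key & 3`,
+        // i.e. using the identity function as the hash.
+        let table = Toy([None, Some(1), Some(2), Some(3)]);
+        assert_eq!(probe(&table, 2u8, 2), Ok(2));
+        assert_eq!(probe(&table, 4u8, 4), Err(0));
+    }
+}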
+
+/// Look for `key` in `table`.
+///
+/// The provided `hash` value must have been computed from `key` using the same hash function that
+/// was used to construct the table.
+///
+/// Returns `Ok(idx)` with the table index containing the found entry, or `Err(idx)` with the empty
+/// sentinel entry if no entry could be found.
+pub fn probe<K: Copy + Eq, T: Table<K> + ?Sized>(
+    table: &T,
+    key: K,
+    hash: usize,
+) -> Result<usize, usize> {
+    debug_assert!(table.len().is_power_of_two());
+    let mask = table.len() - 1;
+
+    let mut idx = hash;
+    let mut step = 0;
+
+    loop {
+        idx &= mask;
+
+        match table.key(idx) {
+            None => return Err(idx),
+            Some(k) if k == key => return Ok(idx),
+            _ => {}
+        }
+
+        // Quadratic probing.
+        step += 1;
+
+        // When `table.len()` is a power of two, it can be proven that `idx` will visit all
+        // entries. This means that this loop will always terminate if the hash table has even
+        // one unused entry.
+        debug_assert!(step < table.len());
+        idx += step;
+    }
+}
diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs
new file mode 100644
index 0000000000..ca70293c05
--- /dev/null
+++ b/cranelift/codegen/src/context.rs
@@ -0,0 +1,393 @@
+//! Cranelift compilation context and main entry point.
+//!
+//! When compiling many small functions, it is important to avoid repeatedly allocating and
+//! deallocating the data structures needed for compilation. The `Context` struct is used to hold
+//! on to memory allocations between function compilations.
+//!
+//! The context does not hold a `TargetIsa` instance which has to be provided as an argument
+//! instead. This is because an ISA instance is immutable and can be used by multiple compilation
+//! contexts concurrently. Typically, you would have one context per compilation thread and only a
+//! single ISA instance.
+
+use crate::binemit::{
+    relax_branches, shrink_instructions, CodeInfo, FrameUnwindKind, FrameUnwindSink,
+    MemoryCodeSink, RelocSink, StackmapSink, TrapSink,
+};
+use crate::dce::do_dce;
+use crate::dominator_tree::DominatorTree;
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::Function;
+use crate::isa::TargetIsa;
+use crate::legalize_function;
+use crate::licm::do_licm;
+use crate::loop_analysis::LoopAnalysis;
+use crate::nan_canonicalization::do_nan_canonicalization;
+use crate::postopt::do_postopt;
+use crate::redundant_reload_remover::RedundantReloadRemover;
+use crate::regalloc;
+use crate::result::CodegenResult;
+use crate::settings::{FlagsOrIsa, OptLevel};
+use crate::simple_gvn::do_simple_gvn;
+use crate::simple_preopt::do_preopt;
+use crate::timing;
+use crate::unreachable_code::eliminate_unreachable_code;
+use crate::value_label::{build_value_labels_ranges, ComparableSourceLoc, ValueLabelsRanges};
+use crate::verifier::{verify_context, verify_locations, VerifierErrors, VerifierResult};
+use alloc::vec::Vec;
+use log::debug;
+
+/// Persistent data structures and compilation pipeline.
+pub struct Context {
+    /// The function we're compiling.
+    pub func: Function,
+
+    /// The control flow graph of `func`.
+    pub cfg: ControlFlowGraph,
+
+    /// Dominator tree for `func`.
+    pub domtree: DominatorTree,
+
+    /// Register allocation context.
+    pub regalloc: regalloc::Context,
+
+    /// Loop analysis of `func`.
+    pub loop_analysis: LoopAnalysis,
+
+    /// Redundant-reload remover context.
+    pub redundant_reload_remover: RedundantReloadRemover,
+}
+
+impl Context {
+    /// Allocate a new compilation context.
+    ///
+    /// The returned instance should be reused for compiling multiple functions in order to avoid
+    /// needless allocator thrashing.
+    pub fn new() -> Self {
+        Self::for_function(Function::new())
+    }
+
+    /// Allocate a new compilation context with an existing Function.
+    ///
+    /// The returned instance should be reused for compiling multiple functions in order to avoid
+    /// needless allocator thrashing.
+    pub fn for_function(func: Function) -> Self {
+        Self {
+            func,
+            cfg: ControlFlowGraph::new(),
+            domtree: DominatorTree::new(),
+            regalloc: regalloc::Context::new(),
+            loop_analysis: LoopAnalysis::new(),
+            redundant_reload_remover: RedundantReloadRemover::new(),
+        }
+    }
+
+    /// Clear all data structures in this context.
+    pub fn clear(&mut self) {
+        self.func.clear();
+        self.cfg.clear();
+        self.domtree.clear();
+        self.regalloc.clear();
+        self.loop_analysis.clear();
+        self.redundant_reload_remover.clear();
+    }
+
+    /// Compile the function, and emit machine code into a `Vec<u8>`.
+    ///
+    /// Run the function through all the passes necessary to generate code for the target ISA
+    /// represented by `isa`, as well as the final step of emitting machine code into a
+    /// `Vec<u8>`. The machine code is not relocated. Instead, any relocations are emitted
+    /// into `relocs`.
+    ///
+    /// This function calls `compile` and `emit_to_memory`, taking care to resize `mem` as
+    /// needed, so it provides a safe interface.
+    ///
+    /// Returns information about the function's code and read-only data.
+    pub fn compile_and_emit(
+        &mut self,
+        isa: &dyn TargetIsa,
+        mem: &mut Vec<u8>,
+        relocs: &mut dyn RelocSink,
+        traps: &mut dyn TrapSink,
+        stackmaps: &mut dyn StackmapSink,
+    ) -> CodegenResult<CodeInfo> {
+        let info = self.compile(isa)?;
+        let old_len = mem.len();
+        mem.resize(old_len + info.total_size as usize, 0);
+        let new_info = unsafe {
+            self.emit_to_memory(isa, mem.as_mut_ptr().add(old_len), relocs, traps, stackmaps)
+        };
+        debug_assert!(new_info == info);
+        Ok(info)
+    }
+
+    /// Compile the function.
+    ///
+    /// Run the function through all the passes necessary to generate code for the target ISA
+    /// represented by `isa`. This does not include the final step of emitting machine code into a
+    /// code sink.
+    ///
+    /// Returns information about the function's code and read-only data.
+    pub fn compile(&mut self, isa: &dyn TargetIsa) -> CodegenResult<CodeInfo> {
+        let _tt = timing::compile();
+        self.verify_if(isa)?;
+        debug!("Compiling:\n{}", self.func.display(isa));
+
+        let opt_level = isa.flags().opt_level();
+
+        self.compute_cfg();
+        if opt_level != OptLevel::None {
+            self.preopt(isa)?;
+        }
+        if isa.flags().enable_nan_canonicalization() {
+            self.canonicalize_nans(isa)?;
+        }
+        self.legalize(isa)?;
+        if opt_level != OptLevel::None {
+            self.postopt(isa)?;
+            self.compute_domtree();
+            self.compute_loop_analysis();
+            self.licm(isa)?;
+            self.simple_gvn(isa)?;
+        }
+        self.compute_domtree();
+        self.eliminate_unreachable_code(isa)?;
+        if opt_level != OptLevel::None {
+            self.dce(isa)?;
+        }
+        self.regalloc(isa)?;
+        self.prologue_epilogue(isa)?;
+        if opt_level == OptLevel::Speed || opt_level == OptLevel::SpeedAndSize {
+            self.redundant_reload_remover(isa)?;
+        }
+        if opt_level == OptLevel::SpeedAndSize {
+            self.shrink_instructions(isa)?;
+        }
+        let result = self.relax_branches(isa);
+
+        debug!("Compiled:\n{}", self.func.display(isa));
+        result
+    }
+
+    /// Emit machine code directly into raw memory.
+    ///
+    /// Write all of the function's machine code to the memory at `mem`. The size of the machine
+    /// code is returned by `compile` above.
+    ///
+    /// The machine code is not relocated. Instead, any relocations are emitted into `relocs`.
+    ///
+    /// # Safety
+    ///
+    /// This function is unsafe since it does not perform bounds checking on the memory buffer,
+    /// and it can't guarantee that the `mem` pointer is valid.
+    ///
+    /// Returns information about the emitted code and data.
+    pub unsafe fn emit_to_memory(
+        &self,
+        isa: &dyn TargetIsa,
+        mem: *mut u8,
+        relocs: &mut dyn RelocSink,
+        traps: &mut dyn TrapSink,
+        stackmaps: &mut dyn StackmapSink,
+    ) -> CodeInfo {
+        let _tt = timing::binemit();
+        let mut sink = MemoryCodeSink::new(mem, relocs, traps, stackmaps);
+        isa.emit_function_to_memory(&self.func, &mut sink);
+        sink.info
+    }
+
+    /// Emit unwind information.
+    ///
+    /// Requires that the function layout be calculated (see `relax_branches`).
+    ///
+    /// Only some calling conventions (e.g. Windows fastcall) will have unwind information.
+    /// This is a no-op if the function has no unwind information.
+    pub fn emit_unwind_info(
+        &self,
+        isa: &dyn TargetIsa,
+        kind: FrameUnwindKind,
+        sink: &mut dyn FrameUnwindSink,
+    ) {
+        isa.emit_unwind_info(&self.func, kind, sink);
+    }
+
+    /// Run the verifier on the function.
+    ///
+    /// Also check that the dominator tree and control flow graph are consistent with the function.
+    pub fn verify<'a, FOI: Into<FlagsOrIsa<'a>>>(&self, fisa: FOI) -> VerifierResult<()> {
+        let mut errors = VerifierErrors::default();
+        let _ = verify_context(&self.func, &self.cfg, &self.domtree, fisa, &mut errors);
+
+        if errors.is_empty() {
+            Ok(())
+        } else {
+            Err(errors)
+        }
+    }
+
+    /// Run the verifier only if the `enable_verifier` setting is true.
+    pub fn verify_if<'a, FOI: Into<FlagsOrIsa<'a>>>(&self, fisa: FOI) -> CodegenResult<()> {
+        let fisa = fisa.into();
+        if fisa.flags.enable_verifier() {
+            self.verify(fisa)?;
+        }
+        Ok(())
+    }
+
+    /// Run the locations verifier on the function.
+    pub fn verify_locations(&self, isa: &dyn TargetIsa) -> VerifierResult<()> {
+        let mut errors = VerifierErrors::default();
+        let _ = verify_locations(isa, &self.func, &self.cfg, None, &mut errors);
+
+        if errors.is_empty() {
+            Ok(())
+        } else {
+            Err(errors)
+        }
+    }
+
+    /// Run the locations verifier only if the `enable_verifier` setting is true.
+    pub fn verify_locations_if(&self, isa: &dyn TargetIsa) -> CodegenResult<()> {
+        if isa.flags().enable_verifier() {
+            self.verify_locations(isa)?;
+        }
+        Ok(())
+    }
+
+    /// Perform dead-code elimination on the function.
+    pub fn dce<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CodegenResult<()> {
+        do_dce(&mut self.func, &mut self.domtree);
+        self.verify_if(fisa)?;
+        Ok(())
+    }
+
+    /// Perform pre-legalization rewrites on the function.
+    pub fn preopt(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
+        do_preopt(&mut self.func, &mut self.cfg, isa);
+        self.verify_if(isa)?;
+        Ok(())
+    }
+
+    /// Perform NaN canonicalizing rewrites on the function.
+    pub fn canonicalize_nans(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
+        do_nan_canonicalization(&mut self.func);
+        self.verify_if(isa)
+    }
+
+    /// Run the legalizer for `isa` on the function.
+    pub fn legalize(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
+        // Legalization invalidates the domtree and loop_analysis by mutating the CFG.
+        // TODO: Avoid doing this when legalization doesn't actually mutate the CFG.
+        self.domtree.clear();
+        self.loop_analysis.clear();
+        legalize_function(&mut self.func, &mut self.cfg, isa);
+        debug!("Legalized:\n{}", self.func.display(isa));
+        self.verify_if(isa)
+    }
+
+    /// Perform post-legalization rewrites on the function.
+    pub fn postopt(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
+        do_postopt(&mut self.func, isa);
+        self.verify_if(isa)?;
+        Ok(())
+    }
+
+    /// Compute the control flow graph.
+    pub fn compute_cfg(&mut self) {
+        self.cfg.compute(&self.func)
+    }
+
+    /// Compute dominator tree.
+    pub fn compute_domtree(&mut self) {
+        self.domtree.compute(&self.func, &self.cfg)
+    }
+
+    /// Compute the loop analysis.
+    pub fn compute_loop_analysis(&mut self) {
+        self.loop_analysis
+            .compute(&self.func, &self.cfg, &self.domtree)
+    }
+
+    /// Compute the control flow graph and dominator tree.
+    pub fn flowgraph(&mut self) {
+        self.compute_cfg();
+        self.compute_domtree()
+    }
+
+    /// Perform simple GVN on the function.
+    pub fn simple_gvn<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CodegenResult<()> {
+        do_simple_gvn(&mut self.func, &mut self.domtree);
+        self.verify_if(fisa)
+    }
+
+    /// Perform LICM on the function.
+    pub fn licm(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
+        do_licm(
+            isa,
+            &mut self.func,
+            &mut self.cfg,
+            &mut self.domtree,
+            &mut self.loop_analysis,
+        );
+        self.verify_if(isa)
+    }
+
+    /// Perform unreachable code elimination.
+    pub fn eliminate_unreachable_code<'a, FOI>(&mut self, fisa: FOI) -> CodegenResult<()>
+    where
+        FOI: Into<FlagsOrIsa<'a>>,
+    {
+        eliminate_unreachable_code(&mut self.func, &mut self.cfg, &self.domtree);
+        self.verify_if(fisa)
+    }
+
+    /// Run the register allocator.
+    pub fn regalloc(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
+        self.regalloc
+            .run(isa, &mut self.func, &mut self.cfg, &mut self.domtree)
+    }
+
+    /// Insert prologue and epilogues after computing the stack frame layout.
+    pub fn prologue_epilogue(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
+        isa.prologue_epilogue(&mut self.func)?;
+        self.verify_if(isa)?;
+        self.verify_locations_if(isa)?;
+        Ok(())
+    }
+
+    /// Do redundant-reload removal after allocation of both registers and stack slots.
+    pub fn redundant_reload_remover(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
+        self.redundant_reload_remover
+            .run(isa, &mut self.func, &self.cfg);
+        self.verify_if(isa)?;
+        Ok(())
+    }
+
+    /// Run the instruction shrinking pass.
+    pub fn shrink_instructions(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
+        shrink_instructions(&mut self.func, isa);
+        self.verify_if(isa)?;
+        self.verify_locations_if(isa)?;
+        Ok(())
+    }
+
+    /// Run the branch relaxation pass and return information about the function's code and
+    /// read-only data.
+    pub fn relax_branches(&mut self, isa: &dyn TargetIsa) -> CodegenResult<CodeInfo> {
+        let info = relax_branches(&mut self.func, &mut self.cfg, &mut self.domtree, isa)?;
+        self.verify_if(isa)?;
+        self.verify_locations_if(isa)?;
+        Ok(info)
+    }
+
+    /// Builds ranges and locations for the specified value labels.
+    pub fn build_value_labels_ranges(
+        &self,
+        isa: &dyn TargetIsa,
+    ) -> CodegenResult<ValueLabelsRanges> {
+        Ok(build_value_labels_ranges::<ComparableSourceLoc>(
+            &self.func,
+            &self.regalloc,
+            isa,
+        ))
+    }
+}
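+
+// A minimal usage sketch, with assumptions: the caller supplies an `isa`
+// (for example from `isa::lookup`) and an already-built `func`. It drives the
+// same pipeline as `compile_and_emit` above, minus the unsafe emission step.
+#[cfg(test)]
+#[allow(dead_code)]
+fn compile_only(func: Function, isa: &dyn TargetIsa) -> CodegenResult<CodeInfo> {
+    let mut ctx = Context::for_function(func);
+    // Runs legalization, optimizations, register allocation and branch
+    // relaxation; `info.total_size` bytes must then be reserved before
+    // calling `emit_to_memory`.
+    let info = ctx.compile(isa)?;
+    Ok(info)
+}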
diff --git a/cranelift/codegen/src/cursor.rs b/cranelift/codegen/src/cursor.rs
new file mode 100644
index 0000000000..51345cde47
--- /dev/null
+++ b/cranelift/codegen/src/cursor.rs
@@ -0,0 +1,810 @@
+//! Cursor library.
+//!
+//! This module defines cursor data types that can be used for inserting instructions.
+
+use crate::ir;
+use crate::isa::TargetIsa;
+
+/// The possible positions of a cursor.
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum CursorPosition {
+    /// Cursor is not pointing anywhere. No instructions can be inserted.
+    Nowhere,
+    /// Cursor is pointing at an existing instruction.
+    /// New instructions will be inserted *before* the current instruction.
+    At(ir::Inst),
+    /// Cursor is before the beginning of a block. No instructions can be inserted. Calling
+    /// `next_inst()` will move to the first instruction in the block.
+    Before(ir::Block),
+    /// Cursor is pointing after the end of a block.
+    /// New instructions will be appended to the block.
+    After(ir::Block),
+}
+
+/// All cursor types implement the `Cursor` trait, which provides common navigation operations.
+pub trait Cursor {
+    /// Get the current cursor position.
+    fn position(&self) -> CursorPosition;
+
+    /// Set the current position.
+    fn set_position(&mut self, pos: CursorPosition);
+
+    /// Get the source location that should be assigned to new instructions.
+    fn srcloc(&self) -> ir::SourceLoc;
+
+    /// Set the source location that should be assigned to new instructions.
+    fn set_srcloc(&mut self, srcloc: ir::SourceLoc);
+
+    /// Borrow a reference to the function layout that this cursor is navigating.
+    fn layout(&self) -> &ir::Layout;
+
+    /// Borrow a mutable reference to the function layout that this cursor is navigating.
+    fn layout_mut(&mut self) -> &mut ir::Layout;
+
+    /// Exchange this cursor for one with a set source location.
+    ///
+    /// This is intended to be used as a builder method:
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::{Function, Block, SourceLoc};
+    /// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
+    /// fn edit_func(func: &mut Function, srcloc: SourceLoc) {
+    ///     let mut pos = FuncCursor::new(func).with_srcloc(srcloc);
+    ///
+    ///     // Use `pos`...
+    /// }
+    /// ```
+    fn with_srcloc(mut self, srcloc: ir::SourceLoc) -> Self
+    where
+        Self: Sized,
+    {
+        self.set_srcloc(srcloc);
+        self
+    }
+
+    /// Rebuild this cursor positioned at `pos`.
+    fn at_position(mut self, pos: CursorPosition) -> Self
+    where
+        Self: Sized,
+    {
+        self.set_position(pos);
+        self
+    }
+
+    /// Rebuild this cursor positioned at `inst`.
+    ///
+    /// This is intended to be used as a builder method:
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::{Function, Block, Inst};
+    /// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
+    /// fn edit_func(func: &mut Function, inst: Inst) {
+    ///     let mut pos = FuncCursor::new(func).at_inst(inst);
+    ///
+    ///     // Use `pos`...
+    /// }
+    /// ```
+    fn at_inst(mut self, inst: ir::Inst) -> Self
+    where
+        Self: Sized,
+    {
+        self.goto_inst(inst);
+        self
+    }
+
+    /// Rebuild this cursor positioned at the first insertion point for `block`.
+    /// This differs from `at_first_inst` in that it doesn't assume that any
+    /// instructions have been inserted into `block` yet.
+    ///
+    /// This is intended to be used as a builder method:
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::{Function, Block, Inst};
+    /// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
+    /// fn edit_func(func: &mut Function, block: Block) {
+    ///     let mut pos = FuncCursor::new(func).at_first_insertion_point(block);
+    ///
+    ///     // Use `pos`...
+    /// }
+    /// ```
+    fn at_first_insertion_point(mut self, block: ir::Block) -> Self
+    where
+        Self: Sized,
+    {
+        self.goto_first_insertion_point(block);
+        self
+    }
+
+    /// Rebuild this cursor positioned at the first instruction in `block`.
+    ///
+    /// This is intended to be used as a builder method:
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::{Function, Block, Inst};
+    /// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
+    /// fn edit_func(func: &mut Function, block: Block) {
+    ///     let mut pos = FuncCursor::new(func).at_first_inst(block);
+    ///
+    ///     // Use `pos`...
+    /// }
+    /// ```
+    fn at_first_inst(mut self, block: ir::Block) -> Self
+    where
+        Self: Sized,
+    {
+        self.goto_first_inst(block);
+        self
+    }
+
+    /// Rebuild this cursor positioned at the last instruction in `block`.
+    ///
+    /// This is intended to be used as a builder method:
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::{Function, Block, Inst};
+    /// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
+    /// fn edit_func(func: &mut Function, block: Block) {
+    ///     let mut pos = FuncCursor::new(func).at_last_inst(block);
+    ///
+    ///     // Use `pos`...
+    /// }
+    /// ```
+    fn at_last_inst(mut self, block: ir::Block) -> Self
+    where
+        Self: Sized,
+    {
+        self.goto_last_inst(block);
+        self
+    }
+
+    /// Rebuild this cursor positioned after `inst`.
+    ///
+    /// This is intended to be used as a builder method:
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::{Function, Block, Inst};
+    /// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
+    /// fn edit_func(func: &mut Function, inst: Inst) {
+    ///     let mut pos = FuncCursor::new(func).after_inst(inst);
+    ///
+    ///     // Use `pos`...
+    /// }
+    /// ```
+    fn after_inst(mut self, inst: ir::Inst) -> Self
+    where
+        Self: Sized,
+    {
+        self.goto_after_inst(inst);
+        self
+    }
+
+    /// Rebuild this cursor positioned at the top of `block`.
+    ///
+    /// This is intended to be used as a builder method:
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::{Function, Block, Inst};
+    /// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
+    /// fn edit_func(func: &mut Function, block: Block) {
+    ///     let mut pos = FuncCursor::new(func).at_top(block);
+    ///
+    ///     // Use `pos`...
+    /// }
+    /// ```
+    fn at_top(mut self, block: ir::Block) -> Self
+    where
+        Self: Sized,
+    {
+        self.goto_top(block);
+        self
+    }
+
+    /// Rebuild this cursor positioned at the bottom of `block`.
+    ///
+    /// This is intended to be used as a builder method:
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::{Function, Block, Inst};
+    /// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
+    /// fn edit_func(func: &mut Function, block: Block) {
+    ///     let mut pos = FuncCursor::new(func).at_bottom(block);
+    ///
+    ///     // Use `pos`...
+    /// }
+    /// ```
+    fn at_bottom(mut self, block: ir::Block) -> Self
+    where
+        Self: Sized,
+    {
+        self.goto_bottom(block);
+        self
+    }
+
+    /// Get the block corresponding to the current position.
+    fn current_block(&self) -> Option<ir::Block> {
+        use self::CursorPosition::*;
+        match self.position() {
+            Nowhere => None,
+            At(inst) => self.layout().inst_block(inst),
+            Before(block) | After(block) => Some(block),
+        }
+    }
+
+    /// Get the instruction corresponding to the current position, if any.
+    fn current_inst(&self) -> Option<ir::Inst> {
+        use self::CursorPosition::*;
+        match self.position() {
+            At(inst) => Some(inst),
+            _ => None,
+        }
+    }
+
+    /// Go to the position after a specific instruction, which must be inserted
+    /// in the layout. New instructions will be inserted after `inst`.
+    fn goto_after_inst(&mut self, inst: ir::Inst) {
+        debug_assert!(self.layout().inst_block(inst).is_some());
+        let new_pos = if let Some(next) = self.layout().next_inst(inst) {
+            CursorPosition::At(next)
+        } else {
+            CursorPosition::After(
+                self.layout()
+                    .inst_block(inst)
+                    .expect("current instruction removed?"),
+            )
+        };
+        self.set_position(new_pos);
+    }
+
+    /// Go to a specific instruction which must be inserted in the layout.
+    /// New instructions will be inserted before `inst`.
+    fn goto_inst(&mut self, inst: ir::Inst) {
+        debug_assert!(self.layout().inst_block(inst).is_some());
+        self.set_position(CursorPosition::At(inst));
+    }
+
+    /// Go to the position for inserting instructions at the beginning of `block`,
+    /// which unlike `goto_first_inst` doesn't assume that any instructions have
+    /// been inserted into `block` yet.
+    fn goto_first_insertion_point(&mut self, block: ir::Block) {
+        if let Some(inst) = self.layout().first_inst(block) {
+            self.goto_inst(inst);
+        } else {
+            self.goto_bottom(block);
+        }
+    }
+
+    /// Go to the first instruction in `block`.
+    fn goto_first_inst(&mut self, block: ir::Block) {
+        let inst = self.layout().first_inst(block).expect("Empty block");
+        self.goto_inst(inst);
+    }
+
+    /// Go to the last instruction in `block`.
+    fn goto_last_inst(&mut self, block: ir::Block) {
+        let inst = self.layout().last_inst(block).expect("Empty block");
+        self.goto_inst(inst);
+    }
+
+    /// Go to the top of `block` which must be inserted into the layout.
+    /// At this position, instructions cannot be inserted, but `next_inst()` will move to the first
+    /// instruction in `block`.
+    fn goto_top(&mut self, block: ir::Block) {
+        debug_assert!(self.layout().is_block_inserted(block));
+        self.set_position(CursorPosition::Before(block));
+    }
+
+    /// Go to the bottom of `block` which must be inserted into the layout.
+    /// At this position, inserted instructions will be appended to `block`.
+    fn goto_bottom(&mut self, block: ir::Block) {
+        debug_assert!(self.layout().is_block_inserted(block));
+        self.set_position(CursorPosition::After(block));
+    }
+
+    /// Go to the top of the next block in layout order and return it.
+    ///
+    /// - If the cursor wasn't pointing at anything, go to the top of the first block in the
+    ///   function.
+    /// - If there are no more blocks, leave the cursor pointing at nothing and return `None`.
+    ///
+    /// # Examples
+    ///
+    /// The `next_block()` method is intended for iterating over the blocks in layout order:
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::{Function, Block};
+    /// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
+    /// fn edit_func(func: &mut Function) {
+    ///     let mut cursor = FuncCursor::new(func);
+    ///     while let Some(block) = cursor.next_block() {
+    ///         // Edit block.
+    ///     }
+    /// }
+    /// ```
+    fn next_block(&mut self) -> Option<ir::Block> {
+        let next = if let Some(block) = self.current_block() {
+            self.layout().next_block(block)
+        } else {
+            self.layout().entry_block()
+        };
+        self.set_position(match next {
+            Some(block) => CursorPosition::Before(block),
+            None => CursorPosition::Nowhere,
+        });
+        next
+    }
+
+    /// Go to the bottom of the previous block in layout order and return it.
+    ///
+    /// - If the cursor wasn't pointing at anything, go to the bottom of the last block in the
+    ///   function.
+    /// - If there are no more blocks, leave the cursor pointing at nothing and return `None`.
+    ///
+    /// # Examples
+    ///
+    /// The `prev_block()` method is intended for iterating over the blocks in backwards layout order:
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::{Function, Block};
+    /// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
+    /// fn edit_func(func: &mut Function) {
+    ///     let mut cursor = FuncCursor::new(func);
+    ///     while let Some(block) = cursor.prev_block() {
+    ///         // Edit block.
+    ///     }
+    /// }
+    /// ```
+    fn prev_block(&mut self) -> Option<ir::Block> {
+        let prev = if let Some(block) = self.current_block() {
+            self.layout().prev_block(block)
+        } else {
+            self.layout().last_block()
+        };
+        self.set_position(match prev {
+            Some(block) => CursorPosition::After(block),
+            None => CursorPosition::Nowhere,
+        });
+        prev
+    }
+
+    /// Move to the next instruction in the same block and return it.
+    ///
+    /// - If the cursor was positioned before a block, go to the first instruction in that block.
+    /// - If there are no more instructions in the block, go to the `After(block)` position and return
+    ///   `None`.
+    /// - If the cursor wasn't pointing anywhere, keep doing that.
+    ///
+    /// This method will never move the cursor to a different block.
+    ///
+    /// # Examples
+    ///
+    /// The `next_inst()` method is intended for iterating over the instructions in a block like
+    /// this:
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::{Function, Block};
+    /// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
+    /// fn edit_block(func: &mut Function, block: Block) {
+    ///     let mut cursor = FuncCursor::new(func).at_top(block);
+    ///     while let Some(inst) = cursor.next_inst() {
+    ///         // Edit instructions...
+    ///     }
+    /// }
+    /// ```
+    /// The loop body can insert and remove instructions via the cursor.
+    ///
+    /// Iterating over all the instructions in a function looks like this:
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::{Function, Block};
+    /// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
+    /// fn edit_func(func: &mut Function) {
+    ///     let mut cursor = FuncCursor::new(func);
+    ///     while let Some(block) = cursor.next_block() {
+    ///         while let Some(inst) = cursor.next_inst() {
+    ///             // Edit instructions...
+    ///         }
+    ///     }
+    /// }
+    /// ```
+    fn next_inst(&mut self) -> Option<ir::Inst> {
+        use self::CursorPosition::*;
+        match self.position() {
+            Nowhere | After(..) => None,
+            At(inst) => {
+                if let Some(next) = self.layout().next_inst(inst) {
+                    self.set_position(At(next));
+                    Some(next)
+                } else {
+                    let pos = After(
+                        self.layout()
+                            .inst_block(inst)
+                            .expect("current instruction removed?"),
+                    );
+                    self.set_position(pos);
+                    None
+                }
+            }
+            Before(block) => {
+                if let Some(next) = self.layout().first_inst(block) {
+                    self.set_position(At(next));
+                    Some(next)
+                } else {
+                    self.set_position(After(block));
+                    None
+                }
+            }
+        }
+    }
+
+    /// Move to the previous instruction in the same block and return it.
+    ///
+    /// - If the cursor was positioned after a block, go to the last instruction in that block.
+    /// - If there are no more instructions in the block, go to the `Before(block)` position and return
+    ///   `None`.
+    /// - If the cursor wasn't pointing anywhere, keep doing that.
+    ///
+    /// This method will never move the cursor to a different block.
+    ///
+    /// # Examples
+    ///
+    /// The `prev_inst()` method is intended for iterating backwards over the instructions in a
+    /// block like this:
+    ///
+    /// ```
+    /// # use cranelift_codegen::ir::{Function, Block};
+    /// # use cranelift_codegen::cursor::{Cursor, FuncCursor};
+    /// fn edit_block(func: &mut Function, block: Block) {
+    ///     let mut cursor = FuncCursor::new(func).at_bottom(block);
+    ///     while let Some(inst) = cursor.prev_inst() {
+    ///         // Edit instructions...
+    ///     }
+    /// }
+    /// ```
+    fn prev_inst(&mut self) -> Option<ir::Inst> {
+        use self::CursorPosition::*;
+        match self.position() {
+            Nowhere | Before(..) => None,
+            At(inst) => {
+                if let Some(prev) = self.layout().prev_inst(inst) {
+                    self.set_position(At(prev));
+                    Some(prev)
+                } else {
+                    let pos = Before(
+                        self.layout()
+                            .inst_block(inst)
+                            .expect("current instruction removed?"),
+                    );
+                    self.set_position(pos);
+                    None
+                }
+            }
+            After(block) => {
+                if let Some(prev) = self.layout().last_inst(block) {
+                    self.set_position(At(prev));
+                    Some(prev)
+                } else {
+                    self.set_position(Before(block));
+                    None
+                }
+            }
+        }
+    }
+
+    /// Insert an instruction at the current position.
+    ///
+    /// - If pointing at an instruction, the new instruction is inserted before the current
+    ///   instruction.
+    /// - If pointing at the bottom of a block, the new instruction is appended to the block.
+    /// - Otherwise panic.
+    ///
+    /// In either case, the cursor is not moved, such that repeated calls to `insert_inst()` cause
+    /// instructions to appear in insertion order in the block.
+    fn insert_inst(&mut self, inst: ir::Inst) {
+        use self::CursorPosition::*;
+        match self.position() {
+            Nowhere | Before(..) => panic!("Invalid insert_inst position"),
+            At(cur) => self.layout_mut().insert_inst(inst, cur),
+            After(block) => self.layout_mut().append_inst(inst, block),
+        }
+    }
+
+    /// Remove the instruction under the cursor.
+    ///
+    /// The cursor is left pointing at the position following the current instruction.
+    ///
+    /// Return the instruction that was removed.
+    fn remove_inst(&mut self) -> ir::Inst {
+        let inst = self.current_inst().expect("No instruction to remove");
+        self.next_inst();
+        self.layout_mut().remove_inst(inst);
+        inst
+    }
+
+    /// Remove the instruction under the cursor.
+    ///
+    /// The cursor is left pointing at the position preceding the current instruction.
+    ///
+    /// Return the instruction that was removed.
+    fn remove_inst_and_step_back(&mut self) -> ir::Inst {
+        let inst = self.current_inst().expect("No instruction to remove");
+        self.prev_inst();
+        self.layout_mut().remove_inst(inst);
+        inst
+    }
+
+    /// Insert a block at the current position and switch to it.
+    ///
+    /// As far as possible, this method behaves as if the block header were an instruction inserted
+    /// at the current position.
+    ///
+    /// - If the cursor is pointing at an existing instruction, *the current block is split in two*
+    ///   and the current instruction becomes the first instruction in the inserted block.
+    /// - If the cursor points at the bottom of a block, the new block is inserted after the current
+    ///   one, and moved to the bottom of the new block where instructions can be appended.
+    /// - If the cursor points to the top of a block, the new block is inserted above the current one.
+    /// - If the cursor is not pointing at anything, the new block is placed last in the layout.
+    ///
+    /// This means that it is always valid to call this method, and it always leaves the cursor in
+    /// a state that will insert instructions into the new block.
+    fn insert_block(&mut self, new_block: ir::Block) {
+        use self::CursorPosition::*;
+        match self.position() {
+            At(inst) => {
+                self.layout_mut().split_block(new_block, inst);
+                // All other cases move to `After(block)`, but in this case we'll stay `At(inst)`.
+                return;
+            }
+            Nowhere => self.layout_mut().append_block(new_block),
+            Before(block) => self.layout_mut().insert_block(new_block, block),
+            After(block) => self.layout_mut().insert_block_after(new_block, block),
+        }
+        // For everything but `At(inst)` we end up appending to the new block.
+        self.set_position(After(new_block));
+    }
+}
+
+/// Function cursor.
+///
+/// A `FuncCursor` holds a mutable reference to a whole `ir::Function` while keeping a position
+/// too. The function can be re-borrowed by accessing the public `cur.func` member.
+///
+/// This cursor is for use before legalization. The inserted instructions are not given an
+/// encoding.
+pub struct FuncCursor<'f> {
+    pos: CursorPosition,
+    srcloc: ir::SourceLoc,
+
+    /// The referenced function.
+    pub func: &'f mut ir::Function,
+}
+
+impl<'f> FuncCursor<'f> {
+    /// Create a new `FuncCursor` pointing nowhere.
+    pub fn new(func: &'f mut ir::Function) -> Self {
+        Self {
+            pos: CursorPosition::Nowhere,
+            srcloc: Default::default(),
+            func,
+        }
+    }
+
+    /// Use the source location of `inst` for future instructions.
+    pub fn use_srcloc(&mut self, inst: ir::Inst) {
+        self.srcloc = self.func.srclocs[inst];
+    }
+
+    /// Create an instruction builder that inserts an instruction at the current position.
+    pub fn ins(&mut self) -> ir::InsertBuilder<&mut FuncCursor<'f>> {
+        ir::InsertBuilder::new(self)
+    }
+}
+
+impl<'f> Cursor for FuncCursor<'f> {
+    fn position(&self) -> CursorPosition {
+        self.pos
+    }
+
+    fn set_position(&mut self, pos: CursorPosition) {
+        self.pos = pos
+    }
+
+    fn srcloc(&self) -> ir::SourceLoc {
+        self.srcloc
+    }
+
+    fn set_srcloc(&mut self, srcloc: ir::SourceLoc) {
+        self.srcloc = srcloc;
+    }
+
+    fn layout(&self) -> &ir::Layout {
+        &self.func.layout
+    }
+
+    fn layout_mut(&mut self) -> &mut ir::Layout {
+        &mut self.func.layout
+    }
+}
+
+impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut FuncCursor<'f> {
+    fn data_flow_graph(&self) -> &ir::DataFlowGraph {
+        &self.func.dfg
+    }
+
+    fn data_flow_graph_mut(&mut self) -> &mut ir::DataFlowGraph {
+        &mut self.func.dfg
+    }
+
+    fn insert_built_inst(self, inst: ir::Inst, _: ir::Type) -> &'c mut ir::DataFlowGraph {
+        // TODO: Remove this assertion once #796 is fixed.
+        #[cfg(debug_assertions)]
+        {
+            if let CursorPosition::At(_) = self.position() {
+                if let Some(curr) = self.current_inst() {
+                    if let Some(prev) = self.layout().prev_inst(curr) {
+                        let prev_op = self.data_flow_graph()[prev].opcode();
+                        let inst_op = self.data_flow_graph()[inst].opcode();
+                        let curr_op = self.data_flow_graph()[curr].opcode();
+                        if prev_op.is_branch()
+                            && !prev_op.is_terminator()
+                            && !inst_op.is_terminator()
+                        {
+                            panic!(
+                                "Inserting instruction {} after {}, and before {}",
+                                inst_op, prev_op, curr_op
+                            )
+                        }
+                    };
+                };
+            };
+        }
+        self.insert_inst(inst);
+        if !self.srcloc.is_default() {
+            self.func.srclocs[inst] = self.srcloc;
+        }
+        &mut self.func.dfg
+    }
+}
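+
+// A small sketch, assuming only this module's API: visiting every instruction
+// in layout order with a `FuncCursor`, as described in the `Cursor` docs above.
+#[cfg(test)]
+#[allow(dead_code)]
+fn count_insts(func: &mut ir::Function) -> usize {
+    let mut count = 0;
+    let mut cursor = FuncCursor::new(func);
+    // `next_block` and `next_inst` are the trait methods documented above.
+    while let Some(_block) = cursor.next_block() {
+        while let Some(_inst) = cursor.next_inst() {
+            count += 1;
+        }
+    }
+    count
+}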
+
+/// Encoding cursor.
+///
+/// An `EncCursor` can be used to insert instructions that are immediately assigned an encoding.
+/// The cursor holds a mutable reference to the whole function which can be re-borrowed from the
+/// public `pos.func` member.
+pub struct EncCursor<'f> {
+    pos: CursorPosition,
+    srcloc: ir::SourceLoc,
+    built_inst: Option<ir::Inst>,
+
+    /// The referenced function.
+    pub func: &'f mut ir::Function,
+
+    /// The target ISA that will be used to encode instructions.
+    pub isa: &'f dyn TargetIsa,
+}
+
+impl<'f> EncCursor<'f> {
+    /// Create a new `EncCursor` pointing nowhere.
+    pub fn new(func: &'f mut ir::Function, isa: &'f dyn TargetIsa) -> Self {
+        Self {
+            pos: CursorPosition::Nowhere,
+            srcloc: Default::default(),
+            built_inst: None,
+            func,
+            isa,
+        }
+    }
+
+    /// Use the source location of `inst` for future instructions.
+    pub fn use_srcloc(&mut self, inst: ir::Inst) {
+        self.srcloc = self.func.srclocs[inst];
+    }
+
+    /// Create an instruction builder that will insert an encoded instruction at the current
+    /// position.
+    ///
+    /// The builder will panic if it is used to insert an instruction that can't be encoded for
+    /// `self.isa`.
+    pub fn ins(&mut self) -> ir::InsertBuilder<&mut EncCursor<'f>> {
+        ir::InsertBuilder::new(self)
+    }
+
+    /// Get the last built instruction.
+    ///
+    /// This returns the last instruction that was built using the `ins()` method on this cursor.
+    /// Panics if no instruction was built.
+    pub fn built_inst(&self) -> ir::Inst {
+        self.built_inst.expect("No instruction was inserted")
+    }
+
+    /// Return an object that can display `inst`.
+    ///
+    /// This is a convenience wrapper for the DFG equivalent.
+    pub fn display_inst(&self, inst: ir::Inst) -> ir::dfg::DisplayInst {
+        self.func.dfg.display_inst(inst, self.isa)
+    }
+}
+
+impl<'f> Cursor for EncCursor<'f> {
+    fn position(&self) -> CursorPosition {
+        self.pos
+    }
+
+    fn set_position(&mut self, pos: CursorPosition) {
+        self.pos = pos
+    }
+
+    fn srcloc(&self) -> ir::SourceLoc {
+        self.srcloc
+    }
+
+    fn set_srcloc(&mut self, srcloc: ir::SourceLoc) {
+        self.srcloc = srcloc;
+    }
+
+    fn layout(&self) -> &ir::Layout {
+        &self.func.layout
+    }
+
+    fn layout_mut(&mut self) -> &mut ir::Layout {
+        &mut self.func.layout
+    }
+}
+
+impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
+    fn data_flow_graph(&self) -> &ir::DataFlowGraph {
+        &self.func.dfg
+    }
+
+    fn data_flow_graph_mut(&mut self) -> &mut ir::DataFlowGraph {
+        &mut self.func.dfg
+    }
+
+    fn insert_built_inst(
+        self,
+        inst: ir::Inst,
+        ctrl_typevar: ir::Type,
+    ) -> &'c mut ir::DataFlowGraph {
+        // TODO: Remove this assertion once #796 is fixed.
+        #[cfg(debug_assertions)]
+        {
+            if let CursorPosition::At(_) = self.position() {
+                if let Some(curr) = self.current_inst() {
+                    if let Some(prev) = self.layout().prev_inst(curr) {
+                        let prev_op = self.data_flow_graph()[prev].opcode();
+                        let inst_op = self.data_flow_graph()[inst].opcode();
+                        if prev_op.is_branch()
+                            && !prev_op.is_terminator()
+                            && !inst_op.is_terminator()
+                        {
+                            panic!(
+                                "Inserting instruction {} after {} and before {}",
+                                self.display_inst(inst),
+                                self.display_inst(prev),
+                                self.display_inst(curr)
+                            )
+                        }
+                    };
+                };
+            };
+        }
+        // Insert the instruction and remember the reference.
+        self.insert_inst(inst);
+        self.built_inst = Some(inst);
+
+        if !self.srcloc.is_default() {
+            self.func.srclocs[inst] = self.srcloc;
+        }
+        // Assign an encoding.
+        // XXX Is there a way to describe this error to the user?
+ #[cfg_attr(feature = "cargo-clippy", allow(clippy::match_wild_err_arm))] + match self + .isa + .encode(&self.func, &self.func.dfg[inst], ctrl_typevar) + { + Ok(e) => self.func.encodings[inst] = e, + Err(_) => panic!("can't encode {}", self.display_inst(inst)), + } + + &mut self.func.dfg + } +} diff --git a/cranelift/codegen/src/dbg.rs b/cranelift/codegen/src/dbg.rs new file mode 100644 index 0000000000..1d814ceedb --- /dev/null +++ b/cranelift/codegen/src/dbg.rs @@ -0,0 +1,28 @@ +//! Debug tracing helpers. +use core::fmt; + +/// Prefix added to the log file names, just before the thread name or id. +pub static LOG_FILENAME_PREFIX: &str = "cranelift.dbg."; + +/// Helper for printing lists. +pub struct DisplayList<'a, T>(pub &'a [T]) +where + T: 'a + fmt::Display; + +impl<'a, T> fmt::Display for DisplayList<'a, T> +where + T: 'a + fmt::Display, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.0.split_first() { + None => write!(f, "[]"), + Some((first, rest)) => { + write!(f, "[{}", first)?; + for x in rest { + write!(f, ", {}", x)?; + } + write!(f, "]") + } + } + } +} diff --git a/cranelift/codegen/src/dce.rs b/cranelift/codegen/src/dce.rs new file mode 100644 index 0000000000..b217534c3e --- /dev/null +++ b/cranelift/codegen/src/dce.rs @@ -0,0 +1,69 @@ +//! A Dead-Code Elimination (DCE) pass. +//! +//! Dead code here means instructions that have no side effects and have no +//! result values used by other instructions. + +use crate::cursor::{Cursor, FuncCursor}; +use crate::dominator_tree::DominatorTree; +use crate::entity::EntityRef; +use crate::ir::instructions::InstructionData; +use crate::ir::{DataFlowGraph, Function, Inst, Opcode}; +use crate::timing; + +/// Test whether the given opcode is unsafe to even consider for DCE. +fn trivially_unsafe_for_dce(opcode: Opcode) -> bool { + opcode.is_call() + || opcode.is_branch() + || opcode.is_terminator() + || opcode.is_return() + || opcode.can_trap() + || opcode.other_side_effects() + || opcode.can_store() +} + +/// Preserve instructions with used result values. +fn any_inst_results_used(inst: Inst, live: &[bool], dfg: &DataFlowGraph) -> bool { + dfg.inst_results(inst).iter().any(|v| live[v.index()]) +} + +/// Load instructions without the `notrap` flag are defined to trap when +/// operating on inaccessible memory, so we can't DCE them even if the +/// loaded value is unused. +fn is_load_with_defined_trapping(opcode: Opcode, data: &InstructionData) -> bool { + if !opcode.can_load() { + return false; + } + match *data { + InstructionData::StackLoad { .. } => false, + InstructionData::Load { flags, .. } => !flags.notrap(), + _ => true, + } +} + +/// Perform DCE on `func`. 
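+///
+/// Blocks are visited in CFG post-order and instructions within each block
+/// bottom-up, so every use of a value is seen before its definition; an
+/// instruction is removed when it is trivially safe to delete and none of its
+/// results have been marked live.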
+pub fn do_dce(func: &mut Function, domtree: &mut DominatorTree) { + let _tt = timing::dce(); + debug_assert!(domtree.is_valid()); + + let mut live = vec![false; func.dfg.num_values()]; + for &block in domtree.cfg_postorder() { + let mut pos = FuncCursor::new(func).at_bottom(block); + while let Some(inst) = pos.prev_inst() { + { + let data = &pos.func.dfg[inst]; + let opcode = data.opcode(); + if trivially_unsafe_for_dce(opcode) + || is_load_with_defined_trapping(opcode, &data) + || any_inst_results_used(inst, &live, &pos.func.dfg) + { + for arg in pos.func.dfg.inst_args(inst) { + let v = pos.func.dfg.resolve_aliases(*arg); + live[v.index()] = true; + } + continue; + } + } + pos.remove_inst(); + } + } +} diff --git a/cranelift/codegen/src/divconst_magic_numbers.rs b/cranelift/codegen/src/divconst_magic_numbers.rs new file mode 100644 index 0000000000..af45444c40 --- /dev/null +++ b/cranelift/codegen/src/divconst_magic_numbers.rs @@ -0,0 +1,1083 @@ +//! Compute "magic numbers" for division-by-constants transformations. +//! +//! Math helpers for division by (non-power-of-2) constants. This is based +//! on the presentation in "Hacker's Delight" by Henry Warren, 2003. There +//! are four cases: {unsigned, signed} x {32 bit, 64 bit}. The word size +//! makes little difference, but the signed-vs-unsigned aspect has a large +//! effect. Therefore everything is presented in the order U32 U64 S32 S64 +//! so as to emphasise the similarity of the U32 and U64 cases and the S32 +//! and S64 cases. + +// Structures to hold the "magic numbers" computed. + +#[derive(PartialEq, Debug)] +pub struct MU32 { + pub mul_by: u32, + pub do_add: bool, + pub shift_by: i32, +} + +#[derive(PartialEq, Debug)] +pub struct MU64 { + pub mul_by: u64, + pub do_add: bool, + pub shift_by: i32, +} + +#[derive(PartialEq, Debug)] +pub struct MS32 { + pub mul_by: i32, + pub shift_by: i32, +} + +#[derive(PartialEq, Debug)] +pub struct MS64 { + pub mul_by: i64, + pub shift_by: i32, +} + +// The actual "magic number" generators follow. + +pub fn magic_u32(d: u32) -> MU32 { + debug_assert_ne!(d, 0); + debug_assert_ne!(d, 1); // d==1 generates out of range shifts. + + let mut do_add: bool = false; + let mut p: i32 = 31; + let nc: u32 = 0xFFFFFFFFu32 - u32::wrapping_neg(d) % d; + let mut q1: u32 = 0x80000000u32 / nc; + let mut r1: u32 = 0x80000000u32 - q1 * nc; + let mut q2: u32 = 0x7FFFFFFFu32 / d; + let mut r2: u32 = 0x7FFFFFFFu32 - q2 * d; + loop { + p = p + 1; + if r1 >= nc - r1 { + q1 = u32::wrapping_add(u32::wrapping_mul(2, q1), 1); + r1 = u32::wrapping_sub(u32::wrapping_mul(2, r1), nc); + } else { + q1 = u32::wrapping_mul(2, q1); + r1 = 2 * r1; + } + if r2 + 1 >= d - r2 { + if q2 >= 0x7FFFFFFFu32 { + do_add = true; + } + q2 = 2 * q2 + 1; + r2 = u32::wrapping_sub(u32::wrapping_add(u32::wrapping_mul(2, r2), 1), d); + } else { + if q2 >= 0x80000000u32 { + do_add = true; + } + q2 = u32::wrapping_mul(2, q2); + r2 = 2 * r2 + 1; + } + let delta: u32 = d - 1 - r2; + if !(p < 64 && (q1 < delta || (q1 == delta && r1 == 0))) { + break; + } + } + + MU32 { + mul_by: q2 + 1, + do_add, + shift_by: p - 32, + } +} + +pub fn magic_u64(d: u64) -> MU64 { + debug_assert_ne!(d, 0); + debug_assert_ne!(d, 1); // d==1 generates out of range shifts. 
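+
+    // Same scheme as `magic_u32` above: grow the candidate shift amount `p`
+    // while maintaining quotient/remainder pairs (q1, r1) and (q2, r2) for
+    // 2^p divided by `nc` and by `d`, stopping at the termination test from
+    // Hacker's Delight.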
+ + let mut do_add: bool = false; + let mut p: i32 = 63; + let nc: u64 = 0xFFFFFFFFFFFFFFFFu64 - u64::wrapping_neg(d) % d; + let mut q1: u64 = 0x8000000000000000u64 / nc; + let mut r1: u64 = 0x8000000000000000u64 - q1 * nc; + let mut q2: u64 = 0x7FFFFFFFFFFFFFFFu64 / d; + let mut r2: u64 = 0x7FFFFFFFFFFFFFFFu64 - q2 * d; + loop { + p = p + 1; + if r1 >= nc - r1 { + q1 = u64::wrapping_add(u64::wrapping_mul(2, q1), 1); + r1 = u64::wrapping_sub(u64::wrapping_mul(2, r1), nc); + } else { + q1 = u64::wrapping_mul(2, q1); + r1 = 2 * r1; + } + if r2 + 1 >= d - r2 { + if q2 >= 0x7FFFFFFFFFFFFFFFu64 { + do_add = true; + } + q2 = 2 * q2 + 1; + r2 = u64::wrapping_sub(u64::wrapping_add(u64::wrapping_mul(2, r2), 1), d); + } else { + if q2 >= 0x8000000000000000u64 { + do_add = true; + } + q2 = u64::wrapping_mul(2, q2); + r2 = 2 * r2 + 1; + } + let delta: u64 = d - 1 - r2; + if !(p < 128 && (q1 < delta || (q1 == delta && r1 == 0))) { + break; + } + } + + MU64 { + mul_by: q2 + 1, + do_add, + shift_by: p - 64, + } +} + +pub fn magic_s32(d: i32) -> MS32 { + debug_assert_ne!(d, -1); + debug_assert_ne!(d, 0); + debug_assert_ne!(d, 1); + let two31: u32 = 0x80000000u32; + let mut p: i32 = 31; + let ad: u32 = i32::wrapping_abs(d) as u32; + let t: u32 = two31 + ((d as u32) >> 31); + let anc: u32 = u32::wrapping_sub(t - 1, t % ad); + let mut q1: u32 = two31 / anc; + let mut r1: u32 = two31 - q1 * anc; + let mut q2: u32 = two31 / ad; + let mut r2: u32 = two31 - q2 * ad; + loop { + p = p + 1; + q1 = 2 * q1; + r1 = 2 * r1; + if r1 >= anc { + q1 = q1 + 1; + r1 = r1 - anc; + } + q2 = 2 * q2; + r2 = 2 * r2; + if r2 >= ad { + q2 = q2 + 1; + r2 = r2 - ad; + } + let delta: u32 = ad - r2; + if !(q1 < delta || (q1 == delta && r1 == 0)) { + break; + } + } + + MS32 { + mul_by: (if d < 0 { + u32::wrapping_neg(q2 + 1) + } else { + q2 + 1 + }) as i32, + shift_by: p - 32, + } +} + +pub fn magic_s64(d: i64) -> MS64 { + debug_assert_ne!(d, -1); + debug_assert_ne!(d, 0); + debug_assert_ne!(d, 1); + let two63: u64 = 0x8000000000000000u64; + let mut p: i32 = 63; + let ad: u64 = i64::wrapping_abs(d) as u64; + let t: u64 = two63 + ((d as u64) >> 63); + let anc: u64 = u64::wrapping_sub(t - 1, t % ad); + let mut q1: u64 = two63 / anc; + let mut r1: u64 = two63 - q1 * anc; + let mut q2: u64 = two63 / ad; + let mut r2: u64 = two63 - q2 * ad; + loop { + p = p + 1; + q1 = 2 * q1; + r1 = 2 * r1; + if r1 >= anc { + q1 = q1 + 1; + r1 = r1 - anc; + } + q2 = 2 * q2; + r2 = 2 * r2; + if r2 >= ad { + q2 = q2 + 1; + r2 = r2 - ad; + } + let delta: u64 = ad - r2; + if !(q1 < delta || (q1 == delta && r1 == 0)) { + break; + } + } + + MS64 { + mul_by: (if d < 0 { + u64::wrapping_neg(q2 + 1) + } else { + q2 + 1 + }) as i64, + shift_by: p - 64, + } +} + +#[cfg(test)] +mod tests { + use super::{magic_s32, magic_s64, magic_u32, magic_u64}; + use super::{MS32, MS64, MU32, MU64}; + + fn make_mu32(mul_by: u32, do_add: bool, shift_by: i32) -> MU32 { + MU32 { + mul_by, + do_add, + shift_by, + } + } + + fn make_mu64(mul_by: u64, do_add: bool, shift_by: i32) -> MU64 { + MU64 { + mul_by, + do_add, + shift_by, + } + } + + fn make_ms32(mul_by: i32, shift_by: i32) -> MS32 { + MS32 { mul_by, shift_by } + } + + fn make_ms64(mul_by: i64, shift_by: i32) -> MS64 { + MS64 { mul_by, shift_by } + } + + #[test] + fn test_magic_u32() { + assert_eq!(magic_u32(2u32), make_mu32(0x80000000u32, false, 0)); + assert_eq!(magic_u32(3u32), make_mu32(0xaaaaaaabu32, false, 1)); + assert_eq!(magic_u32(4u32), make_mu32(0x40000000u32, false, 0)); + assert_eq!(magic_u32(5u32), 
make_mu32(0xcccccccdu32, false, 2)); + assert_eq!(magic_u32(6u32), make_mu32(0xaaaaaaabu32, false, 2)); + assert_eq!(magic_u32(7u32), make_mu32(0x24924925u32, true, 3)); + assert_eq!(magic_u32(9u32), make_mu32(0x38e38e39u32, false, 1)); + assert_eq!(magic_u32(10u32), make_mu32(0xcccccccdu32, false, 3)); + assert_eq!(magic_u32(11u32), make_mu32(0xba2e8ba3u32, false, 3)); + assert_eq!(magic_u32(12u32), make_mu32(0xaaaaaaabu32, false, 3)); + assert_eq!(magic_u32(25u32), make_mu32(0x51eb851fu32, false, 3)); + assert_eq!(magic_u32(125u32), make_mu32(0x10624dd3u32, false, 3)); + assert_eq!(magic_u32(625u32), make_mu32(0xd1b71759u32, false, 9)); + assert_eq!(magic_u32(1337u32), make_mu32(0x88233b2bu32, true, 11)); + assert_eq!(magic_u32(65535u32), make_mu32(0x80008001u32, false, 15)); + assert_eq!(magic_u32(65536u32), make_mu32(0x00010000u32, false, 0)); + assert_eq!(magic_u32(65537u32), make_mu32(0xffff0001u32, false, 16)); + assert_eq!(magic_u32(31415927u32), make_mu32(0x445b4553u32, false, 23)); + assert_eq!( + magic_u32(0xdeadbeefu32), + make_mu32(0x93275ab3u32, false, 31) + ); + assert_eq!( + magic_u32(0xfffffffdu32), + make_mu32(0x40000001u32, false, 30) + ); + assert_eq!(magic_u32(0xfffffffeu32), make_mu32(0x00000003u32, true, 32)); + assert_eq!( + magic_u32(0xffffffffu32), + make_mu32(0x80000001u32, false, 31) + ); + } + + #[test] + fn test_magic_u64() { + assert_eq!(magic_u64(2u64), make_mu64(0x8000000000000000u64, false, 0)); + assert_eq!(magic_u64(3u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 1)); + assert_eq!(magic_u64(4u64), make_mu64(0x4000000000000000u64, false, 0)); + assert_eq!(magic_u64(5u64), make_mu64(0xcccccccccccccccdu64, false, 2)); + assert_eq!(magic_u64(6u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 2)); + assert_eq!(magic_u64(7u64), make_mu64(0x2492492492492493u64, true, 3)); + assert_eq!(magic_u64(9u64), make_mu64(0xe38e38e38e38e38fu64, false, 3)); + assert_eq!(magic_u64(10u64), make_mu64(0xcccccccccccccccdu64, false, 3)); + assert_eq!(magic_u64(11u64), make_mu64(0x2e8ba2e8ba2e8ba3u64, false, 1)); + assert_eq!(magic_u64(12u64), make_mu64(0xaaaaaaaaaaaaaaabu64, false, 3)); + assert_eq!(magic_u64(25u64), make_mu64(0x47ae147ae147ae15u64, true, 5)); + assert_eq!(magic_u64(125u64), make_mu64(0x0624dd2f1a9fbe77u64, true, 7)); + assert_eq!( + magic_u64(625u64), + make_mu64(0x346dc5d63886594bu64, false, 7) + ); + assert_eq!( + magic_u64(1337u64), + make_mu64(0xc4119d952866a139u64, false, 10) + ); + assert_eq!( + magic_u64(31415927u64), + make_mu64(0x116d154b9c3d2f85u64, true, 25) + ); + assert_eq!( + magic_u64(0x00000000deadbeefu64), + make_mu64(0x93275ab2dfc9094bu64, false, 31) + ); + assert_eq!( + magic_u64(0x00000000fffffffdu64), + make_mu64(0x8000000180000005u64, false, 31) + ); + assert_eq!( + magic_u64(0x00000000fffffffeu64), + make_mu64(0x0000000200000005u64, true, 32) + ); + assert_eq!( + magic_u64(0x00000000ffffffffu64), + make_mu64(0x8000000080000001u64, false, 31) + ); + assert_eq!( + magic_u64(0x0000000100000000u64), + make_mu64(0x0000000100000000u64, false, 0) + ); + assert_eq!( + magic_u64(0x0000000100000001u64), + make_mu64(0xffffffff00000001u64, false, 32) + ); + assert_eq!( + magic_u64(0x0ddc0ffeebadf00du64), + make_mu64(0x2788e9d394b77da1u64, true, 60) + ); + assert_eq!( + magic_u64(0xfffffffffffffffdu64), + make_mu64(0x4000000000000001u64, false, 62) + ); + assert_eq!( + magic_u64(0xfffffffffffffffeu64), + make_mu64(0x0000000000000003u64, true, 64) + ); + assert_eq!( + magic_u64(0xffffffffffffffffu64), + make_mu64(0x8000000000000001u64, false, 63) + ); + } + 
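+    // Illustrative cross-check (an addition for exposition; it relies only on
+    // `magic_u32` above): interpret one unsigned magic number the way the
+    // generated code would, for the `do_add == false` case.
+    #[test]
+    fn interpret_magic_u32_example() {
+        let m = magic_u32(5u32);
+        assert!(!m.do_add);
+        for &n in &[0u32, 1, 4, 5, 6, 1000, 0xffff_ffff] {
+            let q = ((((n as u64) * (m.mul_by as u64)) >> 32) as u32) >> m.shift_by;
+            assert_eq!(q, n / 5);
+        }
+    }
+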
+ #[test] + fn test_magic_s32() { + assert_eq!( + magic_s32(-0x80000000i32), + make_ms32(0x7fffffffu32 as i32, 30) + ); + assert_eq!( + magic_s32(-0x7FFFFFFFi32), + make_ms32(0xbfffffffu32 as i32, 29) + ); + assert_eq!( + magic_s32(-0x7FFFFFFEi32), + make_ms32(0x7ffffffdu32 as i32, 30) + ); + assert_eq!(magic_s32(-31415927i32), make_ms32(0xbba4baadu32 as i32, 23)); + assert_eq!(magic_s32(-1337i32), make_ms32(0x9df73135u32 as i32, 9)); + assert_eq!(magic_s32(-256i32), make_ms32(0x7fffffffu32 as i32, 7)); + assert_eq!(magic_s32(-5i32), make_ms32(0x99999999u32 as i32, 1)); + assert_eq!(magic_s32(-3i32), make_ms32(0x55555555u32 as i32, 1)); + assert_eq!(magic_s32(-2i32), make_ms32(0x7fffffffu32 as i32, 0)); + assert_eq!(magic_s32(2i32), make_ms32(0x80000001u32 as i32, 0)); + assert_eq!(magic_s32(3i32), make_ms32(0x55555556u32 as i32, 0)); + assert_eq!(magic_s32(4i32), make_ms32(0x80000001u32 as i32, 1)); + assert_eq!(magic_s32(5i32), make_ms32(0x66666667u32 as i32, 1)); + assert_eq!(magic_s32(6i32), make_ms32(0x2aaaaaabu32 as i32, 0)); + assert_eq!(magic_s32(7i32), make_ms32(0x92492493u32 as i32, 2)); + assert_eq!(magic_s32(9i32), make_ms32(0x38e38e39u32 as i32, 1)); + assert_eq!(magic_s32(10i32), make_ms32(0x66666667u32 as i32, 2)); + assert_eq!(magic_s32(11i32), make_ms32(0x2e8ba2e9u32 as i32, 1)); + assert_eq!(magic_s32(12i32), make_ms32(0x2aaaaaabu32 as i32, 1)); + assert_eq!(magic_s32(25i32), make_ms32(0x51eb851fu32 as i32, 3)); + assert_eq!(magic_s32(125i32), make_ms32(0x10624dd3u32 as i32, 3)); + assert_eq!(magic_s32(625i32), make_ms32(0x68db8badu32 as i32, 8)); + assert_eq!(magic_s32(1337i32), make_ms32(0x6208cecbu32 as i32, 9)); + assert_eq!(magic_s32(31415927i32), make_ms32(0x445b4553u32 as i32, 23)); + assert_eq!( + magic_s32(0x7ffffffei32), + make_ms32(0x80000003u32 as i32, 30) + ); + assert_eq!( + magic_s32(0x7fffffffi32), + make_ms32(0x40000001u32 as i32, 29) + ); + } + + #[test] + fn test_magic_s64() { + assert_eq!( + magic_s64(-0x8000000000000000i64), + make_ms64(0x7fffffffffffffffu64 as i64, 62) + ); + assert_eq!( + magic_s64(-0x7FFFFFFFFFFFFFFFi64), + make_ms64(0xbfffffffffffffffu64 as i64, 61) + ); + assert_eq!( + magic_s64(-0x7FFFFFFFFFFFFFFEi64), + make_ms64(0x7ffffffffffffffdu64 as i64, 62) + ); + assert_eq!( + magic_s64(-0x0ddC0ffeeBadF00di64), + make_ms64(0x6c3b8b1635a4412fu64 as i64, 59) + ); + assert_eq!( + magic_s64(-0x100000001i64), + make_ms64(0x800000007fffffffu64 as i64, 31) + ); + assert_eq!( + magic_s64(-0x100000000i64), + make_ms64(0x7fffffffffffffffu64 as i64, 31) + ); + assert_eq!( + magic_s64(-0xFFFFFFFFi64), + make_ms64(0x7fffffff7fffffffu64 as i64, 31) + ); + assert_eq!( + magic_s64(-0xFFFFFFFEi64), + make_ms64(0x7ffffffefffffffdu64 as i64, 31) + ); + assert_eq!( + magic_s64(-0xFFFFFFFDi64), + make_ms64(0x7ffffffe7ffffffbu64 as i64, 31) + ); + assert_eq!( + magic_s64(-0xDeadBeefi64), + make_ms64(0x6cd8a54d2036f6b5u64 as i64, 31) + ); + assert_eq!( + magic_s64(-31415927i64), + make_ms64(0x7749755a31e1683du64 as i64, 24) + ); + assert_eq!( + magic_s64(-1337i64), + make_ms64(0x9df731356bccaf63u64 as i64, 9) + ); + assert_eq!( + magic_s64(-256i64), + make_ms64(0x7fffffffffffffffu64 as i64, 7) + ); + assert_eq!(magic_s64(-5i64), make_ms64(0x9999999999999999u64 as i64, 1)); + assert_eq!(magic_s64(-3i64), make_ms64(0x5555555555555555u64 as i64, 1)); + assert_eq!(magic_s64(-2i64), make_ms64(0x7fffffffffffffffu64 as i64, 0)); + assert_eq!(magic_s64(2i64), make_ms64(0x8000000000000001u64 as i64, 0)); + assert_eq!(magic_s64(3i64), make_ms64(0x5555555555555556u64 as 
i64, 0)); + assert_eq!(magic_s64(4i64), make_ms64(0x8000000000000001u64 as i64, 1)); + assert_eq!(magic_s64(5i64), make_ms64(0x6666666666666667u64 as i64, 1)); + assert_eq!(magic_s64(6i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 0)); + assert_eq!(magic_s64(7i64), make_ms64(0x4924924924924925u64 as i64, 1)); + assert_eq!(magic_s64(9i64), make_ms64(0x1c71c71c71c71c72u64 as i64, 0)); + assert_eq!(magic_s64(10i64), make_ms64(0x6666666666666667u64 as i64, 2)); + assert_eq!(magic_s64(11i64), make_ms64(0x2e8ba2e8ba2e8ba3u64 as i64, 1)); + assert_eq!(magic_s64(12i64), make_ms64(0x2aaaaaaaaaaaaaabu64 as i64, 1)); + assert_eq!(magic_s64(25i64), make_ms64(0xa3d70a3d70a3d70bu64 as i64, 4)); + assert_eq!( + magic_s64(125i64), + make_ms64(0x20c49ba5e353f7cfu64 as i64, 4) + ); + assert_eq!( + magic_s64(625i64), + make_ms64(0x346dc5d63886594bu64 as i64, 7) + ); + assert_eq!( + magic_s64(1337i64), + make_ms64(0x6208ceca9433509du64 as i64, 9) + ); + assert_eq!( + magic_s64(31415927i64), + make_ms64(0x88b68aa5ce1e97c3u64 as i64, 24) + ); + assert_eq!( + magic_s64(0x00000000deadbeefi64), + make_ms64(0x93275ab2dfc9094bu64 as i64, 31) + ); + assert_eq!( + magic_s64(0x00000000fffffffdi64), + make_ms64(0x8000000180000005u64 as i64, 31) + ); + assert_eq!( + magic_s64(0x00000000fffffffei64), + make_ms64(0x8000000100000003u64 as i64, 31) + ); + assert_eq!( + magic_s64(0x00000000ffffffffi64), + make_ms64(0x8000000080000001u64 as i64, 31) + ); + assert_eq!( + magic_s64(0x0000000100000000i64), + make_ms64(0x8000000000000001u64 as i64, 31) + ); + assert_eq!( + magic_s64(0x0000000100000001i64), + make_ms64(0x7fffffff80000001u64 as i64, 31) + ); + assert_eq!( + magic_s64(0x0ddc0ffeebadf00di64), + make_ms64(0x93c474e9ca5bbed1u64 as i64, 59) + ); + assert_eq!( + magic_s64(0x7ffffffffffffffdi64), + make_ms64(0x2000000000000001u64 as i64, 60) + ); + assert_eq!( + magic_s64(0x7ffffffffffffffei64), + make_ms64(0x8000000000000003u64 as i64, 62) + ); + assert_eq!( + magic_s64(0x7fffffffffffffffi64), + make_ms64(0x4000000000000001u64 as i64, 61) + ); + } + + #[test] + fn test_magic_generators_dont_panic() { + // The point of this is to check that the magic number generators + // don't panic with integer wraparounds, especially at boundary cases + // for their arguments. The actual results are thrown away, although + // we force `total` to be used, so that rustc can't optimise the + // entire computation away. 
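+
+        // The asserted `total` values below are pinned checksums; they are presumably
+        // golden values recorded from a known-good run, so a mismatch signals a change in
+        // generator behaviour rather than asserting any specific arithmetic fact.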
+ + // Testing UP magic_u32 + let mut total: u64 = 0; + for x in 2..(200 * 1000u32) { + let m = magic_u32(x); + total = total ^ (m.mul_by as u64); + total = total + (m.shift_by as u64); + total = total + (if m.do_add { 123 } else { 456 }); + } + assert_eq!(total, 2481999609); + + total = 0; + // Testing MIDPOINT magic_u32 + for x in 0x8000_0000u32 - 10 * 1000u32..0x8000_0000u32 + 10 * 1000u32 { + let m = magic_u32(x); + total = total ^ (m.mul_by as u64); + total = total + (m.shift_by as u64); + total = total + (if m.do_add { 123 } else { 456 }); + } + assert_eq!(total, 2399809723); + + total = 0; + // Testing DOWN magic_u32 + for x in 0..(200 * 1000u32) { + let m = magic_u32(0xFFFF_FFFFu32 - x); + total = total ^ (m.mul_by as u64); + total = total + (m.shift_by as u64); + total = total + (if m.do_add { 123 } else { 456 }); + } + assert_eq!(total, 271138267); + + // Testing UP magic_u64 + total = 0; + for x in 2..(200 * 1000u64) { + let m = magic_u64(x); + total = total ^ m.mul_by; + total = total + (m.shift_by as u64); + total = total + (if m.do_add { 123 } else { 456 }); + } + assert_eq!(total, 7430004086976261161); + + total = 0; + // Testing MIDPOINT magic_u64 + for x in 0x8000_0000_0000_0000u64 - 10 * 1000u64..0x8000_0000_0000_0000u64 + 10 * 1000u64 { + let m = magic_u64(x); + total = total ^ m.mul_by; + total = total + (m.shift_by as u64); + total = total + (if m.do_add { 123 } else { 456 }); + } + assert_eq!(total, 10312117246769520603); + + // Testing DOWN magic_u64 + total = 0; + for x in 0..(200 * 1000u64) { + let m = magic_u64(0xFFFF_FFFF_FFFF_FFFFu64 - x); + total = total ^ m.mul_by; + total = total + (m.shift_by as u64); + total = total + (if m.do_add { 123 } else { 456 }); + } + assert_eq!(total, 1126603594357269734); + + // Testing UP magic_s32 + total = 0; + for x in 0..(200 * 1000i32) { + let m = magic_s32(-0x8000_0000i32 + x); + total = total ^ (m.mul_by as u64); + total = total + (m.shift_by as u64); + } + assert_eq!(total, 18446744069953376812); + + total = 0; + // Testing MIDPOINT magic_s32 + for x in 0..(200 * 1000i32) { + let x2 = -100 * 1000i32 + x; + if x2 != -1 && x2 != 0 && x2 != 1 { + let m = magic_s32(x2); + total = total ^ (m.mul_by as u64); + total = total + (m.shift_by as u64); + } + } + assert_eq!(total, 351839350); + + // Testing DOWN magic_s32 + total = 0; + for x in 0..(200 * 1000i32) { + let m = magic_s32(0x7FFF_FFFFi32 - x); + total = total ^ (m.mul_by as u64); + total = total + (m.shift_by as u64); + } + assert_eq!(total, 18446744072916880714); + + // Testing UP magic_s64 + total = 0; + for x in 0..(200 * 1000i64) { + let m = magic_s64(-0x8000_0000_0000_0000i64 + x); + total = total ^ (m.mul_by as u64); + total = total + (m.shift_by as u64); + } + assert_eq!(total, 17929885647724831014); + + total = 0; + // Testing MIDPOINT magic_s64 + for x in 0..(200 * 1000i64) { + let x2 = -100 * 1000i64 + x; + if x2 != -1 && x2 != 0 && x2 != 1 { + let m = magic_s64(x2); + total = total ^ (m.mul_by as u64); + total = total + (m.shift_by as u64); + } + } + assert_eq!(total, 18106042338125661964); + + // Testing DOWN magic_s64 + total = 0; + for x in 0..(200 * 1000i64) { + let m = magic_s64(0x7FFF_FFFF_FFFF_FFFFi64 - x); + total = total ^ (m.mul_by as u64); + total = total + (m.shift_by as u64); + } + assert_eq!(total, 563301797155560970); + } + + #[test] + fn test_magic_generators_give_correct_numbers() { + // For a variety of values for both `n` and `d`, compute the magic + // numbers for `d`, and in effect interpret them so as to compute + // `n / d`. 
Check that it equals the value of `n / d` computed
+    // directly by the hardware. This serves to check that the magic
+    // number generators work properly. In total, 50,148,000 tests are
+    // done.
+
+        // Some constants
+        const MIN_U32: u32 = 0;
+        const MAX_U32: u32 = 0xFFFF_FFFFu32;
+        const MAX_U32_HALF: u32 = 0x8000_0000u32; // more or less
+
+        const MIN_S32: i32 = 0x8000_0000u32 as i32;
+        const MAX_S32: i32 = 0x7FFF_FFFFu32 as i32;
+
+        const MIN_U64: u64 = 0;
+        const MAX_U64: u64 = 0xFFFF_FFFF_FFFF_FFFFu64;
+        const MAX_U64_HALF: u64 = 0x8000_0000_0000_0000u64; // ditto
+
+        const MIN_S64: i64 = 0x8000_0000_0000_0000u64 as i64;
+        const MAX_S64: i64 = 0x7FFF_FFFF_FFFF_FFFFu64 as i64;
+
+        // These generate reference results for signed/unsigned 32/64 bit
+        // division, rounding towards zero.
+        fn div_u32(x: u32, y: u32) -> u32 {
+            return x / y;
+        }
+        fn div_s32(x: i32, y: i32) -> i32 {
+            return x / y;
+        }
+        fn div_u64(x: u64, y: u64) -> u64 {
+            return x / y;
+        }
+        fn div_s64(x: i64, y: i64) -> i64 {
+            return x / y;
+        }
+
+        // Returns the high half of a 32 bit unsigned widening multiply.
+        fn mulhw_u32(x: u32, y: u32) -> u32 {
+            let x64: u64 = x as u64;
+            let y64: u64 = y as u64;
+            let r64: u64 = x64 * y64;
+            (r64 >> 32) as u32
+        }
+
+        // Returns the high half of a 32 bit signed widening multiply.
+        fn mulhw_s32(x: i32, y: i32) -> i32 {
+            let x64: i64 = x as i64;
+            let y64: i64 = y as i64;
+            let r64: i64 = x64 * y64;
+            (r64 >> 32) as i32
+        }
+
+        // Returns the high half of a 64 bit unsigned widening multiply.
+        fn mulhw_u64(x: u64, y: u64) -> u64 {
+            let t0: u64 = x & 0xffffffffu64;
+            let t1: u64 = x >> 32;
+            let t2: u64 = y & 0xffffffffu64;
+            let t3: u64 = y >> 32;
+            let t4: u64 = t0 * t2;
+            let t5: u64 = t1 * t2 + (t4 >> 32);
+            let t6: u64 = t5 & 0xffffffffu64;
+            let t7: u64 = t5 >> 32;
+            let t8: u64 = t0 * t3 + t6;
+            let t9: u64 = t1 * t3 + t7 + (t8 >> 32);
+            t9
+        }
+
+        // Returns the high half of a 64 bit signed widening multiply.
+        fn mulhw_s64(x: i64, y: i64) -> i64 {
+            let t0: u64 = x as u64 & 0xffffffffu64;
+            let t1: i64 = x >> 32;
+            let t2: u64 = y as u64 & 0xffffffffu64;
+            let t3: i64 = y >> 32;
+            let t4: u64 = t0 * t2;
+            let t5: i64 = t1 * t2 as i64 + (t4 >> 32) as i64;
+            let t6: u64 = t5 as u64 & 0xffffffffu64;
+            let t7: i64 = t5 >> 32;
+            let t8: i64 = t0 as i64 * t3 + t6 as i64;
+            let t9: i64 = t1 * t3 + t7 + (t8 >> 32);
+            t9
+        }
+
+        // Compute the magic numbers for `d` and then use them to compute and
+        // check `n / d` for around 1000 values of `n`, using unsigned 32-bit
+        // division.
+        fn test_magic_u32_inner(d: u32, n_tests_done: &mut i32) {
+            // Advance the numerator (the `n` in `n / d`) so as to test
+            // densely near the range ends (and, in the signed variants, near
+            // zero) but not so densely away from those regions.
+            fn advance_n_u32(x: u32) -> u32 {
+                if x < MIN_U32 + 110 {
+                    return x + 1;
+                }
+                if x < MIN_U32 + 1700 {
+                    return x + 23;
+                }
+                if x < MAX_U32 - 1700 {
+                    let xd: f64 = (x as f64) * 1.06415927;
+                    return if xd >= (MAX_U32 - 1700) as f64 {
+                        MAX_U32 - 1700
+                    } else {
+                        xd as u32
+                    };
+                }
+                if x < MAX_U32 - 110 {
+                    return x + 23;
+                }
+                u32::wrapping_add(x, 1)
+            }
+
+            let magic: MU32 = magic_u32(d);
+            let mut n: u32 = MIN_U32;
+            loop {
+                *n_tests_done += 1;
+                // Compute and check `q = n / d` using `magic`.
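+                //
+                // A hand-worked illustration of this scheme (for orientation only; the
+                // concrete constants here are assumptions, not values asserted elsewhere
+                // in this file): for d == 7 the classic unsigned magic is
+                // mul_by == 0x2492_4925 with do_add == true and shift_by == 3. For
+                // n == 100, mulhw gives q == (100 * 0x2492_4925) >> 32 == 14, and the
+                // add-path fixup below computes ((100 - 14) >> 1) + 14 == 57, then
+                // 57 >> (3 - 1) == 14 == 100 / 7. The fixup is just an overflow-free way
+                // of evaluating (n + q) >> shift_by.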
+ let mut q: u32 = mulhw_u32(n, magic.mul_by); + if magic.do_add { + assert!(magic.shift_by >= 1 && magic.shift_by <= 32); + let mut t: u32 = n - q; + t >>= 1; + t = t + q; + q = t >> (magic.shift_by - 1); + } else { + assert!(magic.shift_by >= 0 && magic.shift_by <= 31); + q >>= magic.shift_by; + } + + assert_eq!(q, div_u32(n, d)); + + n = advance_n_u32(n); + if n == MIN_U32 { + break; + } + } + } + + // Compute the magic numbers for `d` and then use them to compute and + // check `n / d` for around 1000 values of `n`, using signed 32-bit + // division. + fn test_magic_s32_inner(d: i32, n_tests_done: &mut i32) { + // See comment on advance_n_u32 above. + fn advance_n_s32(x: i32) -> i32 { + if x >= 0 && x <= 29 { + return x + 1; + } + if x < MIN_S32 + 110 { + return x + 1; + } + if x < MIN_S32 + 1700 { + return x + 23; + } + if x < MAX_S32 - 1700 { + let mut xd: f64 = x as f64; + xd = if xd < 0.0 { + xd / 1.06415927 + } else { + xd * 1.06415927 + }; + return if xd >= (MAX_S32 - 1700) as f64 { + MAX_S32 - 1700 + } else { + xd as i32 + }; + } + if x < MAX_S32 - 110 { + return x + 23; + } + if x == MAX_S32 { + return MIN_S32; + } + x + 1 + } + + let magic: MS32 = magic_s32(d); + let mut n: i32 = MIN_S32; + loop { + *n_tests_done += 1; + // Compute and check `q = n / d` using `magic`. + let mut q: i32 = mulhw_s32(n, magic.mul_by); + if d > 0 && magic.mul_by < 0 { + q = q + n; + } else if d < 0 && magic.mul_by > 0 { + q = q - n; + } + assert!(magic.shift_by >= 0 && magic.shift_by <= 31); + q = q >> magic.shift_by; + let mut t: u32 = q as u32; + t = t >> 31; + q = q + (t as i32); + + assert_eq!(q, div_s32(n, d)); + + n = advance_n_s32(n); + if n == MIN_S32 { + break; + } + } + } + + // Compute the magic numbers for `d` and then use them to compute and + // check `n / d` for around 1000 values of `n`, using unsigned 64-bit + // division. + fn test_magic_u64_inner(d: u64, n_tests_done: &mut i32) { + // See comment on advance_n_u32 above. + fn advance_n_u64(x: u64) -> u64 { + if x < MIN_U64 + 110 { + return x + 1; + } + if x < MIN_U64 + 1700 { + return x + 23; + } + if x < MAX_U64 - 1700 { + let xd: f64 = (x as f64) * 1.06415927; + return if xd >= (MAX_U64 - 1700) as f64 { + MAX_U64 - 1700 + } else { + xd as u64 + }; + } + if x < MAX_U64 - 110 { + return x + 23; + } + u64::wrapping_add(x, 1) + } + + let magic: MU64 = magic_u64(d); + let mut n: u64 = MIN_U64; + loop { + *n_tests_done += 1; + // Compute and check `q = n / d` using `magic`. + let mut q = mulhw_u64(n, magic.mul_by); + if magic.do_add { + assert!(magic.shift_by >= 1 && magic.shift_by <= 64); + let mut t: u64 = n - q; + t >>= 1; + t = t + q; + q = t >> (magic.shift_by - 1); + } else { + assert!(magic.shift_by >= 0 && magic.shift_by <= 63); + q >>= magic.shift_by; + } + + assert_eq!(q, div_u64(n, d)); + + n = advance_n_u64(n); + if n == MIN_U64 { + break; + } + } + } + + // Compute the magic numbers for `d` and then use them to compute and + // check `n / d` for around 1000 values of `n`, using signed 64-bit + // division. + fn test_magic_s64_inner(d: i64, n_tests_done: &mut i32) { + // See comment on advance_n_u32 above. 
+            fn advance_n_s64(x: i64) -> i64 {
+                if x >= 0 && x <= 29 {
+                    return x + 1;
+                }
+                if x < MIN_S64 + 110 {
+                    return x + 1;
+                }
+                if x < MIN_S64 + 1700 {
+                    return x + 23;
+                }
+                if x < MAX_S64 - 1700 {
+                    let mut xd: f64 = x as f64;
+                    xd = if xd < 0.0 {
+                        xd / 1.06415927
+                    } else {
+                        xd * 1.06415927
+                    };
+                    return if xd >= (MAX_S64 - 1700) as f64 {
+                        MAX_S64 - 1700
+                    } else {
+                        xd as i64
+                    };
+                }
+                if x < MAX_S64 - 110 {
+                    return x + 23;
+                }
+                if x == MAX_S64 {
+                    return MIN_S64;
+                }
+                x + 1
+            }
+
+            let magic: MS64 = magic_s64(d);
+            let mut n: i64 = MIN_S64;
+            loop {
+                *n_tests_done += 1;
+                // Compute and check `q = n / d` using `magic`.
+                let mut q: i64 = mulhw_s64(n, magic.mul_by);
+                if d > 0 && magic.mul_by < 0 {
+                    q = q + n;
+                } else if d < 0 && magic.mul_by > 0 {
+                    q = q - n;
+                }
+                assert!(magic.shift_by >= 0 && magic.shift_by <= 63);
+                q = q >> magic.shift_by;
+                let mut t: u64 = q as u64;
+                t = t >> 63;
+                q = q + (t as i64);
+
+                assert_eq!(q, div_s64(n, d));
+
+                n = advance_n_s64(n);
+                if n == MIN_S64 {
+                    break;
+                }
+            }
+        }
+
+        // Using all the above support machinery, actually run the tests.
+
+        let mut n_tests_done: i32 = 0;
+
+        // u32 division tests
+        {
+            // 2 .. 3k
+            let mut d: u32 = 2;
+            for _ in 0..3 * 1000 {
+                test_magic_u32_inner(d, &mut n_tests_done);
+                d += 1;
+            }
+
+            // across the midpoint: midpoint - 3k .. midpoint + 3k
+            d = MAX_U32_HALF - 3 * 1000;
+            for _ in 0..2 * 3 * 1000 {
+                test_magic_u32_inner(d, &mut n_tests_done);
+                d += 1;
+            }
+
+            // MAX_U32 - 3k .. MAX_U32 (in reverse order)
+            d = MAX_U32;
+            for _ in 0..3 * 1000 {
+                test_magic_u32_inner(d, &mut n_tests_done);
+                d -= 1;
+            }
+        }
+
+        // s32 division tests
+        {
+            // MIN_S32 .. MIN_S32 + 3k
+            let mut d: i32 = MIN_S32;
+            for _ in 0..3 * 1000 {
+                test_magic_s32_inner(d, &mut n_tests_done);
+                d += 1;
+            }
+
+            // -3k .. -2 (in reverse order)
+            d = -2;
+            for _ in 0..3 * 1000 {
+                test_magic_s32_inner(d, &mut n_tests_done);
+                d -= 1;
+            }
+
+            // 2 .. 3k
+            d = 2;
+            for _ in 0..3 * 1000 {
+                test_magic_s32_inner(d, &mut n_tests_done);
+                d += 1;
+            }
+
+            // MAX_S32 - 3k .. MAX_S32 (in reverse order)
+            d = MAX_S32;
+            for _ in 0..3 * 1000 {
+                test_magic_s32_inner(d, &mut n_tests_done);
+                d -= 1;
+            }
+        }
+
+        // u64 division tests
+        {
+            // 2 .. 3k
+            let mut d: u64 = 2;
+            for _ in 0..3 * 1000 {
+                test_magic_u64_inner(d, &mut n_tests_done);
+                d += 1;
+            }
+
+            // across the midpoint: midpoint - 3k .. midpoint + 3k
+            d = MAX_U64_HALF - 3 * 1000;
+            for _ in 0..2 * 3 * 1000 {
+                test_magic_u64_inner(d, &mut n_tests_done);
+                d += 1;
+            }
+
+            // MAX_U64 - 3k .. MAX_U64 (in reverse order)
+            d = MAX_U64;
+            for _ in 0..3 * 1000 {
+                test_magic_u64_inner(d, &mut n_tests_done);
+                d -= 1;
+            }
+        }
+
+        // s64 division tests
+        {
+            // MIN_S64 .. MIN_S64 + 3k
+            let mut d: i64 = MIN_S64;
+            for _ in 0..3 * 1000 {
+                test_magic_s64_inner(d, &mut n_tests_done);
+                d += 1;
+            }
+
+            // -3k .. -2 (in reverse order)
+            d = -2;
+            for _ in 0..3 * 1000 {
+                test_magic_s64_inner(d, &mut n_tests_done);
+                d -= 1;
+            }
+
+            // 2 .. 3k
+            d = 2;
+            for _ in 0..3 * 1000 {
+                test_magic_s64_inner(d, &mut n_tests_done);
+                d += 1;
+            }
+
+            // MAX_S64 - 3k .. MAX_S64 (in reverse order)
+            d = MAX_S64;
+            for _ in 0..3 * 1000 {
+                test_magic_s64_inner(d, &mut n_tests_done);
+                d -= 1;
+            }
+        }
+        assert_eq!(n_tests_done, 50_148_000);
+    }
+}
diff --git a/cranelift/codegen/src/dominator_tree.rs b/cranelift/codegen/src/dominator_tree.rs
new file mode 100644
index 0000000000..d397fc7183
--- /dev/null
+++ b/cranelift/codegen/src/dominator_tree.rs
@@ -0,0 +1,837 @@
+//! A Dominator Tree represented as mappings of Blocks to their immediate dominator.
+
+use crate::entity::SecondaryMap;
+use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
+use crate::ir::instructions::BranchInfo;
+use crate::ir::{Block, ExpandedProgramPoint, Function, Inst, Layout, ProgramOrder, Value};
+use crate::packed_option::PackedOption;
+use crate::timing;
+use alloc::vec::Vec;
+use core::cmp;
+use core::cmp::Ordering;
+use core::mem;
+
+/// RPO numbers are not first assigned in a contiguous way but as multiples of STRIDE, to leave
+/// room for modifications of the dominator tree.
+const STRIDE: u32 = 4;
+
+/// Special RPO numbers used during `compute_postorder`.
+const DONE: u32 = 1;
+const SEEN: u32 = 2;
+
+/// Dominator tree node. We keep one of these per block.
+#[derive(Clone, Default)]
+struct DomNode {
+    /// Number of this node in a reverse post-order traversal of the CFG, starting from 1.
+    /// This number is monotonic in the reverse postorder but not contiguous, since we leave
+    /// holes for later localized modifications of the dominator tree.
+    /// Unreachable nodes get number 0, all others are positive.
+    rpo_number: u32,
+
+    /// The immediate dominator of this block, represented as the branch or jump instruction at
+    /// the end of the dominating basic block.
+    ///
+    /// This is `None` for unreachable blocks and the entry block which doesn't have an
+    /// immediate dominator.
+    idom: PackedOption<Inst>,
+}
+
+/// The dominator tree for a single function.
+pub struct DominatorTree {
+    nodes: SecondaryMap<Block, DomNode>,
+
+    /// CFG post-order of all reachable blocks.
+    postorder: Vec<Block>,
+
+    /// Scratch memory used by `compute_postorder()`.
+    stack: Vec<Block>,
+
+    valid: bool,
+}
+
+/// Methods for querying the dominator tree.
+impl DominatorTree {
+    /// Is `block` reachable from the entry block?
+    pub fn is_reachable(&self, block: Block) -> bool {
+        self.nodes[block].rpo_number != 0
+    }
+
+    /// Get the CFG post-order of blocks that was used to compute the dominator tree.
+    ///
+    /// Note that this post-order is not updated automatically when the CFG is modified. It is
+    /// computed from scratch and cached by `compute()`.
+    pub fn cfg_postorder(&self) -> &[Block] {
+        debug_assert!(self.is_valid());
+        &self.postorder
+    }
+
+    /// Returns the immediate dominator of `block`.
+    ///
+    /// The immediate dominator of a basic block is a basic block which we represent by
+    /// the branch or jump instruction at the end of the dominating basic block. This does not
+    /// have to be the terminator of its block.
+    ///
+    /// A branch or jump is said to *dominate* `block` if all control flow paths from the
+    /// function entry to `block` must go through the branch.
+    ///
+    /// The *immediate dominator* is the dominator that is closest to `block`. All other
+    /// dominators also dominate the immediate dominator.
+    ///
+    /// This returns `None` if `block` is not reachable from the entry block, or if it is the
+    /// entry block which has no dominators.
+    pub fn idom(&self, block: Block) -> Option<Inst> {
+        self.nodes[block].idom.into()
+    }
+
+    /// Compare two blocks relative to the reverse post-order.
+    fn rpo_cmp_block(&self, a: Block, b: Block) -> Ordering {
+        self.nodes[a].rpo_number.cmp(&self.nodes[b].rpo_number)
+    }
+
+    /// Compare two program points relative to a reverse post-order traversal of the
+    /// control-flow graph.
+    ///
+    /// Return `Ordering::Less` if `a` comes before `b` in the RPO.
+    ///
+    /// If `a` and `b` belong to the same block, compare their relative position in the block.
+    pub fn rpo_cmp<A, B>(&self, a: A, b: B, layout: &Layout) -> Ordering
+    where
+        A: Into<ExpandedProgramPoint>,
+        B: Into<ExpandedProgramPoint>,
+    {
+        let a = a.into();
+        let b = b.into();
+        self.rpo_cmp_block(layout.pp_block(a), layout.pp_block(b))
+            .then(layout.cmp(a, b))
+    }
+
+    /// Returns `true` if `a` dominates `b`.
+    ///
+    /// This means that every control-flow path from the function entry to `b` must go through
+    /// `a`.
+    ///
+    /// Dominance is ill defined for unreachable blocks. This function can always determine
+    /// dominance for instructions in the same block, but otherwise returns `false` if either
+    /// block is unreachable.
+    ///
+    /// An instruction is considered to dominate itself.
+    pub fn dominates<A, B>(&self, a: A, b: B, layout: &Layout) -> bool
+    where
+        A: Into<ExpandedProgramPoint>,
+        B: Into<ExpandedProgramPoint>,
+    {
+        let a = a.into();
+        let b = b.into();
+        match a {
+            ExpandedProgramPoint::Block(block_a) => {
+                a == b || self.last_dominator(block_a, b, layout).is_some()
+            }
+            ExpandedProgramPoint::Inst(inst_a) => {
+                let block_a = layout
+                    .inst_block(inst_a)
+                    .expect("Instruction not in layout.");
+                match self.last_dominator(block_a, b, layout) {
+                    Some(last) => layout.cmp(inst_a, last) != Ordering::Greater,
+                    None => false,
+                }
+            }
+        }
+    }
+
+    /// Find the last instruction in `a` that dominates `b`.
+    /// If no instructions in `a` dominate `b`, return `None`.
+    pub fn last_dominator<B>(&self, a: Block, b: B, layout: &Layout) -> Option<Inst>
+    where
+        B: Into<ExpandedProgramPoint>,
+    {
+        let (mut block_b, mut inst_b) = match b.into() {
+            ExpandedProgramPoint::Block(block) => (block, None),
+            ExpandedProgramPoint::Inst(inst) => (
+                layout.inst_block(inst).expect("Instruction not in layout."),
+                Some(inst),
+            ),
+        };
+        let rpo_a = self.nodes[a].rpo_number;
+
+        // Run a finger up the dominator tree from b until we see a.
+        // Do nothing if b is unreachable.
+        while rpo_a < self.nodes[block_b].rpo_number {
+            let idom = match self.idom(block_b) {
+                Some(idom) => idom,
+                None => return None, // a is unreachable, so we climbed past the entry
+            };
+            block_b = layout.inst_block(idom).expect("Dominator got removed.");
+            inst_b = Some(idom);
+        }
+        if a == block_b {
+            inst_b
+        } else {
+            None
+        }
+    }
+
+    /// Compute the common dominator of two basic blocks.
+    ///
+    /// Both basic blocks are assumed to be reachable.
+    pub fn common_dominator(
+        &self,
+        mut a: BlockPredecessor,
+        mut b: BlockPredecessor,
+        layout: &Layout,
+    ) -> BlockPredecessor {
+        loop {
+            match self.rpo_cmp_block(a.block, b.block) {
+                Ordering::Less => {
+                    // `a` comes before `b` in the RPO. Move `b` up.
+                    let idom = self.nodes[b.block].idom.expect("Unreachable basic block?");
+                    b = BlockPredecessor::new(
+                        layout.inst_block(idom).expect("Dangling idom instruction"),
+                        idom,
+                    );
+                }
+                Ordering::Greater => {
+                    // `b` comes before `a` in the RPO. Move `a` up.
+                    let idom = self.nodes[a.block].idom.expect("Unreachable basic block?");
+                    a = BlockPredecessor::new(
+                        layout.inst_block(idom).expect("Dangling idom instruction"),
+                        idom,
+                    );
+                }
+                Ordering::Equal => break,
+            }
+        }
+
+        debug_assert_eq!(
+            a.block, b.block,
+            "Unreachable block passed to common_dominator?"
+        );
+
+        // We're in the same block. The common dominator is the earlier instruction.
+        if layout.cmp(a.inst, b.inst) == Ordering::Less {
+            a
+        } else {
+            b
+        }
+    }
+}
+
+impl DominatorTree {
+    /// Allocate a new blank dominator tree. Use `compute` to compute the dominator tree for a
+    /// function.
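+    ///
+    /// A minimal usage sketch (illustration only; `func` and `cfg` stand for an
+    /// already-built `Function` and its computed `ControlFlowGraph`):
+    ///
+    /// ```ignore
+    /// let mut domtree = DominatorTree::new();
+    /// domtree.compute(&func, &cfg);
+    /// assert!(domtree.is_valid());
+    /// ```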
+    pub fn new() -> Self {
+        Self {
+            nodes: SecondaryMap::new(),
+            postorder: Vec::new(),
+            stack: Vec::new(),
+            valid: false,
+        }
+    }
+
+    /// Allocate and compute a dominator tree.
+    pub fn with_function(func: &Function, cfg: &ControlFlowGraph) -> Self {
+        let block_capacity = func.layout.block_capacity();
+        let mut domtree = Self {
+            nodes: SecondaryMap::with_capacity(block_capacity),
+            postorder: Vec::with_capacity(block_capacity),
+            stack: Vec::new(),
+            valid: false,
+        };
+        domtree.compute(func, cfg);
+        domtree
+    }
+
+    /// Reset and compute a CFG post-order and dominator tree.
+    pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph) {
+        let _tt = timing::domtree();
+        debug_assert!(cfg.is_valid());
+        self.compute_postorder(func);
+        self.compute_domtree(func, cfg);
+        self.valid = true;
+    }
+
+    /// Clear the data structures used to represent the dominator tree. This will leave the
+    /// tree in a state where `is_valid()` returns false.
+    pub fn clear(&mut self) {
+        self.nodes.clear();
+        self.postorder.clear();
+        debug_assert!(self.stack.is_empty());
+        self.valid = false;
+    }
+
+    /// Check if the dominator tree is in a valid state.
+    ///
+    /// Note that this doesn't perform any kind of validity checks. It simply checks if the
+    /// `compute()` method has been called since the last `clear()`. It does not check that the
+    /// dominator tree is consistent with the CFG.
+    pub fn is_valid(&self) -> bool {
+        self.valid
+    }
+
+    /// Reset all internal data structures and compute a post-order of the control flow graph.
+    ///
+    /// This leaves `rpo_number == 1` for all reachable blocks, 0 for unreachable ones.
+    fn compute_postorder(&mut self, func: &Function) {
+        self.clear();
+        self.nodes.resize(func.dfg.num_blocks());
+
+        // This algorithm is a depth first traversal (DFT) of the control flow graph, computing
+        // a post-order of the blocks that are reachable from the entry block. A DFT post-order
+        // is not unique. The specific order we get is controlled by two factors:
+        //
+        // 1. The order each node's children are visited, and
+        // 2. The method used for pruning graph edges to get a tree.
+        //
+        // There are two ways of viewing the CFG as a graph:
+        //
+        // 1. Each block is a node, with outgoing edges for all the branches in the block.
+        // 2. Each basic block is a node, with outgoing edges for the single branch at the end
+        //    of the BB. (A block is a linear sequence of basic blocks).
+        //
+        // The first graph is a contraction of the second one. We want to compute a block
+        // post-order that is compatible with both graph interpretations. That is, if you
+        // compute a BB post-order and then remove those BBs that do not correspond to block
+        // headers, you get a post-order of the block graph.
+        //
+        // Node child order:
+        //
+        // In the BB graph, we always go down the fall-through path first and follow the branch
+        // destination second.
+        //
+        // In the block graph, this is equivalent to visiting block successors in a bottom-up
+        // order, starting from the destination of the block's terminating jump, ending at the
+        // destination of the first branch in the block.
+        //
+        // Edge pruning:
+        //
+        // In the BB graph, we keep an edge to a block the first time we visit the *source*
+        // side of the edge. Any subsequent edges to the same block are pruned.
+        //
+        // The equivalent tree is reached in the block graph by keeping the first edge to a
+        // block in a top-down traversal of the successors. (And then visiting edges in a
+        // bottom-up order).
+        //
+        // This pruning method makes it possible to compute the DFT without storing lots of
+        // information about the progress through a block.
+
+        // During this algorithm only, use `rpo_number` to hold the following state:
+        //
+        // 0:    block has not yet been reached in the pre-order.
+        // SEEN: block has been pushed on the stack but successors not yet pushed.
+        // DONE: Successors pushed.
+
+        match func.layout.entry_block() {
+            Some(block) => {
+                self.stack.push(block);
+                self.nodes[block].rpo_number = SEEN;
+            }
+            None => return,
+        }
+
+        while let Some(block) = self.stack.pop() {
+            match self.nodes[block].rpo_number {
+                SEEN => {
+                    // This is the first time we pop the block, so we need to scan its
+                    // successors and then revisit it.
+                    self.nodes[block].rpo_number = DONE;
+                    self.stack.push(block);
+                    self.push_successors(func, block);
+                }
+                DONE => {
+                    // This is the second time we pop the block, so all successors have been
+                    // processed.
+                    self.postorder.push(block);
+                }
+                _ => unreachable!(),
+            }
+        }
+    }
+
+    /// Push `block` successors onto `self.stack`, filtering out those that have already been
+    /// seen.
+    ///
+    /// The successors are pushed in program order which is important to get a split-invariant
+    /// post-order. Split-invariant means that if a block is split in two, we get the same
+    /// post-order except for the insertion of the new block header at the split point.
+    fn push_successors(&mut self, func: &Function, block: Block) {
+        for inst in func.layout.block_insts(block) {
+            match func.dfg.analyze_branch(inst) {
+                BranchInfo::SingleDest(succ, _) => self.push_if_unseen(succ),
+                BranchInfo::Table(jt, dest) => {
+                    for succ in func.jump_tables[jt].iter() {
+                        self.push_if_unseen(*succ);
+                    }
+                    if let Some(dest) = dest {
+                        self.push_if_unseen(dest);
+                    }
+                }
+                BranchInfo::NotABranch => {}
+            }
+        }
+    }
+
+    /// Push `block` onto `self.stack` if it has not already been seen.
+    fn push_if_unseen(&mut self, block: Block) {
+        if self.nodes[block].rpo_number == 0 {
+            self.nodes[block].rpo_number = SEEN;
+            self.stack.push(block);
+        }
+    }
+
+    /// Build a dominator tree from a control flow graph using Keith D. Cooper's
+    /// "Simple, Fast Dominator Algorithm."
+    fn compute_domtree(&mut self, func: &Function, cfg: &ControlFlowGraph) {
+        // During this algorithm, `rpo_number` has the following values:
+        //
+        // 0: block is not reachable.
+        // 1: block is reachable, but has not yet been visited during the first pass. This is
+        //    set by `compute_postorder`.
+        // 2+: block is reachable and has an assigned RPO number.
+
+        // We'll be iterating over a reverse post-order of the CFG, skipping the entry block.
+        let (entry_block, postorder) = match self.postorder.as_slice().split_last() {
+            Some((&eb, rest)) => (eb, rest),
+            None => return,
+        };
+        debug_assert_eq!(Some(entry_block), func.layout.entry_block());
+
+        // Do a first pass where we assign RPO numbers to all reachable nodes.
+        self.nodes[entry_block].rpo_number = 2 * STRIDE;
+        for (rpo_idx, &block) in postorder.iter().rev().enumerate() {
+            // Update the current node and give it an RPO number.
+            // The entry block gets number 2 and the rest start at 3, all scaled by STRIDE, to
+            // leave room for future dominator tree modifications.
+            //
+            // Since `compute_idom` will only look at nodes with an assigned RPO number, the
+            // function will never see an uninitialized predecessor.
+            //
+            // Due to the nature of the post-order traversal, every node we visit will have at
+            // least one predecessor that has previously been visited during this RPO.
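+            //
+            // For illustration (not asserted anywhere): with STRIDE == 4 the entry block is
+            // numbered 8 and the remaining reachable blocks 12, 16, 20, ... in RPO order,
+            // leaving three unused numbers between neighbours for renumbering after local
+            // CFG edits.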
+            self.nodes[block] = DomNode {
+                idom: self.compute_idom(block, cfg, &func.layout).into(),
+                rpo_number: (rpo_idx as u32 + 3) * STRIDE,
+            }
+        }
+
+        // Now that we have RPO numbers for everything and initial immediate dominator
+        // estimates, iterate until convergence.
+        //
+        // If the function is free of irreducible control flow, this will exit after one
+        // iteration.
+        let mut changed = true;
+        while changed {
+            changed = false;
+            for &block in postorder.iter().rev() {
+                let idom = self.compute_idom(block, cfg, &func.layout).into();
+                if self.nodes[block].idom != idom {
+                    self.nodes[block].idom = idom;
+                    changed = true;
+                }
+            }
+        }
+    }
+
+    // Compute the immediate dominator for `block` using the current `idom` states for the
+    // reachable nodes.
+    fn compute_idom(&self, block: Block, cfg: &ControlFlowGraph, layout: &Layout) -> Inst {
+        // Get an iterator with just the reachable, already visited predecessors to `block`.
+        // Note that during the first pass, `rpo_number` is 1 for reachable blocks that haven't
+        // been visited yet, 0 for unreachable blocks.
+        let mut reachable_preds = cfg
+            .pred_iter(block)
+            .filter(|&BlockPredecessor { block: pred, .. }| self.nodes[pred].rpo_number > 1);
+
+        // The RPO must visit at least one predecessor before this node.
+        let mut idom = reachable_preds
+            .next()
+            .expect("block node must have one reachable predecessor");
+
+        for pred in reachable_preds {
+            idom = self.common_dominator(idom, pred, layout);
+        }
+
+        idom.inst
+    }
+}
+
+/// Optional pre-order information that can be computed for a dominator tree.
+///
+/// This data structure is computed from a `DominatorTree` and provides:
+///
+/// - A forward traversable dominator tree through the `children()` iterator.
+/// - An ordering of blocks according to a dominator tree pre-order.
+/// - Constant time dominance checks at the block granularity.
+///
+/// The information in this auxiliary data structure is not easy to update when the control
+/// flow graph changes, which is why it is kept separate.
+pub struct DominatorTreePreorder {
+    nodes: SecondaryMap<Block, ExtraNode>,
+
+    // Scratch memory used by `compute_postorder()`.
+    stack: Vec<Block>,
+}
+
+#[derive(Default, Clone)]
+struct ExtraNode {
+    /// First child node in the domtree.
+    child: PackedOption<Block>,
+
+    /// Next sibling node in the domtree. This linked list is ordered according to the CFG RPO.
+    sibling: PackedOption<Block>,
+
+    /// Sequence number for this node in a pre-order traversal of the dominator tree.
+    /// Unreachable blocks have number 0, the entry block is 1.
+    pre_number: u32,
+
+    /// Maximum `pre_number` for the sub-tree of the dominator tree that is rooted at this
+    /// node. This is always >= `pre_number`.
+    pre_max: u32,
+}
+
+/// Creating and computing the dominator tree pre-order.
+impl DominatorTreePreorder {
+    /// Create a new blank `DominatorTreePreorder`.
+    pub fn new() -> Self {
+        Self {
+            nodes: SecondaryMap::new(),
+            stack: Vec::new(),
+        }
+    }
+
+    /// Recompute this data structure to match `domtree`.
+    pub fn compute(&mut self, domtree: &DominatorTree, layout: &Layout) {
+        self.nodes.clear();
+        debug_assert_eq!(self.stack.len(), 0);
+
+        // Step 1: Populate the child and sibling links.
+        //
+        // By following the CFG post-order and pushing to the front of the lists, we make sure
+        // that sibling lists are ordered according to the CFG reverse post-order.
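+        //
+        // As an illustration of the interval scheme assigned in steps 2 and 3 below (an
+        // assumed example, not tied to any particular function): for a dominator tree
+        // entry -> {A, B} and A -> {C}, a pre-order walk visiting entry, A, C, B assigns
+        // entry: (pre 1, max 4), A: (2, 3), C: (3, 3), B: (4, 4). "a dominates b" then
+        // reduces to pre_number[a] <= pre_number[b] && pre_max[a] >= pre_max[b].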
+        for &block in domtree.cfg_postorder() {
+            if let Some(idom_inst) = domtree.idom(block) {
+                let idom = layout.pp_block(idom_inst);
+                let sib = mem::replace(&mut self.nodes[idom].child, block.into());
+                self.nodes[block].sibling = sib;
+            } else {
+                // The only block without an immediate dominator is the entry.
+                self.stack.push(block);
+            }
+        }
+
+        // Step 2. Assign pre-order numbers from a DFS of the dominator tree.
+        debug_assert!(self.stack.len() <= 1);
+        let mut n = 0;
+        while let Some(block) = self.stack.pop() {
+            n += 1;
+            let node = &mut self.nodes[block];
+            node.pre_number = n;
+            node.pre_max = n;
+            if let Some(n) = node.sibling.expand() {
+                self.stack.push(n);
+            }
+            if let Some(n) = node.child.expand() {
+                self.stack.push(n);
+            }
+        }
+
+        // Step 3. Propagate the `pre_max` numbers up the tree.
+        // The CFG post-order is topologically ordered w.r.t. dominance so a node comes after
+        // all its dominator tree children.
+        for &block in domtree.cfg_postorder() {
+            if let Some(idom_inst) = domtree.idom(block) {
+                let idom = layout.pp_block(idom_inst);
+                let pre_max = cmp::max(self.nodes[block].pre_max, self.nodes[idom].pre_max);
+                self.nodes[idom].pre_max = pre_max;
+            }
+        }
+    }
+}
+
+/// An iterator that enumerates the direct children of a block in the dominator tree.
+pub struct ChildIter<'a> {
+    dtpo: &'a DominatorTreePreorder,
+    next: PackedOption<Block>,
+}
+
+impl<'a> Iterator for ChildIter<'a> {
+    type Item = Block;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let n = self.next.expand();
+        if let Some(block) = n {
+            self.next = self.dtpo.nodes[block].sibling;
+        }
+        n
+    }
+}
+
+/// Query interface for the dominator tree pre-order.
+impl DominatorTreePreorder {
+    /// Get an iterator over the direct children of `block` in the dominator tree.
+    ///
+    /// These are the blocks whose immediate dominator is an instruction in `block`, ordered
+    /// according to the CFG reverse post-order.
+    pub fn children(&self, block: Block) -> ChildIter {
+        ChildIter {
+            dtpo: self,
+            next: self.nodes[block].child,
+        }
+    }
+
+    /// Fast, constant time dominance check with block granularity.
+    ///
+    /// This computes the same result as `domtree.dominates(a, b)`, but in guaranteed fast
+    /// constant time. This is less general than the `DominatorTree` method because it only
+    /// works with block program points.
+    ///
+    /// A block is considered to dominate itself.
+    pub fn dominates(&self, a: Block, b: Block) -> bool {
+        let na = &self.nodes[a];
+        let nb = &self.nodes[b];
+        na.pre_number <= nb.pre_number && na.pre_max >= nb.pre_max
+    }
+
+    /// Compare two blocks according to the dominator pre-order.
+    pub fn pre_cmp_block(&self, a: Block, b: Block) -> Ordering {
+        self.nodes[a].pre_number.cmp(&self.nodes[b].pre_number)
+    }
+
+    /// Compare two program points according to the dominator tree pre-order.
+    ///
+    /// This ordering of program points has the property that given a program point, pp, all
+    /// the program points dominated by pp follow immediately and contiguously after pp in the
+    /// order.
+    pub fn pre_cmp<A, B>(&self, a: A, b: B, layout: &Layout) -> Ordering
+    where
+        A: Into<ExpandedProgramPoint>,
+        B: Into<ExpandedProgramPoint>,
+    {
+        let a = a.into();
+        let b = b.into();
+        self.pre_cmp_block(layout.pp_block(a), layout.pp_block(b))
+            .then(layout.cmp(a, b))
+    }
+
+    /// Compare two value defs according to the dominator tree pre-order.
+    ///
+    /// Two values defined at the same program point are compared according to their parameter
+    /// or result order.
+    ///
+    /// This is a total ordering of the values in the function.
+ pub fn pre_cmp_def(&self, a: Value, b: Value, func: &Function) -> Ordering { + let da = func.dfg.value_def(a); + let db = func.dfg.value_def(b); + self.pre_cmp(da, db, &func.layout) + .then_with(|| da.num().cmp(&db.num())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cursor::{Cursor, FuncCursor}; + use crate::flowgraph::ControlFlowGraph; + use crate::ir::types::*; + use crate::ir::{Function, InstBuilder, TrapCode}; + + #[test] + fn empty() { + let func = Function::new(); + let cfg = ControlFlowGraph::with_function(&func); + debug_assert!(cfg.is_valid()); + let dtree = DominatorTree::with_function(&func, &cfg); + assert_eq!(0, dtree.nodes.keys().count()); + assert_eq!(dtree.cfg_postorder(), &[]); + + let mut dtpo = DominatorTreePreorder::new(); + dtpo.compute(&dtree, &func.layout); + } + + #[test] + fn unreachable_node() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + let v0 = func.dfg.append_block_param(block0, I32); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + + let mut cur = FuncCursor::new(&mut func); + + cur.insert_block(block0); + cur.ins().brnz(v0, block2, &[]); + cur.ins().trap(TrapCode::User(0)); + + cur.insert_block(block1); + let v1 = cur.ins().iconst(I32, 1); + let v2 = cur.ins().iadd(v0, v1); + cur.ins().jump(block0, &[v2]); + + cur.insert_block(block2); + cur.ins().return_(&[v0]); + + let cfg = ControlFlowGraph::with_function(cur.func); + let dt = DominatorTree::with_function(cur.func, &cfg); + + // Fall-through-first, prune-at-source DFT: + // + // block0 { + // brnz block2 { + // trap + // block2 { + // return + // } block2 + // } block0 + assert_eq!(dt.cfg_postorder(), &[block2, block0]); + + let v2_def = cur.func.dfg.value_def(v2).unwrap_inst(); + assert!(!dt.dominates(v2_def, block0, &cur.func.layout)); + assert!(!dt.dominates(block0, v2_def, &cur.func.layout)); + + let mut dtpo = DominatorTreePreorder::new(); + dtpo.compute(&dt, &cur.func.layout); + assert!(dtpo.dominates(block0, block0)); + assert!(!dtpo.dominates(block0, block1)); + assert!(dtpo.dominates(block0, block2)); + assert!(!dtpo.dominates(block1, block0)); + assert!(dtpo.dominates(block1, block1)); + assert!(!dtpo.dominates(block1, block2)); + assert!(!dtpo.dominates(block2, block0)); + assert!(!dtpo.dominates(block2, block1)); + assert!(dtpo.dominates(block2, block2)); + } + + #[test] + fn non_zero_entry_block() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + let block3 = func.dfg.make_block(); + let cond = func.dfg.append_block_param(block3, I32); + + let mut cur = FuncCursor::new(&mut func); + + cur.insert_block(block3); + let jmp_block3_block1 = cur.ins().jump(block1, &[]); + + cur.insert_block(block1); + let br_block1_block0 = cur.ins().brnz(cond, block0, &[]); + let jmp_block1_block2 = cur.ins().jump(block2, &[]); + + cur.insert_block(block2); + cur.ins().jump(block0, &[]); + + cur.insert_block(block0); + + let cfg = ControlFlowGraph::with_function(cur.func); + let dt = DominatorTree::with_function(cur.func, &cfg); + + // Fall-through-first, prune-at-source DFT: + // + // block3 { + // block3:jump block1 { + // block1 { + // block1:brnz block0 { + // block1:jump block2 { + // block2 { + // block2:jump block0 (seen) + // } block2 + // } block1:jump block2 + // block0 { + // } block0 + // } block1:brnz block0 + // } block1 + // } block3:jump block1 + // } block3 + + assert_eq!(dt.cfg_postorder(), &[block2, block0, block1, 
block3]); + + assert_eq!(cur.func.layout.entry_block().unwrap(), block3); + assert_eq!(dt.idom(block3), None); + assert_eq!(dt.idom(block1).unwrap(), jmp_block3_block1); + assert_eq!(dt.idom(block2).unwrap(), jmp_block1_block2); + assert_eq!(dt.idom(block0).unwrap(), br_block1_block0); + + assert!(dt.dominates(br_block1_block0, br_block1_block0, &cur.func.layout)); + assert!(!dt.dominates(br_block1_block0, jmp_block3_block1, &cur.func.layout)); + assert!(dt.dominates(jmp_block3_block1, br_block1_block0, &cur.func.layout)); + + assert_eq!( + dt.rpo_cmp(block3, block3, &cur.func.layout), + Ordering::Equal + ); + assert_eq!(dt.rpo_cmp(block3, block1, &cur.func.layout), Ordering::Less); + assert_eq!( + dt.rpo_cmp(block3, jmp_block3_block1, &cur.func.layout), + Ordering::Less + ); + assert_eq!( + dt.rpo_cmp(jmp_block3_block1, jmp_block1_block2, &cur.func.layout), + Ordering::Less + ); + } + + #[test] + fn backwards_layout() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + + let mut cur = FuncCursor::new(&mut func); + + cur.insert_block(block0); + let jmp02 = cur.ins().jump(block2, &[]); + + cur.insert_block(block1); + let trap = cur.ins().trap(TrapCode::User(5)); + + cur.insert_block(block2); + let jmp21 = cur.ins().jump(block1, &[]); + + let cfg = ControlFlowGraph::with_function(cur.func); + let dt = DominatorTree::with_function(cur.func, &cfg); + + assert_eq!(cur.func.layout.entry_block(), Some(block0)); + assert_eq!(dt.idom(block0), None); + assert_eq!(dt.idom(block1), Some(jmp21)); + assert_eq!(dt.idom(block2), Some(jmp02)); + + assert!(dt.dominates(block0, block0, &cur.func.layout)); + assert!(dt.dominates(block0, jmp02, &cur.func.layout)); + assert!(dt.dominates(block0, block1, &cur.func.layout)); + assert!(dt.dominates(block0, trap, &cur.func.layout)); + assert!(dt.dominates(block0, block2, &cur.func.layout)); + assert!(dt.dominates(block0, jmp21, &cur.func.layout)); + + assert!(!dt.dominates(jmp02, block0, &cur.func.layout)); + assert!(dt.dominates(jmp02, jmp02, &cur.func.layout)); + assert!(dt.dominates(jmp02, block1, &cur.func.layout)); + assert!(dt.dominates(jmp02, trap, &cur.func.layout)); + assert!(dt.dominates(jmp02, block2, &cur.func.layout)); + assert!(dt.dominates(jmp02, jmp21, &cur.func.layout)); + + assert!(!dt.dominates(block1, block0, &cur.func.layout)); + assert!(!dt.dominates(block1, jmp02, &cur.func.layout)); + assert!(dt.dominates(block1, block1, &cur.func.layout)); + assert!(dt.dominates(block1, trap, &cur.func.layout)); + assert!(!dt.dominates(block1, block2, &cur.func.layout)); + assert!(!dt.dominates(block1, jmp21, &cur.func.layout)); + + assert!(!dt.dominates(trap, block0, &cur.func.layout)); + assert!(!dt.dominates(trap, jmp02, &cur.func.layout)); + assert!(!dt.dominates(trap, block1, &cur.func.layout)); + assert!(dt.dominates(trap, trap, &cur.func.layout)); + assert!(!dt.dominates(trap, block2, &cur.func.layout)); + assert!(!dt.dominates(trap, jmp21, &cur.func.layout)); + + assert!(!dt.dominates(block2, block0, &cur.func.layout)); + assert!(!dt.dominates(block2, jmp02, &cur.func.layout)); + assert!(dt.dominates(block2, block1, &cur.func.layout)); + assert!(dt.dominates(block2, trap, &cur.func.layout)); + assert!(dt.dominates(block2, block2, &cur.func.layout)); + assert!(dt.dominates(block2, jmp21, &cur.func.layout)); + + assert!(!dt.dominates(jmp21, block0, &cur.func.layout)); + assert!(!dt.dominates(jmp21, jmp02, &cur.func.layout)); + 
assert!(dt.dominates(jmp21, block1, &cur.func.layout));
+        assert!(dt.dominates(jmp21, trap, &cur.func.layout));
+        assert!(!dt.dominates(jmp21, block2, &cur.func.layout));
+        assert!(dt.dominates(jmp21, jmp21, &cur.func.layout));
+    }
+}
diff --git a/cranelift/codegen/src/flowgraph.rs b/cranelift/codegen/src/flowgraph.rs
new file mode 100644
index 0000000000..37245da912
--- /dev/null
+++ b/cranelift/codegen/src/flowgraph.rs
@@ -0,0 +1,350 @@
+//! A control flow graph represented as mappings of basic blocks to their predecessors
+//! and successors.
+//!
+//! Successors are represented as blocks while predecessors are represented as basic blocks.
+//! Basic blocks are denoted by tuples of block and branch/jump instructions. Each predecessor
+//! tuple corresponds to the end of a basic block.
+//!
+//! ```c
+//! Block0:
+//!     ...          ; beginning of basic block
+//!
+//!     ...
+//!
+//!     brz vx, Block1 ; end of basic block
+//!
+//!     ...          ; beginning of basic block
+//!
+//!     ...
+//!
+//!     jmp Block2   ; end of basic block
+//! ```
+//!
+//! Here `Block1` and `Block2` would each have a single predecessor denoted as `(Block0, brz)`
+//! and `(Block0, jmp Block2)` respectively.
+
+use crate::bforest;
+use crate::entity::SecondaryMap;
+use crate::ir::instructions::BranchInfo;
+use crate::ir::{Block, Function, Inst};
+use crate::timing;
+use core::mem;
+
+/// A basic block denoted by its enclosing Block and last instruction.
+#[derive(Debug, PartialEq, Eq)]
+pub struct BlockPredecessor {
+    /// Enclosing Block key.
+    pub block: Block,
+    /// Last instruction in the basic block.
+    pub inst: Inst,
+}
+
+impl BlockPredecessor {
+    /// Convenient method to construct new BlockPredecessor.
+    pub fn new(block: Block, inst: Inst) -> Self {
+        Self { block, inst }
+    }
+}
+
+/// A container for the successors and predecessors of some Block.
+#[derive(Clone, Default)]
+struct CFGNode {
+    /// Instructions that can branch or jump to this block.
+    ///
+    /// This maps branch instruction -> predecessor block, which is redundant since the block
+    /// containing the branch instruction is available from the `layout.inst_block()` method.
+    /// We store the redundant information because:
+    ///
+    /// 1. Many `pred_iter()` consumers want the block anyway, so it is handily available.
+    /// 2. `invalidate_block_successors()` may be called *after* branches have been removed
+    ///    from their block, but we still need to remove them from the old block predecessor
+    ///    map.
+    ///
+    /// The redundant block stored here is always consistent with the CFG successor lists,
+    /// even after the IR has been edited.
+    pub predecessors: bforest::Map<Inst, Block>,
+
+    /// Set of blocks that are the targets of branches and jumps in this block.
+    /// The set is ordered by block number, indicated by the `()` comparator type.
+    pub successors: bforest::Set<Block>,
+}
+
+/// The Control Flow Graph maintains a mapping of blocks to their predecessors
+/// and successors, where predecessors are basic blocks and successors are blocks.
+pub struct ControlFlowGraph {
+    data: SecondaryMap<Block, CFGNode>,
+    pred_forest: bforest::MapForest<Inst, Block>,
+    succ_forest: bforest::SetForest<Block>,
+    valid: bool,
+}
+
+impl ControlFlowGraph {
+    /// Allocate a new blank control flow graph.
+    pub fn new() -> Self {
+        Self {
+            data: SecondaryMap::new(),
+            valid: false,
+            pred_forest: bforest::MapForest::new(),
+            succ_forest: bforest::SetForest::new(),
+        }
+    }
+
+    /// Clear all data structures in this control flow graph.
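+    ///
+    /// After a `clear()`, `is_valid()` returns false until `compute()` is called again.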
+    pub fn clear(&mut self) {
+        self.data.clear();
+        self.pred_forest.clear();
+        self.succ_forest.clear();
+        self.valid = false;
+    }
+
+    /// Allocate and compute the control flow graph for `func`.
+    pub fn with_function(func: &Function) -> Self {
+        let mut cfg = Self::new();
+        cfg.compute(func);
+        cfg
+    }
+
+    /// Compute the control flow graph of `func`.
+    ///
+    /// This will clear and overwrite any information already stored in this data structure.
+    pub fn compute(&mut self, func: &Function) {
+        let _tt = timing::flowgraph();
+        self.clear();
+        self.data.resize(func.dfg.num_blocks());
+
+        for block in &func.layout {
+            self.compute_block(func, block);
+        }
+
+        self.valid = true;
+    }
+
+    fn compute_block(&mut self, func: &Function, block: Block) {
+        for inst in func.layout.block_insts(block) {
+            match func.dfg.analyze_branch(inst) {
+                BranchInfo::SingleDest(dest, _) => {
+                    self.add_edge(block, inst, dest);
+                }
+                BranchInfo::Table(jt, dest) => {
+                    if let Some(dest) = dest {
+                        self.add_edge(block, inst, dest);
+                    }
+                    for dest in func.jump_tables[jt].iter() {
+                        self.add_edge(block, inst, *dest);
+                    }
+                }
+                BranchInfo::NotABranch => {}
+            }
+        }
+    }
+
+    fn invalidate_block_successors(&mut self, block: Block) {
+        // Temporarily take ownership because we need mutable access to self.data inside the
+        // loop. Unfortunately borrowck cannot see that our mut accesses to predecessors don't
+        // alias our iteration over successors.
+        let mut successors = mem::replace(&mut self.data[block].successors, Default::default());
+        for succ in successors.iter(&self.succ_forest) {
+            self.data[succ]
+                .predecessors
+                .retain(&mut self.pred_forest, |_, &mut e| e != block);
+        }
+        successors.clear(&mut self.succ_forest);
+    }
+
+    /// Recompute the control flow graph of `block`.
+    ///
+    /// This is for use after modifying instructions within a specific block. It recomputes all
+    /// edges from `block` while leaving edges to `block` intact. Its functionality is a subset
+    /// of that of the more expensive `compute`, and should be used when we know we don't need
+    /// to recompute the CFG from scratch, but rather that our changes have been restricted to
+    /// specific blocks.
+    pub fn recompute_block(&mut self, func: &Function, block: Block) {
+        debug_assert!(self.is_valid());
+        self.invalidate_block_successors(block);
+        self.compute_block(func, block);
+    }
+
+    fn add_edge(&mut self, from: Block, from_inst: Inst, to: Block) {
+        self.data[from]
+            .successors
+            .insert(to, &mut self.succ_forest, &());
+        self.data[to]
+            .predecessors
+            .insert(from_inst, from, &mut self.pred_forest, &());
+    }
+
+    /// Get an iterator over the CFG predecessors to `block`.
+    pub fn pred_iter(&self, block: Block) -> PredIter {
+        PredIter(self.data[block].predecessors.iter(&self.pred_forest))
+    }
+
+    /// Get an iterator over the CFG successors to `block`.
+    pub fn succ_iter(&self, block: Block) -> SuccIter {
+        debug_assert!(self.is_valid());
+        self.data[block].successors.iter(&self.succ_forest)
+    }
+
+    /// Check if the CFG is in a valid state.
+    ///
+    /// Note that this doesn't perform any kind of validity checks. It simply checks if the
+    /// `compute()` method has been called since the last `clear()`. It does not check that the
+    /// CFG is consistent with the function.
+    pub fn is_valid(&self) -> bool {
+        self.valid
+    }
+}
+
+/// An iterator over block predecessors. The iterator type is `BlockPredecessor`.
+///
+/// Each predecessor is an instruction that branches to the block.
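+///
+/// A usage sketch (with hypothetical `cfg` and `block` values):
+///
+/// ```ignore
+/// for BlockPredecessor { block: pred, inst } in cfg.pred_iter(block) {
+///     // `inst` is the branch or jump in `pred` that targets `block`.
+/// }
+/// ```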
+pub struct PredIter<'a>(bforest::MapIter<'a, Inst, Block>);
+
+impl<'a> Iterator for PredIter<'a> {
+    type Item = BlockPredecessor;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next().map(|(i, e)| BlockPredecessor::new(e, i))
+    }
+}
+
+/// An iterator over block successors. The iterator type is `Block`.
+pub type SuccIter<'a> = bforest::SetIter<'a, Block>;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cursor::{Cursor, FuncCursor};
+    use crate::ir::{types, Function, InstBuilder};
+    use alloc::vec::Vec;
+
+    #[test]
+    fn empty() {
+        let func = Function::new();
+        ControlFlowGraph::with_function(&func);
+    }
+
+    #[test]
+    fn no_predecessors() {
+        let mut func = Function::new();
+        let block0 = func.dfg.make_block();
+        let block1 = func.dfg.make_block();
+        let block2 = func.dfg.make_block();
+        func.layout.append_block(block0);
+        func.layout.append_block(block1);
+        func.layout.append_block(block2);
+
+        let cfg = ControlFlowGraph::with_function(&func);
+
+        let mut fun_blocks = func.layout.blocks();
+        for block in func.layout.blocks() {
+            assert_eq!(block, fun_blocks.next().unwrap());
+            assert_eq!(cfg.pred_iter(block).count(), 0);
+            assert_eq!(cfg.succ_iter(block).count(), 0);
+        }
+    }
+
+    #[test]
+    fn branches_and_jumps() {
+        let mut func = Function::new();
+        let block0 = func.dfg.make_block();
+        let cond = func.dfg.append_block_param(block0, types::I32);
+        let block1 = func.dfg.make_block();
+        let block2 = func.dfg.make_block();
+
+        let br_block0_block2;
+        let br_block1_block1;
+        let jmp_block0_block1;
+        let jmp_block1_block2;
+
+        {
+            let mut cur = FuncCursor::new(&mut func);
+
+            cur.insert_block(block0);
+            br_block0_block2 = cur.ins().brnz(cond, block2, &[]);
+            jmp_block0_block1 = cur.ins().jump(block1, &[]);
+
+            cur.insert_block(block1);
+            br_block1_block1 = cur.ins().brnz(cond, block1, &[]);
+            jmp_block1_block2 = cur.ins().jump(block2, &[]);
+
+            cur.insert_block(block2);
+        }
+
+        let mut cfg = ControlFlowGraph::with_function(&func);
+
+        {
+            let block0_predecessors = cfg.pred_iter(block0).collect::<Vec<_>>();
+            let block1_predecessors = cfg.pred_iter(block1).collect::<Vec<_>>();
+            let block2_predecessors = cfg.pred_iter(block2).collect::<Vec<_>>();
+
+            let block0_successors = cfg.succ_iter(block0).collect::<Vec<_>>();
+            let block1_successors = cfg.succ_iter(block1).collect::<Vec<_>>();
+            let block2_successors = cfg.succ_iter(block2).collect::<Vec<_>>();
+
+            assert_eq!(block0_predecessors.len(), 0);
+            assert_eq!(block1_predecessors.len(), 2);
+            assert_eq!(block2_predecessors.len(), 2);
+
+            assert_eq!(
+                block1_predecessors.contains(&BlockPredecessor::new(block0, jmp_block0_block1)),
+                true
+            );
+            assert_eq!(
+                block1_predecessors.contains(&BlockPredecessor::new(block1, br_block1_block1)),
+                true
+            );
+            assert_eq!(
+                block2_predecessors.contains(&BlockPredecessor::new(block0, br_block0_block2)),
+                true
+            );
+            assert_eq!(
+                block2_predecessors.contains(&BlockPredecessor::new(block1, jmp_block1_block2)),
+                true
+            );
+
+            assert_eq!(block0_successors, [block1, block2]);
+            assert_eq!(block1_successors, [block1, block2]);
+            assert_eq!(block2_successors, []);
+        }
+
+        // Change some instructions and recompute block0
+        func.dfg.replace(br_block0_block2).brnz(cond, block1, &[]);
+        func.dfg.replace(jmp_block0_block1).return_(&[]);
+        cfg.recompute_block(&mut func, block0);
+        let br_block0_block1 = br_block0_block2;
+
+        {
+            let block0_predecessors = cfg.pred_iter(block0).collect::<Vec<_>>();
+            let block1_predecessors = cfg.pred_iter(block1).collect::<Vec<_>>();
+            let block2_predecessors = cfg.pred_iter(block2).collect::<Vec<_>>();
+
+            let block0_successors = cfg.succ_iter(block0);
+            let block1_successors = cfg.succ_iter(block1);
+            let block2_successors = cfg.succ_iter(block2);
+
+            assert_eq!(block0_predecessors.len(), 0);
+            assert_eq!(block1_predecessors.len(), 2);
+            assert_eq!(block2_predecessors.len(), 1);
+
+            assert_eq!(
+                block1_predecessors.contains(&BlockPredecessor::new(block0, br_block0_block1)),
+                true
+            );
+            assert_eq!(
+                block1_predecessors.contains(&BlockPredecessor::new(block1, br_block1_block1)),
+                true
+            );
+            assert_eq!(
+                block2_predecessors.contains(&BlockPredecessor::new(block0, br_block0_block2)),
+                false
+            );
+            assert_eq!(
+                block2_predecessors.contains(&BlockPredecessor::new(block1, jmp_block1_block2)),
+                true
+            );
+
+            assert_eq!(block0_successors.collect::<Vec<_>>(), [block1]);
+            assert_eq!(block1_successors.collect::<Vec<_>>(), [block1, block2]);
+            assert_eq!(block2_successors.collect::<Vec<_>>(), []);
+        }
+    }
+}
diff --git a/cranelift/codegen/src/fx.rs b/cranelift/codegen/src/fx.rs
new file mode 100644
index 0000000000..36eb62df90
--- /dev/null
+++ b/cranelift/codegen/src/fx.rs
@@ -0,0 +1,111 @@
+// This file is taken from the Rust compiler: src/librustc_data_structures/fx.rs
+
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use super::{HashMap, HashSet};
+use core::default::Default;
+use core::hash::{BuildHasherDefault, Hash, Hasher};
+use core::ops::BitXor;
+
+pub type FxHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>;
+pub type FxHashSet<V> = HashSet<V, BuildHasherDefault<FxHasher>>;
+
+#[allow(non_snake_case)]
+pub fn FxHashMap<K: Hash + Eq, V>() -> FxHashMap<K, V> {
+    HashMap::default()
+}
+
+#[allow(non_snake_case)]
+pub fn FxHashSet<V: Hash + Eq>() -> FxHashSet<V> {
+    HashSet::default()
+}
+
+/// A speedy hash algorithm for use within rustc. The hashmap in liballoc
+/// by default uses SipHash which isn't quite as speedy as we want. In the
+/// compiler we're not really worried about DOS attempts, so we use a fast
+/// non-cryptographic hash.
+///
+/// This is the same as the algorithm used by Firefox -- which is a homespun
+/// one not based on any widely-known algorithm -- though modified to produce
+/// 64-bit hash values instead of 32-bit hash values. It consistently
+/// out-performs an FNV-based hash within rustc itself -- the collision rate is
+/// similar or slightly worse than FNV, but the speed of the hash function
+/// itself is much higher because it works on up to 8 bytes at a time.
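+///
+/// A minimal sketch of direct use (ordinarily the hasher is only exercised through the
+/// `FxHashMap`/`FxHashSet` aliases above; the values here are illustrative):
+///
+/// ```ignore
+/// let mut h = FxHasher::default();
+/// h.write_u32(42);
+/// let fingerprint: u64 = h.finish();
+/// ```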
+pub struct FxHasher { + hash: usize, +} + +#[cfg(target_pointer_width = "32")] +const K: usize = 0x9e3779b9; +#[cfg(target_pointer_width = "64")] +const K: usize = 0x517cc1b727220a95; + +impl Default for FxHasher { + #[inline] + fn default() -> Self { + Self { hash: 0 } + } +} + +impl FxHasher { + #[inline] + fn add_to_hash(&mut self, i: usize) { + self.hash = self.hash.rotate_left(5).bitxor(i).wrapping_mul(K); + } +} + +impl Hasher for FxHasher { + #[inline] + fn write(&mut self, bytes: &[u8]) { + for byte in bytes { + let i = *byte; + self.add_to_hash(i as usize); + } + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.add_to_hash(i as usize); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.add_to_hash(i as usize); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.add_to_hash(i as usize); + } + + #[cfg(target_pointer_width = "32")] + #[inline] + fn write_u64(&mut self, i: u64) { + self.add_to_hash(i as usize); + self.add_to_hash((i >> 32) as usize); + } + + #[cfg(target_pointer_width = "64")] + #[inline] + fn write_u64(&mut self, i: u64) { + self.add_to_hash(i as usize); + } + + #[inline] + fn write_usize(&mut self, i: usize) { + self.add_to_hash(i); + } + + #[inline] + fn finish(&self) -> u64 { + self.hash as u64 + } +} diff --git a/cranelift/codegen/src/ir/builder.rs b/cranelift/codegen/src/ir/builder.rs new file mode 100644 index 0000000000..63054928f2 --- /dev/null +++ b/cranelift/codegen/src/ir/builder.rs @@ -0,0 +1,266 @@ +//! Cranelift instruction builder. +//! +//! A `Builder` provides a convenient interface for inserting instructions into a Cranelift +//! function. Many of its methods are generated from the meta language instruction definitions. + +use crate::ir; +use crate::ir::types; +use crate::ir::{DataFlowGraph, InstructionData}; +use crate::ir::{Inst, Opcode, Type, Value}; +use crate::isa; + +/// Base trait for instruction builders. +/// +/// The `InstBuilderBase` trait provides the basic functionality required by the methods of the +/// generated `InstBuilder` trait. These methods should not normally be used directly. Use the +/// methods in the `InstBuilder` trait instead. +/// +/// Any data type that implements `InstBuilderBase` also gets all the methods of the `InstBuilder` +/// trait. +pub trait InstBuilderBase<'f>: Sized { + /// Get an immutable reference to the data flow graph that will hold the constructed + /// instructions. + fn data_flow_graph(&self) -> &DataFlowGraph; + /// Get a mutable reference to the data flow graph that will hold the constructed + /// instructions. + fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph; + + /// Insert an instruction and return a reference to it, consuming the builder. + /// + /// The result types may depend on a controlling type variable. For non-polymorphic + /// instructions with multiple results, pass `INVALID` for the `ctrl_typevar` argument. + fn build(self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph); +} + +// Include trait code generated by `cranelift-codegen/meta/src/gen_inst.rs`. +// +// This file defines the `InstBuilder` trait as an extension of `InstBuilderBase` with methods per +// instruction format and per opcode. +include!(concat!(env!("OUT_DIR"), "/inst_builder.rs")); + +/// Any type implementing `InstBuilderBase` gets all the `InstBuilder` methods for free. +impl<'f, T: InstBuilderBase<'f>> InstBuilder<'f> for T {} + +/// Base trait for instruction inserters. 
+///
+/// This is an alternative base trait for an instruction builder to implement.
+///
+/// An instruction inserter can be adapted into an instruction builder by wrapping it in an
+/// `InsertBuilder`. This provides some common functionality for instruction builders that insert
+/// new instructions, as opposed to the `ReplaceBuilder` which overwrites existing instructions.
+pub trait InstInserterBase<'f>: Sized {
+    /// Get an immutable reference to the data flow graph.
+    fn data_flow_graph(&self) -> &DataFlowGraph;
+
+    /// Get a mutable reference to the data flow graph.
+    fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph;
+
+    /// Insert a new instruction which belongs to the DFG.
+    fn insert_built_inst(self, inst: Inst, ctrl_typevar: Type) -> &'f mut DataFlowGraph;
+}
+
+use core::marker::PhantomData;
+
+/// Builder that inserts an instruction at the current position.
+///
+/// An `InsertBuilder` is a wrapper for an `InstInserterBase` that turns it into an instruction
+/// builder with some additional facilities for creating instructions that reuse existing values as
+/// their results.
+pub struct InsertBuilder<'f, IIB: InstInserterBase<'f>> {
+    inserter: IIB,
+    unused: PhantomData<&'f u32>,
+}
+
+impl<'f, IIB: InstInserterBase<'f>> InsertBuilder<'f, IIB> {
+    /// Create a new builder which inserts instructions at `pos`.
+    /// The `dfg` and `pos.layout` references should be from the same `Function`.
+    pub fn new(inserter: IIB) -> Self {
+        Self {
+            inserter,
+            unused: PhantomData,
+        }
+    }
+
+    /// Reuse result values in `reuse`.
+    ///
+    /// Convert this builder into one that will reuse the provided result values instead of
+    /// allocating new ones. The provided values for reuse must not be attached to anything. Any
+    /// missing result values will be allocated as normal.
+    ///
+    /// The `reuse` argument is expected to be an array of `Option<Value>`.
+    pub fn with_results<Array>(self, reuse: Array) -> InsertReuseBuilder<'f, IIB, Array>
+    where
+        Array: AsRef<[Option<Value>]>,
+    {
+        InsertReuseBuilder {
+            inserter: self.inserter,
+            reuse,
+            unused: PhantomData,
+        }
+    }
+
+    /// Reuse a single result value.
+    ///
+    /// Convert this into a builder that will reuse `v` as the single result value. The reused
+    /// result value `v` must not be attached to anything.
+    ///
+    /// This method should only be used when building an instruction with exactly one result. Use
+    /// `with_results()` for the more general case.
+    pub fn with_result(self, v: Value) -> InsertReuseBuilder<'f, IIB, [Option<Value>; 1]> {
+        // TODO: Specialize this to return a different builder that just attaches `v` instead of
+        // calling `make_inst_results_reusing()`.
+        self.with_results([Some(v)])
+    }
+}
+
+impl<'f, IIB: InstInserterBase<'f>> InstBuilderBase<'f> for InsertBuilder<'f, IIB> {
+    fn data_flow_graph(&self) -> &DataFlowGraph {
+        self.inserter.data_flow_graph()
+    }
+
+    fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
+        self.inserter.data_flow_graph_mut()
+    }
+
+    fn build(mut self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) {
+        let inst;
+        {
+            let dfg = self.inserter.data_flow_graph_mut();
+            inst = dfg.make_inst(data);
+            dfg.make_inst_results(inst, ctrl_typevar);
+        }
+        (inst, self.inserter.insert_built_inst(inst, ctrl_typevar))
+    }
+}
+
+/// Builder that inserts a new instruction like `InsertBuilder`, but reusing result values.
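+///
+/// A sketch of the intended use, mirroring the `reuse_results` test below
+/// (`pos` is a `FuncCursor` and `v0` a detached value):
+///
+/// ```ignore
+/// let v0b = pos.ins().with_result(v0).iconst(I32, 3);
+/// assert_eq!(v0, v0b);
+/// ```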
+pub struct InsertReuseBuilder<'f, IIB, Array>
+where
+    IIB: InstInserterBase<'f>,
+    Array: AsRef<[Option<Value>]>,
+{
+    inserter: IIB,
+    reuse: Array,
+    unused: PhantomData<&'f u32>,
+}
+
+impl<'f, IIB, Array> InstBuilderBase<'f> for InsertReuseBuilder<'f, IIB, Array>
+where
+    IIB: InstInserterBase<'f>,
+    Array: AsRef<[Option<Value>]>,
+{
+    fn data_flow_graph(&self) -> &DataFlowGraph {
+        self.inserter.data_flow_graph()
+    }
+
+    fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
+        self.inserter.data_flow_graph_mut()
+    }
+
+    fn build(mut self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) {
+        let inst;
+        {
+            let dfg = self.inserter.data_flow_graph_mut();
+            inst = dfg.make_inst(data);
+            // Make an `Iterator<Item = Option<Value>>`.
+            let ru = self.reuse.as_ref().iter().cloned();
+            dfg.make_inst_results_reusing(inst, ctrl_typevar, ru);
+        }
+        (inst, self.inserter.insert_built_inst(inst, ctrl_typevar))
+    }
+}
+
+/// Instruction builder that replaces an existing instruction.
+///
+/// The inserted instruction will have the same `Inst` number as the old one.
+///
+/// If the old instruction still has result values attached, it is assumed that the new instruction
+/// produces the same number and types of results. The old result values are preserved. If the
+/// replacement instruction format does not support multiple results, the builder panics. It is a
+/// bug to leave result values dangling.
+pub struct ReplaceBuilder<'f> {
+    dfg: &'f mut DataFlowGraph,
+    inst: Inst,
+}
+
+impl<'f> ReplaceBuilder<'f> {
+    /// Create a `ReplaceBuilder` that will overwrite `inst`.
+    pub fn new(dfg: &'f mut DataFlowGraph, inst: Inst) -> Self {
+        Self { dfg, inst }
+    }
+}
+
+impl<'f> InstBuilderBase<'f> for ReplaceBuilder<'f> {
+    fn data_flow_graph(&self) -> &DataFlowGraph {
+        self.dfg
+    }
+
+    fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
+        self.dfg
+    }
+
+    fn build(self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph) {
+        // Splat the new instruction on top of the old one.
+        self.dfg[self.inst] = data;
+
+        if !self.dfg.has_results(self.inst) {
+            // The old result values were either detached or non-existent.
+            // Construct new ones.
+            self.dfg.make_inst_results(self.inst, ctrl_typevar);
+        }
+
+        (self.inst, self.dfg)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::cursor::{Cursor, FuncCursor};
+    use crate::ir::condcodes::*;
+    use crate::ir::types::*;
+    use crate::ir::{Function, InstBuilder, ValueDef};
+
+    #[test]
+    fn types() {
+        let mut func = Function::new();
+        let block0 = func.dfg.make_block();
+        let arg0 = func.dfg.append_block_param(block0, I32);
+        let mut pos = FuncCursor::new(&mut func);
+        pos.insert_block(block0);
+
+        // Explicit types.
+        let v0 = pos.ins().iconst(I32, 3);
+        assert_eq!(pos.func.dfg.value_type(v0), I32);
+
+        // Inferred from inputs.
+        let v1 = pos.ins().iadd(arg0, v0);
+        assert_eq!(pos.func.dfg.value_type(v1), I32);
+
+        // Formula.
+        let cmp = pos.ins().icmp(IntCC::Equal, arg0, v0);
+        assert_eq!(pos.func.dfg.value_type(cmp), B1);
+    }
+
+    #[test]
+    fn reuse_results() {
+        let mut func = Function::new();
+        let block0 = func.dfg.make_block();
+        let arg0 = func.dfg.append_block_param(block0, I32);
+        let mut pos = FuncCursor::new(&mut func);
+        pos.insert_block(block0);
+
+        let v0 = pos.ins().iadd_imm(arg0, 17);
+        assert_eq!(pos.func.dfg.value_type(v0), I32);
+        let iadd = pos.prev_inst().unwrap();
+        assert_eq!(pos.func.dfg.value_def(v0), ValueDef::Result(iadd, 0));
+
+        // Detach v0 and reuse it for a different instruction.
+        pos.func.dfg.clear_results(iadd);
+        let v0b = pos.ins().with_result(v0).iconst(I32, 3);
+        assert_eq!(v0, v0b);
+        assert_eq!(pos.current_inst(), Some(iadd));
+        let iconst = pos.prev_inst().unwrap();
+        assert!(iadd != iconst);
+        assert_eq!(pos.func.dfg.value_def(v0), ValueDef::Result(iconst, 0));
+    }
+}
diff --git a/cranelift/codegen/src/ir/constant.rs b/cranelift/codegen/src/ir/constant.rs
new file mode 100644
index 0000000000..2dc95544c8
--- /dev/null
+++ b/cranelift/codegen/src/ir/constant.rs
@@ -0,0 +1,492 @@
+//! Constants
+//!
+//! The constant pool defined here allows Cranelift to avoid emitting the same constant multiple
+//! times. As constants are inserted in the pool, a handle is returned; the handle is a Cranelift
+//! Entity. Inserting the same data multiple times will always return the same handle.
+//!
+//! Future work could include:
+//! - ensuring alignment of constants within the pool,
+//! - bucketing constants by size.
+
+use crate::ir::immediates::{IntoBytes, V128Imm};
+use crate::ir::Constant;
+use crate::HashMap;
+use alloc::collections::BTreeMap;
+use alloc::vec::Vec;
+use core::fmt;
+use core::iter::FromIterator;
+use core::slice::Iter;
+use core::str::{from_utf8, FromStr};
+use cranelift_entity::EntityRef;
+
+/// This type describes the actual constant data. Note that the bytes stored in this structure are
+/// expected to be in little-endian order; this is due to ease-of-use when interacting with
+/// WebAssembly values, which are [little-endian by design].
+///
+/// [little-endian by design]: https://github.com/WebAssembly/design/blob/master/Portability.md
+#[derive(Clone, Hash, Eq, PartialEq, Debug, Default)]
+pub struct ConstantData(Vec<u8>);
+
+impl FromIterator<u8> for ConstantData {
+    fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> Self {
+        let v = iter.into_iter().collect();
+        Self(v)
+    }
+}
+
+impl From<Vec<u8>> for ConstantData {
+    fn from(v: Vec<u8>) -> Self {
+        Self(v)
+    }
+}
+
+impl From<&[u8]> for ConstantData {
+    fn from(v: &[u8]) -> Self {
+        Self(v.to_vec())
+    }
+}
+
+impl From<V128Imm> for ConstantData {
+    fn from(v: V128Imm) -> Self {
+        Self(v.to_vec())
+    }
+}
+
+impl ConstantData {
+    /// Return the number of bytes in the constant.
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    /// Convert the data to a vector.
+    pub fn into_vec(self) -> Vec<u8> {
+        self.0
+    }
+
+    /// Iterate over the constant's bytes.
+    pub fn iter(&self) -> Iter<u8> {
+        self.0.iter()
+    }
+
+    /// Add new bytes to the constant data.
+    pub fn append(mut self, bytes: impl IntoBytes) -> Self {
+        let mut to_add = bytes.into_bytes();
+        self.0.append(&mut to_add);
+        self
+    }
+
+    /// Expand the size of the constant data to `expected_size` number of bytes by adding zeroes
+    /// in the high-order byte slots.
+    pub fn expand_to(mut self, expected_size: usize) -> Self {
+        if self.len() > expected_size {
+            panic!(
+                "The constant data is already expanded beyond {} bytes",
+                expected_size
+            )
+        }
+        self.0.resize(expected_size, 0);
+        self
+    }
+}
+
+impl fmt::Display for ConstantData {
+    /// Print the constant data in hexadecimal format, e.g. 0x000102030405060708090a0b0c0d0e0f.
+    /// This function will flip the stored order of bytes--little-endian--to the more readable
+    /// big-endian ordering. Any zero bytes in high-order bytes will be discarded in the formatted
+    /// string.
+    ///
+    /// ```
+    /// use cranelift_codegen::ir::ConstantData;
+    /// let data = ConstantData::from([3, 2, 1, 0, 0].as_ref()); // note the little-endian order
+    /// assert_eq!(data.to_string(), "0x010203");
+    /// ```
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "0x")?;
+        let mut bytes_written = 0;
+        for b in self.0.iter().rev().skip_while(|&&b| b == 0) {
+            write!(f, "{:02x}", b)?;
+            bytes_written += 1;
+        }
+        if bytes_written < 1 {
+            write!(f, "00")?;
+        }
+        Ok(())
+    }
+}
+
+impl FromStr for ConstantData {
+    type Err = &'static str;
+
+    /// Parse a hexadecimal string to `ConstantData`. This is the inverse of `Display::fmt`.
+    ///
+    /// ```
+    /// use cranelift_codegen::ir::ConstantData;
+    /// let c: ConstantData = "0x000102".parse().unwrap();
+    /// assert_eq!(c.into_vec(), [2, 1, 0]);
+    /// ```
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        if s.len() <= 2 || &s[0..2] != "0x" {
+            return Err("Expected a hexadecimal string, e.g. 0x1234");
+        }
+
+        // clean and check the string
+        let cleaned: Vec<u8> = s[2..]
+            .as_bytes()
+            .iter()
+            .filter(|&&b| b as char != '_')
+            .cloned()
+            .collect(); // remove 0x prefix and any intervening _ characters
+
+        if cleaned.is_empty() {
+            Err("Hexadecimal string must have some digits")
+        } else if cleaned.len() % 2 != 0 {
+            Err("Hexadecimal string must have an even number of digits")
+        } else if cleaned.len() > 32 {
+            Err("Hexadecimal string has too many digits to fit in a 128-bit vector")
+        } else {
+            let mut buffer = Vec::with_capacity((s.len() - 2) / 2);
+            for i in (0..cleaned.len()).step_by(2) {
+                let pair = from_utf8(&cleaned[i..i + 2])
+                    .or_else(|_| Err("Unable to parse hexadecimal pair as UTF-8"))?;
+                let byte = u8::from_str_radix(pair, 16)
+                    .or_else(|_| Err("Unable to parse as hexadecimal"))?;
+                buffer.insert(0, byte);
+            }
+            Ok(Self(buffer))
+        }
+    }
+}
+
+/// This type describes an offset in bytes within a constant pool.
+pub type ConstantOffset = u32;
+
+/// Inner type for storing data and offset together in the constant pool. The offset is optional
+/// because it must be set relative to the function code size (i.e. constants are emitted after the
+/// function body); because the function is not yet compiled when constants are inserted,
+/// [`set_offset`](crate::ir::ConstantPool::set_offset) must be called once a constant's offset
+/// from the beginning of the function is known (see
+/// [`relaxation.rs`](crate::binemit::relaxation)).
+#[derive(Clone)]
+pub struct ConstantPoolEntry {
+    data: ConstantData,
+    offset: Option<ConstantOffset>,
+}
+
+impl ConstantPoolEntry {
+    fn new(data: ConstantData) -> Self {
+        Self { data, offset: None }
+    }
+
+    /// Return the size of the constant at this entry.
+    pub fn len(&self) -> usize {
+        self.data.len()
+    }
+
+    /// Assign a new offset to the constant at this entry.
+    pub fn set_offset(&mut self, offset: ConstantOffset) {
+        self.offset = Some(offset)
+    }
+}
+
+/// Maintains the mapping between a constant handle (i.e. [`Constant`](crate::ir::Constant)) and
+/// its constant data (i.e. [`ConstantData`](crate::ir::ConstantData)).
+#[derive(Clone)]
+pub struct ConstantPool {
+    /// This mapping maintains the insertion order as long as Constants are created with
+    /// sequentially increasing integers.
+    handles_to_values: BTreeMap<Constant, ConstantPoolEntry>,
+
+    /// This mapping is unordered (no need for lexicographic ordering) but allows us to map
+    /// constant data back to handles.
+    values_to_handles: HashMap<ConstantData, Constant>,
+}
+
+impl ConstantPool {
+    /// Create a new constant pool instance.
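+    ///
+    /// A freshly created pool is empty, as in this small sketch (mirroring the
+    /// `empty` test below):
+    ///
+    /// ```ignore
+    /// let pool = ConstantPool::new();
+    /// assert_eq!(pool.len(), 0);
+    /// ```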
+    pub fn new() -> Self {
+        Self {
+            handles_to_values: BTreeMap::new(),
+            values_to_handles: HashMap::new(),
+        }
+    }
+
+    /// Empty the constant pool of all data.
+    pub fn clear(&mut self) {
+        self.handles_to_values.clear();
+        self.values_to_handles.clear();
+    }
+
+    /// Insert constant data into the pool, returning a handle for later referencing; when constant
+    /// data is inserted that is a duplicate of previous constant data, the existing handle will be
+    /// returned.
+    pub fn insert(&mut self, constant_value: ConstantData) -> Constant {
+        if self.values_to_handles.contains_key(&constant_value) {
+            *self.values_to_handles.get(&constant_value).unwrap()
+        } else {
+            let constant_handle = Constant::new(self.len());
+            self.values_to_handles
+                .insert(constant_value.clone(), constant_handle);
+            self.handles_to_values
+                .insert(constant_handle, ConstantPoolEntry::new(constant_value));
+            constant_handle
+        }
+    }
+
+    /// Retrieve the constant data given a handle.
+    pub fn get(&self, constant_handle: Constant) -> &ConstantData {
+        assert!(self.handles_to_values.contains_key(&constant_handle));
+        &self.handles_to_values.get(&constant_handle).unwrap().data
+    }
+
+    /// Assign an offset to a given constant, where the offset is the number of bytes from the
+    /// beginning of the function to the beginning of the constant data inside the pool.
+    pub fn set_offset(&mut self, constant_handle: Constant, constant_offset: ConstantOffset) {
+        assert!(
+            self.handles_to_values.contains_key(&constant_handle),
+            "A constant handle must have already been inserted into the pool; perhaps a \
+             constant handle was created outside of the pool?"
+        );
+        self.handles_to_values
+            .entry(constant_handle)
+            .and_modify(|e| e.offset = Some(constant_offset));
+    }
+
+    /// Retrieve the offset of a given constant, where the offset is the number of bytes from the
+    /// beginning of the function to the beginning of the constant data inside the pool.
+    pub fn get_offset(&self, constant_handle: Constant) -> ConstantOffset {
+        self.handles_to_values
+            .get(&constant_handle)
+            .expect(
+                "A constant handle must have a corresponding constant value; was a constant \
+                 handle created outside of the pool?",
+            )
+            .offset
+            .expect(
+                "A constant offset has not yet been set; verify that `set_offset` has been \
+                 called before this point",
+            )
+    }
+
+    /// Iterate over the constants in insertion order.
+    pub fn iter(&self) -> impl Iterator<Item = (&Constant, &ConstantData)> {
+        self.handles_to_values.iter().map(|(h, e)| (h, &e.data))
+    }
+
+    /// Iterate over mutable entries in the constant pool in insertion order.
+    pub fn entries_mut(&mut self) -> impl Iterator<Item = &mut ConstantPoolEntry> {
+        self.handles_to_values.values_mut()
+    }
+
+    /// Return the number of constants in the pool.
+    pub fn len(&self) -> usize {
+        self.handles_to_values.len()
+    }
+
+    /// Return the combined size of all of the constant values in the pool.
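+    ///
+    /// For instance, after inserting a 3-byte and a 2-byte constant the
+    /// combined size is 5 bytes (a sketch, not a doctest):
+    ///
+    /// ```ignore
+    /// let mut pool = ConstantPool::new();
+    /// pool.insert(vec![1, 2, 3].into());
+    /// pool.insert(vec![4, 5].into());
+    /// assert_eq!(pool.byte_size(), 5);
+    /// ```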
+    pub fn byte_size(&self) -> usize {
+        self.values_to_handles.keys().map(|c| c.len()).sum()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::string::ToString;
+
+    #[test]
+    fn empty() {
+        let sut = ConstantPool::new();
+        assert_eq!(sut.len(), 0);
+    }
+
+    #[test]
+    fn insert() {
+        let mut sut = ConstantPool::new();
+        sut.insert(vec![1, 2, 3].into());
+        sut.insert(vec![4, 5, 6].into());
+        assert_eq!(sut.len(), 2);
+    }
+
+    #[test]
+    fn insert_duplicate() {
+        let mut sut = ConstantPool::new();
+        let a = sut.insert(vec![1, 2, 3].into());
+        sut.insert(vec![4, 5, 6].into());
+        let b = sut.insert(vec![1, 2, 3].into());
+        assert_eq!(a, b);
+    }
+
+    #[test]
+    fn clear() {
+        let mut sut = ConstantPool::new();
+        sut.insert(vec![1, 2, 3].into());
+        assert_eq!(sut.len(), 1);
+
+        sut.clear();
+        assert_eq!(sut.len(), 0);
+    }
+
+    #[test]
+    fn iteration_order() {
+        let mut sut = ConstantPool::new();
+        sut.insert(vec![1, 2, 3].into());
+        sut.insert(vec![4, 5, 6].into());
+        sut.insert(vec![1, 2, 3].into());
+        let data = sut.iter().map(|(_, v)| v).collect::<Vec<&ConstantData>>();
+        assert_eq!(data, vec![&vec![1, 2, 3].into(), &vec![4, 5, 6].into()]);
+    }
+
+    #[test]
+    fn get() {
+        let mut sut = ConstantPool::new();
+        let data = vec![1, 2, 3];
+        let handle = sut.insert(data.clone().into());
+        assert_eq!(sut.get(handle), &data.into());
+    }
+
+    #[test]
+    #[should_panic]
+    fn get_nonexistent_constant() {
+        let sut = ConstantPool::new();
+        let a = Constant::with_number(42).unwrap();
+        sut.get(a); // panics, only use constants returned by ConstantPool
+    }
+
+    #[test]
+    fn get_offset() {
+        let mut sut = ConstantPool::new();
+        let a = sut.insert(vec![1].into());
+        sut.set_offset(a, 42);
+        assert_eq!(sut.get_offset(a), 42)
+    }
+
+    #[test]
+    #[should_panic]
+    fn get_nonexistent_offset() {
+        let mut sut = ConstantPool::new();
+        let a = sut.insert(vec![1].into());
+        sut.get_offset(a); // panics, set_offset should have been called
+    }
+
+    #[test]
+    fn display_constant_data() {
+        assert_eq!(ConstantData::from([0].as_ref()).to_string(), "0x00");
+        assert_eq!(ConstantData::from([42].as_ref()).to_string(), "0x2a");
+        assert_eq!(
+            ConstantData::from([3, 2, 1, 0].as_ref()).to_string(),
+            "0x010203"
+        );
+        assert_eq!(
+            ConstantData::from(3735928559u32.to_le_bytes().as_ref()).to_string(),
+            "0xdeadbeef"
+        );
+        assert_eq!(
+            ConstantData::from(0x0102030405060708u64.to_le_bytes().as_ref()).to_string(),
+            "0x0102030405060708"
+        );
+    }
+
+    #[test]
+    fn iterate_over_constant_data() {
+        let c = ConstantData::from([1, 2, 3].as_ref());
+        let mut iter = c.iter();
+        assert_eq!(iter.next(), Some(&1));
+        assert_eq!(iter.next(), Some(&2));
+        assert_eq!(iter.next(), Some(&3));
+        assert_eq!(iter.next(), None);
+    }
+
+    #[test]
+    fn add_to_constant_data() {
+        let d = ConstantData::from([1, 2].as_ref());
+        let e = d.append(i16::from(3u8));
+        assert_eq!(e.into_vec(), vec![1, 2, 3, 0])
+    }
+
+    #[test]
+    fn extend_constant_data() {
+        let d = ConstantData::from([1, 2].as_ref());
+        assert_eq!(d.expand_to(4).into_vec(), vec![1, 2, 0, 0])
+    }
+
+    #[test]
+    #[should_panic]
+    fn extend_constant_data_to_invalid_length() {
+        ConstantData::from([1, 2].as_ref()).expand_to(1);
+    }
+
+    #[test]
+    fn parse_constant_data_and_restringify() {
+        // Verify that parsing of `from` succeeds and stringifies to `to`.
+        fn parse_ok(from: &str, to: &str) {
+            let parsed = from.parse::<ConstantData>().unwrap();
+            assert_eq!(parsed.to_string(), to);
+        }
+
+        // Verify that parsing of `from` fails with `error_msg`.
+        fn parse_err(from: &str, error_msg: &str) {
+            let parsed = from.parse::<ConstantData>();
+            assert!(
+                parsed.is_err(),
+                "Expected a parse error but parsing succeeded: {}",
+                from
+            );
+            assert_eq!(parsed.err().unwrap(), error_msg);
+        }
+
+        parse_ok("0x00", "0x00");
+        parse_ok("0x00000042", "0x42");
+        parse_ok(
+            "0x0102030405060708090a0b0c0d0e0f00",
+            "0x0102030405060708090a0b0c0d0e0f00",
+        );
+        parse_ok("0x_0000_0043_21", "0x4321");
+
+        parse_err("", "Expected a hexadecimal string, e.g. 0x1234");
+        parse_err("0x", "Expected a hexadecimal string, e.g. 0x1234");
+        parse_err(
+            "0x042",
+            "Hexadecimal string must have an even number of digits",
+        );
+        parse_err(
+            "0x00000000000000000000000000000000000000000000000000",
+            "Hexadecimal string has too many digits to fit in a 128-bit vector",
+        );
+        parse_err("0xrstu", "Unable to parse as hexadecimal");
+        parse_err("0x__", "Hexadecimal string must have some digits");
+    }
+
+    #[test]
+    fn verify_stored_bytes_in_constant_data() {
+        assert_eq!("0x01".parse::<ConstantData>().unwrap().into_vec(), [1]);
+        assert_eq!(ConstantData::from([1, 0].as_ref()).0, [1, 0]);
+        assert_eq!(ConstantData::from(vec![1, 0, 0, 0]).0, [1, 0, 0, 0]);
+    }
+
+    #[test]
+    fn check_constant_data_endianness_as_uimm128() {
+        fn parse_to_uimm128(from: &str) -> Vec<u8> {
+            from.parse::<ConstantData>()
+                .unwrap()
+                .expand_to(16)
+                .into_vec()
+        }
+
+        assert_eq!(
+            parse_to_uimm128("0x42"),
+            [0x42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+        );
+        assert_eq!(
+            parse_to_uimm128("0x00"),
+            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+        );
+        assert_eq!(
+            parse_to_uimm128("0x12345678"),
+            [0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+        );
+        assert_eq!(
+            parse_to_uimm128("0x1234_5678"),
+            [0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+        );
+    }
+}
diff --git a/cranelift/codegen/src/ir/dfg.rs b/cranelift/codegen/src/ir/dfg.rs
new file mode 100644
index 0000000000..479aec1cfb
--- /dev/null
+++ b/cranelift/codegen/src/ir/dfg.rs
@@ -0,0 +1,1303 @@
+//! Data flow graph tracking Instructions, Values, and blocks.
+
+use crate::entity::{self, PrimaryMap, SecondaryMap};
+use crate::ir;
+use crate::ir::builder::ReplaceBuilder;
+use crate::ir::extfunc::ExtFuncData;
+use crate::ir::instructions::{BranchInfo, CallInfo, InstructionData};
+use crate::ir::{types, ConstantData, ConstantPool, Immediate};
+use crate::ir::{
+    Block, FuncRef, Inst, SigRef, Signature, Type, Value, ValueLabelAssignments, ValueList,
+    ValueListPool,
+};
+use crate::isa::TargetIsa;
+use crate::packed_option::ReservedValue;
+use crate::write::write_operands;
+use crate::HashMap;
+use alloc::vec::Vec;
+use core::fmt;
+use core::iter;
+use core::mem;
+use core::ops::{Index, IndexMut};
+use core::u16;
+
+/// A data flow graph defines all instructions and basic blocks in a function as well as
+/// the data flow dependencies between them. The DFG also tracks values which can be either
+/// instruction results or block parameters.
+///
+/// The layout of blocks in the function and of instructions in each block is recorded by the
+/// `Layout` data structure which forms the other half of the function representation.
+///
+#[derive(Clone)]
+pub struct DataFlowGraph {
+    /// Data about all of the instructions in the function, including opcodes and operands.
+    /// The instructions in this map are not in program order. That is tracked by `Layout`, along
+    /// with the block containing each instruction.
+    insts: PrimaryMap<Inst, InstructionData>,
+
+    /// List of result values for each instruction.
+    ///
+    /// This map gets resized automatically by `make_inst()` so it is always in sync with the
+    /// primary `insts` map.
+    results: SecondaryMap<Inst, ValueList>,
+
+    /// Basic blocks in the function and their parameters.
+    ///
+    /// This map is not in program order. That is handled by `Layout`, and so is the sequence of
+    /// instructions contained in each block.
+    blocks: PrimaryMap<Block, BlockData>,
+
+    /// Memory pool of value lists.
+    ///
+    /// The `ValueList` references into this pool appear in many places:
+    ///
+    /// - Instructions in `insts` that don't have room for their entire argument list inline.
+    /// - Instruction result values in `results`.
+    /// - block parameters in `blocks`.
+    pub value_lists: ValueListPool,
+
+    /// Primary value table with entries for all values.
+    values: PrimaryMap<Value, ValueData>,
+
+    /// Function signature table. These signatures are referenced by indirect call instructions as
+    /// well as the external function references.
+    pub signatures: PrimaryMap<SigRef, Signature>,
+
+    /// The pre-legalization signature for each entry in `signatures`, if any.
+    pub old_signatures: SecondaryMap<SigRef, Option<Signature>>,
+
+    /// External function references. These are functions that can be called directly.
+    pub ext_funcs: PrimaryMap<FuncRef, ExtFuncData>,
+
+    /// Saves Value labels.
+    pub values_labels: Option<HashMap<Value, ValueLabelAssignments>>,
+
+    /// Constants used within the function
+    pub constants: ConstantPool,
+
+    /// Stores large immediates that otherwise will not fit on InstructionData
+    pub immediates: PrimaryMap<Immediate, ConstantData>,
+}
+
+impl DataFlowGraph {
+    /// Create a new empty `DataFlowGraph`.
+    pub fn new() -> Self {
+        Self {
+            insts: PrimaryMap::new(),
+            results: SecondaryMap::new(),
+            blocks: PrimaryMap::new(),
+            value_lists: ValueListPool::new(),
+            values: PrimaryMap::new(),
+            signatures: PrimaryMap::new(),
+            old_signatures: SecondaryMap::new(),
+            ext_funcs: PrimaryMap::new(),
+            values_labels: None,
+            constants: ConstantPool::new(),
+            immediates: PrimaryMap::new(),
+        }
+    }
+
+    /// Clear everything.
+    pub fn clear(&mut self) {
+        self.insts.clear();
+        self.results.clear();
+        self.blocks.clear();
+        self.value_lists.clear();
+        self.values.clear();
+        self.signatures.clear();
+        self.old_signatures.clear();
+        self.ext_funcs.clear();
+        self.values_labels = None;
+        self.constants.clear();
+        self.immediates.clear();
+    }
+
+    /// Get the total number of instructions created in this function, whether they are currently
+    /// inserted in the layout or not.
+    ///
+    /// This is intended for use with `SecondaryMap::with_capacity`.
+    pub fn num_insts(&self) -> usize {
+        self.insts.len()
+    }
+
+    /// Returns `true` if the given instruction reference is valid.
+    pub fn inst_is_valid(&self, inst: Inst) -> bool {
+        self.insts.is_valid(inst)
+    }
+
+    /// Get the total number of basic blocks created in this function, whether they are
+    /// currently inserted in the layout or not.
+    ///
+    /// This is intended for use with `SecondaryMap::with_capacity`.
+    pub fn num_blocks(&self) -> usize {
+        self.blocks.len()
+    }
+
+    /// Returns `true` if the given block reference is valid.
+    pub fn block_is_valid(&self, block: Block) -> bool {
+        self.blocks.is_valid(block)
+    }
+
+    /// Get the total number of values.
+    pub fn num_values(&self) -> usize {
+        self.values.len()
+    }
+
+    /// Starts collection of debug information.
+    pub fn collect_debug_info(&mut self) {
+        if self.values_labels.is_none() {
+            self.values_labels = Some(HashMap::new());
+        }
+    }
+}
+
+/// Resolve value aliases.
+///
+/// Find the original SSA value that `value` aliases, or None if an
+/// alias cycle is detected.
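+///
+/// For example, given an alias chain `v2 -> v1 -> v0` where `v0` is a real
+/// SSA value, resolving `v2` yields `Some(v0)`; for a cycle such as
+/// `v1 -> v2 -> v1` the result is `None`.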
+fn maybe_resolve_aliases(values: &PrimaryMap<Value, ValueData>, value: Value) -> Option<Value> {
+    let mut v = value;
+
+    // Note that values may be empty here.
+    for _ in 0..=values.len() {
+        if let ValueData::Alias { original, .. } = values[v] {
+            v = original;
+        } else {
+            return Some(v);
+        }
+    }
+
+    None
+}
+
+/// Resolve value aliases.
+///
+/// Find the original SSA value that `value` aliases.
+fn resolve_aliases(values: &PrimaryMap<Value, ValueData>, value: Value) -> Value {
+    if let Some(v) = maybe_resolve_aliases(values, value) {
+        v
+    } else {
+        panic!("Value alias loop detected for {}", value);
+    }
+}
+
+/// Iterator over all Values in a DFG
+pub struct Values<'a> {
+    inner: entity::Iter<'a, Value, ValueData>,
+}
+
+/// Check for non-values
+fn valid_valuedata(data: &ValueData) -> bool {
+    if let ValueData::Alias {
+        ty: types::INVALID,
+        original,
+    } = *data
+    {
+        if original == Value::reserved_value() {
+            return false;
+        }
+    }
+    true
+}
+
+impl<'a> Iterator for Values<'a> {
+    type Item = Value;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner
+            .by_ref()
+            .find(|kv| valid_valuedata(kv.1))
+            .map(|kv| kv.0)
+    }
+}
+
+/// Handling values.
+///
+/// Values are either block parameters or instruction results.
+impl DataFlowGraph {
+    /// Allocate an extended value entry.
+    fn make_value(&mut self, data: ValueData) -> Value {
+        self.values.push(data)
+    }
+
+    /// Get an iterator over all values.
+    pub fn values<'a>(&'a self) -> Values {
+        Values {
+            inner: self.values.iter(),
+        }
+    }
+
+    /// Check if a value reference is valid.
+    pub fn value_is_valid(&self, v: Value) -> bool {
+        self.values.is_valid(v)
+    }
+
+    /// Get the type of a value.
+    pub fn value_type(&self, v: Value) -> Type {
+        match self.values[v] {
+            ValueData::Inst { ty, .. }
+            | ValueData::Param { ty, .. }
+            | ValueData::Alias { ty, .. } => ty,
+        }
+    }
+
+    /// Get the definition of a value.
+    ///
+    /// This is either the instruction that defined it or the Block that has the value as a
+    /// parameter.
+    pub fn value_def(&self, v: Value) -> ValueDef {
+        match self.values[v] {
+            ValueData::Inst { inst, num, .. } => ValueDef::Result(inst, num as usize),
+            ValueData::Param { block, num, .. } => ValueDef::Param(block, num as usize),
+            ValueData::Alias { original, .. } => {
+                // Make sure we only recurse one level. `resolve_aliases` has safeguards to
+                // detect alias loops without overrunning the stack.
+                self.value_def(self.resolve_aliases(original))
+            }
+        }
+    }
+
+    /// Determine if `v` is an attached instruction result / block parameter.
+    ///
+    /// An attached value can't be attached to something else without first being detached.
+    ///
+    /// Value aliases are not considered to be attached to anything. Use `resolve_aliases()` to
+    /// determine if the original aliased value is attached.
+    pub fn value_is_attached(&self, v: Value) -> bool {
+        use self::ValueData::*;
+        match self.values[v] {
+            Inst { inst, num, .. } => Some(&v) == self.inst_results(inst).get(num as usize),
+            Param { block, num, .. } => Some(&v) == self.block_params(block).get(num as usize),
+            Alias { .. } => false,
+        }
+    }
+
+    /// Resolve value aliases.
+    ///
+    /// Find the original SSA value that `value` aliases.
+    pub fn resolve_aliases(&self, value: Value) -> Value {
+        resolve_aliases(&self.values, value)
+    }
+
+    /// Resolve all aliases among inst's arguments.
+    ///
+    /// For each argument of inst which is defined by an alias, replace the
+    /// alias with the aliased value.
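+    ///
+    /// For example, if `v1` is an alias of `v0`, an instruction that takes
+    /// `v1` as an argument is rewritten in place to take `v0` instead.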
+ pub fn resolve_aliases_in_arguments(&mut self, inst: Inst) { + for arg in self.insts[inst].arguments_mut(&mut self.value_lists) { + let resolved = resolve_aliases(&self.values, *arg); + if resolved != *arg { + *arg = resolved; + } + } + } + + /// Turn a value into an alias of another. + /// + /// Change the `dest` value to behave as an alias of `src`. This means that all uses of `dest` + /// will behave as if they used that value `src`. + /// + /// The `dest` value can't be attached to an instruction or block. + pub fn change_to_alias(&mut self, dest: Value, src: Value) { + debug_assert!(!self.value_is_attached(dest)); + // Try to create short alias chains by finding the original source value. + // This also avoids the creation of loops. + let original = self.resolve_aliases(src); + debug_assert_ne!( + dest, original, + "Aliasing {} to {} would create a loop", + dest, src + ); + let ty = self.value_type(original); + debug_assert_eq!( + self.value_type(dest), + ty, + "Aliasing {} to {} would change its type {} to {}", + dest, + src, + self.value_type(dest), + ty + ); + debug_assert_ne!(ty, types::INVALID); + + self.values[dest] = ValueData::Alias { ty, original }; + } + + /// Replace the results of one instruction with aliases to the results of another. + /// + /// Change all the results of `dest_inst` to behave as aliases of + /// corresponding results of `src_inst`, as if calling change_to_alias for + /// each. + /// + /// After calling this instruction, `dest_inst` will have had its results + /// cleared, so it likely needs to be removed from the graph. + /// + pub fn replace_with_aliases(&mut self, dest_inst: Inst, src_inst: Inst) { + debug_assert_ne!( + dest_inst, src_inst, + "Replacing {} with itself would create a loop", + dest_inst + ); + debug_assert_eq!( + self.results[dest_inst].len(&self.value_lists), + self.results[src_inst].len(&self.value_lists), + "Replacing {} with {} would produce a different number of results.", + dest_inst, + src_inst + ); + + for (&dest, &src) in self.results[dest_inst] + .as_slice(&self.value_lists) + .iter() + .zip(self.results[src_inst].as_slice(&self.value_lists)) + { + let original = src; + let ty = self.value_type(original); + debug_assert_eq!( + self.value_type(dest), + ty, + "Aliasing {} to {} would change its type {} to {}", + dest, + src, + self.value_type(dest), + ty + ); + debug_assert_ne!(ty, types::INVALID); + + self.values[dest] = ValueData::Alias { ty, original }; + } + + self.clear_results(dest_inst); + } +} + +/// Where did a value come from? +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ValueDef { + /// Value is the n'th result of an instruction. + Result(Inst, usize), + /// Value is the n'th parameter to an block. + Param(Block, usize), +} + +impl ValueDef { + /// Unwrap the instruction where the value was defined, or panic. + pub fn unwrap_inst(&self) -> Inst { + match *self { + Self::Result(inst, _) => inst, + _ => panic!("Value is not an instruction result"), + } + } + + /// Unwrap the block there the parameter is defined, or panic. + pub fn unwrap_block(&self) -> Block { + match *self { + Self::Param(block, _) => block, + _ => panic!("Value is not an block parameter"), + } + } + + /// Get the program point where the value was defined. + pub fn pp(self) -> ir::ExpandedProgramPoint { + self.into() + } + + /// Get the number component of this definition. + /// + /// When multiple values are defined at the same program point, this indicates the index of + /// this value. 
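+    ///
+    /// For instance, the definition of the second result of an instruction
+    /// `inst` satisfies (a sketch, not a doctest):
+    ///
+    /// ```ignore
+    /// assert_eq!(ValueDef::Result(inst, 1).num(), 1);
+    /// ```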
+    pub fn num(self) -> usize {
+        match self {
+            Self::Result(_, n) | Self::Param(_, n) => n,
+        }
+    }
+}
+
+/// Internal table storage for extended values.
+#[derive(Clone, Debug)]
+enum ValueData {
+    /// Value is defined by an instruction.
+    Inst { ty: Type, num: u16, inst: Inst },
+
+    /// Value is a block parameter.
+    Param { ty: Type, num: u16, block: Block },
+
+    /// Value is an alias of another value.
+    /// An alias value can't be linked as an instruction result or block parameter. It is used as a
+    /// placeholder when the original instruction or block has been rewritten or modified.
+    Alias { ty: Type, original: Value },
+}
+
+/// Instructions.
+///
+impl DataFlowGraph {
+    /// Create a new instruction.
+    ///
+    /// The type of the first result is indicated by `data.ty`. If the instruction produces
+    /// multiple results, also call `make_inst_results` to allocate value table entries.
+    pub fn make_inst(&mut self, data: InstructionData) -> Inst {
+        let n = self.num_insts() + 1;
+        self.results.resize(n);
+        self.insts.push(data)
+    }
+
+    /// Returns an object that displays `inst`.
+    pub fn display_inst<'a, I: Into<Option<&'a dyn TargetIsa>>>(
+        &'a self,
+        inst: Inst,
+        isa: I,
+    ) -> DisplayInst<'a> {
+        DisplayInst(self, isa.into(), inst)
+    }
+
+    /// Get all value arguments on `inst` as a slice.
+    pub fn inst_args(&self, inst: Inst) -> &[Value] {
+        self.insts[inst].arguments(&self.value_lists)
+    }
+
+    /// Get all value arguments on `inst` as a mutable slice.
+    pub fn inst_args_mut(&mut self, inst: Inst) -> &mut [Value] {
+        self.insts[inst].arguments_mut(&mut self.value_lists)
+    }
+
+    /// Get the fixed value arguments on `inst` as a slice.
+    pub fn inst_fixed_args(&self, inst: Inst) -> &[Value] {
+        let num_fixed_args = self[inst]
+            .opcode()
+            .constraints()
+            .num_fixed_value_arguments();
+        &self.inst_args(inst)[..num_fixed_args]
+    }
+
+    /// Get the fixed value arguments on `inst` as a mutable slice.
+    pub fn inst_fixed_args_mut(&mut self, inst: Inst) -> &mut [Value] {
+        let num_fixed_args = self[inst]
+            .opcode()
+            .constraints()
+            .num_fixed_value_arguments();
+        &mut self.inst_args_mut(inst)[..num_fixed_args]
+    }
+
+    /// Get the variable value arguments on `inst` as a slice.
+    pub fn inst_variable_args(&self, inst: Inst) -> &[Value] {
+        let num_fixed_args = self[inst]
+            .opcode()
+            .constraints()
+            .num_fixed_value_arguments();
+        &self.inst_args(inst)[num_fixed_args..]
+    }
+
+    /// Get the variable value arguments on `inst` as a mutable slice.
+    pub fn inst_variable_args_mut(&mut self, inst: Inst) -> &mut [Value] {
+        let num_fixed_args = self[inst]
+            .opcode()
+            .constraints()
+            .num_fixed_value_arguments();
+        &mut self.inst_args_mut(inst)[num_fixed_args..]
+    }
+
+    /// Create result values for an instruction that produces multiple results.
+    ///
+    /// Instructions that produce no result values only need to be created with `make_inst`,
+    /// otherwise call `make_inst_results` to allocate value table entries for the results.
+    ///
+    /// The result value types are determined from the instruction's value type constraints and the
+    /// provided `ctrl_typevar` type for polymorphic instructions. For non-polymorphic
+    /// instructions, `ctrl_typevar` is ignored, and `INVALID` can be used.
+    ///
+    /// The type of the first result value is also set, even if it was already set in the
+    /// `InstructionData` passed to `make_inst`. If this function is called with a single-result
+    /// instruction, that is the only effect.
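+    ///
+    /// A sketch of the usual flow, mirroring the `make_inst` test at the
+    /// bottom of this file (`idata` is some `InstructionData`):
+    ///
+    /// ```ignore
+    /// let inst = dfg.make_inst(idata);
+    /// dfg.make_inst_results(inst, types::I32);
+    /// ```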
+    pub fn make_inst_results(&mut self, inst: Inst, ctrl_typevar: Type) -> usize {
+        self.make_inst_results_reusing(inst, ctrl_typevar, iter::empty())
+    }
+
+    /// Create result values for `inst`, reusing the provided detached values.
+    ///
+    /// Create a new set of result values for `inst` using `ctrl_typevar` to determine the result
+    /// types. Any values provided by `reuse` will be reused. When `reuse` is exhausted or when it
+    /// produces `None`, a new value is created.
+    pub fn make_inst_results_reusing<I>(
+        &mut self,
+        inst: Inst,
+        ctrl_typevar: Type,
+        reuse: I,
+    ) -> usize
+    where
+        I: Iterator<Item = Option<Value>>,
+    {
+        let mut reuse = reuse.fuse();
+
+        self.results[inst].clear(&mut self.value_lists);
+
+        // Get the call signature if this is a function call.
+        if let Some(sig) = self.call_signature(inst) {
+            // Create result values corresponding to the call return types.
+            debug_assert_eq!(
+                self.insts[inst].opcode().constraints().num_fixed_results(),
+                0
+            );
+            let num_results = self.signatures[sig].returns.len();
+            for res_idx in 0..num_results {
+                let ty = self.signatures[sig].returns[res_idx].value_type;
+                if let Some(Some(v)) = reuse.next() {
+                    debug_assert_eq!(self.value_type(v), ty, "Reused {} is wrong type", ty);
+                    self.attach_result(inst, v);
+                } else {
+                    self.append_result(inst, ty);
+                }
+            }
+            num_results
+        } else {
+            // Create result values corresponding to the opcode's constraints.
+            let constraints = self.insts[inst].opcode().constraints();
+            let num_results = constraints.num_fixed_results();
+            for res_idx in 0..num_results {
+                let ty = constraints.result_type(res_idx, ctrl_typevar);
+                if let Some(Some(v)) = reuse.next() {
+                    debug_assert_eq!(self.value_type(v), ty, "Reused {} is wrong type", ty);
+                    self.attach_result(inst, v);
+                } else {
+                    self.append_result(inst, ty);
+                }
+            }
+            num_results
+        }
+    }
+
+    /// Create a `ReplaceBuilder` that will replace `inst` with a new instruction in place.
+    pub fn replace(&mut self, inst: Inst) -> ReplaceBuilder {
+        ReplaceBuilder::new(self, inst)
+    }
+
+    /// Detach the list of result values from `inst` and return it.
+    ///
+    /// This leaves `inst` without any result values. New result values can be created by calling
+    /// `make_inst_results` or by using a `replace(inst)` builder.
+    pub fn detach_results(&mut self, inst: Inst) -> ValueList {
+        self.results[inst].take()
+    }
+
+    /// Clear the list of result values from `inst`.
+    ///
+    /// This leaves `inst` without any result values. New result values can be created by calling
+    /// `make_inst_results` or by using a `replace(inst)` builder.
+    pub fn clear_results(&mut self, inst: Inst) {
+        self.results[inst].clear(&mut self.value_lists)
+    }
+
+    /// Attach an existing value to the result value list for `inst`.
+    ///
+    /// The `res` value is appended to the end of the result list.
+    ///
+    /// This is a very low-level operation. Usually, instruction results with the correct types are
+    /// created automatically. The `res` value must not be attached to anything else.
+    pub fn attach_result(&mut self, inst: Inst, res: Value) {
+        debug_assert!(!self.value_is_attached(res));
+        let num = self.results[inst].push(res, &mut self.value_lists);
+        debug_assert!(num <= u16::MAX as usize, "Too many result values");
+        let ty = self.value_type(res);
+        self.values[res] = ValueData::Inst {
+            ty,
+            num: num as u16,
+            inst,
+        };
+    }
+
+    /// Replace an instruction result with a new value of type `new_type`.
+    ///
+    /// The `old_value` must be an attached instruction result.
+    ///
+    /// The old value is left detached, so it should probably be changed into something else.
+    ///
+    /// Returns the new value.
+    pub fn replace_result(&mut self, old_value: Value, new_type: Type) -> Value {
+        let (num, inst) = match self.values[old_value] {
+            ValueData::Inst { num, inst, .. } => (num, inst),
+            _ => panic!("{} is not an instruction result value", old_value),
+        };
+        let new_value = self.make_value(ValueData::Inst {
+            ty: new_type,
+            num,
+            inst,
+        });
+        let num = num as usize;
+        let attached = mem::replace(
+            self.results[inst]
+                .get_mut(num, &mut self.value_lists)
+                .expect("Replacing detached result"),
+            new_value,
+        );
+        debug_assert_eq!(
+            attached,
+            old_value,
+            "{} wasn't detached from {}",
+            old_value,
+            self.display_inst(inst, None)
+        );
+        new_value
+    }
+
+    /// Append a new instruction result value to `inst`.
+    pub fn append_result(&mut self, inst: Inst, ty: Type) -> Value {
+        let res = self.values.next_key();
+        let num = self.results[inst].push(res, &mut self.value_lists);
+        debug_assert!(num <= u16::MAX as usize, "Too many result values");
+        self.make_value(ValueData::Inst {
+            ty,
+            inst,
+            num: num as u16,
+        })
+    }
+
+    /// Append a new value argument to an instruction.
+    ///
+    /// Panics if the instruction doesn't support arguments.
+    pub fn append_inst_arg(&mut self, inst: Inst, new_arg: Value) {
+        let mut branch_values = self.insts[inst]
+            .take_value_list()
+            .expect("the instruction doesn't have value arguments");
+        branch_values.push(new_arg, &mut self.value_lists);
+        self.insts[inst].put_value_list(branch_values)
+    }
+
+    /// Get the first result of an instruction.
+    ///
+    /// This function panics if the instruction doesn't have any result.
+    pub fn first_result(&self, inst: Inst) -> Value {
+        self.results[inst]
+            .first(&self.value_lists)
+            .expect("Instruction has no results")
+    }
+
+    /// Test if `inst` has any result values currently.
+    pub fn has_results(&self, inst: Inst) -> bool {
+        !self.results[inst].is_empty()
+    }
+
+    /// Return all the results of an instruction.
+    pub fn inst_results(&self, inst: Inst) -> &[Value] {
+        self.results[inst].as_slice(&self.value_lists)
+    }
+
+    /// Get the call signature of a direct or indirect call instruction.
+    /// Returns `None` if `inst` is not a call instruction.
+    pub fn call_signature(&self, inst: Inst) -> Option<SigRef> {
+        match self.insts[inst].analyze_call(&self.value_lists) {
+            CallInfo::NotACall => None,
+            CallInfo::Direct(f, _) => Some(self.ext_funcs[f].signature),
+            CallInfo::Indirect(s, _) => Some(s),
+        }
+    }
+
+    /// Check if `inst` is a branch.
+    pub fn analyze_branch(&self, inst: Inst) -> BranchInfo {
+        self.insts[inst].analyze_branch(&self.value_lists)
+    }
+
+    /// Compute the type of an instruction result from opcode constraints and call signatures.
+    ///
+    /// This computes the same sequence of result types that `make_inst_results()` above would
+    /// assign to the created result values, but it does not depend on `make_inst_results()` being
+    /// called first.
+    ///
+    /// Returns `None` if asked about a result index that is too large.
+    pub fn compute_result_type(
+        &self,
+        inst: Inst,
+        result_idx: usize,
+        ctrl_typevar: Type,
+    ) -> Option<Type> {
+        let constraints = self.insts[inst].opcode().constraints();
+        let num_fixed_results = constraints.num_fixed_results();
+
+        if result_idx < num_fixed_results {
+            return Some(constraints.result_type(result_idx, ctrl_typevar));
+        }
+
+        // Not a fixed result, try to extract a return type from the call signature.
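+        // (For example, the results of a `call` instruction are determined by the
+        // callee's signature rather than by opcode constraints.)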
+        self.call_signature(inst).and_then(|sigref| {
+            self.signatures[sigref]
+                .returns
+                .get(result_idx - num_fixed_results)
+                .map(|&arg| arg.value_type)
+        })
+    }
+
+    /// Get the controlling type variable, or `INVALID` if `inst` isn't polymorphic.
+    pub fn ctrl_typevar(&self, inst: Inst) -> Type {
+        let constraints = self[inst].opcode().constraints();
+
+        if !constraints.is_polymorphic() {
+            types::INVALID
+        } else if constraints.requires_typevar_operand() {
+            // Not all instruction formats have a designated operand, but in that case
+            // `requires_typevar_operand()` should never be true.
+            self.value_type(
+                self[inst]
+                    .typevar_operand(&self.value_lists)
+                    .expect("Instruction format doesn't have a designated operand, bad opcode."),
+            )
+        } else {
+            self.value_type(self.first_result(inst))
+        }
+    }
+}
+
+/// Allow immutable access to instructions via indexing.
+impl Index<Inst> for DataFlowGraph {
+    type Output = InstructionData;
+
+    fn index(&self, inst: Inst) -> &InstructionData {
+        &self.insts[inst]
+    }
+}
+
+/// Allow mutable access to instructions via indexing.
+impl IndexMut<Inst> for DataFlowGraph {
+    fn index_mut(&mut self, inst: Inst) -> &mut InstructionData {
+        &mut self.insts[inst]
+    }
+}
+
+/// Basic blocks.
+impl DataFlowGraph {
+    /// Create a new basic block.
+    pub fn make_block(&mut self) -> Block {
+        self.blocks.push(BlockData::new())
+    }
+
+    /// Get the number of parameters on `block`.
+    pub fn num_block_params(&self, block: Block) -> usize {
+        self.blocks[block].params.len(&self.value_lists)
+    }
+
+    /// Get the parameters on `block`.
+    pub fn block_params(&self, block: Block) -> &[Value] {
+        self.blocks[block].params.as_slice(&self.value_lists)
+    }
+
+    /// Get the types of the parameters on `block`.
+    pub fn block_param_types(&self, block: Block) -> Vec<Type> {
+        self.block_params(block)
+            .iter()
+            .map(|&v| self.value_type(v))
+            .collect()
+    }
+
+    /// Append a parameter with type `ty` to `block`.
+    pub fn append_block_param(&mut self, block: Block, ty: Type) -> Value {
+        let param = self.values.next_key();
+        let num = self.blocks[block].params.push(param, &mut self.value_lists);
+        debug_assert!(num <= u16::MAX as usize, "Too many parameters on block");
+        self.make_value(ValueData::Param {
+            ty,
+            num: num as u16,
+            block,
+        })
+    }
+
+    /// Removes `val` from `block`'s parameters by swapping it with the last parameter on `block`.
+    /// Returns the position of `val` before removal.
+    ///
+    /// *Important*: to ensure O(1) deletion, this method swaps the removed parameter with the
+    /// last `block` parameter. This can disrupt all the branch instructions jumping to this
+    /// `block` for which you have to change the branch argument order if necessary.
+    ///
+    /// Panics if `val` is not a block parameter.
+    pub fn swap_remove_block_param(&mut self, val: Value) -> usize {
+        let (block, num) = if let ValueData::Param { num, block, .. } = self.values[val] {
+            (block, num)
+        } else {
+            panic!("{} must be a block parameter", val);
+        };
+        self.blocks[block]
+            .params
+            .swap_remove(num as usize, &mut self.value_lists);
+        if let Some(last_arg_val) = self.blocks[block]
+            .params
+            .get(num as usize, &self.value_lists)
+        {
+            // We update the position of the old last arg.
+            if let ValueData::Param {
+                num: ref mut old_num,
+                ..
+            } = self.values[last_arg_val]
+            {
+                *old_num = num;
+            } else {
+                panic!("{} should be a block parameter", last_arg_val);
+            }
+        }
+        num as usize
+    }
+
+    /// Removes `val` from `block`'s parameters by a standard linear time list removal which
+    /// preserves ordering. Also updates the values' data.
+    pub fn remove_block_param(&mut self, val: Value) {
+        let (block, num) = if let ValueData::Param { num, block, .. } = self.values[val] {
+            (block, num)
+        } else {
+            panic!("{} must be a block parameter", val);
+        };
+        self.blocks[block]
+            .params
+            .remove(num as usize, &mut self.value_lists);
+        for index in num..(self.num_block_params(block) as u16) {
+            match self.values[self.blocks[block]
+                .params
+                .get(index as usize, &self.value_lists)
+                .unwrap()]
+            {
+                ValueData::Param { ref mut num, .. } => {
+                    *num -= 1;
+                }
+                _ => panic!(
+                    "{} must be a block parameter",
+                    self.blocks[block]
+                        .params
+                        .get(index as usize, &self.value_lists)
+                        .unwrap()
+                ),
+            }
+        }
+    }
+
+    /// Append an existing value to `block`'s parameters.
+    ///
+    /// The appended value can't already be attached to something else.
+    ///
+    /// In almost all cases, you should be using `append_block_param()` instead of this method.
+    pub fn attach_block_param(&mut self, block: Block, param: Value) {
+        debug_assert!(!self.value_is_attached(param));
+        let num = self.blocks[block].params.push(param, &mut self.value_lists);
+        debug_assert!(num <= u16::MAX as usize, "Too many parameters on block");
+        let ty = self.value_type(param);
+        self.values[param] = ValueData::Param {
+            ty,
+            num: num as u16,
+            block,
+        };
+    }
+
+    /// Replace a block parameter with a new value of type `ty`.
+    ///
+    /// The `old_value` must be an attached block parameter. It is removed from its place in the list
+    /// of parameters and replaced by a new value of type `new_type`. The new value gets the same
+    /// position in the list, and other parameters are not disturbed.
+    ///
+    /// The old value is left detached, so it should probably be changed into something else.
+    ///
+    /// Returns the new value.
+    pub fn replace_block_param(&mut self, old_value: Value, new_type: Type) -> Value {
+        // Create new value identical to the old one except for the type.
+        let (block, num) = if let ValueData::Param { num, block, .. } = self.values[old_value] {
+            (block, num)
+        } else {
+            panic!("{} must be a block parameter", old_value);
+        };
+        let new_arg = self.make_value(ValueData::Param {
+            ty: new_type,
+            num,
+            block,
+        });
+
+        self.blocks[block]
+            .params
+            .as_mut_slice(&mut self.value_lists)[num as usize] = new_arg;
+        new_arg
+    }
+
+    /// Detach all the parameters from `block` and return them as a `ValueList`.
+    ///
+    /// This is a quite low-level operation. Sensible things to do with the detached block parameters
+    /// are to put them back on the same block with `attach_block_param()` or change them into aliases
+    /// with `change_to_alias()`.
+    pub fn detach_block_params(&mut self, block: Block) -> ValueList {
+        self.blocks[block].params.take()
+    }
+}
+
+/// Contents of a basic block.
+///
+/// Parameters on a basic block are values that dominate everything in the block. All
+/// branches to this block must provide matching arguments, and the arguments to the entry block must
+/// match the function arguments.
+#[derive(Clone)]
+struct BlockData {
+    /// List of parameters to this block.
+    params: ValueList,
+}
+
+impl BlockData {
+    fn new() -> Self {
+        Self {
+            params: ValueList::new(),
+        }
+    }
+}
+
+/// Object that can display an instruction.
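+///
+/// A sketch of its use, with values as in the `make_inst` test below:
+///
+/// ```ignore
+/// // Prints something like "v0 = iconst.i32 0".
+/// println!("{}", dfg.display_inst(inst, None));
+/// ```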
+pub struct DisplayInst<'a>(&'a DataFlowGraph, Option<&'a dyn TargetIsa>, Inst); + +impl<'a> fmt::Display for DisplayInst<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let dfg = self.0; + let isa = self.1; + let inst = self.2; + + if let Some((first, rest)) = dfg.inst_results(inst).split_first() { + write!(f, "{}", first)?; + for v in rest { + write!(f, ", {}", v)?; + } + write!(f, " = ")?; + } + + let typevar = dfg.ctrl_typevar(inst); + if typevar.is_invalid() { + write!(f, "{}", dfg[inst].opcode())?; + } else { + write!(f, "{}.{}", dfg[inst].opcode(), typevar)?; + } + write_operands(f, dfg, isa, inst) + } +} + +/// Parser routines. These routines should not be used outside the parser. +impl DataFlowGraph { + /// Set the type of a value. This is only for use in the parser, which needs + /// to create invalid values for index padding which may be reassigned later. + #[cold] + fn set_value_type_for_parser(&mut self, v: Value, t: Type) { + assert_eq!( + self.value_type(v), + types::INVALID, + "this function is only for assigning types to previously invalid values" + ); + match self.values[v] { + ValueData::Inst { ref mut ty, .. } + | ValueData::Param { ref mut ty, .. } + | ValueData::Alias { ref mut ty, .. } => *ty = t, + } + } + + /// Create result values for `inst`, reusing the provided detached values. + /// This is similar to `make_inst_results_reusing` except it's only for use + /// in the parser, which needs to reuse previously invalid values. + #[cold] + pub fn make_inst_results_for_parser( + &mut self, + inst: Inst, + ctrl_typevar: Type, + reuse: &[Value], + ) -> usize { + // Get the call signature if this is a function call. + if let Some(sig) = self.call_signature(inst) { + assert_eq!( + self.insts[inst].opcode().constraints().num_fixed_results(), + 0 + ); + for res_idx in 0..self.signatures[sig].returns.len() { + let ty = self.signatures[sig].returns[res_idx].value_type; + if let Some(v) = reuse.get(res_idx) { + self.set_value_type_for_parser(*v, ty); + } + } + } else { + let constraints = self.insts[inst].opcode().constraints(); + for res_idx in 0..constraints.num_fixed_results() { + let ty = constraints.result_type(res_idx, ctrl_typevar); + if let Some(v) = reuse.get(res_idx) { + self.set_value_type_for_parser(*v, ty); + } + } + } + + self.make_inst_results_reusing(inst, ctrl_typevar, reuse.iter().map(|x| Some(*x))) + } + + /// Similar to `append_block_param`, append a parameter with type `ty` to + /// `block`, but using value `val`. This is only for use by the parser to + /// create parameters with specific values. + #[cold] + pub fn append_block_param_for_parser(&mut self, block: Block, ty: Type, val: Value) { + let num = self.blocks[block].params.push(val, &mut self.value_lists); + assert!(num <= u16::MAX as usize, "Too many parameters on block"); + self.values[val] = ValueData::Param { + ty, + num: num as u16, + block, + }; + } + + /// Create a new value alias. This is only for use by the parser to create + /// aliases with specific values, and the printer for testing. + #[cold] + pub fn make_value_alias_for_serialization(&mut self, src: Value, dest: Value) { + assert_ne!(src, Value::reserved_value()); + assert_ne!(dest, Value::reserved_value()); + + let ty = if self.values.is_valid(src) { + self.value_type(src) + } else { + // As a special case, if we can't resolve the aliasee yet, use INVALID + // temporarily. It will be resolved later in parsing. 
+            types::INVALID
+        };
+        let data = ValueData::Alias { ty, original: src };
+        self.values[dest] = data;
+    }
+
+    /// If `v` is already defined as an alias, return its destination value.
+    /// Otherwise return None. This allows the parser to coalesce identical
+    /// alias definitions, and the printer to identify an alias's immediate target.
+    #[cold]
+    pub fn value_alias_dest_for_serialization(&self, v: Value) -> Option<Value> {
+        if let ValueData::Alias { original, .. } = self.values[v] {
+            Some(original)
+        } else {
+            None
+        }
+    }
+
+    /// Compute the type of an alias. This is only for use in the parser.
+    /// Returns false if an alias cycle was encountered.
+    #[cold]
+    pub fn set_alias_type_for_parser(&mut self, v: Value) -> bool {
+        if let Some(resolved) = maybe_resolve_aliases(&self.values, v) {
+            let old_ty = self.value_type(v);
+            let new_ty = self.value_type(resolved);
+            if old_ty == types::INVALID {
+                self.set_value_type_for_parser(v, new_ty);
+            } else {
+                assert_eq!(old_ty, new_ty);
+            }
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Create an invalid value, to pad the index space. This is only for use by
+    /// the parser to pad out the value index space.
+    #[cold]
+    pub fn make_invalid_value_for_parser(&mut self) {
+        let data = ValueData::Alias {
+            ty: types::INVALID,
+            original: Value::reserved_value(),
+        };
+        self.make_value(data);
+    }
+
+    /// Check if a value reference is valid, while being aware of aliases which
+    /// may be unresolved while parsing.
+    #[cold]
+    pub fn value_is_valid_for_parser(&self, v: Value) -> bool {
+        if !self.value_is_valid(v) {
+            return false;
+        }
+        if let ValueData::Alias { ty, .. } = self.values[v] {
+            ty != types::INVALID
+        } else {
+            true
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cursor::{Cursor, FuncCursor};
+    use crate::ir::types;
+    use crate::ir::{Function, InstructionData, Opcode, TrapCode};
+    use alloc::string::ToString;
+
+    #[test]
+    fn make_inst() {
+        let mut dfg = DataFlowGraph::new();
+
+        let idata = InstructionData::UnaryImm {
+            opcode: Opcode::Iconst,
+            imm: 0.into(),
+        };
+        let inst = dfg.make_inst(idata);
+
+        dfg.make_inst_results(inst, types::I32);
+        assert_eq!(inst.to_string(), "inst0");
+        assert_eq!(
+            dfg.display_inst(inst, None).to_string(),
+            "v0 = iconst.i32 0"
+        );
+
+        // Immutable reference resolution.
+        {
+            let immdfg = &dfg;
+            let ins = &immdfg[inst];
+            assert_eq!(ins.opcode(), Opcode::Iconst);
+        }
+
+        // Results.
+        let val = dfg.first_result(inst);
+        assert_eq!(dfg.inst_results(inst), &[val]);
+
+        assert_eq!(dfg.value_def(val), ValueDef::Result(inst, 0));
+        assert_eq!(dfg.value_type(val), types::I32);
+
+        // Replacing results.
+        assert!(dfg.value_is_attached(val));
+        let v2 = dfg.replace_result(val, types::F64);
+        assert!(!dfg.value_is_attached(val));
+        assert!(dfg.value_is_attached(v2));
+        assert_eq!(dfg.inst_results(inst), &[v2]);
+        assert_eq!(dfg.value_def(v2), ValueDef::Result(inst, 0));
+        assert_eq!(dfg.value_type(v2), types::F64);
+    }
+
+    #[test]
+    fn no_results() {
+        let mut dfg = DataFlowGraph::new();
+
+        let idata = InstructionData::Trap {
+            opcode: Opcode::Trap,
+            code: TrapCode::User(0),
+        };
+        let inst = dfg.make_inst(idata);
+        assert_eq!(dfg.display_inst(inst, None).to_string(), "trap user0");
+
+        // Result slice should be empty.
+ assert_eq!(dfg.inst_results(inst), &[]); + } + + #[test] + fn block() { + let mut dfg = DataFlowGraph::new(); + + let block = dfg.make_block(); + assert_eq!(block.to_string(), "block0"); + assert_eq!(dfg.num_block_params(block), 0); + assert_eq!(dfg.block_params(block), &[]); + assert!(dfg.detach_block_params(block).is_empty()); + assert_eq!(dfg.num_block_params(block), 0); + assert_eq!(dfg.block_params(block), &[]); + + let arg1 = dfg.append_block_param(block, types::F32); + assert_eq!(arg1.to_string(), "v0"); + assert_eq!(dfg.num_block_params(block), 1); + assert_eq!(dfg.block_params(block), &[arg1]); + + let arg2 = dfg.append_block_param(block, types::I16); + assert_eq!(arg2.to_string(), "v1"); + assert_eq!(dfg.num_block_params(block), 2); + assert_eq!(dfg.block_params(block), &[arg1, arg2]); + + assert_eq!(dfg.value_def(arg1), ValueDef::Param(block, 0)); + assert_eq!(dfg.value_def(arg2), ValueDef::Param(block, 1)); + assert_eq!(dfg.value_type(arg1), types::F32); + assert_eq!(dfg.value_type(arg2), types::I16); + + // Swap the two block parameters. + let vlist = dfg.detach_block_params(block); + assert_eq!(dfg.num_block_params(block), 0); + assert_eq!(dfg.block_params(block), &[]); + assert_eq!(vlist.as_slice(&dfg.value_lists), &[arg1, arg2]); + dfg.attach_block_param(block, arg2); + let arg3 = dfg.append_block_param(block, types::I32); + dfg.attach_block_param(block, arg1); + assert_eq!(dfg.block_params(block), &[arg2, arg3, arg1]); + } + + #[test] + fn replace_block_params() { + let mut dfg = DataFlowGraph::new(); + + let block = dfg.make_block(); + let arg1 = dfg.append_block_param(block, types::F32); + + let new1 = dfg.replace_block_param(arg1, types::I64); + assert_eq!(dfg.value_type(arg1), types::F32); + assert_eq!(dfg.value_type(new1), types::I64); + assert_eq!(dfg.block_params(block), &[new1]); + + dfg.attach_block_param(block, arg1); + assert_eq!(dfg.block_params(block), &[new1, arg1]); + + let new2 = dfg.replace_block_param(arg1, types::I8); + assert_eq!(dfg.value_type(arg1), types::F32); + assert_eq!(dfg.value_type(new2), types::I8); + assert_eq!(dfg.block_params(block), &[new1, new2]); + + dfg.attach_block_param(block, arg1); + assert_eq!(dfg.block_params(block), &[new1, new2, arg1]); + + let new3 = dfg.replace_block_param(new2, types::I16); + assert_eq!(dfg.value_type(new1), types::I64); + assert_eq!(dfg.value_type(new2), types::I8); + assert_eq!(dfg.value_type(new3), types::I16); + assert_eq!(dfg.block_params(block), &[new1, new3, arg1]); + } + + #[test] + fn swap_remove_block_params() { + let mut dfg = DataFlowGraph::new(); + + let block = dfg.make_block(); + let arg1 = dfg.append_block_param(block, types::F32); + let arg2 = dfg.append_block_param(block, types::F32); + let arg3 = dfg.append_block_param(block, types::F32); + assert_eq!(dfg.block_params(block), &[arg1, arg2, arg3]); + + dfg.swap_remove_block_param(arg1); + assert_eq!(dfg.value_is_attached(arg1), false); + assert_eq!(dfg.value_is_attached(arg2), true); + assert_eq!(dfg.value_is_attached(arg3), true); + assert_eq!(dfg.block_params(block), &[arg3, arg2]); + dfg.swap_remove_block_param(arg2); + assert_eq!(dfg.value_is_attached(arg2), false); + assert_eq!(dfg.value_is_attached(arg3), true); + assert_eq!(dfg.block_params(block), &[arg3]); + dfg.swap_remove_block_param(arg3); + assert_eq!(dfg.value_is_attached(arg3), false); + assert_eq!(dfg.block_params(block), &[]); + } + + #[test] + fn aliases() { + use crate::ir::InstBuilder; + + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + let mut 
pos = FuncCursor::new(&mut func);
+        pos.insert_block(block0);
+
+        // Build a little test program.
+        let v1 = pos.ins().iconst(types::I32, 42);
+
+        // Make sure we can resolve value aliases even when values is empty.
+        assert_eq!(pos.func.dfg.resolve_aliases(v1), v1);
+
+        let arg0 = pos.func.dfg.append_block_param(block0, types::I32);
+        let (s, c) = pos.ins().iadd_ifcout(v1, arg0);
+        let iadd = match pos.func.dfg.value_def(s) {
+            ValueDef::Result(i, 0) => i,
+            _ => panic!(),
+        };
+
+        // Remove `c` from the result list.
+        pos.func.dfg.clear_results(iadd);
+        pos.func.dfg.attach_result(iadd, s);
+
+        // Replace `iadd_ifcout` with a normal `iadd` and an `ifcmp`.
+        pos.func.dfg.replace(iadd).iadd(v1, arg0);
+        let c2 = pos.ins().ifcmp(s, v1);
+        pos.func.dfg.change_to_alias(c, c2);
+
+        assert_eq!(pos.func.dfg.resolve_aliases(c2), c2);
+        assert_eq!(pos.func.dfg.resolve_aliases(c), c2);
+
+        // Make a copy of the alias.
+        let c3 = pos.ins().copy(c);
+        // This does not see through copies.
+        assert_eq!(pos.func.dfg.resolve_aliases(c3), c3);
+    }
+}
diff --git a/cranelift/codegen/src/ir/entities.rs b/cranelift/codegen/src/ir/entities.rs
new file mode 100644
index 0000000000..57906ab63a
--- /dev/null
+++ b/cranelift/codegen/src/ir/entities.rs
@@ -0,0 +1,510 @@
+//! Cranelift IR entity references.
+//!
+//! Instructions in Cranelift IR need to reference other entities in the function. This can be other
+//! parts of the function like basic blocks or stack slots, or it can be external entities
+//! that are declared in the function preamble in the text format.
+//!
+//! These entity references in instruction operands are not implemented as Rust references both
+//! because Rust's ownership and mutability rules make it difficult, and because 64-bit pointers
+//! take up a lot of space, and we want a compact in-memory representation. Instead, entity
+//! references are structs wrapping a `u32` index into a table in the `Function` main data
+//! structure. There is a separate index type for each entity type, so we don't lose type safety.
+//!
+//! The `entities` module defines public types for the entity references along with constants
+//! representing an invalid reference. We prefer to use `Option<EntityRef>` whenever possible, but
+//! unfortunately that type is twice as large as the 32-bit index type on its own. Thus, compact
+//! data structures use the `PackedOption<EntityRef>` representation, while function arguments and
+//! return values prefer the more Rust-like `Option<EntityRef>` variant.
+//!
+//! The entity references all implement the `Display` trait in a way that matches the textual IR
+//! format.
+
+use crate::entity::entity_impl;
+use core::fmt;
+use core::u32;
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+
+/// An opaque reference to a [basic block](https://en.wikipedia.org/wiki/Basic_block) in a
+/// [`Function`](super::function::Function).
+///
+/// You can get a `Block` using
+/// [`FunctionBuilder::create_block`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_block)
+///
+/// While the order is stable, it is arbitrary and does not necessarily resemble the layout order.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct Block(u32);
+entity_impl!(Block, "block");
+
+impl Block {
+    /// Create a new block reference from its number. This corresponds to the `blockNN` representation.
+    ///
+    /// This method is for use by the parser.
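+    ///
+    /// A hypothetical sketch of the `blockNN` mapping:
+    ///
+    /// ```ignore
+    /// let block = Block::with_number(7).unwrap();
+    /// assert_eq!(block.to_string(), "block7");
+    /// ```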
+    pub fn with_number(n: u32) -> Option<Self> {
+        if n < u32::MAX {
+            Some(Self(n))
+        } else {
+            None
+        }
+    }
+}
+
+/// An opaque reference to an SSA value.
+///
+/// You can get a constant `Value` from the following
+/// [`InstBuilder`](super::InstBuilder) instructions:
+///
+/// - [`iconst`](super::InstBuilder::iconst) for integer constants
+/// - [`f32const`](super::InstBuilder::f32const) for 32-bit float constants
+/// - [`f64const`](super::InstBuilder::f64const) for 64-bit float constants
+/// - [`bconst`](super::InstBuilder::bconst) for boolean constants
+/// - [`vconst`](super::InstBuilder::vconst) for vector constants
+/// - [`null`](super::InstBuilder::null) for null reference constants
+///
+/// Any `InstBuilder` instruction that has an output will also return a `Value`.
+///
+/// While the order is stable, it is arbitrary.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct Value(u32);
+entity_impl!(Value, "v");
+
+impl Value {
+    /// Create a value from its number representation.
+    /// This is the number in the `vNN` notation.
+    ///
+    /// This method is for use by the parser.
+    pub fn with_number(n: u32) -> Option<Self> {
+        if n < u32::MAX / 2 {
+            Some(Self(n))
+        } else {
+            None
+        }
+    }
+}
+
+/// An opaque reference to an instruction in a [`Function`](super::Function).
+///
+/// Most usage of `Inst` is internal. `Inst`ructions are returned by
+/// [`InstBuilder`](super::InstBuilder) instructions that do not return a
+/// [`Value`], such as control flow and trap instructions.
+///
+/// If you look around the API, you can find many inventive uses for `Inst`,
+/// such as [annotating specific instructions with a comment][inst_comment]
+/// or [performing reflection at compile time](super::DataFlowGraph::analyze_branch)
+/// on the type of instruction.
+///
+/// [inst_comment]: https://github.com/bjorn3/rustc_codegen_cranelift/blob/0f8814fd6da3d436a90549d4bb19b94034f2b19c/src/pretty_clif.rs
+///
+/// While the order is stable, it is arbitrary and does not necessarily resemble the layout order.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct Inst(u32);
+entity_impl!(Inst, "inst");
+
+/// An opaque reference to a stack slot.
+///
+/// Stack slots represent an address on the
+/// [call stack](https://en.wikipedia.org/wiki/Call_stack).
+///
+/// `StackSlot`s can be created with
+/// [`FunctionBuilder::create_stack_slot`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_stack_slot).
+///
+/// `StackSlot`s are most often used with
+/// [`stack_addr`](super::InstBuilder::stack_addr),
+/// [`stack_load`](super::InstBuilder::stack_load), and
+/// [`stack_store`](super::InstBuilder::stack_store).
+///
+/// While the order is stable, it is arbitrary and does not necessarily resemble the stack order.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct StackSlot(u32);
+entity_impl!(StackSlot, "ss");
+
+impl StackSlot {
+    /// Create a new stack slot reference from its number.
+    ///
+    /// This method is for use by the parser.
+    pub fn with_number(n: u32) -> Option<Self> {
+        if n < u32::MAX {
+            Some(Self(n))
+        } else {
+            None
+        }
+    }
+}
+
+/// An opaque reference to a global value.
+///
+/// A `GlobalValue` is a [`Value`](Value) that will be live across the entire
+/// function lifetime. It can be preloaded from other global values.
+///
+/// You can create a `GlobalValue` in the following ways:
+///
+/// - When compiling to WASM, you can use it to load values from a
+///   [`VmContext`](super::GlobalValueData::VMContext) using
+///   [`FuncEnvironment::make_global`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_global).
+/// - When compiling to native code, you can use it for objects in static memory with
+///   [`Module::declare_data_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/struct.Module.html#method.declare_data_in_func).
+/// - For any compilation target, it can be registered with
+///   [`FunctionBuilder::create_global_value`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_global_value).
+///
+/// `GlobalValue`s can be retrieved with
+/// [`InstBuilder::global_value`](super::InstBuilder::global_value).
+///
+/// While the order is stable, it is arbitrary.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct GlobalValue(u32);
+entity_impl!(GlobalValue, "gv");
+
+impl GlobalValue {
+    /// Create a new global value reference from its number.
+    ///
+    /// This method is for use by the parser.
+    pub fn with_number(n: u32) -> Option<Self> {
+        if n < u32::MAX {
+            Some(Self(n))
+        } else {
+            None
+        }
+    }
+}
+
+/// An opaque reference to a constant.
+///
+/// You can store [`ConstantData`](super::ConstantData) in a
+/// [`ConstantPool`](super::ConstantPool) for efficient storage and retrieval.
+/// See [`ConstantPool::insert`](super::ConstantPool::insert).
+///
+/// While the order is stable, it is arbitrary and does not necessarily resemble the order in which
+/// the constants are written in the constant pool.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)]
+pub struct Constant(u32);
+entity_impl!(Constant, "const");
+
+impl Constant {
+    /// Create a const reference from its number.
+    ///
+    /// This method is for use by the parser.
+    pub fn with_number(n: u32) -> Option<Self> {
+        if n < u32::MAX {
+            Some(Self(n))
+        } else {
+            None
+        }
+    }
+}
+
+/// An opaque reference to an immediate.
+///
+/// Some immediates (e.g. SIMD shuffle masks) are too large to store in the
+/// [`InstructionData`](super::instructions::InstructionData) struct and therefore must be
+/// tracked separately in [`DataFlowGraph::immediates`](super::dfg::DataFlowGraph). `Immediate`
+/// provides a way to reference values stored there.
+///
+/// While the order is stable, it is arbitrary.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct Immediate(u32);
+entity_impl!(Immediate, "imm");
+
+impl Immediate {
+    /// Create an immediate reference from its number.
+    ///
+    /// This method is for use by the parser.
+    pub fn with_number(n: u32) -> Option<Self> {
+        if n < u32::MAX {
+            Some(Self(n))
+        } else {
+            None
+        }
+    }
+}
+
+/// An opaque reference to a [jump table](https://en.wikipedia.org/wiki/Branch_table).
+///
+/// `JumpTable`s are used for indirect branching and are specialized for dense,
+/// 0-based jump offsets. If you want a jump table which doesn't start at 0,
+/// or is not contiguous, consider using a [`Switch`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.Switch.html) instead.
+///
+/// `JumpTable`s are used with [`br_table`](super::InstBuilder::br_table).
+///
+/// `JumpTable`s can be created with
+/// [`create_jump_table`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_jump_table).
+///
+/// While the order is stable, it is arbitrary.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct JumpTable(u32);
+entity_impl!(JumpTable, "jt");
+
+impl JumpTable {
+    /// Create a new jump table reference from its number.
+    ///
+    /// This method is for use by the parser.
+    pub fn with_number(n: u32) -> Option<Self> {
+        if n < u32::MAX {
+            Some(Self(n))
+        } else {
+            None
+        }
+    }
+}
+
+/// An opaque reference to another [`Function`](super::Function).
+///
+/// `FuncRef`s are used for [direct](super::InstBuilder::call) function calls
+/// and by [`func_addr`](super::InstBuilder::func_addr) for use in
+/// [indirect](super::InstBuilder::call_indirect) function calls.
+///
+/// `FuncRef`s can be created with
+///
+/// - [`FunctionBuilder::import_function`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.import_function)
+///   for external functions
+/// - [`Module::declare_func_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/struct.Module.html#method.declare_func_in_func)
+///   for functions declared elsewhere in the same native
+///   [`Module`](https://docs.rs/cranelift-module/*/cranelift_module/struct.Module.html)
+/// - [`FuncEnvironment::make_direct_func`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_direct_func)
+///   for functions declared in the same WebAssembly
+///   [`FuncEnvironment`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_direct_func)
+///
+/// While the order is stable, it is arbitrary.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct FuncRef(u32);
+entity_impl!(FuncRef, "fn");
+
+impl FuncRef {
+    /// Create a new external function reference from its number.
+    ///
+    /// This method is for use by the parser.
+    pub fn with_number(n: u32) -> Option<Self> {
+        if n < u32::MAX {
+            Some(Self(n))
+        } else {
+            None
+        }
+    }
+}
+
+/// An opaque reference to a function [`Signature`](super::Signature).
+///
+/// `SigRef`s are used to declare a function with
+/// [`FunctionBuilder::import_function`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.import_function)
+/// as well as to make an [indirect function call](super::InstBuilder::call_indirect).
+///
+/// `SigRef`s can be created with
+/// [`FunctionBuilder::import_signature`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.import_signature).
+///
+/// You can retrieve the [`Signature`](super::Signature) that was used to create a `SigRef` with
+/// [`FunctionBuilder::signature`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.signature) or
+/// [`func.dfg.signatures`](super::dfg::DataFlowGraph::signatures).
+///
+/// While the order is stable, it is arbitrary.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct SigRef(u32);
+entity_impl!(SigRef, "sig");
+
+impl SigRef {
+    /// Create a new function signature reference from its number.
+    ///
+    /// This method is for use by the parser.
+    pub fn with_number(n: u32) -> Option<Self> {
+        if n < u32::MAX {
+            Some(Self(n))
+        } else {
+            None
+        }
+    }
+}
+
+/// An opaque reference to a [heap](https://en.wikipedia.org/wiki/Memory_management#DYNAMIC).
+///
+/// Heaps are used to access dynamically allocated memory through
+/// [`heap_addr`](super::InstBuilder::heap_addr).
+///
+/// To create a heap, use [`FunctionBuilder::create_heap`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_heap).
+///
+/// While the order is stable, it is arbitrary.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct Heap(u32);
+entity_impl!(Heap, "heap");
+
+impl Heap {
+    /// Create a new heap reference from its number.
+    ///
+    /// This method is for use by the parser.
+    pub fn with_number(n: u32) -> Option<Self> {
+        if n < u32::MAX {
+            Some(Self(n))
+        } else {
+            None
+        }
+    }
+}
+
+/// An opaque reference to a [WebAssembly
+/// table](https://developer.mozilla.org/en-US/docs/WebAssembly/Understanding_the_text_format#WebAssembly_tables).
+///
+/// `Table`s are used to store a list of function references.
+/// They can be created with [`FuncEnvironment::make_table`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_table).
+/// They can be used with
+/// [`FuncEnvironment::translate_call_indirect`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.translate_call_indirect).
+///
+/// While the order is stable, it is arbitrary.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct Table(u32);
+entity_impl!(Table, "table");
+
+impl Table {
+    /// Create a new table reference from its number.
+    ///
+    /// This method is for use by the parser.
+    pub fn with_number(n: u32) -> Option<Self> {
+        if n < u32::MAX {
+            Some(Self(n))
+        } else {
+            None
+        }
+    }
+}
+
+/// An opaque reference to any of the entities defined in this module that can appear in CLIF IR.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub enum AnyEntity {
+    /// The whole function.
+    Function,
+    /// A basic block.
+    Block(Block),
+    /// An instruction.
+    Inst(Inst),
+    /// An SSA value.
+    Value(Value),
+    /// A stack slot.
+    StackSlot(StackSlot),
+    /// A global value.
+    GlobalValue(GlobalValue),
+    /// A jump table.
+    JumpTable(JumpTable),
+    /// An external function.
+    FuncRef(FuncRef),
+    /// A function call signature.
+    SigRef(SigRef),
+    /// A heap.
+    Heap(Heap),
+    /// A table.
+    Table(Table),
+}
+
+impl fmt::Display for AnyEntity {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Self::Function => write!(f, "function"),
+            Self::Block(r) => r.fmt(f),
+            Self::Inst(r) => r.fmt(f),
+            Self::Value(r) => r.fmt(f),
+            Self::StackSlot(r) => r.fmt(f),
+            Self::GlobalValue(r) => r.fmt(f),
+            Self::JumpTable(r) => r.fmt(f),
+            Self::FuncRef(r) => r.fmt(f),
+            Self::SigRef(r) => r.fmt(f),
+            Self::Heap(r) => r.fmt(f),
+            Self::Table(r) => r.fmt(f),
+        }
+    }
+}
+
+impl fmt::Debug for AnyEntity {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        (self as &dyn fmt::Display).fmt(f)
+    }
+}
+
+impl From<Block> for AnyEntity {
+    fn from(r: Block) -> Self {
+        Self::Block(r)
+    }
+}
+
+impl From<Inst> for AnyEntity {
+    fn from(r: Inst) -> Self {
+        Self::Inst(r)
+    }
+}
+
+impl From<Value> for AnyEntity {
+    fn from(r: Value) -> Self {
+        Self::Value(r)
+    }
+}
+
+impl From<StackSlot> for AnyEntity {
+    fn from(r: StackSlot) -> Self {
+        Self::StackSlot(r)
+    }
+}
+
+impl From<GlobalValue> for AnyEntity {
+    fn from(r: GlobalValue) -> Self {
+        Self::GlobalValue(r)
+    }
+}
+
+impl From<JumpTable> for AnyEntity {
+    fn from(r: JumpTable) -> Self {
+        Self::JumpTable(r)
+    }
+}
+
+impl From<FuncRef> for AnyEntity {
+    fn from(r: FuncRef) -> Self {
+        Self::FuncRef(r)
+    }
+}
+
+impl From<SigRef> for AnyEntity {
+    fn from(r: SigRef) -> Self {
+        Self::SigRef(r)
+    }
+}
+
+impl From<Heap> for AnyEntity {
+    fn from(r: Heap) -> Self {
+        Self::Heap(r)
+    }
+}
+
+impl From<Table> for AnyEntity {
+    fn from(r: Table) -> Self {
+        Self::Table(r)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use alloc::string::ToString;
+    use core::u32;
+
+    #[test]
+    fn value_with_number() {
+        assert_eq!(Value::with_number(0).unwrap().to_string(), "v0");
+        assert_eq!(Value::with_number(1).unwrap().to_string(), "v1");
+
+        assert_eq!(Value::with_number(u32::MAX / 2), None);
+        assert!(Value::with_number(u32::MAX / 2 - 1).is_some());
+    }
+
+    #[test]
+    fn memory() {
+        use crate::packed_option::PackedOption;
+        use core::mem;
+        // This is the whole point of `PackedOption`.
+        assert_eq!(
+            mem::size_of::<Value>(),
+            mem::size_of::<PackedOption<Value>>()
+        );
+    }
+
+    #[test]
+    fn constant_with_number() {
+        assert_eq!(Constant::with_number(0).unwrap().to_string(), "const0");
+        assert_eq!(Constant::with_number(1).unwrap().to_string(), "const1");
+    }
+}
diff --git a/cranelift/codegen/src/ir/extfunc.rs b/cranelift/codegen/src/ir/extfunc.rs
new file mode 100644
index 0000000000..9274efe9b9
--- /dev/null
+++ b/cranelift/codegen/src/ir/extfunc.rs
@@ -0,0 +1,456 @@
+//! External function calls.
+//!
+//! To a Cranelift function, all functions are "external". Directly called functions must be
+//! declared in the preamble, and all function calls must have a signature.
+//!
+//! This module declares the data types used to represent external functions and call signatures.
+
+use crate::ir::{ArgumentLoc, ExternalName, SigRef, Type};
+use crate::isa::{CallConv, RegInfo, RegUnit};
+use alloc::vec::Vec;
+use core::fmt;
+use core::str::FromStr;
+
+/// Function signature.
+///
+/// The function signature describes the types of formal parameters and return values along with
+/// other details that are needed to call a function correctly.
+///
+/// A signature can optionally include ISA-specific ABI information which specifies exactly how
+/// arguments and return values are passed.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct Signature {
+    /// The arguments passed to the function.
+    pub params: Vec<AbiParam>,
+    /// Values returned from the function.
+    pub returns: Vec<AbiParam>,
+
+    /// Calling convention.
+    pub call_conv: CallConv,
+}
+
+impl Signature {
+    /// Create a new blank signature.
+    pub fn new(call_conv: CallConv) -> Self {
+        Self {
+            params: Vec::new(),
+            returns: Vec::new(),
+            call_conv,
+        }
+    }
+
+    /// Clear the signature so it is identical to a fresh one returned by `new()`.
+    pub fn clear(&mut self, call_conv: CallConv) {
+        self.params.clear();
+        self.returns.clear();
+        self.call_conv = call_conv;
+    }
+
+    /// Return an object that can display `self` with correct register names.
+    pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplaySignature<'a> {
+        DisplaySignature(self, regs.into())
+    }
+
+    /// Find the index of a presumed unique special-purpose parameter.
+    pub fn special_param_index(&self, purpose: ArgumentPurpose) -> Option<usize> {
+        self.params.iter().rposition(|arg| arg.purpose == purpose)
+    }
+
+    /// Find the index of a presumed unique special-purpose return value.
+    pub fn special_return_index(&self, purpose: ArgumentPurpose) -> Option<usize> {
+        self.returns.iter().rposition(|arg| arg.purpose == purpose)
+    }
+
+    /// Does this signature have a parameter whose `ArgumentPurpose` is
+    /// `purpose`?
+    pub fn uses_special_param(&self, purpose: ArgumentPurpose) -> bool {
+        self.special_param_index(purpose).is_some()
+    }
+
+    /// Does this signature have a return whose `ArgumentPurpose` is `purpose`?
+    pub fn uses_special_return(&self, purpose: ArgumentPurpose) -> bool {
+        self.special_return_index(purpose).is_some()
+    }
+
+    /// How many special parameters does this function have?
+    pub fn num_special_params(&self) -> usize {
+        self.params
+            .iter()
+            .filter(|p| p.purpose != ArgumentPurpose::Normal)
+            .count()
+    }
+
+    /// How many special returns does this function have?
+    pub fn num_special_returns(&self) -> usize {
+        self.returns
+            .iter()
+            .filter(|r| r.purpose != ArgumentPurpose::Normal)
+            .count()
+    }
+
+    /// Does this signature take a struct return pointer parameter?
+    pub fn uses_struct_return_param(&self) -> bool {
+        self.uses_special_param(ArgumentPurpose::StructReturn)
+    }
+
+    /// Does this return more than one normal value? (Pre-struct return
+    /// legalization)
+    pub fn is_multi_return(&self) -> bool {
+        self.returns
+            .iter()
+            .filter(|r| r.purpose == ArgumentPurpose::Normal)
+            .count()
+            > 1
+    }
+}
+
+/// Wrapper type capable of displaying a `Signature` with correct register names.
+pub struct DisplaySignature<'a>(&'a Signature, Option<&'a RegInfo>);
+
+fn write_list(f: &mut fmt::Formatter, args: &[AbiParam], regs: Option<&RegInfo>) -> fmt::Result {
+    match args.split_first() {
+        None => {}
+        Some((first, rest)) => {
+            write!(f, "{}", first.display(regs))?;
+            for arg in rest {
+                write!(f, ", {}", arg.display(regs))?;
+            }
+        }
+    }
+    Ok(())
+}
+
+impl<'a> fmt::Display for DisplaySignature<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "(")?;
+        write_list(f, &self.0.params, self.1)?;
+        write!(f, ")")?;
+        if !self.0.returns.is_empty() {
+            write!(f, " -> ")?;
+            write_list(f, &self.0.returns, self.1)?;
+        }
+        write!(f, " {}", self.0.call_conv)
+    }
+}
+
+impl fmt::Display for Signature {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        self.display(None).fmt(f)
+    }
+}
+
+/// Function parameter or return value descriptor.
+///
+/// This describes the value type being passed to or from a function along with flags that affect
+/// how the argument is passed.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub struct AbiParam {
+    /// Type of the argument value.
+    pub value_type: Type,
+    /// Special purpose of argument, or `Normal`.
+    pub purpose: ArgumentPurpose,
+    /// Method for extending argument to a full register.
+    pub extension: ArgumentExtension,
+
+    /// ABI-specific location of this argument, or `Unassigned` for arguments that have not yet
+    /// been legalized.
+    pub location: ArgumentLoc,
+}
+
+impl AbiParam {
+    /// Create a parameter with default flags.
+    pub fn new(vt: Type) -> Self {
+        Self {
+            value_type: vt,
+            extension: ArgumentExtension::None,
+            purpose: ArgumentPurpose::Normal,
+            location: Default::default(),
+        }
+    }
+
+    /// Create a special-purpose parameter that is not (yet) bound to a specific register.
+    pub fn special(vt: Type, purpose: ArgumentPurpose) -> Self {
+        Self {
+            value_type: vt,
+            extension: ArgumentExtension::None,
+            purpose,
+            location: Default::default(),
+        }
+    }
+
+    /// Create a parameter for a special-purpose register.
+    pub fn special_reg(vt: Type, purpose: ArgumentPurpose, regunit: RegUnit) -> Self {
+        Self {
+            value_type: vt,
+            extension: ArgumentExtension::None,
+            purpose,
+            location: ArgumentLoc::Reg(regunit),
+        }
+    }
+
+    /// Convert `self` to a parameter with the `uext` flag set.
+    pub fn uext(self) -> Self {
+        debug_assert!(self.value_type.is_int(), "uext on {} arg", self.value_type);
+        Self {
+            extension: ArgumentExtension::Uext,
+            ..self
+        }
+    }
+
+    /// Convert `self` to a parameter type with the `sext` flag set.
+    pub fn sext(self) -> Self {
+        debug_assert!(self.value_type.is_int(), "sext on {} arg", self.value_type);
+        Self {
+            extension: ArgumentExtension::Sext,
+            ..self
+        }
+    }
+
+    /// Return an object that can display `self` with correct register names.
+    pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayAbiParam<'a> {
+        DisplayAbiParam(self, regs.into())
+    }
+}
+
+/// Wrapper type capable of displaying an `AbiParam` with correct register names.
+pub struct DisplayAbiParam<'a>(&'a AbiParam, Option<&'a RegInfo>);
+
+impl<'a> fmt::Display for DisplayAbiParam<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.0.value_type)?;
+        match self.0.extension {
+            ArgumentExtension::None => {}
+            ArgumentExtension::Uext => write!(f, " uext")?,
+            ArgumentExtension::Sext => write!(f, " sext")?,
+        }
+        if self.0.purpose != ArgumentPurpose::Normal {
+            write!(f, " {}", self.0.purpose)?;
+        }
+
+        if self.0.location.is_assigned() {
+            write!(f, " [{}]", self.0.location.display(self.1))?;
+        }
+
+        Ok(())
+    }
+}
+
+impl fmt::Display for AbiParam {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        self.display(None).fmt(f)
+    }
+}
+
+/// Function argument extension options.
+///
+/// On some architectures, small integer function arguments are extended to the width of a
+/// general-purpose register.
+#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
+pub enum ArgumentExtension {
+    /// No extension, high bits are indeterminate.
+    None,
+    /// Unsigned extension: high bits in register are 0.
+    Uext,
+    /// Signed extension: high bits in register replicate sign bit.
+    Sext,
+}
+
+/// The special purpose of a function argument.
+///
+/// Function arguments and return values are used to pass user program values between functions,
+/// but they are also used to represent special registers with significance to the ABI such as
+/// frame pointers and callee-saved registers.
+///
+/// The argument purpose is used to indicate any special meaning of an argument or return value.
+#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
+pub enum ArgumentPurpose {
+    /// A normal user program value passed to or from a function.
+    Normal,
+
+    /// Struct return pointer.
+    ///
+    /// When a function needs to return more data than will fit in registers, the caller passes a
+    /// pointer to a memory location where the return value can be written. In some ABIs, this
+    /// struct return pointer is passed in a specific register.
+    ///
+    /// This argument kind can also appear as a return value for ABIs that require a function with
+    /// a `StructReturn` pointer argument to also return that pointer in a register.
+    StructReturn,
+
+    /// The link register.
+    ///
+    /// Most RISC architectures implement calls by saving the return address in a designated
+    /// register rather than pushing it on the stack. This is represented with a `Link` argument.
+    ///
+    /// Similarly, some return instructions expect the return address in a register represented as
+    /// a `Link` return value.
+    Link,
+
+    /// The frame pointer.
+    ///
+    /// This indicates the frame pointer register which has a special meaning in some ABIs.
+    ///
+    /// The frame pointer appears as an argument and as a return value since it is a callee-saved
+    /// register.
+    FramePointer,
+
+    /// A callee-saved register.
+    ///
+    /// Some calling conventions have registers that must be saved by the callee. These registers
+    /// are represented as `CalleeSaved` arguments and return values.
+    CalleeSaved,
+
+    /// A VM context pointer.
+    ///
+    /// This is a pointer to a context struct containing details about the current sandbox. It is
+    /// used as a base pointer for `vmctx` global values.
+    VMContext,
+
+    /// A signature identifier.
+    ///
+    /// This is a special-purpose argument used to identify the calling convention expected by the
+    /// caller in an indirect call. The callee can verify that the expected signature ID matches.
+    SignatureId,
+
+    /// A stack limit pointer.
+    ///
+    /// This is a pointer to a stack limit which the current stack pointer is checked against.
+    /// It can only appear once in a signature.
+    StackLimit,
+}
+
+/// Text format names of the `ArgumentPurpose` variants.
+static PURPOSE_NAMES: [&str; 8] = [
+    "normal",
+    "sret",
+    "link",
+    "fp",
+    "csr",
+    "vmctx",
+    "sigid",
+    "stack_limit",
+];
+
+impl fmt::Display for ArgumentPurpose {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str(PURPOSE_NAMES[*self as usize])
+    }
+}
+
+impl FromStr for ArgumentPurpose {
+    type Err = ();
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "normal" => Ok(Self::Normal),
+            "sret" => Ok(Self::StructReturn),
+            "link" => Ok(Self::Link),
+            "fp" => Ok(Self::FramePointer),
+            "csr" => Ok(Self::CalleeSaved),
+            "vmctx" => Ok(Self::VMContext),
+            "sigid" => Ok(Self::SignatureId),
+            "stack_limit" => Ok(Self::StackLimit),
+            _ => Err(()),
+        }
+    }
+}
+
+/// An external function.
+///
+/// Information about a function that can be called directly with a direct `call` instruction.
+#[derive(Clone, Debug)]
+pub struct ExtFuncData {
+    /// Name of the external function.
+    pub name: ExternalName,
+    /// Call signature of function.
+    pub signature: SigRef,
+    /// Will this function be defined nearby, such that it will always be a certain distance away,
+    /// after linking? If so, references to it can avoid going through a GOT or PLT. Note that
+    /// symbols meant to be preemptible cannot be considered colocated.
+ pub colocated: bool, +} + +impl fmt::Display for ExtFuncData { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.colocated { + write!(f, "colocated ")?; + } + write!(f, "{} {}", self.name, self.signature) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ir::types::{B8, F32, I32}; + use alloc::string::ToString; + + #[test] + fn argument_type() { + let t = AbiParam::new(I32); + assert_eq!(t.to_string(), "i32"); + let mut t = t.uext(); + assert_eq!(t.to_string(), "i32 uext"); + assert_eq!(t.sext().to_string(), "i32 sext"); + t.purpose = ArgumentPurpose::StructReturn; + assert_eq!(t.to_string(), "i32 uext sret"); + } + + #[test] + fn argument_purpose() { + let all_purpose = [ + ArgumentPurpose::Normal, + ArgumentPurpose::StructReturn, + ArgumentPurpose::Link, + ArgumentPurpose::FramePointer, + ArgumentPurpose::CalleeSaved, + ArgumentPurpose::VMContext, + ArgumentPurpose::SignatureId, + ArgumentPurpose::StackLimit, + ]; + for (&e, &n) in all_purpose.iter().zip(PURPOSE_NAMES.iter()) { + assert_eq!(e.to_string(), n); + assert_eq!(Ok(e), n.parse()); + } + } + + #[test] + fn call_conv() { + for &cc in &[ + CallConv::Fast, + CallConv::Cold, + CallConv::SystemV, + CallConv::WindowsFastcall, + CallConv::BaldrdashSystemV, + CallConv::BaldrdashWindows, + ] { + assert_eq!(Ok(cc), cc.to_string().parse()) + } + } + + #[test] + fn signatures() { + let mut sig = Signature::new(CallConv::BaldrdashSystemV); + assert_eq!(sig.to_string(), "() baldrdash_system_v"); + sig.params.push(AbiParam::new(I32)); + assert_eq!(sig.to_string(), "(i32) baldrdash_system_v"); + sig.returns.push(AbiParam::new(F32)); + assert_eq!(sig.to_string(), "(i32) -> f32 baldrdash_system_v"); + sig.params.push(AbiParam::new(I32.by(4).unwrap())); + assert_eq!(sig.to_string(), "(i32, i32x4) -> f32 baldrdash_system_v"); + sig.returns.push(AbiParam::new(B8)); + assert_eq!( + sig.to_string(), + "(i32, i32x4) -> f32, b8 baldrdash_system_v" + ); + + // Order does not matter. + sig.params[0].location = ArgumentLoc::Stack(24); + sig.params[1].location = ArgumentLoc::Stack(8); + + // Writing ABI-annotated signatures. + assert_eq!( + sig.to_string(), + "(i32 [24], i32x4 [8]) -> f32, b8 baldrdash_system_v" + ); + } +} diff --git a/cranelift/codegen/src/ir/extname.rs b/cranelift/codegen/src/ir/extname.rs new file mode 100644 index 0000000000..c12a873d26 --- /dev/null +++ b/cranelift/codegen/src/ir/extname.rs @@ -0,0 +1,163 @@ +//! External names. +//! +//! These are identifiers for declaring entities defined outside the current +//! function. The name of an external declaration doesn't have any meaning to +//! Cranelift, which compiles functions independently. + +use crate::ir::LibCall; +use core::cmp; +use core::fmt::{self, Write}; +use core::str::FromStr; + +const TESTCASE_NAME_LENGTH: usize = 16; + +/// The name of an external is either a reference to a user-defined symbol +/// table, or a short sequence of ascii bytes so that test cases do not have +/// to keep track of a symbol table. +/// +/// External names are primarily used as keys by code using Cranelift to map +/// from a `cranelift_codegen::ir::FuncRef` or similar to additional associated +/// data. +/// +/// External names can also serve as a primitive testing and debugging tool. +/// In particular, many `.clif` test files use function names to identify +/// functions. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ExternalName { + /// A name in a user-defined symbol table. Cranelift does not interpret + /// these numbers in any way. 
+    User {
+        /// Arbitrary.
+        namespace: u32,
+        /// Arbitrary.
+        index: u32,
+    },
+    /// A test case function name of up to 16 ascii characters
+    /// (`TESTCASE_NAME_LENGTH`). This is not intended to be used outside test
+    /// cases.
+    TestCase {
+        /// How many of the bytes in `ascii` are valid?
+        length: u8,
+        /// Ascii bytes of the name.
+        ascii: [u8; TESTCASE_NAME_LENGTH],
+    },
+    /// A well-known runtime library function.
+    LibCall(LibCall),
+}
+
+impl ExternalName {
+    /// Creates a new external name from a sequence of bytes. Caller is expected
+    /// to guarantee bytes are only ascii alphanumeric or `_`.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use cranelift_codegen::ir::ExternalName;
+    /// // Create `ExternalName` from a string.
+    /// let name = ExternalName::testcase("hello");
+    /// assert_eq!(name.to_string(), "%hello");
+    /// ```
+    pub fn testcase<T: AsRef<[u8]>>(v: T) -> Self {
+        let vec = v.as_ref();
+        let len = cmp::min(vec.len(), TESTCASE_NAME_LENGTH);
+        let mut bytes = [0u8; TESTCASE_NAME_LENGTH];
+        bytes[0..len].copy_from_slice(&vec[0..len]);
+
+        Self::TestCase {
+            length: len as u8,
+            ascii: bytes,
+        }
+    }
+
+    /// Create a new external name from user-provided integer indices.
+    ///
+    /// # Examples
+    /// ```rust
+    /// # use cranelift_codegen::ir::ExternalName;
+    /// // Create `ExternalName` from integer indices
+    /// let name = ExternalName::user(123, 456);
+    /// assert_eq!(name.to_string(), "u123:456");
+    /// ```
+    pub fn user(namespace: u32, index: u32) -> Self {
+        Self::User { namespace, index }
+    }
+}
+
+impl Default for ExternalName {
+    fn default() -> Self {
+        Self::user(0, 0)
+    }
+}
+
+impl fmt::Display for ExternalName {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Self::User { namespace, index } => write!(f, "u{}:{}", namespace, index),
+            Self::TestCase { length, ascii } => {
+                f.write_char('%')?;
+                for byte in ascii.iter().take(length as usize) {
+                    f.write_char(*byte as char)?;
+                }
+                Ok(())
+            }
+            Self::LibCall(lc) => write!(f, "%{}", lc),
+        }
+    }
+}
+
+impl FromStr for ExternalName {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        // Try to parse as a libcall name, otherwise it's a test case.
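+        // For example, "FloorF32" parses as `LibCall(LibCall::FloorF32)`,
+        // while an unrecognized name such as "my_func" (a hypothetical
+        // illustration) falls back to a `TestCase` name via `Self::testcase`.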
+        match s.parse() {
+            Ok(lc) => Ok(Self::LibCall(lc)),
+            Err(_) => Ok(Self::testcase(s.as_bytes())),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::ExternalName;
+    use crate::ir::LibCall;
+    use alloc::string::ToString;
+    use core::u32;
+
+    #[test]
+    fn display_testcase() {
+        assert_eq!(ExternalName::testcase("").to_string(), "%");
+        assert_eq!(ExternalName::testcase("x").to_string(), "%x");
+        assert_eq!(ExternalName::testcase("x_1").to_string(), "%x_1");
+        assert_eq!(
+            ExternalName::testcase("longname12345678").to_string(),
+            "%longname12345678"
+        );
+        // Constructor will silently drop bytes beyond the 16th
+        assert_eq!(
+            ExternalName::testcase("longname123456789").to_string(),
+            "%longname12345678"
+        );
+    }
+
+    #[test]
+    fn display_user() {
+        assert_eq!(ExternalName::user(0, 0).to_string(), "u0:0");
+        assert_eq!(ExternalName::user(1, 1).to_string(), "u1:1");
+        assert_eq!(
+            ExternalName::user(u32::MAX, u32::MAX).to_string(),
+            "u4294967295:4294967295"
+        );
+    }
+
+    #[test]
+    fn parsing() {
+        assert_eq!(
+            "FloorF32".parse(),
+            Ok(ExternalName::LibCall(LibCall::FloorF32))
+        );
+        assert_eq!(
+            ExternalName::LibCall(LibCall::FloorF32).to_string(),
+            "%FloorF32"
+        );
+    }
+}
diff --git a/cranelift/codegen/src/ir/framelayout.rs b/cranelift/codegen/src/ir/framelayout.rs
new file mode 100644
index 0000000000..983b209279
--- /dev/null
+++ b/cranelift/codegen/src/ir/framelayout.rs
@@ -0,0 +1,70 @@
+//! Frame layout item changes.
+
+use crate::ir::entities::Inst;
+use crate::isa::RegUnit;
+use std::boxed::Box;
+
+use crate::HashMap;
+
+/// Change in the frame layout information.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
+pub enum FrameLayoutChange {
+    /// Base CallFrameAddress (CFA) pointer moved to different register/offset.
+    CallFrameAddressAt {
+        /// CFA register.
+        reg: RegUnit,
+        /// CFA offset.
+        offset: isize,
+    },
+    /// Register saved at.
+    RegAt {
+        /// Saved register.
+        reg: RegUnit,
+        /// Offset in the frame (offset from CFA).
+        cfa_offset: isize,
+    },
+    /// Return address saved at.
+    ReturnAddressAt {
+        /// Offset in the frame (offset from CFA).
+        cfa_offset: isize,
+    },
+    /// The entire frame layout must be preserved somewhere to be restored at a corresponding
+    /// `Restore` change.
+    ///
+    /// This likely maps to the DWARF call frame instruction `.cfa_remember_state`.
+    Preserve,
+    /// Restore the entire frame layout from a corresponding prior `Preserve` frame change.
+    ///
+    /// This likely maps to the DWARF call frame instruction `.cfa_restore_state`.
+    Restore,
+}
+
+/// Set of frame layout changes.
+pub type FrameLayoutChanges = Box<[FrameLayoutChange]>;
+
+/// Frame items layout for (prologue/epilogue) instructions.
+#[derive(Debug, Clone)]
+pub struct FrameLayout {
+    /// Initial frame layout.
+    pub initial: FrameLayoutChanges,
+
+    /// Instruction frame layout (changes). Because the map will not be dense,
+    /// a HashMap is used instead of a SecondaryMap.
+    pub instructions: HashMap<Inst, FrameLayoutChanges>,
+}
+
+impl FrameLayout {
+    /// Create instance of FrameLayout.
+    pub fn new() -> Self {
+        Self {
+            initial: vec![].into_boxed_slice(),
+            instructions: HashMap::new(),
+        }
+    }
+
+    /// Clear the structure.
+    pub fn clear(&mut self) {
+        self.initial = vec![].into_boxed_slice();
+        self.instructions.clear();
+    }
+}
diff --git a/cranelift/codegen/src/ir/function.rs b/cranelift/codegen/src/ir/function.rs
new file mode 100644
index 0000000000..1e72d2bc48
--- /dev/null
+++ b/cranelift/codegen/src/ir/function.rs
@@ -0,0 +1,367 @@
+//! Intermediate representation of a function.
+//!
+//!
The `Function` struct defined in this module owns all of its basic blocks and
+//! instructions.
+
+use crate::binemit::CodeOffset;
+use crate::entity::{PrimaryMap, SecondaryMap};
+use crate::ir;
+use crate::ir::{
+    Block, ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, HeapData, Inst, JumpTable,
+    JumpTableData, Opcode, SigRef, StackSlot, StackSlotData, Table, TableData,
+};
+use crate::ir::{BlockOffsets, FrameLayout, InstEncodings, SourceLocs, StackSlots, ValueLocations};
+use crate::ir::{DataFlowGraph, ExternalName, Layout, Signature};
+use crate::ir::{JumpTableOffsets, JumpTables};
+use crate::isa::{CallConv, EncInfo, Encoding, Legalize, TargetIsa};
+use crate::regalloc::{EntryRegDiversions, RegDiversions};
+use crate::value_label::ValueLabelsRanges;
+use crate::write::write_function;
+use core::fmt;
+
+/// A function.
+///
+/// Functions can be cloned, but it is not a very fast operation.
+/// The clone will have all the same entity numbers as the original.
+#[derive(Clone)]
+pub struct Function {
+    /// Name of this function. Mostly used by `.clif` files.
+    pub name: ExternalName,
+
+    /// Signature of this function.
+    pub signature: Signature,
+
+    /// The old signature of this function, before the most recent legalization,
+    /// if any.
+    pub old_signature: Option<Signature>,
+
+    /// Stack slots allocated in this function.
+    pub stack_slots: StackSlots,
+
+    /// Global values referenced.
+    pub global_values: PrimaryMap<GlobalValue, GlobalValueData>,
+
+    /// Heaps referenced.
+    pub heaps: PrimaryMap<Heap, HeapData>,
+
+    /// Tables referenced.
+    pub tables: PrimaryMap<Table, TableData>,
+
+    /// Jump tables used in this function.
+    pub jump_tables: JumpTables,
+
+    /// Data flow graph containing the primary definition of all instructions, blocks and values.
+    pub dfg: DataFlowGraph,
+
+    /// Layout of blocks and instructions in the function body.
+    pub layout: Layout,
+
+    /// Encoding recipe and bits for the legal instructions.
+    /// Illegal instructions have the `Encoding::default()` value.
+    pub encodings: InstEncodings,
+
+    /// Location assigned to every value.
+    pub locations: ValueLocations,
+
+    /// Non-default locations assigned to values at the entry of basic blocks.
+    ///
+    /// At the entry of each basic block, we might have values which are not in their default
+    /// ValueLocation. This field records these register-to-register moves as Diversions.
+    pub entry_diversions: EntryRegDiversions,
+
+    /// Code offsets of the block headers.
+    ///
+    /// This information is only transiently available after the `binemit::relax_branches` function
+    /// computes it, and it can easily be recomputed by calling that function. It is not included
+    /// in the textual IR format.
+    pub offsets: BlockOffsets,
+
+    /// Code offsets of Jump Table headers.
+    pub jt_offsets: JumpTableOffsets,
+
+    /// Source locations.
+    ///
+    /// Track the original source location for each instruction. The source locations are not
+    /// interpreted by Cranelift, only preserved.
+    pub srclocs: SourceLocs,
+
+    /// Instruction that marks the end (inclusive) of the function's prologue.
+    ///
+    /// This is used for some calling conventions to track the end of unwind information.
+    pub prologue_end: Option<Inst>,
+
+    /// Frame layout for the instructions.
+    ///
+    /// Stack unwinding requires information about which registers are saved in
+    /// the frame, and where. This information is created during the prologue
+    /// and epilogue passes.
+    pub frame_layout: Option<FrameLayout>,
+}
+
+impl Function {
+    /// Create a function with the given name and signature.
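+    ///
+    /// A hypothetical sketch:
+    ///
+    /// ```ignore
+    /// use cranelift_codegen::ir::{ExternalName, Function, Signature};
+    /// use cranelift_codegen::isa::CallConv;
+    ///
+    /// let sig = Signature::new(CallConv::SystemV);
+    /// let func = Function::with_name_signature(ExternalName::user(0, 0), sig);
+    /// assert_eq!(func.name.to_string(), "u0:0");
+    /// ```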
+    pub fn with_name_signature(name: ExternalName, sig: Signature) -> Self {
+        Self {
+            name,
+            signature: sig,
+            old_signature: None,
+            stack_slots: StackSlots::new(),
+            global_values: PrimaryMap::new(),
+            heaps: PrimaryMap::new(),
+            tables: PrimaryMap::new(),
+            jump_tables: PrimaryMap::new(),
+            dfg: DataFlowGraph::new(),
+            layout: Layout::new(),
+            encodings: SecondaryMap::new(),
+            locations: SecondaryMap::new(),
+            entry_diversions: EntryRegDiversions::new(),
+            offsets: SecondaryMap::new(),
+            jt_offsets: SecondaryMap::new(),
+            srclocs: SecondaryMap::new(),
+            prologue_end: None,
+            frame_layout: None,
+        }
+    }
+
+    /// Clear all data structures in this function.
+    pub fn clear(&mut self) {
+        self.signature.clear(CallConv::Fast);
+        self.stack_slots.clear();
+        self.global_values.clear();
+        self.heaps.clear();
+        self.tables.clear();
+        self.jump_tables.clear();
+        self.dfg.clear();
+        self.layout.clear();
+        self.encodings.clear();
+        self.locations.clear();
+        self.entry_diversions.clear();
+        self.offsets.clear();
+        self.jt_offsets.clear();
+        self.srclocs.clear();
+        self.prologue_end = None;
+        self.frame_layout = None;
+    }
+
+    /// Create a new empty, anonymous function with a Fast calling convention.
+    pub fn new() -> Self {
+        Self::with_name_signature(ExternalName::default(), Signature::new(CallConv::Fast))
+    }
+
+    /// Creates a jump table in the function, to be used by `br_table` instructions.
+    pub fn create_jump_table(&mut self, data: JumpTableData) -> JumpTable {
+        self.jump_tables.push(data)
+    }
+
+    /// Creates a stack slot in the function, to be used by `stack_load`, `stack_store` and
+    /// `stack_addr` instructions.
+    pub fn create_stack_slot(&mut self, data: StackSlotData) -> StackSlot {
+        self.stack_slots.push(data)
+    }
+
+    /// Adds a signature which can later be used to declare an external function import.
+    pub fn import_signature(&mut self, signature: Signature) -> SigRef {
+        self.dfg.signatures.push(signature)
+    }
+
+    /// Declare an external function import.
+    pub fn import_function(&mut self, data: ExtFuncData) -> FuncRef {
+        self.dfg.ext_funcs.push(data)
+    }
+
+    /// Declares a global value accessible to the function.
+    pub fn create_global_value(&mut self, data: GlobalValueData) -> GlobalValue {
+        self.global_values.push(data)
+    }
+
+    /// Declares a heap accessible to the function.
+    pub fn create_heap(&mut self, data: HeapData) -> Heap {
+        self.heaps.push(data)
+    }
+
+    /// Declares a table accessible to the function.
+    pub fn create_table(&mut self, data: TableData) -> Table {
+        self.tables.push(data)
+    }
+
+    /// Return an object that can display this function with correct ISA-specific annotations.
+    pub fn display<'a, I: Into<Option<&'a dyn TargetIsa>>>(
+        &'a self,
+        isa: I,
+    ) -> DisplayFunction<'a> {
+        DisplayFunction(self, isa.into().into())
+    }
+
+    /// Return an object that can display this function with correct ISA-specific annotations.
+    pub fn display_with<'a>(
+        &'a self,
+        annotations: DisplayFunctionAnnotations<'a>,
+    ) -> DisplayFunction<'a> {
+        DisplayFunction(self, annotations)
+    }
+
+    /// Find a presumed unique special-purpose function parameter value.
+    ///
+    /// Returns the value of the last `purpose` parameter, or `None` if no such parameter exists.
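+    ///
+    /// For instance (a hypothetical sketch), `func.special_param(ir::ArgumentPurpose::VMContext)`
+    /// would return the entry block's `vmctx` parameter if the signature declares one.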
+    pub fn special_param(&self, purpose: ir::ArgumentPurpose) -> Option<ir::Value> {
+        let entry = self.layout.entry_block().expect("Function is empty");
+        self.signature
+            .special_param_index(purpose)
+            .map(|i| self.dfg.block_params(entry)[i])
+    }
+
+    /// Get an iterator over the instructions in `block`, including offsets and encoded instruction
+    /// sizes.
+    ///
+    /// The iterator returns `(offset, inst, size)` tuples, where `offset` is the offset in bytes
+    /// from the beginning of the function to the instruction, and `size` is the size of the
+    /// instruction in bytes, or 0 for unencoded instructions.
+    ///
+    /// This function can only be used after the code layout has been computed by the
+    /// `binemit::relax_branches()` function.
+    pub fn inst_offsets<'a>(&'a self, block: Block, encinfo: &EncInfo) -> InstOffsetIter<'a> {
+        assert!(
+            !self.offsets.is_empty(),
+            "Code layout must be computed first"
+        );
+        let mut divert = RegDiversions::new();
+        divert.at_block(&self.entry_diversions, block);
+        InstOffsetIter {
+            encinfo: encinfo.clone(),
+            func: self,
+            divert,
+            encodings: &self.encodings,
+            offset: self.offsets[block],
+            iter: self.layout.block_insts(block),
+        }
+    }
+
+    /// Wrapper around `encode` which assigns `inst` the resulting encoding.
+    pub fn update_encoding(&mut self, inst: ir::Inst, isa: &dyn TargetIsa) -> Result<(), Legalize> {
+        self.encode(inst, isa).map(|e| self.encodings[inst] = e)
+    }
+
+    /// Wrapper around `TargetIsa::encode` for encoding an existing instruction
+    /// in the `Function`.
+    pub fn encode(&self, inst: ir::Inst, isa: &dyn TargetIsa) -> Result<Encoding, Legalize> {
+        isa.encode(&self, &self.dfg[inst], self.dfg.ctrl_typevar(inst))
+    }
+
+    /// Starts collection of debug information.
+    pub fn collect_debug_info(&mut self) {
+        self.dfg.collect_debug_info();
+        self.collect_frame_layout_info();
+    }
+
+    /// Starts collection of frame layout information.
+    pub fn collect_frame_layout_info(&mut self) {
+        self.frame_layout = Some(FrameLayout::new());
+    }
+
+    /// Changes the destination of a jump or branch instruction.
+    /// Does nothing if called with a non-jump or non-branch instruction.
+    pub fn change_branch_destination(&mut self, inst: Inst, new_dest: Block) {
+        match self.dfg[inst].branch_destination_mut() {
+            None => (),
+            Some(inst_dest) => *inst_dest = new_dest,
+        }
+    }
+
+    /// Checks that the specified block can be encoded as a basic block.
+    ///
+    /// On error, returns the first invalid instruction and an error message.
+    pub fn is_block_basic(&self, block: Block) -> Result<(), (Inst, &'static str)> {
+        let dfg = &self.dfg;
+        let inst_iter = self.layout.block_insts(block);
+
+        // Ignore all instructions prior to the first branch.
+        let mut inst_iter = inst_iter.skip_while(|&inst| !dfg[inst].opcode().is_branch());
+
+        // A conditional branch is permitted in a basic block only when followed
+        // by a terminal jump or fallthrough instruction.
+        if let Some(_branch) = inst_iter.next() {
+            if let Some(next) = inst_iter.next() {
+                match dfg[next].opcode() {
+                    Opcode::Fallthrough | Opcode::Jump => (),
+                    _ => return Err((next, "post-branch instruction not fallthrough or jump")),
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Returns true if the function doesn't call any other functions. This is not
+    /// to be confused with a "leaf function" in Windows terminology.
+    pub fn is_leaf(&self) -> bool {
+        // Conservative result: if there's at least one function signature referenced in this
+        // function, assume it is not a leaf.
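+        // Signatures are only added through `import_signature()`, so an empty
+        // signature table means no calls were declared.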
+        self.dfg.signatures.is_empty()
+    }
+}
+
+/// Additional annotations for function display.
+#[derive(Default)]
+pub struct DisplayFunctionAnnotations<'a> {
+    /// Enable ISA annotations.
+    pub isa: Option<&'a dyn TargetIsa>,
+
+    /// Enable value labels annotations.
+    pub value_ranges: Option<&'a ValueLabelsRanges>,
+}
+
+impl<'a> From<Option<&'a dyn TargetIsa>> for DisplayFunctionAnnotations<'a> {
+    fn from(isa: Option<&'a dyn TargetIsa>) -> DisplayFunctionAnnotations {
+        DisplayFunctionAnnotations {
+            isa,
+            value_ranges: None,
+        }
+    }
+}
+
+/// Wrapper type capable of displaying a `Function` with correct ISA annotations.
+pub struct DisplayFunction<'a>(&'a Function, DisplayFunctionAnnotations<'a>);
+
+impl<'a> fmt::Display for DisplayFunction<'a> {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        write_function(fmt, self.0, &self.1)
+    }
+}
+
+impl fmt::Display for Function {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        write_function(fmt, self, &DisplayFunctionAnnotations::default())
+    }
+}
+
+impl fmt::Debug for Function {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        write_function(fmt, self, &DisplayFunctionAnnotations::default())
+    }
+}
+
+/// Iterator returning instruction offsets and sizes: `(offset, inst, size)`.
+pub struct InstOffsetIter<'a> {
+    encinfo: EncInfo,
+    divert: RegDiversions,
+    func: &'a Function,
+    encodings: &'a InstEncodings,
+    offset: CodeOffset,
+    iter: ir::layout::Insts<'a>,
+}
+
+impl<'a> Iterator for InstOffsetIter<'a> {
+    type Item = (CodeOffset, ir::Inst, CodeOffset);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iter.next().map(|inst| {
+            self.divert.apply(&self.func.dfg[inst]);
+            let byte_size =
+                self.encinfo
+                    .byte_size(self.encodings[inst], inst, &self.divert, self.func);
+            let offset = self.offset;
+            self.offset += byte_size;
+            (offset, inst, byte_size)
+        })
+    }
+}
diff --git a/cranelift/codegen/src/ir/globalvalue.rs b/cranelift/codegen/src/ir/globalvalue.rs
new file mode 100644
index 0000000000..305654a95f
--- /dev/null
+++ b/cranelift/codegen/src/ir/globalvalue.rs
@@ -0,0 +1,136 @@
+//! Global values.
+
+use crate::ir::immediates::{Imm64, Offset32};
+use crate::ir::{ExternalName, GlobalValue, Type};
+use crate::isa::TargetIsa;
+use core::fmt;
+
+/// Information about a global value declaration.
+#[derive(Clone)]
+pub enum GlobalValueData {
+    /// Value is the address of the VM context struct.
+    VMContext,
+
+    /// Value is pointed to by another global value.
+    ///
+    /// The `base` global value is assumed to contain a pointer. This global value is computed
+    /// by loading from memory at that pointer value. The memory must be accessible, and
+    /// naturally aligned to hold a value of the type. The data at this address is assumed
+    /// to never change while the current function is executing.
+    Load {
+        /// The base pointer global value.
+        base: GlobalValue,
+
+        /// Offset added to the base pointer before doing the load.
+        offset: Offset32,
+
+        /// Type of the loaded value.
+        global_type: Type,
+
+        /// Specifies whether the memory that this refers to is readonly, allowing for the
+        /// elimination of redundant loads.
+        readonly: bool,
+    },
+
+    /// Value is an offset from another global value.
+    IAddImm {
+        /// The base pointer global value.
+        base: GlobalValue,
+
+        /// Byte offset to be added to the value.
+        offset: Imm64,
+
+        /// Type of the iadd.
+        global_type: Type,
+    },
+
+    /// Value is symbolic, meaning it's a name which will be resolved to an
+    /// actual value later (e.g. by linking).
Cranelift itself does not interpret + /// this name; it's used by embedders to link with other data structures. + /// + /// For now, symbolic values always have pointer type, and represent + /// addresses, however in the future they could be used to represent other + /// things as well. + Symbol { + /// The symbolic name. + name: ExternalName, + + /// Offset from the symbol. This can be used instead of IAddImm to represent folding an + /// offset into a symbol. + offset: Imm64, + + /// Will this symbol be defined nearby, such that it will always be a certain distance + /// away, after linking? If so, references to it can avoid going through a GOT. Note that + /// symbols meant to be preemptible cannot be colocated. + colocated: bool, + + /// Does this symbol refer to a thread local storage value? + tls: bool, + }, +} + +impl GlobalValueData { + /// Assume that `self` is an `GlobalValueData::Symbol` and return its name. + pub fn symbol_name(&self) -> &ExternalName { + match *self { + Self::Symbol { ref name, .. } => name, + _ => panic!("only symbols have names"), + } + } + + /// Return the type of this global. + pub fn global_type(&self, isa: &dyn TargetIsa) -> Type { + match *self { + Self::VMContext { .. } | Self::Symbol { .. } => isa.pointer_type(), + Self::IAddImm { global_type, .. } | Self::Load { global_type, .. } => global_type, + } + } +} + +impl fmt::Display for GlobalValueData { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Self::VMContext => write!(f, "vmctx"), + Self::Load { + base, + offset, + global_type, + readonly, + } => write!( + f, + "load.{} notrap aligned {}{}{}", + global_type, + if readonly { "readonly " } else { "" }, + base, + offset + ), + Self::IAddImm { + global_type, + base, + offset, + } => write!(f, "iadd_imm.{} {}, {}", global_type, base, offset), + Self::Symbol { + ref name, + offset, + colocated, + tls, + } => { + write!( + f, + "symbol {}{}{}", + if colocated { "colocated " } else { "" }, + if tls { "tls " } else { "" }, + name + )?; + let offset_val: i64 = offset.into(); + if offset_val > 0 { + write!(f, "+")?; + } + if offset_val != 0 { + write!(f, "{}", offset)?; + } + Ok(()) + } + } + } +} diff --git a/cranelift/codegen/src/ir/heap.rs b/cranelift/codegen/src/ir/heap.rs new file mode 100644 index 0000000000..8a4b4e84b9 --- /dev/null +++ b/cranelift/codegen/src/ir/heap.rs @@ -0,0 +1,62 @@ +//! Heaps. + +use crate::ir::immediates::Uimm64; +use crate::ir::{GlobalValue, Type}; +use core::fmt; + +/// Information about a heap declaration. +#[derive(Clone)] +pub struct HeapData { + /// The address of the start of the heap's storage. + pub base: GlobalValue, + + /// Guaranteed minimum heap size in bytes. Heap accesses before `min_size` don't need bounds + /// checking. + pub min_size: Uimm64, + + /// Size in bytes of the offset-guard pages following the heap. + pub offset_guard_size: Uimm64, + + /// Heap style, with additional style-specific info. + pub style: HeapStyle, + + /// The index type for the heap. + pub index_type: Type, +} + +/// Style of heap including style-specific information. +#[derive(Clone)] +pub enum HeapStyle { + /// A dynamic heap can be relocated to a different base address when it is grown. + Dynamic { + /// Global value providing the current bound of the heap in bytes. + bound_gv: GlobalValue, + }, + + /// A static heap has a fixed base address and a number of not-yet-allocated pages before the + /// offset-guard pages. + Static { + /// Heap bound in bytes. 
+        /// The offset-guard pages are allocated after the bound.
+        bound: Uimm64,
+    },
+}
+
+impl fmt::Display for HeapData {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str(match self.style {
+            HeapStyle::Dynamic { .. } => "dynamic",
+            HeapStyle::Static { .. } => "static",
+        })?;
+
+        write!(f, " {}, min {}", self.base, self.min_size)?;
+        match self.style {
+            HeapStyle::Dynamic { bound_gv } => write!(f, ", bound {}", bound_gv)?,
+            HeapStyle::Static { bound } => write!(f, ", bound {}", bound)?,
+        }
+        write!(
+            f,
+            ", offset_guard {}, index_type {}",
+            self.offset_guard_size, self.index_type
+        )
+    }
+}
diff --git a/cranelift/codegen/src/ir/immediates.rs b/cranelift/codegen/src/ir/immediates.rs
new file mode 100644
index 0000000000..b1d142bd9e
--- /dev/null
+++ b/cranelift/codegen/src/ir/immediates.rs
@@ -0,0 +1,1263 @@
+//! Immediate operands for Cranelift instructions
+//!
+//! This module defines the types of immediate operands that can appear on Cranelift instructions.
+//! Each type here should have a corresponding definition in the
+//! `cranelift-codegen/meta/src/shared/immediates` crate in the meta language.
+
+use alloc::vec::Vec;
+use core::fmt::{self, Display, Formatter};
+use core::str::FromStr;
+use core::{i32, u32};
+
+/// Convert a type into a vector of bytes; all implementors in this file must use little-endian
+/// orderings of bytes to match WebAssembly's little-endianness.
+pub trait IntoBytes {
+    /// Return the little-endian byte representation of the implementing type.
+    fn into_bytes(self) -> Vec<u8>;
+}
+
+impl IntoBytes for u8 {
+    fn into_bytes(self) -> Vec<u8> {
+        vec![self]
+    }
+}
+
+impl IntoBytes for i16 {
+    fn into_bytes(self) -> Vec<u8> {
+        self.to_le_bytes().to_vec()
+    }
+}
+
+impl IntoBytes for i32 {
+    fn into_bytes(self) -> Vec<u8> {
+        self.to_le_bytes().to_vec()
+    }
+}
+
+impl IntoBytes for Vec<u8> {
+    fn into_bytes(self) -> Vec<u8> {
+        self
+    }
+}
+
+/// 64-bit immediate signed integer operand.
+///
+/// An `Imm64` operand can also be used to represent immediate values of smaller integer types by
+/// sign-extending to `i64`.
+#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
+pub struct Imm64(i64);
+
+impl Imm64 {
+    /// Create a new `Imm64` representing the signed number `x`.
+    pub fn new(x: i64) -> Self {
+        Self(x)
+    }
+
+    /// Return self negated.
+    pub fn wrapping_neg(self) -> Self {
+        Self(self.0.wrapping_neg())
+    }
+}
+
+impl Into<i64> for Imm64 {
+    fn into(self) -> i64 {
+        self.0
+    }
+}
+
+impl IntoBytes for Imm64 {
+    fn into_bytes(self) -> Vec<u8> {
+        self.0.to_le_bytes().to_vec()
+    }
+}
+
+impl From<i64> for Imm64 {
+    fn from(x: i64) -> Self {
+        Self(x)
+    }
+}
+
+impl Display for Imm64 {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        let x = self.0;
+        if -10_000 < x && x < 10_000 {
+            // Use decimal for small numbers.
+            write!(f, "{}", x)
+        } else {
+            write_hex(x as u64, f)
+        }
+    }
+}
+
+/// Parse a 64-bit signed number.
+fn parse_i64(s: &str) -> Result<i64, &'static str> {
+    let negative = s.starts_with('-');
+    let s2 = if negative || s.starts_with('+') {
+        &s[1..]
+    } else {
+        s
+    };
+
+    let mut value = parse_u64(s2)?;
+
+    // We support the range-and-a-half from -2^63 .. 2^64-1.
+    if negative {
+        value = value.wrapping_neg();
+        // Don't allow large negative values to wrap around and become positive.
+        if value as i64 > 0 {
+            return Err("Negative number too small");
+        }
+    }
+    Ok(value as i64)
+}
+
+impl FromStr for Imm64 {
+    type Err = &'static str;
+
+    // Parse a decimal or hexadecimal `Imm64`, formatted as above.
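+    // For example (illustrative values taken from the tests below):
+    // "-9999".parse::<Imm64>() gives -9999, while the full-width hex form
+    // "0xffffffff_ffffffff".parse::<Imm64>() wraps to -1.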
+ fn from_str(s: &str) -> Result { + parse_i64(s).map(Self::new) + } +} + +/// 64-bit immediate unsigned integer operand. +/// +/// A `Uimm64` operand can also be used to represent immediate values of smaller integer types by +/// zero-extending to `i64`. +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] +pub struct Uimm64(u64); + +impl Uimm64 { + /// Create a new `Uimm64` representing the unsigned number `x`. + pub fn new(x: u64) -> Self { + Self(x) + } + + /// Return self negated. + pub fn wrapping_neg(self) -> Self { + Self(self.0.wrapping_neg()) + } +} + +impl Into for Uimm64 { + fn into(self) -> u64 { + self.0 + } +} + +impl From for Uimm64 { + fn from(x: u64) -> Self { + Self(x) + } +} + +/// Hexadecimal with a multiple of 4 digits and group separators: +/// +/// 0xfff0 +/// 0x0001_ffff +/// 0xffff_ffff_fff8_4400 +/// +fn write_hex(x: u64, f: &mut Formatter) -> fmt::Result { + let mut pos = (64 - x.leading_zeros() - 1) & 0xf0; + write!(f, "0x{:04x}", (x >> pos) & 0xffff)?; + while pos > 0 { + pos -= 16; + write!(f, "_{:04x}", (x >> pos) & 0xffff)?; + } + Ok(()) +} + +impl Display for Uimm64 { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let x = self.0; + if x < 10_000 { + // Use decimal for small numbers. + write!(f, "{}", x) + } else { + write_hex(x, f) + } + } +} + +/// Parse a 64-bit unsigned number. +fn parse_u64(s: &str) -> Result { + let mut value: u64 = 0; + let mut digits = 0; + + if s.starts_with("-0x") { + return Err("Invalid character in hexadecimal number"); + } else if s.starts_with("0x") { + // Hexadecimal. + for ch in s[2..].chars() { + match ch.to_digit(16) { + Some(digit) => { + digits += 1; + if digits > 16 { + return Err("Too many hexadecimal digits"); + } + // This can't overflow given the digit limit. + value = (value << 4) | u64::from(digit); + } + None => { + // Allow embedded underscores, but fail on anything else. + if ch != '_' { + return Err("Invalid character in hexadecimal number"); + } + } + } + } + } else { + // Decimal number, possibly negative. + for ch in s.chars() { + match ch.to_digit(16) { + Some(digit) => { + digits += 1; + match value.checked_mul(10) { + None => return Err("Too large decimal number"), + Some(v) => value = v, + } + match value.checked_add(u64::from(digit)) { + None => return Err("Too large decimal number"), + Some(v) => value = v, + } + } + None => { + // Allow embedded underscores, but fail on anything else. + if ch != '_' { + return Err("Invalid character in decimal number"); + } + } + } + } + } + + if digits == 0 { + return Err("No digits in number"); + } + + Ok(value) +} + +impl FromStr for Uimm64 { + type Err = &'static str; + + // Parse a decimal or hexadecimal `Uimm64`, formatted as above. + fn from_str(s: &str) -> Result { + parse_u64(s).map(Self::new) + } +} + +/// 8-bit unsigned integer immediate operand. +/// +/// This is used to indicate lane indexes typically. +pub type Uimm8 = u8; + +/// A 32-bit unsigned integer immediate operand. +/// +/// This is used to represent sizes of memory objects. 
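+///
+/// For example, `Uimm32::from(0x10000u32)` displays as `0x0001_0000`, the
+/// same grouped hexadecimal form used by the 64-bit immediates above.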
+#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] +pub struct Uimm32(u32); + +impl Into for Uimm32 { + fn into(self) -> u32 { + self.0 + } +} + +impl Into for Uimm32 { + fn into(self) -> i64 { + i64::from(self.0) + } +} + +impl From for Uimm32 { + fn from(x: u32) -> Self { + Self(x) + } +} + +impl Display for Uimm32 { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + if self.0 < 10_000 { + write!(f, "{}", self.0) + } else { + write_hex(u64::from(self.0), f) + } + } +} + +impl FromStr for Uimm32 { + type Err = &'static str; + + // Parse a decimal or hexadecimal `Uimm32`, formatted as above. + fn from_str(s: &str) -> Result { + parse_i64(s).and_then(|x| { + if 0 <= x && x <= i64::from(u32::MAX) { + Ok(Self(x as u32)) + } else { + Err("Uimm32 out of range") + } + }) + } +} + +/// A 128-bit immediate operand. +/// +/// This is used as an immediate value in SIMD instructions. +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] +pub struct V128Imm(pub [u8; 16]); + +impl V128Imm { + /// Iterate over the bytes in the constant. + pub fn bytes(&self) -> impl Iterator { + self.0.iter() + } + + /// Convert the immediate into a vector. + pub fn to_vec(self) -> Vec { + self.0.to_vec() + } + + /// Convert the immediate into a slice. + pub fn as_slice(&self) -> &[u8] { + &self.0[..] + } +} + +impl From<&[u8]> for V128Imm { + fn from(slice: &[u8]) -> Self { + assert_eq!(slice.len(), 16); + let mut buffer = [0; 16]; + buffer.copy_from_slice(slice); + Self(buffer) + } +} + +/// 32-bit signed immediate offset. +/// +/// This is used to encode an immediate offset for load/store instructions. All supported ISAs have +/// a maximum load/store offset that fits in an `i32`. +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] +pub struct Offset32(i32); + +impl Offset32 { + /// Create a new `Offset32` representing the signed number `x`. + pub fn new(x: i32) -> Self { + Self(x) + } + + /// Create a new `Offset32` representing the signed number `x` if possible. + pub fn try_from_i64(x: i64) -> Option { + let casted = x as i32; + if casted as i64 == x { + Some(Self::new(casted)) + } else { + None + } + } + + /// Add in the signed number `x` if possible. + pub fn try_add_i64(self, x: i64) -> Option { + let casted = x as i32; + if casted as i64 == x { + self.0.checked_add(casted).map(Self::new) + } else { + None + } + } +} + +impl Into for Offset32 { + fn into(self) -> i32 { + self.0 + } +} + +impl Into for Offset32 { + fn into(self) -> i64 { + i64::from(self.0) + } +} + +impl From for Offset32 { + fn from(x: i32) -> Self { + Self(x) + } +} + +impl Display for Offset32 { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + // 0 displays as an empty offset. + if self.0 == 0 { + return Ok(()); + } + + // Always include a sign. + write!(f, "{}", if self.0 < 0 { '-' } else { '+' })?; + + let val = i64::from(self.0).abs(); + if val < 10_000 { + write!(f, "{}", val) + } else { + write_hex(val as u64, f) + } + } +} + +impl FromStr for Offset32 { + type Err = &'static str; + + // Parse a decimal or hexadecimal `Offset32`, formatted as above. + fn from_str(s: &str) -> Result { + if !(s.starts_with('-') || s.starts_with('+')) { + return Err("Offset must begin with sign"); + } + parse_i64(s).and_then(|x| { + if i64::from(i32::MIN) <= x && x <= i64::from(i32::MAX) { + Ok(Self::new(x as i32)) + } else { + Err("Offset out of range") + } + }) + } +} + +/// An IEEE binary32 immediate floating point value, represented as a u32 +/// containing the bit pattern. +/// +/// All bit patterns are allowed. 
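+///
+/// For example, `1.0f32` has the bit pattern `0x3f80_0000` and is displayed
+/// as `0x1.000000p0` (see the format and parse tests below).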
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +pub struct Ieee32(u32); + +/// An IEEE binary64 immediate floating point value, represented as a u64 +/// containing the bit pattern. +/// +/// All bit patterns are allowed. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +pub struct Ieee64(u64); + +/// Format a floating point number in a way that is reasonably human-readable, and that can be +/// converted back to binary without any rounding issues. The hexadecimal formatting of normal and +/// subnormal numbers is compatible with C99 and the `printf "%a"` format specifier. The NaN and Inf +/// formats are not supported by C99. +/// +/// The encoding parameters are: +/// +/// w - exponent field width in bits +/// t - trailing significand field width in bits +/// +fn format_float(bits: u64, w: u8, t: u8, f: &mut Formatter) -> fmt::Result { + debug_assert!(w > 0 && w <= 16, "Invalid exponent range"); + debug_assert!(1 + w + t <= 64, "Too large IEEE format for u64"); + debug_assert!((t + w + 1).is_power_of_two(), "Unexpected IEEE format size"); + + let max_e_bits = (1u64 << w) - 1; + let t_bits = bits & ((1u64 << t) - 1); // Trailing significand. + let e_bits = (bits >> t) & max_e_bits; // Biased exponent. + let sign_bit = (bits >> (w + t)) & 1; + + let bias: i32 = (1 << (w - 1)) - 1; + let e = e_bits as i32 - bias; // Unbiased exponent. + let emin = 1 - bias; // Minimum exponent. + + // How many hexadecimal digits are needed for the trailing significand? + let digits = (t + 3) / 4; + // Trailing significand left-aligned in `digits` hexadecimal digits. + let left_t_bits = t_bits << (4 * digits - t); + + // All formats share the leading sign. + if sign_bit != 0 { + write!(f, "-")?; + } + + if e_bits == 0 { + if t_bits == 0 { + // Zero. + write!(f, "0.0") + } else { + // Subnormal. + write!( + f, + "0x0.{0:01$x}p{2}", + left_t_bits, + usize::from(digits), + emin + ) + } + } else if e_bits == max_e_bits { + // Always print a `+` or `-` sign for these special values. + // This makes them easier to parse as they can't be confused as identifiers. + if sign_bit == 0 { + write!(f, "+")?; + } + if t_bits == 0 { + // Infinity. + write!(f, "Inf") + } else { + // NaN. + let payload = t_bits & ((1 << (t - 1)) - 1); + if t_bits & (1 << (t - 1)) != 0 { + // Quiet NaN. + if payload != 0 { + write!(f, "NaN:0x{:x}", payload) + } else { + write!(f, "NaN") + } + } else { + // Signaling NaN. + write!(f, "sNaN:0x{:x}", payload) + } + } + } else { + // Normal number. + write!(f, "0x1.{0:01$x}p{2}", left_t_bits, usize::from(digits), e) + } +} + +/// Parse a float using the same format as `format_float` above. +/// +/// The encoding parameters are: +/// +/// w - exponent field width in bits +/// t - trailing significand field width in bits +/// +fn parse_float(s: &str, w: u8, t: u8) -> Result { + debug_assert!(w > 0 && w <= 16, "Invalid exponent range"); + debug_assert!(1 + w + t <= 64, "Too large IEEE format for u64"); + debug_assert!((t + w + 1).is_power_of_two(), "Unexpected IEEE format size"); + + let (sign_bit, s2) = if s.starts_with('-') { + (1u64 << (t + w), &s[1..]) + } else if s.starts_with('+') { + (0, &s[1..]) + } else { + (0, s) + }; + + if !s2.starts_with("0x") { + let max_e_bits = ((1u64 << w) - 1) << t; + let quiet_bit = 1u64 << (t - 1); + + // The only decimal encoding allowed is 0. + if s2 == "0.0" { + return Ok(sign_bit); + } + + if s2 == "Inf" { + // +/- infinity: e = max, t = 0. + return Ok(sign_bit | max_e_bits); + } + if s2 == "NaN" { + // Canonical quiet NaN: e = max, t = quiet. 
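+            // (For binary32 this yields the familiar bit pattern 0x7fc0_0000,
+            // plus the sign bit when negative.)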
+ return Ok(sign_bit | max_e_bits | quiet_bit); + } + if s2.starts_with("NaN:0x") { + // Quiet NaN with payload. + return match u64::from_str_radix(&s2[6..], 16) { + Ok(payload) if payload < quiet_bit => { + Ok(sign_bit | max_e_bits | quiet_bit | payload) + } + _ => Err("Invalid NaN payload"), + }; + } + if s2.starts_with("sNaN:0x") { + // Signaling NaN with payload. + return match u64::from_str_radix(&s2[7..], 16) { + Ok(payload) if 0 < payload && payload < quiet_bit => { + Ok(sign_bit | max_e_bits | payload) + } + _ => Err("Invalid sNaN payload"), + }; + } + + return Err("Float must be hexadecimal"); + } + let s3 = &s2[2..]; + + let mut digits = 0u8; + let mut digits_before_period: Option = None; + let mut significand = 0u64; + let mut exponent = 0i32; + + for (idx, ch) in s3.char_indices() { + match ch { + '.' => { + // This is the radix point. There can only be one. + if digits_before_period != None { + return Err("Multiple radix points"); + } else { + digits_before_period = Some(digits); + } + } + 'p' => { + // The following exponent is a decimal number. + let exp_str = &s3[1 + idx..]; + match exp_str.parse::() { + Ok(e) => { + exponent = i32::from(e); + break; + } + Err(_) => return Err("Bad exponent"), + } + } + _ => match ch.to_digit(16) { + Some(digit) => { + digits += 1; + if digits > 16 { + return Err("Too many digits"); + } + significand = (significand << 4) | u64::from(digit); + } + None => return Err("Invalid character"), + }, + } + } + + if digits == 0 { + return Err("No digits"); + } + + if significand == 0 { + // This is +/- 0.0. + return Ok(sign_bit); + } + + // Number of bits appearing after the radix point. + match digits_before_period { + None => {} // No radix point present. + Some(d) => exponent -= 4 * i32::from(digits - d), + }; + + // Normalize the significand and exponent. + let significant_bits = (64 - significand.leading_zeros()) as u8; + if significant_bits > t + 1 { + let adjust = significant_bits - (t + 1); + if significand & ((1u64 << adjust) - 1) != 0 { + return Err("Too many significant bits"); + } + // Adjust significand down. + significand >>= adjust; + exponent += i32::from(adjust); + } else { + let adjust = t + 1 - significant_bits; + significand <<= adjust; + exponent -= i32::from(adjust); + } + debug_assert_eq!(significand >> t, 1); + + // Trailing significand excludes the high bit. + let t_bits = significand & ((1 << t) - 1); + + let max_exp = (1i32 << w) - 2; + let bias: i32 = (1 << (w - 1)) - 1; + exponent += bias + i32::from(t); + + if exponent > max_exp { + Err("Magnitude too large") + } else if exponent > 0 { + // This is a normal number. + let e_bits = (exponent as u64) << t; + Ok(sign_bit | e_bits | t_bits) + } else if 1 - exponent <= i32::from(t) { + // This is a subnormal number: e = 0, t = significand bits. + // Renormalize significand for exponent = 1. + let adjust = 1 - exponent; + if significand & ((1u64 << adjust) - 1) != 0 { + Err("Subnormal underflow") + } else { + significand >>= adjust; + Ok(sign_bit | significand) + } + } else { + Err("Magnitude too small") + } +} + +impl Ieee32 { + /// Create a new `Ieee32` containing the bits of `x`. + pub fn with_bits(x: u32) -> Self { + Self(x) + } + + /// Create an `Ieee32` number representing `2.0^n`. 
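+    ///
+    /// For example, `Ieee32::pow2(3)` is `8.0`, which displays as
+    /// `0x1.000000p3`.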
+ pub fn pow2>(n: I) -> Self { + let n = n.into(); + let w = 8; + let t = 23; + let bias = (1 << (w - 1)) - 1; + let exponent = (n + bias) as u32; + assert!(exponent > 0, "Underflow n={}", n); + assert!(exponent < (1 << w) + 1, "Overflow n={}", n); + Self(exponent << t) + } + + /// Create an `Ieee32` number representing the greatest negative value + /// not convertable from f32 to a signed integer with width n. + pub fn fcvt_to_sint_negative_overflow>(n: I) -> Self { + let n = n.into(); + debug_assert!(n < 32); + debug_assert!(23 + 1 - n < 32); + Self::with_bits((1u32 << (32 - 1)) | Self::pow2(n - 1).0 | (1u32 << (23 + 1 - n))) + } + + /// Return self negated. + pub fn neg(self) -> Self { + Self(self.0 ^ (1 << 31)) + } + + /// Create a new `Ieee32` representing the number `x`. + pub fn with_float(x: f32) -> Self { + Self(x.to_bits()) + } + + /// Get the bitwise representation. + pub fn bits(self) -> u32 { + self.0 + } +} + +impl Display for Ieee32 { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let bits: u32 = self.0; + format_float(u64::from(bits), 8, 23, f) + } +} + +impl FromStr for Ieee32 { + type Err = &'static str; + + fn from_str(s: &str) -> Result { + match parse_float(s, 8, 23) { + Ok(b) => Ok(Self(b as u32)), + Err(s) => Err(s), + } + } +} + +impl From for Ieee32 { + fn from(x: f32) -> Self { + Self::with_float(x) + } +} + +impl IntoBytes for Ieee32 { + fn into_bytes(self) -> Vec { + self.0.to_le_bytes().to_vec() + } +} + +impl Ieee64 { + /// Create a new `Ieee64` containing the bits of `x`. + pub fn with_bits(x: u64) -> Self { + Self(x) + } + + /// Create an `Ieee64` number representing `2.0^n`. + pub fn pow2>(n: I) -> Self { + let n = n.into(); + let w = 11; + let t = 52; + let bias = (1 << (w - 1)) - 1; + let exponent = (n + bias) as u64; + assert!(exponent > 0, "Underflow n={}", n); + assert!(exponent < (1 << w) + 1, "Overflow n={}", n); + Self(exponent << t) + } + + /// Create an `Ieee64` number representing the greatest negative value + /// not convertable from f64 to a signed integer with width n. + pub fn fcvt_to_sint_negative_overflow>(n: I) -> Self { + let n = n.into(); + debug_assert!(n < 64); + debug_assert!(52 + 1 - n < 64); + Self::with_bits((1u64 << (64 - 1)) | Self::pow2(n - 1).0 | (1u64 << (52 + 1 - n))) + } + + /// Return self negated. + pub fn neg(self) -> Self { + Self(self.0 ^ (1 << 63)) + } + + /// Create a new `Ieee64` representing the number `x`. + pub fn with_float(x: f64) -> Self { + Self(x.to_bits()) + } + + /// Get the bitwise representation. 
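+    ///
+    /// For example, `Ieee64::with_float(1.0).bits()` is
+    /// `0x3ff0_0000_0000_0000`.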
+    pub fn bits(self) -> u64 {
+        self.0
+    }
+}
+
+impl Display for Ieee64 {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        let bits: u64 = self.0;
+        format_float(bits, 11, 52, f)
+    }
+}
+
+impl FromStr for Ieee64 {
+    type Err = &'static str;
+
+    fn from_str(s: &str) -> Result<Self, &'static str> {
+        match parse_float(s, 11, 52) {
+            Ok(b) => Ok(Self(b)),
+            Err(s) => Err(s),
+        }
+    }
+}
+
+impl From<f64> for Ieee64 {
+    fn from(x: f64) -> Self {
+        Self::with_float(x)
+    }
+}
+
+impl From<u64> for Ieee64 {
+    fn from(x: u64) -> Self {
+        Self::with_float(f64::from_bits(x))
+    }
+}
+
+impl IntoBytes for Ieee64 {
+    fn into_bytes(self) -> Vec<u8> {
+        self.0.to_le_bytes().to_vec()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use alloc::string::ToString;
+    use core::fmt::Display;
+    use core::mem;
+    use core::str::FromStr;
+    use core::{f32, f64};
+
+    #[test]
+    fn format_imm64() {
+        assert_eq!(Imm64(0).to_string(), "0");
+        assert_eq!(Imm64(9999).to_string(), "9999");
+        assert_eq!(Imm64(10000).to_string(), "0x2710");
+        assert_eq!(Imm64(-9999).to_string(), "-9999");
+        assert_eq!(Imm64(-10000).to_string(), "0xffff_ffff_ffff_d8f0");
+        assert_eq!(Imm64(0xffff).to_string(), "0xffff");
+        assert_eq!(Imm64(0x10000).to_string(), "0x0001_0000");
+    }
+
+    #[test]
+    fn format_uimm64() {
+        assert_eq!(Uimm64(0).to_string(), "0");
+        assert_eq!(Uimm64(9999).to_string(), "9999");
+        assert_eq!(Uimm64(10000).to_string(), "0x2710");
+        assert_eq!(Uimm64(-9999i64 as u64).to_string(), "0xffff_ffff_ffff_d8f1");
+        assert_eq!(
+            Uimm64(-10000i64 as u64).to_string(),
+            "0xffff_ffff_ffff_d8f0"
+        );
+        assert_eq!(Uimm64(0xffff).to_string(), "0xffff");
+        assert_eq!(Uimm64(0x10000).to_string(), "0x0001_0000");
+    }
+
+    // Verify that `text` can be parsed as a `T` into a value that displays as `want`.
+    fn parse_ok<T: FromStr + Display>(text: &str, want: &str)
+    where
+        <T as FromStr>::Err: Display,
+    {
+        match text.parse::<T>() {
+            Err(s) => panic!("\"{}\".parse() error: {}", text, s),
+            Ok(x) => assert_eq!(x.to_string(), want),
+        }
+    }
+
+    // Verify that `text` fails to parse as `T` with the error `msg`.
+    fn parse_err<T: FromStr + Display>(text: &str, msg: &str)
+    where
+        <T as FromStr>::Err: Display,
+    {
+        match text.parse::<T>() {
+            Err(s) => assert_eq!(s.to_string(), msg),
+            Ok(x) => panic!("Wanted Err({}), but got {}", msg, x),
+        }
+    }
+
+    #[test]
+    fn parse_imm64() {
+        parse_ok::<Imm64>("0", "0");
+        parse_ok::<Imm64>("1", "1");
+        parse_ok::<Imm64>("-0", "0");
+        parse_ok::<Imm64>("-1", "-1");
+        parse_ok::<Imm64>("0x0", "0");
+        parse_ok::<Imm64>("0xf", "15");
+        parse_ok::<Imm64>("-0x9", "-9");
+
+        // Probe limits.
+        parse_ok::<Imm64>("0xffffffff_ffffffff", "-1");
+        parse_ok::<Imm64>("0x80000000_00000000", "0x8000_0000_0000_0000");
+        parse_ok::<Imm64>("-0x80000000_00000000", "0x8000_0000_0000_0000");
+        parse_err::<Imm64>("-0x80000000_00000001", "Negative number too small");
+        parse_ok::<Imm64>("18446744073709551615", "-1");
+        parse_ok::<Imm64>("-9223372036854775808", "0x8000_0000_0000_0000");
+        // Overflow both the `checked_add` and `checked_mul`.
+        parse_err::<Imm64>("18446744073709551616", "Too large decimal number");
+        parse_err::<Imm64>("184467440737095516100", "Too large decimal number");
+        parse_err::<Imm64>("-9223372036854775809", "Negative number too small");
+
+        // Underscores are allowed where digits go.
+ parse_ok::("0_0", "0"); + parse_ok::("-_10_0", "-100"); + parse_ok::("_10_", "10"); + parse_ok::("0x97_88_bb", "0x0097_88bb"); + parse_ok::("0x_97_", "151"); + + parse_err::("", "No digits in number"); + parse_err::("-", "No digits in number"); + parse_err::("_", "No digits in number"); + parse_err::("0x", "No digits in number"); + parse_err::("0x_", "No digits in number"); + parse_err::("-0x", "No digits in number"); + parse_err::(" ", "Invalid character in decimal number"); + parse_err::("0 ", "Invalid character in decimal number"); + parse_err::(" 0", "Invalid character in decimal number"); + parse_err::("--", "Invalid character in decimal number"); + parse_err::("-0x-", "Invalid character in hexadecimal number"); + + // Hex count overflow. + parse_err::("0x0_0000_0000_0000_0000", "Too many hexadecimal digits"); + } + + #[test] + fn parse_uimm64() { + parse_ok::("0", "0"); + parse_ok::("1", "1"); + parse_ok::("0x0", "0"); + parse_ok::("0xf", "15"); + parse_ok::("0xffffffff_fffffff7", "0xffff_ffff_ffff_fff7"); + + // Probe limits. + parse_ok::("0xffffffff_ffffffff", "0xffff_ffff_ffff_ffff"); + parse_ok::("0x80000000_00000000", "0x8000_0000_0000_0000"); + parse_ok::("18446744073709551615", "0xffff_ffff_ffff_ffff"); + // Overflow both the `checked_add` and `checked_mul`. + parse_err::("18446744073709551616", "Too large decimal number"); + parse_err::("184467440737095516100", "Too large decimal number"); + + // Underscores are allowed where digits go. + parse_ok::("0_0", "0"); + parse_ok::("_10_", "10"); + parse_ok::("0x97_88_bb", "0x0097_88bb"); + parse_ok::("0x_97_", "151"); + + parse_err::("", "No digits in number"); + parse_err::("_", "No digits in number"); + parse_err::("0x", "No digits in number"); + parse_err::("0x_", "No digits in number"); + parse_err::("-", "Invalid character in decimal number"); + parse_err::("-0x", "Invalid character in hexadecimal number"); + parse_err::(" ", "Invalid character in decimal number"); + parse_err::("0 ", "Invalid character in decimal number"); + parse_err::(" 0", "Invalid character in decimal number"); + parse_err::("--", "Invalid character in decimal number"); + parse_err::("-0x-", "Invalid character in hexadecimal number"); + parse_err::("-0", "Invalid character in decimal number"); + parse_err::("-1", "Invalid character in decimal number"); + + // Hex count overflow. 
+ parse_err::("0x0_0000_0000_0000_0000", "Too many hexadecimal digits"); + } + + #[test] + fn format_offset32() { + assert_eq!(Offset32(0).to_string(), ""); + assert_eq!(Offset32(1).to_string(), "+1"); + assert_eq!(Offset32(-1).to_string(), "-1"); + assert_eq!(Offset32(9999).to_string(), "+9999"); + assert_eq!(Offset32(10000).to_string(), "+0x2710"); + assert_eq!(Offset32(-9999).to_string(), "-9999"); + assert_eq!(Offset32(-10000).to_string(), "-0x2710"); + assert_eq!(Offset32(0xffff).to_string(), "+0xffff"); + assert_eq!(Offset32(0x10000).to_string(), "+0x0001_0000"); + } + + #[test] + fn parse_offset32() { + parse_ok::("+0", ""); + parse_ok::("+1", "+1"); + parse_ok::("-0", ""); + parse_ok::("-1", "-1"); + parse_ok::("+0x0", ""); + parse_ok::("+0xf", "+15"); + parse_ok::("-0x9", "-9"); + parse_ok::("-0x8000_0000", "-0x8000_0000"); + + parse_err::("+0x8000_0000", "Offset out of range"); + } + + #[test] + fn format_ieee32() { + assert_eq!(Ieee32::with_float(0.0).to_string(), "0.0"); + assert_eq!(Ieee32::with_float(-0.0).to_string(), "-0.0"); + assert_eq!(Ieee32::with_float(1.0).to_string(), "0x1.000000p0"); + assert_eq!(Ieee32::with_float(1.5).to_string(), "0x1.800000p0"); + assert_eq!(Ieee32::with_float(0.5).to_string(), "0x1.000000p-1"); + assert_eq!( + Ieee32::with_float(f32::EPSILON).to_string(), + "0x1.000000p-23" + ); + assert_eq!(Ieee32::with_float(f32::MIN).to_string(), "-0x1.fffffep127"); + assert_eq!(Ieee32::with_float(f32::MAX).to_string(), "0x1.fffffep127"); + // Smallest positive normal number. + assert_eq!( + Ieee32::with_float(f32::MIN_POSITIVE).to_string(), + "0x1.000000p-126" + ); + // Subnormals. + assert_eq!( + Ieee32::with_float(f32::MIN_POSITIVE / 2.0).to_string(), + "0x0.800000p-126" + ); + assert_eq!( + Ieee32::with_float(f32::MIN_POSITIVE * f32::EPSILON).to_string(), + "0x0.000002p-126" + ); + assert_eq!(Ieee32::with_float(f32::INFINITY).to_string(), "+Inf"); + assert_eq!(Ieee32::with_float(f32::NEG_INFINITY).to_string(), "-Inf"); + assert_eq!(Ieee32::with_float(f32::NAN).to_string(), "+NaN"); + assert_eq!(Ieee32::with_float(-f32::NAN).to_string(), "-NaN"); + // Construct some qNaNs with payloads. + assert_eq!(Ieee32(0x7fc00001).to_string(), "+NaN:0x1"); + assert_eq!(Ieee32(0x7ff00001).to_string(), "+NaN:0x300001"); + // Signaling NaNs. + assert_eq!(Ieee32(0x7f800001).to_string(), "+sNaN:0x1"); + assert_eq!(Ieee32(0x7fa00001).to_string(), "+sNaN:0x200001"); + } + + #[test] + fn parse_ieee32() { + parse_ok::("0.0", "0.0"); + parse_ok::("+0.0", "0.0"); + parse_ok::("-0.0", "-0.0"); + parse_ok::("0x0", "0.0"); + parse_ok::("0x0.0", "0.0"); + parse_ok::("0x.0", "0.0"); + parse_ok::("0x0.", "0.0"); + parse_ok::("0x1", "0x1.000000p0"); + parse_ok::("+0x1", "0x1.000000p0"); + parse_ok::("-0x1", "-0x1.000000p0"); + parse_ok::("0x10", "0x1.000000p4"); + parse_ok::("0x10.0", "0x1.000000p4"); + parse_err::("0.", "Float must be hexadecimal"); + parse_err::(".0", "Float must be hexadecimal"); + parse_err::("0", "Float must be hexadecimal"); + parse_err::("-0", "Float must be hexadecimal"); + parse_err::(".", "Float must be hexadecimal"); + parse_err::("", "Float must be hexadecimal"); + parse_err::("-", "Float must be hexadecimal"); + parse_err::("0x", "No digits"); + parse_err::("0x..", "Multiple radix points"); + + // Check significant bits. 
+ parse_ok::("0x0.ffffff", "0x1.fffffep-1"); + parse_ok::("0x1.fffffe", "0x1.fffffep0"); + parse_ok::("0x3.fffffc", "0x1.fffffep1"); + parse_ok::("0x7.fffff8", "0x1.fffffep2"); + parse_ok::("0xf.fffff0", "0x1.fffffep3"); + parse_err::("0x1.ffffff", "Too many significant bits"); + parse_err::("0x1.fffffe0000000000", "Too many digits"); + + // Exponents. + parse_ok::("0x1p3", "0x1.000000p3"); + parse_ok::("0x1p-3", "0x1.000000p-3"); + parse_ok::("0x1.0p3", "0x1.000000p3"); + parse_ok::("0x2.0p3", "0x1.000000p4"); + parse_ok::("0x1.0p127", "0x1.000000p127"); + parse_ok::("0x1.0p-126", "0x1.000000p-126"); + parse_ok::("0x0.1p-122", "0x1.000000p-126"); + parse_err::("0x2.0p127", "Magnitude too large"); + + // Subnormals. + parse_ok::("0x1.0p-127", "0x0.800000p-126"); + parse_ok::("0x1.0p-149", "0x0.000002p-126"); + parse_ok::("0x0.000002p-126", "0x0.000002p-126"); + parse_err::("0x0.100001p-126", "Subnormal underflow"); + parse_err::("0x1.8p-149", "Subnormal underflow"); + parse_err::("0x1.0p-150", "Magnitude too small"); + + // NaNs and Infs. + parse_ok::("Inf", "+Inf"); + parse_ok::("+Inf", "+Inf"); + parse_ok::("-Inf", "-Inf"); + parse_ok::("NaN", "+NaN"); + parse_ok::("+NaN", "+NaN"); + parse_ok::("-NaN", "-NaN"); + parse_ok::("NaN:0x0", "+NaN"); + parse_err::("NaN:", "Float must be hexadecimal"); + parse_err::("NaN:0", "Float must be hexadecimal"); + parse_err::("NaN:0x", "Invalid NaN payload"); + parse_ok::("NaN:0x000001", "+NaN:0x1"); + parse_ok::("NaN:0x300001", "+NaN:0x300001"); + parse_err::("NaN:0x400001", "Invalid NaN payload"); + parse_ok::("sNaN:0x1", "+sNaN:0x1"); + parse_err::("sNaN:0x0", "Invalid sNaN payload"); + parse_ok::("sNaN:0x200001", "+sNaN:0x200001"); + parse_err::("sNaN:0x400001", "Invalid sNaN payload"); + } + + #[test] + fn pow2_ieee32() { + assert_eq!(Ieee32::pow2(0).to_string(), "0x1.000000p0"); + assert_eq!(Ieee32::pow2(1).to_string(), "0x1.000000p1"); + assert_eq!(Ieee32::pow2(-1).to_string(), "0x1.000000p-1"); + assert_eq!(Ieee32::pow2(127).to_string(), "0x1.000000p127"); + assert_eq!(Ieee32::pow2(-126).to_string(), "0x1.000000p-126"); + + assert_eq!(Ieee32::pow2(1).neg().to_string(), "-0x1.000000p1"); + } + + #[test] + fn fcvt_to_sint_negative_overflow_ieee32() { + for n in &[8, 16] { + assert_eq!(-((1u32 << (n - 1)) as f32) - 1.0, unsafe { + mem::transmute(Ieee32::fcvt_to_sint_negative_overflow(*n)) + }); + } + } + + #[test] + fn format_ieee64() { + assert_eq!(Ieee64::with_float(0.0).to_string(), "0.0"); + assert_eq!(Ieee64::with_float(-0.0).to_string(), "-0.0"); + assert_eq!(Ieee64::with_float(1.0).to_string(), "0x1.0000000000000p0"); + assert_eq!(Ieee64::with_float(1.5).to_string(), "0x1.8000000000000p0"); + assert_eq!(Ieee64::with_float(0.5).to_string(), "0x1.0000000000000p-1"); + assert_eq!( + Ieee64::with_float(f64::EPSILON).to_string(), + "0x1.0000000000000p-52" + ); + assert_eq!( + Ieee64::with_float(f64::MIN).to_string(), + "-0x1.fffffffffffffp1023" + ); + assert_eq!( + Ieee64::with_float(f64::MAX).to_string(), + "0x1.fffffffffffffp1023" + ); + // Smallest positive normal number. + assert_eq!( + Ieee64::with_float(f64::MIN_POSITIVE).to_string(), + "0x1.0000000000000p-1022" + ); + // Subnormals. 
+ assert_eq!( + Ieee64::with_float(f64::MIN_POSITIVE / 2.0).to_string(), + "0x0.8000000000000p-1022" + ); + assert_eq!( + Ieee64::with_float(f64::MIN_POSITIVE * f64::EPSILON).to_string(), + "0x0.0000000000001p-1022" + ); + assert_eq!(Ieee64::with_float(f64::INFINITY).to_string(), "+Inf"); + assert_eq!(Ieee64::with_float(f64::NEG_INFINITY).to_string(), "-Inf"); + assert_eq!(Ieee64::with_float(f64::NAN).to_string(), "+NaN"); + assert_eq!(Ieee64::with_float(-f64::NAN).to_string(), "-NaN"); + // Construct some qNaNs with payloads. + assert_eq!(Ieee64(0x7ff8000000000001).to_string(), "+NaN:0x1"); + assert_eq!( + Ieee64(0x7ffc000000000001).to_string(), + "+NaN:0x4000000000001" + ); + // Signaling NaNs. + assert_eq!(Ieee64(0x7ff0000000000001).to_string(), "+sNaN:0x1"); + assert_eq!( + Ieee64(0x7ff4000000000001).to_string(), + "+sNaN:0x4000000000001" + ); + } + + #[test] + fn parse_ieee64() { + parse_ok::("0.0", "0.0"); + parse_ok::("-0.0", "-0.0"); + parse_ok::("0x0", "0.0"); + parse_ok::("0x0.0", "0.0"); + parse_ok::("0x.0", "0.0"); + parse_ok::("0x0.", "0.0"); + parse_ok::("0x1", "0x1.0000000000000p0"); + parse_ok::("-0x1", "-0x1.0000000000000p0"); + parse_ok::("0x10", "0x1.0000000000000p4"); + parse_ok::("0x10.0", "0x1.0000000000000p4"); + parse_err::("0.", "Float must be hexadecimal"); + parse_err::(".0", "Float must be hexadecimal"); + parse_err::("0", "Float must be hexadecimal"); + parse_err::("-0", "Float must be hexadecimal"); + parse_err::(".", "Float must be hexadecimal"); + parse_err::("", "Float must be hexadecimal"); + parse_err::("-", "Float must be hexadecimal"); + parse_err::("0x", "No digits"); + parse_err::("0x..", "Multiple radix points"); + + // Check significant bits. + parse_ok::("0x0.fffffffffffff8", "0x1.fffffffffffffp-1"); + parse_ok::("0x1.fffffffffffff", "0x1.fffffffffffffp0"); + parse_ok::("0x3.ffffffffffffe", "0x1.fffffffffffffp1"); + parse_ok::("0x7.ffffffffffffc", "0x1.fffffffffffffp2"); + parse_ok::("0xf.ffffffffffff8", "0x1.fffffffffffffp3"); + parse_err::("0x3.fffffffffffff", "Too many significant bits"); + parse_err::("0x001.fffffe00000000", "Too many digits"); + + // Exponents. + parse_ok::("0x1p3", "0x1.0000000000000p3"); + parse_ok::("0x1p-3", "0x1.0000000000000p-3"); + parse_ok::("0x1.0p3", "0x1.0000000000000p3"); + parse_ok::("0x2.0p3", "0x1.0000000000000p4"); + parse_ok::("0x1.0p1023", "0x1.0000000000000p1023"); + parse_ok::("0x1.0p-1022", "0x1.0000000000000p-1022"); + parse_ok::("0x0.1p-1018", "0x1.0000000000000p-1022"); + parse_err::("0x2.0p1023", "Magnitude too large"); + + // Subnormals. + parse_ok::("0x1.0p-1023", "0x0.8000000000000p-1022"); + parse_ok::("0x1.0p-1074", "0x0.0000000000001p-1022"); + parse_ok::("0x0.0000000000001p-1022", "0x0.0000000000001p-1022"); + parse_err::("0x0.10000000000008p-1022", "Subnormal underflow"); + parse_err::("0x1.8p-1074", "Subnormal underflow"); + parse_err::("0x1.0p-1075", "Magnitude too small"); + + // NaNs and Infs. 
+ parse_ok::("Inf", "+Inf"); + parse_ok::("-Inf", "-Inf"); + parse_ok::("NaN", "+NaN"); + parse_ok::("-NaN", "-NaN"); + parse_ok::("NaN:0x0", "+NaN"); + parse_err::("NaN:", "Float must be hexadecimal"); + parse_err::("NaN:0", "Float must be hexadecimal"); + parse_err::("NaN:0x", "Invalid NaN payload"); + parse_ok::("NaN:0x000001", "+NaN:0x1"); + parse_ok::("NaN:0x4000000000001", "+NaN:0x4000000000001"); + parse_err::("NaN:0x8000000000001", "Invalid NaN payload"); + parse_ok::("sNaN:0x1", "+sNaN:0x1"); + parse_err::("sNaN:0x0", "Invalid sNaN payload"); + parse_ok::("sNaN:0x4000000000001", "+sNaN:0x4000000000001"); + parse_err::("sNaN:0x8000000000001", "Invalid sNaN payload"); + } + + #[test] + fn pow2_ieee64() { + assert_eq!(Ieee64::pow2(0).to_string(), "0x1.0000000000000p0"); + assert_eq!(Ieee64::pow2(1).to_string(), "0x1.0000000000000p1"); + assert_eq!(Ieee64::pow2(-1).to_string(), "0x1.0000000000000p-1"); + assert_eq!(Ieee64::pow2(1023).to_string(), "0x1.0000000000000p1023"); + assert_eq!(Ieee64::pow2(-1022).to_string(), "0x1.0000000000000p-1022"); + + assert_eq!(Ieee64::pow2(1).neg().to_string(), "-0x1.0000000000000p1"); + } + + #[test] + fn fcvt_to_sint_negative_overflow_ieee64() { + for n in &[8, 16, 32] { + assert_eq!(-((1u64 << (n - 1)) as f64) - 1.0, unsafe { + mem::transmute(Ieee64::fcvt_to_sint_negative_overflow(*n)) + }); + } + } +} diff --git a/cranelift/codegen/src/ir/instructions.rs b/cranelift/codegen/src/ir/instructions.rs new file mode 100644 index 0000000000..afe0266fe4 --- /dev/null +++ b/cranelift/codegen/src/ir/instructions.rs @@ -0,0 +1,714 @@ +//! Instruction formats and opcodes. +//! +//! The `instructions` module contains definitions for instruction formats, opcodes, and the +//! in-memory representation of IR instructions. +//! +//! A large part of this module is auto-generated from the instruction descriptions in the meta +//! directory. + +use alloc::vec::Vec; +use core::fmt::{self, Display, Formatter}; +use core::ops::{Deref, DerefMut}; +use core::str::FromStr; + +use crate::ir; +use crate::ir::types; +use crate::ir::{Block, FuncRef, JumpTable, SigRef, Type, Value}; +use crate::isa; + +use crate::bitset::BitSet; +use crate::entity; + +/// Some instructions use an external list of argument values because there is not enough space in +/// the 16-byte `InstructionData` struct. These value lists are stored in a memory pool in +/// `dfg.value_lists`. +pub type ValueList = entity::EntityList; + +/// Memory pool for holding value lists. See `ValueList`. +pub type ValueListPool = entity::ListPool; + +// Include code generated by `cranelift-codegen/meta/src/gen_inst.rs`. This file contains: +// +// - The `pub enum InstructionFormat` enum with all the instruction formats. +// - The `pub enum InstructionData` enum with all the instruction data fields. +// - The `pub enum Opcode` definition with all known opcodes, +// - The `const OPCODE_FORMAT: [InstructionFormat; N]` table. +// - The private `fn opcode_name(Opcode) -> &'static str` function, and +// - The hash table `const OPCODE_HASH_TABLE: [Opcode; N]`. +// +// For value type constraints: +// +// - The `const OPCODE_CONSTRAINTS : [OpcodeConstraints; N]` table. +// - The `const TYPE_SETS : [ValueTypeSet; N]` table. +// - The `const OPERAND_CONSTRAINTS : [OperandConstraint; N]` table. 
+//
+include!(concat!(env!("OUT_DIR"), "/opcodes.rs"));
+
+impl Display for Opcode {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        write!(f, "{}", opcode_name(*self))
+    }
+}
+
+impl Opcode {
+    /// Get the instruction format for this opcode.
+    pub fn format(self) -> InstructionFormat {
+        OPCODE_FORMAT[self as usize - 1]
+    }
+
+    /// Get the constraint descriptor for this opcode.
+    /// Panic if this is called on `NotAnOpcode`.
+    pub fn constraints(self) -> OpcodeConstraints {
+        OPCODE_CONSTRAINTS[self as usize - 1]
+    }
+}
+
+// This trait really belongs in cranelift-reader where it is used by the `.clif` file parser, but since
+// it critically depends on the `opcode_name()` function which is needed here anyway, it lives in
+// this module. This also saves us from running the build script twice to generate code for the two
+// separate crates.
+impl FromStr for Opcode {
+    type Err = &'static str;
+
+    /// Parse an Opcode name from a string.
+    fn from_str(s: &str) -> Result<Self, &'static str> {
+        use crate::constant_hash::{probe, simple_hash, Table};
+
+        impl<'a> Table<&'a str> for [Option<Opcode>] {
+            fn len(&self) -> usize {
+                self.len()
+            }
+
+            fn key(&self, idx: usize) -> Option<&'a str> {
+                self[idx].map(opcode_name)
+            }
+        }
+
+        match probe::<&str, [Option<Opcode>]>(&OPCODE_HASH_TABLE, s, simple_hash(s)) {
+            Err(_) => Err("Unknown opcode"),
+            // We unwrap here because probe() should have ensured that the entry
+            // at this index is not None.
+            Ok(i) => Ok(OPCODE_HASH_TABLE[i].unwrap()),
+        }
+    }
+}
+
+/// A variable list of `Value` operands used for function call arguments and passing arguments to
+/// basic blocks.
+#[derive(Clone, Debug)]
+pub struct VariableArgs(Vec<Value>);
+
+impl VariableArgs {
+    /// Create an empty argument list.
+    pub fn new() -> Self {
+        Self(Vec::new())
+    }
+
+    /// Add an argument to the end.
+    pub fn push(&mut self, v: Value) {
+        self.0.push(v)
+    }
+
+    /// Check if the list is empty.
+    pub fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+
+    /// Convert this to a value list in `pool` with `fixed` prepended.
+    pub fn into_value_list(self, fixed: &[Value], pool: &mut ValueListPool) -> ValueList {
+        let mut vlist = ValueList::default();
+        vlist.extend(fixed.iter().cloned(), pool);
+        vlist.extend(self.0, pool);
+        vlist
+    }
+}
+
+// Coerce `VariableArgs` into a `&[Value]` slice.
+impl Deref for VariableArgs {
+    type Target = [Value];
+
+    fn deref(&self) -> &[Value] {
+        &self.0
+    }
+}
+
+impl DerefMut for VariableArgs {
+    fn deref_mut(&mut self) -> &mut [Value] {
+        &mut self.0
+    }
+}
+
+impl Display for VariableArgs {
+    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
+        for (i, val) in self.0.iter().enumerate() {
+            if i == 0 {
+                write!(fmt, "{}", val)?;
+            } else {
+                write!(fmt, ", {}", val)?;
+            }
+        }
+        Ok(())
+    }
+}
+
+impl Default for VariableArgs {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// Analyzing an instruction.
+///
+/// Avoid large matches on instruction formats by using the methods defined here to examine
+/// instructions.
+impl InstructionData {
+    /// Return information about the destination of a branch or jump instruction.
+    ///
+    /// Any instruction that can transfer control to another block reveals its possible destinations
+    /// here.
+    pub fn analyze_branch<'a>(&'a self, pool: &'a ValueListPool) -> BranchInfo<'a> {
+        match *self {
+            Self::Jump {
+                destination,
+                ref args,
+                ..
+            } => BranchInfo::SingleDest(destination, args.as_slice(pool)),
+            Self::BranchInt {
+                destination,
+                ref args,
+                ..
+            }
+            | Self::BranchFloat {
+                destination,
+                ref args,
+                ..
+ } + | Self::Branch { + destination, + ref args, + .. + } => BranchInfo::SingleDest(destination, &args.as_slice(pool)[1..]), + Self::BranchIcmp { + destination, + ref args, + .. + } => BranchInfo::SingleDest(destination, &args.as_slice(pool)[2..]), + Self::BranchTable { + table, destination, .. + } => BranchInfo::Table(table, Some(destination)), + Self::IndirectJump { table, .. } => BranchInfo::Table(table, None), + _ => { + debug_assert!(!self.opcode().is_branch()); + BranchInfo::NotABranch + } + } + } + + /// Get the single destination of this branch instruction, if it is a single destination + /// branch or jump. + /// + /// Multi-destination branches like `br_table` return `None`. + pub fn branch_destination(&self) -> Option { + match *self { + Self::Jump { destination, .. } + | Self::Branch { destination, .. } + | Self::BranchInt { destination, .. } + | Self::BranchFloat { destination, .. } + | Self::BranchIcmp { destination, .. } => Some(destination), + Self::BranchTable { .. } | Self::IndirectJump { .. } => None, + _ => { + debug_assert!(!self.opcode().is_branch()); + None + } + } + } + + /// Get a mutable reference to the single destination of this branch instruction, if it is a + /// single destination branch or jump. + /// + /// Multi-destination branches like `br_table` return `None`. + pub fn branch_destination_mut(&mut self) -> Option<&mut Block> { + match *self { + Self::Jump { + ref mut destination, + .. + } + | Self::Branch { + ref mut destination, + .. + } + | Self::BranchInt { + ref mut destination, + .. + } + | Self::BranchFloat { + ref mut destination, + .. + } + | Self::BranchIcmp { + ref mut destination, + .. + } => Some(destination), + Self::BranchTable { .. } => None, + _ => { + debug_assert!(!self.opcode().is_branch()); + None + } + } + } + + /// Return information about a call instruction. + /// + /// Any instruction that can call another function reveals its call signature here. + pub fn analyze_call<'a>(&'a self, pool: &'a ValueListPool) -> CallInfo<'a> { + match *self { + Self::Call { + func_ref, ref args, .. + } => CallInfo::Direct(func_ref, args.as_slice(pool)), + Self::CallIndirect { + sig_ref, ref args, .. + } => CallInfo::Indirect(sig_ref, &args.as_slice(pool)[1..]), + _ => { + debug_assert!(!self.opcode().is_call()); + CallInfo::NotACall + } + } + } +} + +/// Information about branch and jump instructions. +pub enum BranchInfo<'a> { + /// This is not a branch or jump instruction. + /// This instruction will not transfer control to another block in the function, but it may still + /// affect control flow by returning or trapping. + NotABranch, + + /// This is a branch or jump to a single destination block, possibly taking value arguments. + SingleDest(Block, &'a [Value]), + + /// This is a jump table branch which can have many destination blocks and maybe one default block. + Table(JumpTable, Option), +} + +/// Information about call instructions. +pub enum CallInfo<'a> { + /// This is not a call instruction. + NotACall, + + /// This is a direct call to an external function declared in the preamble. See + /// `DataFlowGraph.ext_funcs`. + Direct(FuncRef, &'a [Value]), + + /// This is an indirect call with the specified signature. See `DataFlowGraph.signatures`. + Indirect(SigRef, &'a [Value]), +} + +/// Value type constraints for a given opcode. +/// +/// The `InstructionFormat` determines the constraints on most operands, but `Value` operands and +/// results are not determined by the format. 
Every `Opcode` has an associated +/// `OpcodeConstraints` object that provides the missing details. +#[derive(Clone, Copy)] +pub struct OpcodeConstraints { + /// Flags for this opcode encoded as a bit field: + /// + /// Bits 0-2: + /// Number of fixed result values. This does not include `variable_args` results as are + /// produced by call instructions. + /// + /// Bit 3: + /// This opcode is polymorphic and the controlling type variable can be inferred from the + /// designated input operand. This is the `typevar_operand` index given to the + /// `InstructionFormat` meta language object. When this bit is not set, the controlling + /// type variable must be the first output value instead. + /// + /// Bit 4: + /// This opcode is polymorphic and the controlling type variable does *not* appear as the + /// first result type. + /// + /// Bits 5-7: + /// Number of fixed value arguments. The minimum required number of value operands. + flags: u8, + + /// Permitted set of types for the controlling type variable as an index into `TYPE_SETS`. + typeset_offset: u8, + + /// Offset into `OPERAND_CONSTRAINT` table of the descriptors for this opcode. The first + /// `num_fixed_results()` entries describe the result constraints, then follows constraints for + /// the fixed `Value` input operands. (`num_fixed_value_arguments()` of them). + constraint_offset: u16, +} + +impl OpcodeConstraints { + /// Can the controlling type variable for this opcode be inferred from the designated value + /// input operand? + /// This also implies that this opcode is polymorphic. + pub fn use_typevar_operand(self) -> bool { + (self.flags & 0x8) != 0 + } + + /// Is it necessary to look at the designated value input operand in order to determine the + /// controlling type variable, or is it good enough to use the first return type? + /// + /// Most polymorphic instructions produce a single result with the type of the controlling type + /// variable. A few polymorphic instructions either don't produce any results, or produce + /// results with a fixed type. These instructions return `true`. + pub fn requires_typevar_operand(self) -> bool { + (self.flags & 0x10) != 0 + } + + /// Get the number of *fixed* result values produced by this opcode. + /// This does not include `variable_args` produced by calls. + pub fn num_fixed_results(self) -> usize { + (self.flags & 0x7) as usize + } + + /// Get the number of *fixed* input values required by this opcode. + /// + /// This does not include `variable_args` arguments on call and branch instructions. + /// + /// The number of fixed input values is usually implied by the instruction format, but + /// instruction formats that use a `ValueList` put both fixed and variable arguments in the + /// list. This method returns the *minimum* number of values required in the value list. + pub fn num_fixed_value_arguments(self) -> usize { + ((self.flags >> 5) & 0x7) as usize + } + + /// Get the offset into `TYPE_SETS` for the controlling type variable. + /// Returns `None` if the instruction is not polymorphic. + fn typeset_offset(self) -> Option { + let offset = usize::from(self.typeset_offset); + if offset < TYPE_SETS.len() { + Some(offset) + } else { + None + } + } + + /// Get the offset into OPERAND_CONSTRAINTS where the descriptors for this opcode begin. + fn constraint_offset(self) -> usize { + self.constraint_offset as usize + } + + /// Get the value type of result number `n`, having resolved the controlling type variable to + /// `ctrl_type`. 
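+    ///
+    /// For example, `iadd` resolved with `ctrl_type == types::I32` has a
+    /// single result of type `types::I32` (see the `constraints` test below).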
+ pub fn result_type(self, n: usize, ctrl_type: Type) -> Type { + debug_assert!(n < self.num_fixed_results(), "Invalid result index"); + if let ResolvedConstraint::Bound(t) = + OPERAND_CONSTRAINTS[self.constraint_offset() + n].resolve(ctrl_type) + { + t + } else { + panic!("Result constraints can't be free"); + } + } + + /// Get the value type of input value number `n`, having resolved the controlling type variable + /// to `ctrl_type`. + /// + /// Unlike results, it is possible for some input values to vary freely within a specific + /// `ValueTypeSet`. This is represented with the `ArgumentConstraint::Free` variant. + pub fn value_argument_constraint(self, n: usize, ctrl_type: Type) -> ResolvedConstraint { + debug_assert!( + n < self.num_fixed_value_arguments(), + "Invalid value argument index" + ); + let offset = self.constraint_offset() + self.num_fixed_results(); + OPERAND_CONSTRAINTS[offset + n].resolve(ctrl_type) + } + + /// Get the typeset of allowed types for the controlling type variable in a polymorphic + /// instruction. + pub fn ctrl_typeset(self) -> Option { + self.typeset_offset().map(|offset| TYPE_SETS[offset]) + } + + /// Is this instruction polymorphic? + pub fn is_polymorphic(self) -> bool { + self.ctrl_typeset().is_some() + } +} + +type BitSet8 = BitSet; +type BitSet16 = BitSet; + +/// A value type set describes the permitted set of types for a type variable. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct ValueTypeSet { + /// Allowed lane sizes + pub lanes: BitSet16, + /// Allowed int widths + pub ints: BitSet8, + /// Allowed float widths + pub floats: BitSet8, + /// Allowed bool widths + pub bools: BitSet8, + /// Allowed ref widths + pub refs: BitSet8, +} + +impl ValueTypeSet { + /// Is `scalar` part of the base type set? + /// + /// Note that the base type set does not have to be included in the type set proper. + fn is_base_type(self, scalar: Type) -> bool { + let l2b = scalar.log2_lane_bits(); + if scalar.is_int() { + self.ints.contains(l2b) + } else if scalar.is_float() { + self.floats.contains(l2b) + } else if scalar.is_bool() { + self.bools.contains(l2b) + } else if scalar.is_ref() { + self.refs.contains(l2b) + } else { + false + } + } + + /// Does `typ` belong to this set? + pub fn contains(self, typ: Type) -> bool { + let l2l = typ.log2_lane_count(); + self.lanes.contains(l2l) && self.is_base_type(typ.lane_type()) + } + + /// Get an example member of this type set. + /// + /// This is used for error messages to avoid suggesting invalid types. + pub fn example(self) -> Type { + let t = if self.ints.max().unwrap_or(0) > 5 { + types::I32 + } else if self.floats.max().unwrap_or(0) > 5 { + types::F32 + } else if self.bools.max().unwrap_or(0) > 5 { + types::B32 + } else { + types::B1 + }; + t.by(1 << self.lanes.min().unwrap()).unwrap() + } +} + +/// Operand constraints. This describes the value type constraints on a single `Value` operand. +enum OperandConstraint { + /// This operand has a concrete value type. + Concrete(Type), + + /// This operand can vary freely within the given type set. + /// The type set is identified by its index into the TYPE_SETS constant table. + Free(u8), + + /// This operand is the same type as the controlling type variable. + Same, + + /// This operand is `ctrlType.lane_of()`. + LaneOf, + + /// This operand is `ctrlType.as_bool()`. + AsBool, + + /// This operand is `ctrlType.half_width()`. + HalfWidth, + + /// This operand is `ctrlType.double_width()`. + DoubleWidth, + + /// This operand is `ctrlType.half_vector()`. 
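+    /// E.g. a controlling type of `I32X4` gives an operand type of `I32X2`.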
+    HalfVector,
+
+    /// This operand is `ctrlType.double_vector()`.
+    DoubleVector,
+}
+
+impl OperandConstraint {
+    /// Resolve this operand constraint into a concrete value type, given the value of the
+    /// controlling type variable.
+    pub fn resolve(&self, ctrl_type: Type) -> ResolvedConstraint {
+        use self::OperandConstraint::*;
+        use self::ResolvedConstraint::Bound;
+        match *self {
+            Concrete(t) => Bound(t),
+            Free(vts) => ResolvedConstraint::Free(TYPE_SETS[vts as usize]),
+            Same => Bound(ctrl_type),
+            LaneOf => Bound(ctrl_type.lane_of()),
+            AsBool => Bound(ctrl_type.as_bool()),
+            HalfWidth => Bound(ctrl_type.half_width().expect("invalid type for half_width")),
+            DoubleWidth => Bound(
+                ctrl_type
+                    .double_width()
+                    .expect("invalid type for double_width"),
+            ),
+            HalfVector => Bound(
+                ctrl_type
+                    .half_vector()
+                    .expect("invalid type for half_vector"),
+            ),
+            DoubleVector => Bound(ctrl_type.by(2).expect("invalid type for double_vector")),
+        }
+    }
+}
+
+/// The type constraint on a value argument once the controlling type variable is known.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum ResolvedConstraint {
+    /// The operand is bound to a known type.
+    Bound(Type),
+    /// The operand type can vary freely within the given set.
+    Free(ValueTypeSet),
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use alloc::string::ToString;
+
+    #[test]
+    fn opcodes() {
+        use core::mem;
+
+        let x = Opcode::Iadd;
+        let mut y = Opcode::Isub;
+
+        assert!(x != y);
+        y = Opcode::Iadd;
+        assert_eq!(x, y);
+        assert_eq!(x.format(), InstructionFormat::Binary);
+
+        assert_eq!(format!("{:?}", Opcode::IaddImm), "IaddImm");
+        assert_eq!(Opcode::IaddImm.to_string(), "iadd_imm");
+
+        // Check the matcher.
+        assert_eq!("iadd".parse::<Opcode>(), Ok(Opcode::Iadd));
+        assert_eq!("iadd_imm".parse::<Opcode>(), Ok(Opcode::IaddImm));
+        assert_eq!("iadd\0".parse::<Opcode>(), Err("Unknown opcode"));
+        assert_eq!("".parse::<Opcode>(), Err("Unknown opcode"));
+        assert_eq!("\0".parse::<Opcode>(), Err("Unknown opcode"));
+
+        // Opcode is a single byte, and because Option<Opcode> originally came to 2 bytes, early on
+        // Opcode included a variant NotAnOpcode to avoid the unnecessary bloat. Since then the Rust
+        // compiler has brought in NonZero optimization, meaning that an enum not using the 0 value
+        // can be optional for no size cost. We want to ensure Option<Opcode> remains small.
+        assert_eq!(mem::size_of::<Opcode>(), mem::size_of::<Option<Opcode>>());
+    }
+
+    #[test]
+    fn instruction_data() {
+        use core::mem;
+        // The size of the `InstructionData` enum is important for performance. It should not
+        // exceed 16 bytes. Use `Box<T>` out-of-line payloads for instruction formats that
+        // require more space than that. It would be fine with a data structure smaller than 16
+        // bytes, but what are the odds of that?
+        assert_eq!(mem::size_of::<InstructionData>(), 16);
+    }
+
+    #[test]
+    fn constraints() {
+        let a = Opcode::Iadd.constraints();
+        assert!(a.use_typevar_operand());
+        assert!(!a.requires_typevar_operand());
+        assert_eq!(a.num_fixed_results(), 1);
+        assert_eq!(a.num_fixed_value_arguments(), 2);
+        assert_eq!(a.result_type(0, types::I32), types::I32);
+        assert_eq!(a.result_type(0, types::I8), types::I8);
+        assert_eq!(
+            a.value_argument_constraint(0, types::I32),
+            ResolvedConstraint::Bound(types::I32)
+        );
+        assert_eq!(
+            a.value_argument_constraint(1, types::I32),
+            ResolvedConstraint::Bound(types::I32)
+        );
+
+        let b = Opcode::Bitcast.constraints();
+        assert!(!b.use_typevar_operand());
+        assert!(!b.requires_typevar_operand());
+        assert_eq!(b.num_fixed_results(), 1);
+        assert_eq!(b.num_fixed_value_arguments(), 1);
+        assert_eq!(b.result_type(0, types::I32), types::I32);
+        assert_eq!(b.result_type(0, types::I8), types::I8);
+        match b.value_argument_constraint(0, types::I32) {
+            ResolvedConstraint::Free(vts) => assert!(vts.contains(types::F32)),
+            _ => panic!("Unexpected constraint from value_argument_constraint"),
+        }
+
+        let c = Opcode::Call.constraints();
+        assert_eq!(c.num_fixed_results(), 0);
+        assert_eq!(c.num_fixed_value_arguments(), 0);
+
+        let i = Opcode::CallIndirect.constraints();
+        assert_eq!(i.num_fixed_results(), 0);
+        assert_eq!(i.num_fixed_value_arguments(), 1);
+
+        let cmp = Opcode::Icmp.constraints();
+        assert!(cmp.use_typevar_operand());
+        assert!(cmp.requires_typevar_operand());
+        assert_eq!(cmp.num_fixed_results(), 1);
+        assert_eq!(cmp.num_fixed_value_arguments(), 2);
+    }
+
+    #[test]
+    fn value_set() {
+        use crate::ir::types::*;
+
+        let vts = ValueTypeSet {
+            lanes: BitSet16::from_range(0, 8),
+            ints: BitSet8::from_range(4, 7),
+            floats: BitSet8::from_range(0, 0),
+            bools: BitSet8::from_range(3, 7),
+            refs: BitSet8::from_range(5, 7),
+        };
+        assert!(!vts.contains(I8));
+        assert!(vts.contains(I32));
+        assert!(vts.contains(I64));
+        assert!(vts.contains(I32X4));
+        assert!(!vts.contains(F32));
+        assert!(!vts.contains(B1));
+        assert!(vts.contains(B8));
+        assert!(vts.contains(B64));
+        assert!(vts.contains(R32));
+        assert!(vts.contains(R64));
+        assert_eq!(vts.example().to_string(), "i32");
+
+        let vts = ValueTypeSet {
+            lanes: BitSet16::from_range(0, 8),
+            ints: BitSet8::from_range(0, 0),
+            floats: BitSet8::from_range(5, 7),
+            bools: BitSet8::from_range(3, 7),
+            refs: BitSet8::from_range(0, 0),
+        };
+        assert_eq!(vts.example().to_string(), "f32");
+
+        let vts = ValueTypeSet {
+            lanes: BitSet16::from_range(1, 8),
+            ints: BitSet8::from_range(0, 0),
+            floats: BitSet8::from_range(5, 7),
+            bools: BitSet8::from_range(3, 7),
+            refs: BitSet8::from_range(0, 0),
+        };
+        assert_eq!(vts.example().to_string(), "f32x2");
+
+        let vts = ValueTypeSet {
+            lanes: BitSet16::from_range(2, 8),
+            ints: BitSet8::from_range(0, 0),
+            floats: BitSet8::from_range(0, 0),
+            bools: BitSet8::from_range(3, 7),
+            refs: BitSet8::from_range(0, 0),
+        };
+        assert!(!vts.contains(B32X2));
+        assert!(vts.contains(B32X4));
+        assert_eq!(vts.example().to_string(), "b32x4");
+
+        let vts = ValueTypeSet {
+            // TypeSet(lanes=(1, 256), ints=(8, 64))
+            lanes: BitSet16::from_range(0, 9),
+            ints: BitSet8::from_range(3, 7),
+            floats: BitSet8::from_range(0, 0),
+            bools: BitSet8::from_range(0, 0),
+            refs: BitSet8::from_range(0, 0),
+        };
+        assert!(vts.contains(I32));
+        assert!(vts.contains(I32X4));
+        assert!(!vts.contains(R32));
+        assert!(!vts.contains(R64));
+    }
+}
diff --git a/cranelift/codegen/src/ir/jumptable.rs b/cranelift/codegen/src/ir/jumptable.rs
new file mode 100644
index 0000000000..a0596728a3
--- /dev/null
+++ b/cranelift/codegen/src/ir/jumptable.rs
@@ -0,0 +1,119 @@
+//! Jump table representation.
+//!
+//! Jump tables are declared in the preamble and assigned an `ir::entities::JumpTable` reference.
+//! The actual table of destinations is stored in a `JumpTableData` struct defined in this module.
+
+use crate::ir::entities::Block;
+use alloc::vec::Vec;
+use core::fmt::{self, Display, Formatter};
+use core::slice::{Iter, IterMut};
+
+/// Contents of a jump table.
+///
+/// All jump tables use 0-based indexing and are densely populated.
+#[derive(Clone)]
+pub struct JumpTableData {
+    // Table entries.
+    table: Vec<Block>,
+}
+
+impl JumpTableData {
+    /// Create a new empty jump table.
+    pub fn new() -> Self {
+        Self { table: Vec::new() }
+    }
+
+    /// Create a new empty jump table with the specified capacity.
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self {
+            table: Vec::with_capacity(capacity),
+        }
+    }
+
+    /// Get the number of table entries.
+    pub fn len(&self) -> usize {
+        self.table.len()
+    }
+
+    /// Append a table entry.
+    pub fn push_entry(&mut self, dest: Block) {
+        self.table.push(dest)
+    }
+
+    /// Checks if any of the entries branch to `block`.
+    pub fn branches_to(&self, block: Block) -> bool {
+        self.table.iter().any(|target_block| *target_block == block)
+    }
+
+    /// Access the whole table as a slice.
+    pub fn as_slice(&self) -> &[Block] {
+        self.table.as_slice()
+    }
+
+    /// Access the whole table as a mutable slice.
+    pub fn as_mut_slice(&mut self) -> &mut [Block] {
+        self.table.as_mut_slice()
+    }
+
+    /// Returns an iterator over the table.
+    pub fn iter(&self) -> Iter<Block> {
+        self.table.iter()
+    }
+
+    /// Returns an iterator that allows modifying each value.
+    pub fn iter_mut(&mut self) -> IterMut<Block> {
+        self.table.iter_mut()
+    }
+}
+
+impl Display for JumpTableData {
+    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
+        write!(fmt, "jump_table [")?;
+        match self.table.first() {
+            None => (),
+            Some(first) => write!(fmt, "{}", first)?,
+        }
+        for block in self.table.iter().skip(1) {
+            write!(fmt, ", {}", block)?;
+        }
+        write!(fmt, "]")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::JumpTableData;
+    use crate::entity::EntityRef;
+    use crate::ir::Block;
+    use alloc::string::ToString;
+
+    #[test]
+    fn empty() {
+        let jt = JumpTableData::new();
+
+        assert_eq!(jt.as_slice().get(0), None);
+        assert_eq!(jt.as_slice().get(10), None);
+
+        assert_eq!(jt.to_string(), "jump_table []");
+
+        let v = jt.as_slice();
+        assert_eq!(v, []);
+    }
+
+    #[test]
+    fn insert() {
+        let e1 = Block::new(1);
+        let e2 = Block::new(2);
+
+        let mut jt = JumpTableData::new();
+
+        jt.push_entry(e1);
+        jt.push_entry(e2);
+        jt.push_entry(e1);
+
+        assert_eq!(jt.to_string(), "jump_table [block1, block2, block1]");
+
+        let v = jt.as_slice();
+        assert_eq!(v, [e1, e2, e1]);
+    }
+}
diff --git a/cranelift/codegen/src/ir/layout.rs b/cranelift/codegen/src/ir/layout.rs
new file mode 100644
index 0000000000..567a92514c
--- /dev/null
+++ b/cranelift/codegen/src/ir/layout.rs
@@ -0,0 +1,1202 @@
+//! Function layout.
+//!
+//! The order of basic blocks in a function and the order of instructions in a block are
+//! determined by the `Layout` data structure defined in this module.
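+//!
+//! A rough usage sketch (hypothetical entity numbers; real `Block` and `Inst`
+//! references are normally allocated by a `DataFlowGraph`):
+//!
+//! ```text
+//! let mut layout = Layout::new();
+//! let block0 = Block::new(0);
+//! let inst0 = Inst::new(0);
+//! layout.append_block(block0);       // block0 becomes the entry block
+//! layout.append_inst(inst0, block0); // inst0 is now the only instruction in block0
+//! assert_eq!(layout.entry_block(), Some(block0));
+//! ```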
+
+use crate::entity::SecondaryMap;
+use crate::ir::dfg::DataFlowGraph;
+use crate::ir::progpoint::{ExpandedProgramPoint, ProgramOrder};
+use crate::ir::{Block, Inst};
+use crate::packed_option::PackedOption;
+use crate::timing;
+use core::cmp;
+use core::iter::{IntoIterator, Iterator};
+use log::debug;
+
+/// The `Layout` struct determines the layout of blocks and instructions in a function. It does not
+/// contain definitions of instructions or blocks, but depends on `Inst` and `Block` entity references
+/// being defined elsewhere.
+///
+/// This data structure determines:
+///
+/// - The order of blocks in the function.
+/// - Which block contains a given instruction.
+/// - The order of instructions within a block.
+///
+/// While data dependencies are not recorded, instruction ordering does affect control
+/// dependencies, so part of the semantics of the program are determined by the layout.
+///
+#[derive(Clone)]
+pub struct Layout {
+    /// Linked list nodes for the layout order of blocks. Forms a doubly linked list, terminated in
+    /// both ends by `None`.
+    blocks: SecondaryMap<Block, BlockNode>,
+
+    /// Linked list nodes for the layout order of instructions. Forms a doubly linked list per
+    /// block, terminated in both ends by `None`.
+    insts: SecondaryMap<Inst, InstNode>,
+
+    /// First block in the layout order, or `None` when no blocks have been laid out.
+    first_block: Option<Block>,
+
+    /// Last block in the layout order, or `None` when no blocks have been laid out.
+    last_block: Option<Block>,
+}
+
+impl Layout {
+    /// Create a new empty `Layout`.
+    pub fn new() -> Self {
+        Self {
+            blocks: SecondaryMap::new(),
+            insts: SecondaryMap::new(),
+            first_block: None,
+            last_block: None,
+        }
+    }
+
+    /// Clear the layout.
+    pub fn clear(&mut self) {
+        self.blocks.clear();
+        self.insts.clear();
+        self.first_block = None;
+        self.last_block = None;
+    }
+
+    /// Returns the capacity of the `BlockData` map.
+    pub fn block_capacity(&self) -> usize {
+        self.blocks.capacity()
+    }
+}
+
+/// Sequence numbers.
+///
+/// All instructions and blocks are given a sequence number that can be used to quickly determine
+/// their relative position in the layout. The sequence numbers are not contiguous, but are assigned
+/// like line numbers in BASIC: 10, 20, 30, ...
+///
+/// The block sequence numbers are strictly increasing, and so are the instruction sequence numbers
+/// within a block. The instruction sequence numbers are all between the sequence number of their
+/// containing block and the following block.
+///
+/// The result is that sequence numbers work like BASIC line numbers for the textual form of the IR.
+type SequenceNumber = u32;
+
+/// Initial stride assigned to new sequence numbers.
+const MAJOR_STRIDE: SequenceNumber = 10;
+
+/// Secondary stride used when renumbering locally.
+const MINOR_STRIDE: SequenceNumber = 2;
+
+/// Limit on the sequence number range we'll renumber locally. If this limit is exceeded, we'll
+/// switch to a full function renumbering.
+const LOCAL_LIMIT: SequenceNumber = 100 * MINOR_STRIDE;
+
+/// Compute the midpoint between `a` and `b`.
+/// Return `None` if the midpoint would be equal to either.
+fn midpoint(a: SequenceNumber, b: SequenceNumber) -> Option<SequenceNumber> {
+    debug_assert!(a < b);
+    // Avoid integer overflow.
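+    // Writing the midpoint as `a + (b - a) / 2` rather than `(a + b) / 2`
+    // cannot overflow: `a < b` is asserted above, so `b - a` always fits.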
+    let m = a + (b - a) / 2;
+    if m > a {
+        Some(m)
+    } else {
+        None
+    }
+}
+
+#[test]
+fn test_midpoint() {
+    assert_eq!(midpoint(0, 1), None);
+    assert_eq!(midpoint(0, 2), Some(1));
+    assert_eq!(midpoint(0, 3), Some(1));
+    assert_eq!(midpoint(0, 4), Some(2));
+    assert_eq!(midpoint(1, 4), Some(2));
+    assert_eq!(midpoint(2, 4), Some(3));
+    assert_eq!(midpoint(3, 4), None);
+}
+
+impl ProgramOrder for Layout {
+    fn cmp<A, B>(&self, a: A, b: B) -> cmp::Ordering
+    where
+        A: Into<ExpandedProgramPoint>,
+        B: Into<ExpandedProgramPoint>,
+    {
+        let a_seq = self.seq(a);
+        let b_seq = self.seq(b);
+        a_seq.cmp(&b_seq)
+    }
+
+    fn is_block_gap(&self, inst: Inst, block: Block) -> bool {
+        let i = &self.insts[inst];
+        let e = &self.blocks[block];
+
+        i.next.is_none() && i.block == e.prev
+    }
+}
+
+// Private methods for dealing with sequence numbers.
+impl Layout {
+    /// Get the sequence number of a program point that must correspond to an entity in the layout.
+    fn seq<PP: Into<ExpandedProgramPoint>>(&self, pp: PP) -> SequenceNumber {
+        // When `PP = Inst` or `PP = Block`, we expect this dynamic type check to be optimized out.
+        match pp.into() {
+            ExpandedProgramPoint::Block(block) => self.blocks[block].seq,
+            ExpandedProgramPoint::Inst(inst) => self.insts[inst].seq,
+        }
+    }
+
+    /// Get the last sequence number in `block`.
+    fn last_block_seq(&self, block: Block) -> SequenceNumber {
+        // Get the seq of the last instruction if it exists, otherwise use the block header seq.
+        self.blocks[block]
+            .last_inst
+            .map(|inst| self.insts[inst].seq)
+            .unwrap_or(self.blocks[block].seq)
+    }
+
+    /// Assign a valid sequence number to `block` such that the numbers are still monotonic. This may
+    /// require renumbering.
+    fn assign_block_seq(&mut self, block: Block) {
+        debug_assert!(self.is_block_inserted(block));
+
+        // Get the sequence number immediately before `block`, or 0.
+        let prev_seq = self.blocks[block]
+            .prev
+            .map(|prev_block| self.last_block_seq(prev_block))
+            .unwrap_or(0);
+
+        // Get the sequence number immediately following `block`.
+        let next_seq = if let Some(inst) = self.blocks[block].first_inst.expand() {
+            self.insts[inst].seq
+        } else if let Some(next_block) = self.blocks[block].next.expand() {
+            self.blocks[next_block].seq
+        } else {
+            // There is nothing after `block`. We can just use a major stride.
+            self.blocks[block].seq = prev_seq + MAJOR_STRIDE;
+            return;
+        };
+
+        // Check if there is room between these sequence numbers.
+        if let Some(seq) = midpoint(prev_seq, next_seq) {
+            self.blocks[block].seq = seq;
+        } else {
+            // No available integers between `prev_seq` and `next_seq`. We have to renumber.
+            self.renumber_from_block(block, prev_seq + MINOR_STRIDE, prev_seq + LOCAL_LIMIT);
+        }
+    }
+
+    /// Assign a valid sequence number to `inst` such that the numbers are still monotonic. This may
+    /// require renumbering.
+    fn assign_inst_seq(&mut self, inst: Inst) {
+        let block = self
+            .inst_block(inst)
+            .expect("inst must be inserted before assigning a seq");
+
+        // Get the sequence number immediately before `inst`.
+        let prev_seq = match self.insts[inst].prev.expand() {
+            Some(prev_inst) => self.insts[prev_inst].seq,
+            None => self.blocks[block].seq,
+        };
+
+        // Get the sequence number immediately following `inst`.
+        let next_seq = if let Some(next_inst) = self.insts[inst].next.expand() {
+            self.insts[next_inst].seq
+        } else if let Some(next_block) = self.blocks[block].next.expand() {
+            self.blocks[next_block].seq
+        } else {
+            // There is nothing after `inst`. We can just use a major stride.
+            self.insts[inst].seq = prev_seq + MAJOR_STRIDE;
+            return;
+        };
+
+        // Check if there is room between these sequence numbers.
+        if let Some(seq) = midpoint(prev_seq, next_seq) {
+            self.insts[inst].seq = seq;
+        } else {
+            // No available integers between `prev_seq` and `next_seq`. We have to renumber.
+            self.renumber_from_inst(inst, prev_seq + MINOR_STRIDE, prev_seq + LOCAL_LIMIT);
+        }
+    }
+
+    /// Renumber instructions starting from `inst` until the end of the block or until numbers catch
+    /// up.
+    ///
+    /// Return `None` if renumbering has caught up and the sequence is monotonic again. Otherwise
+    /// return the last used sequence number.
+    ///
+    /// If sequence numbers exceed `limit`, switch to a full function renumbering and return `None`.
+    fn renumber_insts(
+        &mut self,
+        inst: Inst,
+        seq: SequenceNumber,
+        limit: SequenceNumber,
+    ) -> Option<SequenceNumber> {
+        let mut inst = inst;
+        let mut seq = seq;
+
+        loop {
+            self.insts[inst].seq = seq;
+
+            // Next instruction.
+            inst = match self.insts[inst].next.expand() {
+                None => return Some(seq),
+                Some(next) => next,
+            };
+
+            if seq < self.insts[inst].seq {
+                // Sequence caught up.
+                return None;
+            }
+
+            if seq > limit {
+                // We're pushing too many instructions in front of us.
+                // Switch to a full function renumbering to make some space.
+                self.full_renumber();
+                return None;
+            }
+
+            seq += MINOR_STRIDE;
+        }
+    }
+
+    /// Renumber starting from `block` to `seq` and continuing until the sequence numbers are
+    /// monotonic again.
+    fn renumber_from_block(
+        &mut self,
+        block: Block,
+        first_seq: SequenceNumber,
+        limit: SequenceNumber,
+    ) {
+        let mut block = block;
+        let mut seq = first_seq;
+
+        loop {
+            self.blocks[block].seq = seq;
+
+            // Renumber instructions in `block`. Stop when the numbers catch up.
+            if let Some(inst) = self.blocks[block].first_inst.expand() {
+                seq = match self.renumber_insts(inst, seq + MINOR_STRIDE, limit) {
+                    Some(s) => s,
+                    None => return,
+                }
+            }
+
+            // Advance to the next block.
+            block = match self.blocks[block].next.expand() {
+                Some(next) => next,
+                None => return,
+            };
+
+            // Stop renumbering once the numbers catch up.
+            if seq < self.blocks[block].seq {
+                return;
+            }
+
+            seq += MINOR_STRIDE;
+        }
+    }
+
+    /// Renumber starting from `inst` to `seq` and continuing until the sequence numbers are
+    /// monotonic again.
+    fn renumber_from_inst(&mut self, inst: Inst, first_seq: SequenceNumber, limit: SequenceNumber) {
+        if let Some(seq) = self.renumber_insts(inst, first_seq, limit) {
+            // Renumbering spills over into next block.
+            if let Some(next_block) = self.blocks[self.inst_block(inst).unwrap()].next.expand() {
+                self.renumber_from_block(next_block, seq + MINOR_STRIDE, limit);
+            }
+        }
+    }
+
+    /// Renumber all blocks and instructions in the layout.
+    ///
+    /// This doesn't affect the position of anything, but it gives more room in the internal
+    /// sequence numbers for inserting instructions later.
+    fn full_renumber(&mut self) {
+        let _tt = timing::layout_renumber();
+        let mut seq = 0;
+        let mut next_block = self.first_block;
+        while let Some(block) = next_block {
+            self.blocks[block].seq = seq;
+            seq += MAJOR_STRIDE;
+            next_block = self.blocks[block].next.expand();
+
+            let mut next_inst = self.blocks[block].first_inst.expand();
+            while let Some(inst) = next_inst {
+                self.insts[inst].seq = seq;
+                seq += MAJOR_STRIDE;
+                next_inst = self.insts[inst].next.expand();
+            }
+        }
+        debug!("Renumbered {} program points", seq / MAJOR_STRIDE);
+    }
+}
+
+/// Methods for laying out blocks.
+///
+/// A new block starts out as *not inserted* in the block layout. The layout is a linear order of
+/// inserted blocks. Once a block has been inserted in the layout, instructions can be added. A
+/// block can only be removed from the layout when it is empty.
+///
+/// Since every block must end with a terminator instruction which cannot fall through, the layout
+/// of blocks does not affect the semantics of the program.
+///
+impl Layout {
+    /// Is `block` currently part of the layout?
+    pub fn is_block_inserted(&self, block: Block) -> bool {
+        Some(block) == self.first_block || self.blocks[block].prev.is_some()
+    }
+
+    /// Insert `block` as the last block in the layout.
+    pub fn append_block(&mut self, block: Block) {
+        debug_assert!(
+            !self.is_block_inserted(block),
+            "Cannot append block that is already in the layout"
+        );
+        {
+            let node = &mut self.blocks[block];
+            debug_assert!(node.first_inst.is_none() && node.last_inst.is_none());
+            node.prev = self.last_block.into();
+            node.next = None.into();
+        }
+        if let Some(last) = self.last_block {
+            self.blocks[last].next = block.into();
+        } else {
+            self.first_block = Some(block);
+        }
+        self.last_block = Some(block);
+        self.assign_block_seq(block);
+    }
+
+    /// Insert `block` in the layout before the existing block `before`.
+    pub fn insert_block(&mut self, block: Block, before: Block) {
+        debug_assert!(
+            !self.is_block_inserted(block),
+            "Cannot insert block that is already in the layout"
+        );
+        debug_assert!(
+            self.is_block_inserted(before),
+            "Block insertion point not in the layout"
+        );
+        let after = self.blocks[before].prev;
+        {
+            let node = &mut self.blocks[block];
+            node.next = before.into();
+            node.prev = after;
+        }
+        self.blocks[before].prev = block.into();
+        match after.expand() {
+            None => self.first_block = Some(block),
+            Some(a) => self.blocks[a].next = block.into(),
+        }
+        self.assign_block_seq(block);
+    }
+
+    /// Insert `block` in the layout *after* the existing block `after`.
+    pub fn insert_block_after(&mut self, block: Block, after: Block) {
+        debug_assert!(
+            !self.is_block_inserted(block),
+            "Cannot insert block that is already in the layout"
+        );
+        debug_assert!(
+            self.is_block_inserted(after),
+            "Block insertion point not in the layout"
+        );
+        let before = self.blocks[after].next;
+        {
+            let node = &mut self.blocks[block];
+            node.next = before;
+            node.prev = after.into();
+        }
+        self.blocks[after].next = block.into();
+        match before.expand() {
+            None => self.last_block = Some(block),
+            Some(b) => self.blocks[b].prev = block.into(),
+        }
+        self.assign_block_seq(block);
+    }
+
+    /// Remove `block` from the layout.
+    pub fn remove_block(&mut self, block: Block) {
+        debug_assert!(self.is_block_inserted(block), "block not in the layout");
+        debug_assert!(self.first_inst(block).is_none(), "block must be empty.");
+
+        // Clear the `block` node and extract links.
+        let prev;
+        let next;
+        {
+            let n = &mut self.blocks[block];
+            prev = n.prev;
+            next = n.next;
+            n.prev = None.into();
+            n.next = None.into();
+        }
+        // Fix up links to `block`.
+        match prev.expand() {
+            None => self.first_block = next.expand(),
+            Some(p) => self.blocks[p].next = next,
+        }
+        match next.expand() {
+            None => self.last_block = prev.expand(),
+            Some(n) => self.blocks[n].prev = prev,
+        }
+    }
+
+    /// Return an iterator over all blocks in layout order.
+    pub fn blocks(&self) -> Blocks {
+        Blocks {
+            layout: self,
+            next: self.first_block,
+        }
+    }
+
+    /// Get the function's entry block.
+    /// This is simply the first block in the layout order.
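+    ///
+    /// A short illustrative note: after `layout.append_block(b0)` on a fresh
+    /// layout, `entry_block()` returns `Some(b0)` (hypothetical block name).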
+    pub fn entry_block(&self) -> Option<Block> {
+        self.first_block
+    }
+
+    /// Get the last block in the layout.
+    pub fn last_block(&self) -> Option<Block> {
+        self.last_block
+    }
+
+    /// Get the block preceding `block` in the layout order.
+    pub fn prev_block(&self, block: Block) -> Option<Block> {
+        self.blocks[block].prev.expand()
+    }
+
+    /// Get the block following `block` in the layout order.
+    pub fn next_block(&self, block: Block) -> Option<Block> {
+        self.blocks[block].next.expand()
+    }
+}
+
+#[derive(Clone, Debug, Default)]
+struct BlockNode {
+    prev: PackedOption<Block>,
+    next: PackedOption<Block>,
+    first_inst: PackedOption<Inst>,
+    last_inst: PackedOption<Inst>,
+    seq: SequenceNumber,
+}
+
+/// Iterate over blocks in layout order. See `Layout::blocks()`.
+pub struct Blocks<'f> {
+    layout: &'f Layout,
+    next: Option<Block>,
+}
+
+impl<'f> Iterator for Blocks<'f> {
+    type Item = Block;
+
+    fn next(&mut self) -> Option<Block> {
+        match self.next {
+            Some(block) => {
+                self.next = self.layout.next_block(block);
+                Some(block)
+            }
+            None => None,
+        }
+    }
+}
+
+/// Use a layout reference in a for loop.
+impl<'f> IntoIterator for &'f Layout {
+    type Item = Block;
+    type IntoIter = Blocks<'f>;
+
+    fn into_iter(self) -> Blocks<'f> {
+        self.blocks()
+    }
+}
+
+/// Methods for arranging instructions.
+///
+/// An instruction starts out as *not inserted* in the layout. An instruction can be inserted into
+/// a block at a given position.
+impl Layout {
+    /// Get the block containing `inst`, or `None` if `inst` is not inserted in the layout.
+    pub fn inst_block(&self, inst: Inst) -> Option<Block> {
+        self.insts[inst].block.into()
+    }
+
+    /// Get the block containing the program point `pp`. Panic if `pp` is not in the layout.
+    pub fn pp_block<PP>(&self, pp: PP) -> Block
+    where
+        PP: Into<ExpandedProgramPoint>,
+    {
+        match pp.into() {
+            ExpandedProgramPoint::Block(block) => block,
+            ExpandedProgramPoint::Inst(inst) => {
+                self.inst_block(inst).expect("Program point not in layout")
+            }
+        }
+    }
+
+    /// Append `inst` to the end of `block`.
+    pub fn append_inst(&mut self, inst: Inst, block: Block) {
+        debug_assert_eq!(self.inst_block(inst), None);
+        debug_assert!(
+            self.is_block_inserted(block),
+            "Cannot append instructions to block not in layout"
+        );
+        {
+            let block_node = &mut self.blocks[block];
+            {
+                let inst_node = &mut self.insts[inst];
+                inst_node.block = block.into();
+                inst_node.prev = block_node.last_inst;
+                debug_assert!(inst_node.next.is_none());
+            }
+            if block_node.first_inst.is_none() {
+                block_node.first_inst = inst.into();
+            } else {
+                self.insts[block_node.last_inst.unwrap()].next = inst.into();
+            }
+            block_node.last_inst = inst.into();
+        }
+        self.assign_inst_seq(inst);
+    }
+
+    /// Fetch a block's first instruction.
+    pub fn first_inst(&self, block: Block) -> Option<Inst> {
+        self.blocks[block].first_inst.into()
+    }
+
+    /// Fetch a block's last instruction.
+    pub fn last_inst(&self, block: Block) -> Option<Inst> {
+        self.blocks[block].last_inst.into()
+    }
+
+    /// Fetch the instruction following `inst`.
+    pub fn next_inst(&self, inst: Inst) -> Option<Inst> {
+        self.insts[inst].next.expand()
+    }
+
+    /// Fetch the instruction preceding `inst`.
+    pub fn prev_inst(&self, inst: Inst) -> Option<Inst> {
+        self.insts[inst].prev.expand()
+    }
+
+    /// Fetch the first instruction in a block's terminal branch group.
+    pub fn canonical_branch_inst(&self, dfg: &DataFlowGraph, block: Block) -> Option<Inst> {
+        // Basic blocks permit at most two terminal branch instructions.
+        // If two, the former is conditional and the latter is unconditional.
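+        // Illustrative example: for a block ending in
+        //   brz v0, block2
+        //   jump block3
+        // the canonical branch instruction is the `brz`.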
+        let last = self.last_inst(block)?;
+        if let Some(prev) = self.prev_inst(last) {
+            if dfg[prev].opcode().is_branch() {
+                return Some(prev);
+            }
+        }
+        Some(last)
+    }
+
+    /// Insert `inst` before the instruction `before` in the same block.
+    pub fn insert_inst(&mut self, inst: Inst, before: Inst) {
+        debug_assert_eq!(self.inst_block(inst), None);
+        let block = self
+            .inst_block(before)
+            .expect("Instruction before insertion point not in the layout");
+        let after = self.insts[before].prev;
+        {
+            let inst_node = &mut self.insts[inst];
+            inst_node.block = block.into();
+            inst_node.next = before.into();
+            inst_node.prev = after;
+        }
+        self.insts[before].prev = inst.into();
+        match after.expand() {
+            None => self.blocks[block].first_inst = inst.into(),
+            Some(a) => self.insts[a].next = inst.into(),
+        }
+        self.assign_inst_seq(inst);
+    }
+
+    /// Remove `inst` from the layout.
+    pub fn remove_inst(&mut self, inst: Inst) {
+        let block = self.inst_block(inst).expect("Instruction already removed.");
+        // Clear the `inst` node and extract links.
+        let prev;
+        let next;
+        {
+            let n = &mut self.insts[inst];
+            prev = n.prev;
+            next = n.next;
+            n.block = None.into();
+            n.prev = None.into();
+            n.next = None.into();
+        }
+        // Fix up links to `inst`.
+        match prev.expand() {
+            None => self.blocks[block].first_inst = next,
+            Some(p) => self.insts[p].next = next,
+        }
+        match next.expand() {
+            None => self.blocks[block].last_inst = prev,
+            Some(n) => self.insts[n].prev = prev,
+        }
+    }
+
+    /// Iterate over the instructions in `block` in layout order.
+    pub fn block_insts(&self, block: Block) -> Insts {
+        Insts {
+            layout: self,
+            head: self.blocks[block].first_inst.into(),
+            tail: self.blocks[block].last_inst.into(),
+        }
+    }
+
+    /// Split the block containing `before` in two.
+    ///
+    /// Insert `new_block` after the old block and move `before` and the following instructions to
+    /// `new_block`:
+    ///
+    /// ```text
+    /// old_block:
+    ///     i1
+    ///     i2
+    ///     i3 << before
+    ///     i4
+    /// ```
+    /// becomes:
+    ///
+    /// ```text
+    /// old_block:
+    ///     i1
+    ///     i2
+    /// new_block:
+    ///     i3 << before
+    ///     i4
+    /// ```
+    pub fn split_block(&mut self, new_block: Block, before: Inst) {
+        let old_block = self
+            .inst_block(before)
+            .expect("The `before` instruction must be in the layout");
+        debug_assert!(!self.is_block_inserted(new_block));
+
+        // Insert new_block after old_block.
+        let next_block = self.blocks[old_block].next;
+        let last_inst = self.blocks[old_block].last_inst;
+        {
+            let node = &mut self.blocks[new_block];
+            node.prev = old_block.into();
+            node.next = next_block;
+            node.first_inst = before.into();
+            node.last_inst = last_inst;
+        }
+        self.blocks[old_block].next = new_block.into();
+
+        // Fix backwards link.
+        if Some(old_block) == self.last_block {
+            self.last_block = Some(new_block);
+        } else {
+            self.blocks[next_block.unwrap()].prev = new_block.into();
+        }
+
+        // Disconnect the instruction links.
+        let prev_inst = self.insts[before].prev;
+        self.insts[before].prev = None.into();
+        self.blocks[old_block].last_inst = prev_inst;
+        match prev_inst.expand() {
+            None => self.blocks[old_block].first_inst = None.into(),
+            Some(pi) => self.insts[pi].next = None.into(),
+        }
+
+        // Fix the instruction -> block pointers.
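+        // Every instruction from `before` onwards still records `old_block`
+        // as its owner; walk forward and re-point each one at `new_block`.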
+        let mut opt_i = Some(before);
+        while let Some(i) = opt_i {
+            debug_assert_eq!(self.insts[i].block.expand(), Some(old_block));
+            self.insts[i].block = new_block.into();
+            opt_i = self.insts[i].next.into();
+        }
+
+        self.assign_block_seq(new_block);
+    }
+}
+
+#[derive(Clone, Debug, Default)]
+struct InstNode {
+    /// The block containing this instruction, or `None` if the instruction is not yet inserted.
+    block: PackedOption<Block>,
+    prev: PackedOption<Inst>,
+    next: PackedOption<Inst>,
+    seq: SequenceNumber,
+}
+
+/// Iterate over instructions in a block in layout order. See `Layout::block_insts()`.
+pub struct Insts<'f> {
+    layout: &'f Layout,
+    head: Option<Inst>,
+    tail: Option<Inst>,
+}
+
+impl<'f> Iterator for Insts<'f> {
+    type Item = Inst;
+
+    fn next(&mut self) -> Option<Inst> {
+        let rval = self.head;
+        if let Some(inst) = rval {
+            if self.head == self.tail {
+                self.head = None;
+                self.tail = None;
+            } else {
+                self.head = self.layout.insts[inst].next.into();
+            }
+        }
+        rval
+    }
+}
+
+impl<'f> DoubleEndedIterator for Insts<'f> {
+    fn next_back(&mut self) -> Option<Inst> {
+        let rval = self.tail;
+        if let Some(inst) = rval {
+            if self.head == self.tail {
+                self.head = None;
+                self.tail = None;
+            } else {
+                self.tail = self.layout.insts[inst].prev.into();
+            }
+        }
+        rval
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::Layout;
+    use crate::cursor::{Cursor, CursorPosition};
+    use crate::entity::EntityRef;
+    use crate::ir::{Block, Inst, ProgramOrder, SourceLoc};
+    use alloc::vec::Vec;
+    use core::cmp::Ordering;
+
+    struct LayoutCursor<'f> {
+        /// Borrowed function layout. Public so it can be re-borrowed from this cursor.
+        pub layout: &'f mut Layout,
+        pos: CursorPosition,
+    }
+
+    impl<'f> Cursor for LayoutCursor<'f> {
+        fn position(&self) -> CursorPosition {
+            self.pos
+        }
+
+        fn set_position(&mut self, pos: CursorPosition) {
+            self.pos = pos;
+        }
+
+        fn srcloc(&self) -> SourceLoc {
+            unimplemented!()
+        }
+
+        fn set_srcloc(&mut self, _srcloc: SourceLoc) {
+            unimplemented!()
+        }
+
+        fn layout(&self) -> &Layout {
+            self.layout
+        }
+
+        fn layout_mut(&mut self) -> &mut Layout {
+            self.layout
+        }
+    }
+
+    impl<'f> LayoutCursor<'f> {
+        /// Create a new `LayoutCursor` for `layout`.
+        /// The cursor holds a mutable reference to `layout` for its entire lifetime.
+        pub fn new(layout: &'f mut Layout) -> Self {
+            Self {
+                layout,
+                pos: CursorPosition::Nowhere,
+            }
+        }
+    }
+
+    fn verify(layout: &mut Layout, blocks: &[(Block, &[Inst])]) {
+        // Check that blocks are inserted and instructions belong in the right places.
+        // Check forward linkage with iterators.
+        // Check that layout sequence numbers are strictly monotonic.
+        {
+            let mut seq = 0;
+            let mut block_iter = layout.blocks();
+            for &(block, insts) in blocks {
+                assert!(layout.is_block_inserted(block));
+                assert_eq!(block_iter.next(), Some(block));
+                assert!(layout.blocks[block].seq > seq);
+                seq = layout.blocks[block].seq;
+
+                let mut inst_iter = layout.block_insts(block);
+                for &inst in insts {
+                    assert_eq!(layout.inst_block(inst), Some(block));
+                    assert_eq!(inst_iter.next(), Some(inst));
+                    assert!(layout.insts[inst].seq > seq);
+                    seq = layout.insts[inst].seq;
+                }
+                assert_eq!(inst_iter.next(), None);
+            }
+            assert_eq!(block_iter.next(), None);
+        }
+
+        // Check backwards linkage with a cursor.
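+        // A fresh `LayoutCursor` starts at `CursorPosition::Nowhere`, so the
+        // first `prev_block()` call lands on the layout's last block.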
+        let mut cur = LayoutCursor::new(layout);
+        for &(block, insts) in blocks.into_iter().rev() {
+            assert_eq!(cur.prev_block(), Some(block));
+            for &inst in insts.into_iter().rev() {
+                assert_eq!(cur.prev_inst(), Some(inst));
+            }
+            assert_eq!(cur.prev_inst(), None);
+        }
+        assert_eq!(cur.prev_block(), None);
+    }
+
+    #[test]
+    fn append_block() {
+        let mut layout = Layout::new();
+        let e0 = Block::new(0);
+        let e1 = Block::new(1);
+        let e2 = Block::new(2);
+
+        {
+            let imm = &layout;
+            assert!(!imm.is_block_inserted(e0));
+            assert!(!imm.is_block_inserted(e1));
+        }
+        verify(&mut layout, &[]);
+
+        layout.append_block(e1);
+        assert!(!layout.is_block_inserted(e0));
+        assert!(layout.is_block_inserted(e1));
+        assert!(!layout.is_block_inserted(e2));
+        let v: Vec<Block> = layout.blocks().collect();
+        assert_eq!(v, [e1]);
+
+        layout.append_block(e2);
+        assert!(!layout.is_block_inserted(e0));
+        assert!(layout.is_block_inserted(e1));
+        assert!(layout.is_block_inserted(e2));
+        let v: Vec<Block> = layout.blocks().collect();
+        assert_eq!(v, [e1, e2]);
+
+        layout.append_block(e0);
+        assert!(layout.is_block_inserted(e0));
+        assert!(layout.is_block_inserted(e1));
+        assert!(layout.is_block_inserted(e2));
+        let v: Vec<Block> = layout.blocks().collect();
+        assert_eq!(v, [e1, e2, e0]);
+
+        {
+            let imm = &layout;
+            let mut v = Vec::new();
+            for e in imm {
+                v.push(e);
+            }
+            assert_eq!(v, [e1, e2, e0]);
+        }
+
+        // Test cursor positioning.
+        let mut cur = LayoutCursor::new(&mut layout);
+        assert_eq!(cur.position(), CursorPosition::Nowhere);
+        assert_eq!(cur.next_inst(), None);
+        assert_eq!(cur.position(), CursorPosition::Nowhere);
+        assert_eq!(cur.prev_inst(), None);
+        assert_eq!(cur.position(), CursorPosition::Nowhere);
+
+        assert_eq!(cur.next_block(), Some(e1));
+        assert_eq!(cur.position(), CursorPosition::Before(e1));
+        assert_eq!(cur.next_inst(), None);
+        assert_eq!(cur.position(), CursorPosition::After(e1));
+        assert_eq!(cur.next_inst(), None);
+        assert_eq!(cur.position(), CursorPosition::After(e1));
+        assert_eq!(cur.next_block(), Some(e2));
+        assert_eq!(cur.prev_inst(), None);
+        assert_eq!(cur.position(), CursorPosition::Before(e2));
+        assert_eq!(cur.next_block(), Some(e0));
+        assert_eq!(cur.next_block(), None);
+        assert_eq!(cur.position(), CursorPosition::Nowhere);
+
+        // Backwards through the blocks.
+        assert_eq!(cur.prev_block(), Some(e0));
+        assert_eq!(cur.position(), CursorPosition::After(e0));
+        assert_eq!(cur.prev_block(), Some(e2));
+        assert_eq!(cur.prev_block(), Some(e1));
+        assert_eq!(cur.prev_block(), None);
+        assert_eq!(cur.position(), CursorPosition::Nowhere);
+    }
+
+    #[test]
+    fn insert_block() {
+        let mut layout = Layout::new();
+        let e0 = Block::new(0);
+        let e1 = Block::new(1);
+        let e2 = Block::new(2);
+
+        {
+            let imm = &layout;
+            assert!(!imm.is_block_inserted(e0));
+            assert!(!imm.is_block_inserted(e1));
+
+            let v: Vec<Block> = layout.blocks().collect();
+            assert_eq!(v, []);
+        }
+
+        layout.append_block(e1);
+        assert!(!layout.is_block_inserted(e0));
+        assert!(layout.is_block_inserted(e1));
+        assert!(!layout.is_block_inserted(e2));
+        verify(&mut layout, &[(e1, &[])]);
+
+        layout.insert_block(e2, e1);
+        assert!(!layout.is_block_inserted(e0));
+        assert!(layout.is_block_inserted(e1));
+        assert!(layout.is_block_inserted(e2));
+        verify(&mut layout, &[(e2, &[]), (e1, &[])]);
+
+        layout.insert_block(e0, e1);
+        assert!(layout.is_block_inserted(e0));
+        assert!(layout.is_block_inserted(e1));
+        assert!(layout.is_block_inserted(e2));
+        verify(&mut layout, &[(e2, &[]), (e0, &[]), (e1, &[])]);
+    }
+
+    #[test]
+    fn insert_block_after() {
+        let mut layout = Layout::new();
+        let e0 = Block::new(0);
+        let e1 = Block::new(1);
+        let e2 = Block::new(2);
+
+        layout.append_block(e1);
+        layout.insert_block_after(e2, e1);
+        verify(&mut layout, &[(e1, &[]), (e2, &[])]);
+
+        layout.insert_block_after(e0, e1);
+        verify(&mut layout, &[(e1, &[]), (e0, &[]), (e2, &[])]);
+    }
+
+    #[test]
+    fn append_inst() {
+        let mut layout = Layout::new();
+        let e1 = Block::new(1);
+
+        layout.append_block(e1);
+        let v: Vec<Inst> = layout.block_insts(e1).collect();
+        assert_eq!(v, []);
+
+        let i0 = Inst::new(0);
+        let i1 = Inst::new(1);
+        let i2 = Inst::new(2);
+
+        assert_eq!(layout.inst_block(i0), None);
+        assert_eq!(layout.inst_block(i1), None);
+        assert_eq!(layout.inst_block(i2), None);
+
+        layout.append_inst(i1, e1);
+        assert_eq!(layout.inst_block(i0), None);
+        assert_eq!(layout.inst_block(i1), Some(e1));
+        assert_eq!(layout.inst_block(i2), None);
+        let v: Vec<Inst> = layout.block_insts(e1).collect();
+        assert_eq!(v, [i1]);
+
+        layout.append_inst(i2, e1);
+        assert_eq!(layout.inst_block(i0), None);
+        assert_eq!(layout.inst_block(i1), Some(e1));
+        assert_eq!(layout.inst_block(i2), Some(e1));
+        let v: Vec<Inst> = layout.block_insts(e1).collect();
+        assert_eq!(v, [i1, i2]);
+
+        // Test double-ended instruction iterator.
+        let v: Vec<Inst> = layout.block_insts(e1).rev().collect();
+        assert_eq!(v, [i2, i1]);
+
+        layout.append_inst(i0, e1);
+        verify(&mut layout, &[(e1, &[i1, i2, i0])]);
+
+        // Test cursor positioning.
+        let mut cur = LayoutCursor::new(&mut layout).at_top(e1);
+        assert_eq!(cur.position(), CursorPosition::Before(e1));
+        assert_eq!(cur.prev_inst(), None);
+        assert_eq!(cur.position(), CursorPosition::Before(e1));
+        assert_eq!(cur.next_inst(), Some(i1));
+        assert_eq!(cur.position(), CursorPosition::At(i1));
+        assert_eq!(cur.next_inst(), Some(i2));
+        assert_eq!(cur.next_inst(), Some(i0));
+        assert_eq!(cur.prev_inst(), Some(i2));
+        assert_eq!(cur.position(), CursorPosition::At(i2));
+        assert_eq!(cur.next_inst(), Some(i0));
+        assert_eq!(cur.position(), CursorPosition::At(i0));
+        assert_eq!(cur.next_inst(), None);
+        assert_eq!(cur.position(), CursorPosition::After(e1));
+        assert_eq!(cur.next_inst(), None);
+        assert_eq!(cur.position(), CursorPosition::After(e1));
+        assert_eq!(cur.prev_inst(), Some(i0));
+        assert_eq!(cur.prev_inst(), Some(i2));
+        assert_eq!(cur.prev_inst(), Some(i1));
+        assert_eq!(cur.prev_inst(), None);
+        assert_eq!(cur.position(), CursorPosition::Before(e1));
+
+        // Test remove_inst.
+        cur.goto_inst(i2);
+        assert_eq!(cur.remove_inst(), i2);
+        verify(cur.layout, &[(e1, &[i1, i0])]);
+        assert_eq!(cur.layout.inst_block(i2), None);
+        assert_eq!(cur.remove_inst(), i0);
+        verify(cur.layout, &[(e1, &[i1])]);
+        assert_eq!(cur.layout.inst_block(i0), None);
+        assert_eq!(cur.position(), CursorPosition::After(e1));
+        cur.layout.remove_inst(i1);
+        verify(cur.layout, &[(e1, &[])]);
+        assert_eq!(cur.layout.inst_block(i1), None);
+    }
+
+    #[test]
+    fn insert_inst() {
+        let mut layout = Layout::new();
+        let e1 = Block::new(1);
+
+        layout.append_block(e1);
+        let v: Vec<Inst> = layout.block_insts(e1).collect();
+        assert_eq!(v, []);
+
+        let i0 = Inst::new(0);
+        let i1 = Inst::new(1);
+        let i2 = Inst::new(2);
+
+        assert_eq!(layout.inst_block(i0), None);
+        assert_eq!(layout.inst_block(i1), None);
+        assert_eq!(layout.inst_block(i2), None);
+
+        layout.append_inst(i1, e1);
+        assert_eq!(layout.inst_block(i0), None);
+        assert_eq!(layout.inst_block(i1), Some(e1));
+        assert_eq!(layout.inst_block(i2), None);
+        let v: Vec<Inst> = layout.block_insts(e1).collect();
+        assert_eq!(v, [i1]);
+
+        layout.insert_inst(i2, i1);
+        assert_eq!(layout.inst_block(i0), None);
+        assert_eq!(layout.inst_block(i1), Some(e1));
+        assert_eq!(layout.inst_block(i2), Some(e1));
+        let v: Vec<Inst> = layout.block_insts(e1).collect();
+        assert_eq!(v, [i2, i1]);
+
+        layout.insert_inst(i0, i1);
+        verify(&mut layout, &[(e1, &[i2, i0, i1])]);
+    }
+
+    #[test]
+    fn multiple_blocks() {
+        let mut layout = Layout::new();
+
+        let e0 = Block::new(0);
+        let e1 = Block::new(1);
+
+        assert_eq!(layout.entry_block(), None);
+        layout.append_block(e0);
+        assert_eq!(layout.entry_block(), Some(e0));
+        layout.append_block(e1);
+        assert_eq!(layout.entry_block(), Some(e0));
+
+        let i0 = Inst::new(0);
+        let i1 = Inst::new(1);
+        let i2 = Inst::new(2);
+        let i3 = Inst::new(3);
+
+        layout.append_inst(i0, e0);
+        layout.append_inst(i1, e0);
+        layout.append_inst(i2, e1);
+        layout.append_inst(i3, e1);
+
+        let v0: Vec<Inst> = layout.block_insts(e0).collect();
+        let v1: Vec<Inst> = layout.block_insts(e1).collect();
+        assert_eq!(v0, [i0, i1]);
+        assert_eq!(v1, [i2, i3]);
+    }
+
+    #[test]
+    fn split_block() {
+        let mut layout = Layout::new();
+
+        let e0 = Block::new(0);
+        let e1 = Block::new(1);
+        let e2 = Block::new(2);
+
+        let i0 = Inst::new(0);
+        let i1 = Inst::new(1);
+        let i2 = Inst::new(2);
+        let i3 = Inst::new(3);
+
+        layout.append_block(e0);
+        layout.append_inst(i0, e0);
+        assert_eq!(layout.inst_block(i0), Some(e0));
+        layout.split_block(e1, i0);
+        assert_eq!(layout.inst_block(i0), Some(e1));
+
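+        // At this point e0 is empty and the new block e1 holds i0.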
+        {
+            let mut cur = LayoutCursor::new(&mut layout);
+            assert_eq!(cur.next_block(), Some(e0));
+            assert_eq!(cur.next_inst(), None);
+            assert_eq!(cur.next_block(), Some(e1));
+            assert_eq!(cur.next_inst(), Some(i0));
+            assert_eq!(cur.next_inst(), None);
+            assert_eq!(cur.next_block(), None);
+
+            // Check backwards links.
+            assert_eq!(cur.prev_block(), Some(e1));
+            assert_eq!(cur.prev_inst(), Some(i0));
+            assert_eq!(cur.prev_inst(), None);
+            assert_eq!(cur.prev_block(), Some(e0));
+            assert_eq!(cur.prev_inst(), None);
+            assert_eq!(cur.prev_block(), None);
+        }
+
+        layout.append_inst(i1, e0);
+        layout.append_inst(i2, e0);
+        layout.append_inst(i3, e0);
+        layout.split_block(e2, i2);
+
+        assert_eq!(layout.inst_block(i0), Some(e1));
+        assert_eq!(layout.inst_block(i1), Some(e0));
+        assert_eq!(layout.inst_block(i2), Some(e2));
+        assert_eq!(layout.inst_block(i3), Some(e2));
+
+        {
+            let mut cur = LayoutCursor::new(&mut layout);
+            assert_eq!(cur.next_block(), Some(e0));
+            assert_eq!(cur.next_inst(), Some(i1));
+            assert_eq!(cur.next_inst(), None);
+            assert_eq!(cur.next_block(), Some(e2));
+            assert_eq!(cur.next_inst(), Some(i2));
+            assert_eq!(cur.next_inst(), Some(i3));
+            assert_eq!(cur.next_inst(), None);
+            assert_eq!(cur.next_block(), Some(e1));
+            assert_eq!(cur.next_inst(), Some(i0));
+            assert_eq!(cur.next_inst(), None);
+            assert_eq!(cur.next_block(), None);
+
+            assert_eq!(cur.prev_block(), Some(e1));
+            assert_eq!(cur.prev_inst(), Some(i0));
+            assert_eq!(cur.prev_inst(), None);
+            assert_eq!(cur.prev_block(), Some(e2));
+            assert_eq!(cur.prev_inst(), Some(i3));
+            assert_eq!(cur.prev_inst(), Some(i2));
+            assert_eq!(cur.prev_inst(), None);
+            assert_eq!(cur.prev_block(), Some(e0));
+            assert_eq!(cur.prev_inst(), Some(i1));
+            assert_eq!(cur.prev_inst(), None);
+            assert_eq!(cur.prev_block(), None);
+        }
+
+        // Check `ProgramOrder`.
+        assert_eq!(layout.cmp(e2, e2), Ordering::Equal);
+        assert_eq!(layout.cmp(e2, i2), Ordering::Less);
+        assert_eq!(layout.cmp(i3, i2), Ordering::Greater);
+
+        assert_eq!(layout.is_block_gap(i1, e2), true);
+        assert_eq!(layout.is_block_gap(i3, e1), true);
+        assert_eq!(layout.is_block_gap(i1, e1), false);
+        assert_eq!(layout.is_block_gap(i2, e1), false);
+    }
+}
diff --git a/cranelift/codegen/src/ir/libcall.rs b/cranelift/codegen/src/ir/libcall.rs
new file mode 100644
index 0000000000..f4f6d941f8
--- /dev/null
+++ b/cranelift/codegen/src/ir/libcall.rs
@@ -0,0 +1,229 @@
+//! Naming well-known routines in the runtime library.
+
+use crate::ir::{
+    types, AbiParam, ArgumentPurpose, ExtFuncData, ExternalName, FuncRef, Function, Inst, Opcode,
+    Signature, Type,
+};
+use crate::isa::{CallConv, RegUnit, TargetIsa};
+use core::fmt;
+use core::str::FromStr;
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+
+/// The name of a runtime library routine.
+///
+/// Runtime library calls are generated for Cranelift IR instructions that don't have an equivalent
+/// ISA instruction or an easy macro expansion. A `LibCall` is used as a well-known name to refer to
+/// the runtime library routine. This way, Cranelift doesn't have to know about the naming
+/// convention in the embedding VM's runtime library.
+///
+/// This list is likely to grow over time.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub enum LibCall {
+    /// Probe for stack overflow. These are emitted for functions which need it
+    /// when the `enable_probestack` setting is true.
+    Probestack,
+    /// ceil.f32
+    CeilF32,
+    /// ceil.f64
+    CeilF64,
+    /// floor.f32
+    FloorF32,
+    /// floor.f64
+    FloorF64,
+    /// trunc.f32
+    TruncF32,
+    /// trunc.f64
+    TruncF64,
+    /// nearest.f32
+    NearestF32,
+    /// nearest.f64
+    NearestF64,
+    /// libc.memcpy
+    Memcpy,
+    /// libc.memset
+    Memset,
+    /// libc.memmove
+    Memmove,
+
+    /// Elf __tls_get_addr
+    ElfTlsGetAddr,
+}
+
+impl fmt::Display for LibCall {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Debug::fmt(self, f)
+    }
+}
+
+impl FromStr for LibCall {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "Probestack" => Ok(Self::Probestack),
+            "CeilF32" => Ok(Self::CeilF32),
+            "CeilF64" => Ok(Self::CeilF64),
+            "FloorF32" => Ok(Self::FloorF32),
+            "FloorF64" => Ok(Self::FloorF64),
+            "TruncF32" => Ok(Self::TruncF32),
+            "TruncF64" => Ok(Self::TruncF64),
+            "NearestF32" => Ok(Self::NearestF32),
+            "NearestF64" => Ok(Self::NearestF64),
+            "Memcpy" => Ok(Self::Memcpy),
+            "Memset" => Ok(Self::Memset),
+            "Memmove" => Ok(Self::Memmove),
+
+            "ElfTlsGetAddr" => Ok(Self::ElfTlsGetAddr),
+            _ => Err(()),
+        }
+    }
+}
+
+impl LibCall {
+    /// Get the well-known library call name to use as a replacement for an instruction with the
+    /// given opcode and controlling type variable.
+    ///
+    /// Returns `None` if no well-known library routine name exists for that instruction.
+    pub fn for_inst(opcode: Opcode, ctrl_type: Type) -> Option<Self> {
+        Some(match ctrl_type {
+            types::F32 => match opcode {
+                Opcode::Ceil => Self::CeilF32,
+                Opcode::Floor => Self::FloorF32,
+                Opcode::Trunc => Self::TruncF32,
+                Opcode::Nearest => Self::NearestF32,
+                _ => return None,
+            },
+            types::F64 => match opcode {
+                Opcode::Ceil => Self::CeilF64,
+                Opcode::Floor => Self::FloorF64,
+                Opcode::Trunc => Self::TruncF64,
+                Opcode::Nearest => Self::NearestF64,
+                _ => return None,
+            },
+            _ => return None,
+        })
+    }
+}
+
+/// Get a function reference for `libcall` in `func`, following the signature
+/// for `inst`.
+///
+/// If there is an existing reference, use it, otherwise make a new one.
+pub(crate) fn get_libcall_funcref(
+    libcall: LibCall,
+    call_conv: CallConv,
+    func: &mut Function,
+    inst: Inst,
+    isa: &dyn TargetIsa,
+) -> FuncRef {
+    find_funcref(libcall, func)
+        .unwrap_or_else(|| make_funcref_for_inst(libcall, call_conv, func, inst, isa))
+}
+
+/// Get a function reference for the probestack function in `func`.
+///
+/// If there is an existing reference, use it, otherwise make a new one.
+pub fn get_probestack_funcref(
+    func: &mut Function,
+    reg_type: Type,
+    arg_reg: RegUnit,
+    isa: &dyn TargetIsa,
+) -> FuncRef {
+    find_funcref(LibCall::Probestack, func)
+        .unwrap_or_else(|| make_funcref_for_probestack(func, reg_type, arg_reg, isa))
+}
+
+/// Get the existing function reference for `libcall` in `func` if it exists.
+fn find_funcref(libcall: LibCall, func: &Function) -> Option<FuncRef> {
+    // We're assuming that all libcall function decls are at the end.
+    // If we get this wrong, worst case we'll have duplicate libcall decls which is harmless.
+    for (fref, func_data) in func.dfg.ext_funcs.iter().rev() {
+        match func_data.name {
+            ExternalName::LibCall(lc) => {
+                if lc == libcall {
+                    return Some(fref);
+                }
+            }
+            _ => break,
+        }
+    }
+    None
+}
+
+/// Create a funcref for `LibCall::Probestack`.
+fn make_funcref_for_probestack(
+    func: &mut Function,
+    reg_type: Type,
+    arg_reg: RegUnit,
+    isa: &dyn TargetIsa,
+) -> FuncRef {
+    let mut sig = Signature::new(CallConv::Probestack);
+    let rax = AbiParam::special_reg(reg_type, ArgumentPurpose::Normal, arg_reg);
+    sig.params.push(rax);
+    if !isa.flags().probestack_func_adjusts_sp() {
+        sig.returns.push(rax);
+    }
+    make_funcref(LibCall::Probestack, func, sig, isa)
+}
+
+/// Create a funcref for `libcall` with a signature matching `inst`.
+fn make_funcref_for_inst(
+    libcall: LibCall,
+    call_conv: CallConv,
+    func: &mut Function,
+    inst: Inst,
+    isa: &dyn TargetIsa,
+) -> FuncRef {
+    let mut sig = Signature::new(call_conv);
+    for &v in func.dfg.inst_args(inst) {
+        sig.params.push(AbiParam::new(func.dfg.value_type(v)));
+    }
+    for &v in func.dfg.inst_results(inst) {
+        sig.returns.push(AbiParam::new(func.dfg.value_type(v)));
+    }
+
+    if call_conv.extends_baldrdash() {
+        // Adds the special VMContext parameter to the signature.
+        sig.params.push(AbiParam::special(
+            isa.pointer_type(),
+            ArgumentPurpose::VMContext,
+        ));
+    }
+
+    make_funcref(libcall, func, sig, isa)
+}
+
+/// Create a funcref for `libcall`.
+fn make_funcref(
+    libcall: LibCall,
+    func: &mut Function,
+    sig: Signature,
+    isa: &dyn TargetIsa,
+) -> FuncRef {
+    let sigref = func.import_signature(sig);
+
+    func.import_function(ExtFuncData {
+        name: ExternalName::LibCall(libcall),
+        signature: sigref,
+        colocated: isa.flags().use_colocated_libcalls(),
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use alloc::string::ToString;
+
+    #[test]
+    fn display() {
+        assert_eq!(LibCall::CeilF32.to_string(), "CeilF32");
+        assert_eq!(LibCall::NearestF64.to_string(), "NearestF64");
+    }
+
+    #[test]
+    fn parsing() {
+        assert_eq!("FloorF32".parse(), Ok(LibCall::FloorF32));
+    }
+}
diff --git a/cranelift/codegen/src/ir/memflags.rs b/cranelift/codegen/src/ir/memflags.rs
new file mode 100644
index 0000000000..87fd6bf3ab
--- /dev/null
+++ b/cranelift/codegen/src/ir/memflags.rs
@@ -0,0 +1,117 @@
+//! Memory operation flags.
+
+use core::fmt;
+
+enum FlagBit {
+    Notrap,
+    Aligned,
+    Readonly,
+}
+
+const NAMES: [&str; 3] = ["notrap", "aligned", "readonly"];
+
+/// Flags for memory operations like load/store.
+///
+/// Each of these flags introduces a limited form of undefined behavior. The flags each enable
+/// certain optimizations that need to make additional assumptions. Generally, the semantics of a
+/// program do not change when a flag is removed, but adding a flag can.
+#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
+pub struct MemFlags {
+    bits: u8,
+}
+
+impl MemFlags {
+    /// Create a new empty set of flags.
+    pub fn new() -> Self {
+        Self { bits: 0 }
+    }
+
+    /// Create a set of flags representing an access from a "trusted" address, meaning it's
+    /// known to be aligned and non-trapping.
+    pub fn trusted() -> Self {
+        let mut result = Self::new();
+        result.set_notrap();
+        result.set_aligned();
+        result
+    }
+
+    /// Read a flag bit.
+    fn read(self, bit: FlagBit) -> bool {
+        self.bits & (1 << bit as usize) != 0
+    }
+
+    /// Set a flag bit.
+    fn set(&mut self, bit: FlagBit) {
+        self.bits |= 1 << bit as usize
+    }
+
+    /// Set a flag bit by name.
+    ///
+    /// Returns true if the flag was found and set, false for an unknown flag name.
+    pub fn set_by_name(&mut self, name: &str) -> bool {
+        match NAMES.iter().position(|&s| s == name) {
+            Some(bit) => {
+                self.bits |= 1 << bit;
+                true
+            }
+            None => false,
+        }
+    }
+
+    /// Test if the `notrap` flag is set.
+    ///
+    /// Normally, trapping is part of the semantics of a load/store operation. If the platform
+    /// would cause a trap when accessing the effective address, the Cranelift memory operation is
+    /// also required to trap.
+    ///
+    /// The `notrap` flag tells Cranelift that the memory is *accessible*, which means that
+    /// accesses will not trap. This makes it possible to delete an unused load or a dead store
+    /// instruction.
+    pub fn notrap(self) -> bool {
+        self.read(FlagBit::Notrap)
+    }
+
+    /// Set the `notrap` flag.
+    pub fn set_notrap(&mut self) {
+        self.set(FlagBit::Notrap)
+    }
+
+    /// Test if the `aligned` flag is set.
+    ///
+    /// By default, Cranelift memory instructions work with any unaligned effective address. If the
+    /// `aligned` flag is set, the instruction is permitted to trap or return a wrong result if the
+    /// effective address is misaligned.
+    pub fn aligned(self) -> bool {
+        self.read(FlagBit::Aligned)
+    }
+
+    /// Set the `aligned` flag.
+    pub fn set_aligned(&mut self) {
+        self.set(FlagBit::Aligned)
+    }
+
+    /// Test if the `readonly` flag is set.
+    ///
+    /// Loads with this flag have no memory dependencies.
+    /// This results in undefined behavior if the dereferenced memory is mutated at any time
+    /// between when the function is called and when it is exited.
+    pub fn readonly(self) -> bool {
+        self.read(FlagBit::Readonly)
+    }
+
+    /// Set the `readonly` flag.
+    pub fn set_readonly(&mut self) {
+        self.set(FlagBit::Readonly)
+    }
+}
+
+impl fmt::Display for MemFlags {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        for (i, n) in NAMES.iter().enumerate() {
+            if self.bits & (1 << i) != 0 {
+                write!(f, " {}", n)?;
+            }
+        }
+        Ok(())
+    }
+}
diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs
new file mode 100644
index 0000000000..3c222ca9f5
--- /dev/null
+++ b/cranelift/codegen/src/ir/mod.rs
@@ -0,0 +1,115 @@
+//! Representation of Cranelift IR functions.
+
+mod builder;
+pub mod constant;
+pub mod dfg;
+pub mod entities;
+mod extfunc;
+mod extname;
+mod framelayout;
+pub mod function;
+mod globalvalue;
+mod heap;
+pub mod immediates;
+pub mod instructions;
+pub mod jumptable;
+pub mod layout;
+pub(crate) mod libcall;
+mod memflags;
+mod progpoint;
+mod sourceloc;
+pub mod stackslot;
+mod table;
+mod trapcode;
+pub mod types;
+mod valueloc;
+
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+
+pub use crate::ir::builder::{
+    InsertBuilder, InstBuilder, InstBuilderBase, InstInserterBase, ReplaceBuilder,
+};
+pub use crate::ir::constant::{ConstantData, ConstantOffset, ConstantPool};
+pub use crate::ir::dfg::{DataFlowGraph, ValueDef};
+pub use crate::ir::entities::{
+    Block, Constant, FuncRef, GlobalValue, Heap, Immediate, Inst, JumpTable, SigRef, StackSlot,
+    Table, Value,
+};
+pub use crate::ir::extfunc::{
+    AbiParam, ArgumentExtension, ArgumentPurpose, ExtFuncData, Signature,
+};
+pub use crate::ir::extname::ExternalName;
+pub use crate::ir::framelayout::{FrameLayout, FrameLayoutChange, FrameLayoutChanges};
+pub use crate::ir::function::{DisplayFunctionAnnotations, Function};
+pub use crate::ir::globalvalue::GlobalValueData;
+pub use crate::ir::heap::{HeapData, HeapStyle};
+pub use crate::ir::instructions::{
+    InstructionData, Opcode, ValueList, ValueListPool, VariableArgs,
+};
+pub use crate::ir::jumptable::JumpTableData;
+pub use crate::ir::layout::Layout;
+pub use crate::ir::libcall::{get_probestack_funcref, LibCall};
+pub use crate::ir::memflags::MemFlags;
+pub use crate::ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint};
+pub use crate::ir::sourceloc::SourceLoc;
+pub use crate::ir::stackslot::{StackLayoutInfo, StackSlotData, StackSlotKind, StackSlots};
+pub use crate::ir::table::TableData;
+pub use crate::ir::trapcode::TrapCode;
+pub use crate::ir::types::Type;
+pub use crate::ir::valueloc::{ArgumentLoc, ValueLoc};
+pub use cranelift_codegen_shared::condcodes;
+
+use crate::binemit;
+use crate::entity::{entity_impl, PrimaryMap, SecondaryMap};
+use crate::isa;
+
+/// Map of value locations.
+pub type ValueLocations = SecondaryMap<Value, ValueLoc>;
+
+/// Map of jump tables.
+pub type JumpTables = PrimaryMap<JumpTable, JumpTableData>;
+
+/// Map of instruction encodings.
+pub type InstEncodings = SecondaryMap<Inst, isa::Encoding>;
+
+/// Code offsets for blocks.
+pub type BlockOffsets = SecondaryMap<Block, binemit::CodeOffset>;
+
+/// Code offsets for Jump Tables.
+pub type JumpTableOffsets = SecondaryMap<JumpTable, binemit::CodeOffset>;
+
+/// Source locations for instructions.
+pub type SourceLocs = SecondaryMap<Inst, SourceLoc>;
+
+/// Marked with a label value.
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct ValueLabel(u32);
+entity_impl!(ValueLabel, "val");
+
+/// A label of a Value.
+#[derive(Debug, Clone)]
+pub struct ValueLabelStart {
+    /// Source location when it is in effect
+    pub from: SourceLoc,
+
+    /// The label index.
+    pub label: ValueLabel,
+}
+
+/// Value label assignments: label starts or value aliases.
+#[derive(Debug, Clone)]
+pub enum ValueLabelAssignments {
+    /// Original value labels assigned at transform.
+    Starts(alloc::vec::Vec<ValueLabelStart>),
+
+    /// A value alias to original value.
+    Alias {
+        /// Source location when it is in effect
+        from: SourceLoc,
+
+        /// The label index.
+        value: Value,
+    },
+}
diff --git a/cranelift/codegen/src/ir/progpoint.rs b/cranelift/codegen/src/ir/progpoint.rs
new file mode 100644
index 0000000000..df1a7d14b3
--- /dev/null
+++ b/cranelift/codegen/src/ir/progpoint.rs
@@ -0,0 +1,164 @@
+//! Program points.
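+//!
+//! Encoding sketch (derived from the `From` impls below, not a stable ABI):
+//! an instruction with index `n` is packed into the `u32` as `2 * n`, and a
+//! block with index `n` as `2 * n + 1`, so `inst5` encodes to 10 and `block3`
+//! to 7.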
+
+use crate::entity::EntityRef;
+use crate::ir::{Block, Inst, ValueDef};
+use core::cmp;
+use core::fmt;
+use core::u32;
+
+/// A `ProgramPoint` represents a position in a function where the live range of an SSA value can
+/// begin or end. It can be either:
+///
+/// 1. An instruction or
+/// 2. A block header.
+///
+/// This corresponds more or less to the lines in the textual form of Cranelift IR.
+#[derive(PartialEq, Eq, Clone, Copy)]
+pub struct ProgramPoint(u32);
+
+impl From<Inst> for ProgramPoint {
+    fn from(inst: Inst) -> Self {
+        let idx = inst.index();
+        debug_assert!(idx < (u32::MAX / 2) as usize);
+        Self((idx * 2) as u32)
+    }
+}
+
+impl From<Block> for ProgramPoint {
+    fn from(block: Block) -> Self {
+        let idx = block.index();
+        debug_assert!(idx < (u32::MAX / 2) as usize);
+        Self((idx * 2 + 1) as u32)
+    }
+}
+
+impl From<ValueDef> for ProgramPoint {
+    fn from(def: ValueDef) -> Self {
+        match def {
+            ValueDef::Result(inst, _) => inst.into(),
+            ValueDef::Param(block, _) => block.into(),
+        }
+    }
+}
+
+/// An expanded program point directly exposes the variants, but takes twice the space to
+/// represent.
+#[derive(PartialEq, Eq, Clone, Copy)]
+pub enum ExpandedProgramPoint {
+    /// An instruction in the function.
+    Inst(Inst),
+    /// A block header.
+    Block(Block),
+}
+
+impl ExpandedProgramPoint {
+    /// Get the instruction we know is inside.
+    pub fn unwrap_inst(self) -> Inst {
+        match self {
+            Self::Inst(x) => x,
+            Self::Block(x) => panic!("expected inst: {}", x),
+        }
+    }
+}
+
+impl From<Inst> for ExpandedProgramPoint {
+    fn from(inst: Inst) -> Self {
+        Self::Inst(inst)
+    }
+}
+
+impl From<Block> for ExpandedProgramPoint {
+    fn from(block: Block) -> Self {
+        Self::Block(block)
+    }
+}
+
+impl From<ValueDef> for ExpandedProgramPoint {
+    fn from(def: ValueDef) -> Self {
+        match def {
+            ValueDef::Result(inst, _) => inst.into(),
+            ValueDef::Param(block, _) => block.into(),
+        }
+    }
+}
+
+impl From<ProgramPoint> for ExpandedProgramPoint {
+    fn from(pp: ProgramPoint) -> Self {
+        if pp.0 & 1 == 0 {
+            Self::Inst(Inst::from_u32(pp.0 / 2))
+        } else {
+            Self::Block(Block::from_u32(pp.0 / 2))
+        }
+    }
+}
+
+impl fmt::Display for ExpandedProgramPoint {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Self::Inst(x) => write!(f, "{}", x),
+            Self::Block(x) => write!(f, "{}", x),
+        }
+    }
+}
+
+impl fmt::Display for ProgramPoint {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let epp: ExpandedProgramPoint = (*self).into();
+        epp.fmt(f)
+    }
+}
+
+impl fmt::Debug for ExpandedProgramPoint {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "ExpandedProgramPoint({})", self)
+    }
+}
+
+impl fmt::Debug for ProgramPoint {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "ProgramPoint({})", self)
+    }
+}
+
+/// Context for ordering program points.
+///
+/// `ProgramPoint` objects don't carry enough information to be ordered independently, they need a
+/// context providing the program order.
+pub trait ProgramOrder {
+    /// Compare the program points `a` and `b` relative to this program order.
+    ///
+    /// Return `Less` if `a` appears in the program before `b`.
+    ///
+    /// This is declared as a generic such that it can be called with `Inst` and `Block` arguments
+    /// directly. Depending on the implementation, there is a good chance performance will be
+    /// improved for those cases where the type of either argument is known statically.
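+    ///
+    /// For example (illustrative layout `block0: inst0, inst1`),
+    /// `cmp(block0, inst1)` returns `Less` and `cmp(inst1, inst0)` returns
+    /// `Greater`.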
+    fn cmp<A, B>(&self, a: A, b: B) -> cmp::Ordering
+    where
+        A: Into<ExpandedProgramPoint>,
+        B: Into<ExpandedProgramPoint>;
+
+    /// Is the range from `inst` to `block` just the gap between consecutive blocks?
+    ///
+    /// This returns true if `inst` is the terminator in the block immediately before `block`.
+    fn is_block_gap(&self, inst: Inst, block: Block) -> bool;
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::entity::EntityRef;
+    use crate::ir::{Block, Inst};
+    use alloc::string::ToString;
+
+    #[test]
+    fn convert() {
+        let i5 = Inst::new(5);
+        let b3 = Block::new(3);
+
+        let pp1: ProgramPoint = i5.into();
+        let pp2: ProgramPoint = b3.into();
+
+        assert_eq!(pp1.to_string(), "inst5");
+        assert_eq!(pp2.to_string(), "block3");
+    }
+}
diff --git a/cranelift/codegen/src/ir/sourceloc.rs b/cranelift/codegen/src/ir/sourceloc.rs
new file mode 100644
index 0000000000..ccab62f89b
--- /dev/null
+++ b/cranelift/codegen/src/ir/sourceloc.rs
@@ -0,0 +1,66 @@
+//! Source locations.
+//!
+//! Cranelift tracks the original source location of each instruction, and preserves the source
+//! location when instructions are transformed.
+
+use core::fmt;
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+
+/// A source location.
+///
+/// This is an opaque 32-bit number attached to each Cranelift IR instruction. Cranelift does not
+/// interpret source locations in any way, they are simply preserved from the input to the output.
+///
+/// The default source location uses the all-ones bit pattern `!0`. It is used for instructions
+/// that can't be given a real source location.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct SourceLoc(u32);
+
+impl SourceLoc {
+    /// Create a new source location with the given bits.
+    pub fn new(bits: u32) -> Self {
+        Self(bits)
+    }
+
+    /// Is this the default source location?
+    pub fn is_default(self) -> bool {
+        self == Default::default()
+    }
+
+    /// Read the bits of this source location.
+    pub fn bits(self) -> u32 {
+        self.0
+    }
+}
+
+impl Default for SourceLoc {
+    fn default() -> Self {
+        Self(!0)
+    }
+}
+
+impl fmt::Display for SourceLoc {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if self.is_default() {
+            write!(f, "@-")
+        } else {
+            write!(f, "@{:04x}", self.0)
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::ir::SourceLoc;
+    use alloc::string::ToString;
+
+    #[test]
+    fn display() {
+        assert_eq!(SourceLoc::default().to_string(), "@-");
+        assert_eq!(SourceLoc::new(0).to_string(), "@0000");
+        assert_eq!(SourceLoc::new(16).to_string(), "@0010");
+        assert_eq!(SourceLoc::new(0xabcdef).to_string(), "@abcdef");
+    }
+}
diff --git a/cranelift/codegen/src/ir/stackslot.rs b/cranelift/codegen/src/ir/stackslot.rs
new file mode 100644
index 0000000000..5bb70d1f0e
--- /dev/null
+++ b/cranelift/codegen/src/ir/stackslot.rs
@@ -0,0 +1,450 @@
+//! Stack slots.
+//!
+//! The `StackSlotData` struct keeps track of a single stack slot in a function.
+//!
+
+use crate::entity::{Iter, IterMut, Keys, PrimaryMap};
+use crate::ir::{StackSlot, Type};
+use crate::packed_option::PackedOption;
+use alloc::vec::Vec;
+use core::cmp;
+use core::fmt;
+use core::ops::{Index, IndexMut};
+use core::slice;
+use core::str::FromStr;
+
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+
+/// The size of an object on the stack, or the size of a stack frame.
+///
+/// We don't use `usize` to represent object sizes on the target platform because Cranelift
+/// supports cross-compilation, and `usize` is a type that depends on the host platform, not the
+/// target platform.
+pub type StackSize = u32;
+
+/// A stack offset.
+///
+/// The location of a stack offset relative to a stack pointer or frame pointer.
+pub type StackOffset = i32;
+
+/// The minimum size of a spill slot in bytes.
+///
+/// ISA implementations are allowed to assume that small types like `b1` and `i8` get a full 4-byte
+/// spill slot.
+const MIN_SPILL_SLOT_SIZE: StackSize = 4;
+
+/// Get the spill slot size to use for `ty`.
+fn spill_size(ty: Type) -> StackSize {
+    cmp::max(MIN_SPILL_SLOT_SIZE, ty.bytes())
+}
+
+/// The kind of a stack slot.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub enum StackSlotKind {
+    /// A spill slot. This is a stack slot created by the register allocator.
+    SpillSlot,
+
+    /// An explicit stack slot. This is a chunk of stack memory for use by the `stack_load`
+    /// and `stack_store` instructions.
+    ExplicitSlot,
+
+    /// An incoming function argument.
+    ///
+    /// If the current function has more arguments than fit in registers, the remaining arguments
+    /// are passed on the stack by the caller. These incoming arguments are represented as SSA
+    /// values assigned to incoming stack slots.
+    IncomingArg,
+
+    /// An outgoing function argument.
+    ///
+    /// When preparing to call a function whose arguments don't fit in registers, outgoing argument
+    /// stack slots are used to represent individual arguments in the outgoing call frame. These
+    /// stack slots are only valid while setting up a call.
+    OutgoingArg,
+
+    /// Space allocated in the caller's frame for the callee's return values
+    /// that are passed out via return pointer.
+    ///
+    /// If there are more return values than registers available for the callee's calling
+    /// convention, or the return value is larger than the available registers' space, then we
+    /// allocate stack space in this frame and pass a pointer to the callee, which then writes its
+    /// return values into this space.
+    StructReturnSlot,
+
+    /// An emergency spill slot.
+    ///
+    /// Emergency slots are allocated late when the register allocator's constraint solver needs
+    /// extra space to shuffle registers around. They are only used briefly, and can be reused.
+    EmergencySlot,
+}
+
+impl FromStr for StackSlotKind {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        use self::StackSlotKind::*;
+        match s {
+            "explicit_slot" => Ok(ExplicitSlot),
+            "spill_slot" => Ok(SpillSlot),
+            "incoming_arg" => Ok(IncomingArg),
+            "outgoing_arg" => Ok(OutgoingArg),
+            "sret_slot" => Ok(StructReturnSlot),
+            "emergency_slot" => Ok(EmergencySlot),
+            _ => Err(()),
+        }
+    }
+}
+
+impl fmt::Display for StackSlotKind {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use self::StackSlotKind::*;
+        f.write_str(match *self {
+            ExplicitSlot => "explicit_slot",
+            SpillSlot => "spill_slot",
+            IncomingArg => "incoming_arg",
+            OutgoingArg => "outgoing_arg",
+            StructReturnSlot => "sret_slot",
+            EmergencySlot => "emergency_slot",
+        })
+    }
+}
+
+/// Contents of a stack slot.
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct StackSlotData {
+    /// The kind of stack slot.
+    pub kind: StackSlotKind,
+
+    /// Size of stack slot in bytes.
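+    ///
+    /// For spill slots created by `make_spill_slot` below, this is at least
+    /// `MIN_SPILL_SLOT_SIZE` bytes, even for narrow types like `b1` and `i8`.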
+    pub size: StackSize,
+
+    /// Offset of stack slot relative to the stack pointer in the caller.
+    ///
+    /// On x86, the base address is the stack pointer *before* the return address was pushed. On
+    /// RISC ISAs, the base address is the value of the stack pointer on entry to the function.
+    ///
+    /// For `OutgoingArg` stack slots, the offset is relative to the current function's stack
+    /// pointer immediately before the call.
+    pub offset: Option<StackOffset>,
+}
+
+impl StackSlotData {
+    /// Create a stack slot with the specified byte size.
+    pub fn new(kind: StackSlotKind, size: StackSize) -> Self {
+        Self {
+            kind,
+            size,
+            offset: None,
+        }
+    }
+
+    /// Get the alignment in bytes of this stack slot given the stack pointer alignment.
+    pub fn alignment(&self, max_align: StackSize) -> StackSize {
+        debug_assert!(max_align.is_power_of_two());
+        // We want to find the largest power of two that divides both `self.size` and `max_align`.
+        // That is the same as isolating the rightmost bit in `x`.
+        let x = self.size | max_align;
+        // Cf. Hacker's Delight.
+        x & x.wrapping_neg()
+    }
+}
+
+impl fmt::Display for StackSlotData {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{} {}", self.kind, self.size)?;
+        if let Some(offset) = self.offset {
+            write!(f, ", offset {}", offset)?;
+        }
+        Ok(())
+    }
+}
+
+/// Stack frame layout information.
+///
+/// This is computed by the `layout_stack()` method.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct StackLayoutInfo {
+    /// The total size of the stack frame.
+    ///
+    /// This is the distance from the stack pointer in the current function to the stack pointer in
+    /// the calling function, so it includes a pushed return address as well as space for outgoing
+    /// call arguments.
+    pub frame_size: StackSize,
+
+    /// The total size of the stack frame for inbound arguments pushed by the caller.
+    pub inbound_args_size: StackSize,
+}
+
+/// Stack frame manager.
+///
+/// Keep track of all the stack slots used by a function.
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct StackSlots {
+    /// All allocated stack slots.
+    slots: PrimaryMap<StackSlot, StackSlotData>,
+
+    /// All the outgoing stack slots, ordered by offset.
+    outgoing: Vec<StackSlot>,
+
+    /// All the emergency slots.
+    emergency: Vec<StackSlot>,
+
+    /// Layout information computed from `layout_stack`.
+    pub layout_info: Option<StackLayoutInfo>,
+}
+
+/// Stack slot manager functions that behave mostly like an entity map.
+impl StackSlots {
+    /// Create an empty stack slot manager.
+    pub fn new() -> Self {
+        Self {
+            slots: PrimaryMap::new(),
+            outgoing: Vec::new(),
+            emergency: Vec::new(),
+            layout_info: None,
+        }
+    }
+
+    /// Clear out everything.
+    pub fn clear(&mut self) {
+        self.slots.clear();
+        self.outgoing.clear();
+        self.emergency.clear();
+        self.layout_info = None;
+    }
+
+    /// Allocate a new stack slot.
+    ///
+    /// This function should be primarily used by the text format parser. There are more convenient
+    /// functions for creating specific kinds of stack slots below.
+    pub fn push(&mut self, data: StackSlotData) -> StackSlot {
+        self.slots.push(data)
+    }
+
+    /// Check if `ss` is a valid stack slot reference.
+    pub fn is_valid(&self, ss: StackSlot) -> bool {
+        self.slots.is_valid(ss)
+    }
+
+    /// Get an iterator over all the stack slot keys.
+    pub fn iter(&self) -> Iter<StackSlot, StackSlotData> {
+        self.slots.iter()
+    }
+
+    /// Get an iterator over all the stack slot keys, mutable edition.
+    pub fn iter_mut(&mut self) -> IterMut<StackSlot, StackSlotData> {
+        self.slots.iter_mut()
+    }
+
+    /// Get an iterator over all the stack slot records.
+    pub fn values(&self) -> slice::Iter<StackSlotData> {
+        self.slots.values()
+    }
+
+    /// Get an iterator over all the stack slot records, mutable edition.
+    pub fn values_mut(&mut self) -> slice::IterMut<StackSlotData> {
+        self.slots.values_mut()
+    }
+
+    /// Get an iterator over all the stack slot keys.
+    pub fn keys(&self) -> Keys<StackSlot> {
+        self.slots.keys()
+    }
+
+    /// Get a reference to the next stack slot that would be created by `push()`.
+    ///
+    /// This should just be used by the parser.
+    pub fn next_key(&self) -> StackSlot {
+        self.slots.next_key()
+    }
+}
+
+impl Index<StackSlot> for StackSlots {
+    type Output = StackSlotData;
+
+    fn index(&self, ss: StackSlot) -> &StackSlotData {
+        &self.slots[ss]
+    }
+}
+
+impl IndexMut<StackSlot> for StackSlots {
+    fn index_mut(&mut self, ss: StackSlot) -> &mut StackSlotData {
+        &mut self.slots[ss]
+    }
+}
+
+/// Higher-level stack frame manipulation functions.
+impl StackSlots {
+    /// Create a new spill slot for spilling values of type `ty`.
+    pub fn make_spill_slot(&mut self, ty: Type) -> StackSlot {
+        self.push(StackSlotData::new(StackSlotKind::SpillSlot, spill_size(ty)))
+    }
+
+    /// Create a stack slot representing an incoming function argument.
+    pub fn make_incoming_arg(&mut self, ty: Type, offset: StackOffset) -> StackSlot {
+        let mut data = StackSlotData::new(StackSlotKind::IncomingArg, ty.bytes());
+        debug_assert!(offset <= StackOffset::max_value() - data.size as StackOffset);
+        data.offset = Some(offset);
+        self.push(data)
+    }
+
+    /// Get a stack slot representing an outgoing argument.
+    ///
+    /// This may create a new stack slot, or reuse an existing outgoing stack slot with the
+    /// requested offset and size.
+    ///
+    /// The requested offset is relative to this function's stack pointer immediately before making
+    /// the call.
+    pub fn get_outgoing_arg(&mut self, ty: Type, offset: StackOffset) -> StackSlot {
+        let size = ty.bytes();
+
+        // Look for an existing outgoing stack slot with the same offset and size.
+        let inspos = match self.outgoing.binary_search_by_key(&(offset, size), |&ss| {
+            (self[ss].offset.unwrap(), self[ss].size)
+        }) {
+            Ok(idx) => return self.outgoing[idx],
+            Err(idx) => idx,
+        };
+
+        // No existing slot found. Make one and insert it into `outgoing`.
+        let mut data = StackSlotData::new(StackSlotKind::OutgoingArg, size);
+        debug_assert!(offset <= StackOffset::max_value() - size as StackOffset);
+        data.offset = Some(offset);
+        let ss = self.slots.push(data);
+        self.outgoing.insert(inspos, ss);
+        ss
+    }
+
+    /// Get an emergency spill slot that can be used to store a `ty` value.
+    ///
+    /// This may allocate a new slot, or it may reuse an existing emergency spill slot, excluding
+    /// any slots in the `in_use` list.
+    pub fn get_emergency_slot(
+        &mut self,
+        ty: Type,
+        in_use: &[PackedOption<StackSlot>],
+    ) -> StackSlot {
+        let size = spill_size(ty);
+
+        // Find the smallest existing slot that can fit the type.
+        if let Some(&ss) = self
+            .emergency
+            .iter()
+            .filter(|&&ss| self[ss].size >= size && !in_use.contains(&ss.into()))
+            .min_by_key(|&&ss| self[ss].size)
+        {
+            return ss;
+        }
+
+        // Alternatively, use the largest available slot and make it larger.
+        if let Some(&ss) = self
+            .emergency
+            .iter()
+            .filter(|&&ss| !in_use.contains(&ss.into()))
+            .max_by_key(|&&ss| self[ss].size)
+        {
+            self.slots[ss].size = size;
+            return ss;
+        }
+
+        // No existing slot found. Make one and insert it into `emergency`.
+        let data = StackSlotData::new(StackSlotKind::EmergencySlot, size);
+        let ss = self.slots.push(data);
+        self.emergency.push(ss);
+        ss
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::ir::types;
+    use crate::ir::Function;
+    use alloc::string::ToString;
+
+    #[test]
+    fn stack_slot() {
+        let mut func = Function::new();
+
+        let ss0 = func.create_stack_slot(StackSlotData::new(StackSlotKind::IncomingArg, 4));
+        let ss1 = func.create_stack_slot(StackSlotData::new(StackSlotKind::SpillSlot, 8));
+        assert_eq!(ss0.to_string(), "ss0");
+        assert_eq!(ss1.to_string(), "ss1");
+
+        assert_eq!(func.stack_slots[ss0].size, 4);
+        assert_eq!(func.stack_slots[ss1].size, 8);
+
+        assert_eq!(func.stack_slots[ss0].to_string(), "incoming_arg 4");
+        assert_eq!(func.stack_slots[ss1].to_string(), "spill_slot 8");
+    }
+
+    #[test]
+    fn outgoing() {
+        let mut sss = StackSlots::new();
+
+        let ss0 = sss.get_outgoing_arg(types::I32, 8);
+        let ss1 = sss.get_outgoing_arg(types::I32, 4);
+        let ss2 = sss.get_outgoing_arg(types::I64, 8);
+
+        assert_eq!(sss[ss0].offset, Some(8));
+        assert_eq!(sss[ss0].size, 4);
+
+        assert_eq!(sss[ss1].offset, Some(4));
+        assert_eq!(sss[ss1].size, 4);
+
+        assert_eq!(sss[ss2].offset, Some(8));
+        assert_eq!(sss[ss2].size, 8);
+
+        assert_eq!(sss.get_outgoing_arg(types::I32, 8), ss0);
+        assert_eq!(sss.get_outgoing_arg(types::I32, 4), ss1);
+        assert_eq!(sss.get_outgoing_arg(types::I64, 8), ss2);
+    }
+
+    #[test]
+    fn alignment() {
+        let slot = StackSlotData::new(StackSlotKind::SpillSlot, 8);
+
+        assert_eq!(slot.alignment(4), 4);
+        assert_eq!(slot.alignment(8), 8);
+        assert_eq!(slot.alignment(16), 8);
+
+        let slot2 = StackSlotData::new(StackSlotKind::ExplicitSlot, 24);
+
+        assert_eq!(slot2.alignment(4), 4);
+        assert_eq!(slot2.alignment(8), 8);
+        assert_eq!(slot2.alignment(16), 8);
+        assert_eq!(slot2.alignment(32), 8);
+    }
+
+    #[test]
+    fn emergency() {
+        let mut sss = StackSlots::new();
+
+        let ss0 = sss.get_emergency_slot(types::I32, &[]);
+        assert_eq!(sss[ss0].size, 4);
+
+        // When a smaller size is requested, we should simply get the same slot back.
+        assert_eq!(sss.get_emergency_slot(types::I8, &[]), ss0);
+        assert_eq!(sss[ss0].size, 4);
+        assert_eq!(sss.get_emergency_slot(types::F32, &[]), ss0);
+        assert_eq!(sss[ss0].size, 4);
+
+        // Ask for a larger size and the slot should grow.
+        assert_eq!(sss.get_emergency_slot(types::F64, &[]), ss0);
+        assert_eq!(sss[ss0].size, 8);
+
+        // When one slot is in use, we should get a new one.
+        let ss1 = sss.get_emergency_slot(types::I32, &[None.into(), ss0.into()]);
+        assert_eq!(sss[ss0].size, 8);
+        assert_eq!(sss[ss1].size, 4);
+
+        // Now we should get the smallest fit of the two available slots.
+        assert_eq!(sss.get_emergency_slot(types::F32, &[]), ss1);
+        assert_eq!(sss.get_emergency_slot(types::F64, &[]), ss0);
+    }
+}
diff --git a/cranelift/codegen/src/ir/table.rs b/cranelift/codegen/src/ir/table.rs
new file mode 100644
index 0000000000..9e436cca64
--- /dev/null
+++ b/cranelift/codegen/src/ir/table.rs
@@ -0,0 +1,36 @@
+//! Tables.
+
+use crate::ir::immediates::Uimm64;
+use crate::ir::{GlobalValue, Type};
+use core::fmt;
+
+/// Information about a table declaration.
+#[derive(Clone)]
+pub struct TableData {
+    /// Global value giving the address of the start of the table.
+    pub base_gv: GlobalValue,
+
+    /// Guaranteed minimum table size in elements. Table accesses before `min_size` don't need
+    /// bounds checking.
+    pub min_size: Uimm64,
+
+    /// Global value giving the current bound of the table, in elements.
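+    ///
+    /// Accesses at indices at or beyond this bound are out of range and must be
+    /// bounds-checked.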
+    pub bound_gv: GlobalValue,
+
+    /// The size of a table element, in bytes.
+    pub element_size: Uimm64,
+
+    /// The index type for the table.
+    pub index_type: Type,
+}
+
+impl fmt::Display for TableData {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str("dynamic")?;
+        write!(
+            f,
+            " {}, min {}, bound {}, element_size {}, index_type {}",
+            self.base_gv, self.min_size, self.bound_gv, self.element_size, self.index_type
+        )
+    }
+}
diff --git a/cranelift/codegen/src/ir/trapcode.rs b/cranelift/codegen/src/ir/trapcode.rs
new file mode 100644
index 0000000000..3f8ffe3fb5
--- /dev/null
+++ b/cranelift/codegen/src/ir/trapcode.rs
@@ -0,0 +1,137 @@
+//! Trap codes describing the reason for a trap.
+
+use core::fmt::{self, Display, Formatter};
+use core::str::FromStr;
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+
+/// A trap code describing the reason for a trap.
+///
+/// All trap instructions have an explicit trap code.
+#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub enum TrapCode {
+    /// The current stack space was exhausted.
+    ///
+    /// On some platforms, a stack overflow may also be indicated by a segmentation fault from the
+    /// stack guard page.
+    StackOverflow,
+
+    /// A `heap_addr` instruction detected an out-of-bounds error.
+    ///
+    /// Note that not all out-of-bounds heap accesses are reported this way;
+    /// some are detected by a segmentation fault on the heap's unmapped or
+    /// offset-guard pages.
+    HeapOutOfBounds,
+
+    /// A `table_addr` instruction detected an out-of-bounds error.
+    TableOutOfBounds,
+
+    /// Other bounds checking error.
+    OutOfBounds,
+
+    /// Indirect call to a null table entry.
+    IndirectCallToNull,
+
+    /// Signature mismatch on indirect call.
+    BadSignature,
+
+    /// An integer arithmetic operation caused an overflow.
+    IntegerOverflow,
+
+    /// An integer division by zero.
+    IntegerDivisionByZero,
+
+    /// Failed float-to-int conversion.
+    BadConversionToInteger,
+
+    /// Code that was supposed to have been unreachable was reached.
+    UnreachableCodeReached,
+
+    /// Execution has potentially run too long and may be interrupted.
+    /// This trap is resumable.
+    Interrupt,
+
+    /// A user-defined trap code.
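+    ///
+    /// Cranelift attaches no meaning to user codes; the embedder defines them. In the textual
+    /// IR they print and parse as `user<n>`, e.g. `user17` (see the `Display` and `FromStr`
+    /// impls below).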
+    User(u16),
+}
+
+impl Display for TrapCode {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        use self::TrapCode::*;
+        let identifier = match *self {
+            StackOverflow => "stk_ovf",
+            HeapOutOfBounds => "heap_oob",
+            TableOutOfBounds => "table_oob",
+            OutOfBounds => "oob",
+            IndirectCallToNull => "icall_null",
+            BadSignature => "bad_sig",
+            IntegerOverflow => "int_ovf",
+            IntegerDivisionByZero => "int_divz",
+            BadConversionToInteger => "bad_toint",
+            UnreachableCodeReached => "unreachable",
+            Interrupt => "interrupt",
+            User(x) => return write!(f, "user{}", x),
+        };
+        f.write_str(identifier)
+    }
+}
+
+impl FromStr for TrapCode {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        use self::TrapCode::*;
+        match s {
+            "stk_ovf" => Ok(StackOverflow),
+            "heap_oob" => Ok(HeapOutOfBounds),
+            "table_oob" => Ok(TableOutOfBounds),
+            "oob" => Ok(OutOfBounds),
+            "icall_null" => Ok(IndirectCallToNull),
+            "bad_sig" => Ok(BadSignature),
+            "int_ovf" => Ok(IntegerOverflow),
+            "int_divz" => Ok(IntegerDivisionByZero),
+            "bad_toint" => Ok(BadConversionToInteger),
+            "unreachable" => Ok(UnreachableCodeReached),
+            "interrupt" => Ok(Interrupt),
+            _ if s.starts_with("user") => s[4..].parse().map(User).map_err(|_| ()),
+            _ => Err(()),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use alloc::string::ToString;
+
+    // Everything but user-defined codes.
+    const CODES: [TrapCode; 11] = [
+        TrapCode::StackOverflow,
+        TrapCode::HeapOutOfBounds,
+        TrapCode::TableOutOfBounds,
+        TrapCode::OutOfBounds,
+        TrapCode::IndirectCallToNull,
+        TrapCode::BadSignature,
+        TrapCode::IntegerOverflow,
+        TrapCode::IntegerDivisionByZero,
+        TrapCode::BadConversionToInteger,
+        TrapCode::UnreachableCodeReached,
+        TrapCode::Interrupt,
+    ];
+
+    #[test]
+    fn display() {
+        for r in &CODES {
+            let tc = *r;
+            assert_eq!(tc.to_string().parse(), Ok(tc));
+        }
+        assert_eq!("bogus".parse::<TrapCode>(), Err(()));
+
+        assert_eq!(TrapCode::User(17).to_string(), "user17");
+        assert_eq!("user22".parse(), Ok(TrapCode::User(22)));
+        assert_eq!("user".parse::<TrapCode>(), Err(()));
+        assert_eq!("user-1".parse::<TrapCode>(), Err(()));
+        assert_eq!("users".parse::<TrapCode>(), Err(()));
    }
+}
diff --git a/cranelift/codegen/src/ir/types.rs b/cranelift/codegen/src/ir/types.rs
new file mode 100644
index 0000000000..1baca567e1
--- /dev/null
+++ b/cranelift/codegen/src/ir/types.rs
@@ -0,0 +1,508 @@
+//! Common types for the Cranelift code generator.
+
+use core::default::Default;
+use core::fmt::{self, Debug, Display, Formatter};
+use cranelift_codegen_shared::constants;
+use target_lexicon::{PointerWidth, Triple};
+
+/// The type of an SSA value.
+///
+/// The `INVALID` type isn't a real type, and is used as a placeholder in the IR where a type
+/// field is present but no type is needed, such as the controlling type variable for a
+/// non-polymorphic instruction.
+///
+/// Basic integer types: `I8`, `I16`, `I32`, `I64`, and `I128`. These types are sign-agnostic.
+///
+/// Basic floating point types: `F32` and `F64`. IEEE single and double precision.
+///
+/// Boolean types: `B1`, `B8`, `B16`, `B32`, `B64`, and `B128`. These all encode 'true' or
+/// 'false'. The larger types use redundant bits.
+///
+/// SIMD vector types have power-of-two lanes, up to 256. Lanes can be any int/float/bool type.
+///
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+pub struct Type(u8);
+
+/// Not a valid type. Can't be loaded or stored. Can't be part of a SIMD vector.
+pub const INVALID: Type = Type(0);
+
+// Include code generated by `cranelift-codegen/meta/gen_types.rs`. This file contains constant
+// definitions for all the scalar types as well as common vector types for 64, 128, 256, and
+// 512-bit SIMD vectors.
+include!(concat!(env!("OUT_DIR"), "/types.rs"));
+
+impl Type {
+    /// Get the lane type of this SIMD vector type.
+    ///
+    /// A lane type is the same as a SIMD vector type with one lane, so it returns itself.
+    pub fn lane_type(self) -> Self {
+        if self.0 < constants::VECTOR_BASE {
+            self
+        } else {
+            Self(constants::LANE_BASE | (self.0 & 0x0f))
+        }
+    }
+
+    /// The type transformation that returns the lane type of a type variable; it is just a
+    /// renaming of `lane_type()` to be used in contexts where we think in terms of type variable
+    /// transformations.
+    pub fn lane_of(self) -> Self {
+        self.lane_type()
+    }
+
+    /// Get log_2 of the number of bits in a lane.
+    pub fn log2_lane_bits(self) -> u8 {
+        match self.lane_type() {
+            B1 => 0,
+            B8 | I8 => 3,
+            B16 | I16 => 4,
+            B32 | I32 | F32 | R32 => 5,
+            B64 | I64 | F64 | R64 => 6,
+            B128 | I128 => 7,
+            _ => 0,
+        }
+    }
+
+    /// Get the number of bits in a lane.
+    pub fn lane_bits(self) -> u8 {
+        match self.lane_type() {
+            B1 => 1,
+            B8 | I8 => 8,
+            B16 | I16 => 16,
+            B32 | I32 | F32 | R32 => 32,
+            B64 | I64 | F64 | R64 => 64,
+            B128 | I128 => 128,
+            _ => 0,
+        }
+    }
+
+    /// Get an integer type with the requested number of bits.
+    pub fn int(bits: u16) -> Option<Self> {
+        match bits {
+            8 => Some(I8),
+            16 => Some(I16),
+            32 => Some(I32),
+            64 => Some(I64),
+            128 => Some(I128),
+            _ => None,
+        }
+    }
+
+    /// Get a type with the same number of lanes as `self`, but using `lane` as the lane type.
+    fn replace_lanes(self, lane: Self) -> Self {
+        debug_assert!(lane.is_lane() && !self.is_special());
+        Self((lane.0 & 0x0f) | (self.0 & 0xf0))
+    }
+
+    /// Get a type with the same number of lanes as this type, but with the lanes replaced by
+    /// booleans of the same size.
+    ///
+    /// Lane types are treated as vectors with one lane, so they are converted to the multi-bit
+    /// boolean types.
+    pub fn as_bool_pedantic(self) -> Self {
+        // Replace the low 4 bits with the boolean version, preserve the high 4 bits.
+        self.replace_lanes(match self.lane_type() {
+            B8 | I8 => B8,
+            B16 | I16 => B16,
+            B32 | I32 | F32 => B32,
+            B64 | I64 | F64 => B64,
+            R32 | R64 => panic!("Reference types should not convert to bool"),
+            B128 | I128 => B128,
+            _ => B1,
+        })
+    }
+
+    /// Get a type with the same number of lanes as this type, but with the lanes replaced by
+    /// booleans of the same size.
+    ///
+    /// Scalar types are all converted to `b1` which is usually what you want.
+    pub fn as_bool(self) -> Self {
+        if !self.is_vector() {
+            B1
+        } else {
+            self.as_bool_pedantic()
+        }
+    }
+
+    /// Get a type with the same number of lanes as this type, but with lanes that are half the
+    /// number of bits.
+    pub fn half_width(self) -> Option<Self> {
+        Some(self.replace_lanes(match self.lane_type() {
+            I16 => I8,
+            I32 => I16,
+            I64 => I32,
+            I128 => I64,
+            F64 => F32,
+            B16 => B8,
+            B32 => B16,
+            B64 => B32,
+            B128 => B64,
+            _ => return None,
+        }))
+    }
+
+    /// Get a type with the same number of lanes as this type, but with lanes that are twice the
+    /// number of bits.
+    pub fn double_width(self) -> Option<Self> {
+        Some(self.replace_lanes(match self.lane_type() {
+            I8 => I16,
+            I16 => I32,
+            I32 => I64,
+            I64 => I128,
+            F32 => F64,
+            B8 => B16,
+            B16 => B32,
+            B32 => B64,
+            B64 => B128,
+            _ => return None,
+        }))
+    }
+
+    /// Is this the INVALID type?
+    pub fn is_invalid(self) -> bool {
+        self == INVALID
+    }
+
+    /// Is this a special type?
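+    ///
+    /// Special types are those below `LANE_BASE` in the encoding space: `INVALID` and the CPU
+    /// flags types (`iflags`, `fflags`). They can't appear as SIMD lanes.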
+    pub fn is_special(self) -> bool {
+        self.0 < constants::LANE_BASE
+    }
+
+    /// Is this a lane type?
+    ///
+    /// This is a scalar type that can also appear as the lane type of a SIMD vector.
+    pub fn is_lane(self) -> bool {
+        constants::LANE_BASE <= self.0 && self.0 < constants::VECTOR_BASE
+    }
+
+    /// Is this a SIMD vector type?
+    ///
+    /// A vector type has 2 or more lanes.
+    pub fn is_vector(self) -> bool {
+        self.0 >= constants::VECTOR_BASE
+    }
+
+    /// Is this a scalar boolean type?
+    pub fn is_bool(self) -> bool {
+        match self {
+            B1 | B8 | B16 | B32 | B64 | B128 => true,
+            _ => false,
+        }
+    }
+
+    /// Is this a scalar integer type?
+    pub fn is_int(self) -> bool {
+        match self {
+            I8 | I16 | I32 | I64 | I128 => true,
+            _ => false,
+        }
+    }
+
+    /// Is this a scalar floating point type?
+    pub fn is_float(self) -> bool {
+        match self {
+            F32 | F64 => true,
+            _ => false,
+        }
+    }
+
+    /// Is this a CPU flags type?
+    pub fn is_flags(self) -> bool {
+        match self {
+            IFLAGS | FFLAGS => true,
+            _ => false,
+        }
+    }
+
+    /// Is this a ref type?
+    pub fn is_ref(self) -> bool {
+        match self {
+            R32 | R64 => true,
+            _ => false,
+        }
+    }
+
+    /// Get log_2 of the number of lanes in this SIMD vector type.
+    ///
+    /// All SIMD types have a lane count that is a power of two and no larger than 256, so this
+    /// will be a number in the range 0-8.
+    ///
+    /// A scalar type is the same as a SIMD vector type with one lane, so it returns 0.
+    pub fn log2_lane_count(self) -> u8 {
+        self.0.saturating_sub(constants::LANE_BASE) >> 4
+    }
+
+    /// Get the number of lanes in this SIMD vector type.
+    ///
+    /// A scalar type is the same as a SIMD vector type with one lane, so it returns 1.
+    pub fn lane_count(self) -> u16 {
+        1 << self.log2_lane_count()
+    }
+
+    /// Get the total number of bits used to represent this type.
+    pub fn bits(self) -> u16 {
+        u16::from(self.lane_bits()) * self.lane_count()
+    }
+
+    /// Get the number of bytes used to store this type in memory.
+    pub fn bytes(self) -> u32 {
+        (u32::from(self.bits()) + 7) / 8
+    }
+
+    /// Get a SIMD vector type with `n` times more lanes than this one.
+    ///
+    /// If this is a scalar type, this produces a SIMD type with this as a lane type and `n` lanes.
+    ///
+    /// If this is already a SIMD vector type, this produces a SIMD vector type with `n *
+    /// self.lane_count()` lanes.
+    pub fn by(self, n: u16) -> Option<Self> {
+        if self.lane_bits() == 0 || !n.is_power_of_two() {
+            return None;
+        }
+        let log2_lanes: u32 = n.trailing_zeros();
+        let new_type = u32::from(self.0) + (log2_lanes << 4);
+        if new_type < 0x100 {
+            Some(Self(new_type as u8))
+        } else {
+            None
+        }
+    }
+
+    /// Get a SIMD vector with half the number of lanes.
+    ///
+    /// There is no `double_vector()` method. Use `t.by(2)` instead.
+    pub fn half_vector(self) -> Option<Self> {
+        if self.is_vector() {
+            Some(Self(self.0 - 0x10))
+        } else {
+            None
+        }
+    }
+
+    /// Index of this type, for use with hash tables etc.
+    pub fn index(self) -> usize {
+        usize::from(self.0)
+    }
+
+    /// True iff:
+    ///
+    /// 1. `self.lane_count() == other.lane_count()` and
+    /// 2. `self.lane_bits() >= other.lane_bits()`
+    pub fn wider_or_equal(self, other: Self) -> bool {
+        self.lane_count() == other.lane_count() && self.lane_bits() >= other.lane_bits()
+    }
+
+    /// Return the pointer type for the given target triple.
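+    ///
+    /// A minimal sketch of the intended use (the triple string is just an example):
+    ///
+    /// ```ignore
+    /// use core::str::FromStr;
+    /// use target_lexicon::Triple;
+    ///
+    /// let triple = Triple::from_str("x86_64-unknown-linux-gnu").unwrap();
+    /// assert_eq!(Type::triple_pointer_type(&triple), I64);
+    /// ```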
+    pub fn triple_pointer_type(triple: &Triple) -> Self {
+        match triple.pointer_width() {
+            Ok(PointerWidth::U16) => I16,
+            Ok(PointerWidth::U32) => I32,
+            Ok(PointerWidth::U64) => I64,
+            Err(()) => panic!("unable to determine architecture pointer width"),
+        }
+    }
+}
+
+impl Display for Type {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        if self.is_bool() {
+            write!(f, "b{}", self.lane_bits())
+        } else if self.is_int() {
+            write!(f, "i{}", self.lane_bits())
+        } else if self.is_float() {
+            write!(f, "f{}", self.lane_bits())
+        } else if self.is_vector() {
+            write!(f, "{}x{}", self.lane_type(), self.lane_count())
+        } else if self.is_ref() {
+            write!(f, "r{}", self.lane_bits())
+        } else {
+            f.write_str(match *self {
+                IFLAGS => "iflags",
+                FFLAGS => "fflags",
+                INVALID => panic!("INVALID encountered"),
+                _ => panic!("Unknown Type(0x{:x})", self.0),
+            })
+        }
+    }
+}
+
+impl Debug for Type {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        if self.is_bool() {
+            write!(f, "types::B{}", self.lane_bits())
+        } else if self.is_int() {
+            write!(f, "types::I{}", self.lane_bits())
+        } else if self.is_float() {
+            write!(f, "types::F{}", self.lane_bits())
+        } else if self.is_vector() {
+            write!(f, "{:?}X{}", self.lane_type(), self.lane_count())
+        } else if self.is_ref() {
+            write!(f, "types::R{}", self.lane_bits())
+        } else {
+            match *self {
+                INVALID => write!(f, "types::INVALID"),
+                IFLAGS => write!(f, "types::IFLAGS"),
+                FFLAGS => write!(f, "types::FFLAGS"),
+                _ => write!(f, "Type(0x{:x})", self.0),
+            }
+        }
+    }
+}
+
+impl Default for Type {
+    fn default() -> Self {
+        INVALID
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use alloc::string::ToString;
+
+    #[test]
+    fn basic_scalars() {
+        assert_eq!(INVALID, INVALID.lane_type());
+        assert_eq!(0, INVALID.bits());
+        assert_eq!(IFLAGS, IFLAGS.lane_type());
+        assert_eq!(0, IFLAGS.bits());
+        assert_eq!(FFLAGS, FFLAGS.lane_type());
+        assert_eq!(0, FFLAGS.bits());
+        assert_eq!(B1, B1.lane_type());
+        assert_eq!(B8, B8.lane_type());
+        assert_eq!(B16, B16.lane_type());
+        assert_eq!(B32, B32.lane_type());
+        assert_eq!(B64, B64.lane_type());
+        assert_eq!(B128, B128.lane_type());
+        assert_eq!(I8, I8.lane_type());
+        assert_eq!(I16, I16.lane_type());
+        assert_eq!(I32, I32.lane_type());
+        assert_eq!(I64, I64.lane_type());
+        assert_eq!(I128, I128.lane_type());
+        assert_eq!(F32, F32.lane_type());
+        assert_eq!(F64, F64.lane_type());
+        assert_eq!(B1, B1.by(8).unwrap().lane_type());
+        assert_eq!(I32, I32X4.lane_type());
+        assert_eq!(F64, F64X2.lane_type());
+        assert_eq!(R32, R32.lane_type());
+        assert_eq!(R64, R64.lane_type());
+
+        assert_eq!(INVALID.lane_bits(), 0);
+        assert_eq!(IFLAGS.lane_bits(), 0);
+        assert_eq!(FFLAGS.lane_bits(), 0);
+        assert_eq!(B1.lane_bits(), 1);
+        assert_eq!(B8.lane_bits(), 8);
+        assert_eq!(B16.lane_bits(), 16);
+        assert_eq!(B32.lane_bits(), 32);
+        assert_eq!(B64.lane_bits(), 64);
+        assert_eq!(B128.lane_bits(), 128);
+        assert_eq!(I8.lane_bits(), 8);
+        assert_eq!(I16.lane_bits(), 16);
+        assert_eq!(I32.lane_bits(), 32);
+        assert_eq!(I64.lane_bits(), 64);
+        assert_eq!(I128.lane_bits(), 128);
+        assert_eq!(F32.lane_bits(), 32);
+        assert_eq!(F64.lane_bits(), 64);
+        assert_eq!(R32.lane_bits(), 32);
+        assert_eq!(R64.lane_bits(), 64);
+    }
+
+    #[test]
+    fn typevar_functions() {
+        assert_eq!(INVALID.half_width(), None);
+        assert_eq!(IFLAGS.half_width(), None);
+        assert_eq!(FFLAGS.half_width(), None);
+        assert_eq!(B1.half_width(), None);
+        assert_eq!(B8.half_width(), None);
+        assert_eq!(B16.half_width(), Some(B8));
+        assert_eq!(B32.half_width(), Some(B16));
+        assert_eq!(B64.half_width(), Some(B32));
+        assert_eq!(B128.half_width(), Some(B64));
+        assert_eq!(I8.half_width(), None);
+        assert_eq!(I16.half_width(), Some(I8));
+        assert_eq!(I32.half_width(), Some(I16));
+        assert_eq!(I32X4.half_width(), Some(I16X4));
+        assert_eq!(I64.half_width(), Some(I32));
+        assert_eq!(I128.half_width(), Some(I64));
+        assert_eq!(F32.half_width(), None);
+        assert_eq!(F64.half_width(), Some(F32));
+
+        assert_eq!(INVALID.double_width(), None);
+        assert_eq!(IFLAGS.double_width(), None);
+        assert_eq!(FFLAGS.double_width(), None);
+        assert_eq!(B1.double_width(), None);
+        assert_eq!(B8.double_width(), Some(B16));
+        assert_eq!(B16.double_width(), Some(B32));
+        assert_eq!(B32.double_width(), Some(B64));
+        assert_eq!(B64.double_width(), Some(B128));
+        assert_eq!(B128.double_width(), None);
+        assert_eq!(I8.double_width(), Some(I16));
+        assert_eq!(I16.double_width(), Some(I32));
+        assert_eq!(I32.double_width(), Some(I64));
+        assert_eq!(I32X4.double_width(), Some(I64X4));
+        assert_eq!(I64.double_width(), Some(I128));
+        assert_eq!(I128.double_width(), None);
+        assert_eq!(F32.double_width(), Some(F64));
+        assert_eq!(F64.double_width(), None);
+    }
+
+    #[test]
+    fn vectors() {
+        let big = F64.by(256).unwrap();
+        assert_eq!(big.lane_bits(), 64);
+        assert_eq!(big.lane_count(), 256);
+        assert_eq!(big.bits(), 64 * 256);
+
+        assert_eq!(big.half_vector().unwrap().to_string(), "f64x128");
+        assert_eq!(B1.by(2).unwrap().half_vector().unwrap().to_string(), "b1");
+        assert_eq!(I32.half_vector(), None);
+        assert_eq!(INVALID.half_vector(), None);
+
+        // Check that the generated constants match the computed vector types.
+        assert_eq!(I32.by(4), Some(I32X4));
+        assert_eq!(F64.by(8), Some(F64X8));
+    }
+
+    #[test]
+    fn format_scalars() {
+        assert_eq!(IFLAGS.to_string(), "iflags");
+        assert_eq!(FFLAGS.to_string(), "fflags");
+        assert_eq!(B1.to_string(), "b1");
+        assert_eq!(B8.to_string(), "b8");
+        assert_eq!(B16.to_string(), "b16");
+        assert_eq!(B32.to_string(), "b32");
+        assert_eq!(B64.to_string(), "b64");
+        assert_eq!(B128.to_string(), "b128");
+        assert_eq!(I8.to_string(), "i8");
+        assert_eq!(I16.to_string(), "i16");
+        assert_eq!(I32.to_string(), "i32");
+        assert_eq!(I64.to_string(), "i64");
+        assert_eq!(I128.to_string(), "i128");
+        assert_eq!(F32.to_string(), "f32");
+        assert_eq!(F64.to_string(), "f64");
+        assert_eq!(R32.to_string(), "r32");
+        assert_eq!(R64.to_string(), "r64");
+    }
+
+    #[test]
+    fn format_vectors() {
+        assert_eq!(B1.by(8).unwrap().to_string(), "b1x8");
+        assert_eq!(B8.by(1).unwrap().to_string(), "b8");
+        assert_eq!(B16.by(256).unwrap().to_string(), "b16x256");
+        assert_eq!(B32.by(4).unwrap().by(2).unwrap().to_string(), "b32x8");
+        assert_eq!(B64.by(8).unwrap().to_string(), "b64x8");
+        assert_eq!(I8.by(64).unwrap().to_string(), "i8x64");
+        assert_eq!(F64.by(2).unwrap().to_string(), "f64x2");
+        assert_eq!(I8.by(3), None);
+        assert_eq!(I8.by(512), None);
+        assert_eq!(INVALID.by(4), None);
+    }
+
+    #[test]
+    fn as_bool() {
+        assert_eq!(I32X4.as_bool(), B32X4);
+        assert_eq!(I32.as_bool(), B1);
+        assert_eq!(I32X4.as_bool_pedantic(), B32X4);
+        assert_eq!(I32.as_bool_pedantic(), B32);
+    }
+}
diff --git a/cranelift/codegen/src/ir/valueloc.rs b/cranelift/codegen/src/ir/valueloc.rs
new file mode 100644
index 0000000000..9d81a55381
--- /dev/null
+++ b/cranelift/codegen/src/ir/valueloc.rs
@@ -0,0 +1,165 @@
+//! Value locations.
+//!
+//! The register allocator assigns every SSA value to either a register or a stack slot. This
+//! assignment is represented by a `ValueLoc` object.
+
+use crate::ir::StackSlot;
+use crate::isa::{RegInfo, RegUnit};
+use core::fmt;
+
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+
+/// Value location.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub enum ValueLoc {
+    /// This value has not been assigned to a location yet.
+    Unassigned,
+    /// Value is assigned to a register.
+    Reg(RegUnit),
+    /// Value is assigned to a stack slot.
+    Stack(StackSlot),
+}
+
+impl Default for ValueLoc {
+    fn default() -> Self {
+        Self::Unassigned
+    }
+}
+
+impl ValueLoc {
+    /// Is this an assigned location? (That is, not `Unassigned`).
+    pub fn is_assigned(self) -> bool {
+        match self {
+            Self::Unassigned => false,
+            _ => true,
+        }
+    }
+
+    /// Get the register unit of this location, or panic.
+    pub fn unwrap_reg(self) -> RegUnit {
+        match self {
+            Self::Reg(ru) => ru,
+            _ => panic!("Expected register: {:?}", self),
+        }
+    }
+
+    /// Get the stack slot of this location, or panic.
+    pub fn unwrap_stack(self) -> StackSlot {
+        match self {
+            Self::Stack(ss) => ss,
+            _ => panic!("Expected stack slot: {:?}", self),
+        }
+    }
+
+    /// Return an object that can display this value location, using the register info from the
+    /// target ISA.
+    pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayValueLoc<'a> {
+        DisplayValueLoc(self, regs.into())
+    }
+}
+
+/// Displaying a `ValueLoc` correctly requires the associated `RegInfo` from the target ISA.
+/// Without the register info, register units are simply shown as numbers.
+///
+/// The `DisplayValueLoc` type can display the contained `ValueLoc`.
+pub struct DisplayValueLoc<'a>(ValueLoc, Option<&'a RegInfo>);
+
+impl<'a> fmt::Display for DisplayValueLoc<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self.0 {
+            ValueLoc::Unassigned => write!(f, "-"),
+            ValueLoc::Reg(ru) => match self.1 {
+                Some(regs) => write!(f, "{}", regs.display_regunit(ru)),
+                None => write!(f, "%{}", ru),
+            },
+            ValueLoc::Stack(ss) => write!(f, "{}", ss),
+        }
+    }
+}
+
+/// Function argument location.
+///
+/// The ABI specifies how arguments are passed to a function, and where return values appear after
+/// the call. Just like a `ValueLoc`, function arguments can be passed in registers or on the
+/// stack.
+///
+/// Function arguments on the stack are accessed differently for the incoming arguments to the
+/// current function and the outgoing arguments to a called external function. For this reason,
+/// the location of stack arguments is described as an offset into the array of function arguments
+/// on the stack.
+///
+/// An `ArgumentLoc` can be translated to a `ValueLoc` only when we know if we're talking about an
+/// incoming argument or an outgoing argument.
+///
+/// - For stack arguments, different `StackSlot` entities are used to represent incoming and
+///   outgoing arguments.
+/// - For register arguments, there is usually no difference, but if we ever add support for a
+///   register-window ISA like SPARC, register arguments would also need to be translated.
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+pub enum ArgumentLoc {
+    /// This argument has not been assigned to a location yet.
+    Unassigned,
+    /// Argument is passed in a register.
+    Reg(RegUnit),
+    /// Argument is passed on the stack, at the given byte offset into the argument array.
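+    ///
+    /// The same offset describes the location from both sides of a call: it is translated to an
+    /// `IncomingArg` stack slot in the callee and to an `OutgoingArg` stack slot in the caller
+    /// (see the discussion above).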
+    Stack(i32),
+}
+
+impl Default for ArgumentLoc {
+    fn default() -> Self {
+        Self::Unassigned
+    }
+}
+
+impl ArgumentLoc {
+    /// Is this an assigned location? (That is, not `Unassigned`).
+    pub fn is_assigned(self) -> bool {
+        match self {
+            Self::Unassigned => false,
+            _ => true,
+        }
+    }
+
+    /// Is this a register location?
+    pub fn is_reg(self) -> bool {
+        match self {
+            Self::Reg(_) => true,
+            _ => false,
+        }
+    }
+
+    /// Is this a stack location?
+    pub fn is_stack(self) -> bool {
+        match self {
+            Self::Stack(_) => true,
+            _ => false,
+        }
+    }
+
+    /// Return an object that can display this argument location, using the register info from the
+    /// target ISA.
+    pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayArgumentLoc<'a> {
+        DisplayArgumentLoc(self, regs.into())
+    }
+}
+
+/// Displaying an `ArgumentLoc` correctly requires the associated `RegInfo` from the target ISA.
+/// Without the register info, register units are simply shown as numbers.
+///
+/// The `DisplayArgumentLoc` type can display the contained `ArgumentLoc`.
+pub struct DisplayArgumentLoc<'a>(ArgumentLoc, Option<&'a RegInfo>);
+
+impl<'a> fmt::Display for DisplayArgumentLoc<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self.0 {
+            ArgumentLoc::Unassigned => write!(f, "-"),
+            ArgumentLoc::Reg(ru) => match self.1 {
+                Some(regs) => write!(f, "{}", regs.display_regunit(ru)),
+                None => write!(f, "%{}", ru),
+            },
+            ArgumentLoc::Stack(offset) => write!(f, "{}", offset),
+        }
+    }
+}
diff --git a/cranelift/codegen/src/isa/arm32/abi.rs b/cranelift/codegen/src/isa/arm32/abi.rs
new file mode 100644
index 0000000000..85dc8d8f43
--- /dev/null
+++ b/cranelift/codegen/src/isa/arm32/abi.rs
@@ -0,0 +1,108 @@
+//! ARM ABI implementation.
+//! This is from the RISC-V target and will need to be updated for ARM32.
+
+use super::registers::{D, GPR, Q, S};
+use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
+use crate::ir::{self, AbiParam, ArgumentExtension, ArgumentLoc, Type};
+use crate::isa::RegClass;
+use crate::regalloc::RegisterSet;
+use alloc::borrow::Cow;
+use core::i32;
+use target_lexicon::Triple;
+
+struct Args {
+    pointer_bits: u8,
+    pointer_bytes: u8,
+    pointer_type: Type,
+    regs: u32,
+    reg_limit: u32,
+    offset: u32,
+}
+
+impl Args {
+    fn new(bits: u8) -> Self {
+        Self {
+            pointer_bits: bits,
+            pointer_bytes: bits / 8,
+            pointer_type: Type::int(u16::from(bits)).unwrap(),
+            regs: 0,
+            reg_limit: 8,
+            offset: 0,
+        }
+    }
+}
+
+impl ArgAssigner for Args {
+    fn assign(&mut self, arg: &AbiParam) -> ArgAction {
+        fn align(value: u32, to: u32) -> u32 {
+            (value + to - 1) & !(to - 1)
+        }
+
+        let ty = arg.value_type;
+
+        // Check for a legal type.
+        // SIMD instructions are not yet implemented, so vectors are broken down.
+        if ty.is_vector() {
+            return ValueConversion::VectorSplit.into();
+        }
+
+        // Large integers and booleans are broken down to fit in a register.
+        if !ty.is_float() && ty.bits() > u16::from(self.pointer_bits) {
+            // Align registers and stack to a multiple of two pointers.
+            self.regs = align(self.regs, 2);
+            self.offset = align(self.offset, 2 * u32::from(self.pointer_bytes));
+            return ValueConversion::IntSplit.into();
+        }
+
+        // Small integers are extended to the size of a pointer register.
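+        // (With `ArgumentExtension::None`, the value is left as-is and falls through to the
+        // register/stack assignment below.)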
+        if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) {
+            match arg.extension {
+                ArgumentExtension::None => {}
+                ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
+                ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(),
+            }
+        }
+
+        if self.regs < self.reg_limit {
+            // Assign to a register.
+            let reg = GPR.unit(10 + self.regs as usize);
+            self.regs += 1;
+            ArgumentLoc::Reg(reg).into()
+        } else {
+            // Assign a stack location.
+            let loc = ArgumentLoc::Stack(self.offset as i32);
+            self.offset += u32::from(self.pointer_bytes);
+            debug_assert!(self.offset <= i32::MAX as u32);
+            loc.into()
+        }
+    }
+}
+
+/// Legalize `sig`.
+pub fn legalize_signature(sig: &mut Cow<ir::Signature>, triple: &Triple, _current: bool) {
+    let bits = triple.pointer_width().unwrap().bits();
+
+    let mut args = Args::new(bits);
+    if let Some(new_params) = legalize_args(&sig.params, &mut args) {
+        sig.to_mut().params = new_params;
+    }
+}
+
+/// Get register class for a type appearing in a legalized signature.
+pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
+    if ty.is_int() {
+        GPR
+    } else {
+        match ty.bits() {
+            32 => S,
+            64 => D,
+            128 => Q,
+            _ => panic!("Unexpected {} ABI type for arm32", ty),
+        }
+    }
+}
+
+/// Get the set of allocatable registers for `func`.
+pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
+    unimplemented!()
+}
diff --git a/cranelift/codegen/src/isa/arm32/binemit.rs b/cranelift/codegen/src/isa/arm32/binemit.rs
new file mode 100644
index 0000000000..d74ee0911a
--- /dev/null
+++ b/cranelift/codegen/src/isa/arm32/binemit.rs
@@ -0,0 +1,8 @@
+//! Emitting binary ARM32 machine code.
+
+use crate::binemit::{bad_encoding, CodeSink};
+use crate::ir::{Function, Inst};
+use crate::isa::TargetIsa;
+use crate::regalloc::RegDiversions;
+
+include!(concat!(env!("OUT_DIR"), "/binemit-arm32.rs"));
diff --git a/cranelift/codegen/src/isa/arm32/enc_tables.rs b/cranelift/codegen/src/isa/arm32/enc_tables.rs
new file mode 100644
index 0000000000..5cb4e5c4c0
--- /dev/null
+++ b/cranelift/codegen/src/isa/arm32/enc_tables.rs
@@ -0,0 +1,10 @@
+//! Encoding tables for ARM32 ISA.
+
+use crate::ir;
+use crate::isa;
+use crate::isa::constraints::*;
+use crate::isa::enc_tables::*;
+use crate::isa::encoding::RecipeSizing;
+
+include!(concat!(env!("OUT_DIR"), "/encoding-arm32.rs"));
+include!(concat!(env!("OUT_DIR"), "/legalize-arm32.rs"));
diff --git a/cranelift/codegen/src/isa/arm32/mod.rs b/cranelift/codegen/src/isa/arm32/mod.rs
new file mode 100644
index 0000000000..0358a70113
--- /dev/null
+++ b/cranelift/codegen/src/isa/arm32/mod.rs
@@ -0,0 +1,144 @@
+//! ARM 32-bit Instruction Set Architecture.
+
+mod abi;
+mod binemit;
+mod enc_tables;
+mod registers;
+pub mod settings;
+
+use super::super::settings as shared_settings;
+#[cfg(feature = "testing_hooks")]
+use crate::binemit::CodeSink;
+use crate::binemit::{emit_function, MemoryCodeSink};
+use crate::ir;
+use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
+use crate::isa::Builder as IsaBuilder;
+use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
+use crate::regalloc;
+use alloc::borrow::Cow;
+use alloc::boxed::Box;
+use core::fmt;
+use target_lexicon::{Architecture, Triple};
+
+#[allow(dead_code)]
+struct Isa {
+    triple: Triple,
+    shared_flags: shared_settings::Flags,
+    isa_flags: settings::Flags,
+    cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
+}
+
+/// Get an ISA builder for creating ARM32 targets.
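+///
+/// A minimal sketch of the intended use (the triple string is just an example; `finish`
+/// consumes the shared flags and produces a boxed `TargetIsa`):
+///
+/// ```ignore
+/// let shared = shared_settings::Flags::new(shared_settings::builder());
+/// let isa = isa_builder("arm-unknown-unknown".parse().unwrap()).finish(shared);
+/// ```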
+pub fn isa_builder(triple: Triple) -> IsaBuilder {
+    IsaBuilder {
+        triple,
+        setup: settings::builder(),
+        constructor: isa_constructor,
+    }
+}
+
+fn isa_constructor(
+    triple: Triple,
+    shared_flags: shared_settings::Flags,
+    builder: shared_settings::Builder,
+) -> Box<dyn TargetIsa> {
+    let level1 = match triple.architecture {
+        Architecture::Arm(arm) => {
+            if arm.is_thumb() {
+                &enc_tables::LEVEL1_T32[..]
+            } else {
+                &enc_tables::LEVEL1_A32[..]
+            }
+        }
+        _ => panic!(),
+    };
+    Box::new(Isa {
+        triple,
+        isa_flags: settings::Flags::new(&shared_flags, builder),
+        shared_flags,
+        cpumode: level1,
+    })
+}
+
+impl TargetIsa for Isa {
+    fn name(&self) -> &'static str {
+        "arm32"
+    }
+
+    fn triple(&self) -> &Triple {
+        &self.triple
+    }
+
+    fn flags(&self) -> &shared_settings::Flags {
+        &self.shared_flags
+    }
+
+    fn register_info(&self) -> RegInfo {
+        registers::INFO.clone()
+    }
+
+    fn encoding_info(&self) -> EncInfo {
+        enc_tables::INFO.clone()
+    }
+
+    fn legal_encodings<'a>(
+        &'a self,
+        func: &'a ir::Function,
+        inst: &'a ir::InstructionData,
+        ctrl_typevar: ir::Type,
+    ) -> Encodings<'a> {
+        lookup_enclist(
+            ctrl_typevar,
+            inst,
+            func,
+            self.cpumode,
+            &enc_tables::LEVEL2[..],
+            &enc_tables::ENCLISTS[..],
+            &enc_tables::LEGALIZE_ACTIONS[..],
+            &enc_tables::RECIPE_PREDICATES[..],
+            &enc_tables::INST_PREDICATES[..],
+            self.isa_flags.predicate_view(),
+        )
+    }
+
+    fn legalize_signature(&self, sig: &mut Cow<ir::Signature>, current: bool) {
+        abi::legalize_signature(sig, &self.triple, current)
+    }
+
+    fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
+        abi::regclass_for_abi_type(ty)
+    }
+
+    fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
+        abi::allocatable_registers(func)
+    }
+
+    #[cfg(feature = "testing_hooks")]
+    fn emit_inst(
+        &self,
+        func: &ir::Function,
+        inst: ir::Inst,
+        divert: &mut regalloc::RegDiversions,
+        sink: &mut dyn CodeSink,
+    ) {
+        binemit::emit_inst(func, inst, divert, sink, self)
+    }
+
+    fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
+        emit_function(func, binemit::emit_inst, sink, self)
+    }
+
+    fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
+        ir::condcodes::IntCC::UnsignedLessThan
+    }
+
+    fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
+        ir::condcodes::IntCC::UnsignedGreaterThanOrEqual
+    }
+}
+
+impl fmt::Display for Isa {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
+    }
+}
diff --git a/cranelift/codegen/src/isa/arm32/registers.rs b/cranelift/codegen/src/isa/arm32/registers.rs
new file mode 100644
index 0000000000..df555b4043
--- /dev/null
+++ b/cranelift/codegen/src/isa/arm32/registers.rs
@@ -0,0 +1,68 @@
+//! ARM32 register descriptions.
+
+use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
+
+include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs"));
+
+#[cfg(test)]
+mod tests {
+    use super::{D, GPR, INFO, S};
+    use crate::isa::RegUnit;
+    use alloc::string::{String, ToString};
+
+    #[test]
+    fn unit_encodings() {
+        assert_eq!(INFO.parse_regunit("s0"), Some(0));
+        assert_eq!(INFO.parse_regunit("s31"), Some(31));
+        assert_eq!(INFO.parse_regunit("s32"), Some(32));
+        assert_eq!(INFO.parse_regunit("r0"), Some(64));
+        assert_eq!(INFO.parse_regunit("r15"), Some(79));
+    }
+
+    #[test]
+    fn unit_names() {
+        fn uname(ru: RegUnit) -> String {
+            INFO.display_regunit(ru).to_string()
+        }
+
+        assert_eq!(uname(0), "%s0");
+        assert_eq!(uname(1), "%s1");
+        assert_eq!(uname(31), "%s31");
+        assert_eq!(uname(64), "%r0");
+    }
+
+    #[test]
+    fn overlaps() {
+        // arm32 has the most interesting register geometries, so test `regs_overlap()` here.
+        use crate::isa::regs_overlap;
+
+        let r0 = GPR.unit(0);
+        let r1 = GPR.unit(1);
+        let r2 = GPR.unit(2);
+
+        assert!(regs_overlap(GPR, r0, GPR, r0));
+        assert!(regs_overlap(GPR, r2, GPR, r2));
+        assert!(!regs_overlap(GPR, r0, GPR, r1));
+        assert!(!regs_overlap(GPR, r1, GPR, r0));
+        assert!(!regs_overlap(GPR, r2, GPR, r1));
+        assert!(!regs_overlap(GPR, r1, GPR, r2));
+
+        let s0 = S.unit(0);
+        let s1 = S.unit(1);
+        let s2 = S.unit(2);
+        let s3 = S.unit(3);
+        let d0 = D.unit(0);
+        let d1 = D.unit(1);
+
+        assert!(regs_overlap(S, s0, D, d0));
+        assert!(regs_overlap(S, s1, D, d0));
+        assert!(!regs_overlap(S, s0, D, d1));
+        assert!(!regs_overlap(S, s1, D, d1));
+        assert!(regs_overlap(S, s2, D, d1));
+        assert!(regs_overlap(S, s3, D, d1));
+        assert!(!regs_overlap(D, d1, S, s1));
+        assert!(regs_overlap(D, d1, S, s2));
+        assert!(!regs_overlap(D, d0, D, d1));
+        assert!(regs_overlap(D, d1, D, d1));
+    }
+}
diff --git a/cranelift/codegen/src/isa/arm32/settings.rs b/cranelift/codegen/src/isa/arm32/settings.rs
new file mode 100644
index 0000000000..bef631b2bb
--- /dev/null
+++ b/cranelift/codegen/src/isa/arm32/settings.rs
@@ -0,0 +1,9 @@
+//! ARM32 Settings.
+
+use crate::settings::{self, detail, Builder};
+use core::fmt;
+
+// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a
+// public `Flags` struct with an impl for all of the settings defined in
+// `cranelift-codegen/meta/src/isa/arm32/mod.rs`.
+include!(concat!(env!("OUT_DIR"), "/settings-arm32.rs"));
diff --git a/cranelift/codegen/src/isa/arm64/abi.rs b/cranelift/codegen/src/isa/arm64/abi.rs
new file mode 100644
index 0000000000..8d486d4193
--- /dev/null
+++ b/cranelift/codegen/src/isa/arm64/abi.rs
@@ -0,0 +1,31 @@
+//! ARM 64 ABI implementation.
+
+use super::registers::{FPR, GPR};
+use crate::ir;
+use crate::isa::RegClass;
+use crate::regalloc::RegisterSet;
+use crate::settings as shared_settings;
+use alloc::borrow::Cow;
+
+/// Legalize `sig`.
+pub fn legalize_signature(
+    _sig: &mut Cow<ir::Signature>,
+    _flags: &shared_settings::Flags,
+    _current: bool,
+) {
+    unimplemented!()
+}
+
+/// Get register class for a type appearing in a legalized signature.
+pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
+    if ty.is_int() {
+        GPR
+    } else {
+        FPR
+    }
+}
+
+/// Get the set of allocatable registers for `func`.
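+///
+/// Register allocation is not yet implemented for this backend, so calling this panics.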
+pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
+    unimplemented!()
+}
diff --git a/cranelift/codegen/src/isa/arm64/binemit.rs b/cranelift/codegen/src/isa/arm64/binemit.rs
new file mode 100644
index 0000000000..4401b6d6f5
--- /dev/null
+++ b/cranelift/codegen/src/isa/arm64/binemit.rs
@@ -0,0 +1,8 @@
+//! Emitting binary ARM64 machine code.
+
+use crate::binemit::{bad_encoding, CodeSink};
+use crate::ir::{Function, Inst};
+use crate::isa::TargetIsa;
+use crate::regalloc::RegDiversions;
+
+include!(concat!(env!("OUT_DIR"), "/binemit-arm64.rs"));
diff --git a/cranelift/codegen/src/isa/arm64/enc_tables.rs b/cranelift/codegen/src/isa/arm64/enc_tables.rs
new file mode 100644
index 0000000000..6040a9b866
--- /dev/null
+++ b/cranelift/codegen/src/isa/arm64/enc_tables.rs
@@ -0,0 +1,10 @@
+//! Encoding tables for ARM64 ISA.
+
+use crate::ir;
+use crate::isa;
+use crate::isa::constraints::*;
+use crate::isa::enc_tables::*;
+use crate::isa::encoding::RecipeSizing;
+
+include!(concat!(env!("OUT_DIR"), "/encoding-arm64.rs"));
+include!(concat!(env!("OUT_DIR"), "/legalize-arm64.rs"));
diff --git a/cranelift/codegen/src/isa/arm64/mod.rs b/cranelift/codegen/src/isa/arm64/mod.rs
new file mode 100644
index 0000000000..f00062b2af
--- /dev/null
+++ b/cranelift/codegen/src/isa/arm64/mod.rs
@@ -0,0 +1,132 @@
+//! ARM 64-bit Instruction Set Architecture.
+
+mod abi;
+mod binemit;
+mod enc_tables;
+mod registers;
+pub mod settings;
+
+use super::super::settings as shared_settings;
+#[cfg(feature = "testing_hooks")]
+use crate::binemit::CodeSink;
+use crate::binemit::{emit_function, MemoryCodeSink};
+use crate::ir;
+use crate::isa::enc_tables::{lookup_enclist, Encodings};
+use crate::isa::Builder as IsaBuilder;
+use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
+use crate::regalloc;
+use alloc::borrow::Cow;
+use alloc::boxed::Box;
+use core::fmt;
+use target_lexicon::Triple;
+
+#[allow(dead_code)]
+struct Isa {
+    triple: Triple,
+    shared_flags: shared_settings::Flags,
+    isa_flags: settings::Flags,
+}
+
+/// Get an ISA builder for creating ARM64 targets.
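+///
+/// The builder is wired up with the ARM64-specific settings from `settings::builder()` and
+/// constructs an `Isa` analogous to the ARM32 one above.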
+pub fn isa_builder(triple: Triple) -> IsaBuilder {
+    IsaBuilder {
+        triple,
+        setup: settings::builder(),
+        constructor: isa_constructor,
+    }
+}
+
+fn isa_constructor(
+    triple: Triple,
+    shared_flags: shared_settings::Flags,
+    builder: shared_settings::Builder,
+) -> Box<dyn TargetIsa> {
+    Box::new(Isa {
+        triple,
+        isa_flags: settings::Flags::new(&shared_flags, builder),
+        shared_flags,
+    })
+}
+
+impl TargetIsa for Isa {
+    fn name(&self) -> &'static str {
+        "arm64"
+    }
+
+    fn triple(&self) -> &Triple {
+        &self.triple
+    }
+
+    fn flags(&self) -> &shared_settings::Flags {
+        &self.shared_flags
+    }
+
+    fn register_info(&self) -> RegInfo {
+        registers::INFO.clone()
+    }
+
+    fn encoding_info(&self) -> EncInfo {
+        enc_tables::INFO.clone()
+    }
+
+    fn legal_encodings<'a>(
+        &'a self,
+        func: &'a ir::Function,
+        inst: &'a ir::InstructionData,
+        ctrl_typevar: ir::Type,
+    ) -> Encodings<'a> {
+        lookup_enclist(
+            ctrl_typevar,
+            inst,
+            func,
+            &enc_tables::LEVEL1_A64[..],
+            &enc_tables::LEVEL2[..],
+            &enc_tables::ENCLISTS[..],
+            &enc_tables::LEGALIZE_ACTIONS[..],
+            &enc_tables::RECIPE_PREDICATES[..],
+            &enc_tables::INST_PREDICATES[..],
+            self.isa_flags.predicate_view(),
+        )
+    }
+
+    fn legalize_signature(&self, sig: &mut Cow<ir::Signature>, current: bool) {
+        abi::legalize_signature(sig, &self.shared_flags, current)
+    }
+
+    fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
+        abi::regclass_for_abi_type(ty)
+    }
+
+    fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
+        abi::allocatable_registers(func)
+    }
+
+    #[cfg(feature = "testing_hooks")]
+    fn emit_inst(
+        &self,
+        func: &ir::Function,
+        inst: ir::Inst,
+        divert: &mut regalloc::RegDiversions,
+        sink: &mut dyn CodeSink,
+    ) {
+        binemit::emit_inst(func, inst, divert, sink, self)
+    }
+
+    fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
+        emit_function(func, binemit::emit_inst, sink, self)
+    }
+
+    fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
+        ir::condcodes::IntCC::UnsignedLessThan
+    }
+
+    fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
+        ir::condcodes::IntCC::UnsignedGreaterThanOrEqual
+    }
+}
+
+impl fmt::Display for Isa {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
+    }
+}
diff --git a/cranelift/codegen/src/isa/arm64/registers.rs b/cranelift/codegen/src/isa/arm64/registers.rs
new file mode 100644
index 0000000000..c02f6b7d4d
--- /dev/null
+++ b/cranelift/codegen/src/isa/arm64/registers.rs
@@ -0,0 +1,39 @@
+//! ARM64 register descriptions.
+ +use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit}; + +include!(concat!(env!("OUT_DIR"), "/registers-arm64.rs")); + +#[cfg(test)] +mod tests { + use super::INFO; + use crate::isa::RegUnit; + use alloc::string::{String, ToString}; + + #[test] + fn unit_encodings() { + assert_eq!(INFO.parse_regunit("x0"), Some(0)); + assert_eq!(INFO.parse_regunit("x31"), Some(31)); + assert_eq!(INFO.parse_regunit("v0"), Some(32)); + assert_eq!(INFO.parse_regunit("v31"), Some(63)); + + assert_eq!(INFO.parse_regunit("x32"), None); + assert_eq!(INFO.parse_regunit("v32"), None); + } + + #[test] + fn unit_names() { + fn uname(ru: RegUnit) -> String { + INFO.display_regunit(ru).to_string() + } + + assert_eq!(uname(0), "%x0"); + assert_eq!(uname(1), "%x1"); + assert_eq!(uname(31), "%x31"); + assert_eq!(uname(32), "%v0"); + assert_eq!(uname(33), "%v1"); + assert_eq!(uname(63), "%v31"); + assert_eq!(uname(64), "%nzcv"); + assert_eq!(uname(65), "%INVALID65"); + } +} diff --git a/cranelift/codegen/src/isa/arm64/settings.rs b/cranelift/codegen/src/isa/arm64/settings.rs new file mode 100644 index 0000000000..56d0f4ee0b --- /dev/null +++ b/cranelift/codegen/src/isa/arm64/settings.rs @@ -0,0 +1,9 @@ +//! ARM64 Settings. + +use crate::settings::{self, detail, Builder}; +use core::fmt; + +// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a +// public `Flags` struct with an impl for all of the settings defined in +// `cranelift-codegen/meta/src/isa/arm64/mod.rs`. +include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs")); diff --git a/cranelift/codegen/src/isa/call_conv.rs b/cranelift/codegen/src/isa/call_conv.rs new file mode 100644 index 0000000000..2b3b2a5f35 --- /dev/null +++ b/cranelift/codegen/src/isa/call_conv.rs @@ -0,0 +1,97 @@ +use crate::isa::TargetIsa; +use crate::settings::LibcallCallConv; +use core::fmt; +use core::str; +use target_lexicon::{CallingConvention, Triple}; + +/// Calling convention identifiers. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum CallConv { + /// Best performance, not ABI-stable + Fast, + /// Smallest caller code size, not ABI-stable + Cold, + /// System V-style convention used on many platforms + SystemV, + /// Windows "fastcall" convention, also used for x64 and ARM + WindowsFastcall, + /// SpiderMonkey WebAssembly convention on systems using natively SystemV + BaldrdashSystemV, + /// SpiderMonkey WebAssembly convention on Windows + BaldrdashWindows, + /// Specialized convention for the probestack function + Probestack, +} + +impl CallConv { + /// Return the default calling convention for the given target triple. + pub fn triple_default(triple: &Triple) -> Self { + match triple.default_calling_convention() { + // Default to System V for unknown targets because most everything + // uses System V. + Ok(CallingConvention::SystemV) | Err(()) => Self::SystemV, + Ok(CallingConvention::WindowsFastcall) => Self::WindowsFastcall, + Ok(unimp) => unimplemented!("calling convention: {:?}", unimp), + } + } + + /// Returns the calling convention used for libcalls for the given ISA. 
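+    ///
+    /// A sketch of the flag-driven selection (hypothetical `isa` value):
+    /// ```ignore
+    /// // With `libcall_call_conv` left at `IsaDefault`, libcalls simply use the
+    /// // ISA's own default calling convention:
+    /// assert_eq!(CallConv::for_libcall(&*isa), isa.default_call_conv());
+    /// ```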
+    pub fn for_libcall(isa: &dyn TargetIsa) -> Self {
+        match isa.flags().libcall_call_conv() {
+            LibcallCallConv::IsaDefault => isa.default_call_conv(),
+            LibcallCallConv::Fast => Self::Fast,
+            LibcallCallConv::Cold => Self::Cold,
+            LibcallCallConv::SystemV => Self::SystemV,
+            LibcallCallConv::WindowsFastcall => Self::WindowsFastcall,
+            LibcallCallConv::BaldrdashSystemV => Self::BaldrdashSystemV,
+            LibcallCallConv::BaldrdashWindows => Self::BaldrdashWindows,
+            LibcallCallConv::Probestack => Self::Probestack,
+        }
+    }
+
+    /// Is the calling convention extending the Windows Fastcall ABI?
+    pub fn extends_windows_fastcall(self) -> bool {
+        match self {
+            Self::WindowsFastcall | Self::BaldrdashWindows => true,
+            _ => false,
+        }
+    }
+
+    /// Is the calling convention extending the Baldrdash ABI?
+    pub fn extends_baldrdash(self) -> bool {
+        match self {
+            Self::BaldrdashSystemV | Self::BaldrdashWindows => true,
+            _ => false,
+        }
+    }
+}
+
+impl fmt::Display for CallConv {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str(match *self {
+            Self::Fast => "fast",
+            Self::Cold => "cold",
+            Self::SystemV => "system_v",
+            Self::WindowsFastcall => "windows_fastcall",
+            Self::BaldrdashSystemV => "baldrdash_system_v",
+            Self::BaldrdashWindows => "baldrdash_windows",
+            Self::Probestack => "probestack",
+        })
+    }
+}
+
+impl str::FromStr for CallConv {
+    type Err = ();
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "fast" => Ok(Self::Fast),
+            "cold" => Ok(Self::Cold),
+            "system_v" => Ok(Self::SystemV),
+            "windows_fastcall" => Ok(Self::WindowsFastcall),
+            "baldrdash_system_v" => Ok(Self::BaldrdashSystemV),
+            "baldrdash_windows" => Ok(Self::BaldrdashWindows),
+            "probestack" => Ok(Self::Probestack),
+            _ => Err(()),
+        }
+    }
+}
diff --git a/cranelift/codegen/src/isa/constraints.rs b/cranelift/codegen/src/isa/constraints.rs
new file mode 100644
index 0000000000..c87c3bd9d4
--- /dev/null
+++ b/cranelift/codegen/src/isa/constraints.rs
@@ -0,0 +1,207 @@
+//! Register constraints for instruction operands.
+//!
+//! An encoding recipe specifies how an instruction is encoded as binary machine code, but it only
+//! works if the operands and results satisfy certain constraints. Constraints on immediate
+//! operands are checked by instruction predicates when the recipe is chosen.
+//!
+//! It is the register allocator's job to make sure that the register constraints on value operands
+//! are satisfied.
+
+use crate::binemit::CodeOffset;
+use crate::ir::{Function, Inst, ValueLoc};
+use crate::isa::{RegClass, RegUnit};
+use crate::regalloc::RegDiversions;
+
+/// Register constraint for a single value operand or instruction result.
+#[derive(PartialEq, Debug)]
+pub struct OperandConstraint {
+    /// The kind of constraint.
+    pub kind: ConstraintKind,
+
+    /// The register class of the operand.
+    ///
+    /// This applies to all kinds of constraints, but with slightly different meaning.
+    pub regclass: RegClass,
+}
+
+impl OperandConstraint {
+    /// Check if this operand constraint is satisfied by the given value location.
+    /// For tied constraints, this only checks the register class, not that the
+    /// counterpart operand has the same value location.
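+    ///
+    /// A sketch (assuming a register class `GPR` that contains register unit 1,
+    /// and a stack slot `ss0`; both are hypothetical):
+    /// ```ignore
+    /// let constraint = OperandConstraint { kind: ConstraintKind::Reg, regclass: GPR };
+    /// assert!(constraint.satisfied(ValueLoc::Reg(1)));
+    /// assert!(!constraint.satisfied(ValueLoc::Stack(ss0)));
+    /// ```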
+ pub fn satisfied(&self, loc: ValueLoc) -> bool { + match self.kind { + ConstraintKind::Reg | ConstraintKind::Tied(_) => { + if let ValueLoc::Reg(reg) = loc { + self.regclass.contains(reg) + } else { + false + } + } + ConstraintKind::FixedReg(reg) | ConstraintKind::FixedTied(reg) => { + loc == ValueLoc::Reg(reg) && self.regclass.contains(reg) + } + ConstraintKind::Stack => { + if let ValueLoc::Stack(_) = loc { + true + } else { + false + } + } + } + } +} + +/// The different kinds of operand constraints. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum ConstraintKind { + /// This operand or result must be a register from the given register class. + Reg, + + /// This operand or result must be a fixed register. + /// + /// The constraint's `regclass` field is the top-level register class containing the fixed + /// register. + FixedReg(RegUnit), + + /// This result value must use the same register as an input value operand. + /// + /// The associated number is the index of the input value operand this result is tied to. The + /// constraint's `regclass` field is the same as the tied operand's register class. + /// + /// When an (in, out) operand pair is tied, this constraint kind appears in both the `ins` and + /// the `outs` arrays. The constraint for the in operand is `Tied(out)`, and the constraint for + /// the out operand is `Tied(in)`. + Tied(u8), + + /// This operand must be a fixed register, and it has a tied counterpart. + /// + /// This works just like `FixedReg`, but additionally indicates that there are identical + /// input/output operands for this fixed register. For an input operand, this means that the + /// value will be clobbered by the instruction + FixedTied(RegUnit), + + /// This operand must be a value in a stack slot. + /// + /// The constraint's `regclass` field is the register class that would normally be used to load + /// and store values of this type. + Stack, +} + +/// Value operand constraints for an encoding recipe. +#[derive(PartialEq, Clone)] +pub struct RecipeConstraints { + /// Constraints for the instruction's fixed value operands. + /// + /// If the instruction takes a variable number of operands, the register constraints for those + /// operands must be computed dynamically. + /// + /// - For branches and jumps, block arguments must match the expectations of the destination block. + /// - For calls and returns, the calling convention ABI specifies constraints. + pub ins: &'static [OperandConstraint], + + /// Constraints for the instruction's fixed results. + /// + /// If the instruction produces a variable number of results, it's probably a call and the + /// constraints must be derived from the calling convention ABI. + pub outs: &'static [OperandConstraint], + + /// Are any of the input constraints `FixedReg` or `FixedTied`? + pub fixed_ins: bool, + + /// Are any of the output constraints `FixedReg` or `FixedTied`? + pub fixed_outs: bool, + + /// Are any of the input/output constraints `Tied` (but not `FixedTied`)? + pub tied_ops: bool, + + /// Does this instruction clobber the CPU flags? + /// + /// When true, SSA values of type `iflags` or `fflags` can not be live across the instruction. + pub clobbers_flags: bool, +} + +impl RecipeConstraints { + /// Check that these constraints are satisfied by the operands on `inst`. 
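+    ///
+    /// Concretely, this checks each fixed input against `ins`, each fixed result
+    /// against `outs`, and, for `Tied` inputs, that the input and its tied result
+    /// were assigned the same location.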
+    pub fn satisfied(&self, inst: Inst, divert: &RegDiversions, func: &Function) -> bool {
+        for (&arg, constraint) in func.dfg.inst_args(inst).iter().zip(self.ins) {
+            let loc = divert.get(arg, &func.locations);
+
+            if let ConstraintKind::Tied(out_index) = constraint.kind {
+                let out_val = func.dfg.inst_results(inst)[out_index as usize];
+                let out_loc = func.locations[out_val];
+                if loc != out_loc {
+                    return false;
+                }
+            }
+
+            if !constraint.satisfied(loc) {
+                return false;
+            }
+        }
+
+        for (&arg, constraint) in func.dfg.inst_results(inst).iter().zip(self.outs) {
+            let loc = divert.get(arg, &func.locations);
+            if !constraint.satisfied(loc) {
+                return false;
+            }
+        }
+
+        true
+    }
+}
+
+/// Constraints on the range of a branch instruction.
+///
+/// A branch instruction usually encodes its destination as a signed n-bit offset from an origin.
+/// The origin depends on the ISA and the specific instruction:
+///
+/// - RISC-V and ARM AArch64 use the address of the branch instruction, `origin = 0`.
+/// - x86 uses the address of the instruction following the branch, `origin = 2` for a 2-byte
+///   branch instruction.
+/// - ARM's A32 encoding uses the address of the branch instruction + 8 bytes, `origin = 8`.
+#[derive(Clone, Copy, Debug)]
+pub struct BranchRange {
+    /// Offset in bytes from the address of the branch instruction to the origin used for computing
+    /// the branch displacement. This is the destination of a branch that encodes a 0 displacement.
+    pub origin: u8,
+
+    /// Number of bits in the signed byte displacement encoded in the instruction. This does not
+    /// account for branches that can only target aligned addresses.
+    pub bits: u8,
+}
+
+impl BranchRange {
+    /// Determine if this branch range can represent the range from `branch` to `dest`, where
+    /// `branch` is the code offset of the branch instruction itself and `dest` is the code offset
+    /// of the destination block header.
+    ///
+    /// This method does not detect if the range is larger than 2 GB.
+    pub fn contains(self, branch: CodeOffset, dest: CodeOffset) -> bool {
+        let d = dest.wrapping_sub(branch + CodeOffset::from(self.origin)) as i32;
+        let s = 32 - self.bits;
+        d == d << s >> s
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn branch_range() {
+        // ARM T1 branch.
+        let t1 = BranchRange { origin: 4, bits: 9 };
+        assert!(t1.contains(0, 0));
+        assert!(t1.contains(0, 2));
+        assert!(t1.contains(2, 0));
+        assert!(t1.contains(1000, 1000));
+
+        // Forward limit.
+        assert!(t1.contains(1000, 1258));
+        assert!(!t1.contains(1000, 1260));
+
+        // Backward limit.
+        assert!(t1.contains(1000, 748));
+        assert!(!t1.contains(1000, 746));
+    }
+}
diff --git a/cranelift/codegen/src/isa/enc_tables.rs b/cranelift/codegen/src/isa/enc_tables.rs
new file mode 100644
index 0000000000..e21557497e
--- /dev/null
+++ b/cranelift/codegen/src/isa/enc_tables.rs
@@ -0,0 +1,292 @@
+//! Support types for generated encoding tables.
+//!
+//! This module contains types and functions for working with the encoding tables generated by
+//! `cranelift-codegen/meta/src/gen_encodings.rs`.
+
+use crate::constant_hash::{probe, Table};
+use crate::ir::{Function, InstructionData, Opcode, Type};
+use crate::isa::{Encoding, Legalize};
+use crate::settings::PredicateView;
+use core::ops::Range;
+
+/// A recipe predicate.
+///
+/// This is a predicate function capable of testing ISA and instruction predicates simultaneously.
+///
+/// A `None` predicate is always satisfied.
+pub type RecipePredicate = Option<fn(PredicateView, &InstructionData) -> bool>;
+
+/// An instruction predicate.
+///
+/// This is a predicate function that needs to be tested in addition to the recipe predicate. It
+/// can't depend on ISA settings.
+pub type InstPredicate = fn(&Function, &InstructionData) -> bool;
+
+/// Legalization action to perform when no encoding can be found for an instruction.
+///
+/// This is an index into an ISA-specific table of legalization actions.
+pub type LegalizeCode = u8;
+
+/// Level 1 hash table entry.
+///
+/// One level 1 hash table is generated per CPU mode. This table is keyed by the controlling type
+/// variable, using `INVALID` for non-polymorphic instructions.
+///
+/// The hash table values are references to level 2 hash tables, encoded as an offset in `LEVEL2`
+/// where the table begins, and the binary logarithm of its length. All the level 2 hash tables
+/// have a power-of-two size.
+///
+/// Entries are generic over the offset type. It will typically be `u32` or `u16`, depending on the
+/// size of the `LEVEL2` table.
+///
+/// Empty entries are encoded with a `!0` value for `log2len` which will always be out of range.
+/// Entries that have a `legalize` value but no level 2 table have an `offset` field that is out of
+/// bounds.
+pub struct Level1Entry<OffT: Into<u32> + Copy> {
+    pub ty: Type,
+    pub log2len: u8,
+    pub legalize: LegalizeCode,
+    pub offset: OffT,
+}
+
+impl<OffT: Into<u32> + Copy> Level1Entry<OffT> {
+    /// Get the level 2 table range indicated by this entry.
+    fn range(&self) -> Range<usize> {
+        let b = self.offset.into() as usize;
+        b..b + (1 << self.log2len)
+    }
+}
+
+impl<OffT: Into<u32> + Copy> Table<Type> for [Level1Entry<OffT>] {
+    fn len(&self) -> usize {
+        self.len()
+    }
+
+    fn key(&self, idx: usize) -> Option<Type> {
+        if self[idx].log2len != !0 {
+            Some(self[idx].ty)
+        } else {
+            None
+        }
+    }
+}
+
+/// Level 2 hash table entry.
+///
+/// The second level hash tables are keyed by `Opcode`, and contain an offset into the `ENCLISTS`
+/// table where the encoding recipes for the instruction are stored.
+///
+/// Entries are generic over the offset type which depends on the size of `ENCLISTS`. A `u16`
+/// offset allows the entries to be only 32 bits each. There is no benefit to dropping down to `u8`
+/// for tiny ISAs. The entries won't shrink below 32 bits since the opcode is expected to be 16
+/// bits.
+///
+/// Empty entries are encoded with a `None` `opcode` field.
+pub struct Level2Entry<OffT: Into<u32> + Copy> {
+    pub opcode: Option<Opcode>,
+    pub offset: OffT,
+}
+
+impl<OffT: Into<u32> + Copy> Table<Opcode> for [Level2Entry<OffT>] {
+    fn len(&self) -> usize {
+        self.len()
+    }
+
+    fn key(&self, idx: usize) -> Option<Opcode> {
+        self[idx].opcode
+    }
+}
+
+/// Two-level hash table lookup and iterator construction.
+///
+/// Given the controlling type variable and instruction opcode, find the corresponding encoding
+/// list.
+///
+/// Returns an iterator that produces legal encodings for `inst`.
+pub fn lookup_enclist<'a, OffT1, OffT2>(
+    ctrl_typevar: Type,
+    inst: &'a InstructionData,
+    func: &'a Function,
+    level1_table: &'static [Level1Entry<OffT1>],
+    level2_table: &'static [Level2Entry<OffT2>],
+    enclist: &'static [EncListEntry],
+    legalize_actions: &'static [Legalize],
+    recipe_preds: &'static [RecipePredicate],
+    inst_preds: &'static [InstPredicate],
+    isa_preds: PredicateView<'a>,
+) -> Encodings<'a>
+where
+    OffT1: Into<u32> + Copy,
+    OffT2: Into<u32> + Copy,
+{
+    let (offset, legalize) = match probe(level1_table, ctrl_typevar, ctrl_typevar.index()) {
+        Err(l1idx) => {
+            // No level 1 entry found for the type.
+            // We have a sentinel entry with the default legalization code.
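+            // (`probe` reports the slot where the search stopped; the generated
+            // tables keep the default legalization code in that sentinel slot,
+            // so a meaningful legalize action can still be returned even though
+            // there is no encoding list, hence the `!0` offset.)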
+            (!0, level1_table[l1idx].legalize)
+        }
+        Ok(l1idx) => {
+            // We have a valid level 1 entry for this type.
+            let l1ent = &level1_table[l1idx];
+            let offset = match level2_table.get(l1ent.range()) {
+                Some(l2tab) => {
+                    let opcode = inst.opcode();
+                    match probe(l2tab, opcode, opcode as usize) {
+                        Ok(l2idx) => l2tab[l2idx].offset.into() as usize,
+                        Err(_) => !0,
+                    }
+                }
+                // The l1ent range is invalid. This means that we just have a customized
+                // legalization code for this type. The level 2 table is empty.
+                None => !0,
+            };
+            (offset, l1ent.legalize)
+        }
+    };
+
+    // Now we have an offset into `enclist` that is `!0` when no encoding list could be found.
+    // The default legalization code is always valid.
+    Encodings::new(
+        offset,
+        legalize,
+        inst,
+        func,
+        enclist,
+        legalize_actions,
+        recipe_preds,
+        inst_preds,
+        isa_preds,
+    )
+}
+
+/// Encoding list entry.
+///
+/// Encoding lists are represented as sequences of u16 words.
+pub type EncListEntry = u16;
+
+/// Number of bits used to represent a predicate. c.f. `meta/src/gen_encodings.rs`.
+const PRED_BITS: u8 = 12;
+const PRED_MASK: usize = (1 << PRED_BITS) - 1;
+/// First code word representing a predicate check. c.f. `meta/src/gen_encodings.rs`.
+const PRED_START: usize = 0x1000;
+
+/// An iterator over legal encodings for the instruction.
+pub struct Encodings<'a> {
+    // Current offset into `enclist`, or out of bounds after we've reached the end.
+    offset: usize,
+    // Legalization code to use if no encoding is found.
+    legalize: LegalizeCode,
+    inst: &'a InstructionData,
+    func: &'a Function,
+    enclist: &'static [EncListEntry],
+    legalize_actions: &'static [Legalize],
+    recipe_preds: &'static [RecipePredicate],
+    inst_preds: &'static [InstPredicate],
+    isa_preds: PredicateView<'a>,
+}
+
+impl<'a> Encodings<'a> {
+    /// Creates a new instance of `Encodings`.
+    ///
+    /// This iterator searches for encodings that apply to the given instruction. The encoding
+    /// lists are laid out such that the first call to `next` returns a valid entry in the list,
+    /// or `None`.
+    pub fn new(
+        offset: usize,
+        legalize: LegalizeCode,
+        inst: &'a InstructionData,
+        func: &'a Function,
+        enclist: &'static [EncListEntry],
+        legalize_actions: &'static [Legalize],
+        recipe_preds: &'static [RecipePredicate],
+        inst_preds: &'static [InstPredicate],
+        isa_preds: PredicateView<'a>,
+    ) -> Self {
+        Encodings {
+            offset,
+            inst,
+            func,
+            legalize,
+            isa_preds,
+            recipe_preds,
+            inst_preds,
+            enclist,
+            legalize_actions,
+        }
+    }
+
+    /// Get the legalization action that caused the enumeration of encodings to stop.
+    /// This can be the default legalization action for the type or a custom code for the
+    /// instruction.
+    ///
+    /// This method must only be called after the iterator returns `None`.
+    pub fn legalize(&self) -> Legalize {
+        debug_assert_eq!(self.offset, !0, "Premature Encodings::legalize()");
+        self.legalize_actions[self.legalize as usize]
+    }
+
+    /// Check if the `rpred` recipe predicate is satisfied.
+    fn check_recipe(&self, rpred: RecipePredicate) -> bool {
+        match rpred {
+            Some(p) => p(self.isa_preds, self.inst),
+            None => true,
+        }
+    }
+
+    /// Check an instruction or isa predicate.
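+    ///
+    /// Predicate numbers share a single index space: indices below
+    /// `inst_preds.len()` refer to instruction predicates, and anything beyond
+    /// that (after subtracting the length) refers to an ISA predicate.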
+    fn check_pred(&self, pred: usize) -> bool {
+        if let Some(&p) = self.inst_preds.get(pred) {
+            p(self.func, self.inst)
+        } else {
+            let pred = pred - self.inst_preds.len();
+            self.isa_preds.test(pred)
+        }
+    }
+}
+
+impl<'a> Iterator for Encodings<'a> {
+    type Item = Encoding;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        while let Some(entryref) = self.enclist.get(self.offset) {
+            let entry = *entryref as usize;
+
+            // Check for "recipe+bits".
+            let recipe = entry >> 1;
+            if let Some(&rpred) = self.recipe_preds.get(recipe) {
+                let bits = self.offset + 1;
+                if entry & 1 == 0 {
+                    self.offset += 2; // Next entry.
+                } else {
+                    self.offset = !0; // Stop.
+                }
+                if self.check_recipe(rpred) {
+                    return Some(Encoding::new(recipe as u16, self.enclist[bits]));
+                }
+                continue;
+            }
+
+            // Check for "stop with legalize".
+            if entry < PRED_START {
+                self.legalize = (entry - 2 * self.recipe_preds.len()) as LegalizeCode;
+                self.offset = !0; // Stop.
+                return None;
+            }
+
+            // Finally, this must be a predicate entry.
+            let pred_entry = entry - PRED_START;
+            let skip = pred_entry >> PRED_BITS;
+            let pred = pred_entry & PRED_MASK;
+
+            if self.check_pred(pred) {
+                self.offset += 1;
+            } else if skip == 0 {
+                self.offset = !0; // Stop.
+                return None;
+            } else {
+                self.offset += 1 + skip;
+            }
+        }
+        None
+    }
+}
diff --git a/cranelift/codegen/src/isa/encoding.rs b/cranelift/codegen/src/isa/encoding.rs
new file mode 100644
index 0000000000..99894cab2c
--- /dev/null
+++ b/cranelift/codegen/src/isa/encoding.rs
@@ -0,0 +1,163 @@
+//! The `Encoding` struct.
+
+use crate::binemit::CodeOffset;
+use crate::ir::{Function, Inst};
+use crate::isa::constraints::{BranchRange, RecipeConstraints};
+use crate::regalloc::RegDiversions;
+use core::fmt;
+
+/// Bits needed to encode an instruction as binary machine code.
+///
+/// The encoding consists of two parts, both specific to the target ISA: An encoding *recipe*, and
+/// encoding *bits*. The recipe determines the native instruction format and the mapping of
+/// operands to encoded bits. The encoding bits provide additional information to the recipe,
+/// typically parts of the opcode.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct Encoding {
+    recipe: u16,
+    bits: u16,
+}
+
+impl Encoding {
+    /// Create a new `Encoding` containing `(recipe, bits)`.
+    pub fn new(recipe: u16, bits: u16) -> Self {
+        Self { recipe, bits }
+    }
+
+    /// Get the recipe number in this encoding.
+    pub fn recipe(self) -> usize {
+        self.recipe as usize
+    }
+
+    /// Get the recipe-specific encoding bits.
+    pub fn bits(self) -> u16 {
+        self.bits
+    }
+
+    /// Is this a legal encoding, or the default placeholder?
+    pub fn is_legal(self) -> bool {
+        self != Self::default()
+    }
+}
+
+/// The default encoding is the illegal one.
+impl Default for Encoding {
+    fn default() -> Self {
+        Self::new(0xffff, 0xffff)
+    }
+}
+
+/// ISA-independent display of an encoding.
+impl fmt::Display for Encoding {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if self.is_legal() {
+            write!(f, "{}#{:02x}", self.recipe, self.bits)
+        } else {
+            write!(f, "-")
+        }
+    }
+}
+
+/// Temporary object that holds enough context to properly display an encoding.
+/// This is meant to be created by `EncInfo::display()`.
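+///
+/// A usage sketch (hypothetical `enc_info` and `enc` values; the recipe name
+/// shown is made up):
+/// ```ignore
+/// println!("{}", enc_info.display(enc)); // e.g. "Iz#04" for a legal encoding
+/// println!("{}", enc_info.display(Encoding::default())); // prints "-"
+/// ```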
+pub struct DisplayEncoding {
+    pub encoding: Encoding,
+    pub recipe_names: &'static [&'static str],
+}
+
+impl fmt::Display for DisplayEncoding {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if self.encoding.is_legal() {
+            write!(
+                f,
+                "{}#{:02x}",
+                self.recipe_names[self.encoding.recipe()],
+                self.encoding.bits
+            )
+        } else {
+            write!(f, "-")
+        }
+    }
+}
+
+type SizeCalculatorFn = fn(&RecipeSizing, Encoding, Inst, &RegDiversions, &Function) -> u8;
+
+/// Returns the base size of the recipe, assuming it is fixed. This is the default for most
+/// encodings; others are variable-length and can exceed this base size depending on the registers
+/// they use, in which case they use a different, platform-specific size calculator.
+pub fn base_size(
+    sizing: &RecipeSizing,
+    _: Encoding,
+    _: Inst,
+    _: &RegDiversions,
+    _: &Function,
+) -> u8 {
+    sizing.base_size
+}
+
+/// Code size information for an encoding recipe.
+///
+/// Encoding recipes may have runtime-determined instruction size.
+pub struct RecipeSizing {
+    /// Minimum size in bytes of instructions encoded with this recipe.
+    pub base_size: u8,
+
+    /// Method computing the instruction's real size, given inputs and outputs.
+    pub compute_size: SizeCalculatorFn,
+
+    /// Allowed branch range in this recipe, if any.
+    ///
+    /// All encoding recipes for branches have exact branch range information.
+    pub branch_range: Option<BranchRange>,
+}
+
+/// Information about all the encodings in this ISA.
+#[derive(Clone)]
+pub struct EncInfo {
+    /// Constraints on value operands per recipe.
+    pub constraints: &'static [RecipeConstraints],
+
+    /// Code size information per recipe.
+    pub sizing: &'static [RecipeSizing],
+
+    /// Names of encoding recipes.
+    pub names: &'static [&'static str],
+}
+
+impl EncInfo {
+    /// Get the value operand constraints for `enc` if it is a legal encoding.
+    pub fn operand_constraints(&self, enc: Encoding) -> Option<&'static RecipeConstraints> {
+        self.constraints.get(enc.recipe())
+    }
+
+    /// Create an object that can display an ISA-dependent encoding properly.
+    pub fn display(&self, enc: Encoding) -> DisplayEncoding {
+        DisplayEncoding {
+            encoding: enc,
+            recipe_names: self.names,
+        }
+    }
+
+    /// Get the size in bytes of `inst`, if it were encoded with `enc`.
+    ///
+    /// Returns 0 for illegal encodings.
+    pub fn byte_size(
+        &self,
+        enc: Encoding,
+        inst: Inst,
+        divert: &RegDiversions,
+        func: &Function,
+    ) -> CodeOffset {
+        self.sizing.get(enc.recipe()).map_or(0, |s| {
+            let compute_size = s.compute_size;
+            CodeOffset::from(compute_size(&s, enc, inst, divert, func))
+        })
+    }
+
+    /// Get the branch range that is supported by `enc`, if any.
+    ///
+    /// This will never return `None` for a legal branch encoding.
+    pub fn branch_range(&self, enc: Encoding) -> Option<BranchRange> {
+        self.sizing.get(enc.recipe()).and_then(|s| s.branch_range)
+    }
+}
diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs
new file mode 100644
index 0000000000..590f59a293
--- /dev/null
+++ b/cranelift/codegen/src/isa/mod.rs
@@ -0,0 +1,392 @@
+//! Instruction Set Architectures.
+//!
+//! The `isa` module provides a `TargetIsa` trait which provides the behavior specialization needed
+//! by the ISA-independent code generator. The sub-modules of this module provide definitions for
+//! the instruction sets that Cranelift can target. Each sub-module has its own implementation of
+//! `TargetIsa`.
+//!
+//! # Constructing a `TargetIsa` instance
+//!
+//! The target ISA is built from the following information:
+//!
+//! - The name of the target ISA as a string. Cranelift is a cross-compiler, so the ISA to target
+//!   can be selected dynamically. Individual ISAs can be left out when Cranelift is compiled, so a
+//!   string is used to identify the proper sub-module.
+//! - Values for settings that apply to all ISAs. This is represented by a `settings::Flags`
+//!   instance.
+//! - Values for ISA-specific settings.
+//!
+//! The `isa::lookup()` function is the main entry point which returns an `isa::Builder`
+//! appropriate for the requested ISA:
+//!
+//! ```
+//! # extern crate cranelift_codegen;
+//! # #[macro_use] extern crate target_lexicon;
+//! use cranelift_codegen::isa;
+//! use cranelift_codegen::settings::{self, Configurable};
+//! use std::str::FromStr;
+//! use target_lexicon::Triple;
+//!
+//! let shared_builder = settings::builder();
+//! let shared_flags = settings::Flags::new(shared_builder);
+//!
+//! match isa::lookup(triple!("riscv32")) {
+//!     Err(_) => {
+//!         // The RISC-V target ISA is not available.
+//!     }
+//!     Ok(mut isa_builder) => {
+//!         isa_builder.set("supports_m", "on");
+//!         let isa = isa_builder.finish(shared_flags);
+//!     }
+//! }
+//! ```
+//!
+//! The configured target ISA trait object is a `Box<dyn TargetIsa>` which can be used for multiple
+//! concurrent function compilations.
+
+pub use crate::isa::call_conv::CallConv;
+pub use crate::isa::constraints::{
+    BranchRange, ConstraintKind, OperandConstraint, RecipeConstraints,
+};
+pub use crate::isa::encoding::{base_size, EncInfo, Encoding};
+pub use crate::isa::registers::{regs_overlap, RegClass, RegClassIndex, RegInfo, RegUnit};
+pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
+
+use crate::binemit;
+use crate::flowgraph;
+use crate::ir;
+use crate::isa::enc_tables::Encodings;
+use crate::regalloc;
+use crate::result::CodegenResult;
+use crate::settings;
+use crate::settings::SetResult;
+use crate::timing;
+use alloc::borrow::Cow;
+use alloc::boxed::Box;
+use core::fmt;
+use target_lexicon::{triple, Architecture, PointerWidth, Triple};
+use thiserror::Error;
+
+#[cfg(feature = "riscv")]
+mod riscv;
+
+#[cfg(feature = "x86")]
+mod x86;
+
+#[cfg(feature = "arm32")]
+mod arm32;
+
+#[cfg(feature = "arm64")]
+mod arm64;
+
+mod call_conv;
+mod constraints;
+mod enc_tables;
+mod encoding;
+pub mod registers;
+mod stack;
+
+/// Returns a builder that can create a corresponding `TargetIsa`
+/// or `Err(LookupError::SupportDisabled)` if not enabled.
macro_rules! isa_builder {
+    ($name: ident, $feature: tt, $triple: ident) => {{
+        #[cfg(feature = $feature)]
+        {
+            Ok($name::isa_builder($triple))
+        }
+        #[cfg(not(feature = $feature))]
+        {
+            Err(LookupError::SupportDisabled)
+        }
+    }};
+}
+
+/// Look for an ISA for the given `triple`.
+/// Return a builder that can create a corresponding `TargetIsa`.
+pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
+    match triple.architecture {
+        Architecture::Riscv32 | Architecture::Riscv64 => isa_builder!(riscv, "riscv", triple),
+        Architecture::I386 | Architecture::I586 | Architecture::I686 | Architecture::X86_64 => {
+            isa_builder!(x86, "x86", triple)
+        }
+        Architecture::Arm { .. } => isa_builder!(arm32, "arm32", triple),
+        Architecture::Aarch64 { .. } => isa_builder!(arm64, "arm64", triple),
+        _ => Err(LookupError::Unsupported),
+    }
+}
+
+/// Look for a supported ISA with the given `name`.
+/// Return a builder that can create a corresponding `TargetIsa`.
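+///
+/// A sketch of both lookup styles (which ISAs succeed depends on the enabled
+/// feature flags):
+/// ```ignore
+/// let from_name = isa::lookup_by_name("x86_64-unknown-linux-gnu")?;
+/// let from_triple = isa::lookup(triple!("x86_64-unknown-linux-gnu"))?;
+/// ```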
+pub fn lookup_by_name(name: &str) -> Result<Builder, LookupError> {
+    use alloc::str::FromStr;
+    lookup(triple!(name))
+}
+
+/// Describes the reason for a target lookup failure.
+#[derive(Error, PartialEq, Eq, Copy, Clone, Debug)]
+pub enum LookupError {
+    /// Support for this target was disabled in the current build.
+    #[error("Support for this target is disabled")]
+    SupportDisabled,
+
+    /// Support for this target has not yet been implemented.
+    #[error("Support for this target has not been implemented yet")]
+    Unsupported,
+}
+
+/// Builder for a `TargetIsa`.
+/// Modify the ISA-specific settings before creating the `TargetIsa` trait object with `finish`.
+pub struct Builder {
+    triple: Triple,
+    setup: settings::Builder,
+    constructor: fn(Triple, settings::Flags, settings::Builder) -> Box<dyn TargetIsa>,
+}
+
+impl Builder {
+    /// Combine the ISA-specific settings with the provided ISA-independent settings and allocate a
+    /// fully configured `TargetIsa` trait object.
+    pub fn finish(self, shared_flags: settings::Flags) -> Box<dyn TargetIsa> {
+        (self.constructor)(self.triple, shared_flags, self.setup)
+    }
+}
+
+impl settings::Configurable for Builder {
+    fn set(&mut self, name: &str, value: &str) -> SetResult<()> {
+        self.setup.set(name, value)
+    }
+
+    fn enable(&mut self, name: &str) -> SetResult<()> {
+        self.setup.enable(name)
+    }
+}
+
+/// After determining that an instruction doesn't have an encoding, how should we proceed to
+/// legalize it?
+///
+/// The `Encodings` iterator returns a legalization function to call.
+pub type Legalize =
+    fn(ir::Inst, &mut ir::Function, &mut flowgraph::ControlFlowGraph, &dyn TargetIsa) -> bool;
+
+/// This struct provides information that a frontend may need to know about a target to
+/// produce Cranelift IR for the target.
+#[derive(Clone, Copy)]
+pub struct TargetFrontendConfig {
+    /// The default calling convention of the target.
+    pub default_call_conv: CallConv,
+
+    /// The pointer width of the target.
+    pub pointer_width: PointerWidth,
+}
+
+impl TargetFrontendConfig {
+    /// Get the pointer type of this target.
+    pub fn pointer_type(self) -> ir::Type {
+        ir::Type::int(u16::from(self.pointer_bits())).unwrap()
+    }
+
+    /// Get the width of pointers on this target, in units of bits.
+    pub fn pointer_bits(self) -> u8 {
+        self.pointer_width.bits()
+    }
+
+    /// Get the width of pointers on this target, in units of bytes.
+    pub fn pointer_bytes(self) -> u8 {
+        self.pointer_width.bytes()
+    }
+}
+
+/// Methods that are specialized to a target ISA. Implies a Display trait that shows the
+/// shared flags, as well as any isa-specific flags.
+pub trait TargetIsa: fmt::Display + Send + Sync {
+    /// Get the name of this ISA.
+    fn name(&self) -> &'static str;
+
+    /// Get the target triple that was used to make this trait object.
+    fn triple(&self) -> &Triple;
+
+    /// Get the ISA-independent flags that were used to make this trait object.
+    fn flags(&self) -> &settings::Flags;
+
+    /// Get the default calling convention of this target.
+    fn default_call_conv(&self) -> CallConv {
+        CallConv::triple_default(self.triple())
+    }
+
+    /// Get the pointer type of this ISA.
+    fn pointer_type(&self) -> ir::Type {
+        ir::Type::int(u16::from(self.pointer_bits())).unwrap()
+    }
+
+    /// Get the width of pointers on this ISA.
+    fn pointer_width(&self) -> PointerWidth {
+        self.triple().pointer_width().unwrap()
+    }
+
+    /// Get the width of pointers on this ISA, in units of bits.
+    fn pointer_bits(&self) -> u8 {
+        self.pointer_width().bits()
+    }
+
+    /// Get the width of pointers on this ISA, in units of bytes.
+    fn pointer_bytes(&self) -> u8 {
+        self.pointer_width().bytes()
+    }
+
+    /// Get the information needed by frontends producing Cranelift IR.
+    fn frontend_config(&self) -> TargetFrontendConfig {
+        TargetFrontendConfig {
+            default_call_conv: self.default_call_conv(),
+            pointer_width: self.pointer_width(),
+        }
+    }
+
+    /// Does the CPU implement scalar comparisons using a CPU flags register?
+    fn uses_cpu_flags(&self) -> bool {
+        false
+    }
+
+    /// Does the CPU implement multi-register addressing?
+    fn uses_complex_addresses(&self) -> bool {
+        false
+    }
+
+    /// Get a data structure describing the registers in this ISA.
+    fn register_info(&self) -> RegInfo;
+
+    /// Returns an iterator over legal encodings for the instruction.
+    fn legal_encodings<'a>(
+        &'a self,
+        func: &'a ir::Function,
+        inst: &'a ir::InstructionData,
+        ctrl_typevar: ir::Type,
+    ) -> Encodings<'a>;
+
+    /// Encode an instruction after determining it is legal.
+    ///
+    /// If `inst` can legally be encoded in this ISA, produce the corresponding `Encoding` object.
+    /// Otherwise, return the `Legalize` action.
+    ///
+    /// This is also the main entry point for determining if an instruction is legal.
+    fn encode(
+        &self,
+        func: &ir::Function,
+        inst: &ir::InstructionData,
+        ctrl_typevar: ir::Type,
+    ) -> Result<Encoding, Legalize> {
+        let mut iter = self.legal_encodings(func, inst, ctrl_typevar);
+        iter.next().ok_or_else(|| iter.legalize())
+    }
+
+    /// Get a data structure describing the instruction encodings in this ISA.
+    fn encoding_info(&self) -> EncInfo;
+
+    /// Legalize a function signature.
+    ///
+    /// This is used to legalize both the signature of the function being compiled and any called
+    /// functions. The signature should be modified by adding `ArgumentLoc` annotations to all
+    /// arguments and return values.
+    ///
+    /// Arguments with types that are not supported by the ABI can be expanded into multiple
+    /// arguments:
+    ///
+    /// - Integer types that are too large to fit in a register can be broken into multiple
+    ///   arguments of a smaller integer type.
+    /// - Floating point types can be bit-cast to an integer type of the same size, and possibly
+    ///   broken into smaller integer types.
+    /// - Vector types can be bit-cast and broken down into smaller vectors or scalars.
+    ///
+    /// The legalizer will adapt argument and return values as necessary at all ABI boundaries.
+    ///
+    /// When this function is called to legalize the signature of the function currently being
+    /// compiled, `current` is true. The legalized signature can then also contain special purpose
+    /// arguments and return values such as:
+    ///
+    /// - A `link` argument representing the link registers on RISC architectures that don't push
+    ///   the return address on the stack.
+    /// - A `link` return value which will receive the value that was passed to the `link`
+    ///   argument.
+    /// - An `sret` argument can be added if one wasn't present already. This is necessary if the
+    ///   signature returns more values than registers are available for returning values.
+    /// - An `sret` return value can be added if the ABI requires a function to return its `sret`
+    ///   argument in a register.
+    ///
+    /// Arguments and return values for the caller's frame pointer and other callee-saved registers
+    /// should not be added by this function. These arguments are not added until after register
+    /// allocation.
+    fn legalize_signature(&self, sig: &mut Cow<ir::Signature>, current: bool);
+
+    /// Get the register class that should be used to represent an ABI argument or return value of
+    /// type `ty`.
This should be the top-level register class that contains the argument + /// registers. + /// + /// This function can assume that it will only be asked to provide register classes for types + /// that `legalize_signature()` produces in `ArgumentLoc::Reg` entries. + fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass; + + /// Get the set of allocatable registers that can be used when compiling `func`. + /// + /// This set excludes reserved registers like the stack pointer and other special-purpose + /// registers. + fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet; + + /// Compute the stack layout and insert prologue and epilogue code into `func`. + /// + /// Return an error if the stack frame is too large. + fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> { + let _tt = timing::prologue_epilogue(); + // This default implementation is unlikely to be good enough. + use crate::ir::stackslot::{StackOffset, StackSize}; + use crate::stack_layout::layout_stack; + + let word_size = StackSize::from(self.pointer_bytes()); + + // Account for the SpiderMonkey standard prologue pushes. + if func.signature.call_conv.extends_baldrdash() { + let bytes = StackSize::from(self.flags().baldrdash_prologue_words()) * word_size; + let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes); + ss.offset = Some(-(bytes as StackOffset)); + func.stack_slots.push(ss); + } + + let is_leaf = func.is_leaf(); + layout_stack(&mut func.stack_slots, is_leaf, word_size)?; + Ok(()) + } + + /// Emit binary machine code for a single instruction into the `sink` trait object. + /// + /// Note that this will call `put*` methods on the `sink` trait object via its vtable which + /// is not the fastest way of emitting code. + /// + /// This function is under the "testing_hooks" feature, and is only suitable for use by + /// test harnesses. It increases code size, and is inefficient. + #[cfg(feature = "testing_hooks")] + fn emit_inst( + &self, + func: &ir::Function, + inst: ir::Inst, + divert: &mut regalloc::RegDiversions, + sink: &mut dyn binemit::CodeSink, + ); + + /// Emit a whole function into memory. + fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut binemit::MemoryCodeSink); + + /// IntCC condition for Unsigned Addition Overflow (Carry). + fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC; + + /// IntCC condition for Unsigned Subtraction Overflow (Borrow/Carry). + fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC; + + /// Emit unwind information for the given function. + /// + /// Only some calling conventions (e.g. Windows fastcall) will have unwind information. + fn emit_unwind_info( + &self, + _func: &ir::Function, + _kind: binemit::FrameUnwindKind, + _sink: &mut dyn binemit::FrameUnwindSink, + ) { + // No-op by default + } +} diff --git a/cranelift/codegen/src/isa/registers.rs b/cranelift/codegen/src/isa/registers.rs new file mode 100644 index 0000000000..6bb9e9cf4c --- /dev/null +++ b/cranelift/codegen/src/isa/registers.rs @@ -0,0 +1,349 @@ +//! Data structures describing the registers in an ISA. + +use crate::entity::EntityRef; +use core::fmt; + +/// Register units are the smallest units of register allocation. +/// +/// Normally there is a 1-1 correspondence between registers and register units, but when an ISA +/// has aliasing registers, the aliasing can be modeled with registers that cover multiple +/// register units. 
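+/// For example, a 64-bit register that aliases a pair of 32-bit registers can be
+/// modeled as a register covering both of their register units.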
+///
+/// The register allocator will enforce that each register unit only gets used for one thing.
+pub type RegUnit = u16;
+
+/// A bit mask indexed by register classes.
+///
+/// The size of this type is determined by the ISA with the most register classes.
+pub type RegClassMask = u32;
+
+/// A bit mask indexed by register units.
+///
+/// The size of this type is determined by the target ISA that has the most register units defined.
+/// Currently that is arm32 which has 64+16 units.
+pub type RegUnitMask = [RegClassMask; 3];
+
+/// The register units in a target ISA are divided into disjoint register banks. Each bank covers a
+/// contiguous range of register units.
+///
+/// The `RegBank` struct provides a static description of a register bank.
+pub struct RegBank {
+    /// The name of this register bank as defined in the ISA's DSL definition.
+    pub name: &'static str,
+
+    /// The first register unit in this bank.
+    pub first_unit: RegUnit,
+
+    /// The total number of register units in this bank.
+    pub units: RegUnit,
+
+    /// Array of specially named register units. This array can be shorter than the number of units
+    /// in the bank.
+    pub names: &'static [&'static str],
+
+    /// Name prefix to use for those register units in the bank not covered by the `names` array.
+    /// The remaining register units will be named this prefix followed by their decimal offset in
+    /// the bank. So with a prefix `r`, registers will be named `r8`, `r9`, ...
+    pub prefix: &'static str,
+
+    /// Index of the first top-level register class in this bank.
+    pub first_toprc: usize,
+
+    /// Number of top-level register classes in this bank.
+    ///
+    /// The top-level register classes in a bank are guaranteed to be numbered sequentially from
+    /// `first_toprc`, and all top-level register classes across banks come before any sub-classes.
+    pub num_toprcs: usize,
+
+    /// Is register pressure tracking enabled for this bank?
+    pub pressure_tracking: bool,
+}
+
+impl RegBank {
+    /// Does this bank contain `regunit`?
+    fn contains(&self, regunit: RegUnit) -> bool {
+        regunit >= self.first_unit && regunit - self.first_unit < self.units
+    }
+
+    /// Try to parse a regunit name. The name is not expected to begin with `%`.
+    fn parse_regunit(&self, name: &str) -> Option<RegUnit> {
+        match self.names.iter().position(|&x| x == name) {
+            Some(offset) => {
+                // This is one of the special-cased names.
+                Some(offset as RegUnit)
+            }
+            None => {
+                // Try a regular prefixed name.
+                if name.starts_with(self.prefix) {
+                    name[self.prefix.len()..].parse().ok()
+                } else {
+                    None
+                }
+            }
+        }
+        .and_then(|offset| {
+            if offset < self.units {
+                Some(offset + self.first_unit)
+            } else {
+                None
+            }
+        })
+    }
+
+    /// Write `regunit` to `f`, assuming that it belongs to this bank.
+    /// All regunits are written with a `%` prefix.
+    fn write_regunit(&self, f: &mut fmt::Formatter, regunit: RegUnit) -> fmt::Result {
+        let offset = regunit - self.first_unit;
+        assert!(offset < self.units);
+        if (offset as usize) < self.names.len() {
+            write!(f, "%{}", self.names[offset as usize])
+        } else {
+            write!(f, "%{}{}", self.prefix, offset)
+        }
+    }
+}
+
+/// A register class reference.
+///
+/// All register classes are statically defined in tables generated from the meta descriptions.
+pub type RegClass = &'static RegClassData;
+
+/// Data about a register class.
+///
+/// A register class represents a subset of the registers in a bank. It describes the set of
+/// permitted registers for a register operand in a given encoding of an instruction.
+///
+/// A register class can be a subset of another register class. The top-level register classes are
+/// disjoint.
+pub struct RegClassData {
+    /// The name of the register class.
+    pub name: &'static str,
+
+    /// The index of this class in the ISA's RegInfo description.
+    pub index: u8,
+
+    /// How many register units to allocate per register.
+    pub width: u8,
+
+    /// Index of the register bank this class belongs to.
+    pub bank: u8,
+
+    /// Index of the top-level register class containing this one.
+    pub toprc: u8,
+
+    /// The first register unit in this class.
+    pub first: RegUnit,
+
+    /// Bit-mask of sub-classes of this register class, including itself.
+    ///
+    /// Bits correspond to RC indexes.
+    pub subclasses: RegClassMask,
+
+    /// Mask of register units in the class. If `width > 1`, the mask only has a bit set for the
+    /// first register unit in each allocatable register.
+    pub mask: RegUnitMask,
+
+    /// The global `RegInfo` instance containing this register class.
+    pub info: &'static RegInfo,
+
+    /// The "pinned" register of the associated register bank.
+    ///
+    /// This register must be non-volatile (callee-preserved) and must not be the fixed
+    /// output register of any instruction.
+    pub pinned_reg: Option<RegUnit>,
+}
+
+impl RegClassData {
+    /// Get the register class index corresponding to the intersection of `self` and `other`.
+    ///
+    /// This register class is guaranteed to exist if the register classes overlap. If the register
+    /// classes don't overlap, returns `None`.
+    pub fn intersect_index(&self, other: RegClass) -> Option<RegClassIndex> {
+        // Compute the set of common subclasses.
+        let mask = self.subclasses & other.subclasses;
+
+        if mask == 0 {
+            // No overlap.
+            None
+        } else {
+            // Register class indexes are topologically ordered, so the largest common subclass has
+            // the smallest index.
+            Some(RegClassIndex(mask.trailing_zeros() as u8))
+        }
+    }
+
+    /// Get the intersection of `self` and `other`.
+    pub fn intersect(&self, other: RegClass) -> Option<RegClass> {
+        self.intersect_index(other).map(|rci| self.info.rc(rci))
+    }
+
+    /// Returns true if `other` is a subclass of this register class.
+    /// A register class is considered to be a subclass of itself.
+    pub fn has_subclass<RCI: Into<RegClassIndex>>(&self, other: RCI) -> bool {
+        self.subclasses & (1 << other.into().0) != 0
+    }
+
+    /// Get the top-level register class containing this class.
+    pub fn toprc(&self) -> RegClass {
+        self.info.rc(RegClassIndex(self.toprc))
+    }
+
+    /// Get a specific register unit in this class.
+    pub fn unit(&self, offset: usize) -> RegUnit {
+        let uoffset = offset * usize::from(self.width);
+        self.first + uoffset as RegUnit
+    }
+
+    /// Does this register class contain `regunit`?
+    pub fn contains(&self, regunit: RegUnit) -> bool {
+        self.mask[(regunit / 32) as usize] & (1u32 << (regunit % 32)) != 0
+    }
+
+    /// If the pinned register is used, is the given regunit the pinned register of this class?
+    #[inline]
+    pub fn is_pinned_reg(&self, enabled: bool, regunit: RegUnit) -> bool {
+        enabled
+            && self
+                .pinned_reg
+                .map_or(false, |pinned_reg| pinned_reg == regunit)
+    }
+}
+
+impl fmt::Display for RegClassData {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str(self.name)
+    }
+}
+
+impl fmt::Debug for RegClassData {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str(self.name)
+    }
+}
+
+/// Within an ISA, register classes are uniquely identified by their index.
+impl PartialEq for RegClassData {
+    fn eq(&self, other: &Self) -> bool {
+        self.index == other.index
+    }
+}
+
+/// A small reference to a register class.
+///
+/// Use this when storing register classes in compact data structures. The `RegInfo::rc()` method
+/// can be used to get the real register class reference back.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct RegClassIndex(u8);
+
+impl EntityRef for RegClassIndex {
+    fn new(idx: usize) -> Self {
+        Self(idx as u8)
+    }
+
+    fn index(self) -> usize {
+        usize::from(self.0)
+    }
+}
+
+impl From<RegClass> for RegClassIndex {
+    fn from(rc: RegClass) -> Self {
+        Self(rc.index)
+    }
+}
+
+impl fmt::Display for RegClassIndex {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "rci{}", self.0)
+    }
+}
+
+/// Test whether two registers overlap.
+///
+/// A register is identified as a `(RegClass, RegUnit)` pair. The register class is needed to
+/// determine the width (in regunits) of the register.
+pub fn regs_overlap(rc1: RegClass, reg1: RegUnit, rc2: RegClass, reg2: RegUnit) -> bool {
+    let end1 = reg1 + RegUnit::from(rc1.width);
+    let end2 = reg2 + RegUnit::from(rc2.width);
+    !(end1 <= reg2 || end2 <= reg1)
+}
+
+/// Information about the registers in an ISA.
+///
+/// The `RegInfo` data structure collects all relevant static information about the registers in an
+/// ISA.
+#[derive(Clone)]
+pub struct RegInfo {
+    /// All register banks, ordered by their `first_unit`. The register banks are disjoint, but
+    /// there may be holes of unused register unit numbers between banks due to alignment.
+    pub banks: &'static [RegBank],
+
+    /// All register classes ordered topologically so a sub-class always follows its parent.
+    pub classes: &'static [RegClass],
+}
+
+impl RegInfo {
+    /// Get the register bank holding `regunit`.
+    pub fn bank_containing_regunit(&self, regunit: RegUnit) -> Option<&RegBank> {
+        // We could do a binary search, but most ISAs have only two register banks...
+        self.banks.iter().find(|b| b.contains(regunit))
+    }
+
+    /// Try to parse a regunit name. The name is not expected to begin with `%`.
+    pub fn parse_regunit(&self, name: &str) -> Option<RegUnit> {
+        self.banks
+            .iter()
+            .filter_map(|b| b.parse_regunit(name))
+            .next()
+    }
+
+    /// Make a temporary object that can display a register unit.
+    pub fn display_regunit(&self, regunit: RegUnit) -> DisplayRegUnit {
+        DisplayRegUnit {
+            regunit,
+            reginfo: self,
+        }
+    }
+
+    /// Get the register class corresponding to `idx`.
+    pub fn rc(&self, idx: RegClassIndex) -> RegClass {
+        self.classes[idx.index()]
+    }
+
+    /// Get the top-level register class containing the `idx` class.
+    pub fn toprc(&self, idx: RegClassIndex) -> RegClass {
+        self.classes[self.rc(idx).toprc as usize]
+    }
+}
+
+/// Temporary object that holds enough information to print a register unit.
+pub struct DisplayRegUnit<'a> {
+    regunit: RegUnit,
+    reginfo: &'a RegInfo,
+}
+
+impl<'a> fmt::Display for DisplayRegUnit<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self.reginfo.bank_containing_regunit(self.regunit) {
+            Some(b) => b.write_regunit(f, self.regunit),
+            None => write!(f, "%INVALID{}", self.regunit),
+        }
+    }
+}
+
+#[test]
+fn assert_sizes() {
+    use cranelift_codegen_shared::constants;
+    use std::mem::size_of;
+
+    // In these tests, size_of returns number of bytes: we actually want the number of bits, so
+    // multiply these by 8.
+    assert!(
+        (size_of::<RegClassMask>() * 8) <= constants::MAX_NUM_REG_CLASSES,
+        "need to bump MAX_NUM_REG_CLASSES or change RegClassMask type"
+    );
+
+    assert!(
+        constants::MAX_NUM_REG_CLASSES < (1 << (size_of::<RegClassIndex>() * 8)),
+        "need to change RegClassIndex's type to a wider type"
+    );
+}
diff --git a/cranelift/codegen/src/isa/riscv/abi.rs b/cranelift/codegen/src/isa/riscv/abi.rs
new file mode 100644
index 0000000000..44c5f36afe
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv/abi.rs
@@ -0,0 +1,149 @@
+//! RISC-V ABI implementation.
+//!
+//! This module implements the RISC-V calling convention through the primary `legalize_signature()`
+//! entry point.
+//!
+//! This doesn't support the soft-float ABI at the moment.
+
+use super::registers::{FPR, GPR};
+use super::settings;
+use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
+use crate::ir::{self, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, Type};
+use crate::isa::RegClass;
+use crate::regalloc::RegisterSet;
+use alloc::borrow::Cow;
+use core::i32;
+use target_lexicon::Triple;
+
+struct Args {
+    pointer_bits: u8,
+    pointer_bytes: u8,
+    pointer_type: Type,
+    regs: u32,
+    reg_limit: u32,
+    offset: u32,
+}
+
+impl Args {
+    fn new(bits: u8, enable_e: bool) -> Self {
+        Self {
+            pointer_bits: bits,
+            pointer_bytes: bits / 8,
+            pointer_type: Type::int(u16::from(bits)).unwrap(),
+            regs: 0,
+            reg_limit: if enable_e { 6 } else { 8 },
+            offset: 0,
+        }
+    }
+}
+
+impl ArgAssigner for Args {
+    fn assign(&mut self, arg: &AbiParam) -> ArgAction {
+        fn align(value: u32, to: u32) -> u32 {
+            (value + to - 1) & !(to - 1)
+        }
+
+        let ty = arg.value_type;
+
+        // Check for a legal type.
+        // RISC-V doesn't have SIMD at all, so break all vectors down.
+        if ty.is_vector() {
+            return ValueConversion::VectorSplit.into();
+        }
+
+        // Large integers and booleans are broken down to fit in a register.
+        if !ty.is_float() && ty.bits() > u16::from(self.pointer_bits) {
+            // Align registers and stack to a multiple of two pointers.
+            self.regs = align(self.regs, 2);
+            self.offset = align(self.offset, 2 * u32::from(self.pointer_bytes));
+            return ValueConversion::IntSplit.into();
+        }
+
+        // Small integers are extended to the size of a pointer register.
+        if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) {
+            match arg.extension {
+                ArgumentExtension::None => {}
+                ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
+                ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(),
+            }
+        }
+
+        if self.regs < self.reg_limit {
+            // Assign to a register.
+            let reg = if ty.is_float() {
+                FPR.unit(10 + self.regs as usize)
+            } else {
+                GPR.unit(10 + self.regs as usize)
+            };
+            self.regs += 1;
+            ArgumentLoc::Reg(reg).into()
+        } else {
+            // Assign a stack location.
+            let loc = ArgumentLoc::Stack(self.offset as i32);
+            self.offset += u32::from(self.pointer_bytes);
+            debug_assert!(self.offset <= i32::MAX as u32);
+            loc.into()
+        }
+    }
+}
+
+/// Legalize `sig` for RISC-V.
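+///
+/// Besides assigning argument and return locations, when `current` is true (the
+/// signature belongs to the function being compiled) this also appends a `link`
+/// parameter and return value pinned to `x1`, the conventional return-address
+/// register.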
+pub fn legalize_signature(
+    sig: &mut Cow<ir::Signature>,
+    triple: &Triple,
+    isa_flags: &settings::Flags,
+    current: bool,
+) {
+    let bits = triple.pointer_width().unwrap().bits();
+
+    let mut args = Args::new(bits, isa_flags.enable_e());
+    if let Some(new_params) = legalize_args(&sig.params, &mut args) {
+        sig.to_mut().params = new_params;
+    }
+
+    let mut rets = Args::new(bits, isa_flags.enable_e());
+    if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) {
+        sig.to_mut().returns = new_returns;
+    }
+
+    if current {
+        let ptr = Type::int(u16::from(bits)).unwrap();
+
+        // Add the link register as an argument and return value.
+        //
+        // The `jalr` instruction implementing a return can technically accept the return address
+        // in any register, but a micro-architecture with a return address predictor will only
+        // recognize it as a return if the address is in `x1`.
+        let link = AbiParam::special_reg(ptr, ArgumentPurpose::Link, GPR.unit(1));
+        sig.to_mut().params.push(link);
+        sig.to_mut().returns.push(link);
+    }
+}
+
+/// Get register class for a type appearing in a legalized signature.
+pub fn regclass_for_abi_type(ty: Type) -> RegClass {
+    if ty.is_float() {
+        FPR
+    } else {
+        GPR
+    }
+}
+
+pub fn allocatable_registers(_func: &ir::Function, isa_flags: &settings::Flags) -> RegisterSet {
+    let mut regs = RegisterSet::new();
+    regs.take(GPR, GPR.unit(0)); // Hard-wired 0.
+    // %x1 is the link register which is available for allocation.
+    regs.take(GPR, GPR.unit(2)); // Stack pointer.
+    regs.take(GPR, GPR.unit(3)); // Global pointer.
+    regs.take(GPR, GPR.unit(4)); // Thread pointer.
+    // TODO: %x8 is the frame pointer. Reserve it?
+
+    // Remove %x16 and up for RV32E.
+    if isa_flags.enable_e() {
+        for u in 16..32 {
+            regs.take(GPR, GPR.unit(u));
+        }
+    }
+
+    regs
+}
diff --git a/cranelift/codegen/src/isa/riscv/binemit.rs b/cranelift/codegen/src/isa/riscv/binemit.rs
new file mode 100644
index 0000000000..a1d2b82e12
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv/binemit.rs
@@ -0,0 +1,182 @@
+//! Emitting binary RISC-V machine code.
+
+use crate::binemit::{bad_encoding, CodeSink, Reloc};
+use crate::ir::{Function, Inst, InstructionData};
+use crate::isa::{RegUnit, StackBaseMask, StackRef, TargetIsa};
+use crate::predicates::is_signed_int;
+use crate::regalloc::RegDiversions;
+use core::u32;
+
+include!(concat!(env!("OUT_DIR"), "/binemit-riscv.rs"));
+
+/// R-type instructions.
+///
+///   31     24  19  14     11 6
+///   funct7 rs2 rs1 funct3 rd opcode
+///       25  20  15     12  7      0
+///
+/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
+fn put_r<CS: CodeSink + ?Sized>(bits: u16, rs1: RegUnit, rs2: RegUnit, rd: RegUnit, sink: &mut CS) {
+    let bits = u32::from(bits);
+    let opcode5 = bits & 0x1f;
+    let funct3 = (bits >> 5) & 0x7;
+    let funct7 = (bits >> 8) & 0x7f;
+    let rs1 = u32::from(rs1) & 0x1f;
+    let rs2 = u32::from(rs2) & 0x1f;
+    let rd = u32::from(rd) & 0x1f;
+
+    // 0-6: opcode
+    let mut i = 0x3;
+    i |= opcode5 << 2;
+    i |= rd << 7;
+    i |= funct3 << 12;
+    i |= rs1 << 15;
+    i |= rs2 << 20;
+    i |= funct7 << 25;
+
+    sink.put4(i);
+}
+
+/// R-type instructions with a shift amount instead of rs2.
+///
+///   31     25    19  14     11 6
+///   funct7 shamt rs1 funct3 rd opcode
+///       25    20  15     12  7      0
+///
+/// Both funct7 and shamt contribute to bit 25. In RV64, shamt uses it for shifts > 31.
+///
+/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
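+///
+/// A worked sketch (assuming, for illustration, that encoding bits `0x24`
+/// correspond to `slli`: `opcode[6:2] = 0b00100`, `funct3 = 0b001`, `funct7 = 0`):
+/// ```ignore
+/// // slli x2, x1, 5
+/// put_rshamt(0x24, 1, 5, 2, sink); // emits 0x00509113
+/// ```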
+fn put_rshamt<CS: CodeSink + ?Sized>(
+    bits: u16,
+    rs1: RegUnit,
+    shamt: i64,
+    rd: RegUnit,
+    sink: &mut CS,
+) {
+    let bits = u32::from(bits);
+    let opcode5 = bits & 0x1f;
+    let funct3 = (bits >> 5) & 0x7;
+    let funct7 = (bits >> 8) & 0x7f;
+    let rs1 = u32::from(rs1) & 0x1f;
+    let shamt = shamt as u32 & 0x3f;
+    let rd = u32::from(rd) & 0x1f;
+
+    // 0-6: opcode
+    let mut i = 0x3;
+    i |= opcode5 << 2;
+    i |= rd << 7;
+    i |= funct3 << 12;
+    i |= rs1 << 15;
+    i |= shamt << 20;
+    i |= funct7 << 25;
+
+    sink.put4(i);
+}
+
+/// I-type instructions.
+///
+///   31  19  14     11 6
+///   imm rs1 funct3 rd opcode
+///    20  15     12  7      0
+///
+/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
+fn put_i<CS: CodeSink + ?Sized>(bits: u16, rs1: RegUnit, imm: i64, rd: RegUnit, sink: &mut CS) {
+    let bits = u32::from(bits);
+    let opcode5 = bits & 0x1f;
+    let funct3 = (bits >> 5) & 0x7;
+    let rs1 = u32::from(rs1) & 0x1f;
+    let rd = u32::from(rd) & 0x1f;
+
+    // 0-6: opcode
+    let mut i = 0x3;
+    i |= opcode5 << 2;
+    i |= rd << 7;
+    i |= funct3 << 12;
+    i |= rs1 << 15;
+    i |= (imm << 20) as u32;
+
+    sink.put4(i);
+}
+
+/// U-type instructions.
+///
+///   31  11 6
+///   imm rd opcode
+///    12  7      0
+///
+/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
+fn put_u<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) {
+    let bits = u32::from(bits);
+    let opcode5 = bits & 0x1f;
+    let rd = u32::from(rd) & 0x1f;
+
+    // 0-6: opcode
+    let mut i = 0x3;
+    i |= opcode5 << 2;
+    i |= rd << 7;
+    i |= imm as u32 & 0xfffff000;
+
+    sink.put4(i);
+}
+
+/// SB-type branch instructions.
+///
+///   31  24  19  14     11  6
+///   imm rs2 rs1 funct3 imm opcode
+///    25  20  15     12   7      0
+///
+/// Encoding bits: `opcode[6:2] | (funct3 << 5)`
+fn put_sb<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rs1: RegUnit, rs2: RegUnit, sink: &mut CS) {
+    let bits = u32::from(bits);
+    let opcode5 = bits & 0x1f;
+    let funct3 = (bits >> 5) & 0x7;
+    let rs1 = u32::from(rs1) & 0x1f;
+    let rs2 = u32::from(rs2) & 0x1f;
+
+    debug_assert!(is_signed_int(imm, 13, 1), "SB out of range {:#x}", imm);
+    let imm = imm as u32;
+
+    // 0-6: opcode
+    let mut i = 0x3;
+    i |= opcode5 << 2;
+    i |= funct3 << 12;
+    i |= rs1 << 15;
+    i |= rs2 << 20;
+
+    // The displacement is completely hashed up.
+    i |= ((imm >> 11) & 0x1) << 7;
+    i |= ((imm >> 1) & 0xf) << 8;
+    i |= ((imm >> 5) & 0x3f) << 25;
+    i |= ((imm >> 12) & 0x1) << 31;
+
+    sink.put4(i);
+}
+
+/// UJ-type jump instructions.
+///
+///   31  11 6
+///   imm rd opcode
+///    12  7      0
+///
+/// Encoding bits: `opcode[6:2]`
+fn put_uj<CS: CodeSink + ?Sized>(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) {
+    let bits = u32::from(bits);
+    let opcode5 = bits & 0x1f;
+    let rd = u32::from(rd) & 0x1f;
+
+    debug_assert!(is_signed_int(imm, 21, 1), "UJ out of range {:#x}", imm);
+    let imm = imm as u32;
+
+    // 0-6: opcode
+    let mut i = 0x3;
+    i |= opcode5 << 2;
+    i |= rd << 7;
+
+    // The displacement is completely hashed up.
+    i |= imm & 0xff000;
+    i |= ((imm >> 11) & 0x1) << 20;
+    i |= ((imm >> 1) & 0x3ff) << 21;
+    i |= ((imm >> 20) & 0x1) << 31;
+
+    sink.put4(i);
+}
diff --git a/cranelift/codegen/src/isa/riscv/enc_tables.rs b/cranelift/codegen/src/isa/riscv/enc_tables.rs
new file mode 100644
index 0000000000..76184ad727
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv/enc_tables.rs
@@ -0,0 +1,18 @@
+//! Encoding tables for RISC-V.
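+//!
+//! (Editor's note, offered as a hedged sketch of the generated tables' shape:
+//! `LEVEL1_*` is indexed by the controlling type variable and points into
+//! `LEVEL2`, which is indexed by opcode and points at encoding lists pairing a
+//! recipe with its encoding bits.)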
+
+use super::registers::*;
+use crate::ir;
+use crate::isa;
+use crate::isa::constraints::*;
+use crate::isa::enc_tables::*;
+use crate::isa::encoding::{base_size, RecipeSizing};
+use crate::predicates;
+
+// Include the generated encoding tables:
+// - `LEVEL1_RV32`
+// - `LEVEL1_RV64`
+// - `LEVEL2`
+// - `ENCLISTS`
+// - `INFO`
+include!(concat!(env!("OUT_DIR"), "/encoding-riscv.rs"));
+include!(concat!(env!("OUT_DIR"), "/legalize-riscv.rs"));
diff --git a/cranelift/codegen/src/isa/riscv/mod.rs b/cranelift/codegen/src/isa/riscv/mod.rs
new file mode 100644
index 0000000000..8aa264f34f
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv/mod.rs
@@ -0,0 +1,290 @@
+//! RISC-V Instruction Set Architecture.
+
+mod abi;
+mod binemit;
+mod enc_tables;
+mod registers;
+pub mod settings;
+
+use super::super::settings as shared_settings;
+#[cfg(feature = "testing_hooks")]
+use crate::binemit::CodeSink;
+use crate::binemit::{emit_function, MemoryCodeSink};
+use crate::ir;
+use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
+use crate::isa::Builder as IsaBuilder;
+use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
+use crate::regalloc;
+use alloc::borrow::Cow;
+use alloc::boxed::Box;
+use core::fmt;
+use target_lexicon::{PointerWidth, Triple};
+
+#[allow(dead_code)]
+struct Isa {
+    triple: Triple,
+    shared_flags: shared_settings::Flags,
+    isa_flags: settings::Flags,
+    cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
+}
+
+/// Get an ISA builder for creating RISC-V targets.
+pub fn isa_builder(triple: Triple) -> IsaBuilder {
+    IsaBuilder {
+        triple,
+        setup: settings::builder(),
+        constructor: isa_constructor,
+    }
+}
+
+fn isa_constructor(
+    triple: Triple,
+    shared_flags: shared_settings::Flags,
+    builder: shared_settings::Builder,
+) -> Box<dyn TargetIsa> {
+    let level1 = match triple.pointer_width().unwrap() {
+        PointerWidth::U16 => panic!("16-bit RISC-V unrecognized"),
+        PointerWidth::U32 => &enc_tables::LEVEL1_RV32[..],
+        PointerWidth::U64 => &enc_tables::LEVEL1_RV64[..],
+    };
+    Box::new(Isa {
+        triple,
+        isa_flags: settings::Flags::new(&shared_flags, builder),
+        shared_flags,
+        cpumode: level1,
+    })
+}
+
+impl TargetIsa for Isa {
+    fn name(&self) -> &'static str {
+        "riscv"
+    }
+
+    fn triple(&self) -> &Triple {
+        &self.triple
+    }
+
+    fn flags(&self) -> &shared_settings::Flags {
+        &self.shared_flags
+    }
+
+    fn register_info(&self) -> RegInfo {
+        registers::INFO.clone()
+    }
+
+    fn encoding_info(&self) -> EncInfo {
+        enc_tables::INFO.clone()
+    }
+
+    fn legal_encodings<'a>(
+        &'a self,
+        func: &'a ir::Function,
+        inst: &'a ir::InstructionData,
+        ctrl_typevar: ir::Type,
+    ) -> Encodings<'a> {
+        lookup_enclist(
+            ctrl_typevar,
+            inst,
+            func,
+            self.cpumode,
+            &enc_tables::LEVEL2[..],
+            &enc_tables::ENCLISTS[..],
+            &enc_tables::LEGALIZE_ACTIONS[..],
+            &enc_tables::RECIPE_PREDICATES[..],
+            &enc_tables::INST_PREDICATES[..],
+            self.isa_flags.predicate_view(),
+        )
+    }
+
+    fn legalize_signature(&self, sig: &mut Cow<ir::Signature>, current: bool) {
+        abi::legalize_signature(sig, &self.triple, &self.isa_flags, current)
+    }
+
+    fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
+        abi::regclass_for_abi_type(ty)
+    }
+
+    fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
+        abi::allocatable_registers(func, &self.isa_flags)
+    }
+
+    #[cfg(feature = "testing_hooks")]
+    fn emit_inst(
+        &self,
+        func: &ir::Function,
+        inst: ir::Inst,
+        divert: &mut regalloc::RegDiversions,
+        sink: &mut dyn CodeSink,
+    ) {
+        binemit::emit_inst(func, inst, divert, sink, self)
+    }
+
+    fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
+        emit_function(func, binemit::emit_inst, sink, self)
+    }
+
+    fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
+        unimplemented!()
+    }
+
+    fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
+        unimplemented!()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::ir::{immediates, types};
+    use crate::ir::{Function, InstructionData, Opcode};
+    use crate::isa;
+    use crate::settings::{self, Configurable};
+    use alloc::string::{String, ToString};
+    use core::str::FromStr;
+    use target_lexicon::triple;
+
+    fn encstr(isa: &dyn isa::TargetIsa, enc: Result<isa::Encoding, isa::Legalize>) -> String {
+        match enc {
+            Ok(e) => isa.encoding_info().display(e).to_string(),
+            Err(_) => "no encoding".to_string(),
+        }
+    }
+
+    #[test]
+    fn test_64bitenc() {
+        let shared_builder = settings::builder();
+        let shared_flags = settings::Flags::new(shared_builder);
+        let isa = isa::lookup(triple!("riscv64"))
+            .unwrap()
+            .finish(shared_flags);
+
+        let mut func = Function::new();
+        let block = func.dfg.make_block();
+        let arg64 = func.dfg.append_block_param(block, types::I64);
+        let arg32 = func.dfg.append_block_param(block, types::I32);
+
+        // Try to encode iadd_imm.i64 v1, -10.
+        let inst64 = InstructionData::BinaryImm {
+            opcode: Opcode::IaddImm,
+            arg: arg64,
+            imm: immediates::Imm64::new(-10),
+        };
+
+        // ADDI is I/0b00100
+        assert_eq!(
+            encstr(&*isa, isa.encode(&func, &inst64, types::I64)),
+            "Ii#04"
+        );
+
+        // Try to encode iadd_imm.i64 v1, -10000.
+        let inst64_large = InstructionData::BinaryImm {
+            opcode: Opcode::IaddImm,
+            arg: arg64,
+            imm: immediates::Imm64::new(-10000),
+        };
+
+        // Immediate is out of range for ADDI.
+        assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
+
+        // Create an iadd_imm.i32 which is encodable in RV64.
+        let inst32 = InstructionData::BinaryImm {
+            opcode: Opcode::IaddImm,
+            arg: arg32,
+            imm: immediates::Imm64::new(10),
+        };
+
+        // ADDIW is I/0b00110
+        assert_eq!(
+            encstr(&*isa, isa.encode(&func, &inst32, types::I32)),
+            "Ii#06"
+        );
+    }
+
+    // Same as above, but for RV32.
+    #[test]
+    fn test_32bitenc() {
+        let shared_builder = settings::builder();
+        let shared_flags = settings::Flags::new(shared_builder);
+        let isa = isa::lookup(triple!("riscv32"))
+            .unwrap()
+            .finish(shared_flags);
+
+        let mut func = Function::new();
+        let block = func.dfg.make_block();
+        let arg64 = func.dfg.append_block_param(block, types::I64);
+        let arg32 = func.dfg.append_block_param(block, types::I32);
+
+        // Try to encode iadd_imm.i64 v1, -10.
+        let inst64 = InstructionData::BinaryImm {
+            opcode: Opcode::IaddImm,
+            arg: arg64,
+            imm: immediates::Imm64::new(-10),
+        };
+
+        // In 32-bit mode, an i64 add should be narrowed.
+        assert!(isa.encode(&func, &inst64, types::I64).is_err());
+
+        // Try to encode iadd_imm.i64 v1, -10000.
+        let inst64_large = InstructionData::BinaryImm {
+            opcode: Opcode::IaddImm,
+            arg: arg64,
+            imm: immediates::Imm64::new(-10000),
+        };
+
+        // In 32-bit mode, an i64 add should be narrowed.
+        assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
+
+        // Create an iadd_imm.i32 which is encodable in RV32.
+        let inst32 = InstructionData::BinaryImm {
+            opcode: Opcode::IaddImm,
+            arg: arg32,
+            imm: immediates::Imm64::new(10),
+        };
+
+        // ADDI is I/0b00100
+        assert_eq!(
+            encstr(&*isa, isa.encode(&func, &inst32, types::I32)),
+            "Ii#04"
+        );
+
+        // Create an imul.i32 which is encodable in RV32, but only when use_m is true.
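+        // (Editor's note: `supports_m` is left at its default of false here, so
+        // the `use_m` predicate is false and the lookup below is expected to fail.)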
+        let mul32 = InstructionData::Binary {
+            opcode: Opcode::Imul,
+            args: [arg32, arg32],
+        };
+
+        assert!(isa.encode(&func, &mul32, types::I32).is_err());
+    }
+
+    #[test]
+    fn test_rv32m() {
+        let shared_builder = settings::builder();
+        let shared_flags = settings::Flags::new(shared_builder);
+
+        // Set the supports_m setting, which in turn enables the use_m predicate that unlocks
+        // encodings for imul.
+        let mut isa_builder = isa::lookup(triple!("riscv32")).unwrap();
+        isa_builder.enable("supports_m").unwrap();
+
+        let isa = isa_builder.finish(shared_flags);
+
+        let mut func = Function::new();
+        let block = func.dfg.make_block();
+        let arg32 = func.dfg.append_block_param(block, types::I32);
+
+        // Create an imul.i32 which is encodable in RV32M.
+        let mul32 = InstructionData::Binary {
+            opcode: Opcode::Imul,
+            args: [arg32, arg32],
+        };
+        assert_eq!(
+            encstr(&*isa, isa.encode(&func, &mul32, types::I32)),
+            "R#10c"
+        );
+    }
+}
+
+impl fmt::Display for Isa {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
+    }
+}
diff --git a/cranelift/codegen/src/isa/riscv/registers.rs b/cranelift/codegen/src/isa/riscv/registers.rs
new file mode 100644
index 0000000000..9043b7f65f
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv/registers.rs
@@ -0,0 +1,50 @@
+//! RISC-V register descriptions.
+
+use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
+
+include!(concat!(env!("OUT_DIR"), "/registers-riscv.rs"));
+
+#[cfg(test)]
+mod tests {
+    use super::{FPR, GPR, INFO};
+    use crate::isa::RegUnit;
+    use alloc::string::{String, ToString};
+
+    #[test]
+    fn unit_encodings() {
+        assert_eq!(INFO.parse_regunit("x0"), Some(0));
+        assert_eq!(INFO.parse_regunit("x31"), Some(31));
+        assert_eq!(INFO.parse_regunit("f0"), Some(32));
+        assert_eq!(INFO.parse_regunit("f31"), Some(63));
+
+        assert_eq!(INFO.parse_regunit("x32"), None);
+        assert_eq!(INFO.parse_regunit("f32"), None);
+    }
+
+    #[test]
+    fn unit_names() {
+        fn uname(ru: RegUnit) -> String {
+            INFO.display_regunit(ru).to_string()
+        }
+
+        assert_eq!(uname(0), "%x0");
+        assert_eq!(uname(1), "%x1");
+        assert_eq!(uname(31), "%x31");
+        assert_eq!(uname(32), "%f0");
+        assert_eq!(uname(33), "%f1");
+        assert_eq!(uname(63), "%f31");
+        assert_eq!(uname(64), "%INVALID64");
+    }
+
+    #[test]
+    fn classes() {
+        assert!(GPR.contains(GPR.unit(0)));
+        assert!(GPR.contains(GPR.unit(31)));
+        assert!(!FPR.contains(GPR.unit(0)));
+        assert!(!FPR.contains(GPR.unit(31)));
+        assert!(!GPR.contains(FPR.unit(0)));
+        assert!(!GPR.contains(FPR.unit(31)));
+        assert!(FPR.contains(FPR.unit(0)));
+        assert!(FPR.contains(FPR.unit(31)));
+    }
+}
diff --git a/cranelift/codegen/src/isa/riscv/settings.rs b/cranelift/codegen/src/isa/riscv/settings.rs
new file mode 100644
index 0000000000..40aa3bed2b
--- /dev/null
+++ b/cranelift/codegen/src/isa/riscv/settings.rs
@@ -0,0 +1,56 @@
+//! RISC-V Settings.
+
+use crate::settings::{self, detail, Builder};
+use core::fmt;
+
+// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a
+// public `Flags` struct with an impl for all of the settings defined in
+// `cranelift-codegen/meta/src/isa/riscv/mod.rs`.
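+//
+// (Hedged editor's note: in addition to `Flags`, the generated code is expected
+// to provide the `builder()` constructor and derived predicates such as
+// `full_float()`, both exercised by the tests below.)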
+include!(concat!(env!("OUT_DIR"), "/settings-riscv.rs"));
+
+#[cfg(test)]
+mod tests {
+    use super::{builder, Flags};
+    use crate::settings::{self, Configurable};
+    use alloc::string::ToString;
+
+    #[test]
+    fn display_default() {
+        let shared = settings::Flags::new(settings::builder());
+        let b = builder();
+        let f = Flags::new(&shared, b);
+        assert_eq!(
+            f.to_string(),
+            "[riscv]\n\
+             supports_m = false\n\
+             supports_a = false\n\
+             supports_f = false\n\
+             supports_d = false\n\
+             enable_m = true\n\
+             enable_e = false\n"
+        );
+        // Predicates are not part of the Display output.
+        assert_eq!(f.full_float(), false);
+    }
+
+    #[test]
+    fn predicates() {
+        let mut sb = settings::builder();
+        sb.set("enable_simd", "true").unwrap();
+        let shared = settings::Flags::new(sb);
+        let mut b = builder();
+        b.enable("supports_f").unwrap();
+        b.enable("supports_d").unwrap();
+        let f = Flags::new(&shared, b);
+        assert_eq!(f.full_float(), true);
+
+        let mut sb = settings::builder();
+        sb.set("enable_simd", "false").unwrap();
+        let shared = settings::Flags::new(sb);
+        let mut b = builder();
+        b.enable("supports_f").unwrap();
+        b.enable("supports_d").unwrap();
+        let f = Flags::new(&shared, b);
+        assert_eq!(f.full_float(), false);
+    }
+}
diff --git a/cranelift/codegen/src/isa/stack.rs b/cranelift/codegen/src/isa/stack.rs
new file mode 100644
index 0000000000..ae093bed28
--- /dev/null
+++ b/cranelift/codegen/src/isa/stack.rs
@@ -0,0 +1,95 @@
+//! Low-level details of stack accesses.
+//!
+//! The `ir::StackSlots` type deals with stack slots and stack frame layout. The `StackRef` type
+//! defined in this module expresses the low-level details of accessing a stack slot from an
+//! encoded instruction.
+
+use crate::ir::stackslot::{StackOffset, StackSlotKind, StackSlots};
+use crate::ir::StackSlot;
+
+/// A method for referencing a stack slot in the current stack frame.
+///
+/// Stack slots are addressed with a constant offset from a base register. The base can be the
+/// stack pointer, the frame pointer, or (in the future) a zone register pointing to an inner zone
+/// of a large stack frame.
+#[derive(Clone, Copy, Debug)]
+pub struct StackRef {
+    /// The base register to use for addressing.
+    pub base: StackBase,
+
+    /// Immediate offset from the base register to the first byte of the stack slot.
+    pub offset: StackOffset,
+}
+
+impl StackRef {
+    /// Get a reference to the stack slot `ss` using one of the base pointers in `mask`.
+    pub fn masked(ss: StackSlot, mask: StackBaseMask, frame: &StackSlots) -> Option<Self> {
+        // Try an SP-relative reference.
+        if mask.contains(StackBase::SP) {
+            return Some(Self::sp(ss, frame));
+        }
+
+        // No reference possible with this mask.
+        None
+    }
+
+    /// Get a reference to `ss` using the stack pointer as a base.
+    pub fn sp(ss: StackSlot, frame: &StackSlots) -> Self {
+        let size = frame
+            .layout_info
+            .expect("Stack layout must be computed before referencing stack slots")
+            .frame_size;
+        let slot = &frame[ss];
+        let offset = if slot.kind == StackSlotKind::OutgoingArg {
+            // Outgoing argument slots have offsets relative to our stack pointer.
+            slot.offset.unwrap()
+        } else {
+            // All other slots have offsets relative to our caller's stack frame.
+            // Offset where SP is pointing. (All ISAs have stacks growing downwards.)
+            let sp_offset = -(size as StackOffset);
+            slot.offset.unwrap() - sp_offset
+        };
+        Self {
+            base: StackBase::SP,
+            offset,
+        }
+    }
+}
+
+/// Generic base register for referencing stack slots.
+/// +/// Most ISAs have a stack pointer and an optional frame pointer, so provide generic names for +/// those two base pointers. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum StackBase { + /// Use the stack pointer. + SP = 0, + + /// Use the frame pointer (if one is present). + FP = 1, + + /// Use an explicit zone pointer in a general-purpose register. + /// + /// This feature is not yet implemented. + Zone = 2, +} + +/// Bit mask of supported stack bases. +/// +/// Many instruction encodings can use different base registers while others only work with the +/// stack pointer, say. A `StackBaseMask` is a bit mask of supported stack bases for a given +/// instruction encoding. +/// +/// This behaves like a set of `StackBase` variants. +/// +/// The internal representation as a `u8` is public because stack base masks are used in constant +/// tables generated from the meta-language encoding definitions. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct StackBaseMask(pub u8); + +impl StackBaseMask { + /// Check if this mask contains the `base` variant. + pub fn contains(self, base: StackBase) -> bool { + self.0 & (1 << base as usize) != 0 + } +} diff --git a/cranelift/codegen/src/isa/x86/abi.rs b/cranelift/codegen/src/isa/x86/abi.rs new file mode 100644 index 0000000000..db67457a6c --- /dev/null +++ b/cranelift/codegen/src/isa/x86/abi.rs @@ -0,0 +1,971 @@ +//! x86 ABI implementation. + +use super::super::settings as shared_settings; +#[cfg(feature = "unwind")] +use super::fde::emit_fde; +use super::registers::{FPR, GPR, RU}; +use super::settings as isa_settings; +#[cfg(feature = "unwind")] +use super::unwind::UnwindInfo; +use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion}; +#[cfg(feature = "unwind")] +use crate::binemit::{FrameUnwindKind, FrameUnwindSink}; +use crate::cursor::{Cursor, CursorPosition, EncCursor}; +use crate::ir; +use crate::ir::immediates::Imm64; +use crate::ir::stackslot::{StackOffset, StackSize}; +use crate::ir::{ + get_probestack_funcref, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, + FrameLayoutChange, InstBuilder, ValueLoc, +}; +use crate::isa::{CallConv, RegClass, RegUnit, TargetIsa}; +use crate::regalloc::RegisterSet; +use crate::result::CodegenResult; +use crate::stack_layout::layout_stack; +use alloc::borrow::Cow; +use core::i32; +use std::boxed::Box; +use target_lexicon::{PointerWidth, Triple}; + +/// Argument registers for x86-64 +static ARG_GPRS: [RU; 6] = [RU::rdi, RU::rsi, RU::rdx, RU::rcx, RU::r8, RU::r9]; + +/// Return value registers. +static RET_GPRS: [RU; 3] = [RU::rax, RU::rdx, RU::rcx]; + +/// Argument registers for x86-64, when using windows fastcall +static ARG_GPRS_WIN_FASTCALL_X64: [RU; 4] = [RU::rcx, RU::rdx, RU::r8, RU::r9]; + +/// Return value registers for x86-64, when using windows fastcall +static RET_GPRS_WIN_FASTCALL_X64: [RU; 1] = [RU::rax]; + +/// The win64 fastcall ABI uses some shadow stack space, allocated by the caller, that can be used +/// by the callee for temporary values. +/// +/// [1] "Space is allocated on the call stack as a shadow store for callees to save" This shadow +/// store contains the parameters which are passed through registers (ARG_GPRS) and is eventually +/// used by the callee to save & restore the values of the arguments. 
+/// +/// [2] https://blogs.msdn.microsoft.com/oldnewthing/20110302-00/?p=11333 "Although the x64 calling +/// convention reserves spill space for parameters, you don’t have to use them as such" +const WIN_SHADOW_STACK_SPACE: i32 = 32; + +/// Stack alignment requirement for functions. +/// +/// 16 bytes is the perfect stack alignment, because: +/// +/// - On Win64, "The primary exceptions are the stack pointer and malloc or alloca memory, which +/// are aligned to 16 bytes in order to aid performance". +/// - The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but newer versions use a +/// 16-byte aligned stack pointer. +/// - This allows using aligned loads and stores on SIMD vectors of 16 bytes that are located +/// higher up in the stack. +const STACK_ALIGNMENT: u32 = 16; + +#[derive(Clone)] +struct Args { + pointer_bytes: u8, + pointer_bits: u8, + pointer_type: ir::Type, + gpr: &'static [RU], + gpr_used: usize, + fpr_limit: usize, + fpr_used: usize, + offset: u32, + call_conv: CallConv, + shared_flags: shared_settings::Flags, + #[allow(dead_code)] + isa_flags: isa_settings::Flags, +} + +impl Args { + fn new( + bits: u8, + gpr: &'static [RU], + fpr_limit: usize, + call_conv: CallConv, + shared_flags: &shared_settings::Flags, + isa_flags: &isa_settings::Flags, + ) -> Self { + let offset = if call_conv.extends_windows_fastcall() { + WIN_SHADOW_STACK_SPACE + } else { + 0 + } as u32; + + Self { + pointer_bytes: bits / 8, + pointer_bits: bits, + pointer_type: ir::Type::int(u16::from(bits)).unwrap(), + gpr, + gpr_used: 0, + fpr_limit, + fpr_used: 0, + offset, + call_conv, + shared_flags: shared_flags.clone(), + isa_flags: isa_flags.clone(), + } + } +} + +impl ArgAssigner for Args { + fn assign(&mut self, arg: &AbiParam) -> ArgAction { + let ty = arg.value_type; + + // Vectors should stay in vector registers unless SIMD is not enabled--then they are split + if ty.is_vector() { + if self.shared_flags.enable_simd() { + let reg = FPR.unit(self.fpr_used); + self.fpr_used += 1; + return ArgumentLoc::Reg(reg).into(); + } + return ValueConversion::VectorSplit.into(); + } + + // Large integers and booleans are broken down to fit in a register. + if !ty.is_float() && ty.bits() > u16::from(self.pointer_bits) { + return ValueConversion::IntSplit.into(); + } + + // Small integers are extended to the size of a pointer register. + if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) { + match arg.extension { + ArgumentExtension::None => {} + ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(), + ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(), + } + } + + // Handle special-purpose arguments. + if ty.is_int() && self.call_conv.extends_baldrdash() { + match arg.purpose { + // This is SpiderMonkey's `WasmTlsReg`. + ArgumentPurpose::VMContext => { + return ArgumentLoc::Reg(if self.pointer_bits == 64 { + RU::r14 + } else { + RU::rsi + } as RegUnit) + .into(); + } + // This is SpiderMonkey's `WasmTableCallSigReg`. + ArgumentPurpose::SignatureId => { + return ArgumentLoc::Reg(if self.pointer_bits == 64 { + RU::r10 + } else { + RU::rcx + } as RegUnit) + .into() + } + _ => {} + } + } + + // Try to use a GPR. + if !ty.is_float() && self.gpr_used < self.gpr.len() { + let reg = self.gpr[self.gpr_used] as RegUnit; + self.gpr_used += 1; + return ArgumentLoc::Reg(reg).into(); + } + + // Try to use an FPR. 
+        let fpr_offset = if self.call_conv.extends_windows_fastcall() {
+            // Float and general registers on windows share the same parameter index.
+            // The used register depends entirely on the parameter index: even if XMM0
+            // is not used for the first parameter, it cannot be used for the second parameter.
+            debug_assert_eq!(self.fpr_limit, self.gpr.len());
+            &mut self.gpr_used
+        } else {
+            &mut self.fpr_used
+        };
+
+        if ty.is_float() && *fpr_offset < self.fpr_limit {
+            let reg = FPR.unit(*fpr_offset);
+            *fpr_offset += 1;
+            return ArgumentLoc::Reg(reg).into();
+        }
+
+        // Assign a stack location.
+        let loc = ArgumentLoc::Stack(self.offset as i32);
+        self.offset += u32::from(self.pointer_bytes);
+        debug_assert!(self.offset <= i32::MAX as u32);
+        loc.into()
+    }
+}
+
+/// Legalize `sig`.
+pub fn legalize_signature(
+    sig: &mut Cow<ir::Signature>,
+    triple: &Triple,
+    _current: bool,
+    shared_flags: &shared_settings::Flags,
+    isa_flags: &isa_settings::Flags,
+) {
+    let bits;
+    let mut args;
+
+    match triple.pointer_width().unwrap() {
+        PointerWidth::U16 => panic!(),
+        PointerWidth::U32 => {
+            bits = 32;
+            args = Args::new(bits, &[], 0, sig.call_conv, shared_flags, isa_flags);
+        }
+        PointerWidth::U64 => {
+            bits = 64;
+            args = if sig.call_conv.extends_windows_fastcall() {
+                Args::new(
+                    bits,
+                    &ARG_GPRS_WIN_FASTCALL_X64[..],
+                    4,
+                    sig.call_conv,
+                    shared_flags,
+                    isa_flags,
+                )
+            } else {
+                Args::new(
+                    bits,
+                    &ARG_GPRS[..],
+                    8,
+                    sig.call_conv,
+                    shared_flags,
+                    isa_flags,
+                )
+            };
+        }
+    }
+
+    let (ret_regs, ret_fpr_limit) = if sig.call_conv.extends_windows_fastcall() {
+        // windows-x64 calling convention only uses XMM0 or RAX for return values
+        (&RET_GPRS_WIN_FASTCALL_X64[..], 1)
+    } else {
+        (&RET_GPRS[..], 2)
+    };
+
+    let mut rets = Args::new(
+        bits,
+        ret_regs,
+        ret_fpr_limit,
+        sig.call_conv,
+        shared_flags,
+        isa_flags,
+    );
+
+    let sig_is_multi_return = sig.is_multi_return();
+
+    // If this is a multi-value return and we don't have enough available return
+    // registers to fit all of the return values, we need to backtrack and start
+    // assigning locations all over again with a different strategy. In order to
+    // do that, we need a copy of the original assigner for the returns.
+    let backup_rets_for_struct_return = if sig_is_multi_return {
+        Some(rets.clone())
+    } else {
+        None
+    };
+
+    if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) {
+        if sig.is_multi_return()
+            && new_returns
+                .iter()
+                .filter(|r| r.purpose == ArgumentPurpose::Normal)
+                .any(|r| !r.location.is_reg())
+        {
+            // The return values couldn't all fit into available return
+            // registers. Introduce the use of a struct-return parameter.
+            debug_assert!(!sig.uses_struct_return_param());
+
+            // We're using the first register for the return pointer parameter.
+            let mut ret_ptr_param = AbiParam {
+                value_type: args.pointer_type,
+                purpose: ArgumentPurpose::StructReturn,
+                extension: ArgumentExtension::None,
+                location: ArgumentLoc::Unassigned,
+            };
+            match args.assign(&ret_ptr_param) {
+                ArgAction::Assign(ArgumentLoc::Reg(reg)) => {
+                    ret_ptr_param.location = ArgumentLoc::Reg(reg);
+                    sig.to_mut().params.push(ret_ptr_param);
+                }
+                _ => unreachable!("return pointer should always get a register assignment"),
+            }
+
+            let mut backup_rets = backup_rets_for_struct_return.unwrap();
+
+            // We're using the first return register for the return pointer (like
+            // sys v does).
+            let mut ret_ptr_return = AbiParam {
+                value_type: args.pointer_type,
+                purpose: ArgumentPurpose::StructReturn,
+                extension: ArgumentExtension::None,
+                location: ArgumentLoc::Unassigned,
+            };
+            match backup_rets.assign(&ret_ptr_return) {
+                ArgAction::Assign(ArgumentLoc::Reg(reg)) => {
+                    ret_ptr_return.location = ArgumentLoc::Reg(reg);
+                    sig.to_mut().returns.push(ret_ptr_return);
+                }
+                _ => unreachable!("return pointer should always get a register assignment"),
+            }
+
+            sig.to_mut().returns.retain(|ret| {
+                // Either this is the return pointer, in which case we want to keep
+                // it, or else assume that it is assigned for a reason and doesn't
+                // conflict with our return pointer legalization.
+                debug_assert_eq!(
+                    ret.location.is_assigned(),
+                    ret.purpose != ArgumentPurpose::Normal
+                );
+                ret.location.is_assigned()
+            });
+
+            if let Some(new_returns) = legalize_args(&sig.returns, &mut backup_rets) {
+                sig.to_mut().returns = new_returns;
+            }
+        } else {
+            sig.to_mut().returns = new_returns;
+        }
+    }
+
+    if let Some(new_params) = legalize_args(&sig.params, &mut args) {
+        sig.to_mut().params = new_params;
+    }
+}
+
+/// Get register class for a type appearing in a legalized signature.
+pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
+    if ty.is_int() || ty.is_bool() || ty.is_ref() {
+        GPR
+    } else {
+        FPR
+    }
+}
+
+/// Get the set of allocatable registers for `func`.
+pub fn allocatable_registers(triple: &Triple, flags: &shared_settings::Flags) -> RegisterSet {
+    let mut regs = RegisterSet::new();
+    regs.take(GPR, RU::rsp as RegUnit);
+    regs.take(GPR, RU::rbp as RegUnit);
+
+    // 32-bit arch only has 8 registers.
+    if triple.pointer_width().unwrap() != PointerWidth::U64 {
+        for i in 8..16 {
+            regs.take(GPR, GPR.unit(i));
+            regs.take(FPR, FPR.unit(i));
+        }
+        if flags.enable_pinned_reg() {
+            unimplemented!("Pinned register not implemented on x86-32.");
+        }
+    } else {
+        // Choose r15 as the pinned register on 64-bits: it is non-volatile on native ABIs and
+        // isn't the fixed output register of any instruction.
+        if flags.enable_pinned_reg() {
+            regs.take(GPR, RU::r15 as RegUnit);
+        }
+    }
+
+    regs
+}
+
+/// Get the set of callee-saved registers.
+fn callee_saved_gprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] {
+    match isa.triple().pointer_width().unwrap() {
+        PointerWidth::U16 => panic!(),
+        PointerWidth::U32 => &[RU::rbx, RU::rsi, RU::rdi],
+        PointerWidth::U64 => {
+            if call_conv.extends_windows_fastcall() {
+                // "registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15 are considered nonvolatile
+                // and must be saved and restored by a function that uses them."
+                // as per https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention
+                // RSP & RBP are not listed below, since they are restored automatically during
+                // a function call. If that wasn't the case, function calls (RET) would not work.
+                &[
+                    RU::rbx,
+                    RU::rdi,
+                    RU::rsi,
+                    RU::r12,
+                    RU::r13,
+                    RU::r14,
+                    RU::r15,
+                ]
+            } else {
+                &[RU::rbx, RU::r12, RU::r13, RU::r14, RU::r15]
+            }
+        }
+    }
+}
+
+/// Get the set of callee-saved registers that are used.
+fn callee_saved_gprs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterSet {
+    let mut all_callee_saved = RegisterSet::empty();
+    for reg in callee_saved_gprs(isa, func.signature.call_conv) {
+        all_callee_saved.free(GPR, *reg as RegUnit);
+    }
+
+    let mut used = RegisterSet::empty();
+    for value_loc in func.locations.values() {
+        // Note that `value_loc` here contains only a single unit of a potentially multi-unit
+        // register. We don't use registers that overlap each other in the x86 ISA, but in others
+        // we do. So this should not be blindly reused.
+        if let ValueLoc::Reg(ru) = *value_loc {
+            if !used.is_avail(GPR, ru) {
+                used.free(GPR, ru);
+            }
+        }
+    }
+
+    // regmove and regfill instructions may temporarily divert values into other registers,
+    // and these are not reflected in `func.locations`. Scan the function for such instructions
+    // and note which callee-saved registers they use.
+    //
+    // TODO: Consider re-evaluating how regmove/regfill/regspill work and whether it's possible
+    // to avoid this step.
+    for block in &func.layout {
+        for inst in func.layout.block_insts(block) {
+            match func.dfg[inst] {
+                ir::instructions::InstructionData::RegMove { dst, .. }
+                | ir::instructions::InstructionData::RegFill { dst, .. } => {
+                    if !used.is_avail(GPR, dst) {
+                        used.free(GPR, dst);
+                    }
+                }
+                _ => (),
+            }
+        }
+    }
+
+    used.intersect(&all_callee_saved);
+    used
+}
+
+pub fn prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
+    match func.signature.call_conv {
+        // For now, just translate fast and cold as system_v.
+        CallConv::Fast | CallConv::Cold | CallConv::SystemV => {
+            system_v_prologue_epilogue(func, isa)
+        }
+        CallConv::WindowsFastcall => fastcall_prologue_epilogue(func, isa),
+        CallConv::BaldrdashSystemV | CallConv::BaldrdashWindows => {
+            baldrdash_prologue_epilogue(func, isa)
+        }
+        CallConv::Probestack => unimplemented!("probestack calling convention"),
+    }
+}
+
+fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
+    debug_assert!(
+        !isa.flags().enable_probestack(),
+        "baldrdash does not expect cranelift to emit stack probes"
+    );
+
+    let word_size = StackSize::from(isa.pointer_bytes());
+    let shadow_store_size = if func.signature.call_conv.extends_windows_fastcall() {
+        WIN_SHADOW_STACK_SPACE as u32
+    } else {
+        0
+    };
+
+    let bytes =
+        StackSize::from(isa.flags().baldrdash_prologue_words()) * word_size + shadow_store_size;
+
+    let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
+    ss.offset = Some(-(bytes as StackOffset));
+    func.stack_slots.push(ss);
+
+    let is_leaf = func.is_leaf();
+    layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)?;
+    Ok(())
+}
+
+/// CFAState is cranelift's model of the call frame layout at any particular point in a function.
+/// It describes the call frame's layout in terms of a call frame address: where it is with
+/// respect to the start of the call frame, and where the top of the stack is with respect to it.
+///
+/// Changes in this layout are used to derive the appropriate `ir::FrameLayoutChange`s to record
+/// for relevant instructions.
+#[derive(Clone)]
+struct CFAState {
+    /// The register from which we can derive the call frame address. On x86_64, this is typically
+    /// `rbp`, but at function entry and exit may be `rsp` while the call frame is being
+    /// established.
+    cf_ptr_reg: RegUnit,
+    /// Given that `cf_ptr_reg` is a register containing a pointer to some memory, `cf_ptr_offset`
+    /// is the offset from that pointer to the address of the start of this function's call frame.
+    ///
+    /// For a concrete x86_64 example, we will start this at 8 - the call frame begins immediately
+    /// before the return address. This will typically then be set to 16, after pushing `rbp` to
+    /// preserve the parent call frame. It is very unlikely the offset should be anything other
+    /// than one or two pointer widths.
+ cf_ptr_offset: isize, + /// The offset between the start of the call frame and the current stack pointer. This is + /// primarily useful to point to where on the stack preserved registers are, but is maintained + /// through the whole function for consistency. + current_depth: isize, +} + +/// Implementation of the fastcall-based Win64 calling convention described at [1] +/// [1] https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention +fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> { + if isa.triple().pointer_width().unwrap() != PointerWidth::U64 { + panic!("TODO: windows-fastcall: x86-32 not implemented yet"); + } + + let csrs = callee_saved_gprs_used(isa, func); + + // The reserved stack area is composed of: + // return address + frame pointer + all callee-saved registers + shadow space + // + // Pushing the return address is an implicit function of the `call` + // instruction. Each of the others we will then push explicitly. Then we + // will adjust the stack pointer to make room for the rest of the required + // space for this frame. + let word_size = isa.pointer_bytes() as usize; + let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32; + + // TODO: eventually use the 32 bytes (shadow store) as spill slot. This currently doesn't work + // since cranelift does not support spill slots before incoming args + + func.create_stack_slot(ir::StackSlotData { + kind: ir::StackSlotKind::IncomingArg, + size: csr_stack_size as u32, + offset: Some(-(WIN_SHADOW_STACK_SPACE + csr_stack_size)), + }); + + let is_leaf = func.is_leaf(); + let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32; + let local_stack_size = i64::from(total_stack_size - csr_stack_size); + + // Add CSRs to function signature + let reg_type = isa.pointer_type(); + let fp_arg = ir::AbiParam::special_reg( + reg_type, + ir::ArgumentPurpose::FramePointer, + RU::rbp as RegUnit, + ); + func.signature.params.push(fp_arg); + func.signature.returns.push(fp_arg); + + for csr in csrs.iter(GPR) { + let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr); + func.signature.params.push(csr_arg); + func.signature.returns.push(csr_arg); + } + + // Set up the cursor and insert the prologue + let entry_block = func.layout.entry_block().expect("missing entry block"); + let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block); + let prologue_cfa_state = + insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa); + + // Reset the cursor and insert the epilogue + let mut pos = pos.at_position(CursorPosition::Nowhere); + insert_common_epilogues( + &mut pos, + local_stack_size, + reg_type, + &csrs, + isa, + prologue_cfa_state, + ); + + Ok(()) +} + +/// Insert a System V-compatible prologue and epilogue. +fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> { + let pointer_width = isa.triple().pointer_width().unwrap(); + let word_size = pointer_width.bytes() as usize; + + let csrs = callee_saved_gprs_used(isa, func); + + // The reserved stack area is composed of: + // return address + frame pointer + all callee-saved registers + // + // Pushing the return address is an implicit function of the `call` + // instruction. Each of the others we will then push explicitly. Then we + // will adjust the stack pointer to make room for the rest of the required + // space for this frame. 
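+    //
+    // (Illustrative arithmetic added by the editor: with three callee-saved
+    // registers in use on x86-64, csr_stack_size below is (3 + 2) * 8 = 40
+    // bytes: the CSRs plus the return address and the saved frame pointer.)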
+    let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;
+    func.create_stack_slot(ir::StackSlotData {
+        kind: ir::StackSlotKind::IncomingArg,
+        size: csr_stack_size as u32,
+        offset: Some(-csr_stack_size),
+    });
+
+    let is_leaf = func.is_leaf();
+    let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32;
+    let local_stack_size = i64::from(total_stack_size - csr_stack_size);
+
+    // Add CSRs to function signature
+    let reg_type = ir::Type::int(u16::from(pointer_width.bits())).unwrap();
+    let fp_arg = ir::AbiParam::special_reg(
+        reg_type,
+        ir::ArgumentPurpose::FramePointer,
+        RU::rbp as RegUnit,
+    );
+    func.signature.params.push(fp_arg);
+    func.signature.returns.push(fp_arg);
+
+    for csr in csrs.iter(GPR) {
+        let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr);
+        func.signature.params.push(csr_arg);
+        func.signature.returns.push(csr_arg);
+    }
+
+    // Set up the cursor and insert the prologue
+    let entry_block = func.layout.entry_block().expect("missing entry block");
+    let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block);
+    let prologue_cfa_state =
+        insert_common_prologue(&mut pos, local_stack_size, reg_type, &csrs, isa);
+
+    // Reset the cursor and insert the epilogue
+    let mut pos = pos.at_position(CursorPosition::Nowhere);
+    insert_common_epilogues(
+        &mut pos,
+        local_stack_size,
+        reg_type,
+        &csrs,
+        isa,
+        prologue_cfa_state,
+    );
+
+    Ok(())
+}
+
+/// Insert the prologue for a given function.
+/// This is used by common calling conventions such as System V.
+fn insert_common_prologue(
+    pos: &mut EncCursor,
+    stack_size: i64,
+    reg_type: ir::types::Type,
+    csrs: &RegisterSet,
+    isa: &dyn TargetIsa,
+) -> Option<CFAState> {
+    let word_size = isa.pointer_bytes() as isize;
+    if stack_size > 0 {
+        // Check if there is a special stack limit parameter. If so, insert a stack check.
+        if let Some(stack_limit_arg) = pos.func.special_param(ArgumentPurpose::StackLimit) {
+            // Total stack size is the size of all stack areas used by the function, including
+            // pushed CSRs and the frame pointer.
+            // The size of the return address, implicitly pushed by an x86 `call` instruction,
+            // should also be accounted for.
+            // TODO: Check if the function body actually contains a `call` instruction.
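+            // (Editor's illustration: with two CSRs on x86-64 this computes
+            // (2 + 1 + 1) * 8 = 32 bytes for CSRs, frame pointer and return address.)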
+ let total_stack_size = (csrs.iter(GPR).len() + 1 + 1) as i64 * word_size as i64; + + insert_stack_check(pos, total_stack_size, stack_limit_arg); + } + } + + let mut cfa_state = if let Some(ref mut frame_layout) = pos.func.frame_layout { + let cfa_state = CFAState { + cf_ptr_reg: RU::rsp as RegUnit, + cf_ptr_offset: word_size, + current_depth: -word_size, + }; + + frame_layout.initial = vec![ + FrameLayoutChange::CallFrameAddressAt { + reg: cfa_state.cf_ptr_reg, + offset: cfa_state.cf_ptr_offset, + }, + FrameLayoutChange::ReturnAddressAt { + cfa_offset: cfa_state.current_depth, + }, + ] + .into_boxed_slice(); + + Some(cfa_state) + } else { + None + }; + + // Append param to entry block + let block = pos.current_block().expect("missing block under cursor"); + let fp = pos.func.dfg.append_block_param(block, reg_type); + pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit); + + let push_fp_inst = pos.ins().x86_push(fp); + + if let Some(ref mut frame_layout) = pos.func.frame_layout { + let cfa_state = cfa_state + .as_mut() + .expect("cfa state exists when recording frame layout"); + cfa_state.current_depth -= word_size; + cfa_state.cf_ptr_offset += word_size; + frame_layout.instructions.insert( + push_fp_inst, + vec![ + FrameLayoutChange::CallFrameAddressAt { + reg: cfa_state.cf_ptr_reg, + offset: cfa_state.cf_ptr_offset, + }, + FrameLayoutChange::RegAt { + reg: RU::rbp as RegUnit, + cfa_offset: cfa_state.current_depth, + }, + ] + .into_boxed_slice(), + ); + } + + let mov_sp_inst = pos + .ins() + .copy_special(RU::rsp as RegUnit, RU::rbp as RegUnit); + + if let Some(ref mut frame_layout) = pos.func.frame_layout { + let mut cfa_state = cfa_state + .as_mut() + .expect("cfa state exists when recording frame layout"); + cfa_state.cf_ptr_reg = RU::rbp as RegUnit; + frame_layout.instructions.insert( + mov_sp_inst, + vec![FrameLayoutChange::CallFrameAddressAt { + reg: cfa_state.cf_ptr_reg, + offset: cfa_state.cf_ptr_offset, + }] + .into_boxed_slice(), + ); + } + + for reg in csrs.iter(GPR) { + // Append param to entry block + let csr_arg = pos.func.dfg.append_block_param(block, reg_type); + + // Assign it a location + pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg); + + // Remember it so we can push it momentarily + let reg_push_inst = pos.ins().x86_push(csr_arg); + + if let Some(ref mut frame_layout) = pos.func.frame_layout { + let mut cfa_state = cfa_state + .as_mut() + .expect("cfa state exists when recording frame layout"); + cfa_state.current_depth -= word_size; + frame_layout.instructions.insert( + reg_push_inst, + vec![FrameLayoutChange::RegAt { + reg, + cfa_offset: cfa_state.current_depth, + }] + .into_boxed_slice(), + ); + } + } + + // Allocate stack frame storage. + if stack_size > 0 { + if isa.flags().enable_probestack() && stack_size > (1 << isa.flags().probestack_size_log2()) + { + // Emit a stack probe. + let rax = RU::rax as RegUnit; + let rax_val = ir::ValueLoc::Reg(rax); + + // The probestack function expects its input in %rax. + let arg = pos.ins().iconst(reg_type, stack_size); + pos.func.locations[arg] = rax_val; + + // Call the probestack function. + let callee = get_probestack_funcref(pos.func, reg_type, rax, isa); + + // Make the call. + let call = if !isa.flags().is_pic() + && isa.triple().pointer_width().unwrap() == PointerWidth::U64 + && !pos.func.dfg.ext_funcs[callee].colocated + { + // 64-bit non-PIC non-colocated calls need to be legalized to call_indirect. + // Use r11 as it may be clobbered under all supported calling conventions. 
+                let r11 = RU::r11 as RegUnit;
+                let sig = pos.func.dfg.ext_funcs[callee].signature;
+                let addr = pos.ins().func_addr(reg_type, callee);
+                pos.func.locations[addr] = ir::ValueLoc::Reg(r11);
+                pos.ins().call_indirect(sig, addr, &[arg])
+            } else {
+                // Otherwise just do a normal call.
+                pos.ins().call(callee, &[arg])
+            };
+
+            // If the probestack function doesn't adjust sp, do it ourselves.
+            if !isa.flags().probestack_func_adjusts_sp() {
+                let result = pos.func.dfg.inst_results(call)[0];
+                pos.func.locations[result] = rax_val;
+                pos.func.prologue_end = Some(pos.ins().adjust_sp_down(result));
+            }
+        } else {
+            // Simply decrement the stack pointer.
+            pos.func.prologue_end = Some(pos.ins().adjust_sp_down_imm(Imm64::new(stack_size)));
+        }
+    }
+
+    cfa_state
+}
+
+/// Insert a check that generates a trap if the stack pointer goes
+/// below a value in `stack_limit_arg`.
+fn insert_stack_check(pos: &mut EncCursor, stack_size: i64, stack_limit_arg: ir::Value) {
+    use crate::ir::condcodes::IntCC;
+
+    // Copy `stack_limit_arg` into a %rax and use it for calculating
+    // a SP threshold.
+    let stack_limit_copy = pos.ins().copy(stack_limit_arg);
+    pos.func.locations[stack_limit_copy] = ir::ValueLoc::Reg(RU::rax as RegUnit);
+    let sp_threshold = pos.ins().iadd_imm(stack_limit_copy, stack_size);
+    pos.func.locations[sp_threshold] = ir::ValueLoc::Reg(RU::rax as RegUnit);
+
+    // If the stack pointer currently reaches the SP threshold or below it, then after opening
+    // the current stack frame the current stack pointer will reach the limit.
+    let cflags = pos.ins().ifcmp_sp(sp_threshold);
+    pos.func.locations[cflags] = ir::ValueLoc::Reg(RU::rflags as RegUnit);
+    pos.ins().trapif(
+        IntCC::UnsignedGreaterThanOrEqual,
+        cflags,
+        ir::TrapCode::StackOverflow,
+    );
+}
+
+/// Find all `return` instructions and insert epilogues before them.
+fn insert_common_epilogues(
+    pos: &mut EncCursor,
+    stack_size: i64,
+    reg_type: ir::types::Type,
+    csrs: &RegisterSet,
+    isa: &dyn TargetIsa,
+    cfa_state: Option<CFAState>,
+) {
+    while let Some(block) = pos.next_block() {
+        pos.goto_last_inst(block);
+        if let Some(inst) = pos.current_inst() {
+            if pos.func.dfg[inst].opcode().is_return() {
+                let is_last = pos.func.layout.last_block() == Some(block);
+                insert_common_epilogue(
+                    inst,
+                    stack_size,
+                    pos,
+                    reg_type,
+                    csrs,
+                    isa,
+                    is_last,
+                    cfa_state.clone(),
+                );
+            }
+        }
+    }
+}
+
+/// Insert an epilogue given a specific `return` instruction.
+/// This is used by common calling conventions such as System V.
+fn insert_common_epilogue(
+    inst: ir::Inst,
+    stack_size: i64,
+    pos: &mut EncCursor,
+    reg_type: ir::types::Type,
+    csrs: &RegisterSet,
+    isa: &dyn TargetIsa,
+    is_last: bool,
+    mut cfa_state: Option<CFAState>,
+) {
+    let word_size = isa.pointer_bytes() as isize;
+    if stack_size > 0 {
+        pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
+    }
+
+    // Pop all the callee-saved registers, stepping backward each time to
+    // preserve the correct order.
+    let fp_ret = pos.ins().x86_pop(reg_type);
+    let fp_pop_inst = pos.built_inst();
+
+    if let Some(ref mut cfa_state) = cfa_state.as_mut() {
+        // Account for CFA state in the reverse of `insert_common_prologue`.
+        cfa_state.current_depth += word_size;
+        cfa_state.cf_ptr_offset -= word_size;
+        // And now that we're going to overwrite `rbp`, `rsp` is the only way to get to the call
+        // frame. We don't apply a frame layout change *yet* because we check that at return the
+        // depth is exactly one `word_size`.
+        cfa_state.cf_ptr_reg = RU::rsp as RegUnit;
+    }
+
+    pos.prev_inst();
+
+    pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
+    pos.func.dfg.append_inst_arg(inst, fp_ret);
+
+    for reg in csrs.iter(GPR) {
+        let csr_ret = pos.ins().x86_pop(reg_type);
+        if let Some(ref mut cfa_state) = cfa_state.as_mut() {
+            // Note: don't bother recording a frame layout change because the popped value is
+            // still correct in memory, and won't be overwritten until we've returned, where the
+            // current frame's layout would no longer matter. Only adjust `current_depth` for a
+            // consistency check later.
+            cfa_state.current_depth += word_size;
+        }
+        pos.prev_inst();
+
+        pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg);
+        pos.func.dfg.append_inst_arg(inst, csr_ret);
+    }
+
+    if let Some(ref mut frame_layout) = pos.func.frame_layout {
+        let cfa_state = cfa_state
+            .as_mut()
+            .expect("cfa state exists when recording frame layout");
+        // Validity checks - if we accounted correctly, CFA state at a return will match CFA state
+        // at the entry of a function.
+        //
+        // current_depth starts by assuming a return address is pushed, and cf_ptr_offset is one
+        // pointer below current_depth.
+        assert_eq!(cfa_state.current_depth, -word_size);
+        assert_eq!(cfa_state.cf_ptr_offset, word_size);
+
+        // Insert a preserve-CFA-state operation after the FP pop instruction.
+        let new_cfa = FrameLayoutChange::CallFrameAddressAt {
+            reg: cfa_state.cf_ptr_reg,
+            offset: cfa_state.cf_ptr_offset,
+        };
+        let new_cfa = if is_last {
+            vec![new_cfa]
+        } else {
+            vec![FrameLayoutChange::Preserve, new_cfa]
+        };
+
+        frame_layout
+            .instructions
+            .entry(fp_pop_inst)
+            .and_modify(|insts| {
+                *insts = insts
+                    .iter()
+                    .cloned()
+                    .chain(new_cfa.clone().into_iter())
+                    .collect::<Box<[_]>>();
+            })
+            .or_insert_with(|| new_cfa.into_boxed_slice());
+
+        if !is_last {
+            // Insert a restore-CFA-state operation after each return.
+            frame_layout
+                .instructions
+                .insert(inst, vec![FrameLayoutChange::Restore].into_boxed_slice());
+        }
+    }
+}
+
+#[cfg(feature = "unwind")]
+pub fn emit_unwind_info(
+    func: &ir::Function,
+    isa: &dyn TargetIsa,
+    kind: FrameUnwindKind,
+    sink: &mut dyn FrameUnwindSink,
+) {
+    match kind {
+        FrameUnwindKind::Fastcall => {
+            // Assumption: RBP is being used as the frame pointer.
+            // In the future, Windows fastcall codegen should usually omit the frame pointer.
+            if let Some(info) = UnwindInfo::try_from_func(func, isa, Some(RU::rbp.into())) {
+                info.emit(sink);
+            }
+        }
+        FrameUnwindKind::Libunwind => {
+            if func.frame_layout.is_some() {
+                emit_fde(func, isa, sink);
+            }
+        }
+    }
+}
diff --git a/cranelift/codegen/src/isa/x86/binemit.rs b/cranelift/codegen/src/isa/x86/binemit.rs
new file mode 100644
index 0000000000..15defe69ee
--- /dev/null
+++ b/cranelift/codegen/src/isa/x86/binemit.rs
@@ -0,0 +1,399 @@
+//! Emitting binary x86 machine code.
+
+use super::enc_tables::{needs_offset, needs_sib_byte};
+use super::registers::RU;
+use crate::binemit::{bad_encoding, CodeSink, Reloc};
+use crate::ir::condcodes::{CondCode, FloatCC, IntCC};
+use crate::ir::{
+    Block, Constant, ExternalName, Function, Inst, InstructionData, JumpTable, LibCall, Opcode,
+    TrapCode,
+};
+use crate::isa::{RegUnit, StackBase, StackBaseMask, StackRef, TargetIsa};
+use crate::regalloc::RegDiversions;
+
+use cranelift_codegen_shared::isa::x86::EncodingBits;
+
+include!(concat!(env!("OUT_DIR"), "/binemit-x86.rs"));
+
+// Convert a stack base to the corresponding register.
+fn stk_base(base: StackBase) -> RegUnit {
+    let ru = match base {
+        StackBase::SP => RU::rsp,
+        StackBase::FP => RU::rbp,
+        StackBase::Zone => unimplemented!(),
+    };
+    ru as RegUnit
+}
+
+// Mandatory prefix bytes for Mp* opcodes.
+const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2];
+
+// Second byte for three-byte opcodes for mm=0b10 and mm=0b11.
+const OP3_BYTE2: [u8; 2] = [0x38, 0x3a];
+
+// A REX prefix with no bits set: 0b0100WRXB.
+const BASE_REX: u8 = 0b0100_0000;
+
+// Create a single-register REX prefix, setting the B bit to bit 3 of the register.
+// This is used for instructions that encode a register in the low 3 bits of the opcode and for
+// instructions that use the ModR/M `reg` field for something else.
+fn rex1(reg_b: RegUnit) -> u8 {
+    let b = ((reg_b >> 3) & 1) as u8;
+    BASE_REX | b
+}
+
+// Create a dual-register REX prefix, setting:
+//
+// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
+// REX.R = bit 3 of reg register.
+fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
+    let b = ((rm >> 3) & 1) as u8;
+    let r = ((reg >> 3) & 1) as u8;
+    BASE_REX | b | (r << 2)
+}
+
+// Create a three-register REX prefix, setting:
+//
+// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
+// REX.R = bit 3 of reg register.
+// REX.X = bit 3 of SIB index register.
+fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 {
+    let b = ((rm >> 3) & 1) as u8;
+    let r = ((reg >> 3) & 1) as u8;
+    let x = ((index >> 3) & 1) as u8;
+    BASE_REX | b | (x << 1) | (r << 2)
+}
+
+/// Determines whether a REX prefix should be emitted.
+#[inline]
+fn needs_rex(bits: u16, rex: u8) -> bool {
+    rex != BASE_REX || EncodingBits::from(bits).rex_w() == 1
+}
+
+// Emit a REX prefix.
+//
+// The R, X, and B bits are computed from registers using the functions above. The W bit is
+// extracted from `bits`.
+fn rex_prefix<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(rex & 0xf8, BASE_REX);
+    let w = EncodingBits::from(bits).rex_w();
+    sink.put1(rex | (w << 3));
+}
+
+// Emit a single-byte opcode with no REX prefix.
+fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*");
+    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op1 encoding");
+    sink.put1(bits as u8);
+}
+
+// Emit a single-byte opcode with REX prefix.
+fn put_rexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*");
+    rex_prefix(bits, rex, sink);
+    sink.put1(bits as u8);
+}
+
+/// Emit a single-byte opcode with inferred REX prefix.
+fn put_dynrexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for DynRexOp1*");
+    if needs_rex(bits, rex) {
+        rex_prefix(bits, rex, sink);
+    }
+    sink.put1(bits as u8);
+}
+
+// Emit two-byte opcode: 0F XX
+fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");
+    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op2 encoding");
+    sink.put1(0x0f);
+    sink.put1(bits as u8);
+}
+
+// Emit two-byte opcode: 0F XX with REX prefix.
+fn put_rexop2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x0f00, 0x0400, "Invalid encoding bits for RexOp2*");
+    rex_prefix(bits, rex, sink);
+    sink.put1(0x0f);
+    sink.put1(bits as u8);
+}
+
+/// Emit two-byte opcode: 0F XX with inferred REX prefix.
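+///
+/// For example (an editor's illustration): `imul r32, r/m32` is `0F AF`; with
+/// both operands in the low eight registers and REX.W clear, `needs_rex` is
+/// false and only `0F AF` is emitted, while the 64-bit form sets REX.W in
+/// `bits` and emits `48 0F AF` instead.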
+fn put_dynrexop2(bits: u16, rex: u8, sink: &mut CS) { + debug_assert_eq!( + bits & 0x0f00, + 0x0400, + "Invalid encoding bits for DynRexOp2*" + ); + if needs_rex(bits, rex) { + rex_prefix(bits, rex, sink); + } + sink.put1(0x0f); + sink.put1(bits as u8); +} + +// Emit single-byte opcode with mandatory prefix. +fn put_mp1(bits: u16, rex: u8, sink: &mut CS) { + debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*"); + let enc = EncodingBits::from(bits); + sink.put1(PREFIX[(enc.pp() - 1) as usize]); + debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp1 encoding"); + sink.put1(bits as u8); +} + +// Emit single-byte opcode with mandatory prefix and REX. +fn put_rexmp1(bits: u16, rex: u8, sink: &mut CS) { + debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for RexMp1*"); + let enc = EncodingBits::from(bits); + sink.put1(PREFIX[(enc.pp() - 1) as usize]); + rex_prefix(bits, rex, sink); + sink.put1(bits as u8); +} + +// Emit two-byte opcode (0F XX) with mandatory prefix. +fn put_mp2(bits: u16, rex: u8, sink: &mut CS) { + debug_assert_eq!(bits & 0x8c00, 0x0400, "Invalid encoding bits for Mp2*"); + let enc = EncodingBits::from(bits); + sink.put1(PREFIX[(enc.pp() - 1) as usize]); + debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp2 encoding"); + sink.put1(0x0f); + sink.put1(bits as u8); +} + +// Emit two-byte opcode (0F XX) with mandatory prefix and REX. +fn put_rexmp2(bits: u16, rex: u8, sink: &mut CS) { + debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for RexMp2*"); + let enc = EncodingBits::from(bits); + sink.put1(PREFIX[(enc.pp() - 1) as usize]); + rex_prefix(bits, rex, sink); + sink.put1(0x0f); + sink.put1(bits as u8); +} + +/// Emit two-byte opcode (0F XX) with mandatory prefix and inferred REX. +fn put_dynrexmp2(bits: u16, rex: u8, sink: &mut CS) { + debug_assert_eq!( + bits & 0x0c00, + 0x0400, + "Invalid encoding bits for DynRexMp2*" + ); + let enc = EncodingBits::from(bits); + sink.put1(PREFIX[(enc.pp() - 1) as usize]); + if needs_rex(bits, rex) { + rex_prefix(bits, rex, sink); + } + sink.put1(0x0f); + sink.put1(bits as u8); +} + +// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix. +fn put_mp3(bits: u16, rex: u8, sink: &mut CS) { + debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*"); + debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp3 encoding"); + let enc = EncodingBits::from(bits); + sink.put1(PREFIX[(enc.pp() - 1) as usize]); + sink.put1(0x0f); + sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]); + sink.put1(bits as u8); +} + +// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX +fn put_rexmp3(bits: u16, rex: u8, sink: &mut CS) { + debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for RexMp3*"); + let enc = EncodingBits::from(bits); + sink.put1(PREFIX[(enc.pp() - 1) as usize]); + rex_prefix(bits, rex, sink); + sink.put1(0x0f); + sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]); + sink.put1(bits as u8); +} + +/// Emit a ModR/M byte for reg-reg operands. +fn modrm_rr(rm: RegUnit, reg: RegUnit, sink: &mut CS) { + let reg = reg as u8 & 7; + let rm = rm as u8 & 7; + let mut b = 0b11000000; + b |= reg << 3; + b |= rm; + sink.put1(b); +} + +/// Emit a ModR/M byte where the reg bits are part of the opcode. +fn modrm_r_bits(rm: RegUnit, bits: u16, sink: &mut CS) { + let reg = (bits >> 12) as u8 & 7; + let rm = rm as u8 & 7; + let mut b = 0b11000000; + b |= reg << 3; + b |= rm; + sink.put1(b); +} + +/// Emit a mode 00 ModR/M byte. 
+/// This is a register-indirect addressing mode with no offset.
+///
+/// Registers %rsp and %rbp are invalid for `rm`: %rsp indicates a SIB byte, and %rbp indicates an
+/// absolute immediate 32-bit address.
+fn modrm_rm<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
+    let reg = reg as u8 & 7;
+    let rm = rm as u8 & 7;
+    let mut b = 0b00000000;
+    b |= reg << 3;
+    b |= rm;
+    sink.put1(b);
+}
+
+/// Emit a mode 00 ModR/M byte with a rip-relative displacement in 64-bit mode. The effective
+/// address is calculated by adding the displacement to the 64-bit rip of the next instruction.
+/// See the Intel Software Developer's Manual, section 2.2.1.6.
+fn modrm_riprel<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
+    modrm_rm(0b101, reg, sink)
+}
+
+/// Emit a mode 01 ModR/M byte. This is a register-indirect addressing mode with 8-bit
+/// displacement.
+/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
+fn modrm_disp8<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
+    let reg = reg as u8 & 7;
+    let rm = rm as u8 & 7;
+    let mut b = 0b01000000;
+    b |= reg << 3;
+    b |= rm;
+    sink.put1(b);
+}
+
+/// Emit a mode 10 ModR/M byte. This is a register-indirect addressing mode with 32-bit
+/// displacement.
+/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
+fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
+    let reg = reg as u8 & 7;
+    let rm = rm as u8 & 7;
+    let mut b = 0b10000000;
+    b |= reg << 3;
+    b |= rm;
+    sink.put1(b);
+}
+
+/// Emit a mode 00 ModR/M with an RM of 0b100 indicating a SIB byte is present.
+fn modrm_sib<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
+    modrm_rm(0b100, reg, sink);
+}
+
+/// Emit a mode 01 ModR/M with an RM of 0b100 indicating a SIB byte and 8-bit
+/// displacement are present.
+fn modrm_sib_disp8<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
+    modrm_disp8(0b100, reg, sink);
+}
+
+/// Emit a mode 10 ModR/M with an RM of 0b100 indicating a SIB byte and 32-bit
+/// displacement are present.
+fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
+    modrm_disp32(0b100, reg, sink);
+}
+
+/// Emit a SIB byte with a base register and no scale+index.
+fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
+    let base = base as u8 & 7;
+    // SIB byte layout: SS_III_BBB.
+    let mut b = 0b00_100_000;
+    b |= base;
+    sink.put1(b);
+}
+
+/// Emit a SIB byte with a scale, base, and index.
+fn sib<CS: CodeSink + ?Sized>(scale: u8, index: RegUnit, base: RegUnit, sink: &mut CS) {
+    // SIB byte layout: SS_III_BBB.
+    debug_assert_eq!(scale & !0x03, 0, "Scale out of range");
+    let scale = scale & 3;
+    let index = index as u8 & 7;
+    let base = base as u8 & 7;
+    let b: u8 = (scale << 6) | (index << 3) | base;
+    sink.put1(b);
+}
+
+/// Get the low 4 bits of an opcode for an integer condition code.
+///
+/// Add this offset to a base opcode for:
+///
+/// ---- 0x70: Short conditional branch.
+/// 0x0f 0x80: Long conditional branch.
+/// 0x0f 0x90: SetCC.
+///
+fn icc2opc(cond: IntCC) -> u16 {
+    use crate::ir::condcodes::IntCC::*;
+    match cond {
+        Overflow => 0x0,
+        NotOverflow => 0x1,
+        UnsignedLessThan => 0x2,
+        UnsignedGreaterThanOrEqual => 0x3,
+        Equal => 0x4,
+        NotEqual => 0x5,
+        UnsignedLessThanOrEqual => 0x6,
+        UnsignedGreaterThan => 0x7,
+        // 0x8 = Sign.
+        // 0x9 = !Sign.
+        // 0xa = Parity even.
+        // 0xb = Parity odd.
+        SignedLessThan => 0xc,
+        SignedGreaterThanOrEqual => 0xd,
+        SignedLessThanOrEqual => 0xe,
+        SignedGreaterThan => 0xf,
+    }
+}
+
+/// Get the low 4 bits of an opcode for a floating point condition code.
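+/// Composed like `icc2opc` above: add the returned value to the 0x70,
+/// 0x0f 0x80, or 0x0f 0x90 opcode bases (illustrative; the bases are listed
+/// in the integer table).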
+///
+/// The ucomiss/ucomisd instructions set the FLAGS bits ZF/PF/CF like this:
+///
+///    ZPC OSA
+/// UN 111 000
+/// GT 000 000
+/// LT 001 000
+/// EQ 100 000
+///
+/// Not all floating point condition codes are supported.
+fn fcc2opc(cond: FloatCC) -> u16 {
+    use crate::ir::condcodes::FloatCC::*;
+    match cond {
+        Ordered => 0xb, // EQ|LT|GT => *np (P=0)
+        Unordered => 0xa, // UN => *p (P=1)
+        OrderedNotEqual => 0x5, // LT|GT => *ne (Z=0)
+        UnorderedOrEqual => 0x4, // UN|EQ => *e (Z=1)
+        GreaterThan => 0x7, // GT => *a (C=0&Z=0)
+        GreaterThanOrEqual => 0x3, // GT|EQ => *ae (C=0)
+        UnorderedOrLessThan => 0x2, // UN|LT => *b (C=1)
+        UnorderedOrLessThanOrEqual => 0x6, // UN|LT|EQ => *be (Z=1|C=1)
+        Equal | // EQ
+        NotEqual | // UN|LT|GT
+        LessThan | // LT
+        LessThanOrEqual | // LT|EQ
+        UnorderedOrGreaterThan | // UN|GT
+        UnorderedOrGreaterThanOrEqual // UN|GT|EQ
+        => panic!("{} not supported", cond),
+    }
+}
+
+/// Emit a single-byte branch displacement to `destination`.
+fn disp1<CS: CodeSink + ?Sized>(destination: Block, func: &Function, sink: &mut CS) {
+    let delta = func.offsets[destination].wrapping_sub(sink.offset() + 1);
+    sink.put1(delta as u8);
+}
+
+/// Emit a four-byte branch displacement to `destination`.
+fn disp4<CS: CodeSink + ?Sized>(destination: Block, func: &Function, sink: &mut CS) {
+    let delta = func.offsets[destination].wrapping_sub(sink.offset() + 4);
+    sink.put4(delta);
+}
+
+/// Emit a four-byte displacement to jump table `jt`.
+fn jt_disp4<CS: CodeSink + ?Sized>(jt: JumpTable, func: &Function, sink: &mut CS) {
+    let delta = func.jt_offsets[jt].wrapping_sub(sink.offset() + 4);
+    sink.put4(delta);
+    sink.reloc_jt(Reloc::X86PCRelRodata4, jt);
+}
+
+/// Emit a four-byte displacement to `constant`.
+fn const_disp4<CS: CodeSink + ?Sized>(constant: Constant, func: &Function, sink: &mut CS) {
+    let offset = func.dfg.constants.get_offset(constant);
+    let delta = offset.wrapping_sub(sink.offset() + 4);
+    sink.put4(delta);
+    sink.reloc_constant(Reloc::X86PCRelRodata4, offset);
+}
diff --git a/cranelift/codegen/src/isa/x86/enc_tables.rs b/cranelift/codegen/src/isa/x86/enc_tables.rs
new file mode 100644
index 0000000000..93e06d2795
--- /dev/null
+++ b/cranelift/codegen/src/isa/x86/enc_tables.rs
@@ -0,0 +1,1327 @@
+//! Encoding tables for x86 ISAs.
+
+use super::registers::*;
+use crate::bitset::BitSet;
+use crate::cursor::{Cursor, FuncCursor};
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::condcodes::{FloatCC, IntCC};
+use crate::ir::types::*;
+use crate::ir::{self, Function, Inst, InstBuilder};
+use crate::isa::constraints::*;
+use crate::isa::enc_tables::*;
+use crate::isa::encoding::base_size;
+use crate::isa::encoding::{Encoding, RecipeSizing};
+use crate::isa::RegUnit;
+use crate::isa::{self, TargetIsa};
+use crate::predicates;
+use crate::regalloc::RegDiversions;
+
+use cranelift_codegen_shared::isa::x86::EncodingBits;
+
+include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs"));
+include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs"));
+
+/// Whether the REX prefix is needed for encoding extended registers (via REX.RXB).
+///
+/// Normal x86 instructions have only 3 bits for encoding a register.
+/// The REX prefix adds the REX.R, REX.X, and REX.B bits, which provide a fourth (high) bit for
+/// each register field.
+pub fn is_extended_reg(reg: RegUnit) -> bool {
+    // Extended registers have the fourth bit set.
+ reg as u8 & 0b1000 != 0 +} + +pub fn needs_sib_byte(reg: RegUnit) -> bool { + reg == RU::r12 as RegUnit || reg == RU::rsp as RegUnit +} +pub fn needs_offset(reg: RegUnit) -> bool { + reg == RU::r13 as RegUnit || reg == RU::rbp as RegUnit +} +pub fn needs_sib_byte_or_offset(reg: RegUnit) -> bool { + needs_sib_byte(reg) || needs_offset(reg) +} + +fn test_input( + op_index: usize, + inst: Inst, + divert: &RegDiversions, + func: &Function, + condition_func: fn(RegUnit) -> bool, +) -> bool { + let in_reg = divert.reg(func.dfg.inst_args(inst)[op_index], &func.locations); + condition_func(in_reg) +} + +fn test_result( + result_index: usize, + inst: Inst, + divert: &RegDiversions, + func: &Function, + condition_func: fn(RegUnit) -> bool, +) -> bool { + let out_reg = divert.reg(func.dfg.inst_results(inst)[result_index], &func.locations); + condition_func(out_reg) +} + +fn size_plus_maybe_offset_for_inreg_0( + sizing: &RecipeSizing, + _enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_offset = test_input(0, inst, divert, func, needs_offset); + sizing.base_size + if needs_offset { 1 } else { 0 } +} +fn size_plus_maybe_offset_for_inreg_1( + sizing: &RecipeSizing, + _enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_offset = test_input(1, inst, divert, func, needs_offset); + sizing.base_size + if needs_offset { 1 } else { 0 } +} +fn size_plus_maybe_sib_for_inreg_0( + sizing: &RecipeSizing, + _enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_sib = test_input(0, inst, divert, func, needs_sib_byte); + sizing.base_size + if needs_sib { 1 } else { 0 } +} +fn size_plus_maybe_sib_for_inreg_1( + sizing: &RecipeSizing, + _enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_sib = test_input(1, inst, divert, func, needs_sib_byte); + sizing.base_size + if needs_sib { 1 } else { 0 } +} +fn size_plus_maybe_sib_or_offset_for_inreg_0( + sizing: &RecipeSizing, + _enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_sib_or_offset = test_input(0, inst, divert, func, needs_sib_byte_or_offset); + sizing.base_size + if needs_sib_or_offset { 1 } else { 0 } +} +fn size_plus_maybe_sib_or_offset_for_inreg_1( + sizing: &RecipeSizing, + _enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_sib_or_offset = test_input(1, inst, divert, func, needs_sib_byte_or_offset); + sizing.base_size + if needs_sib_or_offset { 1 } else { 0 } +} + +/// Calculates the size while inferring if the first and second input registers (inreg0, inreg1) +/// require a dynamic REX prefix and if the second input register (inreg1) requires a SIB or offset. +fn size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1( + sizing: &RecipeSizing, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0) + || test_input(0, inst, divert, func, is_extended_reg) + || test_input(1, inst, divert, func, is_extended_reg); + size_plus_maybe_sib_or_offset_for_inreg_1(sizing, enc, inst, divert, func) + + if needs_rex { 1 } else { 0 } +} + +/// Calculates the size while inferring if the first input register (inreg0) and first output +/// register (outreg0) require a dynamic REX and if the first input register (inreg0) requires a +/// SIB or offset. 
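+///
+/// Worked example (illustrative): with a `base_size` of 2, a load whose base
+/// register is %r13 adds one displacement byte (`needs_offset`) and one REX
+/// byte (`is_extended_reg`), for a total of 4 bytes.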
+fn size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0( + sizing: &RecipeSizing, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0) + || test_input(0, inst, divert, func, is_extended_reg) + || test_result(0, inst, divert, func, is_extended_reg); + size_plus_maybe_sib_or_offset_for_inreg_0(sizing, enc, inst, divert, func) + + if needs_rex { 1 } else { 0 } +} + +/// Infers whether a dynamic REX prefix will be emitted, for use with one input reg. +/// +/// A REX prefix is known to be emitted if either: +/// 1. The EncodingBits specify that REX.W is to be set. +/// 2. Registers are used that require REX.R or REX.B bits for encoding. +fn size_with_inferred_rex_for_inreg0( + sizing: &RecipeSizing, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0) + || test_input(0, inst, divert, func, is_extended_reg); + sizing.base_size + if needs_rex { 1 } else { 0 } +} + +/// Infers whether a dynamic REX prefix will be emitted, based on the second operand. +fn size_with_inferred_rex_for_inreg1( + sizing: &RecipeSizing, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0) + || test_input(1, inst, divert, func, is_extended_reg); + sizing.base_size + if needs_rex { 1 } else { 0 } +} + +/// Infers whether a dynamic REX prefix will be emitted, based on the third operand. +fn size_with_inferred_rex_for_inreg2( + sizing: &RecipeSizing, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0) + || test_input(2, inst, divert, func, is_extended_reg); + sizing.base_size + if needs_rex { 1 } else { 0 } +} + +/// Infers whether a dynamic REX prefix will be emitted, for use with two input registers. +/// +/// A REX prefix is known to be emitted if either: +/// 1. The EncodingBits specify that REX.W is to be set. +/// 2. Registers are used that require REX.R or REX.B bits for encoding. +fn size_with_inferred_rex_for_inreg0_inreg1( + sizing: &RecipeSizing, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0) + || test_input(0, inst, divert, func, is_extended_reg) + || test_input(1, inst, divert, func, is_extended_reg); + sizing.base_size + if needs_rex { 1 } else { 0 } +} + +/// Infers whether a dynamic REX prefix will be emitted, based on a single +/// input register and a single output register. +fn size_with_inferred_rex_for_inreg0_outreg0( + sizing: &RecipeSizing, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0) + || test_input(0, inst, divert, func, is_extended_reg) + || test_result(0, inst, divert, func, is_extended_reg); + sizing.base_size + if needs_rex { 1 } else { 0 } +} + +/// Infers whether a dynamic REX prefix will be emitted, based on a single output register. 
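+///
+/// For instance (illustrative), writing the result into %r9 requires REX.B,
+/// so the size grows by one byte even when REX.W is clear.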
+fn size_with_inferred_rex_for_outreg0(
+    sizing: &RecipeSizing,
+    enc: Encoding,
+    inst: Inst,
+    divert: &RegDiversions,
+    func: &Function,
+) -> u8 {
+    let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0)
+        || test_result(0, inst, divert, func, is_extended_reg);
+    sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
+/// Infers whether a dynamic REX prefix will be emitted, for use with CMOV.
+///
+/// CMOV uses 3 inputs, with the REX inferred from reg1 and reg2.
+fn size_with_inferred_rex_for_cmov(
+    sizing: &RecipeSizing,
+    enc: Encoding,
+    inst: Inst,
+    divert: &RegDiversions,
+    func: &Function,
+) -> u8 {
+    let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0)
+        || test_input(1, inst, divert, func, is_extended_reg)
+        || test_input(2, inst, divert, func, is_extended_reg);
+    sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
+/// If the value's definition is a constant immediate, returns its unpacked value; otherwise
+/// returns None.
+fn maybe_iconst_imm(pos: &FuncCursor, value: ir::Value) -> Option<i64> {
+    if let ir::ValueDef::Result(inst, _) = &pos.func.dfg.value_def(value) {
+        if let ir::InstructionData::UnaryImm {
+            opcode: ir::Opcode::Iconst,
+            imm,
+        } = &pos.func.dfg[*inst]
+        {
+            let value: i64 = (*imm).into();
+            Some(value)
+        } else {
+            None
+        }
+    } else {
+        None
+    }
+}
+
+/// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`.
+fn expand_sdivrem(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    cfg: &mut ControlFlowGraph,
+    isa: &dyn TargetIsa,
+) {
+    let (x, y, is_srem) = match func.dfg[inst] {
+        ir::InstructionData::Binary {
+            opcode: ir::Opcode::Sdiv,
+            args,
+        } => (args[0], args[1], false),
+        ir::InstructionData::Binary {
+            opcode: ir::Opcode::Srem,
+            args,
+        } => (args[0], args[1], true),
+        _ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)),
+    };
+
+    let old_block = func.layout.pp_block(inst);
+    let result = func.dfg.first_result(inst);
+    let ty = func.dfg.value_type(result);
+
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+    pos.func.dfg.clear_results(inst);
+
+    let avoid_div_traps = isa.flags().avoid_div_traps();
+
+    // If we can tolerate native division traps, sdiv doesn't need branching.
+    if !avoid_div_traps && !is_srem {
+        let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
+        pos.ins().with_result(result).x86_sdivmodx(x, xhi, y);
+        pos.remove_inst();
+        return;
+    }
+
+    // Try to remove checks if the input value is an immediate other than 0 or -1. For these two
+    // immediates, we'd ideally replace conditional traps by traps, but this requires more
+    // manipulation of the dfg/cfg, which is out of scope here.
+    let (could_be_zero, could_be_minus_one) = if let Some(imm) = maybe_iconst_imm(&pos, y) {
+        (imm == 0, imm == -1)
+    } else {
+        (true, true)
+    };
+
+    // Put in an explicit division-by-zero trap if the environment requires it.
+    if avoid_div_traps && could_be_zero {
+        pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
+    }
+
+    if !could_be_minus_one {
+        let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
+        let reuse = if is_srem {
+            [None, Some(result)]
+        } else {
+            [Some(result), None]
+        };
+        pos.ins().with_results(reuse).x86_sdivmodx(x, xhi, y);
+        pos.remove_inst();
+        return;
+    }
+
+    // block handling the nominal case.
+    let nominal = pos.func.dfg.make_block();
+
+    // block handling the -1 divisor case.
+    let minus_one = pos.func.dfg.make_block();
+
+    // Final block with one argument representing the final result value.
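+    // (Each predecessor jumps here with the quotient or remainder it computed.)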
+ let done = pos.func.dfg.make_block(); + + // Move the `inst` result value onto the `done` block. + pos.func.dfg.attach_block_param(done, result); + + // Start by checking for a -1 divisor which needs to be handled specially. + let is_m1 = pos.ins().ifcmp_imm(y, -1); + pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]); + pos.ins().jump(nominal, &[]); + + // Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division + // by zero. + pos.insert_block(nominal); + let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); + let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y); + let divres = if is_srem { rem } else { quot }; + pos.ins().jump(done, &[divres]); + + // Now deal with the -1 divisor case. + pos.insert_block(minus_one); + let m1_result = if is_srem { + // x % -1 = 0. + pos.ins().iconst(ty, 0) + } else { + // Explicitly check for overflow: Trap when x == INT_MIN. + debug_assert!(avoid_div_traps, "Native trapping divide handled above"); + let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1)); + pos.ins() + .trapif(IntCC::Equal, f, ir::TrapCode::IntegerOverflow); + // x / -1 = -x. + pos.ins().irsub_imm(x, 0) + }; + + // Recycle the original instruction as a jump. + pos.func.dfg.replace(inst).jump(done, &[m1_result]); + + // Finally insert a label for the completion. + pos.next_inst(); + pos.insert_block(done); + + cfg.recompute_block(pos.func, old_block); + cfg.recompute_block(pos.func, nominal); + cfg.recompute_block(pos.func, minus_one); + cfg.recompute_block(pos.func, done); +} + +/// Expand the `udiv` and `urem` instructions using `x86_udivmodx`. +fn expand_udivrem( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + isa: &dyn TargetIsa, +) { + let (x, y, is_urem) = match func.dfg[inst] { + ir::InstructionData::Binary { + opcode: ir::Opcode::Udiv, + args, + } => (args[0], args[1], false), + ir::InstructionData::Binary { + opcode: ir::Opcode::Urem, + args, + } => (args[0], args[1], true), + _ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)), + }; + let avoid_div_traps = isa.flags().avoid_div_traps(); + let result = func.dfg.first_result(inst); + let ty = func.dfg.value_type(result); + + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + pos.func.dfg.clear_results(inst); + + // Put in an explicit division-by-zero trap if the environment requires it. + if avoid_div_traps { + let zero_check = if let Some(imm) = maybe_iconst_imm(&pos, y) { + // Ideally, we'd just replace the conditional trap with a trap when the immediate is + // zero, but this requires more manipulation of the dfg/cfg, which is out of scope + // here. + imm == 0 + } else { + true + }; + if zero_check { + pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero); + } + } + + // Now it is safe to execute the `x86_udivmodx` instruction. + let xhi = pos.ins().iconst(ty, 0); + let reuse = if is_urem { + [None, Some(result)] + } else { + [Some(result), None] + }; + pos.ins().with_results(reuse).x86_udivmodx(x, xhi, y); + pos.remove_inst(); +} + +/// Expand the `fmin` and `fmax` instructions using the x86 `x86_fmin` and `x86_fmax` +/// instructions. 
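+///
+/// Illustrative cases this expansion must preserve: `fmin(0.0, -0.0) == -0.0`
+/// and `fmax(0.0, -0.0) == 0.0` (handled by the bitwise op), and any NaN input
+/// propagating as a quiet NaN (handled by `fadd`).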
+fn expand_minmax( + inst: ir::Inst, + func: &mut ir::Function, + cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] { + ir::InstructionData::Binary { + opcode: ir::Opcode::Fmin, + args, + } => (args[0], args[1], ir::Opcode::X86Fmin, ir::Opcode::Bor), + ir::InstructionData::Binary { + opcode: ir::Opcode::Fmax, + args, + } => (args[0], args[1], ir::Opcode::X86Fmax, ir::Opcode::Band), + _ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)), + }; + let old_block = func.layout.pp_block(inst); + + // We need to handle the following conditions, depending on how x and y compare: + // + // 1. LT or GT: The native `x86_opc` min/max instruction does what we need. + // 2. EQ: We need to use `bitwise_opc` to make sure that + // fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0. + // 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical. + + // block handling case 1) where operands are ordered but not equal. + let one_block = func.dfg.make_block(); + + // block handling case 3) where one operand is NaN. + let uno_block = func.dfg.make_block(); + + // block that handles the unordered or equal cases 2) and 3). + let ueq_block = func.dfg.make_block(); + + // block handling case 2) where operands are ordered and equal. + let eq_block = func.dfg.make_block(); + + // Final block with one argument representing the final result value. + let done = func.dfg.make_block(); + + // The basic blocks are laid out to minimize branching for the common cases: + // + // 1) One branch not taken, one jump. + // 2) One branch taken. + // 3) Two branches taken, one jump. + + // Move the `inst` result value onto the `done` block. + let result = func.dfg.first_result(inst); + let ty = func.dfg.value_type(result); + func.dfg.clear_results(inst); + func.dfg.attach_block_param(done, result); + + // Test for case 1) ordered and not equal. + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y); + pos.ins().brnz(cmp_ueq, ueq_block, &[]); + pos.ins().jump(one_block, &[]); + + // Handle the common ordered, not equal (LT|GT) case. + pos.insert_block(one_block); + let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0; + let one_result = pos.func.dfg.first_result(one_inst); + pos.ins().jump(done, &[one_result]); + + // Case 3) Unordered. + // We know that at least one operand is a NaN that needs to be propagated. We simply use an + // `fadd` instruction which has the same NaN propagation semantics. + pos.insert_block(uno_block); + let uno_result = pos.ins().fadd(x, y); + pos.ins().jump(done, &[uno_result]); + + // Case 2) or 3). + pos.insert_block(ueq_block); + // Test for case 3) (UN) one value is NaN. + // TODO: When we get support for flag values, we can reuse the above comparison. + let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y); + pos.ins().brnz(cmp_uno, uno_block, &[]); + pos.ins().jump(eq_block, &[]); + + // We are now in case 2) where x and y compare EQ. + // We need a bitwise operation to get the sign right. + pos.insert_block(eq_block); + let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0; + let bw_result = pos.func.dfg.first_result(bw_inst); + // This should become a fall-through for this second most common case. + // Recycle the original instruction as a jump. + pos.func.dfg.replace(inst).jump(done, &[bw_result]); + + // Finally insert a label for the completion. 
+    pos.next_inst();
+    pos.insert_block(done);
+
+    cfg.recompute_block(pos.func, old_block);
+    cfg.recompute_block(pos.func, one_block);
+    cfg.recompute_block(pos.func, uno_block);
+    cfg.recompute_block(pos.func, ueq_block);
+    cfg.recompute_block(pos.func, eq_block);
+    cfg.recompute_block(pos.func, done);
+}
+
+/// x86 has no unsigned-to-float conversions. We handle the easy case of zero-extending i32 to
+/// i64 with a pattern; the rest needs more code.
+fn expand_fcvt_from_uint(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    cfg: &mut ControlFlowGraph,
+    _isa: &dyn TargetIsa,
+) {
+    let x = match func.dfg[inst] {
+        ir::InstructionData::Unary {
+            opcode: ir::Opcode::FcvtFromUint,
+            arg,
+        } => arg,
+        _ => panic!("Need fcvt_from_uint: {}", func.dfg.display_inst(inst, None)),
+    };
+    let xty = func.dfg.value_type(x);
+    let result = func.dfg.first_result(inst);
+    let ty = func.dfg.value_type(result);
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+
+    // Conversion from an unsigned int smaller than 64 bits is easy on x86-64.
+    match xty {
+        ir::types::I8 | ir::types::I16 | ir::types::I32 => {
+            // TODO: This should be guarded by an ISA check.
+            let wide = pos.ins().uextend(ir::types::I64, x);
+            pos.func.dfg.replace(inst).fcvt_from_sint(ty, wide);
+            return;
+        }
+        ir::types::I64 => {}
+        _ => unimplemented!(),
+    }
+
+    let old_block = pos.func.layout.pp_block(inst);
+
+    // block handling the case where x >= 0.
+    let poszero_block = pos.func.dfg.make_block();
+
+    // block handling the case where x < 0.
+    let neg_block = pos.func.dfg.make_block();
+
+    // Final block with one argument representing the final result value.
+    let done = pos.func.dfg.make_block();
+
+    // Move the `inst` result value onto the `done` block.
+    pos.func.dfg.clear_results(inst);
+    pos.func.dfg.attach_block_param(done, result);
+
+    // If x as a signed int is not negative, we can use the existing `fcvt_from_sint` instruction.
+    let is_neg = pos.ins().icmp_imm(IntCC::SignedLessThan, x, 0);
+    pos.ins().brnz(is_neg, neg_block, &[]);
+    pos.ins().jump(poszero_block, &[]);
+
+    // Easy case: just use a signed conversion.
+    pos.insert_block(poszero_block);
+    let posres = pos.ins().fcvt_from_sint(ty, x);
+    pos.ins().jump(done, &[posres]);
+
+    // Now handle the negative case.
+    pos.insert_block(neg_block);
+
+    // Divide x by two to get it in range for the signed conversion, keep the LSB, and scale it
+    // back up on the FP side.
+    let ihalf = pos.ins().ushr_imm(x, 1);
+    let lsb = pos.ins().band_imm(x, 1);
+    let ifinal = pos.ins().bor(ihalf, lsb);
+    let fhalf = pos.ins().fcvt_from_sint(ty, ifinal);
+    let negres = pos.ins().fadd(fhalf, fhalf);
+
+    // Recycle the original instruction as a jump.
+    pos.func.dfg.replace(inst).jump(done, &[negres]);
+
+    // Finally insert a label for the completion.
+ pos.next_inst(); + pos.insert_block(done); + + cfg.recompute_block(pos.func, old_block); + cfg.recompute_block(pos.func, poszero_block); + cfg.recompute_block(pos.func, neg_block); + cfg.recompute_block(pos.func, done); +} + +fn expand_fcvt_to_sint( + inst: ir::Inst, + func: &mut ir::Function, + cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + use crate::ir::immediates::{Ieee32, Ieee64}; + + let x = match func.dfg[inst] { + ir::InstructionData::Unary { + opcode: ir::Opcode::FcvtToSint, + arg, + } => arg, + _ => panic!("Need fcvt_to_sint: {}", func.dfg.display_inst(inst, None)), + }; + let old_block = func.layout.pp_block(inst); + let xty = func.dfg.value_type(x); + let result = func.dfg.first_result(inst); + let ty = func.dfg.value_type(result); + + // Final block after the bad value checks. + let done = func.dfg.make_block(); + + // block for checking failure cases. + let maybe_trap_block = func.dfg.make_block(); + + // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or overflow. + // It produces an INT_MIN result instead. + func.dfg.replace(inst).x86_cvtt2si(ty, x); + + let mut pos = FuncCursor::new(func).after_inst(inst); + pos.use_srcloc(inst); + + let is_done = pos + .ins() + .icmp_imm(IntCC::NotEqual, result, 1 << (ty.lane_bits() - 1)); + pos.ins().brnz(is_done, done, &[]); + pos.ins().jump(maybe_trap_block, &[]); + + // We now have the following possibilities: + // + // 1. INT_MIN was actually the correct conversion result. + // 2. The input was NaN -> trap bad_toint + // 3. The input was out of range -> trap int_ovf + // + pos.insert_block(maybe_trap_block); + + // Check for NaN. + let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x); + pos.ins() + .trapnz(is_nan, ir::TrapCode::BadConversionToInteger); + + // Check for case 1: INT_MIN is the correct result. + // Determine the smallest floating point number that would convert to INT_MIN. + let mut overflow_cc = FloatCC::LessThan; + let output_bits = ty.lane_bits(); + let flimit = match xty { + ir::types::F32 => + // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so + // there are values less than -2^(N-1) that convert correctly to INT_MIN. + { + pos.ins().f32const(if output_bits < 32 { + overflow_cc = FloatCC::LessThanOrEqual; + Ieee32::fcvt_to_sint_negative_overflow(output_bits) + } else { + Ieee32::pow2(output_bits - 1).neg() + }) + } + ir::types::F64 => + // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so + // there are values less than -2^(N-1) that convert correctly to INT_MIN. + { + pos.ins().f64const(if output_bits < 64 { + overflow_cc = FloatCC::LessThanOrEqual; + Ieee64::fcvt_to_sint_negative_overflow(output_bits) + } else { + Ieee64::pow2(output_bits - 1).neg() + }) + } + _ => panic!("Can't convert {}", xty), + }; + let overflow = pos.ins().fcmp(overflow_cc, x, flimit); + pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow); + + // Finally, we could have a positive value that is too large. 
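+    // (If x >= 0 here, the INT_MIN result from cvtt2si can only mean positive overflow.)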
+ let fzero = match xty { + ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)), + ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)), + _ => panic!("Can't convert {}", xty), + }; + let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero); + pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow); + + pos.ins().jump(done, &[]); + pos.insert_block(done); + + cfg.recompute_block(pos.func, old_block); + cfg.recompute_block(pos.func, maybe_trap_block); + cfg.recompute_block(pos.func, done); +} + +fn expand_fcvt_to_sint_sat( + inst: ir::Inst, + func: &mut ir::Function, + cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + use crate::ir::immediates::{Ieee32, Ieee64}; + + let x = match func.dfg[inst] { + ir::InstructionData::Unary { + opcode: ir::Opcode::FcvtToSintSat, + arg, + } => arg, + _ => panic!( + "Need fcvt_to_sint_sat: {}", + func.dfg.display_inst(inst, None) + ), + }; + + let old_block = func.layout.pp_block(inst); + let xty = func.dfg.value_type(x); + let result = func.dfg.first_result(inst); + let ty = func.dfg.value_type(result); + + // Final block after the bad value checks. + let done_block = func.dfg.make_block(); + let intmin_block = func.dfg.make_block(); + let minsat_block = func.dfg.make_block(); + let maxsat_block = func.dfg.make_block(); + func.dfg.clear_results(inst); + func.dfg.attach_block_param(done_block, result); + + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or + // overflow. It produces an INT_MIN result instead. + let cvtt2si = pos.ins().x86_cvtt2si(ty, x); + + let is_done = pos + .ins() + .icmp_imm(IntCC::NotEqual, cvtt2si, 1 << (ty.lane_bits() - 1)); + pos.ins().brnz(is_done, done_block, &[cvtt2si]); + pos.ins().jump(intmin_block, &[]); + + // We now have the following possibilities: + // + // 1. INT_MIN was actually the correct conversion result. + // 2. The input was NaN -> replace the result value with 0. + // 3. The input was out of range -> saturate the result to the min/max value. + pos.insert_block(intmin_block); + + // Check for NaN, which is truncated to 0. + let zero = pos.ins().iconst(ty, 0); + let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x); + pos.ins().brnz(is_nan, done_block, &[zero]); + pos.ins().jump(minsat_block, &[]); + + // Check for case 1: INT_MIN is the correct result. + // Determine the smallest floating point number that would convert to INT_MIN. + pos.insert_block(minsat_block); + let mut overflow_cc = FloatCC::LessThan; + let output_bits = ty.lane_bits(); + let flimit = match xty { + ir::types::F32 => + // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so + // there are values less than -2^(N-1) that convert correctly to INT_MIN. + { + pos.ins().f32const(if output_bits < 32 { + overflow_cc = FloatCC::LessThanOrEqual; + Ieee32::fcvt_to_sint_negative_overflow(output_bits) + } else { + Ieee32::pow2(output_bits - 1).neg() + }) + } + ir::types::F64 => + // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so + // there are values less than -2^(N-1) that convert correctly to INT_MIN. 
+        {
+            pos.ins().f64const(if output_bits < 64 {
+                overflow_cc = FloatCC::LessThanOrEqual;
+                Ieee64::fcvt_to_sint_negative_overflow(output_bits)
+            } else {
+                Ieee64::pow2(output_bits - 1).neg()
+            })
+        }
+        _ => panic!("Can't convert {}", xty),
+    };
+
+    let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
+    let min_imm = match ty {
+        ir::types::I32 => i32::min_value() as i64,
+        ir::types::I64 => i64::min_value(),
+        _ => panic!("Don't know the min value for {}", ty),
+    };
+    let min_value = pos.ins().iconst(ty, min_imm);
+    pos.ins().brnz(overflow, done_block, &[min_value]);
+    pos.ins().jump(maxsat_block, &[]);
+
+    // Finally, we could have a positive value that is too large.
+    pos.insert_block(maxsat_block);
+    let fzero = match xty {
+        ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
+        ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
+        _ => panic!("Can't convert {}", xty),
+    };
+
+    let max_imm = match ty {
+        ir::types::I32 => i32::max_value() as i64,
+        ir::types::I64 => i64::max_value(),
+        _ => panic!("Don't know the max value for {}", ty),
+    };
+    let max_value = pos.ins().iconst(ty, max_imm);
+
+    let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
+    pos.ins().brnz(overflow, done_block, &[max_value]);
+
+    // Recycle the original instruction.
+    pos.func.dfg.replace(inst).jump(done_block, &[cvtt2si]);
+
+    // Finally insert a label for the completion.
+    pos.next_inst();
+    pos.insert_block(done_block);
+
+    cfg.recompute_block(pos.func, old_block);
+    cfg.recompute_block(pos.func, intmin_block);
+    cfg.recompute_block(pos.func, minsat_block);
+    cfg.recompute_block(pos.func, maxsat_block);
+    cfg.recompute_block(pos.func, done_block);
+}
+
+fn expand_fcvt_to_uint(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    cfg: &mut ControlFlowGraph,
+    _isa: &dyn TargetIsa,
+) {
+    use crate::ir::immediates::{Ieee32, Ieee64};
+
+    let x = match func.dfg[inst] {
+        ir::InstructionData::Unary {
+            opcode: ir::Opcode::FcvtToUint,
+            arg,
+        } => arg,
+        _ => panic!("Need fcvt_to_uint: {}", func.dfg.display_inst(inst, None)),
+    };
+
+    let old_block = func.layout.pp_block(inst);
+    let xty = func.dfg.value_type(x);
+    let result = func.dfg.first_result(inst);
+    let ty = func.dfg.value_type(result);
+
+    // block handling numbers < 2^(N-1).
+    let below_uint_max_block = func.dfg.make_block();
+
+    // block handling numbers < 0.
+    let below_zero_block = func.dfg.make_block();
+
+    // block handling numbers >= 2^(N-1).
+    let large = func.dfg.make_block();
+
+    // Final block after the bad value checks.
+    let done = func.dfg.make_block();
+
+    // Move the `inst` result value onto the `done` block.
+    func.dfg.clear_results(inst);
+    func.dfg.attach_block_param(done, result);
+
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+
+    // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in
+    // the destination integer type.
+    let pow2nm1 = match xty {
+        ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
+        ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
+        _ => panic!("Can't convert {}", xty),
+    };
+    let is_large = pos.ins().ffcmp(x, pow2nm1);
+    pos.ins()
+        .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]);
+    pos.ins().jump(below_uint_max_block, &[]);
+
+    // We need to generate a specific trap code when `x` is NaN, so reuse the flags from the
+    // previous comparison.
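+    // (`ffcmp` materializes the raw flags, so `trapff` can test Unordered
+    // without comparing again.)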
+    pos.insert_block(below_uint_max_block);
+    pos.ins().trapff(
+        FloatCC::Unordered,
+        is_large,
+        ir::TrapCode::BadConversionToInteger,
+    );
+
+    // Now we know that x < 2^(N-1) and not NaN.
+    let sres = pos.ins().x86_cvtt2si(ty, x);
+    let is_neg = pos.ins().ifcmp_imm(sres, 0);
+    pos.ins()
+        .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]);
+    pos.ins().jump(below_zero_block, &[]);
+
+    pos.insert_block(below_zero_block);
+    pos.ins().trap(ir::TrapCode::IntegerOverflow);
+
+    // Handle the case where x >= 2^(N-1) and not NaN.
+    pos.insert_block(large);
+    let adjx = pos.ins().fsub(x, pow2nm1);
+    let lres = pos.ins().x86_cvtt2si(ty, adjx);
+    let is_neg = pos.ins().ifcmp_imm(lres, 0);
+    pos.ins()
+        .trapif(IntCC::SignedLessThan, is_neg, ir::TrapCode::IntegerOverflow);
+    let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));
+
+    // Recycle the original instruction as a jump.
+    pos.func.dfg.replace(inst).jump(done, &[lfinal]);
+
+    // Finally insert a label for the completion.
+    pos.next_inst();
+    pos.insert_block(done);
+
+    cfg.recompute_block(pos.func, old_block);
+    cfg.recompute_block(pos.func, below_uint_max_block);
+    cfg.recompute_block(pos.func, below_zero_block);
+    cfg.recompute_block(pos.func, large);
+    cfg.recompute_block(pos.func, done);
+}
+
+fn expand_fcvt_to_uint_sat(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    cfg: &mut ControlFlowGraph,
+    _isa: &dyn TargetIsa,
+) {
+    use crate::ir::immediates::{Ieee32, Ieee64};
+
+    let x = match func.dfg[inst] {
+        ir::InstructionData::Unary {
+            opcode: ir::Opcode::FcvtToUintSat,
+            arg,
+        } => arg,
+        _ => panic!(
+            "Need fcvt_to_uint_sat: {}",
+            func.dfg.display_inst(inst, None)
+        ),
+    };
+
+    let old_block = func.layout.pp_block(inst);
+    let xty = func.dfg.value_type(x);
+    let result = func.dfg.first_result(inst);
+    let ty = func.dfg.value_type(result);
+
+    // blocks handling numbers < 2^(N-1).
+    let below_pow2nm1_or_nan_block = func.dfg.make_block();
+    let below_pow2nm1_block = func.dfg.make_block();
+
+    // block handling numbers >= 2^(N-1).
+    let large = func.dfg.make_block();
+
+    // block handling numbers < 2^N.
+    let uint_large_block = func.dfg.make_block();
+
+    // Final block after the bad value checks.
+    let done = func.dfg.make_block();
+
+    // Move the `inst` result value onto the `done` block.
+    func.dfg.clear_results(inst);
+    func.dfg.attach_block_param(done, result);
+
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+
+    // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in
+    // the destination integer type.
+    let pow2nm1 = match xty {
+        ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
+        ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
+        _ => panic!("Can't convert {}", xty),
+    };
+    let zero = pos.ins().iconst(ty, 0);
+    let is_large = pos.ins().ffcmp(x, pow2nm1);
+    pos.ins()
+        .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]);
+    pos.ins().jump(below_pow2nm1_or_nan_block, &[]);
+
+    // We need to generate zero when `x` is NaN, so reuse the flags from the previous comparison.
+    pos.insert_block(below_pow2nm1_or_nan_block);
+    pos.ins().brff(FloatCC::Unordered, is_large, done, &[zero]);
+    pos.ins().jump(below_pow2nm1_block, &[]);
+
+    // Now we know that x < 2^(N-1) and not NaN. If the result of the cvtt2si is positive, we're
+    // done; otherwise saturate the result to the minimum unsigned value, 0.
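+    // (A negative `sres` means `x` was negative, so 0 is the correct saturated result.)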
+ pos.insert_block(below_pow2nm1_block); + let sres = pos.ins().x86_cvtt2si(ty, x); + let is_neg = pos.ins().ifcmp_imm(sres, 0); + pos.ins() + .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]); + pos.ins().jump(done, &[zero]); + + // Handle the case where x >= 2^(N-1) and not NaN. + pos.insert_block(large); + let adjx = pos.ins().fsub(x, pow2nm1); + let lres = pos.ins().x86_cvtt2si(ty, adjx); + let max_value = pos.ins().iconst( + ty, + match ty { + ir::types::I32 => u32::max_value() as i64, + ir::types::I64 => u64::max_value() as i64, + _ => panic!("Can't convert {}", ty), + }, + ); + let is_neg = pos.ins().ifcmp_imm(lres, 0); + pos.ins() + .brif(IntCC::SignedLessThan, is_neg, done, &[max_value]); + pos.ins().jump(uint_large_block, &[]); + + pos.insert_block(uint_large_block); + let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1)); + + // Recycle the original instruction as a jump. + pos.func.dfg.replace(inst).jump(done, &[lfinal]); + + // Finally insert a label for the completion. + pos.next_inst(); + pos.insert_block(done); + + cfg.recompute_block(pos.func, old_block); + cfg.recompute_block(pos.func, below_pow2nm1_or_nan_block); + cfg.recompute_block(pos.func, below_pow2nm1_block); + cfg.recompute_block(pos.func, large); + cfg.recompute_block(pos.func, uint_large_block); + cfg.recompute_block(pos.func, done); +} + +/// Convert shuffle instructions. +fn convert_shuffle( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + if let ir::InstructionData::Shuffle { args, mask, .. } = pos.func.dfg[inst] { + // A mask-building helper: in 128-bit SIMD, 0-15 indicate which lane to read from and a 1 + // in the most significant position zeroes the lane. + let zero_unknown_lane_index = |b: u8| if b > 15 { 0b10000000 } else { b }; + + // We only have to worry about aliasing here because copies will be introduced later (in + // regalloc). + let a = pos.func.dfg.resolve_aliases(args[0]); + let b = pos.func.dfg.resolve_aliases(args[1]); + let mask = pos + .func + .dfg + .immediates + .get(mask) + .expect("The shuffle immediate should have been recorded before this point") + .clone(); + if a == b { + // PSHUFB the first argument (since it is the same as the second). + let constructed_mask = mask + .iter() + // If the mask is greater than 15 it still may be referring to a lane in b. + .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b }) + .map(zero_unknown_lane_index) + .collect(); + let handle = pos.func.dfg.constants.insert(constructed_mask); + // Move the built mask into another XMM register. + let a_type = pos.func.dfg.value_type(a); + let mask_value = pos.ins().vconst(a_type, handle); + // Shuffle the single incoming argument. + pos.func.dfg.replace(inst).x86_pshufb(a, mask_value); + } else { + // PSHUFB the first argument, placing zeroes for unused lanes. + let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect(); + let handle = pos.func.dfg.constants.insert(constructed_mask); + // Move the built mask into another XMM register. + let a_type = pos.func.dfg.value_type(a); + let mask_value = pos.ins().vconst(a_type, handle); + // Shuffle the first argument. + let shuffled_first_arg = pos.ins().x86_pshufb(a, mask_value); + + // PSHUFB the second argument, placing zeroes for unused lanes. 
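+            // (Illustrative: mask byte 16 wraps to 0, selecting lane 0 of `b`,
+            // while mask bytes 0-15 wrap above 15 and are zeroed by the helper.)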
+            let constructed_mask = mask
+                .iter()
+                .map(|b| b.wrapping_sub(16))
+                .map(zero_unknown_lane_index)
+                .collect();
+            let handle = pos.func.dfg.constants.insert(constructed_mask);
+            // Move the built mask into another XMM register.
+            let b_type = pos.func.dfg.value_type(b);
+            let mask_value = pos.ins().vconst(b_type, handle);
+            // Shuffle the second argument.
+            let shuffled_second_arg = pos.ins().x86_pshufb(b, mask_value);
+
+            // OR the vectors together to form the final shuffled value.
+            pos.func
+                .dfg
+                .replace(inst)
+                .bor(shuffled_first_arg, shuffled_second_arg);
+
+            // TODO when AVX512 is enabled we should replace this sequence with a single VPERMB
+        };
+    }
+}
+
+/// Because floats already exist in XMM registers, we can keep them there when executing a CLIF
+/// `extractlane` instruction.
+fn convert_extractlane(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    _cfg: &mut ControlFlowGraph,
+    _isa: &dyn TargetIsa,
+) {
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+
+    if let ir::InstructionData::ExtractLane {
+        opcode: ir::Opcode::Extractlane,
+        arg,
+        lane,
+    } = pos.func.dfg[inst]
+    {
+        // NOTE: the following legalization assumes that the upper bits of the XMM register do
+        // not need to be zeroed during extractlane.
+        let value_type = pos.func.dfg.value_type(arg);
+        if value_type.lane_type().is_float() {
+            // Floats are already in XMM registers and can stay there.
+            let shuffled = if lane != 0 {
+                // Replace the extractlane with a PSHUFD to get the float in the right place.
+                match value_type {
+                    F32X4 => {
+                        // Move the selected lane to the 0 lane.
+                        let shuffle_mask: u8 = 0b00_00_00_00 | lane;
+                        pos.ins().x86_pshufd(arg, shuffle_mask)
+                    }
+                    F64X2 => {
+                        assert_eq!(lane, 1);
+                        // Because we know the lane == 1, we move the upper 64 bits to the lower
+                        // 64 bits, leaving the top 64 bits as-is.
+                        let shuffle_mask = 0b11_10_11_10;
+                        let bitcast = pos.ins().raw_bitcast(F32X4, arg);
+                        pos.ins().x86_pshufd(bitcast, shuffle_mask)
+                    }
+                    _ => unreachable!(),
+                }
+            } else {
+                // Remove the extractlane instruction, leaving the float where it is.
+                arg
+            };
+            // Then we must bitcast to the right type.
+            pos.func
+                .dfg
+                .replace(inst)
+                .raw_bitcast(value_type.lane_type(), shuffled);
+        } else {
+            // For non-floats, lower with the usual PEXTR* instruction.
+            pos.func.dfg.replace(inst).x86_pextr(arg, lane);
+        }
+    }
+}
+
+/// Because floats exist in XMM registers, we can keep them there when executing a CLIF
+/// `insertlane` instruction.
+fn convert_insertlane(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    _cfg: &mut ControlFlowGraph,
+    _isa: &dyn TargetIsa,
+) {
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+
+    if let ir::InstructionData::InsertLane {
+        opcode: ir::Opcode::Insertlane,
+        args: [vector, replacement],
+        lane,
+    } = pos.func.dfg[inst]
+    {
+        let value_type = pos.func.dfg.value_type(vector);
+        if value_type.lane_type().is_float() {
+            // Floats are already in XMM registers and can stay there.
+            match value_type {
+                F32X4 => {
+                    assert!(lane <= 3);
+                    let immediate = 0b00_00_00_00 | lane << 4;
+                    // Insert 32 bits from the replacement (source element 00 in imm8 bits 7:6)
+                    // into the vector (destination lane shifted into imm8 bits 5:4).
+                    pos.func
+                        .dfg
+                        .replace(inst)
+                        .x86_insertps(vector, immediate, replacement)
+                }
+                F64X2 => {
+                    let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement); // only necessary due to SSA types
+                    if lane == 0 {
+                        // Move the lowest quadword in replacement to vector without changing
+                        // the upper bits.
+ pos.func + .dfg + .replace(inst) + .x86_movsd(vector, replacement_as_vector) + } else { + assert_eq!(lane, 1); + // Move the low 64 bits of replacement vector to the high 64 bits of the + // vector. + pos.func + .dfg + .replace(inst) + .x86_movlhps(vector, replacement_as_vector) + } + } + _ => unreachable!(), + }; + } else { + // For non-floats, lower with the usual PINSR* instruction. + pos.func + .dfg + .replace(inst) + .x86_pinsr(vector, lane, replacement); + } + } +} + +/// For SIMD or scalar integer negation, convert `ineg` to `vconst + isub` or `iconst + isub`. +fn convert_ineg( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + if let ir::InstructionData::Unary { + opcode: ir::Opcode::Ineg, + arg, + } = pos.func.dfg[inst] + { + let value_type = pos.func.dfg.value_type(arg); + let zero_value = if value_type.is_vector() && value_type.lane_type().is_int() { + let zero_immediate = pos.func.dfg.constants.insert(vec![0; 16].into()); + pos.ins().vconst(value_type, zero_immediate) // this should be legalized to a PXOR + } else if value_type.is_int() { + pos.ins().iconst(value_type, 0) + } else { + panic!("Can't convert ineg of type {}", value_type) + }; + pos.func.dfg.replace(inst).isub(zero_value, arg); + } else { + unreachable!() + } +} + +fn expand_tls_value( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + isa: &dyn TargetIsa, +) { + use crate::settings::TlsModel; + + assert!( + isa.triple().architecture == target_lexicon::Architecture::X86_64, + "Not yet implemented for {:?}", + isa.triple(), + ); + + if let ir::InstructionData::UnaryGlobalValue { + opcode: ir::Opcode::TlsValue, + global_value, + } = func.dfg[inst] + { + let ctrl_typevar = func.dfg.ctrl_typevar(inst); + assert_eq!(ctrl_typevar, ir::types::I64); + + match isa.flags().tls_model() { + TlsModel::None => panic!("tls_model flag is not set."), + TlsModel::ElfGd => { + func.dfg.replace(inst).x86_elf_tls_get_addr(global_value); + } + TlsModel::Macho => { + func.dfg.replace(inst).x86_macho_tls_get_addr(global_value); + } + model => unimplemented!("tls_value for tls model {:?}", model), + } + } else { + unreachable!(); + } +} diff --git a/cranelift/codegen/src/isa/x86/fde.rs b/cranelift/codegen/src/isa/x86/fde.rs new file mode 100644 index 0000000000..6687f532b0 --- /dev/null +++ b/cranelift/codegen/src/isa/x86/fde.rs @@ -0,0 +1,432 @@ +//! Support for FDE data generation. 
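+//!
+//! Illustrative layout of the emitted data (see `emit_fde` below): one CIE
+//! describing the initial frame state, one FDE whose instructions track CFA
+//! changes through the function body, and a four-byte zero terminator for
+//! GCC-style unwinders.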
+
+use crate::binemit::{FrameUnwindOffset, FrameUnwindSink, Reloc};
+use crate::ir::{FrameLayoutChange, Function};
+use crate::isa::{CallConv, RegUnit, TargetIsa};
+use alloc::vec::Vec;
+use core::convert::TryInto;
+use gimli::write::{
+    Address, CallFrameInstruction, CommonInformationEntry, EhFrame, EndianVec,
+    FrameDescriptionEntry, FrameTable, Result, Writer,
+};
+use gimli::{Encoding, Format, LittleEndian, Register, X86_64};
+
+pub type FDERelocEntry = (FrameUnwindOffset, Reloc);
+
+const FUNCTION_ENTRY_ADDRESS: Address = Address::Symbol {
+    symbol: 0,
+    addend: 0,
+};
+
+#[derive(Clone)]
+struct FDEWriter {
+    vec: EndianVec<LittleEndian>,
+    relocs: Vec<FDERelocEntry>,
+}
+
+impl FDEWriter {
+    fn new() -> Self {
+        Self {
+            vec: EndianVec::new(LittleEndian),
+            relocs: Vec::new(),
+        }
+    }
+    fn into_vec_and_relocs(self) -> (Vec<u8>, Vec<FDERelocEntry>) {
+        (self.vec.into_vec(), self.relocs)
+    }
+}
+
+impl Writer for FDEWriter {
+    type Endian = LittleEndian;
+    fn endian(&self) -> Self::Endian {
+        LittleEndian
+    }
+    fn len(&self) -> usize {
+        self.vec.len()
+    }
+    fn write(&mut self, bytes: &[u8]) -> Result<()> {
+        self.vec.write(bytes)
+    }
+    fn write_at(&mut self, offset: usize, bytes: &[u8]) -> Result<()> {
+        self.vec.write_at(offset, bytes)
+    }
+    fn write_address(&mut self, address: Address, size: u8) -> Result<()> {
+        match address {
+            Address::Constant(_) => self.vec.write_address(address, size),
+            Address::Symbol { .. } => {
+                assert_eq!(address, FUNCTION_ENTRY_ADDRESS);
+                let rt = match size {
+                    4 => Reloc::Abs4,
+                    8 => Reloc::Abs8,
+                    _ => {
+                        panic!("Unexpected address size at FDEWriter::write_address");
+                    }
+                };
+                self.relocs.push((self.vec.len().try_into().unwrap(), rt));
+                self.vec.write_udata(0, size)
+            }
+        }
+    }
+}
+
+fn return_address_reg(isa: &dyn TargetIsa) -> Register {
+    assert!(isa.name() == "x86" && isa.pointer_bits() == 64);
+    X86_64::RA
+}
+
+fn map_reg(isa: &dyn TargetIsa, reg: RegUnit) -> Register {
+    assert!(isa.name() == "x86" && isa.pointer_bits() == 64);
+    // Mapping from https://github.com/bytecodealliance/cranelift/pull/902 by @iximeow
+    const X86_GP_REG_MAP: [gimli::Register; 16] = [
+        X86_64::RAX,
+        X86_64::RCX,
+        X86_64::RDX,
+        X86_64::RBX,
+        X86_64::RSP,
+        X86_64::RBP,
+        X86_64::RSI,
+        X86_64::RDI,
+        X86_64::R8,
+        X86_64::R9,
+        X86_64::R10,
+        X86_64::R11,
+        X86_64::R12,
+        X86_64::R13,
+        X86_64::R14,
+        X86_64::R15,
+    ];
+    const X86_XMM_REG_MAP: [gimli::Register; 16] = [
+        X86_64::XMM0,
+        X86_64::XMM1,
+        X86_64::XMM2,
+        X86_64::XMM3,
+        X86_64::XMM4,
+        X86_64::XMM5,
+        X86_64::XMM6,
+        X86_64::XMM7,
+        X86_64::XMM8,
+        X86_64::XMM9,
+        X86_64::XMM10,
+        X86_64::XMM11,
+        X86_64::XMM12,
+        X86_64::XMM13,
+        X86_64::XMM14,
+        X86_64::XMM15,
+    ];
+    let reg_info = isa.register_info();
+    let bank = reg_info.bank_containing_regunit(reg).unwrap();
+    match bank.name {
+        "IntRegs" => {
+            // x86 GP registers have a weird mapping to DWARF registers, so we use a
+            // lookup table.
+            X86_GP_REG_MAP[(reg - bank.first_unit) as usize]
+        }
+        "FloatRegs" => X86_XMM_REG_MAP[(reg - bank.first_unit) as usize],
+        _ => {
+            panic!("unsupported register bank: {}", bank.name);
+        }
+    }
+}
+
+fn to_cfi(
+    isa: &dyn TargetIsa,
+    change: &FrameLayoutChange,
+    cfa_def_reg: &mut Register,
+    cfa_def_offset: &mut i32,
+) -> Option<CallFrameInstruction> {
+    Some(match change {
+        FrameLayoutChange::CallFrameAddressAt { reg, offset } => {
+            let mapped = map_reg(isa, *reg);
+            let offset = (*offset) as i32;
+            if mapped != *cfa_def_reg && offset != *cfa_def_offset {
+                *cfa_def_reg = mapped;
+                *cfa_def_offset = offset;
+                CallFrameInstruction::Cfa(mapped, offset)
+            } else if offset != *cfa_def_offset {
+                *cfa_def_offset = offset;
+                CallFrameInstruction::CfaOffset(offset)
+            } else if mapped != *cfa_def_reg {
+                *cfa_def_reg = mapped;
+                CallFrameInstruction::CfaRegister(mapped)
+            } else {
+                return None;
+            }
+        }
+        FrameLayoutChange::RegAt { reg, cfa_offset } => {
+            assert!(cfa_offset % -8 == 0);
+            let cfa_offset = *cfa_offset as i32;
+            let mapped = map_reg(isa, *reg);
+            CallFrameInstruction::Offset(mapped, cfa_offset)
+        }
+        FrameLayoutChange::ReturnAddressAt { cfa_offset } => {
+            assert!(cfa_offset % -8 == 0);
+            let cfa_offset = *cfa_offset as i32;
+            CallFrameInstruction::Offset(X86_64::RA, cfa_offset)
+        }
+        FrameLayoutChange::Preserve => CallFrameInstruction::RememberState,
+        FrameLayoutChange::Restore => CallFrameInstruction::RestoreState,
+    })
+}
+
+/// Creates an FDE structure from a FrameLayout.
+pub fn emit_fde(func: &Function, isa: &dyn TargetIsa, sink: &mut dyn FrameUnwindSink) {
+    assert!(isa.name() == "x86");
+
+    // Expecting a function with a System V prologue.
+    assert!(
+        func.signature.call_conv == CallConv::Fast
+            || func.signature.call_conv == CallConv::Cold
+            || func.signature.call_conv == CallConv::SystemV
+    );
+
+    assert!(func.frame_layout.is_some(), "expected func.frame_layout");
+    let frame_layout = func.frame_layout.as_ref().unwrap();
+
+    let mut blocks = func.layout.blocks().collect::<Vec<_>>();
+    blocks.sort_by_key(|block| func.offsets[*block]); // Ensure inst offsets always increase
+
+    let encinfo = isa.encoding_info();
+    let mut last_offset = 0;
+    let mut changes = Vec::new();
+    for block in blocks {
+        for (offset, inst, size) in func.inst_offsets(block, &encinfo) {
+            let address_offset = (offset + size) as usize;
+            assert!(last_offset <= address_offset);
+            if let Some(cmds) = frame_layout.instructions.get(&inst) {
+                for cmd in cmds.iter() {
+                    changes.push((address_offset, *cmd));
+                }
+            }
+            last_offset = address_offset;
+        }
+    }
+
+    let len = last_offset as u32;
+
+    let word_size = isa.pointer_bytes() as i32;
+
+    let encoding = Encoding {
+        format: Format::Dwarf32,
+        version: 1,
+        address_size: word_size as u8,
+    };
+    let mut frames = FrameTable::default();
+
+    let mut cfa_def_reg = return_address_reg(isa);
+    let mut cfa_def_offset = 0i32;
+
+    let mut cie = CommonInformationEntry::new(
+        encoding,
+        /* code_alignment_factor = */ 1,
+        /* data_alignment_factor = */ -word_size as i8,
+        return_address_reg(isa),
+    );
+    for ch in frame_layout.initial.iter() {
+        if let Some(cfi) = to_cfi(isa, ch, &mut cfa_def_reg, &mut cfa_def_offset) {
+            cie.add_instruction(cfi);
+        }
+    }
+
+    let cie_id = frames.add_cie(cie);
+
+    let mut fde = FrameDescriptionEntry::new(FUNCTION_ENTRY_ADDRESS, len);
+
+    for (addr, ch) in changes.iter() {
+        if let Some(cfi) = to_cfi(isa, ch, &mut cfa_def_reg, &mut cfa_def_offset) {
+            fde.add_instruction((*addr) as u32, cfi);
+        }
+    }
+
+    frames.add_fde(cie_id, fde);
+
+    let mut eh_frame = EhFrame::from(FDEWriter::new());
+    frames.write_eh_frame(&mut eh_frame).unwrap();
+
+    let (bytes, relocs) = eh_frame.clone().into_vec_and_relocs();
+
+    let unwind_start = sink.len();
+    sink.bytes(&bytes);
+
+    for (off, r) in relocs {
+        sink.reloc(r, off + unwind_start);
+    }
+
+    let cie_len = u32::from_le_bytes(bytes.as_slice()[..4].try_into().unwrap());
+    let fde_offset = cie_len as usize + 4;
+    sink.set_entry_offset(unwind_start + fde_offset);
+
+    // Need 0 marker for GCC unwind to end FDE "list".
+    sink.bytes(&[0, 0, 0, 0]);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::binemit::{FrameUnwindOffset, Reloc};
+    use crate::cursor::{Cursor, FuncCursor};
+    use crate::ir::{
+        types, AbiParam, ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind,
+        TrapCode,
+    };
+    use crate::isa::{lookup, CallConv};
+    use crate::settings::{builder, Flags};
+    use crate::Context;
+    use std::str::FromStr;
+    use target_lexicon::triple;
+
+    struct SimpleUnwindSink(pub Vec<u8>, pub usize, pub Vec<(Reloc, usize)>);
+    impl FrameUnwindSink for SimpleUnwindSink {
+        fn len(&self) -> FrameUnwindOffset {
+            self.0.len()
+        }
+        fn bytes(&mut self, b: &[u8]) {
+            self.0.extend_from_slice(b);
+        }
+        fn reloc(&mut self, r: Reloc, off: FrameUnwindOffset) {
+            self.2.push((r, off));
+        }
+        fn set_entry_offset(&mut self, off: FrameUnwindOffset) {
+            self.1 = off;
+        }
+    }
+
+    #[test]
+    fn test_simple_func() {
+        let isa = lookup(triple!("x86_64"))
+            .expect("expect x86 ISA")
+            .finish(Flags::new(builder()));
+
+        let mut context = Context::for_function(create_function(
+            CallConv::SystemV,
+            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
+        ));
+        context.func.collect_frame_layout_info();
+
+        context.compile(&*isa).expect("expected compilation");
+
+        let mut sink = SimpleUnwindSink(Vec::new(), 0, Vec::new());
+        emit_fde(&context.func, &*isa, &mut sink);
+
+        assert_eq!(
+            sink.0,
+            vec![
+                20, 0, 0, 0, // CIE len
+                0, 0, 0, 0, // CIE marker
+                1, // version
+                0, // augmentation string
+                1, // code alignment = 1
+                120, // data alignment = -8
+                16, // RA = r16
+                0x0c, 0x07, 0x08, // DW_CFA_def_cfa r7, 8
+                0x90, 0x01, // DW_CFA_offset r16, -8 * 1
+                0, 0, 0, 0, 0, 0, // padding
+                36, 0, 0, 0, // FDE len
+                28, 0, 0, 0, // CIE offset
+                0, 0, 0, 0, 0, 0, 0, 0, // addr reloc
+                16, 0, 0, 0, 0, 0, 0, 0, // function length
+                0x42, // DW_CFA_advance_loc 2
+                0x0e, 0x10, // DW_CFA_def_cfa_offset 16
+                0x86, 0x02, // DW_CFA_offset r6, -8 * 2
+                0x43, // DW_CFA_advance_loc 3
+                0x0d, 0x06, // DW_CFA_def_cfa_register r6
+                0x4a, // DW_CFA_advance_loc 10
+                0x0c, 0x07, 0x08, // DW_CFA_def_cfa r7, 8
+                0, 0, 0, 0, // padding
+                0, 0, 0, 0, // End of FDEs
+            ]
+        );
+        assert_eq!(sink.1, 24);
+        assert_eq!(sink.2.len(), 1);
+    }
+
+    fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
+        let mut func =
+            Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
+
+        let block0 = func.dfg.make_block();
+        let mut pos = FuncCursor::new(&mut func);
+        pos.insert_block(block0);
+        pos.ins().return_(&[]);
+
+        if let Some(stack_slot) = stack_slot {
+            func.stack_slots.push(stack_slot);
+        }
+
+        func
+    }
+
+    #[test]
+    fn test_multi_return_func() {
+        let isa = lookup(triple!("x86_64"))
+            .expect("expect x86 ISA")
+            .finish(Flags::new(builder()));
+
+        let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
+        context.func.collect_frame_layout_info();
+
+        context.compile(&*isa).expect("expected compilation");
+
+        let mut sink = SimpleUnwindSink(Vec::new(), 0, Vec::new());
emit_fde(&context.func, &*isa, &mut sink); + + assert_eq!( + sink.0, + vec![ + 20, 0, 0, 0, // CIE len + 0, 0, 0, 0, // CIE marker + 1, // version + 0, // augmentation string + 1, // code aligment = 1 + 120, // data alignment = -8 + 16, // RA = r16 + 0x0c, 0x07, 0x08, // DW_CFA_def_cfa r7, 8 + 0x90, 0x01, // DW_CFA_offset r16, -8 * 1 + 0, 0, 0, 0, 0, 0, // padding + 36, 0, 0, 0, // FDE len + 28, 0, 0, 0, // CIE offset + 0, 0, 0, 0, 0, 0, 0, 0, // addr reloc + 15, 0, 0, 0, 0, 0, 0, 0, // function length + 0x42, // DW_CFA_advance_loc 2 + 0x0e, 0x10, // DW_CFA_def_cfa_offset 16 + 0x86, 0x02, // DW_CFA_offset r6, -8 * 2 + 0x43, // DW_CFA_advance_loc 3 + 0x0d, 0x06, // DW_CFA_def_cfa_register + 0x47, // DW_CFA_advance_loc 10 + 0x0a, // DW_CFA_preserve_state + 0x0c, 0x07, 0x08, // DW_CFA_def_cfa r7, 8 + 0x41, // DW_CFA_advance_loc 1 + 0x0b, // DW_CFA_restore_state + // NOTE: no additional CFA directives -- DW_CFA_restore_state + // is done before trap and it is last instruction in the function. + 0, // padding + 0, 0, 0, 0, // End of FDEs + ] + ); + assert_eq!(sink.1, 24); + assert_eq!(sink.2.len(), 1); + } + + fn create_multi_return_function(call_conv: CallConv) -> Function { + let mut sig = Signature::new(call_conv); + sig.params.push(AbiParam::new(types::I32)); + let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig); + + let block0 = func.dfg.make_block(); + let v0 = func.dfg.append_block_param(block0, types::I32); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().brnz(v0, block2, &[]); + pos.ins().jump(block1, &[]); + + pos.insert_block(block1); + pos.ins().return_(&[]); + + pos.insert_block(block2); + pos.ins().trap(TrapCode::User(0)); + + func + } +} diff --git a/cranelift/codegen/src/isa/x86/mod.rs b/cranelift/codegen/src/isa/x86/mod.rs new file mode 100644 index 0000000000..c3b871ab42 --- /dev/null +++ b/cranelift/codegen/src/isa/x86/mod.rs @@ -0,0 +1,179 @@ +//! x86 Instruction Set Architectures. + +mod abi; +mod binemit; +mod enc_tables; +#[cfg(feature = "unwind")] +mod fde; +mod registers; +pub mod settings; +#[cfg(feature = "unwind")] +mod unwind; + +use super::super::settings as shared_settings; +#[cfg(feature = "testing_hooks")] +use crate::binemit::CodeSink; +use crate::binemit::{emit_function, MemoryCodeSink}; +#[cfg(feature = "unwind")] +use crate::binemit::{FrameUnwindKind, FrameUnwindSink}; +use crate::ir; +use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings}; +use crate::isa::Builder as IsaBuilder; +use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa}; +use crate::regalloc; +use crate::result::CodegenResult; +use crate::timing; +use alloc::borrow::Cow; +use alloc::boxed::Box; +use core::fmt; +use target_lexicon::{PointerWidth, Triple}; + +#[allow(dead_code)] +struct Isa { + triple: Triple, + shared_flags: shared_settings::Flags, + isa_flags: settings::Flags, + cpumode: &'static [shared_enc_tables::Level1Entry], +} + +/// Get an ISA builder for creating x86 targets. 
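+///
+/// A minimal usage sketch (illustrative; the tests in this crate reach the
+/// same builder through `isa::lookup` instead of calling this directly):
+///
+/// ```ignore
+/// let builder = isa_builder(triple!("x86_64"));
+/// let isa = builder.finish(shared_settings::Flags::new(shared_settings::builder()));
+/// ```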
+pub fn isa_builder(triple: Triple) -> IsaBuilder { + IsaBuilder { + triple, + setup: settings::builder(), + constructor: isa_constructor, + } +} + +fn isa_constructor( + triple: Triple, + shared_flags: shared_settings::Flags, + builder: shared_settings::Builder, +) -> Box { + let level1 = match triple.pointer_width().unwrap() { + PointerWidth::U16 => unimplemented!("x86-16"), + PointerWidth::U32 => &enc_tables::LEVEL1_I32[..], + PointerWidth::U64 => &enc_tables::LEVEL1_I64[..], + }; + Box::new(Isa { + triple, + isa_flags: settings::Flags::new(&shared_flags, builder), + shared_flags, + cpumode: level1, + }) +} + +impl TargetIsa for Isa { + fn name(&self) -> &'static str { + "x86" + } + + fn triple(&self) -> &Triple { + &self.triple + } + + fn flags(&self) -> &shared_settings::Flags { + &self.shared_flags + } + + fn uses_cpu_flags(&self) -> bool { + true + } + + fn uses_complex_addresses(&self) -> bool { + true + } + + fn register_info(&self) -> RegInfo { + registers::INFO.clone() + } + + fn encoding_info(&self) -> EncInfo { + enc_tables::INFO.clone() + } + + fn legal_encodings<'a>( + &'a self, + func: &'a ir::Function, + inst: &'a ir::InstructionData, + ctrl_typevar: ir::Type, + ) -> Encodings<'a> { + lookup_enclist( + ctrl_typevar, + inst, + func, + self.cpumode, + &enc_tables::LEVEL2[..], + &enc_tables::ENCLISTS[..], + &enc_tables::LEGALIZE_ACTIONS[..], + &enc_tables::RECIPE_PREDICATES[..], + &enc_tables::INST_PREDICATES[..], + self.isa_flags.predicate_view(), + ) + } + + fn legalize_signature(&self, sig: &mut Cow, current: bool) { + abi::legalize_signature( + sig, + &self.triple, + current, + &self.shared_flags, + &self.isa_flags, + ) + } + + fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass { + abi::regclass_for_abi_type(ty) + } + + fn allocatable_registers(&self, _func: &ir::Function) -> regalloc::RegisterSet { + abi::allocatable_registers(&self.triple, &self.shared_flags) + } + + #[cfg(feature = "testing_hooks")] + fn emit_inst( + &self, + func: &ir::Function, + inst: ir::Inst, + divert: &mut regalloc::RegDiversions, + sink: &mut dyn CodeSink, + ) { + binemit::emit_inst(func, inst, divert, sink, self) + } + + fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) { + emit_function(func, binemit::emit_inst, sink, self) + } + + fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> { + let _tt = timing::prologue_epilogue(); + abi::prologue_epilogue(func, self) + } + + fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC { + ir::condcodes::IntCC::UnsignedLessThan + } + + fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC { + ir::condcodes::IntCC::UnsignedLessThan + } + + /// Emit unwind information for the given function. + /// + /// Only some calling conventions (e.g. Windows fastcall) will have unwind information. + #[cfg(feature = "unwind")] + fn emit_unwind_info( + &self, + func: &ir::Function, + kind: FrameUnwindKind, + sink: &mut dyn FrameUnwindSink, + ) { + abi::emit_unwind_info(func, self, kind, sink); + } +} + +impl fmt::Display for Isa { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}\n{}", self.shared_flags, self.isa_flags) + } +} diff --git a/cranelift/codegen/src/isa/x86/registers.rs b/cranelift/codegen/src/isa/x86/registers.rs new file mode 100644 index 0000000000..130cf41d79 --- /dev/null +++ b/cranelift/codegen/src/isa/x86/registers.rs @@ -0,0 +1,63 @@ +//! x86 register descriptions. 
+ +use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit}; + +include!(concat!(env!("OUT_DIR"), "/registers-x86.rs")); + +#[cfg(test)] +mod tests { + use super::*; + use crate::isa::RegUnit; + use alloc::string::{String, ToString}; + + #[test] + fn unit_encodings() { + // The encoding of integer registers is not alphabetical. + assert_eq!(INFO.parse_regunit("rax"), Some(0)); + assert_eq!(INFO.parse_regunit("rbx"), Some(3)); + assert_eq!(INFO.parse_regunit("rcx"), Some(1)); + assert_eq!(INFO.parse_regunit("rdx"), Some(2)); + assert_eq!(INFO.parse_regunit("rsi"), Some(6)); + assert_eq!(INFO.parse_regunit("rdi"), Some(7)); + assert_eq!(INFO.parse_regunit("rbp"), Some(5)); + assert_eq!(INFO.parse_regunit("rsp"), Some(4)); + assert_eq!(INFO.parse_regunit("r8"), Some(8)); + assert_eq!(INFO.parse_regunit("r15"), Some(15)); + + assert_eq!(INFO.parse_regunit("xmm0"), Some(16)); + assert_eq!(INFO.parse_regunit("xmm15"), Some(31)); + } + + #[test] + fn unit_names() { + fn uname(ru: RegUnit) -> String { + INFO.display_regunit(ru).to_string() + } + + assert_eq!(uname(0), "%rax"); + assert_eq!(uname(3), "%rbx"); + assert_eq!(uname(1), "%rcx"); + assert_eq!(uname(2), "%rdx"); + assert_eq!(uname(6), "%rsi"); + assert_eq!(uname(7), "%rdi"); + assert_eq!(uname(5), "%rbp"); + assert_eq!(uname(4), "%rsp"); + assert_eq!(uname(8), "%r8"); + assert_eq!(uname(15), "%r15"); + assert_eq!(uname(16), "%xmm0"); + assert_eq!(uname(31), "%xmm15"); + } + + #[test] + fn regclasses() { + assert_eq!(GPR.intersect_index(GPR), Some(GPR.into())); + assert_eq!(GPR.intersect_index(ABCD), Some(ABCD.into())); + assert_eq!(GPR.intersect_index(FPR), None); + assert_eq!(ABCD.intersect_index(GPR), Some(ABCD.into())); + assert_eq!(ABCD.intersect_index(ABCD), Some(ABCD.into())); + assert_eq!(ABCD.intersect_index(FPR), None); + assert_eq!(FPR.intersect_index(FPR), Some(FPR.into())); + assert_eq!(FPR.intersect_index(GPR), None); + assert_eq!(FPR.intersect_index(ABCD), None); + } +} diff --git a/cranelift/codegen/src/isa/x86/settings.rs b/cranelift/codegen/src/isa/x86/settings.rs new file mode 100644 index 0000000000..2d3a3f6698 --- /dev/null +++ b/cranelift/codegen/src/isa/x86/settings.rs @@ -0,0 +1,52 @@ +//! x86 Settings. + +use crate::settings::{self, detail, Builder}; +use core::fmt; + +// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a +// public `Flags` struct with an impl for all of the settings defined in +// `cranelift-codegen/meta/src/isa/x86/settings.rs`. +include!(concat!(env!("OUT_DIR"), "/settings-x86.rs")); + +#[cfg(test)] +mod tests { + use super::{builder, Flags}; + use crate::settings::{self, Configurable}; + + #[test] + fn presets() { + let shared = settings::Flags::new(settings::builder()); + + // Nehalem has SSE4.1 but not BMI1. 
+        let mut b0 = builder();
+        b0.enable("nehalem").unwrap();
+        let f0 = Flags::new(&shared, b0);
+        assert_eq!(f0.has_sse41(), true);
+        assert_eq!(f0.has_bmi1(), false);
+
+        let mut b1 = builder();
+        b1.enable("haswell").unwrap();
+        let f1 = Flags::new(&shared, b1);
+        assert_eq!(f1.has_sse41(), true);
+        assert_eq!(f1.has_bmi1(), true);
+    }
+    #[test]
+    fn display_presets() {
+        // Spot check that the flags Display impl does not cause a panic
+        let shared = settings::Flags::new(settings::builder());
+
+        let b0 = builder();
+        let f0 = Flags::new(&shared, b0);
+        let _ = format!("{}", f0);
+
+        let mut b1 = builder();
+        b1.enable("nehalem").unwrap();
+        let f1 = Flags::new(&shared, b1);
+        let _ = format!("{}", f1);
+
+        let mut b2 = builder();
+        b2.enable("haswell").unwrap();
+        let f2 = Flags::new(&shared, b2);
+        let _ = format!("{}", f2);
+    }
+}
diff --git a/cranelift/codegen/src/isa/x86/unwind.rs b/cranelift/codegen/src/isa/x86/unwind.rs
new file mode 100644
index 0000000000..693693ab37
--- /dev/null
+++ b/cranelift/codegen/src/isa/x86/unwind.rs
@@ -0,0 +1,533 @@
+//! Unwind information for x64 Windows.
+
+use super::registers::RU;
+use crate::binemit::FrameUnwindSink;
+use crate::ir::{Function, InstructionData, Opcode};
+use crate::isa::{CallConv, RegUnit, TargetIsa};
+use alloc::vec::Vec;
+use byteorder::{ByteOrder, LittleEndian};
+
+/// Maximum (inclusive) size of a "small" stack allocation
+const SMALL_ALLOC_MAX_SIZE: u32 = 128;
+/// Maximum (inclusive) size of a "large" stack allocation that can be represented in 16 bits
+const LARGE_ALLOC_16BIT_MAX_SIZE: u32 = 524280;
+
+fn write_u8(sink: &mut dyn FrameUnwindSink, v: u8) {
+    sink.bytes(&[v]);
+}
+
+fn write_u16<T: ByteOrder>(sink: &mut dyn FrameUnwindSink, v: u16) {
+    let mut buf = [0; 2];
+    T::write_u16(&mut buf, v);
+    sink.bytes(&buf);
+}
+
+fn write_u32<T: ByteOrder>(sink: &mut dyn FrameUnwindSink, v: u32) {
+    let mut buf = [0; 4];
+    T::write_u32(&mut buf, v);
+    sink.bytes(&buf);
+}
+
+/// The supported unwind codes for the x64 Windows ABI.
+///
+/// See: https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
+/// Only what is needed to describe the prologues generated by the Cranelift x86 ISA is represented here.
+/// Note: the Cranelift x86 ISA RU enum matches the Windows unwind GPR encoding values.
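+///
+/// Illustrative encoding (mirrors the tests below): a `push %rbp` recorded at
+/// prologue offset 2 becomes the two bytes `0x02, 0x50` -- the prologue offset,
+/// then `(reg << 4) | operation` with reg = 5 (RBP) and operation = 0
+/// (push nonvolatile register).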
+#[derive(Debug, PartialEq, Eq)]
+enum UnwindCode {
+    PushRegister { offset: u8, reg: RegUnit },
+    StackAlloc { offset: u8, size: u32 },
+    SetFramePointer { offset: u8, sp_offset: u8 },
+}
+
+impl UnwindCode {
+    fn emit(&self, sink: &mut dyn FrameUnwindSink) {
+        enum UnwindOperation {
+            PushNonvolatileRegister,
+            LargeStackAlloc,
+            SmallStackAlloc,
+            SetFramePointer,
+        }
+
+        match self {
+            Self::PushRegister { offset, reg } => {
+                write_u8(sink, *offset);
+                write_u8(
+                    sink,
+                    ((*reg as u8) << 4) | (UnwindOperation::PushNonvolatileRegister as u8),
+                );
+            }
+            Self::StackAlloc { offset, size } => {
+                // Stack allocations on Windows must be a multiple of 8 and be at least 1 slot
+                assert!(*size >= 8);
+                assert!((*size % 8) == 0);
+
+                write_u8(sink, *offset);
+                if *size <= SMALL_ALLOC_MAX_SIZE {
+                    write_u8(
+                        sink,
+                        ((((*size - 8) / 8) as u8) << 4) | UnwindOperation::SmallStackAlloc as u8,
+                    );
+                } else if *size <= LARGE_ALLOC_16BIT_MAX_SIZE {
+                    write_u8(sink, UnwindOperation::LargeStackAlloc as u8);
+                    write_u16::<LittleEndian>(sink, (*size / 8) as u16);
+                } else {
+                    write_u8(sink, (1 << 4) | (UnwindOperation::LargeStackAlloc as u8));
+                    write_u32::<LittleEndian>(sink, *size);
+                }
+            }
+            Self::SetFramePointer { offset, sp_offset } => {
+                write_u8(sink, *offset);
+                write_u8(
+                    sink,
+                    (*sp_offset << 4) | (UnwindOperation::SetFramePointer as u8),
+                );
+            }
+        };
+    }
+
+    fn node_count(&self) -> usize {
+        match self {
+            Self::StackAlloc { size, .. } => {
+                if *size <= SMALL_ALLOC_MAX_SIZE {
+                    1
+                } else if *size <= LARGE_ALLOC_16BIT_MAX_SIZE {
+                    2
+                } else {
+                    3
+                }
+            }
+            _ => 1,
+        }
+    }
+}
+
+/// Represents Windows x64 unwind information.
+///
+/// For information about Windows x64 unwind info, see:
+/// https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64
+#[derive(Debug, PartialEq, Eq)]
+pub struct UnwindInfo {
+    flags: u8,
+    prologue_size: u8,
+    frame_register: Option<RegUnit>,
+    frame_register_offset: u8,
+    unwind_codes: Vec<UnwindCode>,
+}
+
+impl UnwindInfo {
+    pub fn try_from_func(
+        func: &Function,
+        isa: &dyn TargetIsa,
+        frame_register: Option<RegUnit>,
+    ) -> Option<Self> {
+        // Only Windows fastcall is supported for unwind information
+        if func.signature.call_conv != CallConv::WindowsFastcall || func.prologue_end.is_none() {
+            return None;
+        }
+
+        let prologue_end = func.prologue_end.unwrap();
+        let entry_block = func.layout.blocks().nth(0).expect("missing entry block");
+
+        // Stores the stack size when SP is not adjusted via an immediate value
+        let mut stack_size = None;
+        let mut prologue_size = 0;
+        let mut unwind_codes = Vec::new();
+        let mut found_end = false;
+
+        for (offset, inst, size) in func.inst_offsets(entry_block, &isa.encoding_info()) {
+            // x64 ABI prologues cannot exceed 255 bytes in length
+            if (offset + size) > 255 {
+                panic!("function prologues cannot exceed 255 bytes in size for Windows x64");
+            }
+
+            prologue_size += size;
+
+            let unwind_offset = (offset + size) as u8;
+
+            match func.dfg[inst] {
+                InstructionData::Unary { opcode, arg } => {
+                    match opcode {
+                        Opcode::X86Push => {
+                            unwind_codes.push(UnwindCode::PushRegister {
+                                offset: unwind_offset,
+                                reg: func.locations[arg].unwrap_reg(),
+                            });
+                        }
+                        Opcode::AdjustSpDown => {
+                            // This is used when calling a stack check function
+                            // We need to track the assignment to RAX which has the size of the stack
+                            unwind_codes.push(UnwindCode::StackAlloc {
+                                offset: unwind_offset,
+                                size: stack_size
+                                    .expect("expected a previous stack size instruction"),
+                            });
+                        }
+                        _ => {}
+                    }
+                }
+                InstructionData::CopySpecial { src, dst, .. } => {
+                    if let Some(frame_register) = frame_register {
+                        if src == (RU::rsp as RegUnit) && dst == frame_register {
+                            unwind_codes.push(UnwindCode::SetFramePointer {
+                                offset: unwind_offset,
+                                sp_offset: 0,
+                            });
+                        }
+                    }
+                }
+                InstructionData::UnaryImm { opcode, imm } => {
+                    match opcode {
+                        Opcode::Iconst => {
+                            let imm: i64 = imm.into();
+                            assert!(imm <= core::u32::MAX as i64);
+                            assert!(stack_size.is_none());
+
+                            // This instruction should only appear in a prologue to pass an
+                            // argument of the stack size to a stack check function.
+                            // Record the stack size so we know what it is when we encounter the
+                            // adjustment instruction (which will adjust via the register assigned
+                            // to this instruction).
+                            stack_size = Some(imm as u32);
+                        }
+                        Opcode::AdjustSpDownImm => {
+                            let imm: i64 = imm.into();
+                            assert!(imm <= core::u32::MAX as i64);
+
+                            unwind_codes.push(UnwindCode::StackAlloc {
+                                offset: unwind_offset,
+                                size: imm as u32,
+                            });
+                        }
+                        _ => {}
+                    }
+                }
+                _ => {}
+            };
+
+            if inst == prologue_end {
+                found_end = true;
+                break;
+            }
+        }
+
+        if !found_end {
+            return None;
+        }
+
+        Some(Self {
+            flags: 0, // this assumes cranelift functions have no SEH handlers
+            prologue_size: prologue_size as u8,
+            frame_register,
+            frame_register_offset: 0,
+            unwind_codes,
+        })
+    }
+
+    pub fn size(&self) -> usize {
+        let node_count = self.node_count();
+
+        // Calculation of the size requires no SEH handler or chained info
+        assert!(self.flags == 0);
+
+        // Size of the fixed part of UNWIND_INFO is 4 bytes
+        // Then comes the UNWIND_CODE nodes (2 bytes each)
+        // Then comes 2 bytes of padding for the unwind codes if necessary
+        // Next would come the SEH data, but we assert above that the function doesn't have SEH data
+
+        4 + (node_count * 2) + if (node_count & 1) == 1 { 2 } else { 0 }
+    }
+
+    pub fn node_count(&self) -> usize {
+        self.unwind_codes
+            .iter()
+            .fold(0, |nodes, c| nodes + c.node_count())
+    }
+
+    pub fn emit(&self, sink: &mut dyn FrameUnwindSink) {
+        const UNWIND_INFO_VERSION: u8 = 1;
+
+        let size = self.size();
+        let offset = sink.len();
+
+        // Ensure the memory is 32-bit aligned
+        assert_eq!(offset % 4, 0);
+
+        sink.reserve(offset + size);
+
+        let node_count = self.node_count();
+        // The node count is emitted as a single byte below, so it must fit in a u8.
+        assert!(node_count <= 255);
+
+        write_u8(sink, (self.flags << 3) | UNWIND_INFO_VERSION);
+        write_u8(sink, self.prologue_size);
+        write_u8(sink, node_count as u8);
+
+        if let Some(reg) = self.frame_register {
+            write_u8(sink, (self.frame_register_offset << 4) | reg as u8);
+        } else {
+            write_u8(sink, 0);
+        }
+
+        // Unwind codes are written in reverse order (prologue offset descending)
+        for code in self.unwind_codes.iter().rev() {
+            code.emit(sink);
+        }
+
+        // To keep a 32-bit alignment, emit 2 bytes of padding if there's an odd number of 16-bit nodes
+        if (node_count & 1) == 1 {
+            write_u16::<LittleEndian>(sink, 0);
+        }
+
+        // Ensure the correct number of bytes was emitted
+        assert_eq!(sink.len() - offset, size);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::binemit::{FrameUnwindOffset, Reloc};
+    use crate::cursor::{Cursor, FuncCursor};
+    use crate::ir::{ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind};
+    use crate::isa::{lookup, CallConv};
+    use crate::settings::{builder, Flags};
+    use crate::Context;
+    use std::str::FromStr;
+    use target_lexicon::triple;
+
+    struct SimpleUnwindSink(pub Vec<u8>);
+    impl FrameUnwindSink for SimpleUnwindSink {
+        fn len(&self) -> FrameUnwindOffset {
+            self.0.len()
+        }
+        fn bytes(&mut self, b: &[u8]) {
+            self.0.extend_from_slice(b);
+        }
+        fn reloc(&mut self, _: Reloc, _:
FrameUnwindOffset) {} + fn set_entry_offset(&mut self, _: FrameUnwindOffset) {} + } + + #[test] + fn test_wrong_calling_convention() { + let isa = lookup(triple!("x86_64")) + .expect("expect x86 ISA") + .finish(Flags::new(builder())); + + let mut context = Context::for_function(create_function(CallConv::SystemV, None)); + + context.compile(&*isa).expect("expected compilation"); + + assert_eq!(UnwindInfo::try_from_func(&context.func, &*isa, None), None); + } + + #[test] + fn test_small_alloc() { + let isa = lookup(triple!("x86_64")) + .expect("expect x86 ISA") + .finish(Flags::new(builder())); + + let mut context = Context::for_function(create_function( + CallConv::WindowsFastcall, + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), + )); + + context.compile(&*isa).expect("expected compilation"); + + let unwind = UnwindInfo::try_from_func(&context.func, &*isa, Some(RU::rbp.into())) + .expect("expected unwind info"); + + assert_eq!( + unwind, + UnwindInfo { + flags: 0, + prologue_size: 9, + frame_register: Some(RU::rbp.into()), + frame_register_offset: 0, + unwind_codes: vec![ + UnwindCode::PushRegister { + offset: 2, + reg: RU::rbp.into() + }, + UnwindCode::SetFramePointer { + offset: 5, + sp_offset: 0 + }, + UnwindCode::StackAlloc { + offset: 9, + size: 64 + 32 + } + ] + } + ); + + assert_eq!(unwind.size(), 12); + + let mut sink = SimpleUnwindSink(Vec::new()); + unwind.emit(&mut sink); + + assert_eq!( + sink.0, + [ + 0x01, // Version and flags (version 1, no flags) + 0x09, // Prologue size + 0x03, // Unwind code count (1 for stack alloc, 1 for save frame reg, 1 for push reg) + 0x05, // Frame register + offset (RBP with 0 offset) + 0x09, // Prolog offset + 0xB2, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0xB * 8) + 8 = 96 (64 + 32) bytes) + 0x05, // Prolog offset + 0x03, // Operation 3 (save frame register), stack pointer offset = 0 + 0x02, // Prolog offset + 0x50, // Operation 0 (save nonvolatile register), reg = 5 (RBP) + 0x00, // Padding byte + 0x00, // Padding byte + ] + ); + } + + #[test] + fn test_medium_alloc() { + let isa = lookup(triple!("x86_64")) + .expect("expect x86 ISA") + .finish(Flags::new(builder())); + + let mut context = Context::for_function(create_function( + CallConv::WindowsFastcall, + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 10000)), + )); + + context.compile(&*isa).expect("expected compilation"); + + let unwind = UnwindInfo::try_from_func(&context.func, &*isa, Some(RU::rbp.into())) + .expect("expected unwind info"); + + assert_eq!( + unwind, + UnwindInfo { + flags: 0, + prologue_size: 27, + frame_register: Some(RU::rbp.into()), + frame_register_offset: 0, + unwind_codes: vec![ + UnwindCode::PushRegister { + offset: 2, + reg: RU::rbp.into() + }, + UnwindCode::SetFramePointer { + offset: 5, + sp_offset: 0 + }, + UnwindCode::StackAlloc { + offset: 27, + size: 10000 + 32 + } + ] + } + ); + + assert_eq!(unwind.size(), 12); + + let mut sink = SimpleUnwindSink(Vec::new()); + unwind.emit(&mut sink); + + assert_eq!( + sink.0, + [ + 0x01, // Version and flags (version 1, no flags) + 0x1B, // Prologue size + 0x04, // Unwind code count (2 for stack alloc, 1 for save frame reg, 1 for push reg) + 0x05, // Frame register + offset (RBP with 0 offset) + 0x1B, // Prolog offset + 0x01, // Operation 1 (large stack alloc), size is scaled 16-bits (info = 0) + 0xE6, // Low size byte + 0x04, // High size byte (e.g. 
0x04E6 * 8 = 10032 (10000 + 32) bytes)
+                0x05, // Prolog offset
+                0x03, // Operation 3 (save frame register), stack pointer offset = 0
+                0x02, // Prolog offset
+                0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
+            ]
+        );
+    }
+
+    #[test]
+    fn test_large_alloc() {
+        let isa = lookup(triple!("x86_64"))
+            .expect("expect x86 ISA")
+            .finish(Flags::new(builder()));
+
+        let mut context = Context::for_function(create_function(
+            CallConv::WindowsFastcall,
+            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 1000000)),
+        ));
+
+        context.compile(&*isa).expect("expected compilation");
+
+        let unwind = UnwindInfo::try_from_func(&context.func, &*isa, Some(RU::rbp.into()))
+            .expect("expected unwind info");
+
+        assert_eq!(
+            unwind,
+            UnwindInfo {
+                flags: 0,
+                prologue_size: 27,
+                frame_register: Some(RU::rbp.into()),
+                frame_register_offset: 0,
+                unwind_codes: vec![
+                    UnwindCode::PushRegister {
+                        offset: 2,
+                        reg: RU::rbp.into()
+                    },
+                    UnwindCode::SetFramePointer {
+                        offset: 5,
+                        sp_offset: 0
+                    },
+                    UnwindCode::StackAlloc {
+                        offset: 27,
+                        size: 1000000 + 32
+                    }
+                ]
+            }
+        );
+
+        assert_eq!(unwind.size(), 16);
+
+        let mut sink = SimpleUnwindSink(Vec::new());
+        unwind.emit(&mut sink);
+
+        assert_eq!(
+            sink.0,
+            [
+                0x01, // Version and flags (version 1, no flags)
+                0x1B, // Prologue size
+                0x05, // Unwind code count (3 for stack alloc, 1 for save frame reg, 1 for push reg)
+                0x05, // Frame register + offset (RBP with 0 offset)
+                0x1B, // Prolog offset
+                0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1)
+                0x60, // Byte 1 of size
+                0x42, // Byte 2 of size
+                0x0F, // Byte 3 of size
+                0x00, // Byte 4 of size (size is 0xF4260 = 1000032 (1000000 + 32) bytes)
+                0x05, // Prolog offset
+                0x03, // Operation 3 (save frame register), stack pointer offset = 0
+                0x02, // Prolog offset
+                0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
+                0x00, // Padding byte
+                0x00, // Padding byte
+            ]
+        );
+    }
+
+    fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
+        let mut func =
+            Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
+
+        let block0 = func.dfg.make_block();
+        let mut pos = FuncCursor::new(&mut func);
+        pos.insert_block(block0);
+        pos.ins().return_(&[]);
+
+        if let Some(stack_slot) = stack_slot {
+            func.stack_slots.push(stack_slot);
+        }
+
+        func
+    }
+}
diff --git a/cranelift/codegen/src/iterators.rs b/cranelift/codegen/src/iterators.rs
new file mode 100644
index 0000000000..ca9c4ab26b
--- /dev/null
+++ b/cranelift/codegen/src/iterators.rs
@@ -0,0 +1,93 @@
+//! Iterator utilities.
+
+/// Extra methods for iterators.
+pub trait IteratorExtras: Iterator {
+    /// Create an iterator that produces adjacent pairs of elements from the iterator.
+    fn adjacent_pairs(mut self) -> AdjacentPairs<Self>
+    where
+        Self: Sized,
+        Self::Item: Clone,
+    {
+        let elem = self.next();
+        AdjacentPairs { iter: self, elem }
+    }
+}
+
+impl<T> IteratorExtras for T where T: Iterator {}
+
+/// Adjacent pairs iterator returned by `adjacent_pairs()`.
+///
+/// This wraps another iterator and produces a sequence of adjacent pairs of elements.
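+///
+/// For example (see the tests below):
+///
+/// ```text
+/// [1, 2, 3, 4]  =>  (1, 2), (2, 3), (3, 4)
+/// ```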
+pub struct AdjacentPairs<I>
+where
+    I: Iterator,
+    I::Item: Clone,
+{
+    iter: I,
+    elem: Option<I::Item>,
+}
+
+impl<I> Iterator for AdjacentPairs<I>
+where
+    I: Iterator,
+    I::Item: Clone,
+{
+    type Item = (I::Item, I::Item);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.elem.take().and_then(|e| {
+            self.elem = self.iter.next();
+            self.elem.clone().map(|n| (e, n))
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use alloc::vec::Vec;
+
+    #[test]
+    fn adjpairs() {
+        use super::IteratorExtras;
+
+        assert_eq!(
+            [1, 2, 3, 4]
+                .iter()
+                .cloned()
+                .adjacent_pairs()
+                .collect::<Vec<_>>(),
+            vec![(1, 2), (2, 3), (3, 4)]
+        );
+        assert_eq!(
+            [2, 3, 4]
+                .iter()
+                .cloned()
+                .adjacent_pairs()
+                .collect::<Vec<_>>(),
+            vec![(2, 3), (3, 4)]
+        );
+        assert_eq!(
+            [2, 3, 4]
+                .iter()
+                .cloned()
+                .adjacent_pairs()
+                .collect::<Vec<_>>(),
+            vec![(2, 3), (3, 4)]
+        );
+        assert_eq!(
+            [3, 4].iter().cloned().adjacent_pairs().collect::<Vec<_>>(),
+            vec![(3, 4)]
+        );
+        assert_eq!(
+            [4].iter().cloned().adjacent_pairs().collect::<Vec<_>>(),
+            vec![]
+        );
+        assert_eq!(
+            [].iter()
+                .cloned()
+                .adjacent_pairs()
+                .collect::<Vec<(i32, i32)>>(),
+            vec![]
+        );
+    }
+}
diff --git a/cranelift/codegen/src/legalizer/boundary.rs b/cranelift/codegen/src/legalizer/boundary.rs
new file mode 100644
index 0000000000..7fb977a06a
--- /dev/null
+++ b/cranelift/codegen/src/legalizer/boundary.rs
@@ -0,0 +1,1067 @@
+//! Legalize ABI boundaries.
+//!
+//! This legalizer sub-module contains code for dealing with ABI boundaries:
+//!
+//! - Function arguments passed to the entry block.
+//! - Function arguments passed to call instructions.
+//! - Return values from call instructions.
+//! - Return values passed to return instructions.
+//!
+//! The ABI boundary legalization happens in two phases:
+//!
+//! 1. The `legalize_signatures` function rewrites all the preamble signatures with ABI information
+//!    and possibly new argument types. It also rewrites the entry block arguments to match.
+//! 2. The `handle_call_abi` and `handle_return_abi` functions rewrite call and return instructions
+//!    to match the new ABI signatures.
+//!
+//! Between the two phases, preamble signatures and call/return arguments don't match. This
+//! intermediate state doesn't type check.
+
+use crate::abi::{legalize_abi_value, ValueConversion};
+use crate::cursor::{Cursor, FuncCursor};
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::instructions::CallInfo;
+use crate::ir::{
+    AbiParam, ArgumentLoc, ArgumentPurpose, Block, DataFlowGraph, Function, Inst, InstBuilder,
+    MemFlags, SigRef, Signature, StackSlotData, StackSlotKind, Type, Value, ValueLoc,
+};
+use crate::isa::TargetIsa;
+use crate::legalizer::split::{isplit, vsplit};
+use alloc::borrow::Cow;
+use alloc::vec::Vec;
+use core::mem;
+use cranelift_entity::EntityList;
+use log::debug;
+
+/// Legalize all the function signatures in `func`.
+///
+/// This changes all signatures to be ABI-compliant with full `ArgumentLoc` annotations. It doesn't
+/// change the entry block arguments, calls, or return instructions, so this can leave the function
+/// in a state with type discrepancies.
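+///
+/// Illustrative example (assuming a 32-bit target where `i64` must be split):
+/// a signature parameter of type `i64` is rewritten into two `i32` ABI
+/// parameters (`ValueConversion::IntSplit`), and the entry block is later
+/// given two `i32` parameters whose `iconcat` replaces the original `i64`
+/// argument.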
+pub fn legalize_signatures(func: &mut Function, isa: &dyn TargetIsa) { + if let Some(new) = legalize_signature(&func.signature, true, isa) { + let old = mem::replace(&mut func.signature, new); + func.old_signature = Some(old); + } + + for (sig_ref, sig_data) in func.dfg.signatures.iter_mut() { + if let Some(new) = legalize_signature(sig_data, false, isa) { + let old = mem::replace(sig_data, new); + func.dfg.old_signatures[sig_ref] = Some(old); + } + } + + if let Some(entry) = func.layout.entry_block() { + legalize_entry_params(func, entry); + spill_entry_params(func, entry); + } +} + +/// Legalize the libcall signature, which we may generate on the fly after +/// `legalize_signatures` has been called. +pub fn legalize_libcall_signature(signature: &mut Signature, isa: &dyn TargetIsa) { + if let Some(s) = legalize_signature(signature, false, isa) { + *signature = s; + } +} + +/// Legalize the given signature. +/// +/// `current` is true if this is the signature for the current function. +fn legalize_signature( + signature: &Signature, + current: bool, + isa: &dyn TargetIsa, +) -> Option { + let mut cow = Cow::Borrowed(signature); + isa.legalize_signature(&mut cow, current); + match cow { + Cow::Borrowed(_) => None, + Cow::Owned(s) => Some(s), + } +} + +/// Legalize the entry block parameters after `func`'s signature has been legalized. +/// +/// The legalized signature may contain more parameters than the original signature, and the +/// parameter types have been changed. This function goes through the parameters of the entry block +/// and replaces them with parameters of the right type for the ABI. +/// +/// The original entry block parameters are computed from the new ABI parameters by code inserted at +/// the top of the entry block. +fn legalize_entry_params(func: &mut Function, entry: Block) { + let mut has_sret = false; + let mut has_link = false; + let mut has_vmctx = false; + let mut has_sigid = false; + let mut has_stack_limit = false; + + // Insert position for argument conversion code. + // We want to insert instructions before the first instruction in the entry block. + // If the entry block is empty, append instructions to it instead. + let mut pos = FuncCursor::new(func).at_first_inst(entry); + + // Keep track of the argument types in the ABI-legalized signature. + let mut abi_arg = 0; + + // Process the block parameters one at a time, possibly replacing one argument with multiple new + // ones. We do this by detaching the entry block parameters first. + let block_params = pos.func.dfg.detach_block_params(entry); + let mut old_arg = 0; + while let Some(arg) = block_params.get(old_arg, &pos.func.dfg.value_lists) { + old_arg += 1; + + let abi_type = pos.func.signature.params[abi_arg]; + let arg_type = pos.func.dfg.value_type(arg); + if arg_type == abi_type.value_type { + // No value translation is necessary, this argument matches the ABI type. + // Just use the original block argument value. This is the most common case. 
+ pos.func.dfg.attach_block_param(entry, arg); + match abi_type.purpose { + ArgumentPurpose::Normal => {} + ArgumentPurpose::FramePointer => {} + ArgumentPurpose::CalleeSaved => {} + ArgumentPurpose::StructReturn => { + debug_assert!(!has_sret, "Multiple sret arguments found"); + has_sret = true; + } + ArgumentPurpose::VMContext => { + debug_assert!(!has_vmctx, "Multiple vmctx arguments found"); + has_vmctx = true; + } + ArgumentPurpose::SignatureId => { + debug_assert!(!has_sigid, "Multiple sigid arguments found"); + has_sigid = true; + } + ArgumentPurpose::StackLimit => { + debug_assert!(!has_stack_limit, "Multiple stack_limit arguments found"); + has_stack_limit = true; + } + _ => panic!("Unexpected special-purpose arg {}", abi_type), + } + abi_arg += 1; + } else { + // Compute the value we want for `arg` from the legalized ABI parameters. + let mut get_arg = |func: &mut Function, ty| { + let abi_type = func.signature.params[abi_arg]; + debug_assert_eq!( + abi_type.purpose, + ArgumentPurpose::Normal, + "Can't legalize special-purpose argument" + ); + if ty == abi_type.value_type { + abi_arg += 1; + Ok(func.dfg.append_block_param(entry, ty)) + } else { + Err(abi_type) + } + }; + let converted = convert_from_abi(&mut pos, arg_type, Some(arg), &mut get_arg); + // The old `arg` is no longer an attached block argument, but there are probably still + // uses of the value. + debug_assert_eq!(pos.func.dfg.resolve_aliases(arg), converted); + } + } + + // The legalized signature may contain additional parameters representing special-purpose + // registers. + for &arg in &pos.func.signature.params[abi_arg..] { + match arg.purpose { + // Any normal parameters should have been processed above. + ArgumentPurpose::Normal => { + panic!("Leftover arg: {}", arg); + } + // The callee-save parameters should not appear until after register allocation is + // done. + ArgumentPurpose::FramePointer | ArgumentPurpose::CalleeSaved => { + panic!("Premature callee-saved arg {}", arg); + } + // These can be meaningfully added by `legalize_signature()`. + ArgumentPurpose::Link => { + debug_assert!(!has_link, "Multiple link parameters found"); + has_link = true; + } + ArgumentPurpose::StructReturn => { + debug_assert!(!has_sret, "Multiple sret parameters found"); + has_sret = true; + } + ArgumentPurpose::VMContext => { + debug_assert!(!has_vmctx, "Multiple vmctx parameters found"); + has_vmctx = true; + } + ArgumentPurpose::SignatureId => { + debug_assert!(!has_sigid, "Multiple sigid parameters found"); + has_sigid = true; + } + ArgumentPurpose::StackLimit => { + debug_assert!(!has_stack_limit, "Multiple stack_limit parameters found"); + has_stack_limit = true; + } + } + + // Just create entry block values to match here. We will use them in `handle_return_abi()` + // below. + pos.func.dfg.append_block_param(entry, arg.value_type); + } +} + +/// Legalize the results returned from a call instruction to match the ABI signature. +/// +/// The cursor `pos` points to a call instruction with at least one return value. The cursor will +/// be left pointing after the instructions inserted to convert the return values. +/// +/// This function is very similar to the `legalize_entry_params` function above. +/// +/// Returns the possibly new instruction representing the call. 
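+///
+/// Illustrative example (assuming a 32-bit target where `i64` is not a legal
+/// ABI type): a call returning one `i64` is rewritten so that the call itself
+/// returns two `i32` ABI values, and `convert_from_abi` then inserts an
+/// `iconcat` to rebuild the original `i64` result.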
+fn legalize_inst_results(pos: &mut FuncCursor, mut get_abi_type: ResType) -> Inst +where + ResType: FnMut(&Function, usize) -> AbiParam, +{ + let call = pos + .current_inst() + .expect("Cursor must point to a call instruction"); + + // We theoretically allow for call instructions that return a number of fixed results before + // the call return values. In practice, it doesn't happen. + debug_assert_eq!( + pos.func.dfg[call] + .opcode() + .constraints() + .num_fixed_results(), + 0, + "Fixed results on calls not supported" + ); + + let results = pos.func.dfg.detach_results(call); + let mut next_res = 0; + let mut abi_res = 0; + + // Point immediately after the call. + pos.next_inst(); + + while let Some(res) = results.get(next_res, &pos.func.dfg.value_lists) { + next_res += 1; + + let res_type = pos.func.dfg.value_type(res); + if res_type == get_abi_type(pos.func, abi_res).value_type { + // No value translation is necessary, this result matches the ABI type. + pos.func.dfg.attach_result(call, res); + abi_res += 1; + } else { + let mut get_res = |func: &mut Function, ty| { + let abi_type = get_abi_type(func, abi_res); + if ty == abi_type.value_type { + let last_res = func.dfg.append_result(call, ty); + abi_res += 1; + Ok(last_res) + } else { + Err(abi_type) + } + }; + let v = convert_from_abi(pos, res_type, Some(res), &mut get_res); + debug_assert_eq!(pos.func.dfg.resolve_aliases(res), v); + } + } + + call +} + +fn assert_is_valid_sret_legalization( + old_ret_list: &EntityList, + old_sig: &Signature, + new_sig: &Signature, + pos: &FuncCursor, +) { + debug_assert_eq!( + old_sig.returns.len(), + old_ret_list.len(&pos.func.dfg.value_lists) + ); + + // Assert that the only difference in special parameters is that there + // is an appended struct return pointer parameter. + let old_special_params: Vec<_> = old_sig + .params + .iter() + .filter(|r| r.purpose != ArgumentPurpose::Normal) + .collect(); + let new_special_params: Vec<_> = new_sig + .params + .iter() + .filter(|r| r.purpose != ArgumentPurpose::Normal) + .collect(); + debug_assert_eq!(old_special_params.len() + 1, new_special_params.len()); + debug_assert!(old_special_params + .iter() + .zip(&new_special_params) + .all(|(old, new)| old.purpose == new.purpose)); + debug_assert_eq!( + new_special_params.last().unwrap().purpose, + ArgumentPurpose::StructReturn + ); + + // If the special returns have changed at all, then the only change + // should be that the struct return pointer is returned back out of the + // function, so that callers don't have to load its stack address again. 
+ let old_special_returns: Vec<_> = old_sig + .returns + .iter() + .filter(|r| r.purpose != ArgumentPurpose::Normal) + .collect(); + let new_special_returns: Vec<_> = new_sig + .returns + .iter() + .filter(|r| r.purpose != ArgumentPurpose::Normal) + .collect(); + debug_assert!(old_special_returns + .iter() + .zip(&new_special_returns) + .all(|(old, new)| old.purpose == new.purpose)); + debug_assert!( + old_special_returns.len() == new_special_returns.len() + || (old_special_returns.len() + 1 == new_special_returns.len() + && new_special_returns.last().unwrap().purpose == ArgumentPurpose::StructReturn) + ); +} + +fn legalize_sret_call(isa: &dyn TargetIsa, pos: &mut FuncCursor, sig_ref: SigRef, call: Inst) { + let old_ret_list = pos.func.dfg.detach_results(call); + let old_sig = pos.func.dfg.old_signatures[sig_ref] + .take() + .expect("must have an old signature when using an `sret` parameter"); + + // We make a bunch of assumptions about the shape of the old, multi-return + // signature and the new, sret-using signature in this legalization + // function. Assert that these assumptions hold true in debug mode. + if cfg!(debug_assertions) { + assert_is_valid_sret_legalization( + &old_ret_list, + &old_sig, + &pos.func.dfg.signatures[sig_ref], + &pos, + ); + } + + // Go through and remove all normal return values from the `call` + // instruction's returns list. These will be stored into the stack slot that + // the sret points to. At the same time, calculate the size of the sret + // stack slot. + let mut sret_slot_size = 0; + for (i, ret) in old_sig.returns.iter().enumerate() { + let v = old_ret_list.get(i, &pos.func.dfg.value_lists).unwrap(); + let ty = pos.func.dfg.value_type(v); + if ret.purpose == ArgumentPurpose::Normal { + debug_assert_eq!(ret.location, ArgumentLoc::Unassigned); + let ty = legalized_type_for_sret(ty); + let size = ty.bytes(); + sret_slot_size = round_up_to_multiple_of_type_align(sret_slot_size, ty) + size; + } else { + let new_v = pos.func.dfg.append_result(call, ty); + pos.func.dfg.change_to_alias(v, new_v); + } + } + + let stack_slot = pos.func.stack_slots.push(StackSlotData { + kind: StackSlotKind::StructReturnSlot, + size: sret_slot_size, + offset: None, + }); + + // Append the sret pointer to the `call` instruction's arguments. + let ptr_type = Type::triple_pointer_type(isa.triple()); + let sret_arg = pos.ins().stack_addr(ptr_type, stack_slot, 0); + pos.func.dfg.append_inst_arg(call, sret_arg); + + // The sret pointer might be returned by the signature as well. If so, we + // need to add it to the `call` instruction's results list. + // + // Additionally, when the sret is explicitly returned in this calling + // convention, then use it when loading the sret returns back into ssa + // values to avoid keeping the original `sret_arg` live and potentially + // having to do spills and fills. + let sret = + if pos.func.dfg.signatures[sig_ref].uses_special_return(ArgumentPurpose::StructReturn) { + pos.func.dfg.append_result(call, ptr_type) + } else { + sret_arg + }; + + // Finally, load each of the call's return values out of the sret stack + // slot. 
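+    // Illustrative layout (fields are size-aligned via
+    // `round_up_to_multiple_of_type_align` below): for normal returns
+    // (i32, i64, i32) the loads use offsets 0, 8 and 16, and the slot
+    // occupies 20 bytes.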
+    pos.goto_after_inst(call);
+    let mut offset = 0;
+    for i in 0..old_ret_list.len(&pos.func.dfg.value_lists) {
+        if old_sig.returns[i].purpose != ArgumentPurpose::Normal {
+            continue;
+        }
+
+        let old_v = old_ret_list.get(i, &pos.func.dfg.value_lists).unwrap();
+        let ty = pos.func.dfg.value_type(old_v);
+        let mut legalized_ty = legalized_type_for_sret(ty);
+
+        offset = round_up_to_multiple_of_type_align(offset, legalized_ty);
+
+        let new_legalized_v =
+            pos.ins()
+                .load(legalized_ty, MemFlags::trusted(), sret, offset as i32);
+
+        // "Illegalize" the loaded value from the legalized type back to its
+        // original `ty`. This is basically the opposite of
+        // `legalize_type_for_sret_store`.
+        let mut new_v = new_legalized_v;
+        if ty.is_bool() {
+            legalized_ty = legalized_ty.as_bool_pedantic();
+            new_v = pos.ins().raw_bitcast(legalized_ty, new_v);
+
+            if ty.bits() < legalized_ty.bits() {
+                legalized_ty = ty;
+                new_v = pos.ins().breduce(legalized_ty, new_v);
+            }
+        }
+
+        pos.func.dfg.change_to_alias(old_v, new_v);
+
+        offset += legalized_ty.bytes();
+    }
+
+    pos.func.dfg.old_signatures[sig_ref] = Some(old_sig);
+}
+
+/// Compute the original value of type `ty` from the legalized ABI arguments.
+///
+/// The conversion is recursive, controlled by the `get_arg` closure which is called to retrieve an
+/// ABI argument. It returns:
+///
+/// - `Ok(arg)` if the requested type matches the next ABI argument.
+/// - `Err(arg_type)` if further conversions are needed from the ABI argument `arg_type`.
+///
+/// If the `into_result` value is provided, the converted result will be written into that value.
+fn convert_from_abi<GetArg>(
+    pos: &mut FuncCursor,
+    ty: Type,
+    into_result: Option<Value>,
+    get_arg: &mut GetArg,
+) -> Value
+where
+    GetArg: FnMut(&mut Function, Type) -> Result<Value, AbiParam>,
+{
+    // Terminate the recursion when we get the desired type.
+    let arg_type = match get_arg(pos.func, ty) {
+        Ok(v) => {
+            debug_assert_eq!(pos.func.dfg.value_type(v), ty);
+            debug_assert_eq!(into_result, None);
+            return v;
+        }
+        Err(t) => t,
+    };
+
+    // Reconstruct how `ty` was legalized into the `arg_type` argument.
+    let conversion = legalize_abi_value(ty, &arg_type);
+
+    debug!("convert_from_abi({}): {:?}", ty, conversion);
+
+    // The conversion describes the value-to-ABI-argument direction; we implement the reverse
+    // conversion here.
+    match conversion {
+        // Construct a `ty` by concatenating two ABI integers.
+        ValueConversion::IntSplit => {
+            let abi_ty = ty.half_width().expect("Invalid type for conversion");
+            let lo = convert_from_abi(pos, abi_ty, None, get_arg);
+            let hi = convert_from_abi(pos, abi_ty, None, get_arg);
+            debug!(
+                "intsplit {}: {}, {}: {}",
+                lo,
+                pos.func.dfg.value_type(lo),
+                hi,
+                pos.func.dfg.value_type(hi)
+            );
+            pos.ins().with_results([into_result]).iconcat(lo, hi)
+        }
+        // Construct a `ty` by concatenating two halves of a vector.
+        ValueConversion::VectorSplit => {
+            let abi_ty = ty.half_vector().expect("Invalid type for conversion");
+            let lo = convert_from_abi(pos, abi_ty, None, get_arg);
+            let hi = convert_from_abi(pos, abi_ty, None, get_arg);
+            pos.ins().with_results([into_result]).vconcat(lo, hi)
+        }
+        // Construct a `ty` by bit-casting from an integer type.
+        ValueConversion::IntBits => {
+            debug_assert!(!ty.is_int());
+            let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion");
+            let arg = convert_from_abi(pos, abi_ty, None, get_arg);
+            pos.ins().with_results([into_result]).bitcast(ty, arg)
+        }
+        // ABI argument is a sign-extended version of the value we want.
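+        // Illustrative: an `i8` value that was passed as a sign-extended `i32`
+        // ABI argument is recovered here with an `ireduce` back to `i8`.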
+ ValueConversion::Sext(abi_ty) => { + let arg = convert_from_abi(pos, abi_ty, None, get_arg); + // TODO: Currently, we don't take advantage of the ABI argument being sign-extended. + // We could insert an `assert_sreduce` which would fold with a following `sextend` of + // this value. + pos.ins().with_results([into_result]).ireduce(ty, arg) + } + ValueConversion::Uext(abi_ty) => { + let arg = convert_from_abi(pos, abi_ty, None, get_arg); + // TODO: Currently, we don't take advantage of the ABI argument being sign-extended. + // We could insert an `assert_ureduce` which would fold with a following `uextend` of + // this value. + pos.ins().with_results([into_result]).ireduce(ty, arg) + } + } +} + +/// Convert `value` to match an ABI signature by inserting instructions at `pos`. +/// +/// This may require expanding the value to multiple ABI arguments. The conversion process is +/// recursive and controlled by the `put_arg` closure. When a candidate argument value is presented +/// to the closure, it will perform one of two actions: +/// +/// 1. If the suggested argument has an acceptable value type, consume it by adding it to the list +/// of arguments and return `Ok(())`. +/// 2. If the suggested argument doesn't have the right value type, don't change anything, but +/// return the `Err(AbiParam)` that is needed. +/// +fn convert_to_abi( + pos: &mut FuncCursor, + cfg: &ControlFlowGraph, + value: Value, + put_arg: &mut PutArg, +) where + PutArg: FnMut(&mut Function, Value) -> Result<(), AbiParam>, +{ + // Start by invoking the closure to either terminate the recursion or get the argument type + // we're trying to match. + let arg_type = match put_arg(pos.func, value) { + Ok(_) => return, + Err(t) => t, + }; + + let ty = pos.func.dfg.value_type(value); + match legalize_abi_value(ty, &arg_type) { + ValueConversion::IntSplit => { + let curpos = pos.position(); + let srcloc = pos.srcloc(); + let (lo, hi) = isplit(&mut pos.func, cfg, curpos, srcloc, value); + convert_to_abi(pos, cfg, lo, put_arg); + convert_to_abi(pos, cfg, hi, put_arg); + } + ValueConversion::VectorSplit => { + let curpos = pos.position(); + let srcloc = pos.srcloc(); + let (lo, hi) = vsplit(&mut pos.func, cfg, curpos, srcloc, value); + convert_to_abi(pos, cfg, lo, put_arg); + convert_to_abi(pos, cfg, hi, put_arg); + } + ValueConversion::IntBits => { + debug_assert!(!ty.is_int()); + let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion"); + let arg = pos.ins().bitcast(abi_ty, value); + convert_to_abi(pos, cfg, arg, put_arg); + } + ValueConversion::Sext(abi_ty) => { + let arg = pos.ins().sextend(abi_ty, value); + convert_to_abi(pos, cfg, arg, put_arg); + } + ValueConversion::Uext(abi_ty) => { + let arg = pos.ins().uextend(abi_ty, value); + convert_to_abi(pos, cfg, arg, put_arg); + } + } +} + +/// Check if a sequence of arguments match a desired sequence of argument types. +fn check_arg_types(dfg: &DataFlowGraph, args: &[Value], types: &[AbiParam]) -> bool { + let arg_types = args.iter().map(|&v| dfg.value_type(v)); + let sig_types = types.iter().map(|&at| at.value_type); + arg_types.eq(sig_types) +} + +/// Check if the arguments of the call `inst` match the signature. +/// +/// Returns `Ok(())` if the signature matches and no changes are needed, or `Err(sig_ref)` if the +/// signature doesn't match. +fn check_call_signature(dfg: &DataFlowGraph, inst: Inst) -> Result<(), SigRef> { + // Extract the signature and argument values. 
+ let (sig_ref, args) = match dfg[inst].analyze_call(&dfg.value_lists) { + CallInfo::Direct(func, args) => (dfg.ext_funcs[func].signature, args), + CallInfo::Indirect(sig_ref, args) => (sig_ref, args), + CallInfo::NotACall => panic!("Expected call, got {:?}", dfg[inst]), + }; + let sig = &dfg.signatures[sig_ref]; + + if check_arg_types(dfg, args, &sig.params[..]) + && check_arg_types(dfg, dfg.inst_results(inst), &sig.returns[..]) + { + // All types check out. + Ok(()) + } else { + // Call types need fixing. + Err(sig_ref) + } +} + +/// Check if the arguments of the return `inst` match the signature. +fn check_return_signature(dfg: &DataFlowGraph, inst: Inst, sig: &Signature) -> bool { + check_arg_types(dfg, dfg.inst_variable_args(inst), &sig.returns) +} + +/// Insert ABI conversion code for the arguments to the call or return instruction at `pos`. +/// +/// - `abi_args` is the number of arguments that the ABI signature requires. +/// - `get_abi_type` is a closure that can provide the desired `AbiParam` for a given ABI +/// argument number in `0..abi_args`. +/// +fn legalize_inst_arguments( + pos: &mut FuncCursor, + cfg: &ControlFlowGraph, + abi_args: usize, + mut get_abi_type: ArgType, +) where + ArgType: FnMut(&Function, usize) -> AbiParam, +{ + let inst = pos + .current_inst() + .expect("Cursor must point to a call instruction"); + + // Lift the value list out of the call instruction so we modify it. + let mut vlist = pos.func.dfg[inst] + .take_value_list() + .expect("Call must have a value list"); + + // The value list contains all arguments to the instruction, including the callee on an + // indirect call which isn't part of the call arguments that must match the ABI signature. + // Figure out how many fixed values are at the front of the list. We won't touch those. + let num_fixed_values = pos.func.dfg[inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + let have_args = vlist.len(&pos.func.dfg.value_lists) - num_fixed_values; + if abi_args < have_args { + // This happens with multiple return values after we've legalized the + // signature but haven't legalized the return instruction yet. This + // legalization is handled in `handle_return_abi`. + pos.func.dfg[inst].put_value_list(vlist); + return; + } + + // Grow the value list to the right size and shift all the existing arguments to the right. + // This lets us write the new argument values into the list without overwriting the old + // arguments. + // + // Before: + // + // <--> fixed_values + // <-----------> have_args + // [FFFFOOOOOOOOOOOOO] + // + // After grow_at(): + // + // <--> fixed_values + // <-----------> have_args + // <------------------> abi_args + // [FFFF-------OOOOOOOOOOOOO] + // ^ + // old_arg_offset + // + // After writing the new arguments: + // + // <--> fixed_values + // <------------------> abi_args + // [FFFFNNNNNNNNNNNNNNNNNNNN] + // + vlist.grow_at( + num_fixed_values, + abi_args - have_args, + &mut pos.func.dfg.value_lists, + ); + let old_arg_offset = num_fixed_values + abi_args - have_args; + + let mut abi_arg = 0; + for old_arg in 0..have_args { + let old_value = vlist + .get(old_arg_offset + old_arg, &pos.func.dfg.value_lists) + .unwrap(); + let mut put_arg = |func: &mut Function, arg| { + let abi_type = get_abi_type(func, abi_arg); + if func.dfg.value_type(arg) == abi_type.value_type { + // This is the argument type we need. 
+ vlist.as_mut_slice(&mut func.dfg.value_lists)[num_fixed_values + abi_arg] = arg; + abi_arg += 1; + Ok(()) + } else { + Err(abi_type) + } + }; + convert_to_abi(pos, cfg, old_value, &mut put_arg); + } + + // Put the modified value list back. + pos.func.dfg[inst].put_value_list(vlist); +} + +/// Ensure that the `ty` being returned is a type that can be loaded and stored +/// (potentially after another narrowing legalization) from memory, since it +/// will go into the `sret` space. +fn legalized_type_for_sret(ty: Type) -> Type { + if ty.is_bool() { + let bits = std::cmp::max(8, ty.bits()); + Type::int(bits).unwrap() + } else { + ty + } +} + +/// Insert any legalization code required to ensure that `val` can be stored +/// into the `sret` memory. Returns the (potentially new, potentially +/// unmodified) legalized value and its type. +fn legalize_type_for_sret_store(pos: &mut FuncCursor, val: Value, ty: Type) -> (Value, Type) { + if ty.is_bool() { + let bits = std::cmp::max(8, ty.bits()); + let ty = Type::int(bits).unwrap(); + let val = pos.ins().bint(ty, val); + (val, ty) + } else { + (val, ty) + } +} + +/// Insert ABI conversion code before and after the call instruction at `pos`. +/// +/// Instructions inserted before the call will compute the appropriate ABI values for the +/// callee's new ABI-legalized signature. The function call arguments are rewritten in place to +/// match the new signature. +/// +/// Instructions will be inserted after the call to convert returned ABI values back to the +/// original return values. The call's result values will be adapted to match the new signature. +/// +/// Returns `true` if any instructions were inserted. +pub fn handle_call_abi( + isa: &dyn TargetIsa, + mut inst: Inst, + func: &mut Function, + cfg: &ControlFlowGraph, +) -> bool { + let pos = &mut FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + // Start by checking if the argument types already match the signature. + let sig_ref = match check_call_signature(&pos.func.dfg, inst) { + Ok(_) => return spill_call_arguments(pos), + Err(s) => s, + }; + + let sig = &pos.func.dfg.signatures[sig_ref]; + let old_sig = &pos.func.dfg.old_signatures[sig_ref]; + + if sig.uses_struct_return_param() + && old_sig + .as_ref() + .map_or(false, |s| !s.uses_struct_return_param()) + { + legalize_sret_call(isa, pos, sig_ref, inst); + } else { + // OK, we need to fix the call arguments to match the ABI signature. + let abi_args = pos.func.dfg.signatures[sig_ref].params.len(); + legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| { + func.dfg.signatures[sig_ref].params[abi_arg] + }); + + if !pos.func.dfg.signatures[sig_ref].returns.is_empty() { + inst = legalize_inst_results(pos, |func, abi_res| { + func.dfg.signatures[sig_ref].returns[abi_res] + }); + } + } + + debug_assert!( + check_call_signature(&pos.func.dfg, inst).is_ok(), + "Signature still wrong: {}, {}{}", + pos.func.dfg.display_inst(inst, None), + sig_ref, + pos.func.dfg.signatures[sig_ref] + ); + + // Go back and insert spills for any stack arguments. + pos.goto_inst(inst); + spill_call_arguments(pos); + + // Yes, we changed stuff. + true +} + +/// Insert ABI conversion code before and after the return instruction at `inst`. +/// +/// Return `true` if any instructions were inserted. +pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph) -> bool { + // Check if the returned types already match the signature. 
+ if check_return_signature(&func.dfg, inst, &func.signature) { + return false; + } + + // Count the special-purpose return values (`link`, `sret`, and `vmctx`) that were appended to + // the legalized signature. + let special_args = func + .signature + .returns + .iter() + .rev() + .take_while(|&rt| { + rt.purpose == ArgumentPurpose::Link + || rt.purpose == ArgumentPurpose::StructReturn + || rt.purpose == ArgumentPurpose::VMContext + }) + .count(); + let abi_args = func.signature.returns.len() - special_args; + + let pos = &mut FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| { + func.signature.returns[abi_arg] + }); + // Append special return arguments for any `sret`, `link`, and `vmctx` return values added to + // the legalized signature. These values should simply be propagated from the entry block + // arguments. + if special_args > 0 { + debug!( + "Adding {} special-purpose arguments to {}", + special_args, + pos.func.dfg.display_inst(inst, None) + ); + let mut vlist = pos.func.dfg[inst].take_value_list().unwrap(); + let mut sret = None; + + for arg in &pos.func.signature.returns[abi_args..] { + match arg.purpose { + ArgumentPurpose::Link + | ArgumentPurpose::StructReturn + | ArgumentPurpose::VMContext => {} + ArgumentPurpose::Normal => panic!("unexpected return value {}", arg), + _ => panic!("Unsupported special purpose return value {}", arg), + } + // A `link`/`sret`/`vmctx` return value can only appear in a signature that has a + // unique matching argument. They are appended at the end, so search the signature from + // the end. + let idx = pos + .func + .signature + .params + .iter() + .rposition(|t| t.purpose == arg.purpose) + .expect("No matching special purpose argument."); + // Get the corresponding entry block value and add it to the return instruction's + // arguments. + let val = pos + .func + .dfg + .block_params(pos.func.layout.entry_block().unwrap())[idx]; + debug_assert_eq!(pos.func.dfg.value_type(val), arg.value_type); + vlist.push(val, &mut pos.func.dfg.value_lists); + + if let ArgumentPurpose::StructReturn = arg.purpose { + sret = Some(val); + } + } + + // Store all the regular returns into the retptr space and remove them + // from the `return` instruction's value list. + if let Some(sret) = sret { + let mut offset = 0; + let num_regular_rets = vlist.len(&pos.func.dfg.value_lists) - special_args; + for i in 0..num_regular_rets { + debug_assert_eq!( + pos.func.old_signature.as_ref().unwrap().returns[i].purpose, + ArgumentPurpose::Normal, + ); + + // The next return value to process is always at `0`, since the + // list is emptied as we iterate. + let v = vlist.get(0, &pos.func.dfg.value_lists).unwrap(); + let ty = pos.func.dfg.value_type(v); + let (v, ty) = legalize_type_for_sret_store(pos, v, ty); + + let size = ty.bytes(); + offset = round_up_to_multiple_of_type_align(offset, ty); + + pos.ins().store(MemFlags::trusted(), v, sret, offset as i32); + vlist.remove(0, &mut pos.func.dfg.value_lists); + + offset += size; + } + } + pos.func.dfg[inst].put_value_list(vlist); + } + + debug_assert_eq!( + pos.func.dfg.inst_variable_args(inst).len(), + abi_args + special_args + ); + debug_assert!( + check_return_signature(&pos.func.dfg, inst, &pos.func.signature), + "Signature still wrong: {} / signature {}", + pos.func.dfg.display_inst(inst, None), + pos.func.signature + ); + + // Yes, we changed stuff. 
+ true +} + +fn round_up_to_multiple_of_type_align(bytes: u32, ty: Type) -> u32 { + // We don't have a dedicated alignment for types, so assume they are + // size-aligned. + let align = ty.bytes(); + round_up_to_multiple_of_pow2(bytes, align) +} + +/// Round `n` up to the next multiple of `to` that is greater than or equal to +/// `n`. +/// +/// `to` must be a power of two and greater than zero. +/// +/// This is useful for rounding an offset or pointer up to some type's required +/// alignment. +fn round_up_to_multiple_of_pow2(n: u32, to: u32) -> u32 { + debug_assert!(to > 0); + debug_assert!(to.is_power_of_two()); + + // The simple version of this function is + // + // (n + to - 1) / to * to + // + // Consider the numerator: `n + to - 1`. This is ensuring that if there is + // any remainder for `n / to`, then the result of the division is one + // greater than `n / to`, and that otherwise we get exactly the same result + // as `n / to` due to integer division rounding off the remainder. In other + // words, we only round up if `n` is not aligned to `to`. + // + // However, we know `to` is a power of two, and therefore `anything / to` is + // equivalent to `anything >> log2(to)` and `anything * to` is equivalent to + // `anything << log2(to)`. We can therefore rewrite our simplified function + // into the following: + // + // (n + to - 1) >> log2(to) << log2(to) + // + // But shifting a value right by some number of bits `b` and then shifting + // it left by that same number of bits `b` is equivalent to clearing the + // bottom `b` bits of the number. We can clear the bottom `b` bits of a + // number by bit-wise and'ing the number with the bit-wise not of `2^b - 1`. + // Plugging this into our function and simplifying, we get: + // + // (n + to - 1) >> log2(to) << log2(to) + // = (n + to - 1) & !(2^log2(to) - 1) + // = (n + to - 1) & !(to - 1) + // + // And now we have the final version of this function! + + (n + to - 1) & !(to - 1) +} + +/// Assign stack slots to incoming function parameters on the stack. +/// +/// Values that are passed into the function on the stack must be assigned to an `IncomingArg` +/// stack slot already during legalization. +fn spill_entry_params(func: &mut Function, entry: Block) { + for (abi, &arg) in func + .signature + .params + .iter() + .zip(func.dfg.block_params(entry)) + { + if let ArgumentLoc::Stack(offset) = abi.location { + let ss = func.stack_slots.make_incoming_arg(abi.value_type, offset); + func.locations[arg] = ValueLoc::Stack(ss); + } + } +} + +/// Assign stack slots to outgoing function arguments on the stack. +/// +/// Values that are passed to a called function on the stack must be assigned to a matching +/// `OutgoingArg` stack slot. The assignment must happen immediately before the call. +/// +/// TODO: The outgoing stack slots can be written a bit earlier, as long as there are no branches +/// or calls between writing the stack slots and the call instruction. Writing the slots earlier +/// could help reduce register pressure before the call. +fn spill_call_arguments(pos: &mut FuncCursor) -> bool { + let inst = pos + .current_inst() + .expect("Cursor must point to a call instruction"); + let sig_ref = pos + .func + .dfg + .call_signature(inst) + .expect("Call instruction expected."); + + // Start by building a list of stack slots and arguments to be replaced. + // This requires borrowing `pos.func.dfg`, so we can't change anything. 
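+    // The actual spills and argument rewrites happen in the loop below, after the borrow ends.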
+    let arglist = {
+        let locations = &pos.func.locations;
+        let stack_slots = &mut pos.func.stack_slots;
+        pos.func
+            .dfg
+            .inst_variable_args(inst)
+            .iter()
+            .zip(&pos.func.dfg.signatures[sig_ref].params)
+            .enumerate()
+            .filter_map(|(idx, (&arg, abi))| {
+                match abi.location {
+                    ArgumentLoc::Stack(offset) => {
+                        // Assign `arg` to a new stack slot, unless it's already in the correct
+                        // slot. The legalization needs to be idempotent, so we should see a
+                        // correct outgoing slot on the second pass.
+                        let ss = stack_slots.get_outgoing_arg(abi.value_type, offset);
+                        if locations[arg] != ValueLoc::Stack(ss) {
+                            Some((idx, arg, ss))
+                        } else {
+                            None
+                        }
+                    }
+                    _ => None,
+                }
+            })
+            .collect::<Vec<_>>()
+    };
+
+    if arglist.is_empty() {
+        return false;
+    }
+
+    // Insert the spill instructions and rewrite call arguments.
+    for (idx, arg, ss) in arglist {
+        let stack_val = pos.ins().spill(arg);
+        pos.func.locations[stack_val] = ValueLoc::Stack(ss);
+        pos.func.dfg.inst_variable_args_mut(inst)[idx] = stack_val;
+    }
+
+    // We changed stuff.
+    true
+}
+
+#[cfg(test)]
+mod tests {
+    use super::round_up_to_multiple_of_pow2;
+
+    #[test]
+    fn round_up_to_multiple_of_pow2_works() {
+        for (n, to, expected) in vec![
+            (0, 1, 0),
+            (1, 1, 1),
+            (2, 1, 2),
+            (0, 2, 0),
+            (1, 2, 2),
+            (2, 2, 2),
+            (3, 2, 4),
+            (0, 4, 0),
+            (1, 4, 4),
+            (2, 4, 4),
+            (3, 4, 4),
+            (4, 4, 4),
+            (5, 4, 8),
+        ] {
+            let actual = round_up_to_multiple_of_pow2(n, to);
+            assert_eq!(
+                actual, expected,
+                "round_up_to_multiple_of_pow2(n = {}, to = {}) = {} (expected {})",
+                n, to, actual, expected
+            );
+        }
+    }
+}
diff --git a/cranelift/codegen/src/legalizer/call.rs b/cranelift/codegen/src/legalizer/call.rs
new file mode 100644
index 0000000000..4321dbb90b
--- /dev/null
+++ b/cranelift/codegen/src/legalizer/call.rs
@@ -0,0 +1,54 @@
+//! Legalization of calls.
+//!
+//! This module exports the `expand_call` function which transforms a `call`
+//! instruction into `func_addr` and `call_indirect` instructions.
+
+use crate::cursor::{Cursor, FuncCursor};
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::{self, InstBuilder};
+use crate::isa::TargetIsa;
+
+/// Expand a `call` instruction. This lowers it to a `func_addr` followed by a
+/// `call_indirect`, and is only done when the ABI doesn't support direct calls.
+pub fn expand_call(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    _cfg: &mut ControlFlowGraph,
+    isa: &dyn TargetIsa,
+) {
+    // Unpack the instruction.
+    let (func_ref, old_args) = match func.dfg[inst] {
+        ir::InstructionData::Call {
+            opcode,
+            ref args,
+            func_ref,
+        } => {
+            debug_assert_eq!(opcode, ir::Opcode::Call);
+            (func_ref, args.clone())
+        }
+        _ => panic!("Wanted call: {}", func.dfg.display_inst(inst, None)),
+    };
+
+    let ptr_ty = isa.pointer_type();
+
+    let sig = func.dfg.ext_funcs[func_ref].signature;
+
+    let callee = {
+        let mut pos = FuncCursor::new(func).at_inst(inst);
+        pos.use_srcloc(inst);
+        pos.ins().func_addr(ptr_ty, func_ref)
+    };
+
+    let mut new_args = ir::ValueList::default();
+    new_args.push(callee, &mut func.dfg.value_lists);
+    for i in 0..old_args.len(&func.dfg.value_lists) {
+        new_args.push(
+            old_args.as_slice(&func.dfg.value_lists)[i],
+            &mut func.dfg.value_lists,
+        );
+    }
+
+    func.dfg
+        .replace(inst)
+        .CallIndirect(ir::Opcode::CallIndirect, ptr_ty, sig, new_args);
+}
diff --git a/cranelift/codegen/src/legalizer/globalvalue.rs b/cranelift/codegen/src/legalizer/globalvalue.rs
new file mode 100644
index 0000000000..5c7a72b45c
--- /dev/null
+++ b/cranelift/codegen/src/legalizer/globalvalue.rs
@@ -0,0 +1,140 @@
+//!
Legalization of global values. +//! +//! This module exports the `expand_global_value` function which transforms a `global_value` +//! instruction into code that depends on the kind of global value referenced. + +use crate::cursor::{Cursor, FuncCursor}; +use crate::flowgraph::ControlFlowGraph; +use crate::ir::{self, InstBuilder}; +use crate::isa::TargetIsa; + +/// Expand a `global_value` instruction according to the definition of the global value. +pub fn expand_global_value( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + isa: &dyn TargetIsa, +) { + // Unpack the instruction. + let gv = match func.dfg[inst] { + ir::InstructionData::UnaryGlobalValue { + opcode, + global_value, + } => { + debug_assert_eq!(opcode, ir::Opcode::GlobalValue); + global_value + } + _ => panic!("Wanted global_value: {}", func.dfg.display_inst(inst, None)), + }; + + match func.global_values[gv] { + ir::GlobalValueData::VMContext => vmctx_addr(inst, func), + ir::GlobalValueData::IAddImm { + base, + offset, + global_type, + } => iadd_imm_addr(inst, func, base, offset.into(), global_type), + ir::GlobalValueData::Load { + base, + offset, + global_type, + readonly, + } => load_addr(inst, func, base, offset, global_type, readonly, isa), + ir::GlobalValueData::Symbol { tls, .. } => symbol(inst, func, gv, isa, tls), + } +} + +/// Expand a `global_value` instruction for a vmctx global. +fn vmctx_addr(inst: ir::Inst, func: &mut ir::Function) { + // Get the value representing the `vmctx` argument. + let vmctx = func + .special_param(ir::ArgumentPurpose::VMContext) + .expect("Missing vmctx parameter"); + + // Replace the `global_value` instruction's value with an alias to the vmctx arg. + let result = func.dfg.first_result(inst); + func.dfg.clear_results(inst); + func.dfg.change_to_alias(result, vmctx); + func.layout.remove_inst(inst); +} + +/// Expand a `global_value` instruction for an iadd_imm global. +fn iadd_imm_addr( + inst: ir::Inst, + func: &mut ir::Function, + base: ir::GlobalValue, + offset: i64, + global_type: ir::Type, +) { + let mut pos = FuncCursor::new(func).at_inst(inst); + + // Get the value for the lhs. For tidiness, expand VMContext here so that we avoid + // `vmctx_addr` which creates an otherwise unneeded value alias. + let lhs = if let ir::GlobalValueData::VMContext = pos.func.global_values[base] { + pos.func + .special_param(ir::ArgumentPurpose::VMContext) + .expect("Missing vmctx parameter") + } else { + pos.ins().global_value(global_type, base) + }; + + // Simply replace the `global_value` instruction with an `iadd_imm`, reusing the result value. + pos.func.dfg.replace(inst).iadd_imm(lhs, offset); +} + +/// Expand a `global_value` instruction for a load global. +fn load_addr( + inst: ir::Inst, + func: &mut ir::Function, + base: ir::GlobalValue, + offset: ir::immediates::Offset32, + global_type: ir::Type, + readonly: bool, + isa: &dyn TargetIsa, +) { + // We need to load a pointer from the `base` global value, so insert a new `global_value` + // instruction. This depends on the iterative legalization loop. Note that the IR verifier + // detects any cycles in the `load` globals. + let ptr_ty = isa.pointer_type(); + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + // Get the value for the base. For tidiness, expand VMContext here so that we avoid + // `vmctx_addr` which creates an otherwise unneeded value alias. 
+ let base_addr = if let ir::GlobalValueData::VMContext = pos.func.global_values[base] { + pos.func + .special_param(ir::ArgumentPurpose::VMContext) + .expect("Missing vmctx parameter") + } else { + pos.ins().global_value(ptr_ty, base) + }; + + // Global-value loads are always notrap and aligned. They may be readonly. + let mut mflags = ir::MemFlags::trusted(); + if readonly { + mflags.set_readonly(); + } + + // Perform the load. + pos.func + .dfg + .replace(inst) + .load(global_type, mflags, base_addr, offset); +} + +/// Expand a `global_value` instruction for a symbolic name global. +fn symbol( + inst: ir::Inst, + func: &mut ir::Function, + gv: ir::GlobalValue, + isa: &dyn TargetIsa, + tls: bool, +) { + let ptr_ty = isa.pointer_type(); + + if tls { + func.dfg.replace(inst).tls_value(ptr_ty, gv); + } else { + func.dfg.replace(inst).symbol_value(ptr_ty, gv); + } +} diff --git a/cranelift/codegen/src/legalizer/heap.rs b/cranelift/codegen/src/legalizer/heap.rs new file mode 100644 index 0000000000..cc4308c268 --- /dev/null +++ b/cranelift/codegen/src/legalizer/heap.rs @@ -0,0 +1,190 @@ +//! Legalization of heaps. +//! +//! This module exports the `expand_heap_addr` function which transforms a `heap_addr` +//! instruction into code that depends on the kind of heap referenced. + +use crate::cursor::{Cursor, FuncCursor}; +use crate::flowgraph::ControlFlowGraph; +use crate::ir::condcodes::IntCC; +use crate::ir::{self, InstBuilder}; +use crate::isa::TargetIsa; + +/// Expand a `heap_addr` instruction according to the definition of the heap. +pub fn expand_heap_addr( + inst: ir::Inst, + func: &mut ir::Function, + cfg: &mut ControlFlowGraph, + isa: &dyn TargetIsa, +) { + // Unpack the instruction. + let (heap, offset, access_size) = match func.dfg[inst] { + ir::InstructionData::HeapAddr { + opcode, + heap, + arg, + imm, + } => { + debug_assert_eq!(opcode, ir::Opcode::HeapAddr); + (heap, arg, imm.into()) + } + _ => panic!("Wanted heap_addr: {}", func.dfg.display_inst(inst, None)), + }; + + match func.heaps[heap].style { + ir::HeapStyle::Dynamic { bound_gv } => { + dynamic_addr(isa, inst, heap, offset, access_size, bound_gv, func) + } + ir::HeapStyle::Static { bound } => static_addr( + isa, + inst, + heap, + offset, + access_size, + bound.into(), + func, + cfg, + ), + } +} + +/// Expand a `heap_addr` for a dynamic heap. +fn dynamic_addr( + isa: &dyn TargetIsa, + inst: ir::Inst, + heap: ir::Heap, + offset: ir::Value, + access_size: u32, + bound_gv: ir::GlobalValue, + func: &mut ir::Function, +) { + let access_size = u64::from(access_size); + let offset_ty = func.dfg.value_type(offset); + let addr_ty = func.dfg.value_type(func.dfg.first_result(inst)); + let min_size = func.heaps[heap].min_size.into(); + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + // Start with the bounds check. Trap if `offset + access_size > bound`. + let bound = pos.ins().global_value(offset_ty, bound_gv); + let oob; + if access_size == 1 { + // `offset > bound - 1` is the same as `offset >= bound`. + oob = pos + .ins() + .icmp(IntCC::UnsignedGreaterThanOrEqual, offset, bound); + } else if access_size <= min_size { + // We know that bound >= min_size, so here we can compare `offset > bound - access_size` + // without wrapping. + let adj_bound = pos.ins().iadd_imm(bound, -(access_size as i64)); + oob = pos + .ins() + .icmp(IntCC::UnsignedGreaterThan, offset, adj_bound); + } else { + // We need an overflow check for the adjusted offset. 
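+        // When `access_size > min_size`, `offset + access_size` may wrap around in
+        // `offset_ty`, so compute the sum with an explicit carry-out and trap on
+        // overflow before comparing against the bound.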
+        let access_size_val = pos.ins().iconst(offset_ty, access_size as i64);
+        let (adj_offset, overflow) = pos.ins().iadd_ifcout(offset, access_size_val);
+        pos.ins().trapif(
+            isa.unsigned_add_overflow_condition(),
+            overflow,
+            ir::TrapCode::HeapOutOfBounds,
+        );
+        oob = pos
+            .ins()
+            .icmp(IntCC::UnsignedGreaterThan, adj_offset, bound);
+    }
+    pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
+
+    compute_addr(isa, inst, heap, addr_ty, offset, offset_ty, pos.func);
+}
+
+/// Expand a `heap_addr` for a static heap.
+fn static_addr(
+    isa: &dyn TargetIsa,
+    inst: ir::Inst,
+    heap: ir::Heap,
+    offset: ir::Value,
+    access_size: u32,
+    bound: u64,
+    func: &mut ir::Function,
+    cfg: &mut ControlFlowGraph,
+) {
+    let access_size = u64::from(access_size);
+    let offset_ty = func.dfg.value_type(offset);
+    let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+
+    // Start with the bounds check. Trap if `offset + access_size > bound`.
+    if access_size > bound {
+        // This will simply always trap since `offset >= 0`.
+        pos.ins().trap(ir::TrapCode::HeapOutOfBounds);
+        pos.func.dfg.replace(inst).iconst(addr_ty, 0);
+
+        // Split the block, as the trap is a terminator instruction.
+        let curr_block = pos.current_block().expect("Cursor is not in a block");
+        let new_block = pos.func.dfg.make_block();
+        pos.insert_block(new_block);
+        cfg.recompute_block(pos.func, curr_block);
+        cfg.recompute_block(pos.func, new_block);
+        return;
+    }
+
+    // Check `offset > limit`, where `limit` is now known to be non-negative.
+    let limit = bound - access_size;
+
+    // We may be able to omit the check entirely for 32-bit offsets if the heap bound is 4 GB or
+    // more.
+    if offset_ty != ir::types::I32 || limit < 0xffff_ffff {
+        let oob = if limit & 1 == 1 {
+            // Prefer testing `offset >= limit + 1` when limit is odd because an even number is
+            // likely to be a convenient constant on ARM and other RISC architectures.
+            pos.ins()
+                .icmp_imm(IntCC::UnsignedGreaterThanOrEqual, offset, limit as i64 + 1)
+        } else {
+            pos.ins()
+                .icmp_imm(IntCC::UnsignedGreaterThan, offset, limit as i64)
+        };
+        pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
+    }
+
+    compute_addr(isa, inst, heap, addr_ty, offset, offset_ty, pos.func);
+}
+
+/// Emit code for the base address computation of a `heap_addr` instruction.
+fn compute_addr(
+    isa: &dyn TargetIsa,
+    inst: ir::Inst,
+    heap: ir::Heap,
+    addr_ty: ir::Type,
+    mut offset: ir::Value,
+    offset_ty: ir::Type,
+    func: &mut ir::Function,
+) {
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+
+    // Convert `offset` to `addr_ty`.
+    if offset_ty != addr_ty {
+        let labels_value = offset;
+        offset = pos.ins().uextend(addr_ty, offset);
+        if let Some(values_labels) = pos.func.dfg.values_labels.as_mut() {
+            values_labels.insert(
+                offset,
+                ir::ValueLabelAssignments::Alias {
+                    from: pos.func.srclocs[inst],
+                    value: labels_value,
+                },
+            );
+        }
+    }
+
+    // Add the heap base address.
+    let base = if isa.flags().enable_pinned_reg() && isa.flags().use_pinned_reg_as_heap_base() {
+        pos.ins().get_pinned_reg(isa.pointer_type())
+    } else {
+        let base_gv = pos.func.heaps[heap].base;
+        pos.ins().global_value(addr_ty, base_gv)
+    };
+
+    pos.func.dfg.replace(inst).iadd(base, offset);
+}
diff --git a/cranelift/codegen/src/legalizer/libcall.rs b/cranelift/codegen/src/legalizer/libcall.rs
new file mode 100644
index 0000000000..2a4aeb4912
--- /dev/null
+++ b/cranelift/codegen/src/legalizer/libcall.rs
@@ -0,0 +1,40 @@
+//!
Expanding instructions as runtime library calls.
+
+use crate::ir;
+use crate::ir::{libcall::get_libcall_funcref, InstBuilder};
+use crate::isa::{CallConv, TargetIsa};
+use crate::legalizer::boundary::legalize_libcall_signature;
+use alloc::vec::Vec;
+
+/// Try to expand `inst` as a library call, returning true if successful.
+pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function, isa: &dyn TargetIsa) -> bool {
+    // Does the opcode/ctrl_type combo even have a well-known runtime library name?
+    let libcall = match ir::LibCall::for_inst(func.dfg[inst].opcode(), func.dfg.ctrl_typevar(inst))
+    {
+        Some(lc) => lc,
+        None => return false,
+    };
+
+    // Now we convert `inst` to a call. First save the arguments.
+    let mut args = Vec::new();
+    args.extend_from_slice(func.dfg.inst_args(inst));
+
+    let call_conv = CallConv::for_libcall(isa);
+    if call_conv.extends_baldrdash() {
+        let vmctx = func
+            .special_param(ir::ArgumentPurpose::VMContext)
+            .expect("Missing vmctx parameter for baldrdash libcall");
+        args.push(vmctx);
+    }
+
+    // The replace builder will preserve the instruction result values.
+    let funcref = get_libcall_funcref(libcall, call_conv, func, inst, isa);
+    func.dfg.replace(inst).call(funcref, &args);
+
+    // Ask the ISA to legalize the signature.
+    let fn_data = &func.dfg.ext_funcs[funcref];
+    let sig_data = &mut func.dfg.signatures[fn_data.signature];
+    legalize_libcall_signature(sig_data, isa);
+
+    true
+}
diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs
new file mode 100644
index 0000000000..781767336a
--- /dev/null
+++ b/cranelift/codegen/src/legalizer/mod.rs
@@ -0,0 +1,741 @@
+//! Legalize instructions.
+//!
+//! A legal instruction is one that can be mapped directly to a machine code instruction for the
+//! target ISA. The `legalize_function()` function takes as input any function and transforms it
+//! into an equivalent function using only legal instructions.
+//!
+//! The characteristics of legal instructions depend on the target ISA, so any given instruction
+//! can be legal for one ISA and illegal for another.
+//!
+//! Besides transforming instructions, the legalizer also fills out the `function.encodings` map
+//! which provides a legal encoding recipe for every instruction.
+//!
+//! The legalizer does not deal with register allocation constraints. These constraints are
+//! derived from the encoding recipes, and solved later by the register allocator.
+
+use crate::bitset::BitSet;
+use crate::cursor::{Cursor, FuncCursor};
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::types::{I32, I64};
+use crate::ir::{self, InstBuilder, MemFlags};
+use crate::isa::TargetIsa;
+use crate::predicates;
+use crate::timing;
+use alloc::collections::BTreeSet;
+use alloc::vec::Vec;
+
+mod boundary;
+mod call;
+mod globalvalue;
+mod heap;
+mod libcall;
+mod split;
+mod table;
+
+use self::call::expand_call;
+use self::globalvalue::expand_global_value;
+use self::heap::expand_heap_addr;
+use self::libcall::expand_as_libcall;
+use self::table::expand_table_addr;
+
+enum LegalizeInstResult {
+    Done,
+    Legalized,
+    SplitLegalizePending,
+}
+
+/// Legalize `inst` for `isa`.
+fn legalize_inst(
+    inst: ir::Inst,
+    pos: &mut FuncCursor,
+    cfg: &mut ControlFlowGraph,
+    isa: &dyn TargetIsa,
+) -> LegalizeInstResult {
+    let opcode = pos.func.dfg[inst].opcode();
+
+    // Check for ABI boundaries that need to be converted to the legalized signature.
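+    // Calls and returns are rewritten by the `boundary` module below; branches only need
+    // their arguments simplified in case earlier splitting left redundant split/concat
+    // chains behind.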
+    if opcode.is_call() {
+        if boundary::handle_call_abi(isa, inst, pos.func, cfg) {
+            return LegalizeInstResult::Legalized;
+        }
+    } else if opcode.is_return() {
+        if boundary::handle_return_abi(inst, pos.func, cfg) {
+            return LegalizeInstResult::Legalized;
+        }
+    } else if opcode.is_branch() {
+        split::simplify_branch_arguments(&mut pos.func.dfg, inst);
+    } else if opcode == ir::Opcode::Isplit {
+        pos.use_srcloc(inst);
+
+        let arg = match pos.func.dfg[inst] {
+            ir::InstructionData::Unary { arg, .. } => pos.func.dfg.resolve_aliases(arg),
+            _ => panic!("Expected isplit: {}", pos.func.dfg.display_inst(inst, None)),
+        };
+
+        match pos.func.dfg.value_def(arg) {
+            ir::ValueDef::Result(inst, _num) => {
+                if let ir::InstructionData::Binary {
+                    opcode: ir::Opcode::Iconcat,
+                    ..
+                } = pos.func.dfg[inst]
+                {
+                    // `arg` was created by an `iconcat` instruction.
+                } else {
+                    // `arg` was not created by an `iconcat` instruction. Don't try to resolve it,
+                    // as otherwise `split::isplit` will re-insert the original `isplit`, causing
+                    // an endless loop.
+                    return LegalizeInstResult::SplitLegalizePending;
+                }
+            }
+            ir::ValueDef::Param(_block, _num) => {}
+        }
+
+        let res = pos.func.dfg.inst_results(inst).to_vec();
+        assert_eq!(res.len(), 2);
+        let (resl, resh) = (res[0], res[1]); // Prevent borrowck error
+
+        // Remove the old isplit.
+        pos.func.dfg.clear_results(inst);
+        pos.remove_inst();
+
+        let curpos = pos.position();
+        let srcloc = pos.srcloc();
+        let (xl, xh) = split::isplit(pos.func, cfg, curpos, srcloc, arg);
+
+        pos.func.dfg.change_to_alias(resl, xl);
+        pos.func.dfg.change_to_alias(resh, xh);
+
+        return LegalizeInstResult::Legalized;
+    }
+
+    match pos.func.update_encoding(inst, isa) {
+        Ok(()) => LegalizeInstResult::Done,
+        Err(action) => {
+            // We should transform the instruction into legal equivalents.
+            // If the current instruction was replaced, we need to double back and revisit
+            // the expanded sequence. This is both to assign encodings and possibly to
+            // expand further.
+            // There's a risk of infinite looping here if the legalization patterns are
+            // unsound. Should we attempt to detect that?
+            if action(inst, pos.func, cfg, isa) {
+                return LegalizeInstResult::Legalized;
+            }
+
+            // We don't have any pattern expansion for this instruction either.
+            // Try converting it to a library call as a last resort.
+            if expand_as_libcall(inst, pos.func, isa) {
+                LegalizeInstResult::Legalized
+            } else {
+                LegalizeInstResult::Done
+            }
+        }
+    }
+}
+
+/// Legalize `func` for `isa`.
+///
+/// - Transform any instructions that don't have a legal representation in `isa`.
+/// - Fill out `func.encodings`.
+///
+pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) {
+    let _tt = timing::legalize();
+    debug_assert!(cfg.is_valid());
+
+    boundary::legalize_signatures(func, isa);
+
+    func.encodings.resize(func.dfg.num_insts());
+
+    let mut pos = FuncCursor::new(func);
+    let func_begin = pos.position();
+
+    // Split block params before trying to legalize instructions, so that the newly introduced
+    // isplit instructions get legalized.
+    while let Some(block) = pos.next_block() {
+        split::split_block_params(pos.func, cfg, block);
+    }
+
+    pos.set_position(func_begin);
+
+    // This must be a set to prevent trying to legalize `isplit` and `vsplit` twice in certain cases.
+    let mut pending_splits = BTreeSet::new();
+
+    // Process blocks in layout order. Some legalization actions may split the current block or append
+    // new ones to the end.
We need to make sure we visit those new blocks too.
+    while let Some(_block) = pos.next_block() {
+        // Keep track of the cursor position before the instruction being processed, so we can
+        // double back when replacing instructions.
+        let mut prev_pos = pos.position();
+
+        while let Some(inst) = pos.next_inst() {
+            match legalize_inst(inst, &mut pos, cfg, isa) {
+                // Remember this position in case we need to double back.
+                LegalizeInstResult::Done => prev_pos = pos.position(),
+
+                // Go back and legalize the inserted return value conversion instructions.
+                LegalizeInstResult::Legalized => pos.set_position(prev_pos),
+
+                // The argument of an `isplit` or `vsplit` instruction didn't resolve to an
+                // `iconcat` or `vconcat` instruction. Try again after legalizing the rest of
+                // the instructions.
+                LegalizeInstResult::SplitLegalizePending => {
+                    pending_splits.insert(inst);
+                }
+            }
+        }
+    }
+
+    // Try legalizing `isplit` and `vsplit` instructions, which could not previously be legalized.
+    for inst in pending_splits {
+        pos.goto_inst(inst);
+        legalize_inst(inst, &mut pos, cfg, isa);
+    }
+
+    // Now that we've lowered all br_tables, we don't need the jump tables anymore.
+    if !isa.flags().enable_jump_tables() {
+        pos.func.jump_tables.clear();
+    }
+}
+
+// Include legalization patterns that were generated by `gen_legalizer.rs` from the
+// `TransformGroup` in `cranelift-codegen/meta/shared/legalize.rs`.
+//
+// Concretely, this defines private functions `narrow()` and `expand()`.
+include!(concat!(env!("OUT_DIR"), "/legalizer.rs"));
+
+/// Custom expansion for conditional trap instructions.
+/// TODO: Add CFG support to the Rust DSL patterns so we won't have to do this.
+fn expand_cond_trap(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    cfg: &mut ControlFlowGraph,
+    _isa: &dyn TargetIsa,
+) {
+    // Parse the instruction.
+    let trapz;
+    let (arg, code) = match func.dfg[inst] {
+        ir::InstructionData::CondTrap { opcode, arg, code } => {
+            // We want to branch *over* an unconditional trap.
+            trapz = match opcode {
+                ir::Opcode::Trapz => true,
+                ir::Opcode::Trapnz => false,
+                _ => panic!("Expected cond trap: {}", func.dfg.display_inst(inst, None)),
+            };
+            (arg, code)
+        }
+        _ => panic!("Expected cond trap: {}", func.dfg.display_inst(inst, None)),
+    };
+
+    // Split the block after `inst`:
+    //
+    //     trapnz arg
+    //     ..
+    //
+    // Becomes:
+    //
+    //     brz arg, new_block_resume
+    //     jump new_block_trap
+    //
+    //   new_block_trap:
+    //     trap
+    //
+    //   new_block_resume:
+    //     ..
+    let old_block = func.layout.pp_block(inst);
+    let new_block_trap = func.dfg.make_block();
+    let new_block_resume = func.dfg.make_block();
+
+    // Replace the trap instruction with a branch on the inverted condition.
+    if trapz {
+        func.dfg.replace(inst).brnz(arg, new_block_resume, &[]);
+    } else {
+        func.dfg.replace(inst).brz(arg, new_block_resume, &[]);
+    }
+
+    // Add a jump instruction after the inverted branch.
+    let mut pos = FuncCursor::new(func).after_inst(inst);
+    pos.use_srcloc(inst);
+    pos.ins().jump(new_block_trap, &[]);
+
+    // Insert the new label and the unconditional trap terminator.
+    pos.insert_block(new_block_trap);
+    pos.ins().trap(code);
+
+    // Insert the new label where execution resumes when the trap is not taken.
+    pos.insert_block(new_block_resume);
+
+    // Finally update the CFG.
+    cfg.recompute_block(pos.func, old_block);
+    cfg.recompute_block(pos.func, new_block_resume);
+    cfg.recompute_block(pos.func, new_block_trap);
+}
+
+/// Jump tables.
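+///
+/// `br_table` is lowered in one of two ways, chosen by the `enable_jump_tables` setting:
+/// either a real jump table (a bounds check followed by an indirect branch through the
+/// table) or, as a fallback, a linear sequence of compare-and-branch instructions.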
+fn expand_br_table( + inst: ir::Inst, + func: &mut ir::Function, + cfg: &mut ControlFlowGraph, + isa: &dyn TargetIsa, +) { + if isa.flags().enable_jump_tables() { + expand_br_table_jt(inst, func, cfg, isa); + } else { + expand_br_table_conds(inst, func, cfg, isa); + } +} + +/// Expand br_table to jump table. +fn expand_br_table_jt( + inst: ir::Inst, + func: &mut ir::Function, + cfg: &mut ControlFlowGraph, + isa: &dyn TargetIsa, +) { + use crate::ir::condcodes::IntCC; + + let (arg, default_block, table) = match func.dfg[inst] { + ir::InstructionData::BranchTable { + opcode: ir::Opcode::BrTable, + arg, + destination, + table, + } => (arg, destination, table), + _ => panic!("Expected br_table: {}", func.dfg.display_inst(inst, None)), + }; + + // Rewrite: + // + // br_table $idx, default_block, $jt + // + // To: + // + // $oob = ifcmp_imm $idx, len($jt) + // brif uge $oob, default_block + // jump fallthrough_block + // + // fallthrough_block: + // $base = jump_table_base.i64 $jt + // $rel_addr = jump_table_entry.i64 $idx, $base, 4, $jt + // $addr = iadd $base, $rel_addr + // indirect_jump_table_br $addr, $jt + + let block = func.layout.pp_block(inst); + let jump_table_block = func.dfg.make_block(); + + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + // Bounds check. + let table_size = pos.func.jump_tables[table].len() as i64; + let oob = pos + .ins() + .icmp_imm(IntCC::UnsignedGreaterThanOrEqual, arg, table_size); + + pos.ins().brnz(oob, default_block, &[]); + pos.ins().jump(jump_table_block, &[]); + pos.insert_block(jump_table_block); + + let addr_ty = isa.pointer_type(); + + let arg = if pos.func.dfg.value_type(arg) == addr_ty { + arg + } else { + pos.ins().uextend(addr_ty, arg) + }; + + let base_addr = pos.ins().jump_table_base(addr_ty, table); + let entry = pos + .ins() + .jump_table_entry(arg, base_addr, I32.bytes() as u8, table); + + let addr = pos.ins().iadd(base_addr, entry); + pos.ins().indirect_jump_table_br(addr, table); + + pos.remove_inst(); + cfg.recompute_block(pos.func, block); + cfg.recompute_block(pos.func, jump_table_block); +} + +/// Expand br_table to series of conditionals. +fn expand_br_table_conds( + inst: ir::Inst, + func: &mut ir::Function, + cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + use crate::ir::condcodes::IntCC; + + let (arg, default_block, table) = match func.dfg[inst] { + ir::InstructionData::BranchTable { + opcode: ir::Opcode::BrTable, + arg, + destination, + table, + } => (arg, destination, table), + _ => panic!("Expected br_table: {}", func.dfg.display_inst(inst, None)), + }; + + let block = func.layout.pp_block(inst); + + // This is a poor man's jump table using just a sequence of conditional branches. + let table_size = func.jump_tables[table].len(); + let mut cond_failed_block = vec![]; + if table_size >= 1 { + cond_failed_block = alloc::vec::Vec::with_capacity(table_size - 1); + for _ in 0..table_size - 1 { + cond_failed_block.push(func.dfg.make_block()); + } + } + + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + // Ignore the lint for this loop as the range needs to be 0 to table_size + #[allow(clippy::needless_range_loop)] + for i in 0..table_size { + let dest = pos.func.jump_tables[table].as_slice()[i]; + let t = pos.ins().icmp_imm(IntCC::Equal, arg, i as i64); + pos.ins().brnz(t, dest, &[]); + // Jump to the next case. 
+ if i < table_size - 1 { + let block = cond_failed_block[i]; + pos.ins().jump(block, &[]); + pos.insert_block(block); + } + } + + // `br_table` jumps to the default destination if nothing matches + pos.ins().jump(default_block, &[]); + + pos.remove_inst(); + cfg.recompute_block(pos.func, block); + for failed_block in cond_failed_block.into_iter() { + cfg.recompute_block(pos.func, failed_block); + } +} + +/// Expand the select instruction. +/// +/// Conditional moves are available in some ISAs for some register classes. The remaining selects +/// are handled by a branch. +fn expand_select( + inst: ir::Inst, + func: &mut ir::Function, + cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let (ctrl, tval, fval) = match func.dfg[inst] { + ir::InstructionData::Ternary { + opcode: ir::Opcode::Select, + args, + } => (args[0], args[1], args[2]), + _ => panic!("Expected select: {}", func.dfg.display_inst(inst, None)), + }; + + // Replace `result = select ctrl, tval, fval` with: + // + // brnz ctrl, new_block(tval) + // jump new_block(fval) + // new_block(result): + let old_block = func.layout.pp_block(inst); + let result = func.dfg.first_result(inst); + func.dfg.clear_results(inst); + let new_block = func.dfg.make_block(); + func.dfg.attach_block_param(new_block, result); + + func.dfg.replace(inst).brnz(ctrl, new_block, &[tval]); + let mut pos = FuncCursor::new(func).after_inst(inst); + pos.use_srcloc(inst); + pos.ins().jump(new_block, &[fval]); + pos.insert_block(new_block); + + cfg.recompute_block(pos.func, new_block); + cfg.recompute_block(pos.func, old_block); +} + +fn expand_br_icmp( + inst: ir::Inst, + func: &mut ir::Function, + cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let (cond, a, b, destination, block_args) = match func.dfg[inst] { + ir::InstructionData::BranchIcmp { + cond, + destination, + ref args, + .. + } => ( + cond, + args.get(0, &func.dfg.value_lists).unwrap(), + args.get(1, &func.dfg.value_lists).unwrap(), + destination, + args.as_slice(&func.dfg.value_lists)[2..].to_vec(), + ), + _ => panic!("Expected br_icmp {}", func.dfg.display_inst(inst, None)), + }; + + let old_block = func.layout.pp_block(inst); + func.dfg.clear_results(inst); + + let icmp_res = func.dfg.replace(inst).icmp(cond, a, b); + let mut pos = FuncCursor::new(func).after_inst(inst); + pos.use_srcloc(inst); + pos.ins().brnz(icmp_res, destination, &block_args); + + cfg.recompute_block(pos.func, destination); + cfg.recompute_block(pos.func, old_block); +} + +/// Expand illegal `f32const` and `f64const` instructions. +fn expand_fconst( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let ty = func.dfg.value_type(func.dfg.first_result(inst)); + debug_assert!(!ty.is_vector(), "Only scalar fconst supported: {}", ty); + + // In the future, we may want to generate constant pool entries for these constants, but for + // now use an `iconst` and a bit cast. + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + let ival = match pos.func.dfg[inst] { + ir::InstructionData::UnaryIeee32 { + opcode: ir::Opcode::F32const, + imm, + } => pos.ins().iconst(ir::types::I32, i64::from(imm.bits())), + ir::InstructionData::UnaryIeee64 { + opcode: ir::Opcode::F64const, + imm, + } => pos.ins().iconst(ir::types::I64, imm.bits() as i64), + _ => panic!("Expected fconst: {}", pos.func.dfg.display_inst(inst, None)), + }; + pos.func.dfg.replace(inst).bitcast(ty, ival); +} + +/// Expand illegal `stack_load` instructions. 
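+///
+/// As a rough sketch (the value numbers and the 64-bit pointer type below are
+/// illustrative, not real output), a `stack_load` is rewritten into a `stack_addr`
+/// followed by a plain `load`:
+///
+/// ```clif
+/// v1 = stack_load.i32 ss0+8
+/// ```
+///
+/// becomes:
+///
+/// ```clif
+/// v2 = stack_addr.i64 ss0+8
+/// v1 = load.i32 notrap aligned v2
+/// ```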
+fn expand_stack_load( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + isa: &dyn TargetIsa, +) { + let ty = func.dfg.value_type(func.dfg.first_result(inst)); + let addr_ty = isa.pointer_type(); + + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + let (stack_slot, offset) = match pos.func.dfg[inst] { + ir::InstructionData::StackLoad { + opcode: _opcode, + stack_slot, + offset, + } => (stack_slot, offset), + _ => panic!( + "Expected stack_load: {}", + pos.func.dfg.display_inst(inst, None) + ), + }; + + let addr = pos.ins().stack_addr(addr_ty, stack_slot, offset); + + // Stack slots are required to be accessible and aligned. + let mflags = MemFlags::trusted(); + pos.func.dfg.replace(inst).load(ty, mflags, addr, 0); +} + +/// Expand illegal `stack_store` instructions. +fn expand_stack_store( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + isa: &dyn TargetIsa, +) { + let addr_ty = isa.pointer_type(); + + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + let (val, stack_slot, offset) = match pos.func.dfg[inst] { + ir::InstructionData::StackStore { + opcode: _opcode, + arg, + stack_slot, + offset, + } => (arg, stack_slot, offset), + _ => panic!( + "Expected stack_store: {}", + pos.func.dfg.display_inst(inst, None) + ), + }; + + let addr = pos.ins().stack_addr(addr_ty, stack_slot, offset); + + let mut mflags = MemFlags::new(); + // Stack slots are required to be accessible and aligned. + mflags.set_notrap(); + mflags.set_aligned(); + pos.func.dfg.replace(inst).store(mflags, val, addr, 0); +} + +/// Split a load into two parts before `iconcat`ing the result together. +fn narrow_load( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + let (ptr, offset, flags) = match pos.func.dfg[inst] { + ir::InstructionData::Load { + opcode: ir::Opcode::Load, + arg, + offset, + flags, + } => (arg, offset, flags), + _ => panic!("Expected load: {}", pos.func.dfg.display_inst(inst, None)), + }; + + let res_ty = pos.func.dfg.ctrl_typevar(inst); + let small_ty = res_ty.half_width().expect("Can't narrow load"); + + let al = pos.ins().load(small_ty, flags, ptr, offset); + let ah = pos.ins().load( + small_ty, + flags, + ptr, + offset.try_add_i64(8).expect("load offset overflow"), + ); + pos.func.dfg.replace(inst).iconcat(al, ah); +} + +/// Split a store into two parts after `isplit`ing the value. +fn narrow_store( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + let (val, ptr, offset, flags) = match pos.func.dfg[inst] { + ir::InstructionData::Store { + opcode: ir::Opcode::Store, + args, + offset, + flags, + } => (args[0], args[1], offset, flags), + _ => panic!("Expected store: {}", pos.func.dfg.display_inst(inst, None)), + }; + + let (al, ah) = pos.ins().isplit(val); + pos.ins().store(flags, al, ptr, offset); + pos.ins().store( + flags, + ah, + ptr, + offset.try_add_i64(8).expect("store offset overflow"), + ); + pos.remove_inst(); +} + +/// Expands an illegal iconst value by splitting it into two. 
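+///
+/// A sketch for a 32-bit target (value numbers are illustrative only):
+///
+/// ```clif
+/// v1 = iconst.i64 0x1122334455667788
+/// ```
+///
+/// becomes:
+///
+/// ```clif
+/// v2 = iconst.i32 0x55667788  ; low half
+/// v3 = iconst.i32 0x11223344  ; high half
+/// v1 = iconcat v2, v3
+/// ```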
+fn narrow_iconst( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + isa: &dyn TargetIsa, +) { + let imm: i64 = if let ir::InstructionData::UnaryImm { + opcode: ir::Opcode::Iconst, + imm, + } = &func.dfg[inst] + { + (*imm).into() + } else { + panic!("unexpected instruction in narrow_iconst"); + }; + + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + let ty = pos.func.dfg.ctrl_typevar(inst); + if isa.pointer_bits() == 32 && ty == I64 { + let low = pos.ins().iconst(I32, imm & 0xffffffff); + let high = pos.ins().iconst(I32, imm >> 32); + // The instruction has as many results as iconcat, so no need to replace them. + pos.func.dfg.replace(inst).iconcat(low, high); + return; + } + + unimplemented!("missing encoding or legalization for iconst.{:?}", ty); +} + +fn narrow_icmp_imm( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + use crate::ir::condcodes::{CondCode, IntCC}; + + let (arg, cond, imm): (ir::Value, IntCC, i64) = match func.dfg[inst] { + ir::InstructionData::IntCompareImm { + opcode: ir::Opcode::IcmpImm, + arg, + cond, + imm, + } => (arg, cond, imm.into()), + _ => panic!("unexpected instruction in narrow_icmp_imm"), + }; + + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + let ty = pos.func.dfg.ctrl_typevar(inst); + let ty_half = ty.half_width().unwrap(); + + let imm_low = pos + .ins() + .iconst(ty_half, imm & ((1u128 << ty_half.bits()) - 1) as i64); + let imm_high = pos + .ins() + .iconst(ty_half, imm.wrapping_shr(ty_half.bits().into())); + let (arg_low, arg_high) = pos.ins().isplit(arg); + + match cond { + IntCC::Equal => { + let res_low = pos.ins().icmp(cond, arg_low, imm_low); + let res_high = pos.ins().icmp(cond, arg_high, imm_high); + pos.func.dfg.replace(inst).band(res_low, res_high); + } + IntCC::NotEqual => { + let res_low = pos.ins().icmp(cond, arg_low, imm_low); + let res_high = pos.ins().icmp(cond, arg_high, imm_high); + pos.func.dfg.replace(inst).bor(res_low, res_high); + } + IntCC::SignedGreaterThan + | IntCC::SignedGreaterThanOrEqual + | IntCC::SignedLessThan + | IntCC::SignedLessThanOrEqual + | IntCC::UnsignedGreaterThan + | IntCC::UnsignedGreaterThanOrEqual + | IntCC::UnsignedLessThan + | IntCC::UnsignedLessThanOrEqual => { + let b1 = pos.ins().icmp(cond.without_equal(), arg_high, imm_high); + let b2 = pos + .ins() + .icmp(cond.inverse().without_equal(), arg_high, imm_high); + let b3 = pos.ins().icmp(cond.unsigned(), arg_low, imm_low); + let c1 = pos.ins().bnot(b2); + let c2 = pos.ins().band(c1, b3); + pos.func.dfg.replace(inst).bor(b1, c2); + } + _ => unimplemented!("missing legalization for condition {:?}", cond), + } +} diff --git a/cranelift/codegen/src/legalizer/split.rs b/cranelift/codegen/src/legalizer/split.rs new file mode 100644 index 0000000000..ea4a032163 --- /dev/null +++ b/cranelift/codegen/src/legalizer/split.rs @@ -0,0 +1,405 @@ +//! Value splitting. +//! +//! Some value types are too large to fit in registers, so they need to be split into smaller parts +//! that the ISA can operate on. There's two dimensions of splitting, represented by two +//! complementary instruction pairs: +//! +//! - `isplit` and `iconcat` for splitting integer types into smaller integers. +//! - `vsplit` and `vconcat` for splitting vector types into smaller vector types with the same +//! lane types. +//! +//! There is no floating point splitting. If an ISA doesn't support `f64` values, they probably +//! 
have to be bit-cast to `i64` and possibly split into two `i32` values that fit in registers.
+//! This breakdown is handled by the ABI lowering.
+//!
+//! When legalizing a single instruction, it is wrapped in splits and concatenations:
+//!
+//! ```clif
+//! v1 = bxor.i64 v2, v3
+//! ```
+//!
+//! becomes:
+//!
+//! ```clif
+//! v20, v21 = isplit v2
+//! v30, v31 = isplit v3
+//! v10 = bxor.i32 v20, v30
+//! v11 = bxor.i32 v21, v31
+//! v1 = iconcat v10, v11
+//! ```
+//!
+//! This local expansion approach still leaves the original `i64` values in the code as operands on
+//! the `split` and `concat` instructions. It also creates a lot of redundant code to clean up as
+//! values are constantly split and concatenated.
+//!
+//! # Optimized splitting
+//!
+//! We can eliminate a lot of the splitting code quite easily. Whenever we need to split a value,
+//! first check if the value is defined by the corresponding concatenation. If so, then just use
+//! the two concatenation inputs directly:
+//!
+//! ```clif
+//! v4 = iadd_imm.i64 v1, 1
+//! ```
+//!
+//! becomes, using the expanded code from above:
+//!
+//! ```clif
+//! v40, v5 = iadd_imm_cout.i32 v10, 1
+//! v6 = bint.i32 v5
+//! v41 = iadd.i32 v11, v6
+//! v4 = iconcat v40, v41
+//! ```
+//!
+//! This means that the `iconcat` instructions defining `v1` and `v4` end up with no uses, so they
+//! can be trivially deleted by a dead code elimination pass.
+//!
+//! # block arguments
+//!
+//! If all instructions that produce an `i64` value are legalized as above, we will eventually end
+//! up with no `i64` values anywhere, except for block arguments. We can work around this by
+//! iteratively splitting block arguments too. That should leave us with no illegal value types
+//! anywhere.
+//!
+//! It is possible to have circular dependencies of block arguments that are never used by any real
+//! instructions. These loops will remain in the program.
+
+use crate::cursor::{Cursor, CursorPosition, FuncCursor};
+use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
+use crate::ir::{self, Block, Inst, InstBuilder, InstructionData, Opcode, Type, Value, ValueDef};
+use alloc::vec::Vec;
+use core::iter;
+use smallvec::SmallVec;
+
+/// Split `value` into two values using the `isplit` semantics. Do this by reusing existing values
+/// if possible.
+pub fn isplit(
+    func: &mut ir::Function,
+    cfg: &ControlFlowGraph,
+    pos: CursorPosition,
+    srcloc: ir::SourceLoc,
+    value: Value,
+) -> (Value, Value) {
+    split_any(func, cfg, pos, srcloc, value, Opcode::Iconcat)
+}
+
+/// Split `value` into halves using the `vsplit` semantics. Do this by reusing existing values if
+/// possible.
+pub fn vsplit(
+    func: &mut ir::Function,
+    cfg: &ControlFlowGraph,
+    pos: CursorPosition,
+    srcloc: ir::SourceLoc,
+    value: Value,
+) -> (Value, Value) {
+    split_any(func, cfg, pos, srcloc, value, Opcode::Vconcat)
+}
+
+/// After splitting a block argument, we need to go back and fix up all of the predecessor
+/// instructions. This is potentially a recursive operation, but we don't implement it recursively
+/// since that could use up too much stack.
+///
+/// Instead, the repairs are deferred and placed on a work list in stack form.
struct Repair {
+    concat: Opcode,
+    // The argument type after splitting.
+    split_type: Type,
+    // The destination block whose arguments have been split.
+    block: Block,
+    // Number of the original block argument which has been replaced by the low part.
+ num: usize, + // Number of the new block argument which represents the high part after the split. + hi_num: usize, +} + +/// Generic version of `isplit` and `vsplit` controlled by the `concat` opcode. +fn split_any( + func: &mut ir::Function, + cfg: &ControlFlowGraph, + pos: CursorPosition, + srcloc: ir::SourceLoc, + value: Value, + concat: Opcode, +) -> (Value, Value) { + let mut repairs = Vec::new(); + let pos = &mut FuncCursor::new(func).at_position(pos).with_srcloc(srcloc); + let result = split_value(pos, value, concat, &mut repairs); + + perform_repairs(pos, cfg, repairs); + + result +} + +pub fn split_block_params(func: &mut ir::Function, cfg: &ControlFlowGraph, block: Block) { + let pos = &mut FuncCursor::new(func).at_top(block); + let block_params = pos.func.dfg.block_params(block); + + // Add further splittable types here. + fn type_requires_splitting(ty: Type) -> bool { + ty == ir::types::I128 + } + + // A shortcut. If none of the param types require splitting, exit now. This helps because + // the loop below necessarily has to copy the block params into a new vector, so it's better to + // avoid doing so when possible. + if !block_params + .iter() + .any(|block_param| type_requires_splitting(pos.func.dfg.value_type(*block_param))) + { + return; + } + + let mut repairs = Vec::new(); + for (num, block_param) in block_params.to_vec().into_iter().enumerate() { + if !type_requires_splitting(pos.func.dfg.value_type(block_param)) { + continue; + } + + split_block_param(pos, block, num, block_param, Opcode::Iconcat, &mut repairs); + } + + perform_repairs(pos, cfg, repairs); +} + +fn perform_repairs(pos: &mut FuncCursor, cfg: &ControlFlowGraph, mut repairs: Vec) { + // We have split the value requested, and now we may need to fix some block predecessors. + while let Some(repair) = repairs.pop() { + for BlockPredecessor { inst, .. } in cfg.pred_iter(repair.block) { + let branch_opc = pos.func.dfg[inst].opcode(); + debug_assert!( + branch_opc.is_branch(), + "Predecessor not a branch: {}", + pos.func.dfg.display_inst(inst, None) + ); + let num_fixed_args = branch_opc.constraints().num_fixed_value_arguments(); + let mut args = pos.func.dfg[inst] + .take_value_list() + .expect("Branches must have value lists."); + let num_args = args.len(&pos.func.dfg.value_lists); + // Get the old value passed to the block argument we're repairing. + let old_arg = args + .get(num_fixed_args + repair.num, &pos.func.dfg.value_lists) + .expect("Too few branch arguments"); + + // It's possible that the CFG's predecessor list has duplicates. Detect them here. + if pos.func.dfg.value_type(old_arg) == repair.split_type { + pos.func.dfg[inst].put_value_list(args); + continue; + } + + // Split the old argument, possibly causing more repairs to be scheduled. + pos.goto_inst(inst); + + let inst_block = pos.func.layout.inst_block(inst).expect("inst in block"); + + // Insert split values prior to the terminal branch group. + let canonical = pos + .func + .layout + .canonical_branch_inst(&pos.func.dfg, inst_block); + if let Some(first_branch) = canonical { + pos.goto_inst(first_branch); + } + + let (lo, hi) = split_value(pos, old_arg, repair.concat, &mut repairs); + + // The `lo` part replaces the original argument. + *args + .get_mut(num_fixed_args + repair.num, &mut pos.func.dfg.value_lists) + .unwrap() = lo; + + // The `hi` part goes at the end. Since multiple repairs may have been scheduled to the + // same block, there could be multiple arguments missing. 
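+            // Either overwrite the existing slot for the `hi` part or grow the list to
+            // make room for it.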
+ if num_args > num_fixed_args + repair.hi_num { + *args + .get_mut( + num_fixed_args + repair.hi_num, + &mut pos.func.dfg.value_lists, + ) + .unwrap() = hi; + } else { + // We need to append one or more arguments. If we're adding more than one argument, + // there must be pending repairs on the stack that will fill in the correct values + // instead of `hi`. + args.extend( + iter::repeat(hi).take(1 + num_fixed_args + repair.hi_num - num_args), + &mut pos.func.dfg.value_lists, + ); + } + + // Put the value list back after manipulating it. + pos.func.dfg[inst].put_value_list(args); + } + } +} + +/// Split a single value using the integer or vector semantics given by the `concat` opcode. +/// +/// If the value is defined by a `concat` instruction, just reuse the operand values of that +/// instruction. +/// +/// Return the two new values representing the parts of `value`. +fn split_value( + pos: &mut FuncCursor, + value: Value, + concat: Opcode, + repairs: &mut Vec, +) -> (Value, Value) { + let value = pos.func.dfg.resolve_aliases(value); + let mut reuse = None; + + match pos.func.dfg.value_def(value) { + ValueDef::Result(inst, num) => { + // This is an instruction result. See if the value was created by a `concat` + // instruction. + if let InstructionData::Binary { opcode, args, .. } = pos.func.dfg[inst] { + debug_assert_eq!(num, 0); + if opcode == concat { + reuse = Some((args[0], args[1])); + } + } + } + ValueDef::Param(block, num) => { + // This is an block parameter. + // We can split the parameter value unless this is the entry block. + if pos.func.layout.entry_block() != Some(block) { + reuse = Some(split_block_param(pos, block, num, value, concat, repairs)); + } + } + } + + // Did the code above succeed in finding values we can reuse? + if let Some(pair) = reuse { + pair + } else { + // No, we'll just have to insert the requested split instruction at `pos`. Note that `pos` + // has not been moved by the block argument code above when `reuse` is `None`. + match concat { + Opcode::Iconcat => pos.ins().isplit(value), + Opcode::Vconcat => pos.ins().vsplit(value), + _ => panic!("Unhandled concat opcode: {}", concat), + } + } +} + +fn split_block_param( + pos: &mut FuncCursor, + block: Block, + param_num: usize, + value: Value, + concat: Opcode, + repairs: &mut Vec, +) -> (Value, Value) { + // We are going to replace the parameter at `num` with two new arguments. + // Determine the new value types. + let ty = pos.func.dfg.value_type(value); + let split_type = match concat { + Opcode::Iconcat => ty.half_width().expect("Invalid type for isplit"), + Opcode::Vconcat => ty.half_vector().expect("Invalid type for vsplit"), + _ => panic!("Unhandled concat opcode: {}", concat), + }; + + // Since the `repairs` stack potentially contains other parameter numbers for + // `block`, avoid shifting and renumbering block parameters. It could invalidate other + // `repairs` entries. + // + // Replace the original `value` with the low part, and append the high part at the + // end of the argument list. + let lo = pos.func.dfg.replace_block_param(value, split_type); + let hi_num = pos.func.dfg.num_block_params(block); + let hi = pos.func.dfg.append_block_param(block, split_type); + + // Now the original value is dangling. Insert a concatenation instruction that can + // compute it from the two new parameters. This also serves as a record of what we + // did so a future call to this function doesn't have to redo the work. 
+ // + // Note that it is safe to move `pos` here since `reuse` was set above, so we don't + // need to insert a split instruction before returning. + pos.goto_first_inst(block); + pos.ins() + .with_result(value) + .Binary(concat, split_type, lo, hi); + + // Finally, splitting the block parameter is not enough. We also have to repair all + // of the predecessor instructions that branch here. + add_repair(concat, split_type, block, param_num, hi_num, repairs); + + (lo, hi) +} + +// Add a repair entry to the work list. +fn add_repair( + concat: Opcode, + split_type: Type, + block: Block, + num: usize, + hi_num: usize, + repairs: &mut Vec, +) { + repairs.push(Repair { + concat, + split_type, + block, + num, + hi_num, + }); +} + +/// Strip concat-split chains. Return a simpler way of computing the same value. +/// +/// Given this input: +/// +/// ```clif +/// v10 = iconcat v1, v2 +/// v11, v12 = isplit v10 +/// ``` +/// +/// This function resolves `v11` to `v1` and `v12` to `v2`. +fn resolve_splits(dfg: &ir::DataFlowGraph, value: Value) -> Value { + let value = dfg.resolve_aliases(value); + + // Deconstruct a split instruction. + let split_res; + let concat_opc; + let split_arg; + if let ValueDef::Result(inst, num) = dfg.value_def(value) { + split_res = num; + concat_opc = match dfg[inst].opcode() { + Opcode::Isplit => Opcode::Iconcat, + Opcode::Vsplit => Opcode::Vconcat, + _ => return value, + }; + split_arg = dfg.inst_args(inst)[0]; + } else { + return value; + } + + // See if split_arg is defined by a concatenation instruction. + if let ValueDef::Result(inst, _) = dfg.value_def(split_arg) { + if dfg[inst].opcode() == concat_opc { + return dfg.inst_args(inst)[split_res]; + } + } + + value +} + +/// Simplify the arguments to a branch *after* the instructions leading up to the branch have been +/// legalized. +/// +/// The branch argument repairs performed by `split_any()` above may be performed on branches that +/// have not yet been legalized. The repaired arguments can be defined by actual split +/// instructions in that case. +/// +/// After legalizing the instructions computing the value that was split, it is likely that we can +/// avoid depending on the split instruction. Its input probably comes from a concatenation. +pub fn simplify_branch_arguments(dfg: &mut ir::DataFlowGraph, branch: Inst) { + let mut new_args = SmallVec::<[Value; 32]>::new(); + + for &arg in dfg.inst_args(branch) { + let new_arg = resolve_splits(dfg, arg); + new_args.push(new_arg); + } + + dfg.inst_args_mut(branch).copy_from_slice(&new_args); +} diff --git a/cranelift/codegen/src/legalizer/table.rs b/cranelift/codegen/src/legalizer/table.rs new file mode 100644 index 0000000000..0c4385e96b --- /dev/null +++ b/cranelift/codegen/src/legalizer/table.rs @@ -0,0 +1,113 @@ +//! Legalization of tables. +//! +//! This module exports the `expand_table_addr` function which transforms a `table_addr` +//! instruction into code that depends on the kind of table referenced. + +use crate::cursor::{Cursor, FuncCursor}; +use crate::flowgraph::ControlFlowGraph; +use crate::ir::condcodes::IntCC; +use crate::ir::immediates::Offset32; +use crate::ir::{self, InstBuilder}; +use crate::isa::TargetIsa; + +/// Expand a `table_addr` instruction according to the definition of the table. +pub fn expand_table_addr( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + // Unpack the instruction. 
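+    // `table_addr` carries the table, the element index, and a constant element offset.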
+    let (table, index, element_offset) = match func.dfg[inst] {
+        ir::InstructionData::TableAddr {
+            opcode,
+            table,
+            arg,
+            offset,
+        } => {
+            debug_assert_eq!(opcode, ir::Opcode::TableAddr);
+            (table, arg, offset)
+        }
+        _ => panic!("Wanted table_addr: {}", func.dfg.display_inst(inst, None)),
+    };
+
+    dynamic_addr(inst, table, index, element_offset, func);
+}
+
+/// Expand a `table_addr` for a dynamic table.
+fn dynamic_addr(
+    inst: ir::Inst,
+    table: ir::Table,
+    index: ir::Value,
+    element_offset: Offset32,
+    func: &mut ir::Function,
+) {
+    let bound_gv = func.tables[table].bound_gv;
+    let index_ty = func.dfg.value_type(index);
+    let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+
+    // Start with the bounds check. Trap if `index + 1 > bound`.
+    let bound = pos.ins().global_value(index_ty, bound_gv);
+
+    // `index > bound - 1` is the same as `index >= bound`.
+    let oob = pos
+        .ins()
+        .icmp(IntCC::UnsignedGreaterThanOrEqual, index, bound);
+    pos.ins().trapnz(oob, ir::TrapCode::TableOutOfBounds);
+
+    compute_addr(
+        inst,
+        table,
+        addr_ty,
+        index,
+        index_ty,
+        element_offset,
+        pos.func,
+    );
+}
+
+/// Emit code for the base address computation of a `table_addr` instruction.
+fn compute_addr(
+    inst: ir::Inst,
+    table: ir::Table,
+    addr_ty: ir::Type,
+    mut index: ir::Value,
+    index_ty: ir::Type,
+    element_offset: Offset32,
+    func: &mut ir::Function,
+) {
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+
+    // Convert `index` to `addr_ty`.
+    if index_ty != addr_ty {
+        index = pos.ins().uextend(addr_ty, index);
+    }
+
+    // Add the table base address.
+    let base_gv = pos.func.tables[table].base_gv;
+    let base = pos.ins().global_value(addr_ty, base_gv);
+
+    let element_size = pos.func.tables[table].element_size;
+    let mut offset;
+    let element_size: u64 = element_size.into();
+    if element_size == 1 {
+        offset = index;
+    } else if element_size.is_power_of_two() {
+        offset = pos
+            .ins()
+            .ishl_imm(index, i64::from(element_size.trailing_zeros()));
+    } else {
+        offset = pos.ins().imul_imm(index, element_size as i64);
+    }
+
+    if element_offset == Offset32::new(0) {
+        pos.func.dfg.replace(inst).iadd(base, offset);
+    } else {
+        let imm: i64 = element_offset.into();
+        offset = pos.ins().iadd(base, offset);
+        pos.func.dfg.replace(inst).iadd_imm(offset, imm);
+    }
+}
diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs
new file mode 100644
index 0000000000..772562b916
--- /dev/null
+++ b/cranelift/codegen/src/lib.rs
@@ -0,0 +1,110 @@
+//! Cranelift code generation library.
+ +#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)] +#![warn(unused_import_braces)] +#![cfg_attr(feature = "std", deny(unstable_features))] +#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))] +#![cfg_attr(feature="cargo-clippy", allow( +// Produces only a false positive: + clippy::while_let_loop, +// Produces many false positives, but did produce some valid lints, now fixed: + clippy::needless_lifetimes, +// Generated code makes some style transgressions, but readability doesn't suffer much: + clippy::many_single_char_names, + clippy::identity_op, + clippy::needless_borrow, + clippy::cast_lossless, + clippy::unreadable_literal, + clippy::assign_op_pattern, + clippy::empty_line_after_outer_attr, +// Hard to avoid in generated code: + clippy::cognitive_complexity, + clippy::too_many_arguments, +// Code generator doesn't have a way to collapse identical arms: + clippy::match_same_arms, +// These are relatively minor style issues, but would be easy to fix: + clippy::new_without_default, + clippy::should_implement_trait, + clippy::len_without_is_empty))] +#![cfg_attr( + feature = "cargo-clippy", + warn( + clippy::float_arithmetic, + clippy::mut_mut, + clippy::nonminimal_bool, + clippy::option_map_unwrap_or, + clippy::option_map_unwrap_or_else, + clippy::unicode_not_nfc, + clippy::use_self + ) +)] +#![no_std] + +#[allow(unused_imports)] // #[macro_use] is required for no_std +#[macro_use] +extern crate alloc; + +#[cfg(feature = "std")] +#[macro_use] +extern crate std; + +#[cfg(not(feature = "std"))] +use hashbrown::{hash_map, HashMap, HashSet}; +#[cfg(feature = "std")] +use std::collections::{hash_map, HashMap, HashSet}; + +pub use crate::context::Context; +pub use crate::legalizer::legalize_function; +pub use crate::value_label::{ValueLabelsRanges, ValueLocRange}; +pub use crate::verifier::verify_function; +pub use crate::write::write_function; + +pub use cranelift_bforest as bforest; +pub use cranelift_entity as entity; + +pub mod binemit; +pub mod cfg_printer; +pub mod cursor; +pub mod dbg; +pub mod dominator_tree; +pub mod flowgraph; +pub mod ir; +pub mod isa; +pub mod loop_analysis; +pub mod print_errors; +pub mod settings; +pub mod timing; +pub mod verifier; +pub mod write; + +pub use crate::entity::packed_option; + +mod abi; +mod bitset; +mod constant_hash; +mod context; +mod dce; +mod divconst_magic_numbers; +mod fx; +mod iterators; +mod legalizer; +mod licm; +mod nan_canonicalization; +mod partition_slice; +mod postopt; +mod predicates; +mod redundant_reload_remover; +mod regalloc; +mod result; +mod scoped_hash_map; +mod simple_gvn; +mod simple_preopt; +mod stack_layout; +mod topo_order; +mod unreachable_code; +mod value_label; + +pub use crate::result::{CodegenError, CodegenResult}; + +/// Version number of this crate. +pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/cranelift/codegen/src/licm.rs b/cranelift/codegen/src/licm.rs new file mode 100644 index 0000000000..75000b5297 --- /dev/null +++ b/cranelift/codegen/src/licm.rs @@ -0,0 +1,247 @@ +//! 
A Loop Invariant Code Motion optimization pass
+
+use crate::cursor::{Cursor, EncCursor, FuncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::entity::{EntityList, ListPool};
+use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
+use crate::fx::FxHashSet;
+use crate::ir::{
+    Block, DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Layout, Opcode, Type,
+    Value,
+};
+use crate::isa::TargetIsa;
+use crate::loop_analysis::{Loop, LoopAnalysis};
+use crate::timing;
+use alloc::vec::Vec;
+
+/// Performs the LICM pass by detecting loops within the CFG and moving
+/// loop-invariant instructions out of them.
+/// Changes the CFG and domtree in-place during the operation.
+pub fn do_licm(
+    isa: &dyn TargetIsa,
+    func: &mut Function,
+    cfg: &mut ControlFlowGraph,
+    domtree: &mut DominatorTree,
+    loop_analysis: &mut LoopAnalysis,
+) {
+    let _tt = timing::licm();
+    debug_assert!(cfg.is_valid());
+    debug_assert!(domtree.is_valid());
+    debug_assert!(loop_analysis.is_valid());
+
+    for lp in loop_analysis.loops() {
+        // For each loop that we want to optimize, determine the set of loop-invariant
+        // instructions. These are removed from the loop body and will be inserted into
+        // the loop's pre-header instead.
+        let invariant_insts = remove_loop_invariant_instructions(lp, func, cfg, loop_analysis);
+        if !invariant_insts.is_empty() {
+            // If the loop has a natural pre-header we use it, otherwise we create it.
+            let mut pos;
+            match has_pre_header(&func.layout, cfg, domtree, loop_analysis.loop_header(lp)) {
+                None => {
+                    let pre_header =
+                        create_pre_header(isa, loop_analysis.loop_header(lp), func, cfg, domtree);
+                    pos = FuncCursor::new(func).at_last_inst(pre_header);
+                }
+                // If there is a natural pre-header we insert new instructions just before the
+                // related jumping instruction (which is not necessarily at the end).
+                Some((_, last_inst)) => {
+                    pos = FuncCursor::new(func).at_inst(last_inst);
+                }
+            };
+            // The last instruction of the pre-header is the termination instruction (usually
+            // a jump) so we need to insert just before this.
+            for inst in invariant_insts {
+                pos.insert_inst(inst);
+            }
+        }
+    }
+    // We have to recompute the domtree to account for the changes.
+    cfg.compute(func);
+    domtree.compute(func, cfg);
+}
+// Insert a pre-header before the header, modifying the function layout and CFG to reflect it.
+// A jump instruction to the header is placed at the end of the pre-header.
+fn create_pre_header(
+    isa: &dyn TargetIsa,
+    header: Block,
+    func: &mut Function,
+    cfg: &mut ControlFlowGraph,
+    domtree: &DominatorTree,
+) -> Block {
+    let pool = &mut ListPool::<Value>::new();
+    let header_args_values = func.dfg.block_params(header).to_vec();
+    let header_args_types: Vec<Type> = header_args_values
+        .into_iter()
+        .map(|val| func.dfg.value_type(val))
+        .collect();
+    let pre_header = func.dfg.make_block();
+    let mut pre_header_args_value: EntityList<Value> = EntityList::new();
+    for typ in header_args_types {
+        pre_header_args_value.push(func.dfg.append_block_param(pre_header, typ), pool);
+    }
+    for BlockPredecessor {
+        inst: last_inst, ..
+    } in cfg.pred_iter(header)
+    {
+        // We only follow normal edges (not the back edges).
+        if !domtree.dominates(header, last_inst, &func.layout) {
+            func.change_branch_destination(last_inst, pre_header);
+        }
+    }
+    {
+        let mut pos = EncCursor::new(func, isa).at_top(header);
+        // Insert the pre-header at the right place in the layout.
+        pos.insert_block(pre_header);
+        pos.next_inst();
+        pos.ins().jump(header, pre_header_args_value.as_slice(pool));
+    }
+    pre_header
+}
+
+// Detects if a loop header has a natural pre-header.
+//
+// A loop header has a pre-header if there is only one predecessor that the header doesn't
+// dominate.
+// Returns the pre-header Block and the instruction jumping to the header.
+fn has_pre_header(
+    layout: &Layout,
+    cfg: &ControlFlowGraph,
+    domtree: &DominatorTree,
+    header: Block,
+) -> Option<(Block, Inst)> {
+    let mut result = None;
+    for BlockPredecessor {
+        block: pred_block,
+        inst: branch_inst,
+    } in cfg.pred_iter(header)
+    {
+        // We only count normal edges (not the back edges).
+        if !domtree.dominates(header, branch_inst, layout) {
+            if result.is_some() {
+                // We have already found one, so there is more than one; no natural pre-header.
+                return None;
+            }
+            if branch_inst != layout.last_inst(pred_block).unwrap()
+                || cfg.succ_iter(pred_block).nth(1).is_some()
+            {
+                // It's along a critical edge, so don't use it.
+                return None;
+            }
+            result = Some((pred_block, branch_inst));
+        }
+    }
+    result
+}
+
+/// Test whether the given opcode is unsafe to even consider for LICM.
+fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
+    opcode.can_store()
+        || opcode.is_call()
+        || opcode.is_branch()
+        || opcode.is_terminator()
+        || opcode.is_return()
+        || opcode.can_trap()
+        || opcode.other_side_effects()
+        || opcode.writes_cpu_flags()
+}
+
+fn is_unsafe_load(inst_data: &InstructionData) -> bool {
+    match *inst_data {
+        InstructionData::Load { flags, .. } | InstructionData::LoadComplex { flags, .. } => {
+            !flags.readonly() || !flags.notrap()
+        }
+        _ => inst_data.opcode().can_load(),
+    }
+}
+
+/// Test whether the given instruction is loop-invariant.
+fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &FxHashSet<Value>) -> bool {
+    if trivially_unsafe_for_licm(dfg[inst].opcode()) {
+        return false;
+    }
+
+    if is_unsafe_load(&dfg[inst]) {
+        return false;
+    }
+
+    let inst_args = dfg.inst_args(inst);
+    for arg in inst_args {
+        let arg = dfg.resolve_aliases(*arg);
+        if loop_values.contains(&arg) {
+            return false;
+        }
+    }
+    true
+}
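The invariant test above boils down to: no side effects, no unsafe load, and no argument defined inside the loop. A minimal self-contained sketch of that decision, with a hypothetical toy instruction type rather than Cranelift's `DataFlowGraph`:

```rust
// Toy illustration of the hoistability test used by `is_loop_invariant`.
use std::collections::HashSet;

struct Inst {
    has_side_effects: bool,
    args: Vec<u32>,    // operand value ids
    results: Vec<u32>, // result value ids
}

fn hoistable(inst: &Inst, loop_values: &HashSet<u32>) -> bool {
    !inst.has_side_effects && inst.args.iter().all(|a| !loop_values.contains(a))
}

fn main() {
    // v1 is defined inside the loop (e.g. a block parameter of the header).
    let mut loop_values: HashSet<u32> = [1].into_iter().collect();
    // v2 = iadd v0, v0 -- only uses values from outside the loop: hoistable.
    let a = Inst { has_side_effects: false, args: vec![0, 0], results: vec![2] };
    assert!(hoistable(&a, &loop_values));
    // v3 = iadd v1, v0 -- uses the loop-defined v1: stays in the loop,
    // and its results become loop values in turn.
    let b = Inst { has_side_effects: false, args: vec![1, 0], results: vec![3] };
    assert!(!hoistable(&b, &loop_values));
    loop_values.extend(b.results.iter().copied());
}
```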
+// Traverses a loop in reverse post-order from a header block and identifies loop-invariant
+// instructions. These loop-invariant instructions are then removed from the code and returned
+// (in reverse post-order) for later use.
+fn remove_loop_invariant_instructions(
+    lp: Loop,
+    func: &mut Function,
+    cfg: &ControlFlowGraph,
+    loop_analysis: &LoopAnalysis,
+) -> Vec<Inst> {
+    let mut loop_values: FxHashSet<Value> = FxHashSet();
+    let mut invariant_insts: Vec<Inst> = Vec::new();
+    let mut pos = FuncCursor::new(func);
+    // We traverse the loop blocks in reverse post-order.
+    for block in postorder_blocks_loop(loop_analysis, cfg, lp).iter().rev() {
+        // Arguments of the block are loop values.
+        for val in pos.func.dfg.block_params(*block) {
+            loop_values.insert(*val);
+        }
+        pos.goto_top(*block);
+        #[cfg_attr(feature = "cargo-clippy", allow(clippy::block_in_if_condition_stmt))]
+        while let Some(inst) = pos.next_inst() {
+            if is_loop_invariant(inst, &pos.func.dfg, &loop_values) {
+                // All the instruction's arguments are defined outside the loop, so this
+                // instruction is loop-invariant: we remove it from the loop.
+                invariant_insts.push(inst);
+                pos.remove_inst_and_step_back();
+            } else {
+                // If the instruction is not loop-invariant we push its results in the set of
+                // loop values.
+                for out in pos.func.dfg.inst_results(inst) {
+                    loop_values.insert(*out);
+                }
+            }
+        }
+    }
+    invariant_insts
+}
+
+/// Return blocks from a loop in post-order, starting from an entry point in the block.
+fn postorder_blocks_loop(
+    loop_analysis: &LoopAnalysis,
+    cfg: &ControlFlowGraph,
+    lp: Loop,
+) -> Vec<Block> {
+    let mut grey = FxHashSet();
+    let mut black = FxHashSet();
+    let mut stack = vec![loop_analysis.loop_header(lp)];
+    let mut postorder = Vec::new();
+
+    while !stack.is_empty() {
+        let node = stack.pop().unwrap();
+        if !grey.contains(&node) {
+            // This is a white node. Mark it as gray.
+            grey.insert(node);
+            stack.push(node);
+            // Get any children we've never seen before.
+            for child in cfg.succ_iter(node) {
+                if loop_analysis.is_in_loop(child, lp) && !grey.contains(&child) {
+                    stack.push(child);
+                }
+            }
+        } else if !black.contains(&node) {
+            postorder.push(node);
+            black.insert(node);
+        }
+    }
+    postorder
+}
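The grey/black traversal above is a standard iterative post-order. Here is an illustrative, self-contained version of the same idea on a plain adjacency-list graph (toy types, not Cranelift's CFG): each node is pushed once, re-pushed when first visited, and emitted only after all of its successors are done.

```rust
use std::collections::HashSet;

fn postorder(succs: &[Vec<usize>], root: usize) -> Vec<usize> {
    let (mut grey, mut black) = (HashSet::new(), HashSet::new());
    let (mut stack, mut order) = (vec![root], Vec::new());
    while let Some(node) = stack.pop() {
        if grey.insert(node) {
            // First visit: re-push the node, then its unseen successors.
            stack.push(node);
            for &child in &succs[node] {
                if !grey.contains(&child) {
                    stack.push(child);
                }
            }
        } else if black.insert(node) {
            // Second visit: all successors are done, emit the node.
            order.push(node);
        }
    }
    order
}

fn main() {
    // 0 -> 1 -> 2 -> 0 (a simple loop body rooted at its header).
    let succs = vec![vec![1], vec![2], vec![0]];
    assert_eq!(postorder(&succs, 0), vec![2, 1, 0]);
}
```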
diff --git a/cranelift/codegen/src/loop_analysis.rs b/cranelift/codegen/src/loop_analysis.rs
new file mode 100644
index 0000000000..dc659bc5f2
--- /dev/null
+++ b/cranelift/codegen/src/loop_analysis.rs
@@ -0,0 +1,349 @@
+//! A loop analysis represented as mappings of loops to their header Block
+//! and parent in the loop tree.
+
+use crate::dominator_tree::DominatorTree;
+use crate::entity::entity_impl;
+use crate::entity::SecondaryMap;
+use crate::entity::{Keys, PrimaryMap};
+use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
+use crate::ir::{Block, Function, Layout};
+use crate::packed_option::PackedOption;
+use crate::timing;
+use alloc::vec::Vec;
+
+/// An opaque reference to a code loop.
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+pub struct Loop(u32);
+entity_impl!(Loop, "loop");
+
+/// Loop tree information for a single function.
+///
+/// Loops are referenced by the Loop object, and for each loop you can access its header block,
+/// its parent in the loop tree (if any), and all the blocks belonging to the loop.
+pub struct LoopAnalysis {
+    loops: PrimaryMap<Loop, LoopData>,
+    block_loop_map: SecondaryMap<Block, PackedOption<Loop>>,
+    valid: bool,
+}
+
+struct LoopData {
+    header: Block,
+    parent: PackedOption<Loop>,
+}
+
+impl LoopData {
+    /// Creates a `LoopData` object with the loop header and its parent in the loop tree, if any.
+    pub fn new(header: Block, parent: Option<Loop>) -> Self {
+        Self {
+            header,
+            parent: parent.into(),
+        }
+    }
+}
+
+/// Methods for querying the loop analysis.
+impl LoopAnalysis {
+    /// Allocate a new blank loop analysis struct. Use `compute` to compute the loop analysis for
+    /// a function.
+    pub fn new() -> Self {
+        Self {
+            valid: false,
+            loops: PrimaryMap::new(),
+            block_loop_map: SecondaryMap::new(),
+        }
+    }
+
+    /// Returns all the loops contained in a function.
+    pub fn loops(&self) -> Keys<Loop> {
+        self.loops.keys()
+    }
+
+    /// Returns the header block of a particular loop.
+    ///
+    /// The characteristic property of a loop header block is that it dominates some of its
+    /// predecessors.
+    pub fn loop_header(&self, lp: Loop) -> Block {
+        self.loops[lp].header
+    }
+
+    /// Return the parent of a loop in the loop tree, if any.
+    pub fn loop_parent(&self, lp: Loop) -> Option<Loop> {
+        self.loops[lp].parent.expand()
+    }
+
+    /// Determine if a Block belongs to a loop by running a finger along the loop tree.
+    ///
+    /// Returns `true` if `block` is in loop `lp`.
+    pub fn is_in_loop(&self, block: Block, lp: Loop) -> bool {
+        let block_loop = self.block_loop_map[block];
+        match block_loop.expand() {
+            None => false,
+            Some(block_loop) => self.is_child_loop(block_loop, lp),
+        }
+    }
+
+    /// Determines if a loop is contained in another loop.
+    ///
+    /// `is_child_loop(child, parent)` returns `true` if and only if `child` is a child loop of
+    /// `parent` (or `child == parent`).
+    pub fn is_child_loop(&self, child: Loop, parent: Loop) -> bool {
+        let mut finger = Some(child);
+        while let Some(finger_loop) = finger {
+            if finger_loop == parent {
+                return true;
+            }
+            finger = self.loop_parent(finger_loop);
+        }
+        false
+    }
+}
+
+impl LoopAnalysis {
+    /// Detects the loops in a function. Needs the control flow graph and the dominator tree.
+    pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph, domtree: &DominatorTree) {
+        let _tt = timing::loop_analysis();
+        self.loops.clear();
+        self.block_loop_map.clear();
+        self.block_loop_map.resize(func.dfg.num_blocks());
+        self.find_loop_headers(cfg, domtree, &func.layout);
+        self.discover_loop_blocks(cfg, domtree, &func.layout);
+        self.valid = true;
+    }
+
+    /// Check if the loop analysis is in a valid state.
+    ///
+    /// Note that this doesn't perform any kind of validity checks. It simply checks if the
+    /// `compute()` method has been called since the last `clear()`. It does not check that the
+    /// loop analysis is consistent with the CFG.
+    pub fn is_valid(&self) -> bool {
+        self.valid
+    }
+
+    /// Clear all the data structures contained in the loop analysis. This will leave the
+    /// analysis in a similar state to a context returned by `new()`, except that allocated
+    /// memory will be retained.
+    pub fn clear(&mut self) {
+        self.loops.clear();
+        self.block_loop_map.clear();
+        self.valid = false;
+    }
+
+    // Traverses the CFG in reverse postorder and creates a loop object for every block having a
+    // back edge.
+    fn find_loop_headers(
+        &mut self,
+        cfg: &ControlFlowGraph,
+        domtree: &DominatorTree,
+        layout: &Layout,
+    ) {
+        // We traverse the CFG in reverse postorder.
+        for &block in domtree.cfg_postorder().iter().rev() {
+            for BlockPredecessor {
+                inst: pred_inst, ..
+            } in cfg.pred_iter(block)
+            {
+                // If the block dominates one of its predecessors it is a back edge.
+                if domtree.dominates(block, pred_inst, layout) {
+                    // This block is a loop header, so we create its associated loop.
+                    let lp = self.loops.push(LoopData::new(block, None));
+                    self.block_loop_map[block] = lp.into();
+                    // We break because we only need one back edge to identify a loop header.
+                    break;
+                }
+            }
+        }
+    }
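The finger walk in `is_child_loop` above amounts to climbing an optional-parent chain. A tiny self-contained model of it, with plain indices standing in for `Loop` entities (illustrative only):

```rust
// Each loop optionally points at its parent; membership in an enclosing
// loop is decided by walking up the chain until it matches or runs out.
fn is_child_loop(parents: &[Option<usize>], mut child: usize, parent: usize) -> bool {
    loop {
        if child == parent {
            return true;
        }
        match parents[child] {
            Some(p) => child = p,
            None => return false,
        }
    }
}

fn main() {
    // Loop 0 is outermost; loop 1 nests in 0; loop 2 nests in 1.
    let parents = [None, Some(0), Some(1)];
    assert!(is_child_loop(&parents, 2, 0));
    assert!(is_child_loop(&parents, 1, 1));
    assert!(!is_child_loop(&parents, 0, 2));
}
```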
+    // Intended to be called after `find_loop_headers`. For each detected loop header,
+    // discovers all the blocks belonging to the loop and its inner loops. After a call to this
+    // function, the loop tree is fully constructed.
+    fn discover_loop_blocks(
+        &mut self,
+        cfg: &ControlFlowGraph,
+        domtree: &DominatorTree,
+        layout: &Layout,
+    ) {
+        let mut stack: Vec<Block> = Vec::new();
+        // We handle each loop header in reverse order, corresponding to a pseudo postorder
+        // traversal of the graph.
+        for lp in self.loops().rev() {
+            for BlockPredecessor {
+                block: pred,
+                inst: pred_inst,
+            } in cfg.pred_iter(self.loops[lp].header)
+            {
+                // We follow the back edges.
+                if domtree.dominates(self.loops[lp].header, pred_inst, layout) {
+                    stack.push(pred);
+                }
+            }
+            while let Some(node) = stack.pop() {
+                let continue_dfs: Option<Block>;
+                match self.block_loop_map[node].expand() {
+                    None => {
+                        // The node hasn't been visited yet, so we tag it as part of the loop.
+                        self.block_loop_map[node] = PackedOption::from(lp);
+                        continue_dfs = Some(node);
+                    }
+                    Some(node_loop) => {
+                        // Take a local copy of `node_loop` that we update as we walk up the
+                        // loop tree.
+                        let mut node_loop = node_loop;
+                        // The node is part of a loop, which can be lp or an inner loop.
+                        let mut node_loop_parent_option = self.loops[node_loop].parent;
+                        while let Some(node_loop_parent) = node_loop_parent_option.expand() {
+                            if node_loop_parent == lp {
+                                // We have encountered lp, so we stop (already visited).
+                                break;
+                            } else {
+                                // Keep climbing the loop tree.
+                                node_loop = node_loop_parent;
+                                // Look up the next parent loop.
+                                node_loop_parent_option = self.loops[node_loop].parent;
+                            }
+                        }
+                        // Now node_loop_parent_option is either:
+                        // - None, and node_loop is a new inner loop of lp;
+                        // - Some(...), and the initial node_loop was a known inner loop of lp.
+                        match node_loop_parent_option.expand() {
+                            Some(_) => continue_dfs = None,
+                            None => {
+                                if node_loop != lp {
+                                    self.loops[node_loop].parent = lp.into();
+                                    continue_dfs = Some(self.loops[node_loop].header)
+                                } else {
+                                    // If lp is a one-block loop then we make sure we stop.
+                                    continue_dfs = None
+                                }
+                            }
+                        }
+                    }
+                }
+                // Now we have handled the popped node and need to continue the DFS by adding the
+                // predecessors of that node.
+                if let Some(continue_dfs) = continue_dfs {
+                    for BlockPredecessor { block: pred, .. } in cfg.pred_iter(continue_dfs) {
+                        stack.push(pred)
+                    }
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::cursor::{Cursor, FuncCursor};
+    use crate::dominator_tree::DominatorTree;
+    use crate::flowgraph::ControlFlowGraph;
+    use crate::ir::{types, Function, InstBuilder};
+    use crate::loop_analysis::{Loop, LoopAnalysis};
+    use alloc::vec::Vec;
+
+    #[test]
+    fn nested_loops_detection() {
+        let mut func = Function::new();
+        let block0 = func.dfg.make_block();
+        let block1 = func.dfg.make_block();
+        let block2 = func.dfg.make_block();
+        let block3 = func.dfg.make_block();
+        let cond = func.dfg.append_block_param(block0, types::I32);
+
+        {
+            let mut cur = FuncCursor::new(&mut func);
+
+            cur.insert_block(block0);
+            cur.ins().jump(block1, &[]);
+
+            cur.insert_block(block1);
+            cur.ins().jump(block2, &[]);
+
+            cur.insert_block(block2);
+            cur.ins().brnz(cond, block1, &[]);
+            cur.ins().jump(block3, &[]);
+
+            cur.insert_block(block3);
+            cur.ins().brnz(cond, block0, &[]);
+        }
+
+        let mut loop_analysis = LoopAnalysis::new();
+        let mut cfg = ControlFlowGraph::new();
+        let mut domtree = DominatorTree::new();
+        cfg.compute(&func);
+        domtree.compute(&func, &cfg);
+        loop_analysis.compute(&func, &cfg, &domtree);
+
+        let loops = loop_analysis.loops().collect::<Vec<Loop>>();
+        assert_eq!(loops.len(), 2);
+        assert_eq!(loop_analysis.loop_header(loops[0]), block0);
+        assert_eq!(loop_analysis.loop_header(loops[1]), block1);
+        assert_eq!(loop_analysis.loop_parent(loops[1]), Some(loops[0]));
+        assert_eq!(loop_analysis.loop_parent(loops[0]), None);
+        assert_eq!(loop_analysis.is_in_loop(block0, loops[0]), true);
+        assert_eq!(loop_analysis.is_in_loop(block0, loops[1]), false);
+        assert_eq!(loop_analysis.is_in_loop(block1, loops[1]), true);
+        assert_eq!(loop_analysis.is_in_loop(block1, loops[0]), true);
+        assert_eq!(loop_analysis.is_in_loop(block2, loops[1]), true);
+        assert_eq!(loop_analysis.is_in_loop(block2, loops[0]), true);
+        assert_eq!(loop_analysis.is_in_loop(block3, loops[0]), true);
+        assert_eq!(loop_analysis.is_in_loop(block0, loops[1]), false);
+    }
+
+    #[test]
+    fn complex_loop_detection() {
+        let mut func = Function::new();
+        let block0 = func.dfg.make_block();
+        let block1 = func.dfg.make_block();
+        let block2 = func.dfg.make_block();
+        let block3 = func.dfg.make_block();
+        let block4 = func.dfg.make_block();
+        let block5 = func.dfg.make_block();
+        let cond = func.dfg.append_block_param(block0, types::I32);
+
+        {
+            let mut cur = FuncCursor::new(&mut func);
+
+            cur.insert_block(block0);
+            cur.ins().brnz(cond, block1, &[]);
+            cur.ins().jump(block3, &[]);
+
+            cur.insert_block(block1);
+            cur.ins().jump(block2, &[]);
+
+            cur.insert_block(block2);
+            cur.ins().brnz(cond, block1, &[]);
+            cur.ins().jump(block5, &[]);
+
+            cur.insert_block(block3);
+            cur.ins().jump(block4, &[]);
+
+            cur.insert_block(block4);
+            cur.ins().brnz(cond, block3, &[]);
+            cur.ins().jump(block5, &[]);
+
+            cur.insert_block(block5);
+            cur.ins().brnz(cond, block0, &[]);
+        }
+
+        let mut loop_analysis = LoopAnalysis::new();
+        let mut cfg = ControlFlowGraph::new();
+        let mut domtree = DominatorTree::new();
+        cfg.compute(&func);
+        domtree.compute(&func, &cfg);
+        loop_analysis.compute(&func, &cfg, &domtree);
+
+        let loops = loop_analysis.loops().collect::<Vec<Loop>>();
+        assert_eq!(loops.len(), 3);
+        assert_eq!(loop_analysis.loop_header(loops[0]), block0);
+        assert_eq!(loop_analysis.loop_header(loops[1]), block1);
+        assert_eq!(loop_analysis.loop_header(loops[2]), block3);
+        assert_eq!(loop_analysis.loop_parent(loops[1]), Some(loops[0]));
+        assert_eq!(loop_analysis.loop_parent(loops[2]), Some(loops[0]));
+        assert_eq!(loop_analysis.loop_parent(loops[0]), None);
+        assert_eq!(loop_analysis.is_in_loop(block0, loops[0]), true);
+        assert_eq!(loop_analysis.is_in_loop(block1, loops[1]), true);
+        assert_eq!(loop_analysis.is_in_loop(block2, loops[1]), true);
+        assert_eq!(loop_analysis.is_in_loop(block3, loops[2]), true);
+        assert_eq!(loop_analysis.is_in_loop(block4, loops[2]), true);
+        assert_eq!(loop_analysis.is_in_loop(block5, loops[0]), true);
+    }
+}
diff --git a/cranelift/codegen/src/nan_canonicalization.rs b/cranelift/codegen/src/nan_canonicalization.rs
new file mode 100644
index 0000000000..e7c0e53419
--- /dev/null
+++ b/cranelift/codegen/src/nan_canonicalization.rs
@@ -0,0 +1,85 @@
+//! A NaN-canonicalizing rewriting pass. Patch floating point arithmetic
+//! instructions that may return a NaN result with a sequence of operations
+//! that will replace nondeterministic NaNs with a single canonical NaN value.
+
+use crate::cursor::{Cursor, FuncCursor};
+use crate::ir::condcodes::FloatCC;
+use crate::ir::immediates::{Ieee32, Ieee64};
+use crate::ir::types;
+use crate::ir::types::Type;
+use crate::ir::{Function, Inst, InstBuilder, InstructionData, Opcode, Value};
+use crate::timing;
+
+// Canonical 32-bit and 64-bit NaN values.
+static CANON_32BIT_NAN: u32 = 0b01111111110000000000000000000000;
+static CANON_64BIT_NAN: u64 =
+    0b0111111111111000000000000000000000000000000000000000000000000000;
+
+/// Perform the NaN canonicalization pass.
+pub fn do_nan_canonicalization(func: &mut Function) {
+    let _tt = timing::canonicalize_nans();
+    let mut pos = FuncCursor::new(func);
+    while let Some(_block) = pos.next_block() {
+        while let Some(inst) = pos.next_inst() {
+            if is_fp_arith(&mut pos, inst) {
+                add_nan_canon_seq(&mut pos, inst);
+            }
+        }
+    }
+}
+
+/// Returns true/false based on whether the instruction is a floating-point
+/// arithmetic operation. This ignores operations like `fneg`, `fabs`, or
+/// `fcopysign` that only operate on the sign bit of a floating point value.
+fn is_fp_arith(pos: &mut FuncCursor, inst: Inst) -> bool {
+    match pos.func.dfg[inst] {
+        InstructionData::Unary { opcode, .. } => {
+            opcode == Opcode::Ceil
+                || opcode == Opcode::Floor
+                || opcode == Opcode::Nearest
+                || opcode == Opcode::Sqrt
+                || opcode == Opcode::Trunc
+        }
+        InstructionData::Binary { opcode, .. } => {
+            opcode == Opcode::Fadd
+                || opcode == Opcode::Fdiv
+                || opcode == Opcode::Fmax
+                || opcode == Opcode::Fmin
+                || opcode == Opcode::Fmul
+                || opcode == Opcode::Fsub
+        }
+        InstructionData::Ternary { opcode, .. } => opcode == Opcode::Fma,
+        _ => false,
+    }
+}
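For intuition, this is the net effect of the canonicalization sequence on a single value, sketched on plain `f32` (the pass itself emits `fcmp`/`select` instructions rather than calling a function):

```rust
// The same bit pattern as CANON_32BIT_NAN in the pass above.
const CANON_32BIT_NAN: u32 = 0b0111_1111_1100_0000_0000_0000_0000_0000;

fn canonicalize(x: f32) -> f32 {
    // `x != x` is true exactly when x is NaN, mirroring the
    // `fcmp NotEqual x, x` check the pass inserts.
    if x != x {
        f32::from_bits(CANON_32BIT_NAN)
    } else {
        x
    }
}

fn main() {
    // A NaN with a nondeterministic payload maps to the canonical NaN.
    let weird_nan = f32::from_bits(0x7FC0_0001);
    assert_eq!(canonicalize(weird_nan).to_bits(), CANON_32BIT_NAN);
    // Non-NaN values pass through unchanged.
    assert_eq!(canonicalize(1.5), 1.5);
}
```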
+/// Append a sequence of canonicalizing instructions after the given instruction.
+fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst) {
+    // Select the instruction result and result type. Replace the instruction
+    // result and step forward before inserting the canonicalization sequence.
+    let val = pos.func.dfg.first_result(inst);
+    let val_type = pos.func.dfg.value_type(val);
+    let new_res = pos.func.dfg.replace_result(val, val_type);
+    let _next_inst = pos.next_inst().expect("block missing terminator!");
+
+    // Insert a comparison instruction to check if `new_res` is NaN. Select
+    // the canonical NaN value if it is, and assign the result to `val`.
+    let is_nan = pos.ins().fcmp(FloatCC::NotEqual, new_res, new_res);
+    let canon_nan = insert_nan_const(pos, val_type);
+    pos.ins()
+        .with_result(val)
+        .select(is_nan, canon_nan, new_res);
+
+    pos.prev_inst(); // Step backwards so the pass does not skip instructions.
+}
+
+/// Insert a canonical 32-bit or 64-bit NaN constant at the current position.
+fn insert_nan_const(pos: &mut FuncCursor, nan_type: Type) -> Value {
+    match nan_type {
+        types::F32 => pos.ins().f32const(Ieee32::with_bits(CANON_32BIT_NAN)),
+        types::F64 => pos.ins().f64const(Ieee64::with_bits(CANON_64BIT_NAN)),
+        _ => {
+            // Panic if the type given was not an IEEE floating point type.
+            panic!("Could not canonicalize NaN: Unexpected result type found.");
+        }
+    }
+}
diff --git a/cranelift/codegen/src/partition_slice.rs b/cranelift/codegen/src/partition_slice.rs
new file mode 100644
index 0000000000..959f8c1102
--- /dev/null
+++ b/cranelift/codegen/src/partition_slice.rs
@@ -0,0 +1,97 @@
+//! Rearrange the elements in a slice according to a predicate.
+
+use core::mem;
+
+/// Rearrange the elements of the mutable slice `s` such that elements where `p(t)` is true precede
+/// the elements where `p(t)` is false.
+///
+/// The order of elements is not preserved, unless the slice is already partitioned.
+///
+/// Returns the number of elements where `p(t)` is true.
+pub fn partition_slice<T, F>(s: &mut [T], mut p: F) -> usize
+where
+    F: FnMut(&T) -> bool,
+{
+    // The iterator works like a deque which we can pop from both ends.
+    let mut i = s.iter_mut();
+
+    // Number of elements for which the predicate is known to be true.
+    let mut pos = 0;
+
+    loop {
+        // Find the first element for which the predicate fails.
+        let head = loop {
+            match i.next() {
+                Some(head) => {
+                    if !p(&head) {
+                        break head;
+                    }
+                }
+                None => return pos,
+            }
+            pos += 1;
+        };
+
+        // Find the last element for which the predicate succeeds.
+        let tail = loop {
+            match i.next_back() {
+                Some(tail) => {
+                    if p(&tail) {
+                        break tail;
+                    }
+                }
+                None => return pos,
+            }
+        };
+
+        // Swap the two elements into the right order.
+        mem::swap(head, tail);
+        pos += 1;
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::partition_slice;
+    use alloc::vec::Vec;
+
+    fn check(x: &[u32], want: &[u32]) {
+        assert_eq!(x.len(), want.len());
+        let want_count = want.iter().cloned().filter(|&x| x % 10 == 0).count();
+        let mut v = Vec::new();
+        v.extend(x.iter().cloned());
+        let count = partition_slice(&mut v[..], |&x| x % 10 == 0);
+        assert_eq!(v, want);
+        assert_eq!(count, want_count);
+    }
+
+    #[test]
+    fn empty() {
+        check(&[], &[]);
+    }
+
+    #[test]
+    fn singles() {
+        check(&[0], &[0]);
+        check(&[1], &[1]);
+        check(&[10], &[10]);
+    }
+
+    #[test]
+    fn doubles() {
+        check(&[0, 0], &[0, 0]);
+        check(&[0, 5], &[0, 5]);
+        check(&[5, 0], &[0, 5]);
+        check(&[5, 4], &[5, 4]);
+    }
+
+    #[test]
+    fn longer() {
+        check(&[1, 2, 3], &[1, 2, 3]);
+        check(&[1, 2, 10], &[10, 2, 1]); // Note: 2, 1 order not required.
+        check(&[1, 10, 2], &[10, 1, 2]); // Note: 1, 2 order not required.
+        check(&[1, 20, 10], &[10, 20, 1]); // Note: 10, 20 order not required.
+        check(&[1, 20, 3, 10], &[10, 20, 3, 1]);
+        check(&[20, 3, 10, 1], &[20, 10, 3, 1]);
+    }
+}
diff --git a/cranelift/codegen/src/postopt.rs b/cranelift/codegen/src/postopt.rs
new file mode 100644
index 0000000000..42121817d5
--- /dev/null
+++ b/cranelift/codegen/src/postopt.rs
@@ -0,0 +1,386 @@
+//! A post-legalization rewriting pass.
+
+#![allow(non_snake_case)]
+
+use crate::cursor::{Cursor, EncCursor};
+use crate::ir::condcodes::{CondCode, FloatCC, IntCC};
+use crate::ir::dfg::ValueDef;
+use crate::ir::immediates::{Imm64, Offset32};
+use crate::ir::instructions::{Opcode, ValueList};
+use crate::ir::{Block, Function, Inst, InstBuilder, InstructionData, MemFlags, Type, Value};
+use crate::isa::TargetIsa;
+use crate::timing;
+
+/// Information collected about a compare+branch sequence.
+struct CmpBrInfo {
+    /// The branch instruction.
+    br_inst: Inst,
+    /// The icmp, icmp_imm, or fcmp instruction.
+    cmp_inst: Inst,
+    /// The destination of the branch.
+    destination: Block,
+    /// The arguments of the branch.
+    args: ValueList,
+    /// The first argument to the comparison. The second is in the `kind` field.
+    cmp_arg: Value,
+    /// If the branch is `brz` rather than `brnz`, we need to invert the condition
+    /// before the branch.
+    invert_branch_cond: bool,
+    /// The kind of comparison, and the second argument.
+    kind: CmpBrKind,
+}
+
+enum CmpBrKind {
+    Icmp { cond: IntCC, arg: Value },
+    IcmpImm { cond: IntCC, imm: Imm64 },
+    Fcmp { cond: FloatCC, arg: Value },
+}
+
+/// Optimize comparisons to use flags values, to avoid materializing conditions
+/// in integer registers.
+///
+/// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff
+/// sequences.
+fn optimize_cpu_flags(
+    pos: &mut EncCursor,
+    inst: Inst,
+    last_flags_clobber: Option<Inst>,
+    isa: &dyn TargetIsa,
+) {
+    // Look for compare and branch patterns.
+    // This code could be considerably simplified with non-lexical lifetimes.
+    let info = match pos.func.dfg[inst] {
+        InstructionData::Branch {
+            opcode,
+            destination,
+            ref args,
+        } => {
+            let first_arg = args.first(&pos.func.dfg.value_lists).unwrap();
+            let invert_branch_cond = match opcode {
+                Opcode::Brz => true,
+                Opcode::Brnz => false,
+                _ => panic!(),
+            };
+            if let ValueDef::Result(cond_inst, _) = pos.func.dfg.value_def(first_arg) {
+                match pos.func.dfg[cond_inst] {
+                    InstructionData::IntCompare {
+                        cond,
+                        args: cmp_args,
+                        ..
+                    } => CmpBrInfo {
+                        br_inst: inst,
+                        cmp_inst: cond_inst,
+                        destination,
+                        args: args.clone(),
+                        cmp_arg: cmp_args[0],
+                        invert_branch_cond,
+                        kind: CmpBrKind::Icmp {
+                            cond,
+                            arg: cmp_args[1],
+                        },
+                    },
+                    InstructionData::IntCompareImm {
+                        cond,
+                        arg: cmp_arg,
+                        imm: cmp_imm,
+                        ..
+                    } => CmpBrInfo {
+                        br_inst: inst,
+                        cmp_inst: cond_inst,
+                        destination,
+                        args: args.clone(),
+                        cmp_arg,
+                        invert_branch_cond,
+                        kind: CmpBrKind::IcmpImm { cond, imm: cmp_imm },
+                    },
+                    InstructionData::FloatCompare {
+                        cond,
+                        args: cmp_args,
+                        ..
+                    } => CmpBrInfo {
+                        br_inst: inst,
+                        cmp_inst: cond_inst,
+                        destination,
+                        args: args.clone(),
+                        cmp_arg: cmp_args[0],
+                        invert_branch_cond,
+                        kind: CmpBrKind::Fcmp {
+                            cond,
+                            arg: cmp_args[1],
+                        },
+                    },
+                    _ => return,
+                }
+            } else {
+                return;
+            }
+        }
+        // TODO: trapif, trueif, selectif, and their ff counterparts.
+        _ => return,
+    };
+
+    // If any instructions clobber the flags between the comparison and the branch,
+    // don't optimize them.
+    if last_flags_clobber != Some(info.cmp_inst) {
+        return;
+    }
+
+    // We found a compare+branch pattern. Transform it to use flags.
+    let args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec();
+    pos.goto_inst(info.cmp_inst);
+    pos.use_srcloc(info.cmp_inst);
+    match info.kind {
+        CmpBrKind::Icmp { mut cond, arg } => {
+            let flags = pos.ins().ifcmp(info.cmp_arg, arg);
+            pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
+            if info.invert_branch_cond {
+                cond = cond.inverse();
+            }
+            pos.func
+                .dfg
+                .replace(info.br_inst)
+                .brif(cond, flags, info.destination, &args);
+        }
+        CmpBrKind::IcmpImm { mut cond, imm } => {
+            let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm);
+            pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
+            if info.invert_branch_cond {
+                cond = cond.inverse();
+            }
+            pos.func
+                .dfg
+                .replace(info.br_inst)
+                .brif(cond, flags, info.destination, &args);
+        }
+        CmpBrKind::Fcmp { mut cond, arg } => {
+            let flags = pos.ins().ffcmp(info.cmp_arg, arg);
+            pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags);
+            if info.invert_branch_cond {
+                cond = cond.inverse();
+            }
+            pos.func
+                .dfg
+                .replace(info.br_inst)
+                .brff(cond, flags, info.destination, &args);
+        }
+    }
+    let ok = pos.func.update_encoding(info.cmp_inst, isa).is_ok();
+    debug_assert!(ok);
+    let ok = pos.func.update_encoding(info.br_inst, isa).is_ok();
+    debug_assert!(ok);
+}
+
+struct MemOpInfo {
+    opcode: Opcode,
+    itype: Type,
+    arg: Value,
+    st_arg: Option<Value>,
+    flags: MemFlags,
+    offset: Offset32,
+}
+
+fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetIsa) {
+    // Look for simple loads and stores we can optimize.
+    let info = match pos.func.dfg[inst] {
+        InstructionData::Load {
+            opcode,
+            arg,
+            flags,
+            offset,
+        } => MemOpInfo {
+            opcode,
+            itype: pos.func.dfg.ctrl_typevar(inst),
+            arg,
+            st_arg: None,
+            flags,
+            offset,
+        },
+        InstructionData::Store {
+            opcode,
+            args,
+            flags,
+            offset,
+        } => MemOpInfo {
+            opcode,
+            itype: pos.func.dfg.ctrl_typevar(inst),
+            arg: args[1],
+            st_arg: Some(args[0]),
+            flags,
+            offset,
+        },
+        _ => return,
+    };
+
+    // Examine the instruction that defines the address operand.
+    if let ValueDef::Result(result_inst, _) = pos.func.dfg.value_def(info.arg) {
+        match pos.func.dfg[result_inst] {
+            InstructionData::Binary {
+                opcode: Opcode::Iadd,
+                args,
+            } => match info.opcode {
+                // Operand is an iadd. Fold it into a memory address with a complex address mode.
+ Opcode::Load => { + pos.func.dfg.replace(inst).load_complex( + info.itype, + info.flags, + &args, + info.offset, + ); + } + Opcode::Uload8 => { + pos.func.dfg.replace(inst).uload8_complex( + info.itype, + info.flags, + &args, + info.offset, + ); + } + Opcode::Sload8 => { + pos.func.dfg.replace(inst).sload8_complex( + info.itype, + info.flags, + &args, + info.offset, + ); + } + Opcode::Uload16 => { + pos.func.dfg.replace(inst).uload16_complex( + info.itype, + info.flags, + &args, + info.offset, + ); + } + Opcode::Sload16 => { + pos.func.dfg.replace(inst).sload16_complex( + info.itype, + info.flags, + &args, + info.offset, + ); + } + Opcode::Uload32 => { + pos.func + .dfg + .replace(inst) + .uload32_complex(info.flags, &args, info.offset); + } + Opcode::Sload32 => { + pos.func + .dfg + .replace(inst) + .sload32_complex(info.flags, &args, info.offset); + } + Opcode::Store => { + pos.func.dfg.replace(inst).store_complex( + info.flags, + info.st_arg.unwrap(), + &args, + info.offset, + ); + } + Opcode::Istore8 => { + pos.func.dfg.replace(inst).istore8_complex( + info.flags, + info.st_arg.unwrap(), + &args, + info.offset, + ); + } + Opcode::Istore16 => { + pos.func.dfg.replace(inst).istore16_complex( + info.flags, + info.st_arg.unwrap(), + &args, + info.offset, + ); + } + Opcode::Istore32 => { + pos.func.dfg.replace(inst).istore32_complex( + info.flags, + info.st_arg.unwrap(), + &args, + info.offset, + ); + } + _ => panic!("Unsupported load or store opcode"), + }, + InstructionData::BinaryImm { + opcode: Opcode::IaddImm, + arg, + imm, + } => match pos.func.dfg[inst] { + // Operand is an iadd_imm. Fold the immediate into the offset if possible. + InstructionData::Load { + arg: ref mut load_arg, + ref mut offset, + .. + } => { + if let Some(imm) = offset.try_add_i64(imm.into()) { + *load_arg = arg; + *offset = imm; + } else { + // Overflow. + return; + } + } + InstructionData::Store { + args: ref mut store_args, + ref mut offset, + .. + } => { + if let Some(imm) = offset.try_add_i64(imm.into()) { + store_args[1] = arg; + *offset = imm; + } else { + // Overflow. + return; + } + } + _ => panic!(), + }, + _ => { + // Address value is defined by some other kind of instruction. + return; + } + } + } else { + // Address value is not the result of an instruction. + return; + } + + let ok = pos.func.update_encoding(inst, isa).is_ok(); + debug_assert!(ok); +} + +//---------------------------------------------------------------------- +// +// The main post-opt pass. + +pub fn do_postopt(func: &mut Function, isa: &dyn TargetIsa) { + let _tt = timing::postopt(); + let mut pos = EncCursor::new(func, isa); + while let Some(_block) = pos.next_block() { + let mut last_flags_clobber = None; + while let Some(inst) = pos.next_inst() { + if isa.uses_cpu_flags() { + // Optimize instructions to make use of flags. + optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa); + + // Track the most recent seen instruction that clobbers the flags. + if let Some(constraints) = isa + .encoding_info() + .operand_constraints(pos.func.encodings[inst]) + { + if constraints.clobbers_flags { + last_flags_clobber = Some(inst) + } + } + } + + if isa.uses_complex_addresses() { + optimize_complex_addresses(&mut pos, inst, isa); + } + } + } +} diff --git a/cranelift/codegen/src/predicates.rs b/cranelift/codegen/src/predicates.rs new file mode 100644 index 0000000000..5812163e09 --- /dev/null +++ b/cranelift/codegen/src/predicates.rs @@ -0,0 +1,140 @@ +//! Predicate functions for testing instruction fields. +//! +//! 
This module defines functions that are used by the instruction predicates defined in
+//! `cranelift-codegen/meta/src/cdsl/instructions.rs`.
+//!
+//! The predicates that operate on integer fields use `Into<i64>` as a shared trait bound. This
+//! bound is implemented by all the native integer types as well as `Imm64`.
+//!
+//! Some of these predicates may be unused in certain ISA configurations, so we suppress the
+//! dead code warning.
+
+use crate::ir;
+use crate::ir::immediates::{Ieee32, Ieee64};
+use crate::ir::ConstantData;
+
+/// Check that an integer value is zero.
+#[allow(dead_code)]
+pub fn is_zero_int<T: Into<i64>>(x: T) -> bool {
+    x.into() == 0
+}
+
+/// Check that a 64-bit floating point value is zero.
+#[allow(dead_code)]
+pub fn is_zero_64_bit_float<T: Into<Ieee64>>(x: T) -> bool {
+    let x64 = x.into();
+    x64.bits() == 0
+}
+
+/// Check that a 32-bit floating point value is zero.
+#[allow(dead_code)]
+pub fn is_zero_32_bit_float<T: Into<Ieee32>>(x: T) -> bool {
+    let x32 = x.into();
+    x32.bits() == 0
+}
+
+/// Check that a constant contains all zeroes.
+#[allow(dead_code)]
+pub fn is_all_zeroes(x: &ConstantData) -> bool {
+    x.iter().all(|&f| f == 0)
+}
+
+/// Check that a constant contains all ones.
+#[allow(dead_code)]
+pub fn is_all_ones(x: &ConstantData) -> bool {
+    x.iter().all(|&f| f == 0xff)
+}
+
+/// Check that `x` is the same as `y`.
+#[allow(dead_code)]
+pub fn is_equal<T: Eq + Copy, O: Into<T> + Copy>(x: T, y: O) -> bool {
+    x == y.into()
+}
+
+/// Check that `x` can be represented as a `wd`-bit signed integer with `sc` low zero bits.
+#[allow(dead_code)]
+pub fn is_signed_int<T: Into<i64>>(x: T, wd: u8, sc: u8) -> bool {
+    let s = x.into();
+    s == (s >> sc << (64 - wd + sc) >> (64 - wd))
+}
+
+/// Check that `x` can be represented as a `wd`-bit unsigned integer with `sc` low zero bits.
+#[allow(dead_code)]
+pub fn is_unsigned_int<T: Into<i64>>(x: T, wd: u8, sc: u8) -> bool {
+    let u = x.into() as u64;
+    // Bit-mask of the permitted bits.
+    let m = (1 << wd) - (1 << sc);
+    u == (u & m)
+}
+
+#[allow(dead_code)]
+pub fn is_colocated_func(func_ref: ir::FuncRef, func: &ir::Function) -> bool {
+    func.dfg.ext_funcs[func_ref].colocated
+}
+
+#[allow(dead_code)]
+pub fn is_colocated_data(global_value: ir::GlobalValue, func: &ir::Function) -> bool {
+    match func.global_values[global_value] {
+        ir::GlobalValueData::Symbol { colocated, .. } => colocated,
+        _ => panic!("is_colocated_data only makes sense for data with symbolic addresses"),
+    }
+}
+
+#[allow(dead_code)]
+pub fn has_length_of(value_list: &ir::ValueList, num: usize, func: &ir::Function) -> bool {
+    value_list.len(&func.dfg.value_lists) == num
+}
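As a concrete illustration of the shift trick in `is_signed_int`, here is a standalone copy with `x` already converted to `i64` and some hypothetical sample values:

```rust
// `x` fits in a wd-bit signed immediate with sc low zero bits iff shifting
// the low bits out and sign-extending back reproduces the original value.
fn is_signed_int(x: i64, wd: u8, sc: u8) -> bool {
    x == (x >> sc << (64 - wd + sc) >> (64 - wd))
}

fn main() {
    // 20 fits in 8 signed bits and has 2 low zero bits.
    assert!(is_signed_int(20, 8, 2));
    // 21 has a low bit set, so it fails the sc = 2 requirement.
    assert!(!is_signed_int(21, 8, 2));
    // 200 does not fit in 8 signed bits ([-128, 127]).
    assert!(!is_signed_int(200, 8, 0));
}
```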
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn cvt_u32() {
+        let x1 = 0u32;
+        let x2 = 1u32;
+        let x3 = 0xffff_fff0u32;
+
+        assert!(is_signed_int(x1, 1, 0));
+        assert!(is_signed_int(x1, 2, 1));
+        assert!(is_signed_int(x2, 2, 0));
+        assert!(!is_signed_int(x2, 2, 1));
+
+        // `u32` doesn't sign-extend when converted to `i64`.
+        assert!(!is_signed_int(x3, 8, 0));
+
+        assert!(is_unsigned_int(x1, 1, 0));
+        assert!(is_unsigned_int(x1, 8, 4));
+        assert!(is_unsigned_int(x2, 1, 0));
+        assert!(!is_unsigned_int(x2, 8, 4));
+        assert!(!is_unsigned_int(x3, 1, 0));
+        assert!(is_unsigned_int(x3, 32, 4));
+    }
+
+    #[test]
+    fn cvt_imm64() {
+        use crate::ir::immediates::Imm64;
+
+        let x1 = Imm64::new(-8);
+        let x2 = Imm64::new(8);
+
+        assert!(is_signed_int(x1, 16, 2));
+        assert!(is_signed_int(x2, 16, 2));
+        assert!(!is_signed_int(x1, 16, 4));
+        assert!(!is_signed_int(x2, 16, 4));
+    }
+
+    #[test]
+    fn check_is_all_zeroes() {
+        assert!(is_all_zeroes(&[0; 16].as_ref().into()));
+        assert!(is_all_zeroes(
+            &vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0].into()
+        ));
+        assert!(!is_all_zeroes(&[1; 16].as_ref().into()));
+    }
+
+    #[test]
+    fn check_is_all_ones() {
+        assert!(!is_all_ones(&[0; 16].as_ref().into()));
+        assert!(is_all_ones(&[0xff; 16].as_ref().into()));
+    }
+}
diff --git a/cranelift/codegen/src/print_errors.rs b/cranelift/codegen/src/print_errors.rs
new file mode 100644
index 0000000000..e4f6234ebd
--- /dev/null
+++ b/cranelift/codegen/src/print_errors.rs
@@ -0,0 +1,227 @@
+//! Utility routines for pretty-printing error messages.
+
+use crate::entity::SecondaryMap;
+use crate::ir;
+use crate::ir::entities::{AnyEntity, Block, Inst, Value};
+use crate::ir::function::Function;
+use crate::isa::TargetIsa;
+use crate::result::CodegenError;
+use crate::verifier::{VerifierError, VerifierErrors};
+use crate::write::{decorate_function, FuncWriter, PlainWriter};
+use alloc::boxed::Box;
+use alloc::string::{String, ToString};
+use alloc::vec::Vec;
+use core::fmt;
+use core::fmt::Write;
+
+/// Pretty-print a verifier error.
+pub fn pretty_verifier_error<'a>(
+    func: &ir::Function,
+    isa: Option<&dyn TargetIsa>,
+    func_w: Option<Box<dyn FuncWriter + 'a>>,
+    errors: VerifierErrors,
+) -> String {
+    let mut errors = errors.0;
+    let mut w = String::new();
+    let num_errors = errors.len();
+
+    decorate_function(
+        &mut PrettyVerifierError(func_w.unwrap_or_else(|| Box::new(PlainWriter)), &mut errors),
+        &mut w,
+        func,
+        &isa.into(),
+    )
+    .unwrap();
+
+    writeln!(
+        w,
+        "\n; {} verifier error{} detected (see above). Compilation aborted.",
+        num_errors,
+        if num_errors == 1 { "" } else { "s" }
+    )
+    .unwrap();
+
+    w
+}
+
+struct PrettyVerifierError<'a>(Box<dyn FuncWriter + 'a>, &'a mut Vec<VerifierError>);
+
+impl<'a> FuncWriter for PrettyVerifierError<'a> {
+    fn write_block_header(
+        &mut self,
+        w: &mut dyn Write,
+        func: &Function,
+        isa: Option<&dyn TargetIsa>,
+        block: Block,
+        indent: usize,
+    ) -> fmt::Result {
+        pretty_block_header_error(w, func, isa, block, indent, &mut *self.0, self.1)
+    }
+
+    fn write_instruction(
+        &mut self,
+        w: &mut dyn Write,
+        func: &Function,
+        aliases: &SecondaryMap<Value, Vec<Value>>,
+        isa: Option<&dyn TargetIsa>,
+        inst: Inst,
+        indent: usize,
+    ) -> fmt::Result {
+        pretty_instruction_error(w, func, aliases, isa, inst, indent, &mut *self.0, self.1)
+    }
+
+    fn write_entity_definition(
+        &mut self,
+        w: &mut dyn Write,
+        func: &Function,
+        entity: AnyEntity,
+        value: &dyn fmt::Display,
+    ) -> fmt::Result {
+        pretty_preamble_error(w, func, entity, value, &mut *self.0, self.1)
+    }
+}
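The decoration scheme above can be hard to see through the trait plumbing. Here is a stripped-down, self-contained model of the same idea (hypothetical error shape, not the real `VerifierErrors`): print each entity, then flush and remove any errors recorded against it.

```rust
use std::fmt::Write;

fn write_with_errors(
    out: &mut String,
    line: &str,
    errors: &mut Vec<(usize, String)>, // (entity index, message), toy shape
    idx: usize,
) -> std::fmt::Result {
    writeln!(out, "{}", line)?;
    // Print and drop every error attached to the entity just written.
    errors.retain(|(i, msg)| {
        if *i == idx {
            let _ = writeln!(out, "; error: {}", msg);
            false
        } else {
            true
        }
    });
    Ok(())
}

fn main() -> std::fmt::Result {
    let mut out = String::new();
    let mut errors = vec![(1, "bad instruction".to_string())];
    write_with_errors(&mut out, "v0 = iconst.i32 1", &mut errors, 0)?;
    write_with_errors(&mut out, "v1 = bogus v0", &mut errors, 1)?;
    assert!(out.contains("; error: bad instruction"));
    assert!(errors.is_empty());
    Ok(())
}
```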
+/// Pretty-print a function verifier error for a given block.
+fn pretty_block_header_error(
+    w: &mut dyn Write,
+    func: &Function,
+    isa: Option<&dyn TargetIsa>,
+    cur_block: Block,
+    indent: usize,
+    func_w: &mut dyn FuncWriter,
+    errors: &mut Vec<VerifierError>,
+) -> fmt::Result {
+    let mut s = String::new();
+    func_w.write_block_header(&mut s, func, isa, cur_block, indent)?;
+    write!(w, "{}", s)?;
+
+    // TODO: Use drain_filter here when it gets stabilized.
+    let mut i = 0;
+    let mut printed_error = false;
+    while i != errors.len() {
+        match errors[i].location {
+            ir::entities::AnyEntity::Block(block) if block == cur_block => {
+                if !printed_error {
+                    print_arrow(w, &s)?;
+                    printed_error = true;
+                }
+                let err = errors.remove(i);
+                print_error(w, err)?;
+            }
+            _ => i += 1,
+        }
+    }
+
+    if printed_error {
+        w.write_char('\n')?;
+    }
+
+    Ok(())
+}
+
+/// Pretty-print a function verifier error for a given instruction.
+fn pretty_instruction_error(
+    w: &mut dyn Write,
+    func: &Function,
+    aliases: &SecondaryMap<Value, Vec<Value>>,
+    isa: Option<&dyn TargetIsa>,
+    cur_inst: Inst,
+    indent: usize,
+    func_w: &mut dyn FuncWriter,
+    errors: &mut Vec<VerifierError>,
+) -> fmt::Result {
+    let mut s = String::new();
+    func_w.write_instruction(&mut s, func, aliases, isa, cur_inst, indent)?;
+    write!(w, "{}", s)?;
+
+    // TODO: Use drain_filter here when it gets stabilized.
+    let mut i = 0;
+    let mut printed_error = false;
+    while i != errors.len() {
+        match errors[i].location {
+            ir::entities::AnyEntity::Inst(inst) if inst == cur_inst => {
+                if !printed_error {
+                    print_arrow(w, &s)?;
+                    printed_error = true;
+                }
+                let err = errors.remove(i);
+                print_error(w, err)?;
+            }
+            _ => i += 1,
+        }
+    }
+
+    if printed_error {
+        w.write_char('\n')?;
+    }
+
+    Ok(())
+}
+
+fn pretty_preamble_error(
+    w: &mut dyn Write,
+    func: &Function,
+    entity: AnyEntity,
+    value: &dyn fmt::Display,
+    func_w: &mut dyn FuncWriter,
+    errors: &mut Vec<VerifierError>,
+) -> fmt::Result {
+    let mut s = String::new();
+    func_w.write_entity_definition(&mut s, func, entity, value)?;
+    write!(w, "{}", s)?;
+
+    // TODO: Use drain_filter here when it gets stabilized.
+    let mut i = 0;
+    let mut printed_error = false;
+    while i != errors.len() {
+        if entity == errors[i].location {
+            if !printed_error {
+                print_arrow(w, &s)?;
+                printed_error = true;
+            }
+            let err = errors.remove(i);
+            print_error(w, err)?;
+        } else {
+            i += 1
+        }
+    }
+
+    if printed_error {
+        w.write_char('\n')?;
+    }
+
+    Ok(())
+}
+
+/// Prints:
+///    ; ^~~~~~
+fn print_arrow(w: &mut dyn Write, entity: &str) -> fmt::Result {
+    write!(w, ";")?;
+
+    let indent = entity.len() - entity.trim_start().len();
+    if indent != 0 {
+        write!(w, "{1:0$}^", indent - 1, "")?;
+    }
+
+    for _ in 0..entity.trim().len() - 1 {
+        write!(w, "~")?;
+    }
+
+    writeln!(w)
+}
+
+/// Prints:
+///    ; error: [ERROR BODY]
+fn print_error(w: &mut dyn Write, err: VerifierError) -> fmt::Result {
+    writeln!(w, "; error: {}", err.to_string())?;
+    Ok(())
+}
+
+/// Pretty-print a Cranelift error.
+pub fn pretty_error(func: &ir::Function, isa: Option<&dyn TargetIsa>, err: CodegenError) -> String {
+    if let CodegenError::Verifier(e) = err {
+        pretty_verifier_error(func, isa, None, e)
+    } else {
+        err.to_string()
+    }
+}
diff --git a/cranelift/codegen/src/redundant_reload_remover.rs b/cranelift/codegen/src/redundant_reload_remover.rs
new file mode 100644
index 0000000000..f33eb98fde
--- /dev/null
+++ b/cranelift/codegen/src/redundant_reload_remover.rs
@@ -0,0 +1,900 @@
+//! This module implements a late-stage redundant-reload remover, which runs after registers have
+//! been allocated and stack slots have been given specific offsets.
+ +use crate::cursor::{Cursor, CursorPosition, EncCursor, FuncCursor}; +use crate::entity::EntitySet; +use crate::flowgraph::ControlFlowGraph; +use crate::ir::dfg::DataFlowGraph; +use crate::ir::instructions::BranchInfo; +use crate::ir::stackslot::{StackSlotKind, StackSlots}; +use crate::ir::{ + Block, Function, Inst, InstBuilder, InstructionData, Opcode, StackSlotData, Type, Value, + ValueLoc, +}; +use crate::isa::{RegInfo, RegUnit, TargetIsa}; +use crate::regalloc::RegDiversions; +use alloc::vec::Vec; +use core::convert::TryInto; +use cranelift_entity::{PrimaryMap, SecondaryMap}; + +// ============================================================================================= +// A description of the redundant-fill-removal algorithm +// +// +// The algorithm works forwards through each Block. It carries along and updates a table, +// AvailEnv, with which it tracks registers that are known to have the same value as some stack +// slot. The actions on encountering an instruction depend on the instruction, as follows: +// +// ss1 = spill r0: update the AvailEnv so as to note that slot `ss1` and register `r0` +// have the same value. +// +// r1 = fill ss0: look in the AvailEnv. If it tells us that register `r1` and slot `ss0` +// have the same value, then delete the instruction by converting it to a +// `fill_nop`. +// +// If it tells us that some other register `r2` has the same value as +// slot `ss0`, convert the instruction into a copy from `r2` to `r1`. +// +// any other insn: remove from the AvailEnv, any bindings associated with registers +// written by this instruction, since they will be invalidated by it. +// +// Tracking the effects of `copy` instructions in AvailEnv for the case when both source and +// destination are registers does not cause any more fills to be removed or converted to copies. +// It's not clear why. +// +// There are various other instruction-handling cases in `visit_inst`, which are documented +// in-line, and do not change the core algorithm, so are not described here. +// +// The registers tracked by AvailEnv are the post-diversion registers that are really used by the +// code; they are not the pre-diversion names associated with each SSA `Value`. The second +// `fill` case above opportunistically copies values from registers that may have been diversion +// targets in some predecessor block, and so are no longer associated with any specific SSA-level +// name at the point the copy is made. Hence those copies (from `r2` to `r1`) cannot be done +// with an ordinary `copy` instruction. Instead they have to be done using a new `copy_to_ssa` +// instruction, which copies from an arbitrary register to a register-resident `Value` (that is, +// "back to" SSA-world). +// +// That completes the description of the core algorithm. +// +// In the case where a block `A` jumps to `B` and `A` is the only predecessor of `B`, the +// AvailEnv at the end of `A` will still be valid at the entry to `B`. In such a case, we can +// profitably transform `B` using the AvailEnv "inherited" from `A`. In order to take full +// advantage of this, this module partitions the function's CFG into tree-shaped groups of +// blocks, and processes each tree as described above. So the AvailEnv is only initialised to +// empty at the start of blocks that form the root of each tree; that is, for blocks which have +// two or more predecessors. 
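To make the AvailEnv transitions concrete, here is a self-contained toy model (plain arrays and `u32` slot ids, not the real `AvailEnv`/`SlotInfo`) of the spill/fill/other-instruction rules described above:

```rust
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Fill {
    Redundant,       // becomes a fill_nop
    CopyFrom(usize), // becomes a copy from this register
    Keep,            // the fill must stay
}

// map[reg] = Some(slot) when `reg` is known to hold the value of `slot`.
fn on_spill(map: &mut [Option<u32>], slot: u32, reg: usize) {
    map[reg] = Some(slot);
}

fn on_fill(map: &[Option<u32>], slot: u32, dst: usize) -> Fill {
    if map[dst] == Some(slot) {
        Fill::Redundant
    } else if let Some(src) = map.iter().position(|&s| s == Some(slot)) {
        Fill::CopyFrom(src)
    } else {
        Fill::Keep
    }
}

fn on_other_write(map: &mut [Option<u32>], reg: usize) {
    map[reg] = None; // the written register no longer matches any slot
}

fn main() {
    let mut map = [None; 4];
    on_spill(&mut map, 7, 0); // ss7 = spill r0
    assert_eq!(on_fill(&map, 7, 0), Fill::Redundant);
    assert_eq!(on_fill(&map, 7, 2), Fill::CopyFrom(0));
    on_other_write(&mut map, 0); // r0 overwritten
    assert_eq!(on_fill(&map, 7, 0), Fill::Keep);
}
```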
+
+// =============================================================================================
+// Top level algorithm structure
+//
+// The overall algorithm, for a function, starts like this:
+//
+// * (once per function): finds Blocks that have two or more predecessors, since they will be the
+//   roots of Block trees. Also, the entry node for the function is considered to be a root.
+//
+// It then continues with a loop that first finds a tree of Blocks ("discovery") and then removes
+// redundant fills as described above ("processing"):
+//
+// * (discovery; once per tree): for each root, performs a depth first search to find all the
+//   Blocks in the tree, guided by RedundantReloadRemover::discovery_stack.
+//
+// * (processing; once per tree): the just-discovered tree is then processed as described above,
+//   guided by RedundantReloadRemover::processing_stack.
+//
+// In this way, all Blocks reachable from the function's entry point are eventually processed.
+// Note that each tree is processed as soon as it has been discovered, so the algorithm never
+// creates a list of trees for the function.
+//
+// The running state is stored in `RedundantReloadRemover`. This is allocated once and can be
+// reused for multiple functions so as to minimise heap turnover. The fields are, roughly:
+//
+//   num_regunits -- constant for the whole function; used by the tree processing phase
+//   num_preds_per_block -- constant for the whole function; used by the tree discovery process
+//
+//   discovery_stack -- used to guide the tree discovery process
+//   nodes_in_tree -- the discovered nodes are recorded here
+//
+//   processing_stack -- used to guide the tree processing process
+//   nodes_already_visited -- used to ensure the tree processing logic terminates in the case
+//                            where a tree has a branch back to its root node.
+//
+// There is further documentation in line below, as appropriate.
+
+// =============================================================================================
+// A side note on register choice heuristics
+
+// The core algorithm opportunistically replaces fill instructions when it knows of a register
+// that already holds the required value. How effective this is largely depends on how long
+// reloaded values happen to stay alive before the relevant register is overwritten. And that
+// depends on the register allocator's register choice heuristics. The worst case is when the
+// register allocator reuses registers as soon as possible after they become free. Unfortunately
+// that was indeed the selection scheme prior to development of this pass.
+//
+// As part of this work, the register selection scheme has been changed as follows: for registers
+// written by any instruction other than a fill, use the lowest numbered available register. But
+// for registers written by a fill instruction, use the highest numbered available register. The
+// aim is to try to keep reload- and non-reload registers disjoint to the extent possible.
+// Several other schemes were tried, but this one is simple and can be worth an extra 2% of
+// performance in some cases.
+//
+// The relevant change is more or less a one-line change in the solver.
+
+// =============================================================================================
+// Data structures used for discovery of trees
+
+// `ZeroOneOrMany` is used to record the number of predecessors a Block has.
+// The `Zero` case is included so as to cleanly handle the case where the incoming graph has
+// unreachable Blocks.
+
+#[derive(Clone, PartialEq)]
+enum ZeroOneOrMany {
+    Zero,
+    One,
+    Many,
+}
+
+// =============================================================================================
+// Data structures used for processing of trees
+
+// `SlotInfo` describes a spill slot in the obvious way. Note that it doesn't indicate which
+// register(s) are currently associated with the slot. That job is done by `AvailEnv` instead.
+//
+// In the CL framework, stack slots are partitioned into disjoint sets, one for each
+// `StackSlotKind`. The offset and size only give a unique identity within any particular
+// `StackSlotKind`. So, to uniquely identify a stack slot, all three fields are necessary.
+
+#[derive(Clone, Copy)]
+struct SlotInfo {
+    kind: StackSlotKind,
+    offset: i32,
+    size: u32,
+}
+
+// `AvailEnv` maps each possible register to a stack slot that holds the same value. The index
+// space of `AvailEnv::map` is exactly the set of registers available on the current target. If
+// (as is mostly the case) a register is not known to have the same value as a stack slot, then
+// its entry is `None` rather than `Some(..)`.
+//
+// Invariants for AvailEnv:
+//
+// AvailEnv may have multiple different registers bound to the same stack slot -- that is, the
+// same `(kind, offset, size)` triple. That's OK, and reflects the reality that those registers
+// contain the same value. This could happen, for example, in the case
+//
+//   ss1 = spill r0
+//   ..
+//   r2 = fill ss1
+//
+// Then both `r0` and `r2` will have the same value as `ss1`, provided that ".." doesn't write to
+// `r0`.
+//
+// To say that two different registers may be bound to the same stack slot is the same as saying
+// that it is allowed to have two different entries in AvailEnv with the same `(kind, offset,
+// size)` triple. What is *not* allowed is to have partial overlaps. That is, if two SlotInfos
+// have the same `kind` field and have `offset` and `size` fields that overlap, then their
+// `offset` and `size` fields must be identical. This is so as to make the algorithm safe against
+// situations where, for example, a 64 bit register is spilled, but then only the bottom 32 bits
+// are reloaded from the slot. In such a case the Cranelift IR would likely be ill-typed, though,
+// so this probably could not occur in practice.
+
+#[derive(Clone)]
+struct AvailEnv {
+    map: Vec<Option<SlotInfo>>,
+}
+
+// `ProcessingStackElem` combines AvailEnv with contextual information needed to "navigate" within
+// a Block.
+//
+// A ProcessingStackElem conceptually has the lifetime of exactly one Block: once the current
+// Block is completed, the ProcessingStackElem will be abandoned. In practice the top level
+// state, RedundantReloadRemover, caches them, so as to avoid heap turnover.
+//
+// Note that ProcessingStackElem must contain a CursorPosition. The CursorPosition, which
+// indicates where we are in the current Block, cannot be implicitly maintained by looping over
+// all the instructions in a Block in turn, because we may choose to suspend processing the
+// current Block at a side exit, continue by processing the subtree reached via the side exit,
+// and only later resume the current Block.
+
+struct ProcessingStackElem {
+    /// Indicates the AvailEnv at the current point in the Block.
+    avail_env: AvailEnv,
+
+    /// Shows where we currently are inside the Block.
+    cursor: CursorPosition,
+
+    /// Indicates the currently active register diversions at the current point.
+    diversions: RegDiversions,
+}
+
+// =============================================================================================
+// The top level data structure
+
+// `RedundantReloadRemover` contains data structures for the two passes: discovery of tree shaped
+// regions, and processing of them. These are allocated once and stay alive for the entire
+// function, even though they are cleared out for each new tree shaped region. It also caches
+// `num_regunits` and `num_preds_per_block`, which are computed at the start of each function and
+// then remain constant.
+
+/// The redundant reload remover's state.
+pub struct RedundantReloadRemover {
+    /// The total number of RegUnits available on this architecture. This is unknown when the
+    /// RedundantReloadRemover is created. It becomes known at the beginning of processing of a
+    /// function.
+    num_regunits: Option<u16>,
+
+    /// This stores, for each Block, a characterisation of the number of predecessors it has.
+    num_preds_per_block: PrimaryMap<Block, ZeroOneOrMany>,
+
+    /// The stack used for the first phase (discovery). There is one element on the discovery
+    /// stack for each currently unexplored Block in the tree being searched.
+    discovery_stack: Vec<Block>,
+
+    /// The nodes in the discovered tree are inserted here.
+    nodes_in_tree: EntitySet<Block>,
+
+    /// The stack used during the second phase (transformation). There is one element on the
+    /// processing stack for each currently-open node in the tree being transformed.
+    processing_stack: Vec<ProcessingStackElem>,
+
+    /// Used in the second phase to avoid visiting nodes more than once.
+    nodes_already_visited: EntitySet<Block>,
+}
+
+// =============================================================================================
+// Miscellaneous small helper functions
+
+// Is this a kind of stack slot that is safe to track in AvailEnv? This is probably overly
+// conservative, but tracking only the SpillSlot and IncomingArg kinds catches almost all
+// available redundancy in practice.
+fn is_slot_kind_tracked(kind: StackSlotKind) -> bool {
+    match kind {
+        StackSlotKind::SpillSlot | StackSlotKind::IncomingArg => true,
+        _ => false,
+    }
+}
+
+// Find out if the range `[offset, +size)` overlaps with the range in `si`.
+fn overlaps(si: &SlotInfo, offset: i32, size: u32) -> bool {
+    let a_offset = si.offset as i64;
+    let a_size = si.size as i64;
+    let b_offset = offset as i64;
+    let b_size = size as i64;
+    let no_overlap = a_offset + a_size <= b_offset || b_offset + b_size <= a_offset;
+    !no_overlap
+}
+
+// Find, in `reginfo`, the register bank that `reg` lives in, and return the lower limit and size
+// of the bank. This is so the caller can conveniently iterate over all RegUnits in the bank that
+// `reg` lives in.
+fn find_bank_limits(reginfo: &RegInfo, reg: RegUnit) -> (RegUnit, u16) {
+    if let Some(bank) = reginfo.bank_containing_regunit(reg) {
+        return (bank.first_unit, bank.units);
+    }
+    // We should never get here, since `reg` must come from *some* RegBank.
+    panic!("find_bank_limits: reg not found");
+}
+
+// Returns the register that `v` is allocated to. Assumes that `v` actually resides in a
+// register.
+fn reg_of_value(locations: &SecondaryMap<Value, ValueLoc>, v: Value) -> RegUnit {
+    match locations[v] {
+        ValueLoc::Reg(ru) => ru,
+        _ => panic!("reg_of_value: value isn't in a reg"),
+    }
+}
+
+// Returns the stack slot that `v` is allocated to. Assumes that `v` actually resides in a stack
+// slot.
+fn slot_of_value<'s>(
+    locations: &SecondaryMap<Value, ValueLoc>,
+    stack_slots: &'s StackSlots,
+    v: Value,
+) -> &'s StackSlotData {
+    match locations[v] {
+        ValueLoc::Stack(slot) => &stack_slots[slot],
+        _ => panic!("slot_of_value: value isn't in a stack slot"),
+    }
+}
+
+// =============================================================================================
+// Top level: discovery of tree shaped regions
+
+impl RedundantReloadRemover {
+    // A helper for `add_nodes_to_tree` below.
+    fn discovery_stack_push_successors_of(&mut self, cfg: &ControlFlowGraph, node: Block) {
+        for successor in cfg.succ_iter(node) {
+            self.discovery_stack.push(successor);
+        }
+    }
+
+    // Visit the tree of Blocks rooted at `starting_point` and add them to `self.nodes_in_tree`.
+    // `self.num_preds_per_block` guides the process, ensuring we don't leave the tree-ish region
+    // and indirectly ensuring that the process will terminate in the presence of cycles in the
+    // graph. `self.discovery_stack` holds the search state in this function.
+    fn add_nodes_to_tree(&mut self, cfg: &ControlFlowGraph, starting_point: Block) {
+        // One might well ask why this doesn't loop forever when it encounters cycles in the
+        // control flow graph. The reason is that any cycle in the graph that is reachable from
+        // anywhere outside the cycle -- in particular, that is reachable from the function's
+        // entry node -- must have at least one node that has two or more predecessors. So the
+        // logic below won't follow into it, because it regards any such node as the root of some
+        // other tree.
+        debug_assert!(self.discovery_stack.is_empty());
+        debug_assert!(self.nodes_in_tree.is_empty());
+
+        self.nodes_in_tree.insert(starting_point);
+        self.discovery_stack_push_successors_of(cfg, starting_point);
+
+        while let Some(node) = self.discovery_stack.pop() {
+            match self.num_preds_per_block[node] {
+                // We arrived at a node with multiple predecessors, so it's a new root. Ignore it.
+                ZeroOneOrMany::Many => {}
+                // This node has just one predecessor, so we should incorporate it in the tree and
+                // immediately transition into searching from it instead.
+                ZeroOneOrMany::One => {
+                    self.nodes_in_tree.insert(node);
+                    self.discovery_stack_push_successors_of(cfg, node);
+                }
+                // This is meaningless. We arrived at a node that doesn't point back at where we
+                // came from.
+                ZeroOneOrMany::Zero => panic!("add_nodes_to_tree: inconsistent graph"),
+            }
+        }
+    }
+}
+
+// =============================================================================================
+// Operations relating to `AvailEnv`
+
+impl AvailEnv {
+    // Create a new one.
+    fn new(size: usize) -> Self {
+        let mut env = Self {
+            map: Vec::<Option<SlotInfo>>::new(),
+        };
+        env.map.resize(size, None);
+        env
+    }
+
+    // Debug only: checks (some of) the required AvailEnv invariants.
+    #[cfg(debug_assertions)]
+    fn check_invariants(&self) -> bool {
+        // Check that any overlapping entries overlap exactly. This is super lame (quadratic),
+        // but it's only used in debug builds.
+        for i in 0..self.map.len() {
+            if let Some(si) = self.map[i] {
+                for j in i + 1..self.map.len() {
+                    if let Some(sj) = self.map[j] {
+                        // "si and sj overlap, but not exactly"
+                        if si.kind == sj.kind
+                            && overlaps(&si, sj.offset, sj.size)
+                            && !(si.offset == sj.offset && si.size == sj.size)
+                        {
+                            return false;
+                        }
+                    }
+                }
+            }
+        }
+        true
+    }
+
+    // Invalidates the binding associated with `reg`. Note that by construction of AvailEnv,
+    // `reg` can only be associated with one binding at once.
+    fn invalidate_by_reg(&mut self, reg: RegUnit) {
+        self.map[reg as usize] = None;
+    }
+
+    // Invalidates any binding that has any overlap with `(kind, offset, size)`.
+    fn invalidate_by_offset(&mut self, kind: StackSlotKind, offset: i32, size: u32) {
+        debug_assert!(is_slot_kind_tracked(kind));
+        for i in 0..self.map.len() {
+            if let Some(si) = &self.map[i] {
+                if si.kind == kind && overlaps(&si, offset, size) {
+                    self.map[i] = None;
+                }
+            }
+        }
+    }
+
+    // Invalidates all bindings.
+    fn invalidate_all(&mut self) {
+        for i in 0..self.map.len() {
+            self.map[i] = None;
+        }
+    }
+
+    // Updates AvailEnv to track the effect of a `regmove` instruction.
+    fn copy_reg(&mut self, src: RegUnit, dst: RegUnit) {
+        self.map[dst as usize] = self.map[src as usize];
+    }
+
+    // Does `env` have the exact binding characterised by `(reg, kind, offset, size)`?
+    fn has_exact_binding(&self, reg: RegUnit, kind: StackSlotKind, offset: i32, size: u32) -> bool {
+        debug_assert!(is_slot_kind_tracked(kind));
+        if let Some(si) = &self.map[reg as usize] {
+            return si.kind == kind && si.offset == offset && si.size == size;
+        }
+        // No such binding.
+        false
+    }
+
+    // Does `env` have a binding characterised by `(kind, offset, size)` but to a register, let's
+    // call it `other_reg`, that isn't `reg`? If so, return `other_reg`. Note that `other_reg`
+    // will have the same bank as `reg`. It is a checked error to call this function with a
+    // binding matching all four of `(reg, kind, offset, size)`.
+    fn has_inexact_binding(
+        &self,
+        reginfo: &RegInfo,
+        reg: RegUnit,
+        kind: StackSlotKind,
+        offset: i32,
+        size: u32,
+    ) -> Option<RegUnit> {
+        debug_assert!(is_slot_kind_tracked(kind));
+        // Find the range of RegUnit numbers for the bank that contains `reg`, and use that as our
+        // search space. This is so as to guarantee that any match is restricted to the same bank
+        // as `reg`.
+        let (first_unit, num_units) = find_bank_limits(reginfo, reg);
+        for other_reg in first_unit..first_unit + num_units {
+            if let Some(si) = &self.map[other_reg as usize] {
+                if si.kind == kind && si.offset == offset && si.size == size {
+                    if other_reg == reg {
+                        panic!("has_inexact_binding: binding *is* exact!");
+                    }
+                    return Some(other_reg);
+                }
+            }
+        }
+        // No such binding.
+        None
+    }
+
+    // Create the binding `(reg, kind, offset, size)` in `env`, and throw away any previous
+    // binding associated with either `reg` or the `(kind, offset, size)` triple.
+    fn bind(&mut self, reg: RegUnit, kind: StackSlotKind, offset: i32, size: u32) {
+        debug_assert!(is_slot_kind_tracked(kind));
+        self.invalidate_by_offset(kind, offset, size);
+        self.map[reg as usize] = Some(SlotInfo { kind, offset, size });
+    }
+}
+
+// Invalidates, in `avail_env`, any binding associated with a regunit that is written by `inst`.
+fn invalidate_regs_written_by_inst(
+    locations: &SecondaryMap<Value, ValueLoc>,
+    diversions: &RegDiversions,
+    dfg: &DataFlowGraph,
+    avail_env: &mut AvailEnv,
+    inst: Inst,
+) {
+    for v in dfg.inst_results(inst).iter() {
+        if let ValueLoc::Reg(ru) = locations[*v] {
+            // This must be true. It would be meaningless for an SSA value to be diverted before
+            // the point where it is defined.
+ debug_assert!(diversions.reg(*v, locations) == ru); + avail_env.invalidate_by_reg(ru); + } + } +} + +// ============================================================================================= +// Processing of individual instructions + +impl RedundantReloadRemover { + // Process `inst`, possibly changing it into a different instruction, and possibly changing + // `self.avail_env` and `func.dfg`. + fn visit_inst( + &mut self, + func: &mut Function, + reginfo: &RegInfo, + isa: &dyn TargetIsa, + inst: Inst, + ) { + // Get hold of the top-of-stack work item. This is the state that we will mutate during + // processing of this instruction. + debug_assert!(!self.processing_stack.is_empty()); + let ProcessingStackElem { + avail_env, + diversions, + .. + } = self.processing_stack.last_mut().unwrap(); + + #[cfg(debug_assertions)] + debug_assert!( + avail_env.check_invariants(), + "visit_inst: env invariants not ok" + ); + + let dfg = &mut func.dfg; + let locations = &func.locations; + let stack_slots = &func.stack_slots; + + // To avoid difficulties with the borrow checker, do this in two stages. First, examine + // the instruction to see if it can be deleted or modified, and park the relevant + // information in `transform`. Update `self.avail_env` too. Later, use `transform` to + // actually do the transformation if necessary. + enum Transform { + NoChange, + ChangeToNopFill(Value), // delete this insn entirely + ChangeToCopyToSSA(Type, RegUnit), // change it into a copy from the specified reg + } + let mut transform = Transform::NoChange; + + // In this match { .. } statement, either we must treat the instruction specially, or we + // must call `invalidate_regs_written_by_inst` on it. + match &dfg[inst] { + InstructionData::Unary { + opcode: Opcode::Spill, + arg: src_value, + } => { + // Extract: (src_reg, kind, offset, size) + // Invalidate: (kind, offset, size) + // Add new binding: {src_reg -> (kind, offset, size)} + // Don't forget that src_value might be diverted, so we have to deref it. + let slot = slot_of_value(locations, stack_slots, dfg.inst_results(inst)[0]); + let src_reg = diversions.reg(*src_value, locations); + let kind = slot.kind; + if is_slot_kind_tracked(kind) { + let offset = slot.offset.expect("visit_inst: spill with no offset"); + let size = slot.size; + avail_env.bind(src_reg, kind, offset, size); + } else { + // We don't expect this insn to write any regs. But to be consistent with the + // rule above, do this anyway. + invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst); + } + } + InstructionData::Unary { + opcode: Opcode::Fill, + arg: src_value, + } => { + // Extract: (dst_reg, kind, offset, size) + // Invalidate: (kind, offset, size) + // Add new: {dst_reg -> (dst_value, kind, offset, size)} + let slot = slot_of_value(locations, stack_slots, *src_value); + let dst_value = dfg.inst_results(inst)[0]; + let dst_reg = reg_of_value(locations, dst_value); + // This must be true. It would be meaningless for an SSA value to be diverted + // before it was defined. + debug_assert!(dst_reg == diversions.reg(dst_value, locations)); + let kind = slot.kind; + if is_slot_kind_tracked(kind) { + let offset = slot.offset.expect("visit_inst: fill with no offset"); + let size = slot.size; + if avail_env.has_exact_binding(dst_reg, kind, offset, size) { + // This instruction is an exact copy of a fill we saw earlier, and the + // loaded value is still valid. So we'll schedule this instruction for + // deletion (below). 
No need to make any changes to `avail_env`.
+                        transform = Transform::ChangeToNopFill(*src_value);
+                    } else if let Some(other_reg) =
+                        avail_env.has_inexact_binding(reginfo, dst_reg, kind, offset, size)
+                    {
+                        // This fill is from the required slot, but into a different register
+                        // `other_reg`. So replace it with a copy from `other_reg` to `dst_reg`
+                        // and update `dst_reg`'s binding to make it the same as `other_reg`'s, so
+                        // as to maximise the chances of future matches after this instruction.
+                        debug_assert!(other_reg != dst_reg);
+                        transform =
+                            Transform::ChangeToCopyToSSA(dfg.value_type(dst_value), other_reg);
+                        avail_env.copy_reg(other_reg, dst_reg);
+                    } else {
+                        // This fill creates some new binding we don't know about. Update
+                        // `avail_env` to track it.
+                        avail_env.bind(dst_reg, kind, offset, size);
+                    }
+                } else {
+                    // Else it's "just another instruction that writes a reg", so we'd better
+                    // treat it as such, just as we do below for instructions that we don't handle
+                    // specially.
+                    invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst);
+                }
+            }
+            InstructionData::RegMove { src, dst, .. } => {
+                // These happen relatively rarely, but just frequently enough that it's worth
+                // tracking the copy (at the machine level, it's really a copy) in `avail_env`.
+                avail_env.copy_reg(*src, *dst);
+            }
+            InstructionData::RegSpill { .. }
+            | InstructionData::RegFill { .. }
+            | InstructionData::Call { .. }
+            | InstructionData::CallIndirect { .. }
+            | InstructionData::StackLoad { .. }
+            | InstructionData::StackStore { .. }
+            | InstructionData::Unary {
+                opcode: Opcode::AdjustSpDown,
+                ..
+            }
+            | InstructionData::UnaryImm {
+                opcode: Opcode::AdjustSpUpImm,
+                ..
+            }
+            | InstructionData::UnaryImm {
+                opcode: Opcode::AdjustSpDownImm,
+                ..
+            } => {
+                // All of these change, or might change, the memory-register bindings tracked in
+                // `avail_env` in some way we don't know about, or at least in ways we might be
+                // able to track but for which the effort-to-benefit ratio seems too low to
+                // bother. So play safe: forget everything we know.
+                //
+                // For Call/CallIndirect, we could do better when compiling for calling
+                // conventions that have callee-saved registers, since bindings for them would
+                // remain valid across the call.
+                avail_env.invalidate_all();
+            }
+            _ => {
+                // Invalidate: any `avail_env` entry associated with a reg written by `inst`.
+                invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst);
+            }
+        }
+
+        // Actually do the transformation.
+        match transform {
+            Transform::NoChange => {}
+            Transform::ChangeToNopFill(arg) => {
+                // Load is completely redundant. Convert it to a no-op.
+                dfg.replace(inst).fill_nop(arg);
+                let ok = func.update_encoding(inst, isa).is_ok();
+                debug_assert!(ok, "fill_nop encoding missing for this type");
+            }
+            Transform::ChangeToCopyToSSA(ty, reg) => {
+                // We already have the relevant value in some other register. Convert the
+                // load into a reg-reg copy.
+                dfg.replace(inst).copy_to_ssa(ty, reg);
+                let ok = func.update_encoding(inst, isa).is_ok();
+                debug_assert!(ok, "copy_to_ssa encoding missing for type {}", ty);
+            }
+        }
+    }
+}
+
+// =============================================================================================
+// Top level: processing of tree shaped regions
+
+impl RedundantReloadRemover {
+    // Push a clone of the top-of-stack ProcessingStackElem. This will be used to process exactly
+    // one Block.
The diversions are created new, rather than cloned, to reflect the fact + // that diversions are local to each Block. + fn processing_stack_push(&mut self, cursor: CursorPosition) { + let avail_env = if let Some(stack_top) = self.processing_stack.last() { + stack_top.avail_env.clone() + } else { + AvailEnv::new( + self.num_regunits + .expect("processing_stack_push: num_regunits unknown!") + as usize, + ) + }; + self.processing_stack.push(ProcessingStackElem { + avail_env, + cursor, + diversions: RegDiversions::new(), + }); + } + + // This pushes the node `dst` onto the processing stack, and sets up the new + // ProcessingStackElem accordingly. But it does all that only if `dst` is part of the current + // tree *and* we haven't yet visited it. + fn processing_stack_maybe_push(&mut self, dst: Block) { + if self.nodes_in_tree.contains(dst) && !self.nodes_already_visited.contains(dst) { + if !self.processing_stack.is_empty() { + // If this isn't the outermost node in the tree (that is, the root), then it must + // have exactly one predecessor. Nodes with no predecessors are dead and not + // incorporated in any tree. Nodes with two or more predecessors are the root of + // some other tree, and visiting them as if they were part of the current tree + // would be a serious error. + debug_assert!(self.num_preds_per_block[dst] == ZeroOneOrMany::One); + } + self.processing_stack_push(CursorPosition::Before(dst)); + self.nodes_already_visited.insert(dst); + } + } + + // Perform redundant-reload removal on the tree shaped region of graph defined by `root` and + // `self.nodes_in_tree`. The following state is modified: `self.processing_stack`, + // `self.nodes_already_visited`, and `func.dfg`. + fn process_tree( + &mut self, + func: &mut Function, + reginfo: &RegInfo, + isa: &dyn TargetIsa, + root: Block, + ) { + debug_assert!(self.nodes_in_tree.contains(root)); + debug_assert!(self.processing_stack.is_empty()); + debug_assert!(self.nodes_already_visited.is_empty()); + + // Create the initial work item + self.processing_stack_maybe_push(root); + + while !self.processing_stack.is_empty() { + // It seems somewhat ridiculous to construct a whole new FuncCursor just so we can do + // next_inst() on it once, and then copy the resulting position back out. But use of + // a function-global FuncCursor, or of the EncCursor in struct Context, leads to + // borrow checker problems, as does including FuncCursor directly in + // ProcessingStackElem. In any case this is not as bad as it looks, since profiling + // shows that the build-insert-step-extract work is reduced to just 8 machine + // instructions in an optimised x86_64 build, presumably because rustc can inline and + // then optimise out almost all the work. + let tos = self.processing_stack.len() - 1; + let mut pos = FuncCursor::new(func).at_position(self.processing_stack[tos].cursor); + let maybe_inst = pos.next_inst(); + self.processing_stack[tos].cursor = pos.position(); + + if let Some(inst) = maybe_inst { + // Deal with this insn, possibly changing it, possibly updating the top item of + // `self.processing_stack`. + self.visit_inst(func, reginfo, isa, inst); + + // Update diversions after the insn. + self.processing_stack[tos].diversions.apply(&func.dfg[inst]); + + // If the insn can branch outside this Block, push work items on the stack for all + // target Blocks that are part of the same tree and that we haven't yet visited. 
+                // The next iteration of this instruction-processing loop will immediately start
+                // work on the most recently pushed Block, and will eventually continue in this Block
+                // when those new items have been removed from the stack.
+                match func.dfg.analyze_branch(inst) {
+                    BranchInfo::NotABranch => (),
+                    BranchInfo::SingleDest(dst, _) => {
+                        self.processing_stack_maybe_push(dst);
+                    }
+                    BranchInfo::Table(jt, default) => {
+                        func.jump_tables[jt]
+                            .iter()
+                            .for_each(|dst| self.processing_stack_maybe_push(*dst));
+                        if let Some(dst) = default {
+                            self.processing_stack_maybe_push(dst);
+                        }
+                    }
+                }
+            } else {
+                // We've come to the end of the current work-item (Block). We'll already have
+                // processed the fallthrough/continuation/whatever for it using the logic above.
+                // Pop it off the stack and resume work on its parent.
+                self.processing_stack.pop();
+            }
+        }
+    }
+}
+
+// =============================================================================================
+// Top level: perform redundant fill removal for a complete function
+
+impl RedundantReloadRemover {
+    /// Create a new remover state.
+    pub fn new() -> Self {
+        Self {
+            num_regunits: None,
+            num_preds_per_block: PrimaryMap::<Block, ZeroOneOrMany>::with_capacity(8),
+            discovery_stack: Vec::<Block>::with_capacity(16),
+            nodes_in_tree: EntitySet::<Block>::new(),
+            processing_stack: Vec::<ProcessingStackElem>::with_capacity(8),
+            nodes_already_visited: EntitySet::<Block>::new(),
+        }
+    }
+
+    /// Clear the state of the remover.
+    pub fn clear(&mut self) {
+        self.clear_for_new_function();
+    }
+
+    fn clear_for_new_function(&mut self) {
+        self.num_preds_per_block.clear();
+        self.clear_for_new_tree();
+    }
+
+    fn clear_for_new_tree(&mut self) {
+        self.discovery_stack.clear();
+        self.nodes_in_tree.clear();
+        self.processing_stack.clear();
+        self.nodes_already_visited.clear();
+    }
+
+    #[inline(never)]
+    fn do_redundant_fill_removal_on_function(
+        &mut self,
+        func: &mut Function,
+        reginfo: &RegInfo,
+        isa: &dyn TargetIsa,
+        cfg: &ControlFlowGraph,
+    ) {
+        // Fail in an obvious way if there are more than (2^32)-1 Blocks in this function.
+        let num_blocks: u32 = func.dfg.num_blocks().try_into().unwrap();
+
+        // Clear out per-function state.
+        self.clear_for_new_function();
+
+        // Create a PrimaryMap that summarises the number of predecessors for each block, as 0, 1
+        // or "many", and that also claims the entry block as having "many" predecessors.
+        self.num_preds_per_block.clear();
+        self.num_preds_per_block.reserve(num_blocks as usize);
+
+        for i in 0..num_blocks {
+            let mut pi = cfg.pred_iter(Block::from_u32(i));
+            let mut n_pi = ZeroOneOrMany::Zero;
+            if pi.next().is_some() {
+                n_pi = ZeroOneOrMany::One;
+                if pi.next().is_some() {
+                    n_pi = ZeroOneOrMany::Many;
+                    // We don't care if there are more than two preds, so stop counting now.
+                }
+            }
+            self.num_preds_per_block.push(n_pi);
+        }
+        debug_assert!(self.num_preds_per_block.len() == num_blocks as usize);
+
+        // The entry block must be the root of some tree, so set up the state to reflect that.
+        let entry_block = func
+            .layout
+            .entry_block()
+            .expect("do_redundant_fill_removal_on_function: entry block unknown");
+        debug_assert!(self.num_preds_per_block[entry_block] == ZeroOneOrMany::Zero);
+        self.num_preds_per_block[entry_block] = ZeroOneOrMany::Many;
+
+        // Now build and process trees.
+        for root_ix in 0..self.num_preds_per_block.len() {
+            let root = Block::from_u32(root_ix as u32);
+
+            // Build a tree for each node that has two or more preds, and ignore all other nodes.
+ if self.num_preds_per_block[root] != ZeroOneOrMany::Many { + continue; + } + + // Clear out per-tree state. + self.clear_for_new_tree(); + + // Discovery phase: build the tree, as `root` and `self.nodes_in_tree`. + self.add_nodes_to_tree(cfg, root); + debug_assert!(self.nodes_in_tree.cardinality() > 0); + debug_assert!(self.num_preds_per_block[root] == ZeroOneOrMany::Many); + + // Processing phase: do redundant-reload-removal. + self.process_tree(func, reginfo, isa, root); + debug_assert!( + self.nodes_in_tree.cardinality() == self.nodes_already_visited.cardinality() + ); + } + } +} + +// ============================================================================================= +// Top level: the external interface + +struct Context<'a> { + // Current instruction as well as reference to function and ISA. + cur: EncCursor<'a>, + + // Cached ISA information. We save it here to avoid frequent virtual function calls on the + // `TargetIsa` trait object. + reginfo: RegInfo, + + // References to contextual data structures we need. + cfg: &'a ControlFlowGraph, + + // The running state. + state: &'a mut RedundantReloadRemover, +} + +impl RedundantReloadRemover { + /// Run the remover. + pub fn run(&mut self, isa: &dyn TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) { + let ctx = Context { + cur: EncCursor::new(func, isa), + reginfo: isa.register_info(), + cfg, + state: self, + }; + let mut total_regunits = 0; + for rb in isa.register_info().banks { + total_regunits += rb.units; + } + ctx.state.num_regunits = Some(total_regunits); + ctx.state.do_redundant_fill_removal_on_function( + ctx.cur.func, + &ctx.reginfo, + ctx.cur.isa, + &ctx.cfg, + ); + } +} diff --git a/cranelift/codegen/src/regalloc/affinity.rs b/cranelift/codegen/src/regalloc/affinity.rs new file mode 100644 index 0000000000..efcc4dabfa --- /dev/null +++ b/cranelift/codegen/src/regalloc/affinity.rs @@ -0,0 +1,126 @@ +//! Value affinity for register allocation. +//! +//! An SSA value's affinity is a hint used to guide the register allocator. It specifies the class +//! of allocation that is likely to cause the least amount of fixup moves in order to satisfy +//! instruction operand constraints. +//! +//! For values that want to be in registers, the affinity hint includes a register class or +//! subclass. This is just a hint, and the register allocator is allowed to pick a register from a +//! larger register class instead. + +use crate::ir::{AbiParam, ArgumentLoc}; +use crate::isa::{ConstraintKind, OperandConstraint, RegClassIndex, RegInfo, TargetIsa}; +use core::fmt; + +/// Preferred register allocation for an SSA value. +#[derive(Clone, Copy, Debug)] +pub enum Affinity { + /// No affinity. + /// + /// This indicates a value that is not defined or used by any real instructions. It is a ghost + /// value that won't appear in the final program. + Unassigned, + + /// This value should be placed in a spill slot on the stack. + Stack, + + /// This value prefers a register from the given register class. + Reg(RegClassIndex), +} + +impl Default for Affinity { + fn default() -> Self { + Self::Unassigned + } +} + +impl Affinity { + /// Create an affinity that satisfies a single constraint. + /// + /// This will never create an `Affinity::Unassigned`. + /// Use the `Default` implementation for that. 
+    pub fn new(constraint: &OperandConstraint) -> Self {
+        if constraint.kind == ConstraintKind::Stack {
+            Self::Stack
+        } else {
+            Self::Reg(constraint.regclass.into())
+        }
+    }
+
+    /// Create an affinity that matches an ABI argument for `isa`.
+    pub fn abi(arg: &AbiParam, isa: &dyn TargetIsa) -> Self {
+        match arg.location {
+            ArgumentLoc::Unassigned => Self::Unassigned,
+            ArgumentLoc::Reg(_) => Self::Reg(isa.regclass_for_abi_type(arg.value_type).into()),
+            ArgumentLoc::Stack(_) => Self::Stack,
+        }
+    }
+
+    /// Is this the `Unassigned` affinity?
+    pub fn is_unassigned(self) -> bool {
+        match self {
+            Self::Unassigned => true,
+            _ => false,
+        }
+    }
+
+    /// Is this the `Reg` affinity?
+    pub fn is_reg(self) -> bool {
+        match self {
+            Self::Reg(_) => true,
+            _ => false,
+        }
+    }
+
+    /// Is this the `Stack` affinity?
+    pub fn is_stack(self) -> bool {
+        match self {
+            Self::Stack => true,
+            _ => false,
+        }
+    }
+
+    /// Merge an operand constraint into this affinity.
+    ///
+    /// Note that this does not guarantee that the register allocator will pick a register that
+    /// satisfies the constraint.
+    pub fn merge(&mut self, constraint: &OperandConstraint, reginfo: &RegInfo) {
+        match *self {
+            Self::Unassigned => *self = Self::new(constraint),
+            Self::Reg(rc) => {
+                // If the preferred register class is a subclass of the constraint, there's no need
+                // to change anything.
+                if constraint.kind != ConstraintKind::Stack && !constraint.regclass.has_subclass(rc)
+                {
+                    // If the register classes overlap, try to shrink our preferred register class.
+                    if let Some(subclass) = constraint.regclass.intersect_index(reginfo.rc(rc)) {
+                        *self = Self::Reg(subclass);
+                    }
+                }
+            }
+            Self::Stack => {}
+        }
+    }
+
+    /// Return an object that can display this value affinity, using the register info from the
+    /// target ISA.
+    pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayAffinity<'a> {
+        DisplayAffinity(self, regs.into())
+    }
+}
+
+/// Displaying an `Affinity` correctly requires the associated `RegInfo` from the target ISA.
+pub struct DisplayAffinity<'a>(Affinity, Option<&'a RegInfo>);
+
+impl<'a> fmt::Display for DisplayAffinity<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self.0 {
+            Affinity::Unassigned => write!(f, "unassigned"),
+            Affinity::Stack => write!(f, "stack"),
+            Affinity::Reg(rci) => match self.1 {
+                Some(regs) => write!(f, "{}", regs.rc(rci)),
+                None => write!(f, "{}", rci),
+            },
+        }
+    }
+}
diff --git a/cranelift/codegen/src/regalloc/branch_splitting.rs b/cranelift/codegen/src/regalloc/branch_splitting.rs
new file mode 100644
index 0000000000..4e9a159f3e
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/branch_splitting.rs
@@ -0,0 +1,169 @@
+//! Split the outgoing edges of conditional branches that pass parameters.
+//!
+//! One of the reasons for splitting edges is to be able to insert `copy` and `regmove` instructions
+//! between a conditional branch and the following terminator.
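+//!
+//! As a sketch of the transformation (with hypothetical block and value names, not taken from a
+//! real test case), a conditional branch that passes an argument, such as:
+//!
+//!     block0:
+//!         brnz v0, block1(v2)
+//!         jump block2
+//!
+//! is rewritten so that the argument is instead passed from a freshly created block:
+//!
+//!     block0:
+//!         brnz v0, block3
+//!         jump block2
+//!
+//!     block3:
+//!         jump block1(v2)
+//!
+//! which gives the register allocator a block of its own (block3) in which to place fixup moves
+//! for the passed value.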
+
+use alloc::vec::Vec;
+
+use crate::cursor::{Cursor, EncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::{Block, Function, Inst, InstBuilder, InstructionData, Opcode, ValueList};
+use crate::isa::TargetIsa;
+use crate::topo_order::TopoOrder;
+
+pub fn run(
+    isa: &dyn TargetIsa,
+    func: &mut Function,
+    cfg: &mut ControlFlowGraph,
+    domtree: &mut DominatorTree,
+    topo: &mut TopoOrder,
+) {
+    let mut ctx = Context {
+        has_new_blocks: false,
+        cur: EncCursor::new(func, isa),
+        domtree,
+        topo,
+        cfg,
+    };
+    ctx.run()
+}
+
+struct Context<'a> {
+    /// True if new blocks were inserted.
+    has_new_blocks: bool,
+
+    /// Current instruction as well as reference to function and ISA.
+    cur: EncCursor<'a>,
+
+    /// References to contextual data structures we need.
+    domtree: &'a mut DominatorTree,
+    topo: &'a mut TopoOrder,
+    cfg: &'a mut ControlFlowGraph,
+}
+
+impl<'a> Context<'a> {
+    fn run(&mut self) {
+        // Any block order will do.
+        self.topo.reset(self.cur.func.layout.blocks());
+        while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) {
+            // Branches can only be at the last or second to last position in an extended basic
+            // block.
+            self.cur.goto_last_inst(block);
+            let terminator_inst = self.cur.current_inst().expect("terminator");
+            if let Some(inst) = self.cur.prev_inst() {
+                let opcode = self.cur.func.dfg[inst].opcode();
+                if opcode.is_branch() {
+                    self.visit_conditional_branch(inst, opcode);
+                    self.cur.goto_inst(terminator_inst);
+                    self.visit_terminator_branch(terminator_inst);
+                }
+            }
+        }
+
+        // If blocks were added, the cfg and domtree are inconsistent and must be recomputed.
+        if self.has_new_blocks {
+            self.cfg.compute(&self.cur.func);
+            self.domtree.compute(&self.cur.func, self.cfg);
+        }
+    }
+
+    fn visit_conditional_branch(&mut self, branch: Inst, opcode: Opcode) {
+        // TODO: target = dfg[branch].branch_destination().expect("conditional branch");
+        let target = match self.cur.func.dfg[branch] {
+            InstructionData::Branch { destination, .. }
+            | InstructionData::BranchIcmp { destination, .. }
+            | InstructionData::BranchInt { destination, .. }
+            | InstructionData::BranchFloat { destination, .. } => destination,
+            _ => panic!("Unexpected instruction in visit_conditional_branch"),
+        };
+
+        // If there are any parameters, split the edge.
+        if self.should_split_edge(target) {
+            // Create the block the branch will jump to.
+            let new_block = self.cur.func.dfg.make_block();
+
+            // Insert the new block before the destination, such that it can fall through into the
+            // target block.
+            assert_ne!(Some(target), self.cur.layout().entry_block());
+            self.cur.layout_mut().insert_block(new_block, target);
+            self.has_new_blocks = true;
+
+            // Extract the arguments of the branch instruction, and split them into the fixed
+            // branch arguments and the Block parameters.
+            let num_fixed = opcode.constraints().num_fixed_value_arguments();
+            let dfg = &mut self.cur.func.dfg;
+            let old_args: Vec<_> = {
+                let args = dfg[branch].take_value_list().expect("block parameters");
+                args.as_slice(&dfg.value_lists).iter().copied().collect()
+            };
+            let (branch_args, block_params) = old_args.split_at(num_fixed);
+
+            // Replace the branch destination by the new Block created with no parameters, and restore
+            // the branch arguments, without the original Block parameters.
+            {
+                let branch_args = ValueList::from_slice(branch_args, &mut dfg.value_lists);
+                let data = &mut dfg[branch];
+                *data.branch_destination_mut().expect("branch") = new_block;
+                data.put_value_list(branch_args);
+            }
+            let ok = self.cur.func.update_encoding(branch, self.cur.isa).is_ok();
+            debug_assert!(ok);
+
+            // Insert a jump to the original target with its arguments into the new block.
+            self.cur.goto_first_insertion_point(new_block);
+            self.cur.ins().jump(target, block_params);
+
+            // Reset the cursor to point to the branch.
+            self.cur.goto_inst(branch);
+        }
+    }
+
+    fn visit_terminator_branch(&mut self, inst: Inst) {
+        let inst_data = &self.cur.func.dfg[inst];
+        let opcode = inst_data.opcode();
+        if opcode != Opcode::Jump && opcode != Opcode::Fallthrough {
+            // This opcode is ignored as it does not have any block parameters.
+            if opcode != Opcode::IndirectJumpTableBr {
+                debug_assert!(!opcode.is_branch())
+            }
+            return;
+        }
+
+        let target = match inst_data {
+            InstructionData::Jump { destination, .. } => destination,
+            _ => panic!(
+                "Unexpected instruction {} in visit_terminator_branch",
+                self.cur.display_inst(inst)
+            ),
+        };
+        debug_assert!(self.cur.func.dfg[inst].opcode().is_terminator());
+
+        // If there are any parameters, split the edge.
+        if self.should_split_edge(*target) {
+            // Create the block the branch will jump to.
+            let new_block = self.cur.func.dfg.make_block();
+            self.has_new_blocks = true;
+
+            // Split the current block before its terminator, and insert a new jump instruction to
+            // jump to it.
+            let jump = self.cur.ins().jump(new_block, &[]);
+            self.cur.insert_block(new_block);
+
+            // Reset the cursor to point to the new terminator of the old block.
+            self.cur.goto_inst(jump);
+        }
+    }
+
+    /// Returns whether we should introduce a new branch.
+    fn should_split_edge(&self, target: Block) -> bool {
+        // We should split the edge if the target has any parameters.
+        if !self.cur.func.dfg.block_params(target).is_empty() {
+            return true;
+        };
+
+        // Or, if the target has more than one block reaching it.
+        debug_assert!(self.cfg.pred_iter(target).next().is_some());
+
+        self.cfg.pred_iter(target).nth(1).is_some()
+    }
+}
diff --git a/cranelift/codegen/src/regalloc/coalescing.rs b/cranelift/codegen/src/regalloc/coalescing.rs
new file mode 100644
index 0000000000..4067a950cf
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/coalescing.rs
@@ -0,0 +1,1107 @@
+//! Constructing Conventional SSA form.
+//!
+//! Conventional SSA (CSSA) form is a subset of SSA form where any (transitively) phi-related
+//! values do not interfere. We construct CSSA by building virtual registers that are as large as
+//! possible and inserting copies where necessary such that all argument values passed to a block
+//! parameter will belong to the same virtual register as the block parameter value itself.
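+//!
+//! For example (a sketch with made-up value numbers, not taken from a real test case), if an
+//! argument `v1` interferes with the block parameter it is passed to, the pass isolates it by
+//! copying:
+//!
+//!     brnz v9, block1(v1)
+//!
+//! becomes:
+//!
+//!     v8 = copy v1
+//!     brnz v9, block1(v8)
+//!
+//! and it is `v8`, not `v1`, that is placed in the same virtual register as `block1`'s parameter.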
+
+use crate::cursor::{Cursor, EncCursor};
+use crate::dbg::DisplayList;
+use crate::dominator_tree::{DominatorTree, DominatorTreePreorder};
+use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
+use crate::fx::FxHashMap;
+use crate::ir::{self, InstBuilder, ProgramOrder};
+use crate::ir::{Block, ExpandedProgramPoint, Function, Inst, Value};
+use crate::isa::{EncInfo, TargetIsa};
+use crate::regalloc::affinity::Affinity;
+use crate::regalloc::liveness::Liveness;
+use crate::regalloc::virtregs::{VirtReg, VirtRegs};
+use crate::timing;
+use alloc::vec::Vec;
+use core::cmp;
+use core::fmt;
+use core::iter;
+use core::slice;
+use log::debug;
+
+// # Implementation
+//
+// The coalescing algorithm implemented follows this paper fairly closely:
+//
+//     Budimlic, Z., Cooper, K. D., Harvey, T. J., et al. (2002). Fast copy coalescing and
+//     live-range identification (Vol. 37, pp. 25–32). ACM. https://doi.org/10.1145/543552.512534
+//
+// We use a more efficient dominator forest representation (a linear stack) described here:
+//
+//     Boissinot, B., Darte, A., & Rastello, F. (2009). Revisiting out-of-SSA translation for
+//     correctness, code quality and efficiency.
+//
+// The algorithm has two main phases:
+//
+// Phase 1: Union-find.
+//
+// We use the union-find support in `VirtRegs` to build virtual registers such that block parameter
+// values always belong to the same virtual register as their corresponding block arguments at the
+// predecessor branches. Trivial interferences between parameter and argument value live ranges are
+// detected and resolved before unioning congruence classes, but non-trivial interferences between
+// values that end up in the same congruence class are possible.
+//
+// Phase 2: Dominator forests.
+//
+// The virtual registers formed in phase 1 can contain interferences that we need to detect and
+// eliminate. By ordering the values in a virtual register according to a dominator tree pre-order,
+// we can identify all interferences in the virtual register in linear time.
+//
+// Interfering values are isolated and virtual registers rebuilt.
+
+/// Data structures to be used by the coalescing pass.
+pub struct Coalescing {
+    preorder: DominatorTreePreorder,
+    forest: DomForest,
+    vcopies: VirtualCopies,
+    values: Vec<Value>,
+    predecessors: Vec<Inst>,
+    backedges: Vec<Inst>,
+}
+
+/// One-shot context created once per invocation.
+struct Context<'a> {
+    isa: &'a dyn TargetIsa,
+    encinfo: EncInfo,
+
+    func: &'a mut Function,
+    cfg: &'a ControlFlowGraph,
+    domtree: &'a DominatorTree,
+    preorder: &'a DominatorTreePreorder,
+    liveness: &'a mut Liveness,
+    virtregs: &'a mut VirtRegs,
+
+    forest: &'a mut DomForest,
+    vcopies: &'a mut VirtualCopies,
+    values: &'a mut Vec<Value>,
+    predecessors: &'a mut Vec<Inst>,
+    backedges: &'a mut Vec<Inst>,
+}
+
+impl Coalescing {
+    /// Create a new coalescing pass.
+    pub fn new() -> Self {
+        Self {
+            forest: DomForest::new(),
+            preorder: DominatorTreePreorder::new(),
+            vcopies: VirtualCopies::new(),
+            values: Vec::new(),
+            predecessors: Vec::new(),
+            backedges: Vec::new(),
+        }
+    }
+
+    /// Clear all data structures in this coalescing pass.
+    pub fn clear(&mut self) {
+        self.forest.clear();
+        self.vcopies.clear();
+        self.values.clear();
+        self.predecessors.clear();
+        self.backedges.clear();
+    }
+
+    /// Convert `func` to Conventional SSA form and build virtual registers in the process.
+    pub fn conventional_ssa(
+        &mut self,
+        isa: &dyn TargetIsa,
+        func: &mut Function,
+        cfg: &ControlFlowGraph,
+        domtree: &DominatorTree,
+        liveness: &mut Liveness,
+        virtregs: &mut VirtRegs,
+    ) {
+        let _tt = timing::ra_cssa();
+        debug!("Coalescing for:\n{}", func.display(isa));
+        self.preorder.compute(domtree, &func.layout);
+        let mut context = Context {
+            isa,
+            encinfo: isa.encoding_info(),
+            func,
+            cfg,
+            domtree,
+            preorder: &self.preorder,
+            liveness,
+            virtregs,
+            forest: &mut self.forest,
+            vcopies: &mut self.vcopies,
+            values: &mut self.values,
+            predecessors: &mut self.predecessors,
+            backedges: &mut self.backedges,
+        };
+
+        // Run phase 1 (union-find) of the coalescing algorithm on the current function.
+        for &block in domtree.cfg_postorder() {
+            context.union_find_block(block);
+        }
+        context.finish_union_find();
+
+        // Run phase 2 (dominator forests) on the current function.
+        context.process_vregs();
+    }
+}
+
+/// Phase 1: Union-find.
+///
+/// The two entry points for phase 1 are `union_find_block()` and `finish_union_find`.
+impl<'a> Context<'a> {
+    /// Run the union-find algorithm on the parameter values on `block`.
+    ///
+    /// This ensures that all block parameters will belong to the same virtual register as their
+    /// corresponding arguments at all predecessor branches.
+    pub fn union_find_block(&mut self, block: Block) {
+        let num_params = self.func.dfg.num_block_params(block);
+        if num_params == 0 {
+            return;
+        }
+
+        self.isolate_conflicting_params(block, num_params);
+
+        for i in 0..num_params {
+            self.union_pred_args(block, i);
+        }
+    }
+
+    // Identify block parameter values that are live at one of the predecessor branches.
+    //
+    // Such a parameter value will conflict with any argument value at the predecessor branch, so
+    // it must be isolated by inserting a copy.
+    fn isolate_conflicting_params(&mut self, block: Block, num_params: usize) {
+        debug_assert_eq!(num_params, self.func.dfg.num_block_params(block));
+        // The only way a parameter value can interfere with a predecessor branch is if the block is
+        // dominating the predecessor branch. That is, we are looking for loop back-edges.
+        for BlockPredecessor {
+            block: pred_block,
+            inst: pred_inst,
+        } in self.cfg.pred_iter(block)
+        {
+            // The quick pre-order dominance check is accurate because the block parameter is defined
+            // at the top of the block before any branches.
+            if !self.preorder.dominates(block, pred_block) {
+                continue;
+            }
+
+            debug!(
+                " - checking {} params at back-edge {}: {}",
+                num_params,
+                pred_block,
+                self.func.dfg.display_inst(pred_inst, self.isa)
+            );
+
+            // Now `pred_inst` is known to be a back-edge, so it is possible for parameter values
+            // to be live at the use.
+            for i in 0..num_params {
+                let param = self.func.dfg.block_params(block)[i];
+                if self.liveness[param].reaches_use(pred_inst, pred_block, &self.func.layout) {
+                    self.isolate_param(block, param);
+                }
+            }
+        }
+    }
+
+    // Union block parameter value `num` with the corresponding block arguments on the predecessor
+    // branches.
+    //
+    // Detect cases where the argument value is live-in to `block` so it conflicts with any block
+    // parameter. Isolate the argument in those cases before unioning it with the parameter value.
+    fn union_pred_args(&mut self, block: Block, argnum: usize) {
+        let param = self.func.dfg.block_params(block)[argnum];
+
+        for BlockPredecessor {
+            block: pred_block,
+            inst: pred_inst,
+        } in self.cfg.pred_iter(block)
+        {
+            let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum];
+
+            // Never coalesce incoming function parameters on the stack. These parameters are
+            // pre-spilled, and the rest of the virtual register would be forced to spill to the
+            // `incoming_arg` stack slot too.
+            if let ir::ValueDef::Param(def_block, def_num) = self.func.dfg.value_def(arg) {
+                if Some(def_block) == self.func.layout.entry_block()
+                    && self.func.signature.params[def_num].location.is_stack()
+                {
+                    debug!("-> isolating function stack parameter {}", arg);
+                    let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg);
+                    self.virtregs.union(param, new_arg);
+                    continue;
+                }
+            }
+
+            // Check for basic interference: If `arg` overlaps a value defined at the entry to
+            // `block`, it can never be used as a block argument.
+            let interference = {
+                let lr = &self.liveness[arg];
+
+                // There are two ways the argument value can interfere with `block`:
+                //
+                // 1. It is defined in a dominating block and live-in to `block`.
+                // 2. It is itself a parameter value for `block`. This case should already have been
+                //    eliminated by `isolate_conflicting_params()`.
+                debug_assert!(
+                    lr.def() != block.into(),
+                    "{} parameter {} was missed by isolate_conflicting_params()",
+                    block,
+                    arg
+                );
+
+                // The only other possibility is that `arg` is live-in to `block`.
+                lr.is_livein(block, &self.func.layout)
+            };
+
+            if interference {
+                let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg);
+                self.virtregs.union(param, new_arg);
+            } else {
+                self.virtregs.union(param, arg);
+            }
+        }
+    }
+
+    // Isolate block parameter value `param` on `block`.
+    //
+    // When `param=v10`:
+    //
+    //     block1(v10: i32):
+    //         foo
+    //
+    // becomes:
+    //
+    //     block1(v11: i32):
+    //         v10 = copy v11
+    //         foo
+    //
+    // This function inserts the copy and updates the live ranges of the old and new parameter
+    // values. Returns the new parameter value.
+    fn isolate_param(&mut self, block: Block, param: Value) -> Value {
+        debug_assert_eq!(
+            self.func.dfg.value_def(param).pp(),
+            ExpandedProgramPoint::Block(block)
+        );
+        let ty = self.func.dfg.value_type(param);
+        let new_val = self.func.dfg.replace_block_param(param, ty);
+
+        // Insert a copy instruction at the top of `block`.
+        let mut pos = EncCursor::new(self.func, self.isa).at_first_inst(block);
+        if let Some(inst) = pos.current_inst() {
+            pos.use_srcloc(inst);
+        }
+        pos.ins().with_result(param).copy(new_val);
+        let inst = pos.built_inst();
+        self.liveness.move_def_locally(param, inst);
+
+        debug!(
+            "-> inserted {}, following {}({}: {})",
+            pos.display_inst(inst),
+            block,
+            new_val,
+            ty
+        );
+
+        // Create a live range for the new value.
+        // TODO: Should we handle ghost values?
+        let affinity = Affinity::new(
+            &self
+                .encinfo
+                .operand_constraints(pos.func.encodings[inst])
+                .expect("Bad copy encoding")
+                .outs[0],
+        );
+        self.liveness.create_dead(new_val, block, affinity);
+        self.liveness
+            .extend_locally(new_val, block, inst, &pos.func.layout);
+
+        new_val
+    }
+
+    // Isolate the block argument `pred_val` from the predecessor `(pred_block, pred_inst)`.
+    //
+    // It is assumed that `pred_inst` is a branch instruction in `pred_block` whose `argnum`'th block
+    // argument is `pred_val`.
Since the argument value interferes with the corresponding block
+    // parameter at the destination, a copy is used instead:
+    //
+    //     brnz v1, block2(v10)
+    //
+    // becomes:
+    //
+    //     v11 = copy v10
+    //     brnz v1, block2(v11)
+    //
+    // This way the interference with the block parameter is avoided.
+    //
+    // A live range for the new value is created while the live range for `pred_val` is left
+    // unaltered.
+    //
+    // The new argument value is returned.
+    fn isolate_arg(
+        &mut self,
+        pred_block: Block,
+        pred_inst: Inst,
+        argnum: usize,
+        pred_val: Value,
+    ) -> Value {
+        let mut pos = EncCursor::new(self.func, self.isa).at_inst(pred_inst);
+        pos.use_srcloc(pred_inst);
+        let copy = pos.ins().copy(pred_val);
+        let inst = pos.built_inst();
+
+        // Create a live range for the new value.
+        // TODO: Handle affinity for ghost values.
+        let affinity = Affinity::new(
+            &self
+                .encinfo
+                .operand_constraints(pos.func.encodings[inst])
+                .expect("Bad copy encoding")
+                .outs[0],
+        );
+        self.liveness.create_dead(copy, inst, affinity);
+        self.liveness
+            .extend_locally(copy, pred_block, pred_inst, &pos.func.layout);
+
+        pos.func.dfg.inst_variable_args_mut(pred_inst)[argnum] = copy;
+
+        debug!(
+            "-> inserted {}, before {}: {}",
+            pos.display_inst(inst),
+            pred_block,
+            pos.display_inst(pred_inst)
+        );
+
+        copy
+    }
+
+    /// Finish the union-find part of the coalescing algorithm.
+    ///
+    /// This builds the initial set of virtual registers as the transitive/reflexive/symmetric
+    /// closure of the relation formed by block parameter-argument pairs found by `union_find_block()`.
+    fn finish_union_find(&mut self) {
+        self.virtregs.finish_union_find(None);
+        debug!("After union-find phase:{}", self.virtregs);
+    }
+}
+
+/// Phase 2: Dominator forests.
+///
+/// The main entry point is `process_vregs()`.
+impl<'a> Context<'a> {
+    /// Check all virtual registers for interference and fix conflicts.
+    pub fn process_vregs(&mut self) {
+        for vreg in self.virtregs.all_virtregs() {
+            self.process_vreg(vreg);
+        }
+    }
+
+    // Check `vreg` for interferences and fix conflicts.
+    fn process_vreg(&mut self, vreg: VirtReg) {
+        if !self.check_vreg(vreg) {
+            self.synthesize_vreg(vreg);
+        }
+    }
+
+    // Check `vreg` for interferences.
+    //
+    // We use a Budimlic dominator forest to check for interferences between the values in `vreg`
+    // and identify values that should be isolated.
+    //
+    // Returns true if `vreg` is free of interference.
+    fn check_vreg(&mut self, vreg: VirtReg) -> bool {
+        // Order the values according to the dominator pre-order of their definition.
+        let values = self.virtregs.sort_values(vreg, self.func, self.preorder);
+        debug!("Checking {} = {}", vreg, DisplayList(values));
+
+        // Now push the values in order to the dominator forest.
+        // This gives us the closest dominating value def for each of the values.
+        self.forest.clear();
+        for &value in values {
+            let node = Node::value(value, 0, self.func);
+
+            // Push this value and get the nearest dominating def back.
+            let parent = match self
+                .forest
+                .push_node(node, self.func, self.domtree, self.preorder)
+            {
+                None => continue,
+                Some(n) => n,
+            };
+
+            // Check for interference between `parent` and `value`. Since `parent` dominates
+            // `value`, we only have to check if it overlaps the definition.
+            if self.liveness[parent.value].overlaps_def(node.def, node.block, &self.func.layout) {
+                // The two values are interfering, so they can't be in the same virtual register.
+ debug!("-> interference: {} overlaps def of {}", parent, value); + return false; + } + } + + // No interference found. + true + } + + /// Destroy and rebuild `vreg` by iterative coalescing. + /// + /// When detecting that a virtual register formed in phase 1 contains interference, we have to + /// start over in a more careful way. We'll split the vreg into individual values and then + /// reassemble virtual registers using an iterative algorithm of pairwise merging. + /// + /// It is possible to recover multiple large virtual registers this way while still avoiding + /// a lot of copies. + fn synthesize_vreg(&mut self, vreg: VirtReg) { + self.vcopies.initialize( + self.virtregs.values(vreg), + self.func, + self.cfg, + self.preorder, + ); + debug!( + "Synthesizing {} from {} branches and params {}", + vreg, + self.vcopies.branches.len(), + DisplayList(&self.vcopies.params) + ); + self.virtregs.remove(vreg); + + while let Some(param) = self.vcopies.next_param() { + self.merge_param(param); + self.vcopies.merged_param(param, self.func); + } + } + + /// Merge block parameter value `param` with virtual registers at its predecessors. + fn merge_param(&mut self, param: Value) { + let (block, argnum) = match self.func.dfg.value_def(param) { + ir::ValueDef::Param(e, n) => (e, n), + ir::ValueDef::Result(_, _) => panic!("Expected parameter"), + }; + + // Collect all the predecessors and rearrange them. + // + // The order we process the predecessors matters because once one predecessor's virtual + // register is merged, it can cause interference with following merges. This means that the + // first predecessors processed are more likely to be copy-free. We want an ordering that + // is a) good for performance and b) as stable as possible. The pred_iter() iterator uses + // instruction numbers which is not great for reproducible test cases. + // + // First merge loop back-edges in layout order, on the theory that shorter back-edges are + // more sensitive to inserted copies. + // + // Second everything else in reverse layout order. Again, short forward branches get merged + // first. There can also be backwards branches mixed in here, though, as long as they are + // not loop backedges. + debug_assert!(self.predecessors.is_empty()); + debug_assert!(self.backedges.is_empty()); + for BlockPredecessor { + block: pred_block, + inst: pred_inst, + } in self.cfg.pred_iter(block) + { + if self.preorder.dominates(block, pred_block) { + self.backedges.push(pred_inst); + } else { + self.predecessors.push(pred_inst); + } + } + // Order instructions in reverse order so we can pop them off the back. + { + let l = &self.func.layout; + self.backedges.sort_unstable_by(|&a, &b| l.cmp(b, a)); + self.predecessors.sort_unstable_by(|&a, &b| l.cmp(a, b)); + self.predecessors.extend_from_slice(&self.backedges); + self.backedges.clear(); + } + + while let Some(pred_inst) = self.predecessors.pop() { + let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum]; + + // We want to merge the vreg containing `param` with the vreg containing `arg`. + if self.try_merge_vregs(param, arg) { + continue; + } + + // Can't merge because of interference. Insert a copy instead. + let pred_block = self.func.layout.pp_block(pred_inst); + let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg); + self.virtregs + .insert_single(param, new_arg, self.func, self.preorder); + } + } + + /// Merge the virtual registers containing `param` and `arg` if possible. + /// + /// Use self.vcopies to check for virtual copy interference too. 
+    ///
+    /// Returns true if the virtual registers are successfully merged.
+    fn try_merge_vregs(&mut self, param: Value, arg: Value) -> bool {
+        if self.virtregs.same_class(param, arg) {
+            return true;
+        }
+
+        if !self.can_merge_vregs(param, arg) {
+            return false;
+        }
+
+        let _vreg = self.virtregs.unify(self.values);
+        debug!("-> merged into {} = {}", _vreg, DisplayList(self.values));
+        true
+    }
+
+    /// Check if it is possible to merge two virtual registers.
+    ///
+    /// Also leave `self.values` with the ordered list of values in the merged vreg.
+    fn can_merge_vregs(&mut self, param: Value, arg: Value) -> bool {
+        // We only need an immutable function reference.
+        let func = &*self.func;
+        let domtree = self.domtree;
+        let preorder = self.preorder;
+
+        // Restrict the virtual copy nodes we look at and key the `set_id` and `value` properties
+        // of the nodes. Set_id 0 will be `param` and set_id 1 will be `arg`.
+        self.vcopies
+            .set_filter([param, arg], func, self.virtregs, preorder);
+
+        // Now create an ordered sequence of dom-forest nodes from three sources: The two virtual
+        // registers and the filtered virtual copies.
+        let v0 = self.virtregs.congruence_class(&param);
+        let v1 = self.virtregs.congruence_class(&arg);
+        debug!(
+            " - set 0: {}\n - set 1: {}",
+            DisplayList(v0),
+            DisplayList(v1)
+        );
+        let nodes = MergeNodes::new(
+            func,
+            preorder,
+            MergeNodes::new(
+                func,
+                preorder,
+                v0.iter().map(|&value| Node::value(value, 0, func)),
+                v1.iter().map(|&value| Node::value(value, 1, func)),
+            ),
+            self.vcopies.iter(func),
+        );
+
+        // Now push the values in order to the dominator forest.
+        // This gives us the closest dominating value def for each of the values.
+        self.forest.clear();
+        self.values.clear();
+        for node in nodes {
+            // Accumulate ordered values for the new vreg.
+            if node.is_value() {
+                self.values.push(node.value);
+            }
+
+            // Push this value and get the nearest dominating def back.
+            let parent = match self.forest.push_node(node, func, domtree, preorder) {
+                None => {
+                    if node.is_vcopy {
+                        self.forest.pop_last();
+                    }
+                    continue;
+                }
+                Some(n) => n,
+            };
+
+            if node.is_vcopy {
+                // Vcopy nodes don't represent interference if they are copies of the parent value.
+                // In that case, the node must be removed because the parent value can still be
+                // live beyond the vcopy.
+                if parent.is_vcopy || node.value == parent.value {
+                    self.forest.pop_last();
+                    continue;
+                }
+
+                // Check if the parent value interferes with the virtual copy.
+                let inst = node.def.unwrap_inst();
+                if node.set_id != parent.set_id
+                    && self.liveness[parent.value].reaches_use(inst, node.block, &self.func.layout)
+                {
+                    debug!(
+                        " - interference: {} overlaps vcopy at {}:{}",
+                        parent,
+                        node.block,
+                        self.func.dfg.display_inst(inst, self.isa)
+                    );
+                    return false;
+                }
+
+                // Keep this vcopy on the stack. It will save us a few interference checks.
+                continue;
+            }
+
+            // Parent vcopies never represent any interference. We only keep them on the stack to
+            // avoid an interference check against a value higher up.
+            if parent.is_vcopy {
+                continue;
+            }
+
+            // Both node and parent are values, so check for interference.
+            debug_assert!(node.is_value() && parent.is_value());
+            if node.set_id != parent.set_id
+                && self.liveness[parent.value].overlaps_def(node.def, node.block, &self.func.layout)
+            {
+                // The two values are interfering.
+                debug!(" - interference: {} overlaps def of {}", parent, node.value);
+                return false;
+            }
+        }
+
+        // The values vector should receive all values.
+ debug_assert_eq!(v0.len() + v1.len(), self.values.len());
+
+ // No interference found.
+ true
+ }
+}
+
+/// Dominator forest.
+///
+/// This is a utility type used for detecting interference in virtual registers, where each virtual
+/// register is a list of values ordered according to the dominator tree pre-order.
+///
+/// The idea of a dominator forest was introduced in the Budimlic paper and the linear stack
+/// representation in the Boissinot paper. Our version of the linear stack is slightly modified
+/// because we have a pre-order of the dominator tree at the block granularity, not basic block
+/// granularity.
+///
+/// Values are pushed in dominator tree pre-order of their definitions, and for each value pushed,
+/// `push_node` will return the nearest previously pushed value that dominates the definition.
+#[allow(dead_code)]
+struct DomForest {
+ // Stack representing the rightmost edge of the dominator forest so far, ending in the last
+ // element of `values`.
+ //
+ // At all times, the block of each element in the stack dominates the block of the next one.
+ stack: Vec<Node>,
+}
+
+/// A node in the dominator forest.
+#[derive(Clone, Copy, Debug)]
+#[allow(dead_code)]
+struct Node {
+ /// The program point where the live range is defined.
+ def: ExpandedProgramPoint,
+ /// block containing `def`.
+ block: Block,
+ /// Is this a virtual copy or a value?
+ is_vcopy: bool,
+ /// Set identifier.
+ set_id: u8,
+ /// For a value node: The value defined at `def`.
+ /// For a vcopy node: The relevant branch argument at `def`.
+ value: Value,
+}
+
+impl Node {
+ /// Create a node representing `value`.
+ pub fn value(value: Value, set_id: u8, func: &Function) -> Self {
+ let def = func.dfg.value_def(value).pp();
+ let block = func.layout.pp_block(def);
+ Self {
+ def,
+ block,
+ is_vcopy: false,
+ set_id,
+ value,
+ }
+ }
+
+ /// Create a node representing a virtual copy.
+ pub fn vcopy(branch: Inst, value: Value, set_id: u8, func: &Function) -> Self {
+ let def = branch.into();
+ let block = func.layout.pp_block(def);
+ Self {
+ def,
+ block,
+ is_vcopy: true,
+ set_id,
+ value,
+ }
+ }
+
+ /// Is this a value node?
+ pub fn is_value(&self) -> bool {
+ !self.is_vcopy
+ }
+}
+
+impl fmt::Display for Node {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ if self.is_vcopy {
+ write!(f, "{}:vcopy({})@{}", self.set_id, self.value, self.block)
+ } else {
+ write!(f, "{}:{}@{}", self.set_id, self.value, self.block)
+ }
+ }
+}
+
+impl DomForest {
+ /// Create a new empty dominator forest.
+ pub fn new() -> Self {
+ Self { stack: Vec::new() }
+ }
+
+ /// Clear all data structures in this dominator forest.
+ pub fn clear(&mut self) {
+ self.stack.clear();
+ }
+
+ /// Add a single node to the forest.
+ ///
+ /// Update the stack so its dominance invariants are preserved. Detect a parent node on the
+ /// stack which is the closest one dominating the new node and return it.
+ fn push_node(
+ &mut self,
+ node: Node,
+ func: &Function,
+ domtree: &DominatorTree,
+ preorder: &DominatorTreePreorder,
+ ) -> Option<Node> {
+ // The stack contains the current sequence of dominating defs. Pop elements until we
+ // find one whose block dominates `node.block`.
+ while let Some(top) = self.stack.pop() {
+ if preorder.dominates(top.block, node.block) {
+ // This is the right insertion spot for `node`.
+ self.stack.push(top);
+ self.stack.push(node);
+
+ // We know here that `top.block` dominates `node.block`, and thus `node.def`. This does
+ // not necessarily mean that `top.def` dominates `node.def`, though. The `top.def`
+ // program point may be below the last branch in `top.block` that dominates
+ // `node.def`.
+ //
+ // We do know, though, that if there is a nearest value dominating `node.def`, it
+ // will be on the stack. We just need to find the last stack entry that actually
+ // dominates.
+ let mut last_dom = node.def;
+ for &n in self.stack.iter().rev().skip(1) {
+ // If the node is defined at the block header, it does in fact dominate
+ // everything else pushed on the stack.
+ let def_inst = match n.def {
+ ExpandedProgramPoint::Block(_) => return Some(n),
+ ExpandedProgramPoint::Inst(i) => i,
+ };
+
+ // We need to find the last program point in `n.block` to dominate `node.def`.
+ last_dom = match domtree.last_dominator(n.block, last_dom, &func.layout) {
+ None => n.block.into(),
+ Some(inst) => {
+ if func.layout.cmp(def_inst, inst) != cmp::Ordering::Greater {
+ return Some(n);
+ }
+ inst.into()
+ }
+ };
+ }
+
+ // No real dominator found on the stack.
+ return None;
+ }
+ }
+
+ // No dominators, start a new tree in the forest.
+ self.stack.push(node);
+ None
+ }
+
+ pub fn pop_last(&mut self) {
+ self.stack.pop().expect("Stack is empty");
+ }
+}
+
+/// Virtual copies.
+///
+/// When building a full virtual register at once, like phase 1 does with union-find, it is good
+/// enough to check for interference between the values in the full virtual register like
+/// `check_vreg()` does. However, in phase 2 we are doing pairwise merges of partial virtual
+/// registers that don't represent the full transitive closure of the block argument-parameter
+/// relation. This means that just checking for interference between values is inadequate.
+///
+/// Example:
+///
+/// v1 = iconst.i32 1
+/// brnz v10, block1(v1)
+/// v2 = iconst.i32 2
+/// brnz v11, block1(v2)
+/// return v1
+///
+/// block1(v3: i32):
+/// v4 = iadd v3, v1
+///
+/// With just value interference checking, we could build the virtual register [v3, v1] since those
+/// two values don't interfere. We can't merge v2 into this virtual register because v1 and v2
+/// interfere. However, we can't resolve that interference either by inserting a copy:
+///
+/// v1 = iconst.i32 1
+/// brnz v10, block1(v1)
+/// v2 = iconst.i32 2
+/// v20 = copy v2 <-- new value
+/// brnz v11, block1(v20)
+/// return v1
+///
+/// block1(v3: i32):
+/// v4 = iadd v3, v1
+///
+/// The new value v20 still interferes with v1 because v1 is live across the "brnz v11" branch. We
+/// shouldn't have placed v1 and v3 in the same virtual register to begin with.
+///
+/// LLVM detects this form of interference by inserting copies in the predecessors of all phi
+/// instructions, then attempting to delete the copies. This is quite expensive because it involves
+/// creating a large number of copies and values.
+///
+/// We'll detect this form of interference with *virtual copies*: Each block parameter value that
+/// hasn't yet been fully merged with its block argument values is given a set of virtual copies at
+/// the predecessors. Any candidate value to be merged is checked for interference against both the
+/// virtual register and the virtual copies.
+///
+/// In the general case, we're checking if two virtual registers can be merged, and both can
+/// contain incomplete block parameter values with associated virtual copies.
+///
+/// The `VirtualCopies` struct represents a set of incomplete parameters and their associated
+/// virtual copies. Given two virtual registers, it can produce an ordered sequence of nodes
+/// representing the virtual copies in both vregs.
+struct VirtualCopies {
+ // Incomplete block parameters. These don't need to belong to the same virtual register.
+ params: Vec<Value>,
+
+ // Set of `(branch, destination)` pairs. These are all the predecessor branches for the blocks
+ // whose parameters can be found in `params`.
+ //
+ // Ordered by dominator tree pre-order of the branch instructions.
+ branches: Vec<(Inst, Block)>,
+
+ // Filter for the currently active node iterator.
+ //
+ // A block => (set_id, num) entry means that branches to `block` are active in `set_id` with
+ // branch argument number `num`.
+ filter: FxHashMap<Block, (u8, usize)>,
+}
+
+impl VirtualCopies {
+ /// Create an empty VirtualCopies struct.
+ pub fn new() -> Self {
+ Self {
+ params: Vec::new(),
+ branches: Vec::new(),
+ filter: FxHashMap(),
+ }
+ }
+
+ /// Clear all state.
+ pub fn clear(&mut self) {
+ self.params.clear();
+ self.branches.clear();
+ self.filter.clear();
+ }
+
+ /// Initialize virtual copies from the (interfering) values in a union-find virtual register
+ /// that is going to be broken up and reassembled iteratively.
+ ///
+ /// The values are assumed to be in domtree pre-order.
+ ///
+ /// This will extract the block parameter values and associate virtual copies with all of them.
+ pub fn initialize(
+ &mut self,
+ values: &[Value],
+ func: &Function,
+ cfg: &ControlFlowGraph,
+ preorder: &DominatorTreePreorder,
+ ) {
+ self.clear();
+
+ let mut last_block = None;
+ for &val in values {
+ if let ir::ValueDef::Param(block, _) = func.dfg.value_def(val) {
+ self.params.push(val);
+
+ // We may have multiple parameters from the same block, but we only need to collect
+ // predecessors once. Also verify the ordering of values.
+ if let Some(last) = last_block {
+ match preorder.pre_cmp_block(last, block) {
+ cmp::Ordering::Less => {}
+ cmp::Ordering::Equal => continue,
+ cmp::Ordering::Greater => panic!("values in wrong order"),
+ }
+ }
+
+ // This block hasn't been seen before.
+ for BlockPredecessor {
+ inst: pred_inst, ..
+ } in cfg.pred_iter(block)
+ {
+ self.branches.push((pred_inst, block));
+ }
+ last_block = Some(block);
+ }
+ }
+
+ // Reorder the predecessor branches as required by the dominator forest.
+ self.branches
+ .sort_unstable_by(|&(a, _), &(b, _)| preorder.pre_cmp(a, b, &func.layout));
+ }
+
+ /// Get the next unmerged parameter value.
+ pub fn next_param(&self) -> Option<Value> {
+ self.params.last().cloned()
+ }
+
+ /// Indicate that `param` is now fully merged.
+ pub fn merged_param(&mut self, param: Value, func: &Function) {
+ let popped = self.params.pop();
+ debug_assert_eq!(popped, Some(param));
+
+ // The domtree pre-order in `self.params` guarantees that all parameters defined at the
+ // same block will be adjacent. This means we can see when all parameters at a block have been
+ // merged.
+ //
+ // We don't care about the last parameter - when that is merged we are done.
+ let last = match self.params.last() {
+ None => return,
+ Some(x) => *x,
+ };
+ let block = func.dfg.value_def(param).unwrap_block();
+ if func.dfg.value_def(last).unwrap_block() == block {
+ // We're not done with `block` parameters yet.
+ return;
+ }
+
+ // Alright, we know there are no remaining `block` parameters in `self.params`. This means we
+ // can get rid of the `block` predecessors in `self.branches`. We don't have to (the
+ // `VCopyIter` would just skip them), but this reduces its workload.
+ self.branches.retain(|&(_, dest)| dest != block);
+ }
+
+ /// Set a filter for the virtual copy nodes we're generating.
+ ///
+ /// Only generate nodes for parameter values that are in the same congruence class as `reprs`.
+ /// Assign a set_id to each node corresponding to the index into `reprs` of the parameter's
+ /// congruence class.
+ pub fn set_filter(
+ &mut self,
+ reprs: [Value; 2],
+ func: &Function,
+ virtregs: &VirtRegs,
+ preorder: &DominatorTreePreorder,
+ ) {
+ self.filter.clear();
+
+ // Parameters in `self.params` are ordered according to the domtree pre-order, and they are
+ // removed from the back once they are fully merged. This means we can stop looking for
+ // parameters once we're beyond the last one.
+ let last_param = *self.params.last().expect("No more parameters");
+ let limit = func.dfg.value_def(last_param).unwrap_block();
+
+ for (set_id, repr) in reprs.iter().enumerate() {
+ let set_id = set_id as u8;
+ for &value in virtregs.congruence_class(repr) {
+ if let ir::ValueDef::Param(block, num) = func.dfg.value_def(value) {
+ if preorder.pre_cmp_block(block, limit) == cmp::Ordering::Greater {
+ // Stop once we're outside the bounds of `self.params`.
+ break;
+ }
+ self.filter.insert(block, (set_id, num));
+ }
+ }
+ }
+ }
+
+ /// Look up the set_id and argument number for `block` in the current filter.
+ ///
+ /// Returns `None` if none of the currently active parameters are defined at `block`. Otherwise
+ /// returns `(set_id, argnum)` for an active parameter defined at `block`.
+ fn lookup(&self, block: Block) -> Option<(u8, usize)> {
+ self.filter.get(&block).cloned()
+ }
+
+ /// Get an iterator of dom-forest nodes corresponding to the current filter.
+ pub fn iter<'a>(&'a self, func: &'a Function) -> VCopyIter {
+ VCopyIter {
+ func,
+ vcopies: self,
+ branches: self.branches.iter(),
+ }
+ }
+}
+
+/// Virtual copy iterator.
+///
+/// This iterator produces dom-forest nodes corresponding to the current filter in the virtual
+/// copies container.
+struct VCopyIter<'a> {
+ func: &'a Function,
+ vcopies: &'a VirtualCopies,
+ branches: slice::Iter<'a, (Inst, Block)>,
+}
+
+impl<'a> Iterator for VCopyIter<'a> {
+ type Item = Node;
+
+ fn next(&mut self) -> Option<Node> {
+ while let Some(&(branch, dest)) = self.branches.next() {
+ if let Some((set_id, argnum)) = self.vcopies.lookup(dest) {
+ let arg = self.func.dfg.inst_variable_args(branch)[argnum];
+ return Some(Node::vcopy(branch, arg, set_id, self.func));
+ }
+ }
+ None
+ }
+}
+
+/// Node-merging iterator.
+///
+/// Given two ordered sequences of nodes, yield an ordered sequence containing all of them.
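+///
+/// For example (hypothetical input): if `a` yields nodes defined in block0 and block2 while `b`
+/// yields a node defined in block1, the merged sequence visits block0, block1, block2 in
+/// dominator tree pre-order, breaking ties within a block by layout order and preferring the
+/// `a` side when nodes compare equal.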
+struct MergeNodes<'a, IA, IB>
+where
+ IA: Iterator<Item = Node>,
+ IB: Iterator<Item = Node>,
+{
+ a: iter::Peekable<IA>,
+ b: iter::Peekable<IB>,
+ layout: &'a ir::Layout,
+ preorder: &'a DominatorTreePreorder,
+}
+
+impl<'a, IA, IB> MergeNodes<'a, IA, IB>
+where
+ IA: Iterator<Item = Node>,
+ IB: Iterator<Item = Node>,
+{
+ pub fn new(func: &'a Function, preorder: &'a DominatorTreePreorder, a: IA, b: IB) -> Self {
+ MergeNodes {
+ a: a.peekable(),
+ b: b.peekable(),
+ layout: &func.layout,
+ preorder,
+ }
+ }
+}
+
+impl<'a, IA, IB> Iterator for MergeNodes<'a, IA, IB>
+where
+ IA: Iterator<Item = Node>,
+ IB: Iterator<Item = Node>,
+{
+ type Item = Node;
+
+ fn next(&mut self) -> Option<Node> {
+ let ord = match (self.a.peek(), self.b.peek()) {
+ (Some(a), Some(b)) => {
+ let layout = self.layout;
+ self.preorder
+ .pre_cmp_block(a.block, b.block)
+ .then_with(|| layout.cmp(a.def, b.def))
+ }
+ (Some(_), None) => cmp::Ordering::Less,
+ (None, Some(_)) => cmp::Ordering::Greater,
+ (None, None) => return None,
+ };
+ // When the nodes compare equal, prefer the `a` side.
+ if ord != cmp::Ordering::Greater {
+ self.a.next()
+ } else {
+ self.b.next()
+ }
+ }
+}
diff --git a/cranelift/codegen/src/regalloc/coloring.rs b/cranelift/codegen/src/regalloc/coloring.rs
new file mode 100644
index 0000000000..eb3cb513c8
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/coloring.rs
@@ -0,0 +1,1324 @@
+//! Register allocator coloring pass.
+//!
+//! The coloring pass assigns a physical register to every SSA value with a register affinity,
+//! under the assumption that the register pressure has been lowered sufficiently by spilling and
+//! splitting.
+//!
+//! # Preconditions
+//!
+//! The coloring pass doesn't work on arbitrary code. Certain preconditions must be satisfied:
+//!
+//! 1. All instructions must be legalized and assigned an encoding. The encoding recipe guides the
+//! register assignments and provides exact constraints.
+//!
+//! 2. Instructions with tied operands must be in a coloring-friendly state. Specifically, the
+//! values used by the tied operands must be killed by the instruction. This can be achieved by
+//! inserting a `copy` to a new value immediately before the two-address instruction.
+//!
+//! 3. If a value is bound to more than one operand on the same instruction, the operand
+//! constraints must be compatible. This can also be achieved by inserting copies so the
+//! incompatible operands get different values.
+//!
+//! 4. The register pressure must be lowered sufficiently by inserting spill code. Register
+//! operands are allowed to read spilled values, but each such instance must be counted as using
+//! a register.
+//!
+//! 5. The code must be in Conventional SSA form. Among other things, this means that values passed
+//! as arguments when branching to a block must belong to the same virtual register as the
+//! corresponding block argument value.
+//!
+//! # Iteration order
+//!
+//! The SSA property guarantees that whenever the live range of two values overlap, one of the
+//! values will be live at the definition point of the other value. If we visit the instructions in
+//! a topological order relative to the dominance relation, we can assign colors to the values
+//! defined by the instruction and only consider the colors of other values that are live at the
+//! instruction.
+//!
+//! The first time we see a branch to a block, the block's argument values are colored to match the
+//! registers currently holding branch argument values passed to the predecessor branch. By
+//!
visiting blocks in a CFG topological order, we guarantee that at least one predecessor branch has +//! been visited before the destination block. Therefore, the block's arguments are already colored. +//! +//! The exception is the entry block whose arguments are colored from the ABI requirements. + +use crate::cursor::{Cursor, EncCursor}; +use crate::dominator_tree::DominatorTree; +use crate::flowgraph::ControlFlowGraph; +use crate::ir::{ArgumentLoc, InstBuilder, ValueDef}; +use crate::ir::{Block, Function, Inst, InstructionData, Layout, Opcode, SigRef, Value, ValueLoc}; +use crate::isa::{regs_overlap, RegClass, RegInfo, RegUnit}; +use crate::isa::{ConstraintKind, EncInfo, OperandConstraint, RecipeConstraints, TargetIsa}; +use crate::packed_option::PackedOption; +use crate::regalloc::affinity::Affinity; +use crate::regalloc::diversion::RegDiversions; +use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker}; +use crate::regalloc::liveness::Liveness; +use crate::regalloc::liverange::LiveRange; +use crate::regalloc::register_set::RegisterSet; +use crate::regalloc::solver::{Solver, SolverError}; +use crate::timing; +use core::mem; +use log::debug; + +/// Data structures for the coloring pass. +/// +/// These are scratch space data structures that can be reused between invocations. +pub struct Coloring { + divert: RegDiversions, + solver: Solver, +} + +/// Kinds of ABI parameters. +enum AbiParams { + Parameters(SigRef), + Returns, +} + +/// Bundle of references that the coloring algorithm needs. +/// +/// Some of the needed mutable references are passed around as explicit function arguments so we +/// can avoid many fights with the borrow checker over mutable borrows of `self`. This includes the +/// `Function` and `LiveValueTracker` references. +/// +/// Immutable context information and mutable references that don't need to be borrowed across +/// method calls should go in this struct. +struct Context<'a> { + // Current instruction as well as reference to function and ISA. + cur: EncCursor<'a>, + + // Cached ISA information. + // We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object. + reginfo: RegInfo, + encinfo: EncInfo, + + // References to contextual data structures we need. + cfg: &'a ControlFlowGraph, + domtree: &'a DominatorTree, + liveness: &'a mut Liveness, + + // References to working set data structures. + // If we need to borrow out of a data structure across a method call, it must be passed as a + // function argument instead, see the `LiveValueTracker` arguments. + divert: &'a mut RegDiversions, + solver: &'a mut Solver, + + // Pristine set of registers that the allocator can use. + // This set remains immutable, we make clones. + usable_regs: RegisterSet, + + uses_pinned_reg: bool, +} + +impl Coloring { + /// Allocate scratch space data structures for the coloring pass. + pub fn new() -> Self { + Self { + divert: RegDiversions::new(), + solver: Solver::new(), + } + } + + /// Clear all data structures in this coloring pass. + pub fn clear(&mut self) { + self.divert.clear(); + self.solver.clear(); + } + + /// Run the coloring algorithm over `func`. 
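+ ///
+ /// This assumes the preconditions in the module documentation hold, i.e. liveness has been
+ /// computed and the code is in Conventional SSA form; see the register allocator `Context`
+ /// in `regalloc/context.rs` for the pass ordering that establishes them.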
+ pub fn run(
+ &mut self,
+ isa: &dyn TargetIsa,
+ func: &mut Function,
+ cfg: &ControlFlowGraph,
+ domtree: &DominatorTree,
+ liveness: &mut Liveness,
+ tracker: &mut LiveValueTracker,
+ ) {
+ let _tt = timing::ra_coloring();
+ debug!("Coloring for:\n{}", func.display(isa));
+ let mut ctx = Context {
+ usable_regs: isa.allocatable_registers(func),
+ uses_pinned_reg: isa.flags().enable_pinned_reg(),
+ cur: EncCursor::new(func, isa),
+ reginfo: isa.register_info(),
+ encinfo: isa.encoding_info(),
+ cfg,
+ domtree,
+ liveness,
+ divert: &mut self.divert,
+ solver: &mut self.solver,
+ };
+ ctx.run(tracker)
+ }
+}
+
+impl<'a> Context<'a> {
+ /// Is the pinned register usage enabled, and is this register the pinned register?
+ #[inline]
+ fn is_pinned_reg(&self, rc: RegClass, reg: RegUnit) -> bool {
+ rc.is_pinned_reg(self.uses_pinned_reg, reg)
+ }
+
+ /// Run the coloring algorithm.
+ fn run(&mut self, tracker: &mut LiveValueTracker) {
+ self.cur
+ .func
+ .locations
+ .resize(self.cur.func.dfg.num_values());
+
+ // Visit blocks in reverse post-order. We need to ensure that at least one predecessor has
+ // been visited before each block. That guarantees that the block arguments have been colored.
+ for &block in self.domtree.cfg_postorder().iter().rev() {
+ self.visit_block(block, tracker);
+ }
+ }
+
+ /// Visit `block`, assuming that the immediate dominator has already been visited.
+ fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) {
+ debug!("Coloring {}:", block);
+ let mut regs = self.visit_block_header(block, tracker);
+ tracker.drop_dead_params();
+
+ // Now go through the instructions in `block` and color the values they define.
+ self.cur.goto_top(block);
+ while let Some(inst) = self.cur.next_inst() {
+ self.cur.use_srcloc(inst);
+ let opcode = self.cur.func.dfg[inst].opcode();
+ if !opcode.is_ghost() {
+ // This is an instruction which either has an encoding or carries ABI-related
+ // register allocation constraints.
+ let enc = self.cur.func.encodings[inst];
+ let constraints = self.encinfo.operand_constraints(enc);
+ if self.visit_inst(inst, constraints, tracker, &mut regs) {
+ self.replace_global_defines(inst, tracker);
+ // Restore cursor location after `replace_global_defines` moves it.
+ // We want to revisit the copy instructions it inserted.
+ self.cur.goto_inst(inst);
+ }
+ } else {
+ // This is a ghost instruction with no encoding and no extra constraints.
+ let (_throughs, kills) = tracker.process_ghost(inst);
+ self.process_ghost_kills(kills, &mut regs);
+ }
+ tracker.drop_dead(inst);
+
+ // We are not able to insert any regmove for diversion or un-diversion after the first
+ // branch. Instead, we record the diversion to be restored at the entry of the next block,
+ // which should have a single predecessor.
+ if opcode.is_branch() {
+ // The next instruction is necessarily an unconditional branch.
+ if let Some(branch) = self.cur.next_inst() {
+ debug!(
+ "Skip coloring {}\n from {}\n with diversions {}",
+ self.cur.display_inst(branch),
+ regs.input.display(&self.reginfo),
+ self.divert.display(&self.reginfo)
+ );
+ use crate::ir::instructions::BranchInfo::*;
+ let target = match self.cur.func.dfg.analyze_branch(branch) {
+ NotABranch | Table(_, _) => panic!(
+ "unexpected instruction {} after a conditional branch",
+ self.cur.display_inst(branch)
+ ),
+ SingleDest(block, _) => block,
+ };
+
+ // We have a single branch with a single target, and a block with a single
+ // predecessor. Thus we can forward the diversion set to the next block.
+ if self.cfg.pred_iter(target).count() == 1 { + // Transfer the diversion to the next block. + self.divert + .save_for_block(&mut self.cur.func.entry_diversions, target); + debug!( + "Set entry-diversion for {} to\n {}", + target, + self.divert.display(&self.reginfo) + ); + } else { + debug_assert!( + self.divert.is_empty(), + "Divert set is non-empty after the terminator." + ); + } + assert_eq!( + self.cur.next_inst(), + None, + "Unexpected instruction after a branch group." + ); + } else { + assert!(opcode.is_terminator()); + } + } + } + } + + /// Visit the `block` header. + /// + /// Initialize the set of live registers and color the arguments to `block`. + fn visit_block_header( + &mut self, + block: Block, + tracker: &mut LiveValueTracker, + ) -> AvailableRegs { + // Reposition the live value tracker and deal with the block arguments. + tracker.block_top( + block, + &self.cur.func.dfg, + self.liveness, + &self.cur.func.layout, + self.domtree, + ); + + // Copy the content of the registered diversions to be reused at the + // entry of this basic block. + self.divert.at_block(&self.cur.func.entry_diversions, block); + debug!( + "Start {} with entry-diversion set to\n {}", + block, + self.divert.display(&self.reginfo) + ); + + if self.cur.func.layout.entry_block() == Some(block) { + // Parameters on the entry block have ABI constraints. + self.color_entry_params(tracker.live()) + } else { + // The live-ins and parameters of a non-entry block have already been assigned a register. + // Reconstruct the allocatable set. + self.livein_regs(tracker.live()) + } + } + + /// Initialize a set of allocatable registers from the values that are live-in to a block. + /// These values must already be colored when the dominating blocks were processed. + /// + /// Also process the block arguments which were colored when the first predecessor branch was + /// encountered. + fn livein_regs(&self, live: &[LiveValue]) -> AvailableRegs { + // Start from the registers that are actually usable. We don't want to include any reserved + // registers in the set. + let mut regs = AvailableRegs::new(&self.usable_regs); + + for lv in live.iter().filter(|lv| !lv.is_dead) { + debug!( + "Live-in: {}:{} in {}", + lv.value, + lv.affinity.display(&self.reginfo), + self.divert + .get(lv.value, &self.cur.func.locations) + .display(&self.reginfo) + ); + if let Affinity::Reg(rci) = lv.affinity { + let rc = self.reginfo.rc(rci); + let loc = self.cur.func.locations[lv.value]; + let reg = match loc { + ValueLoc::Reg(reg) => reg, + ValueLoc::Unassigned => panic!("Live-in {} wasn't assigned", lv.value), + ValueLoc::Stack(ss) => { + panic!("Live-in {} is in {}, should be register", lv.value, ss) + } + }; + if lv.is_local { + regs.take(rc, reg, lv.is_local); + } else { + let loc = self.divert.get(lv.value, &self.cur.func.locations); + let reg_divert = match loc { + ValueLoc::Reg(reg) => reg, + ValueLoc::Unassigned => { + panic!("Diversion: Live-in {} wasn't assigned", lv.value) + } + ValueLoc::Stack(ss) => panic!( + "Diversion: Live-in {} is in {}, should be register", + lv.value, ss + ), + }; + regs.take_divert(rc, reg, reg_divert); + } + } + } + + regs + } + + /// Color the parameters on the entry block. + /// + /// These are function parameters that should already have assigned register units in the + /// function signature. + /// + /// Return the set of remaining allocatable registers after filtering out the dead arguments. 
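+ ///
+ /// Note that dead arguments still have their ABI register recorded in `func.locations`;
+ /// their registers are simply not removed from the allocatable set.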
+ fn color_entry_params(&mut self, args: &[LiveValue]) -> AvailableRegs {
+ let sig = &self.cur.func.signature;
+ debug_assert_eq!(sig.params.len(), args.len());
+
+ let mut regs = AvailableRegs::new(&self.usable_regs);
+
+ for (lv, abi) in args.iter().zip(&sig.params) {
+ match lv.affinity {
+ Affinity::Reg(rci) => {
+ let rc = self.reginfo.rc(rci);
+ if let ArgumentLoc::Reg(reg) = abi.location {
+ if !lv.is_dead {
+ regs.take(rc, reg, lv.is_local);
+ }
+ self.cur.func.locations[lv.value] = ValueLoc::Reg(reg);
+ } else {
+ // This should have been fixed by the reload pass.
+ panic!(
+ "Entry arg {} has {} affinity, but ABI {}",
+ lv.value,
+ lv.affinity.display(&self.reginfo),
+ abi.display(&self.reginfo)
+ );
+ }
+ }
+ // The spiller will have assigned an incoming stack slot already.
+ Affinity::Stack => debug_assert!(abi.location.is_stack()),
+ // This is a ghost value, unused in the function. Don't assign it to a location
+ // either.
+ Affinity::Unassigned => {}
+ }
+ }
+
+ regs
+ }
+
+ /// Program the input-side ABI constraints for `inst` into the constraint solver.
+ ///
+ /// ABI constraints are the fixed register assignments used for calls and returns.
+ fn program_input_abi(&mut self, inst: Inst, abi_params: AbiParams) {
+ let abi_types = match abi_params {
+ AbiParams::Parameters(sig) => &self.cur.func.dfg.signatures[sig].params,
+ AbiParams::Returns => &self.cur.func.signature.returns,
+ };
+
+ for (abi, &value) in abi_types
+ .iter()
+ .zip(self.cur.func.dfg.inst_variable_args(inst))
+ {
+ if let ArgumentLoc::Reg(reg) = abi.location {
+ if let Affinity::Reg(rci) = self
+ .liveness
+ .get(value)
+ .expect("ABI register must have live range")
+ .affinity
+ {
+ let rc = self.reginfo.rc(rci);
+ let cur_reg = self.divert.reg(value, &self.cur.func.locations);
+ self.solver.reassign_in(value, rc, cur_reg, reg);
+ } else {
+ panic!("ABI argument {} should be in a register", value);
+ }
+ }
+ }
+ }
+
+ /// Color the values defined by `inst` and insert any necessary shuffle code to satisfy
+ /// instruction constraints.
+ ///
+ /// Update `regs` to reflect the allocated registers after `inst`, including removing any dead
+ /// or killed values from the set.
+ ///
+ /// Returns true when the global values defined by `inst` must be replaced by local values.
+ fn visit_inst(
+ &mut self,
+ inst: Inst,
+ constraints: Option<&RecipeConstraints>,
+ tracker: &mut LiveValueTracker,
+ regs: &mut AvailableRegs,
+ ) -> bool {
+ debug!(
+ "Coloring {}\n from {}",
+ self.cur.display_inst(inst),
+ regs.input.display(&self.reginfo),
+ );
+
+ // Block whose arguments should be colored to match the current branch instruction's
+ // arguments.
+ let mut color_dest_args = None;
+
+ // Program the solver with register constraints for the input side.
+ self.solver.reset(&regs.input);
+
+ if let Some(constraints) = constraints {
+ self.program_input_constraints(inst, constraints.ins);
+ }
+
+ let call_sig = self.cur.func.dfg.call_signature(inst);
+ if let Some(sig) = call_sig {
+ self.program_input_abi(inst, AbiParams::Parameters(sig));
+ } else if self.cur.func.dfg[inst].opcode().is_return() {
+ self.program_input_abi(inst, AbiParams::Returns);
+ } else if self.cur.func.dfg[inst].opcode().is_branch() {
+ // This is a branch, so we need to make sure that globally live values are in their
+ // global registers. For blocks that take arguments, we also need to place the argument
+ // values in the expected registers.
+ if let Some(dest) = self.cur.func.dfg[inst].branch_destination() {
+ if self.program_block_arguments(inst, dest) {
+ color_dest_args = Some(dest);
+ }
+ } else {
+ // This is a multi-way branch like `br_table`. We only support arguments on
+ // single-destination branches.
+ debug_assert_eq!(
+ self.cur.func.dfg.inst_variable_args(inst).len(),
+ 0,
+ "Can't handle block arguments: {}",
+ self.cur.display_inst(inst)
+ );
+ self.undivert_regs(|lr, _| !lr.is_local());
+ }
+ }
+
+ if self.solver.has_fixed_input_conflicts() {
+ self.divert_fixed_input_conflicts(tracker.live());
+ }
+
+ self.solver.inputs_done();
+
+ // Update the live value tracker with this instruction.
+ let (throughs, kills, defs) = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
+
+ // Get rid of the killed values.
+ for lv in kills {
+ if let Affinity::Reg(rci) = lv.affinity {
+ let rc = self.reginfo.rc(rci);
+ let reg = self.divert.reg(lv.value, &self.cur.func.locations);
+
+ if self.is_pinned_reg(rc, reg) {
+ // Don't kill the pinned reg, either in the local or global register sets.
+ debug_assert!(lv.is_local, "pinned register SSA value can't be global");
+ continue;
+ }
+
+ debug!(
+ " kill {} in {} ({} {})",
+ lv.value,
+ self.reginfo.display_regunit(reg),
+ if lv.is_local { "local" } else { "global" },
+ rc
+ );
+ self.solver.add_kill(lv.value, rc, reg);
+
+ // Update the global register set which has no diversions.
+ if !lv.is_local {
+ regs.global
+ .free(rc, self.cur.func.locations[lv.value].unwrap_reg());
+ }
+ }
+ }
+
+ // This aligns with the " from" line at the top of the function.
+ debug!(" glob {}", regs.global.display(&self.reginfo));
+
+ // This flag is set when the solver failed to find a solution for the global defines that
+ // doesn't interfere with `regs.global`. We need to rewrite all of `inst`'s global defines
+ // as local defines followed by copies.
+ let mut replace_global_defines = false;
+
+ // Program the fixed output constraints before the general defines. This allows us to
+ // detect conflicts between fixed outputs and tied operands where the input value hasn't
+ // been converted to a solver variable.
+ if let Some(constraints) = constraints {
+ if constraints.fixed_outs {
+ self.program_fixed_outputs(
+ constraints.outs,
+ defs,
+ throughs,
+ &mut replace_global_defines,
+ &regs.global,
+ );
+ }
+ }
+
+ if let Some(sig) = call_sig {
+ self.program_output_abi(
+ sig,
+ defs,
+ throughs,
+ &mut replace_global_defines,
+ &regs.global,
+ );
+ }
+
+ if let Some(constraints) = constraints {
+ self.program_output_constraints(
+ inst,
+ constraints.outs,
+ defs,
+ &mut replace_global_defines,
+ &regs.global,
+ );
+ }
+
+ // Finally, we've fully programmed the constraint solver.
+ // We expect a quick solution in most cases.
+ let is_reload = match &self.cur.func.dfg[inst] {
+ InstructionData::Unary {
+ opcode: Opcode::Fill,
+ ..
+ } => true,
+ _ => false,
+ };
+
+ let output_regs = self
+ .solver
+ .quick_solve(&regs.global, is_reload)
+ .unwrap_or_else(|_| {
+ debug!("quick_solve failed for {}", self.solver);
+ self.iterate_solution(
+ throughs,
+ &regs.global,
+ &mut replace_global_defines,
+ is_reload,
+ )
+ });
+
+ // The solution and/or fixed input constraints may require us to shuffle the set of live
+ // registers around.
+ self.shuffle_inputs(&mut regs.input);
+
+ // If this is the first time we branch to `dest`, color its arguments to match the current
+ // register state.
+ if let Some(dest) = color_dest_args { + self.color_block_params(inst, dest); + } + + // Apply the solution to the defs. + for v in self.solver.vars().iter().filter(|&v| v.is_define()) { + self.cur.func.locations[v.value] = ValueLoc::Reg(v.solution); + } + + // Tied defs are not part of the solution above. + // Copy register assignments from tied inputs to tied outputs. + if let Some(constraints) = constraints { + if constraints.tied_ops { + for (constraint, lv) in constraints.outs.iter().zip(defs) { + if let ConstraintKind::Tied(num) = constraint.kind { + let arg = self.cur.func.dfg.inst_args(inst)[num as usize]; + let reg = self.divert.reg(arg, &self.cur.func.locations); + self.cur.func.locations[lv.value] = ValueLoc::Reg(reg); + } + } + } + } + + // Update `regs` for the next instruction. + regs.input = output_regs; + for lv in defs { + let loc = self.cur.func.locations[lv.value]; + debug!( + " color {} -> {}{}", + lv.value, + loc.display(&self.reginfo), + if lv.is_local { + "" + } else if replace_global_defines { + " (global to be replaced)" + } else { + " (global)" + } + ); + + if let Affinity::Reg(rci) = lv.affinity { + let rc = self.reginfo.rc(rci); + let reg = loc.unwrap_reg(); + + debug_assert!( + !self.is_pinned_reg(rc, reg) + || self.cur.func.dfg[inst].opcode() == Opcode::GetPinnedReg, + "pinned register may not be part of outputs for '{}'.", + self.cur.func.dfg[inst].opcode() + ); + + if self.is_pinned_reg(rc, reg) { + continue; + } + + // Remove the dead defs. + if lv.endpoint == inst { + regs.input.free(rc, reg); + debug_assert!(lv.is_local); + } + + // Track globals in their undiverted locations. + if !lv.is_local && !replace_global_defines { + regs.global.take(rc, reg); + } + } + } + + self.forget_diverted(kills); + + replace_global_defines + } + + /// Program the input-side constraints for `inst` into the constraint solver. + fn program_input_constraints(&mut self, inst: Inst, constraints: &[OperandConstraint]) { + for (constraint, &arg_val) in constraints + .iter() + .zip(self.cur.func.dfg.inst_args(inst)) + .filter(|&(constraint, _)| constraint.kind != ConstraintKind::Stack) + { + // Reload pass is supposed to ensure that all arguments to register operands are + // already in a register. + let cur_reg = self.divert.reg(arg_val, &self.cur.func.locations); + match constraint.kind { + ConstraintKind::FixedReg(regunit) => { + // Add the fixed constraint even if `cur_reg == regunit`. + // It is possible that we will want to convert the value to a variable later, + // and this identity assignment prevents that from happening. + self.solver + .reassign_in(arg_val, constraint.regclass, cur_reg, regunit); + } + ConstraintKind::FixedTied(regunit) => { + // The pinned register may not be part of a fixed tied requirement. If this + // becomes the case, then it must be changed to a different register. + debug_assert!( + !self.is_pinned_reg(constraint.regclass, regunit), + "see comment above" + ); + // See comment right above. + self.solver + .reassign_in(arg_val, constraint.regclass, cur_reg, regunit); + } + ConstraintKind::Tied(_) => { + if self.is_pinned_reg(constraint.regclass, cur_reg) { + // Divert the pinned register; it shouldn't be reused for a tied input. 
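+ // (A tied def would clobber the pinned register, so if the solver has room we turn
+ // the input into a variable, allowing the value to move to some other register.)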
+ if self.solver.can_add_var(constraint.regclass, cur_reg) { + self.solver.add_var(arg_val, constraint.regclass, cur_reg); + } + } else if !constraint.regclass.contains(cur_reg) { + self.solver.add_var(arg_val, constraint.regclass, cur_reg); + } + } + ConstraintKind::Reg => { + if !constraint.regclass.contains(cur_reg) { + self.solver.add_var(arg_val, constraint.regclass, cur_reg); + } + } + ConstraintKind::Stack => unreachable!(), + } + } + } + + /// Program the complete set of input constraints into the solver. + /// + /// The `program_input_constraints()` function above will not tell the solver about any values + /// that are already assigned to appropriate registers. This is normally fine, but if we want + /// to add additional variables to help the solver, we need to make sure that they are + /// constrained properly. + /// + /// This function completes the work of `program_input_constraints()` by calling `add_var` for + /// all values used by the instruction. + fn program_complete_input_constraints(&mut self) { + let inst = self.cur.current_inst().expect("Not on an instruction"); + let constraints = self + .encinfo + .operand_constraints(self.cur.func.encodings[inst]) + .expect("Current instruction not encoded") + .ins; + + for (constraint, &arg_val) in constraints.iter().zip(self.cur.func.dfg.inst_args(inst)) { + match constraint.kind { + ConstraintKind::Reg | ConstraintKind::Tied(_) => { + let cur_reg = self.divert.reg(arg_val, &self.cur.func.locations); + + // This is the opposite condition of `program_input_constraints()`. The pinned + // register mustn't be added back as a variable. + if constraint.regclass.contains(cur_reg) + && !self.is_pinned_reg(constraint.regclass, cur_reg) + { + // This code runs after calling `solver.inputs_done()` so we must identify + // the new variable as killed or live-through. + let layout = &self.cur.func.layout; + if self.liveness[arg_val].killed_at(inst, layout.pp_block(inst), layout) { + self.solver + .add_killed_var(arg_val, constraint.regclass, cur_reg); + } else { + self.solver + .add_through_var(arg_val, constraint.regclass, cur_reg); + } + } + } + ConstraintKind::FixedReg(_) + | ConstraintKind::FixedTied(_) + | ConstraintKind::Stack => {} + } + } + } + + /// Prepare for a branch to `dest`. + /// + /// 1. Any values that are live-in to `dest` must be un-diverted so they live in their globally + /// assigned register. + /// 2. If the `dest` block takes arguments, reassign the branch argument values to the matching + /// registers. + /// + /// Returns true if this is the first time a branch to `dest` is seen, so the `dest` argument + /// values should be colored after `shuffle_inputs`. + fn program_block_arguments(&mut self, inst: Inst, dest: Block) -> bool { + // Find diverted registers that are live-in to `dest` and reassign them to their global + // home. + // + // Values with a global live range that are not live in to `dest` could appear as branch + // arguments, so they can't always be un-diverted. + self.undivert_regs(|lr, layout| lr.is_livein(dest, layout)); + + // Now handle the block arguments. + let br_args = self.cur.func.dfg.inst_variable_args(inst); + let dest_args = self.cur.func.dfg.block_params(dest); + debug_assert_eq!(br_args.len(), dest_args.len()); + for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) { + // The first time we encounter a branch to `dest`, we get to pick the location. The + // following times we see a branch to `dest`, we must follow suit. 
+ match self.cur.func.locations[dest_arg] {
+ ValueLoc::Unassigned => {
+ // This is the first branch to `dest`, so we should color `dest_arg` instead of
+ // `br_arg`. However, we don't know where `br_arg` will end up until
+ // after `shuffle_inputs`. See `color_block_params` below.
+ //
+ // It is possible for `dest_arg` to have no affinity, and then it should simply
+ // be ignored.
+ if self.liveness[dest_arg].affinity.is_reg() {
+ return true;
+ }
+ }
+ ValueLoc::Reg(dest_reg) => {
+ // We've branched to `dest` before. Make sure we use the correct argument
+ // registers by reassigning `br_arg`.
+ if let Affinity::Reg(rci) = self.liveness[br_arg].affinity {
+ let rc = self.reginfo.rc(rci);
+ let br_reg = self.divert.reg(br_arg, &self.cur.func.locations);
+ self.solver.reassign_in(br_arg, rc, br_reg, dest_reg);
+ } else {
+ panic!("Branch argument {} is not in a register", br_arg);
+ }
+ }
+ ValueLoc::Stack(ss) => {
+ // The spiller should already have given us identical stack slots.
+ debug_assert_eq!(ValueLoc::Stack(ss), self.cur.func.locations[br_arg]);
+ }
+ }
+ }
+
+ // No `dest` arguments need coloring.
+ false
+ }
+
+ /// Knowing that we've never seen a branch to `dest` before, color its parameters to match our
+ /// register state.
+ ///
+ /// This function is only called when `program_block_arguments()` returned `true`.
+ fn color_block_params(&mut self, inst: Inst, dest: Block) {
+ let br_args = self.cur.func.dfg.inst_variable_args(inst);
+ let dest_args = self.cur.func.dfg.block_params(dest);
+ debug_assert_eq!(br_args.len(), dest_args.len());
+ for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) {
+ match self.cur.func.locations[dest_arg] {
+ ValueLoc::Unassigned => {
+ if self.liveness[dest_arg].affinity.is_reg() {
+ let br_reg = self.divert.reg(br_arg, &self.cur.func.locations);
+ self.cur.func.locations[dest_arg] = ValueLoc::Reg(br_reg);
+ }
+ }
+ ValueLoc::Reg(_) => panic!("{} arg {} already colored", dest, dest_arg),
+ // Spilled value consistency is verified by `program_block_arguments()` above.
+ ValueLoc::Stack(_) => {}
+ }
+ }
+ }
+
+ /// Find all diverted registers where `pred` returns `true` and undo their diversion so they
+ /// are reallocated to their global register assignments.
+ fn undivert_regs<Pred>(&mut self, mut pred: Pred)
+ where
+ Pred: FnMut(&LiveRange, &Layout) -> bool,
+ {
+ for (&value, rdiv) in self.divert.iter() {
+ let lr = self
+ .liveness
+ .get(value)
+ .expect("Missing live range for diverted register");
+ if pred(lr, &self.cur.func.layout) {
+ if let Affinity::Reg(rci) = lr.affinity {
+ let rc = self.reginfo.rc(rci);
+ // Stack diversions should not be possible here. They only live transiently
+ // during `shuffle_inputs()`.
+ self.solver.reassign_in(
+ value,
+ rc,
+ rdiv.to.unwrap_reg(),
+ rdiv.from.unwrap_reg(),
+ );
+ } else {
+ panic!(
+ "Diverted register {} with {} affinity",
+ value,
+ lr.affinity.display(&self.reginfo)
+ );
+ }
+ }
+ }
+ }
+
+ /// Find existing live values that conflict with the fixed input register constraints programmed
+ /// into the constraint solver. Convert them to solver variables so they can be diverted.
+ fn divert_fixed_input_conflicts(&mut self, live: &[LiveValue]) { + for lv in live { + if let Affinity::Reg(rci) = lv.affinity { + let toprc = self.reginfo.toprc(rci); + let reg = self.divert.reg(lv.value, &self.cur.func.locations); + if self.solver.is_fixed_input_conflict(toprc, reg) { + debug!( + "adding var to divert fixed input conflict for {}", + toprc.info.display_regunit(reg) + ); + self.solver.add_var(lv.value, toprc, reg); + } + } + } + } + + /// Program any fixed-register output constraints into the solver. This may also detect + /// conflicts between live-through registers and fixed output registers. These live-through + /// values need to be turned into solver variables so they can be reassigned. + fn program_fixed_outputs( + &mut self, + constraints: &[OperandConstraint], + defs: &[LiveValue], + throughs: &[LiveValue], + replace_global_defines: &mut bool, + global_regs: &RegisterSet, + ) { + for (constraint, lv) in constraints.iter().zip(defs) { + match constraint.kind { + ConstraintKind::FixedReg(reg) | ConstraintKind::FixedTied(reg) => { + self.add_fixed_output(lv.value, constraint.regclass, reg, throughs); + if !lv.is_local && !global_regs.is_avail(constraint.regclass, reg) { + debug!( + "Fixed output {} in {}:{} is not available in global regs", + lv.value, + constraint.regclass, + self.reginfo.display_regunit(reg) + ); + *replace_global_defines = true; + } + } + ConstraintKind::Reg | ConstraintKind::Tied(_) | ConstraintKind::Stack => {} + } + } + } + + /// Program the output-side ABI constraints for `inst` into the constraint solver. + /// + /// That means return values for a call instruction. + fn program_output_abi( + &mut self, + sig: SigRef, + defs: &[LiveValue], + throughs: &[LiveValue], + replace_global_defines: &mut bool, + global_regs: &RegisterSet, + ) { + // It's technically possible for a call instruction to have fixed results before the + // variable list of results, but we have no known instances of that. + // Just assume all results are variable return values. + debug_assert_eq!(defs.len(), self.cur.func.dfg.signatures[sig].returns.len()); + for (i, lv) in defs.iter().enumerate() { + let abi = self.cur.func.dfg.signatures[sig].returns[i]; + if let ArgumentLoc::Reg(reg) = abi.location { + if let Affinity::Reg(rci) = lv.affinity { + let rc = self.reginfo.rc(rci); + self.add_fixed_output(lv.value, rc, reg, throughs); + if !lv.is_local && !global_regs.is_avail(rc, reg) { + debug!( + "ABI output {} in {}:{} is not available in global regs", + lv.value, + rc, + self.reginfo.display_regunit(reg) + ); + *replace_global_defines = true; + } + } else { + panic!("ABI argument {} should be in a register", lv.value); + } + } + } + } + + /// Add a single fixed output value to the solver. + fn add_fixed_output( + &mut self, + value: Value, + rc: RegClass, + reg: RegUnit, + throughs: &[LiveValue], + ) { + // Pinned register is already unavailable in the solver, since it is copied in the + // available registers on entry. + if !self.is_pinned_reg(rc, reg) && !self.solver.add_fixed_output(rc, reg) { + // The fixed output conflicts with some of the live-through registers. + for lv in throughs { + if let Affinity::Reg(rci) = lv.affinity { + let toprc2 = self.reginfo.toprc(rci); + let reg2 = self.divert.reg(lv.value, &self.cur.func.locations); + if regs_overlap(rc, reg, toprc2, reg2) { + // This live-through value is interfering with the fixed output assignment. + // Convert it to a solver variable. 
+ self.solver.add_through_var(lv.value, toprc2, reg2); + } + } + } + + let ok = self.solver.add_fixed_output(rc, reg); + debug_assert!(ok, "Couldn't clear fixed output interference for {}", value); + } + self.cur.func.locations[value] = ValueLoc::Reg(reg); + } + + /// Program the output-side constraints for `inst` into the constraint solver. + /// + /// It is assumed that all fixed outputs have already been handled. + fn program_output_constraints( + &mut self, + inst: Inst, + constraints: &[OperandConstraint], + defs: &[LiveValue], + replace_global_defines: &mut bool, + global_regs: &RegisterSet, + ) { + for (constraint, lv) in constraints.iter().zip(defs) { + match constraint.kind { + ConstraintKind::FixedReg(_) + | ConstraintKind::FixedTied(_) + | ConstraintKind::Stack => continue, + ConstraintKind::Reg => { + self.solver + .add_def(lv.value, constraint.regclass, !lv.is_local); + } + ConstraintKind::Tied(num) => { + // Find the input operand we're tied to. + // The solver doesn't care about the output value. + let arg = self.cur.func.dfg.inst_args(inst)[num as usize]; + let reg = self.divert.reg(arg, &self.cur.func.locations); + + if let Some(reg) = + self.solver + .add_tied_input(arg, constraint.regclass, reg, !lv.is_local) + { + // The value we're tied to has been assigned to a fixed register. + // We need to make sure that fixed output register is compatible with the + // global register set. + if !lv.is_local && !global_regs.is_avail(constraint.regclass, reg) { + debug!( + "Tied output {} in {}:{} is not available in global regs", + lv.value, + constraint.regclass, + self.reginfo.display_regunit(reg) + ); + *replace_global_defines = true; + } + } + } + } + } + } + + /// Try harder to find a solution to the constraint problem since `quick_solve()` failed. + /// + /// We may need to move more registers around before a solution is possible. Use an iterative + /// algorithm that adds one more variable until a solution can be found. + fn iterate_solution( + &mut self, + throughs: &[LiveValue], + global_regs: &RegisterSet, + replace_global_defines: &mut bool, + is_reload: bool, + ) -> RegisterSet { + // Make sure `try_add_var()` below doesn't create a variable with too loose constraints. + self.program_complete_input_constraints(); + + loop { + match self.solver.real_solve(global_regs, is_reload) { + Ok(regs) => return regs, + Err(SolverError::Divert(rc)) => { + // Do we have any live-through `rc` registers that are not already variables? + let added = self.try_add_var(rc, throughs); + debug_assert!(added, "Ran out of registers in {}", rc); + } + Err(SolverError::Global(_value)) => { + debug!( + "Not enough global registers for {}, trying as local", + _value + ); + // We'll clear the `is_global` flag on all solver variables and instead make a + // note to replace all global defines with local defines followed by a copy. + *replace_global_defines = true; + self.solver.clear_all_global_flags(); + } + }; + } + } + + /// Try to add an `rc` variable to the solver from the `throughs` set. + fn try_add_var(&mut self, rc: RegClass, throughs: &[LiveValue]) -> bool { + debug!("Trying to add a {} reg from {} values", rc, throughs.len()); + + for lv in throughs { + if let Affinity::Reg(rci) = lv.affinity { + // The new variable gets to roam the whole top-level register class because it is + // not actually constrained by the instruction. We just want it out of the way. 
+ let toprc2 = self.reginfo.toprc(rci); + let reg2 = self.divert.reg(lv.value, &self.cur.func.locations); + if rc.contains(reg2) + && self.solver.can_add_var(toprc2, reg2) + && !self.is_live_on_outgoing_edge(lv.value) + { + self.solver.add_through_var(lv.value, toprc2, reg2); + return true; + } + } + } + + false + } + + /// Determine if `value` is live on a CFG edge from the current instruction. + /// + /// This means that the current instruction is a branch and `value` is live in to one of the + /// branch destinations. Branch arguments and block parameters are not considered live on the + /// edge. + fn is_live_on_outgoing_edge(&self, value: Value) -> bool { + use crate::ir::instructions::BranchInfo::*; + + let inst = self.cur.current_inst().expect("Not on an instruction"); + let layout = &self.cur.func.layout; + match self.cur.func.dfg.analyze_branch(inst) { + NotABranch => false, + SingleDest(block, _) => { + let lr = &self.liveness[value]; + lr.is_livein(block, layout) + } + Table(jt, block) => { + let lr = &self.liveness[value]; + !lr.is_local() + && (block.map_or(false, |block| lr.is_livein(block, layout)) + || self.cur.func.jump_tables[jt] + .iter() + .any(|block| lr.is_livein(*block, layout))) + } + } + } + + /// Emit `regmove` instructions as needed to move the live registers into place before the + /// instruction. Also update `self.divert` accordingly. + /// + /// The `self.cur` cursor is expected to point at the instruction. The register moves are + /// inserted before. + /// + /// The solver needs to be reminded of the available registers before any moves are inserted. + fn shuffle_inputs(&mut self, regs: &mut RegisterSet) { + use crate::regalloc::solver::Move::*; + + let spills = self.solver.schedule_moves(regs); + + // The move operations returned by `schedule_moves` refer to emergency spill slots by + // consecutive indexes starting from 0. Map these to real stack slots. + // It is very unlikely (impossible?) that we would need more than one spill per top-level + // register class, so avoid allocation by using a fixed array here. + let mut slot = [PackedOption::default(); 8]; + debug_assert!(spills <= slot.len(), "Too many spills ({})", spills); + + for m in self.solver.moves() { + match *m { + Reg { + value, + from, + to, + rc, + } => { + debug_assert!( + !self.is_pinned_reg(rc, to), + "pinned register used in a regmove" + ); + self.divert.regmove(value, from, to); + self.cur.ins().regmove(value, from, to); + } + Spill { + value, + from, + to_slot, + .. + } => { + debug_assert_eq!(slot[to_slot].expand(), None, "Overwriting slot in use"); + let ss = self + .cur + .func + .stack_slots + .get_emergency_slot(self.cur.func.dfg.value_type(value), &slot[0..spills]); + slot[to_slot] = ss.into(); + self.divert.regspill(value, from, ss); + self.cur.ins().regspill(value, from, ss); + } + Fill { + value, + from_slot, + to, + rc, + } => { + debug_assert!( + !self.is_pinned_reg(rc, to), + "pinned register used in a regfill" + ); + // These slots are single use, so mark `ss` as available again. + let ss = slot[from_slot].take().expect("Using unallocated slot"); + self.divert.regfill(value, ss, to); + self.cur.ins().regfill(value, ss, to); + } + } + } + } + + /// Forget about any register diversions in `kills`. 
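+ ///
+ /// (The killed values' registers have already been handed back while processing the kills
+ /// in `visit_inst`; this only drops the now-stale diversion bookkeeping.)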
+ fn forget_diverted(&mut self, kills: &[LiveValue]) {
+ if self.divert.is_empty() {
+ return;
+ }
+
+ for lv in kills {
+ if lv.affinity.is_reg() {
+ self.divert.remove(lv.value);
+ }
+ }
+ }
+
+ /// Replace all global values defined by `inst` with local values that are then copied into the
+ /// global value:
+ ///
+ /// v1 = foo
+ ///
+ /// becomes:
+ ///
+ /// v20 = foo
+ /// v1 = copy v20
+ ///
+ /// This is sometimes necessary when there are no global registers available that can satisfy
+ /// the constraints on the instruction operands.
+ ///
+ fn replace_global_defines(&mut self, inst: Inst, tracker: &mut LiveValueTracker) {
+ debug!("Replacing global defs on {}", self.cur.display_inst(inst));
+
+ // We'll insert copies *after* `inst`. Our caller will move the cursor back.
+ self.cur.next_inst();
+
+ // The tracker keeps the defs from `inst` at the end. Any dead defs have already been
+ // removed, so it's not obvious how many defs to process.
+ for lv in tracker.live_mut().iter_mut().rev() {
+ // Keep going until we reach a value that is not defined by `inst`.
+ if match self.cur.func.dfg.value_def(lv.value) {
+ ValueDef::Result(i, _) => i != inst,
+ _ => true,
+ } {
+ break;
+ }
+ if lv.is_local || !lv.affinity.is_reg() {
+ continue;
+ }
+
+ // Now `lv.value` is globally live and defined by `inst`. Replace it with a local live
+ // range that is copied after `inst`.
+ let ty = self.cur.func.dfg.value_type(lv.value);
+ let local = self.cur.func.dfg.replace_result(lv.value, ty);
+ self.cur.ins().with_result(lv.value).copy(local);
+ let copy = self.cur.built_inst();
+
+ // Create a live range for `local: inst -> copy`.
+ self.liveness.create_dead(local, inst, lv.affinity);
+ self.liveness.extend_locally(
+ local,
+ self.cur.func.layout.pp_block(inst),
+ copy,
+ &self.cur.func.layout,
+ );
+
+ // Move the definition of the global `lv.value`.
+ self.liveness.move_def_locally(lv.value, copy);
+
+ // Transfer the register coloring to `local`.
+ let loc = mem::replace(&mut self.cur.func.locations[lv.value], ValueLoc::default());
+ self.cur.func.locations[local] = loc;
+
+ // Update `lv` to reflect the new `local` live range.
+ lv.value = local;
+ lv.endpoint = copy;
+ lv.is_local = true;
+
+ debug!(
+ " + {} with {} in {}",
+ self.cur.display_inst(copy),
+ local,
+ loc.display(&self.reginfo)
+ );
+ }
+ debug!("Done: {}", self.cur.display_inst(inst));
+ }
+
+ /// Process kills on a ghost instruction.
+ /// - Forget diversions.
+ /// - Free killed registers.
+ fn process_ghost_kills(&mut self, kills: &[LiveValue], regs: &mut AvailableRegs) {
+ for lv in kills {
+ if let Affinity::Reg(rci) = lv.affinity {
+ let rc = self.reginfo.rc(rci);
+ let loc = match self.divert.remove(lv.value) {
+ Some(loc) => loc,
+ None => self.cur.func.locations[lv.value],
+ };
+ regs.input.free(rc, loc.unwrap_reg());
+ if !lv.is_local {
+ regs.global
+ .free(rc, self.cur.func.locations[lv.value].unwrap_reg());
+ }
+ }
+ }
+ }
+}
+
+/// Keep track of the set of available registers in two interference domains: all registers
+/// considering diversions and global registers not considering diversions.
+struct AvailableRegs {
+ /// The exact set of registers available on the input side of the current instruction. This
+ /// takes into account register diversions, and it includes both local and global live ranges.
+ input: RegisterSet,
+
+ /// Registers available for allocating globally live values. This set ignores any local values,
+ /// and it does not account for register diversions.
+    ///
+    /// Global values must be allocated out of this set because conflicts with other global values
+    /// can't be resolved with local diversions.
+    global: RegisterSet,
+}
+
+impl AvailableRegs {
+    /// Initialize both the input and global sets from `regs`.
+    pub fn new(regs: &RegisterSet) -> Self {
+        Self {
+            input: regs.clone(),
+            global: regs.clone(),
+        }
+    }
+
+    /// Take an un-diverted register from one or both sets.
+    pub fn take(&mut self, rc: RegClass, reg: RegUnit, is_local: bool) {
+        self.input.take(rc, reg);
+        if !is_local {
+            self.global.take(rc, reg);
+        }
+    }
+
+    /// Take a diverted register from both sets for a non-local allocation.
+    pub fn take_divert(&mut self, rc: RegClass, reg: RegUnit, reg_divert: RegUnit) {
+        self.input.take(rc, reg_divert);
+        self.global.take(rc, reg);
+    }
+}
diff --git a/cranelift/codegen/src/regalloc/context.rs b/cranelift/codegen/src/regalloc/context.rs
new file mode 100644
index 0000000000..dfbec985eb
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/context.rs
@@ -0,0 +1,247 @@
+//! Register allocator context.
+//!
+//! The `Context` struct contains data structures that should be preserved across invocations of
+//! the register allocator algorithm. This doesn't preserve any data between functions, but it
+//! avoids allocating data structures independently for each function being compiled.
+
+use crate::dominator_tree::DominatorTree;
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::Function;
+use crate::isa::TargetIsa;
+use crate::regalloc::branch_splitting;
+use crate::regalloc::coalescing::Coalescing;
+use crate::regalloc::coloring::Coloring;
+use crate::regalloc::live_value_tracker::LiveValueTracker;
+use crate::regalloc::liveness::Liveness;
+use crate::regalloc::reload::Reload;
+use crate::regalloc::safepoint::emit_stackmaps;
+use crate::regalloc::spilling::Spilling;
+use crate::regalloc::virtregs::VirtRegs;
+use crate::result::CodegenResult;
+use crate::timing;
+use crate::topo_order::TopoOrder;
+use crate::verifier::{
+    verify_context, verify_cssa, verify_liveness, verify_locations, VerifierErrors,
+};
+
+/// Persistent memory allocations for register allocation.
+pub struct Context {
+    liveness: Liveness,
+    virtregs: VirtRegs,
+    coalescing: Coalescing,
+    topo: TopoOrder,
+    tracker: LiveValueTracker,
+    spilling: Spilling,
+    reload: Reload,
+    coloring: Coloring,
+}
+
+impl Context {
+    /// Create a new context for register allocation.
+    ///
+    /// This context should be reused for multiple functions in order to avoid repeated memory
+    /// allocations.
+    pub fn new() -> Self {
+        Self {
+            liveness: Liveness::new(),
+            virtregs: VirtRegs::new(),
+            coalescing: Coalescing::new(),
+            topo: TopoOrder::new(),
+            tracker: LiveValueTracker::new(),
+            spilling: Spilling::new(),
+            reload: Reload::new(),
+            coloring: Coloring::new(),
+        }
+    }
+
+    /// Clear all data structures in this context.
+    pub fn clear(&mut self) {
+        self.liveness.clear();
+        self.virtregs.clear();
+        self.coalescing.clear();
+        self.topo.clear();
+        self.tracker.clear();
+        self.spilling.clear();
+        self.reload.clear();
+        self.coloring.clear();
+    }
+
+    /// Current values liveness state.
+    pub fn liveness(&self) -> &Liveness {
+        &self.liveness
+    }
+
+    /// Allocate registers in `func`.
+    ///
+    /// After register allocation, all values in `func` have been assigned to a register or stack
+    /// location that is consistent with instruction encoding constraints.
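+    ///
+    /// A rough usage sketch (illustrative only; obtaining `isa` and the list of functions is
+    /// elided here):
+    ///
+    /// ```ignore
+    /// let mut context = Context::new();
+    /// for func in &mut functions {
+    ///     let mut cfg = ControlFlowGraph::with_function(func);
+    ///     let mut domtree = DominatorTree::with_function(func, &cfg);
+    ///     context.run(isa, func, &mut cfg, &mut domtree)?;
+    /// }
+    /// ```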
+    pub fn run(
+        &mut self,
+        isa: &dyn TargetIsa,
+        func: &mut Function,
+        cfg: &mut ControlFlowGraph,
+        domtree: &mut DominatorTree,
+    ) -> CodegenResult<()> {
+        let _tt = timing::regalloc();
+        debug_assert!(domtree.is_valid());
+
+        let mut errors = VerifierErrors::default();
+
+        // `Liveness` and `Coloring` are self-clearing.
+        self.virtregs.clear();
+
+        // Tracker state (dominator live sets) is actually reused between the spilling and coloring
+        // phases.
+        self.tracker.clear();
+
+        // Pass: Split branches, making room for the copy and regmove instructions that later
+        // passes will insert.
+        branch_splitting::run(isa, func, cfg, domtree, &mut self.topo);
+
+        // Pass: Liveness analysis.
+        self.liveness.compute(isa, func, cfg);
+
+        if isa.flags().enable_verifier() {
+            let ok = verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok();
+
+            if !ok {
+                return Err(errors.into());
+            }
+        }
+
+        // Pass: Coalesce and create Conventional SSA form.
+        self.coalescing.conventional_ssa(
+            isa,
+            func,
+            cfg,
+            domtree,
+            &mut self.liveness,
+            &mut self.virtregs,
+        );
+
+        if isa.flags().enable_verifier() {
+            let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
+                && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
+                && verify_cssa(
+                    func,
+                    cfg,
+                    domtree,
+                    &self.liveness,
+                    &self.virtregs,
+                    &mut errors,
+                )
+                .is_ok();
+
+            if !ok {
+                return Err(errors.into());
+            }
+        }
+
+        // Pass: Spilling.
+        self.spilling.run(
+            isa,
+            func,
+            domtree,
+            &mut self.liveness,
+            &self.virtregs,
+            &mut self.topo,
+            &mut self.tracker,
+        );
+
+        if isa.flags().enable_verifier() {
+            let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
+                && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
+                && verify_cssa(
+                    func,
+                    cfg,
+                    domtree,
+                    &self.liveness,
+                    &self.virtregs,
+                    &mut errors,
+                )
+                .is_ok();
+
+            if !ok {
+                return Err(errors.into());
+            }
+        }
+
+        // Pass: Reload.
+        self.reload.run(
+            isa,
+            func,
+            domtree,
+            &mut self.liveness,
+            &mut self.topo,
+            &mut self.tracker,
+        );
+
+        if isa.flags().enable_verifier() {
+            let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
+                && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
+                && verify_cssa(
+                    func,
+                    cfg,
+                    domtree,
+                    &self.liveness,
+                    &self.virtregs,
+                    &mut errors,
+                )
+                .is_ok();
+
+            if !ok {
+                return Err(errors.into());
+            }
+        }
+
+        // Pass: Coloring.
+        self.coloring.run(
+            isa,
+            func,
+            cfg,
+            domtree,
+            &mut self.liveness,
+            &mut self.tracker,
+        );
+
+        // This function runs after register allocation has taken
+        // place, meaning values have locations assigned already.
+        if isa.flags().enable_safepoints() {
+            emit_stackmaps(func, domtree, &self.liveness, &mut self.tracker, isa);
+        } else {
+            // Make sure no references are used.
+            for val in func.dfg.values() {
+                let ty = func.dfg.value_type(val);
+                if ty.lane_type().is_ref() {
+                    panic!("reference types were found but safepoints were not enabled.");
+                }
+            }
+        }
+
+        if isa.flags().enable_verifier() {
+            let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
+                && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
+                && verify_locations(isa, func, cfg, Some(&self.liveness), &mut errors).is_ok()
+                && verify_cssa(
+                    func,
+                    cfg,
+                    domtree,
+                    &self.liveness,
+                    &self.virtregs,
+                    &mut errors,
+                )
+                .is_ok();
+
+            if !ok {
+                return Err(errors.into());
+            }
+        }
+
+        // Even if we arrive here, (non-fatal) errors might have been reported, so we
+        // must make sure absolutely nothing is wrong.
+        if errors.is_empty() {
+            Ok(())
+        } else {
+            Err(errors.into())
+        }
+    }
+}
diff --git a/cranelift/codegen/src/regalloc/diversion.rs b/cranelift/codegen/src/regalloc/diversion.rs
new file mode 100644
index 0000000000..e3bacbae72
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/diversion.rs
@@ -0,0 +1,315 @@
+//! Register diversions.
+//!
+//! Normally, a value is assigned to a single register or stack location by the register allocator.
+//! Sometimes, it is necessary to move register values to a different register in order to satisfy
+//! instruction constraints.
+//!
+//! These register diversions are local to a block. No values can be diverted when entering a new
+//! block.
+
+use crate::fx::FxHashMap;
+use crate::hash_map::{Entry, Iter};
+use crate::ir::{Block, StackSlot, Value, ValueLoc, ValueLocations};
+use crate::ir::{InstructionData, Opcode};
+use crate::isa::{RegInfo, RegUnit};
+use core::fmt;
+use cranelift_entity::{SparseMap, SparseMapValue};
+
+/// A diversion of a value from its original location to a new register or stack location.
+///
+/// In IR, a diversion is represented by a `regmove` instruction, possibly a chain of them for the
+/// same value.
+///
+/// When tracking diversions, the `from` field is the original assigned value location, and `to` is
+/// the current one.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct Diversion {
+    /// The original value location.
+    pub from: ValueLoc,
+    /// The current value location.
+    pub to: ValueLoc,
+}
+
+impl Diversion {
+    /// Make a new diversion.
+    pub fn new(from: ValueLoc, to: ValueLoc) -> Self {
+        debug_assert!(from.is_assigned() && to.is_assigned());
+        Self { from, to }
+    }
+}
+
+/// Keep track of diversions in a block.
+#[derive(Clone)]
+pub struct RegDiversions {
+    current: FxHashMap<Value, Diversion>,
+}
+
+/// Keep track of diversions at the entry of a block.
+#[derive(Clone)]
+struct EntryRegDiversionsValue {
+    key: Block,
+    divert: RegDiversions,
+}
+
+/// Map each block to its matching `RegDiversions` at basic-block entry.
+pub struct EntryRegDiversions {
+    map: SparseMap<Block, EntryRegDiversionsValue>,
+}
+
+impl RegDiversions {
+    /// Create a new empty diversion tracker.
+    pub fn new() -> Self {
+        Self {
+            current: FxHashMap::default(),
+        }
+    }
+
+    /// Clear the content of the diversions, to reset the state of the compiler.
+    pub fn clear(&mut self) {
+        self.current.clear()
+    }
+
+    /// Are there any diversions?
+    pub fn is_empty(&self) -> bool {
+        self.current.is_empty()
+    }
+
+    /// Get the current diversion of `value`, if any.
+    pub fn diversion(&self, value: Value) -> Option<&Diversion> {
+        self.current.get(&value)
+    }
+
+    /// Get all current diversions.
+    pub fn iter(&self) -> Iter<'_, Value, Diversion> {
+        self.current.iter()
+    }
+
+    /// Get the current location for `value`. Fall back to the assignment map for non-diverted
+    /// values.
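+    ///
+    /// Illustrative sketch (not from the original sources; `v1`, `r1`, and `r2` are made-up
+    /// names):
+    ///
+    /// ```ignore
+    /// divert.regmove(v1, r1, r2);
+    /// assert_eq!(divert.get(v1, &func.locations), ValueLoc::Reg(r2));
+    /// ```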
+    pub fn get(&self, value: Value, locations: &ValueLocations) -> ValueLoc {
+        match self.diversion(value) {
+            Some(d) => d.to,
+            None => locations[value],
+        }
+    }
+
+    /// Get the current register location for `value`, or panic if `value` isn't in a register.
+    pub fn reg(&self, value: Value, locations: &ValueLocations) -> RegUnit {
+        self.get(value, locations).unwrap_reg()
+    }
+
+    /// Get the current stack location for `value`, or panic if `value` isn't in a stack slot.
+    pub fn stack(&self, value: Value, locations: &ValueLocations) -> StackSlot {
+        self.get(value, locations).unwrap_stack()
+    }
+
+    /// Record any kind of move.
+    ///
+    /// The `from` location must match an existing `to` location, if any.
+    fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) {
+        debug_assert!(from.is_assigned() && to.is_assigned());
+        match self.current.entry(value) {
+            Entry::Occupied(mut e) => {
+                // TODO: non-lexical lifetimes should allow removal of the scope and early return.
+                {
+                    let d = e.get_mut();
+                    debug_assert_eq!(d.to, from, "Bad regmove chain for {}", value);
+                    if d.from != to {
+                        d.to = to;
+                        return;
+                    }
+                }
+                e.remove();
+            }
+            Entry::Vacant(e) => {
+                e.insert(Diversion::new(from, to));
+            }
+        }
+    }
+
+    /// Record a register -> register move.
+    pub fn regmove(&mut self, value: Value, from: RegUnit, to: RegUnit) {
+        self.divert(value, ValueLoc::Reg(from), ValueLoc::Reg(to));
+    }
+
+    /// Record a register -> stack move.
+    pub fn regspill(&mut self, value: Value, from: RegUnit, to: StackSlot) {
+        self.divert(value, ValueLoc::Reg(from), ValueLoc::Stack(to));
+    }
+
+    /// Record a stack -> register move.
+    pub fn regfill(&mut self, value: Value, from: StackSlot, to: RegUnit) {
+        self.divert(value, ValueLoc::Stack(from), ValueLoc::Reg(to));
+    }
+
+    /// Apply the effect of `inst`.
+    ///
+    /// If `inst` is a `regmove`, `regfill`, or `regspill` instruction, update the diversions to
+    /// match.
+    pub fn apply(&mut self, inst: &InstructionData) {
+        match *inst {
+            InstructionData::RegMove {
+                opcode: Opcode::Regmove,
+                arg,
+                src,
+                dst,
+            } => self.regmove(arg, src, dst),
+            InstructionData::RegSpill {
+                opcode: Opcode::Regspill,
+                arg,
+                src,
+                dst,
+            } => self.regspill(arg, src, dst),
+            InstructionData::RegFill {
+                opcode: Opcode::Regfill,
+                arg,
+                src,
+                dst,
+            } => self.regfill(arg, src, dst),
+            _ => {}
+        }
+    }
+
+    /// Drop any recorded move for `value`.
+    ///
+    /// Returns the `to` location of the removed diversion.
+    pub fn remove(&mut self, value: Value) -> Option<ValueLoc> {
+        self.current.remove(&value).map(|d| d.to)
+    }
+
+    /// Resets the state of the current diversions to the recorded diversions at the entry of the
+    /// given `block`. The recorded diversions are available after coloring in the
+    /// `func.entry_diversions` field.
+    pub fn at_block(&mut self, entry_diversions: &EntryRegDiversions, block: Block) {
+        self.clear();
+        if let Some(entry_divert) = entry_diversions.map.get(block) {
+            let iter = entry_divert.divert.current.iter();
+            self.current.extend(iter);
+        }
+    }
+
+    /// Copy the current state of the diversions, and save it for the entry of the `block` given as
+    /// argument.
+    ///
+    /// Note: This function can only be called once on a `Block` with a given `entry_diversions`
+    /// argument, otherwise it will panic.
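+    ///
+    /// Illustrative pairing with `at_block` (a sketch; the real call sites live in the register
+    /// allocator passes):
+    ///
+    /// ```ignore
+    /// divert.save_for_block(&mut func.entry_diversions, target);
+    /// // ... later, when processing `target`:
+    /// divert.at_block(&func.entry_diversions, target);
+    /// debug_assert!(divert.check_block_entry(&func.entry_diversions, target));
+    /// ```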
+    pub fn save_for_block(&mut self, entry_diversions: &mut EntryRegDiversions, target: Block) {
+        // No need to save anything if there are no diversions to be recorded.
+        if self.is_empty() {
+            return;
+        }
+        debug_assert!(!entry_diversions.map.contains_key(target));
+        let iter = self.current.iter();
+        let mut entry_divert = Self::new();
+        entry_divert.current.extend(iter);
+        entry_diversions.map.insert(EntryRegDiversionsValue {
+            key: target,
+            divert: entry_divert,
+        });
+    }
+
+    /// Check that the recorded entry for a given `block` matches what is recorded in the
+    /// `entry_diversions`.
+    pub fn check_block_entry(&self, entry_diversions: &EntryRegDiversions, target: Block) -> bool {
+        let entry_divert = match entry_diversions.map.get(target) {
+            Some(entry_divert) => entry_divert,
+            None => return self.is_empty(),
+        };
+
+        if entry_divert.divert.current.len() != self.current.len() {
+            return false;
+        }
+
+        for (val, _) in entry_divert.divert.current.iter() {
+            if !self.current.contains_key(val) {
+                return false;
+            }
+        }
+        true
+    }
+
+    /// Return an object that can display the diversions.
+    pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayDiversions<'a> {
+        DisplayDiversions(&self, regs.into())
+    }
+}
+
+impl EntryRegDiversions {
+    /// Create a new empty entry diversion, to associate diversions with each block entry.
+    pub fn new() -> Self {
+        Self {
+            map: SparseMap::new(),
+        }
+    }
+
+    pub fn clear(&mut self) {
+        self.map.clear();
+    }
+}
+
+impl Clone for EntryRegDiversions {
+    /// The Clone trait is required by `ir::Function`.
+    fn clone(&self) -> Self {
+        let mut tmp = Self::new();
+        for v in self.map.values() {
+            tmp.map.insert(v.clone());
+        }
+        tmp
+    }
+}
+
+/// Implement `SparseMapValue`, as required to make use of a `SparseMap` for mapping the entry
+/// diversions for each block.
+impl SparseMapValue<Block> for EntryRegDiversionsValue {
+    fn key(&self) -> Block {
+        self.key
+    }
+}
+
+/// Object that displays register diversions.
+pub struct DisplayDiversions<'a>(&'a RegDiversions, Option<&'a RegInfo>);
+
+impl<'a> fmt::Display for DisplayDiversions<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{{")?;
+        for (value, div) in self.0.current.iter() {
+            write!(
+                f,
+                " {}: {} -> {}",
+                value,
+                div.from.display(self.1),
+                div.to.display(self.1)
+            )?
+        }
+        write!(f, " }}")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::entity::EntityRef;
+    use crate::ir::Value;
+
+    #[test]
+    fn inserts() {
+        let mut divs = RegDiversions::new();
+        let v1 = Value::new(1);
+        let v2 = Value::new(2);
+
+        divs.regmove(v1, 10, 12);
+        assert_eq!(
+            divs.diversion(v1),
+            Some(&Diversion {
+                from: ValueLoc::Reg(10),
+                to: ValueLoc::Reg(12),
+            })
+        );
+        assert_eq!(divs.diversion(v2), None);
+
+        divs.regmove(v1, 12, 11);
+        assert_eq!(divs.diversion(v1).unwrap().to, ValueLoc::Reg(11));
+        divs.regmove(v1, 11, 10);
+        assert_eq!(divs.diversion(v1), None);
+    }
+}
diff --git a/cranelift/codegen/src/regalloc/live_value_tracker.rs b/cranelift/codegen/src/regalloc/live_value_tracker.rs
new file mode 100644
index 0000000000..f106f4b39d
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/live_value_tracker.rs
@@ -0,0 +1,344 @@
+//! Track which values are live in a block with instruction granularity.
+//!
+//! The `LiveValueTracker` keeps track of the set of live SSA values at each instruction in a
+//! block. The sets of live values are computed on the fly as the tracker is moved from instruction
+//! to instruction, starting at the block header.
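+//!
+//! As an illustrative sketch (not part of the original sources), a register allocation pass
+//! drives the tracker roughly like this, with `liveness` and `domtree` computed beforehand:
+//!
+//! ```ignore
+//! let mut tracker = LiveValueTracker::new();
+//! let (liveins, params) = tracker.block_top(block, &func.dfg, &liveness, &func.layout, &domtree);
+//! // ... color `liveins` and `params` ...
+//! tracker.drop_dead_params();
+//! for inst in func.layout.block_insts(block) {
+//!     let (throughs, kills, defs) = tracker.process_inst(inst, &func.dfg, &liveness);
+//!     // ... free the registers of `kills`, allocate registers for `defs` ...
+//!     tracker.drop_dead(inst);
+//! }
+//! ```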
+
+use crate::dominator_tree::DominatorTree;
+use crate::entity::{EntityList, ListPool};
+use crate::fx::FxHashMap;
+use crate::ir::{Block, DataFlowGraph, ExpandedProgramPoint, Inst, Layout, Value};
+use crate::partition_slice::partition_slice;
+use crate::regalloc::affinity::Affinity;
+use crate::regalloc::liveness::Liveness;
+use crate::regalloc::liverange::LiveRange;
+use alloc::vec::Vec;
+
+type ValueList = EntityList<Value>;
+
+/// Compute and track live values throughout a block.
+pub struct LiveValueTracker {
+    /// The set of values that are live at the current program point.
+    live: LiveValueVec,
+
+    /// Saved set of live values for every jump and branch that can potentially be an immediate
+    /// dominator of a block.
+    ///
+    /// This is the set of values that are live *before* the branch.
+    idom_sets: FxHashMap<Inst, ValueList>,
+
+    /// Memory pool for the live sets.
+    idom_pool: ListPool<Value>,
+}
+
+/// Information about a value that is live at the current program point.
+#[derive(Debug)]
+pub struct LiveValue {
+    /// The live value.
+    pub value: Value,
+
+    /// The local ending point of the live range in the current block, as returned by
+    /// `LiveRange::def_local_end()` or `LiveRange::livein_local_end()`.
+    pub endpoint: Inst,
+
+    /// The affinity of the value as represented in its `LiveRange`.
+    ///
+    /// This value is simply a copy of the affinity stored in the live range. We copy it because
+    /// almost all users of `LiveValue` need to look at it.
+    pub affinity: Affinity,
+
+    /// The live range for this value never leaves its block.
+    pub is_local: bool,
+
+    /// This value is dead - the live range ends immediately.
+    pub is_dead: bool,
+}
+
+struct LiveValueVec {
+    /// The set of values that are live at the current program point.
+    values: Vec<LiveValue>,
+
+    /// How many values at the front of `values` are known to be live after `inst`?
+    ///
+    /// This is used to pass a much smaller slice to `partition_slice` when it's called a second
+    /// time for the same instruction.
+    live_prefix: Option<(Inst, usize)>,
+}
+
+impl LiveValueVec {
+    fn new() -> Self {
+        Self {
+            values: Vec::new(),
+            live_prefix: None,
+        }
+    }
+
+    /// Add a new live value to `values`. Copy some properties from `lr`.
+    fn push(&mut self, value: Value, endpoint: Inst, lr: &LiveRange) {
+        self.values.push(LiveValue {
+            value,
+            endpoint,
+            affinity: lr.affinity,
+            is_local: lr.is_local(),
+            is_dead: lr.is_dead(),
+        });
+    }
+
+    /// Remove all elements.
+    fn clear(&mut self) {
+        self.values.clear();
+        self.live_prefix = None;
+    }
+
+    /// Make sure that the values killed by `next_inst` are moved to the end of the `values`
+    /// vector.
+    ///
+    /// Returns the number of values that will be live after `next_inst`.
+    fn live_after(&mut self, next_inst: Inst) -> usize {
+        // How many values at the front of the vector are already known to survive `next_inst`?
+        // We don't need to pass this prefix to `partition_slice()`.
+        let keep = match self.live_prefix {
+            Some((i, prefix)) if i == next_inst => prefix,
+            _ => 0,
+        };
+
+        // Move the remaining surviving values to the front partition of the vector.
+        let prefix = keep + partition_slice(&mut self.values[keep..], |v| v.endpoint != next_inst);
+
+        // Remember the new prefix length in case we get called again for the same `next_inst`.
+        self.live_prefix = Some((next_inst, prefix));
+        prefix
+    }
+
+    /// Remove the values killed by `next_inst`.
+    fn remove_kill_values(&mut self, next_inst: Inst) {
+        let keep = self.live_after(next_inst);
+        self.values.truncate(keep);
+    }
+
+    /// Remove any dead values.
+    fn remove_dead_values(&mut self) {
+        self.values.retain(|v| !v.is_dead);
+        self.live_prefix = None;
+    }
+}
+
+impl LiveValueTracker {
+    /// Create a new blank tracker.
+    pub fn new() -> Self {
+        Self {
+            live: LiveValueVec::new(),
+            idom_sets: FxHashMap::default(),
+            idom_pool: ListPool::new(),
+        }
+    }
+
+    /// Clear all cached information.
+    pub fn clear(&mut self) {
+        self.live.clear();
+        self.idom_sets.clear();
+        self.idom_pool.clear();
+    }
+
+    /// Get the set of currently live values.
+    ///
+    /// Between calls to `process_inst()` and `drop_dead()`, this includes both values killed and
+    /// defined by the current instruction.
+    pub fn live(&self) -> &[LiveValue] {
+        &self.live.values
+    }
+
+    /// Get a mutable set of currently live values.
+    ///
+    /// Use with care and don't move entries around.
+    pub fn live_mut(&mut self) -> &mut [LiveValue] {
+        &mut self.live.values
+    }
+
+    /// Move the current position to the top of `block`.
+    ///
+    /// This depends on the stored live value set at `block`'s immediate dominator, so that must
+    /// have been visited first.
+    ///
+    /// Returns `(liveins, args)` as a pair of slices. The first slice is the set of live-in values
+    /// from the immediate dominator. The second slice is the set of `block` parameters.
+    ///
+    /// Dead parameters with no uses are included in `args`. Call `drop_dead_params()` to remove
+    /// them.
+    pub fn block_top(
+        &mut self,
+        block: Block,
+        dfg: &DataFlowGraph,
+        liveness: &Liveness,
+        layout: &Layout,
+        domtree: &DominatorTree,
+    ) -> (&[LiveValue], &[LiveValue]) {
+        // Start over, compute the set of live values at the top of the block from two sources:
+        //
+        // 1. Values that were live before `block`'s immediate dominator, filtered for those that
+        //    are actually live-in.
+        // 2. Arguments to `block` that are not dead.
+        //
+        self.live.clear();
+
+        // Compute the live-in values. Start by filtering the set of values that were live before
+        // the immediate dominator. Just use the empty set if there's no immediate dominator (i.e.,
+        // the entry block or an unreachable block).
+        if let Some(idom) = domtree.idom(block) {
+            // If the immediate dominator exists, we must have a stored list for it. This is a
+            // requirement on the order blocks are visited: all dominators must have been processed
+            // before the current block.
+            let idom_live_list = self
+                .idom_sets
+                .get(&idom)
+                .expect("No stored live set for dominator");
+            // Get just the values that are live-in to `block`.
+            for &value in idom_live_list.as_slice(&self.idom_pool) {
+                let lr = liveness
+                    .get(value)
+                    .expect("Immediate dominator value has no live range");
+
+                // Check if this value is live-in here.
+                if let Some(endpoint) = lr.livein_local_end(block, layout) {
+                    self.live.push(value, endpoint, lr);
+                }
+            }
+        }
+
+        // Now add all the live parameters to `block`.
+        let first_arg = self.live.values.len();
+        for &value in dfg.block_params(block) {
+            let lr = &liveness[value];
+            debug_assert_eq!(lr.def(), block.into());
+            match lr.def_local_end().into() {
+                ExpandedProgramPoint::Inst(endpoint) => {
+                    self.live.push(value, endpoint, lr);
+                }
+                ExpandedProgramPoint::Block(local_block) => {
+                    // This is a dead block parameter which is not even live into the first
+                    // instruction in the block.
+                    debug_assert_eq!(
+                        local_block, block,
+                        "block parameter live range ends at wrong block header"
+                    );
+                    // Give this value a fake endpoint that is the first instruction in the block.
+                    // We expect it to be removed by calling `drop_dead_params()`.
+                    self.live
+                        .push(value, layout.first_inst(block).expect("Empty block"), lr);
+                }
+            }
+        }
+
+        self.live.values.split_at(first_arg)
+    }
+
+    /// Prepare to move past `inst`.
+    ///
+    /// Determine the set of already live values that are killed by `inst`, and add the new defined
+    /// values to the tracked set.
+    ///
+    /// Returns `(throughs, kills, defs)` as a tuple of slices:
+    ///
+    /// 1. The `throughs` slice is the set of live-through values that are neither defined nor
+    ///    killed by the instruction.
+    /// 2. The `kills` slice is the set of values that were live before the instruction and are
+    ///    killed at the instruction. This does not include dead defs.
+    /// 3. The `defs` slice is guaranteed to be in the same order as `inst`'s results, and includes
+    ///    dead defines.
+    ///
+    /// The order of `throughs` and `kills` is arbitrary.
+    ///
+    /// The `drop_dead()` method must be called next to actually remove the dead values from the
+    /// tracked set after the two returned slices are no longer needed.
+    pub fn process_inst(
+        &mut self,
+        inst: Inst,
+        dfg: &DataFlowGraph,
+        liveness: &Liveness,
+    ) -> (&[LiveValue], &[LiveValue], &[LiveValue]) {
+        // Save a copy of the live values before any branches or jumps that could be somebody's
+        // immediate dominator.
+        if dfg[inst].opcode().is_branch() {
+            self.save_idom_live_set(inst);
+        }
+
+        // Move killed values to the end of the vector.
+        // Don't remove them yet, `drop_dead()` will do that.
+        let first_kill = self.live.live_after(inst);
+
+        // Add the values defined by `inst`.
+        let first_def = self.live.values.len();
+        for &value in dfg.inst_results(inst) {
+            let lr = &liveness[value];
+            debug_assert_eq!(lr.def(), inst.into());
+            match lr.def_local_end().into() {
+                ExpandedProgramPoint::Inst(endpoint) => {
+                    self.live.push(value, endpoint, lr);
+                }
+                ExpandedProgramPoint::Block(block) => {
+                    panic!("Instruction result live range can't end at {}", block);
+                }
+            }
+        }
+
+        (
+            &self.live.values[0..first_kill],
+            &self.live.values[first_kill..first_def],
+            &self.live.values[first_def..],
+        )
+    }
+
+    /// Prepare to move past a ghost instruction.
+    ///
+    /// This is like `process_inst`, except any defs are ignored.
+    ///
+    /// Returns `(throughs, kills)`.
+    pub fn process_ghost(&mut self, inst: Inst) -> (&[LiveValue], &[LiveValue]) {
+        let first_kill = self.live.live_after(inst);
+        self.live.values.as_slice().split_at(first_kill)
+    }
+
+    /// Drop the values that are now dead after moving past `inst`.
+    ///
+    /// This removes both live values that were killed by `inst` and dead defines on `inst` itself.
+    ///
+    /// This must be called after `process_inst(inst)` and before proceeding to the next
+    /// instruction.
+    pub fn drop_dead(&mut self, inst: Inst) {
+        // Remove both live values that were killed by `inst` and dead defines from `inst`.
+        self.live.remove_kill_values(inst);
+    }
+
+    /// Drop any values that are marked as `is_dead`.
+    ///
+    /// Use this after calling `block_top` to clean out dead block parameters.
+    pub fn drop_dead_params(&mut self) {
+        self.live.remove_dead_values();
+    }
+
+    /// Process new spills.
+    ///
+    /// Any values where `f` returns true are spilled and will be treated as if their affinity was
+    /// `Stack`.
+    pub fn process_spills<F>(&mut self, mut f: F)
+    where
+        F: FnMut(Value) -> bool,
+    {
+        for lv in &mut self.live.values {
+            if f(lv.value) {
+                lv.affinity = Affinity::Stack;
+            }
+        }
+    }
+
+    /// Save the current set of live values so it is associated with `idom`.
+    fn save_idom_live_set(&mut self, idom: Inst) {
+        let values = self.live.values.iter().map(|lv| lv.value);
+        let pool = &mut self.idom_pool;
+        // If there already is a set saved for `idom`, just keep it.
+        self.idom_sets.entry(idom).or_insert_with(|| {
+            let mut list = ValueList::default();
+            list.extend(values, pool);
+            list
+        });
+    }
+}
diff --git a/cranelift/codegen/src/regalloc/liveness.rs b/cranelift/codegen/src/regalloc/liveness.rs
new file mode 100644
index 0000000000..88c106cce4
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/liveness.rs
@@ -0,0 +1,443 @@
+//! Liveness analysis for SSA values.
+//!
+//! This module computes the live range of all the SSA values in a function and produces a
+//! `LiveRange` instance for each.
+//!
+//!
+//! # Liveness consumers
+//!
+//! The primary consumer of the liveness analysis is the SSA coloring pass which goes through each
+//! block and assigns a register to the defined values. This algorithm needs to maintain a set of
+//! the currently live values as it is iterating down the instructions in the block. It asks the
+//! following questions:
+//!
+//! - What is the set of live values at the entry to the block?
+//! - When moving past a use of a value, is that value still alive in the block, or was that the
+//!   last use?
+//! - When moving past a branch, which of the live values are still live below the branch?
+//!
+//! The set of `LiveRange` instances can answer these questions through their `def_local_end` and
+//! `livein_local_end` queries. The coloring algorithm visits blocks in a topological order of the
+//! dominator tree, so it can compute the set of live values at the beginning of a block by
+//! starting from the set of live values at the dominating branch instruction and filtering it
+//! with `livein_local_end`. These sets do not need to be stored in the liveness analysis.
+//!
+//! The secondary consumer of the liveness analysis is the spilling pass which needs to count the
+//! number of live values at every program point and insert spill code until the number of
+//! registers needed is small enough.
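+//!
+//! As a hedged sketch (illustrative only), a consumer asks those questions roughly like this:
+//!
+//! ```ignore
+//! let lr = &liveness[value];
+//! if let Some(end) = lr.livein_local_end(block, &func.layout) {
+//!     // `value` is live-in to `block`, and its local interval ends at `end`.
+//! }
+//! let last = lr.def_local_end(); // local end point in the defining block
+//! ```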
+//!
+//!
+//! # Alternative algorithms
+//!
+//! A number of different liveness analysis algorithms exist, so it is worthwhile to look at a few
+//! alternatives.
+//!
+//! ## Data-flow equations
+//!
+//! The classic *live variables analysis* that you will find in all compiler books from the
+//! previous century does not depend on SSA form. It is typically implemented by iteratively
+//! solving data-flow equations on bit-vectors of variables. The result is a live-out bit-vector
+//! of variables for every basic block in the program.
+//!
+//! This algorithm has some disadvantages that make us look elsewhere:
+//!
+//! - Quadratic memory use. We need a bit per variable per basic block in the function.
+//! - Dense representation of sparse data. In practice, the majority of SSA values never leave
+//!   their basic block, and those that do span basic blocks rarely span a large number of basic
+//!   blocks. This makes the data stored in the bitvectors quite sparse.
+//! - Traditionally, the data-flow equations were solved for real program *variables* which does
+//!   not include temporaries used in evaluating expressions. We have an SSA form program which
+//!   blurs the distinction between temporaries and variables. This makes the quadratic memory
+//!   problem worse because there are many more SSA values than there were variables in the
+//!   original program, and we don't know a priori which SSA values leave their basic block.
+//! - Missing last-use information. For values that are not live-out of a basic block, we would
+//!   need to store information about the last use in the block somewhere. LLVM stores this
+//!   information as a 'kill bit' on the last use in the IR. Maintaining these kill bits has been a
+//!   source of problems for LLVM's register allocator.
+//!
+//! Data-flow equations can detect when a variable is used uninitialized, and they can handle
+//! multiple definitions of the same variable. We don't need this generality since we already have
+//! a program in SSA form.
+//!
+//! ## LLVM's liveness analysis
+//!
+//! LLVM's register allocator computes liveness per *virtual register*, where a virtual register is
+//! a disjoint union of related SSA values that should be assigned to the same physical register.
+//! It uses a compact data structure very similar to our `LiveRange`. The important difference is
+//! that Cranelift's `LiveRange` only describes a single SSA value, while LLVM's `LiveInterval`
+//! describes the live range of a virtual register *and* which one of the related SSA values is
+//! live at any given program point.
+//!
+//! LLVM computes the live range of each virtual register independently by using the use-def
+//! chains that are baked into its IR. The algorithm for a single virtual register is:
+//!
+//! 1. Initialize the live range with a single-instruction snippet of liveness at each def, using
+//!    the def-chain. This does not include any phi-values.
+//! 2. Go through the virtual register's use chain and perform the following steps at each use:
+//! 3. Perform an exhaustive depth-first traversal up the CFG from the use. Look for basic blocks
+//!    that already contain some liveness and extend the last live SSA value in the block to be
+//!    live-out. Also build a list of new basic blocks where the register needs to be live-in.
+//! 4. Iteratively propagate live-out SSA values to the new live-in blocks. This may require new
+//!    PHI values to be created when different SSA values can reach the same block.
+//!
+//! The iterative SSA form reconstruction can be skipped if the depth-first search only encountered
+//! one SSA value.
+//!
+//! This algorithm has some advantages compared to the data-flow equations:
+//!
+//! - The live ranges of local virtual registers are computed very quickly without ever traversing
+//!   the CFG. The memory needed to store these live ranges is independent of the number of basic
+//!   blocks in the program.
+//! - The time to compute the live range of a global virtual register is proportional to the number
+//!   of basic blocks covered. Many virtual registers only cover a few blocks, even in very large
+//!   functions.
+//! - A single live range can be recomputed after making modifications to the IR. No global
+//!   algorithm is necessary. This feature depends on having use-def chains for virtual registers
+//!   which Cranelift doesn't.
+//!
+//! Cranelift uses data structures and algorithms very similar to LLVM's, with the important
+//! difference that live ranges are computed per SSA value instead of per virtual register, and
+//! the uses in Cranelift IR refer to SSA values instead of virtual registers. This means that
+//! Cranelift can skip the last step of reconstructing SSA form for the virtual register uses.
+//!
+//! ## Fast Liveness Checking for SSA-Form Programs
+//!
+//! A liveness analysis that is often brought up in the context of SSA-based register allocation
+//! was presented at CGO 2008:
+//!
+//! > Boissinot, B., Hack, S., Grund, D., de Dinechin, B. D., & Rastello, F. (2008). *Fast Liveness
+//! > Checking for SSA-Form Programs.* CGO.
+//!
+//! This analysis uses a global pre-computation that only depends on the CFG of the function. It
+//! then allows liveness queries for any (value, program point) pair. Each query traverses the use
+//! chain of the value and performs lookups in the precomputed bit-vectors.
+//!
+//! I did not seriously consider this analysis for Cranelift because:
+//!
+//! - It depends critically on use chains which Cranelift doesn't have.
+//! - Popular variables like the `this` pointer in a C++ method can have very large use chains.
+//!   Traversing such a long use chain on every liveness lookup has the potential for some nasty
+//!   quadratic behavior in unfortunate cases.
+//! - It says "fast" in the title, but the paper only claims to be 16% faster than a data-flow
+//!   based approach, which isn't that impressive.
+//!
+//! Nevertheless, the property of only depending on the CFG structure is very useful. If Cranelift
+//! gains use chains, this approach would be worth a proper evaluation.
+//!
+//!
+//! # Cranelift's liveness analysis
+//!
+//! The algorithm implemented in this module is similar to LLVM's with these differences:
+//!
+//! - The `LiveRange` data structure describes the liveness of a single SSA value, not a virtual
+//!   register.
+//! - Instructions in Cranelift IR contain references to SSA values, not virtual registers.
+//! - All live ranges are computed in one traversal of the program. Cranelift doesn't have use
+//!   chains, so it is not possible to compute the live range for a single SSA value
+//!   independently.
+//!
+//! The liveness computation visits all instructions in the program. The order is not important
+//! for the algorithm to be correct. At each instruction, the used values are examined.
+//!
+//! - The first time a value is encountered, its live range is constructed as a dead live range
+//!   containing only the defining program point.
+//! - The local interval of the value's live range is extended so it reaches the use. This may
+//!   require creating a new live-in local interval for the block.
+//! - If the live range became live-in to the block, add the block to a work-list.
+//! - While the work-list is non-empty, pop a live-in block and repeat the two steps above, using
+//!   each of the live-in block's CFG predecessor instructions as a 'use'.
+//!
+//! The effect of this algorithm is to extend the live range of each value to reach uses as they
+//! are visited. No data about each value beyond the live range is needed between visiting uses,
+//! so nothing is lost by computing the live range of all values simultaneously.
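+//!
+//! In code form, the traversal described above is roughly the following (a sketch; the real
+//! implementation is `Liveness::compute` below):
+//!
+//! ```ignore
+//! for block in func.layout.blocks() {
+//!     for inst in func.layout.block_insts(block) {
+//!         for &arg in func.dfg.inst_args(inst) {
+//!             // Create `arg`'s live range on first sight, then extend it to reach
+//!             // `inst`, pushing any block that becomes live-in onto a work list.
+//!         }
+//!     }
+//! }
+//! ```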
+//!
+//! ## Cache efficiency of Cranelift vs LLVM
+//!
+//! Since LLVM computes the complete live range of a virtual register in one go, it can keep the
+//! whole `LiveInterval` for the register in L1 cache. Since it is visiting the instructions in
+//! use chain order, some cache thrashing can occur as a result of pulling instructions into cache
+//! somewhat chaotically.
+//!
+//! Cranelift uses a transposed algorithm, visiting instructions in order. This means that each
+//! instruction is brought into cache only once, and it is likely that the other instructions on
+//! the same cache line will be visited before the line is evicted.
+//!
+//! Cranelift's problem is that the `LiveRange` structs are visited many times and not always
+//! regularly. We should strive to make the `LiveRange` struct as small as possible such that
+//! multiple related values can live on the same cache line.
+//!
+//! - Local values should fit in a 16-byte `LiveRange` struct or smaller. The current
+//!   implementation contains a 24-byte `Vec` object and a redundant `value` member pushing the
+//!   size to 32 bytes.
+//! - Related values should be stored on the same cache line. The current sparse set implementation
+//!   does a decent job of that.
+//! - For global values, the list of live-in intervals is very likely to fit on a single cache
+//!   line. These lists are very likely to be found in L2 cache at least.
+//!
+//! There is some room for improvement.
+
+use crate::entity::SparseMap;
+use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
+use crate::ir::dfg::ValueDef;
+use crate::ir::{Block, Function, Inst, Layout, ProgramPoint, Value};
+use crate::isa::{EncInfo, OperandConstraint, TargetIsa};
+use crate::regalloc::affinity::Affinity;
+use crate::regalloc::liverange::LiveRange;
+use crate::timing;
+use alloc::vec::Vec;
+use core::mem;
+use core::ops::Index;
+
+/// A set of live ranges, indexed by value number.
+type LiveRangeSet = SparseMap<Value, LiveRange>;
+
+/// Get a mutable reference to the live range for `value`.
+/// Create it if necessary.
+fn get_or_create<'a>(
+    lrset: &'a mut LiveRangeSet,
+    value: Value,
+    isa: &dyn TargetIsa,
+    func: &Function,
+    encinfo: &EncInfo,
+) -> &'a mut LiveRange {
+    // It would be better to use `get_mut()` here, but that leads to borrow checker fighting
+    // which can probably only be resolved by non-lexical lifetimes.
+    // https://github.com/rust-lang/rfcs/issues/811
+    if lrset.get(value).is_none() {
+        // Create a live range for value. We need the program point that defines it.
+        let def;
+        let affinity;
+        match func.dfg.value_def(value) {
+            ValueDef::Result(inst, rnum) => {
+                def = inst.into();
+                // Initialize the affinity from the defining instruction's result constraints.
+                // Don't do this for call return values which are always tied to a single register.
+                affinity = encinfo
+                    .operand_constraints(func.encodings[inst])
+                    .and_then(|rc| rc.outs.get(rnum))
+                    .map(Affinity::new)
+                    .or_else(|| {
+                        // If this is a call, get the return value affinity.
+                        func.dfg
+                            .call_signature(inst)
+                            .map(|sig| Affinity::abi(&func.dfg.signatures[sig].returns[rnum], isa))
+                    })
+                    .unwrap_or_default();
+            }
+            ValueDef::Param(block, num) => {
+                def = block.into();
+                if func.layout.entry_block() == Some(block) {
+                    // The affinity for entry block parameters can be inferred from the function
+                    // signature.
+                    affinity = Affinity::abi(&func.signature.params[num], isa);
+                } else {
+                    // Give normal block parameters a register affinity matching their type.
+                    let rc = isa.regclass_for_abi_type(func.dfg.value_type(value));
+                    affinity = Affinity::Reg(rc.into());
+                }
+            }
+        };
+        lrset.insert(LiveRange::new(value, def, affinity));
+    }
+    lrset.get_mut(value).unwrap()
+}
+
+/// Extend the live range for `value` so it reaches `to` which must live in `block`.
+fn extend_to_use(
+    lr: &mut LiveRange,
+    block: Block,
+    to: Inst,
+    worklist: &mut Vec<Block>,
+    func: &Function,
+    cfg: &ControlFlowGraph,
+) {
+    // This is our scratch working space, and we'll leave it empty when we return.
+    debug_assert!(worklist.is_empty());
+
+    // Extend the range locally in `block`.
+    // If there already was a live interval in that block, we're done.
+    if lr.extend_in_block(block, to, &func.layout) {
+        worklist.push(block);
+    }
+
+    // The work list contains those blocks where we have learned that the value needs to be
+    // live-in.
+    //
+    // This algorithm becomes a depth-first traversal up the CFG, enumerating all paths through the
+    // CFG from the existing live range to `block`.
+    //
+    // Extend the live range as we go. The live range itself also serves as a visited set since
+    // `extend_in_block` will never return true twice for the same block.
+    //
+    while let Some(livein) = worklist.pop() {
+        // We've learned that the value needs to be live-in to the `livein` block.
+        // Make sure it is also live at all predecessor branches to `livein`.
+        for BlockPredecessor {
+            block: pred,
+            inst: branch,
+        } in cfg.pred_iter(livein)
+        {
+            if lr.extend_in_block(pred, branch, &func.layout) {
+                // This predecessor block also became live-in. We need to process it later.
+                worklist.push(pred);
+            }
+        }
+    }
+}
+
+/// Liveness analysis for a function.
+///
+/// Compute a live range for every SSA value used in the function.
+pub struct Liveness {
+    /// The live ranges that have been computed so far.
+    ranges: LiveRangeSet,
+
+    /// Working space for the `extend_to_use` algorithm.
+    /// This vector is always empty, except for inside that function.
+    /// It lives here to avoid repeated allocation of scratch memory.
+    worklist: Vec<Block>,
+}
+
+impl Liveness {
+    /// Create a new empty liveness analysis.
+    ///
+    /// The memory allocated for this analysis can be reused for multiple functions. Use the
+    /// `compute` method to actually run the analysis for a function.
+    pub fn new() -> Self {
+        Self {
+            ranges: LiveRangeSet::new(),
+            worklist: Vec::new(),
+        }
+    }
+
+    /// Current live ranges.
+    pub fn ranges(&self) -> &LiveRangeSet {
+        &self.ranges
+    }
+
+    /// Clear all data structures in this liveness analysis.
+    pub fn clear(&mut self) {
+        self.ranges.clear();
+        self.worklist.clear();
+    }
+
+    /// Get the live range for `value`, if it exists.
+    pub fn get(&self, value: Value) -> Option<&LiveRange> {
+        self.ranges.get(value)
+    }
+
+    /// Create a new live range for `value`.
+    ///
+    /// The new live range will be defined at `def` with no extent, like a dead value.
+    ///
+    /// This asserts that `value` does not have an existing live range.
+    pub fn create_dead<PP>(&mut self, value: Value, def: PP, affinity: Affinity)
+    where
+        PP: Into<ProgramPoint>,
+    {
+        let old = self
+            .ranges
+            .insert(LiveRange::new(value, def.into(), affinity));
+        debug_assert!(old.is_none(), "{} already has a live range", value);
+    }
+
+    /// Move the definition of `value` to `def`.
+    ///
+    /// The old and new def points must be in the same block, and before the end of the live range.
+    pub fn move_def_locally<PP>(&mut self, value: Value, def: PP)
+    where
+        PP: Into<ProgramPoint>,
+    {
+        let lr = self.ranges.get_mut(value).expect("Value has no live range");
+        lr.move_def_locally(def.into());
+    }
+
+    /// Locally extend the live range for `value` to reach `user`.
+    ///
+    /// It is assumed the `value` is already live before `user` in `block`.
+    ///
+    /// Returns a mutable reference to the value's affinity in case that also needs to be updated.
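+    ///
+    /// Illustrative call (a sketch mirroring `replace_global_defines` in the coloring pass):
+    ///
+    /// ```ignore
+    /// // `copy` was just inserted after the def of `local` in `block`:
+    /// liveness.extend_locally(local, block, copy, &func.layout);
+    /// ```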
+    pub fn extend_locally(
+        &mut self,
+        value: Value,
+        block: Block,
+        user: Inst,
+        layout: &Layout,
+    ) -> &mut Affinity {
+        debug_assert_eq!(Some(block), layout.inst_block(user));
+        let lr = self.ranges.get_mut(value).expect("Value has no live range");
+        let livein = lr.extend_in_block(block, user, layout);
+        debug_assert!(!livein, "{} should already be live in {}", value, block);
+        &mut lr.affinity
+    }
+
+    /// Change the affinity of `value` to `Stack` and return the previous affinity.
+    pub fn spill(&mut self, value: Value) -> Affinity {
+        let lr = self.ranges.get_mut(value).expect("Value has no live range");
+        mem::replace(&mut lr.affinity, Affinity::Stack)
+    }
+
+    /// Compute the live ranges of all SSA values used in `func`.
+    /// This clears out any existing analysis stored in this data structure.
+    pub fn compute(&mut self, isa: &dyn TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
+        let _tt = timing::ra_liveness();
+        self.ranges.clear();
+
+        // Get ISA data structures used for computing live range affinities.
+        let encinfo = isa.encoding_info();
+        let reginfo = isa.register_info();
+
+        // The liveness computation needs to visit all uses, but the order doesn't matter.
+        // TODO: Perhaps this traversal of the function could be combined with a dead code
+        // elimination pass if we visit a post-order of the dominator tree?
+        for block in func.layout.blocks() {
+            // Make sure we have created live ranges for dead block parameters.
+            // TODO: If these parameters are really dead, we could remove them, except for the
+            // entry block which must match the function signature.
+            for &arg in func.dfg.block_params(block) {
+                get_or_create(&mut self.ranges, arg, isa, func, &encinfo);
+            }
+
+            for inst in func.layout.block_insts(block) {
+                // Eliminate all value aliases, they would confuse the register allocator.
+                func.dfg.resolve_aliases_in_arguments(inst);
+
+                // Make sure we have created live ranges for dead defs.
+                // TODO: When we implement DCE, we can use the absence of a live range to indicate
+                // an unused value.
+                for &def in func.dfg.inst_results(inst) {
+                    get_or_create(&mut self.ranges, def, isa, func, &encinfo);
+                }
+
+                // Iterator of constraints, one per value operand.
+                let encoding = func.encodings[inst];
+                let operand_constraint_slice: &[OperandConstraint] =
+                    encinfo.operand_constraints(encoding).map_or(&[], |c| c.ins);
+                let mut operand_constraints = operand_constraint_slice.iter();
+
+                for &arg in func.dfg.inst_args(inst) {
+                    // Get the live range, create it as a dead range if necessary.
+                    let lr = get_or_create(&mut self.ranges, arg, isa, func, &encinfo);
+
+                    // Extend the live range to reach this use.
+                    extend_to_use(lr, block, inst, &mut self.worklist, func, cfg);
+
+                    // Apply operand constraint, ignoring any variable arguments after the fixed
+                    // operands described by `operand_constraints`. Variable arguments are either
+                    // block arguments or call/return ABI arguments.
+                    if let Some(constraint) = operand_constraints.next() {
+                        lr.affinity.merge(constraint, &reginfo);
+                    }
+                }
+            }
+        }
+    }
+}
+
+impl Index<Value> for Liveness {
+    type Output = LiveRange;
+    fn index(&self, index: Value) -> &LiveRange {
+        self.ranges
+            .get(index)
+            .unwrap_or_else(|| panic!("{} has no live range", index))
+    }
+}
diff --git a/cranelift/codegen/src/regalloc/liverange.rs b/cranelift/codegen/src/regalloc/liverange.rs
new file mode 100644
index 0000000000..0e2f8385fc
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/liverange.rs
@@ -0,0 +1,720 @@
+//! Data structure representing the live range of an SSA value.
+//!
+//! Live ranges are tracked per SSA value, not per variable or virtual register. The live range of
+//! an SSA value begins where it is defined and extends to all program points where the value is
+//! still needed.
+//!
+//! # Local Live Ranges
+//!
+//! Inside a single basic block, the live range of a value is always an interval between
+//! two program points (if the value is live in the block at all). The starting point is either:
+//!
+//! 1. The instruction that defines the value, or
+//! 2. The block header, because the value is an argument to the block, or
+//! 3. The block header, because the value is defined in another block and live-in to this one.
+//!
+//! The ending point of the local live range is the last of the following program points in the
+//! block:
+//!
+//! 1. The last use in the block, where a *use* is an instruction that has the value as an
+//!    argument.
+//! 2. The last branch or jump instruction in the block that can reach a use.
+//! 3. If the value has no uses anywhere (a *dead value*), the program point that defines it.
+//!
+//! Note that 2. includes loop back-edges to the same block. In general, if a value is defined
+//! outside a loop and used inside the loop, it will be live in the entire loop.
+//!
+//! # Global Live Ranges
+//!
+//! Values that appear in more than one block have a *global live range* which can be seen as the
+//! disjoint union of the per-block local intervals for all of the blocks where the value is live.
+//! Together with a `ProgramOrder` which provides a linear ordering of the blocks, the global live
+//! range becomes a linear sequence of disjoint intervals, at most one per block.
+//!
+//! In the special case of a dead value, the global live range is a single interval where the
+//! start and end points are the same. The global live range of a value is never completely empty.
+//!
+//! # Register interference
+//!
+//! The register allocator uses live ranges to determine if values *interfere*, which means that
+//! they can't be stored in the same register. Two live ranges interfere if and only if any of
+//! their intervals overlap.
+//!
+//! If one live range ends at an instruction that defines another live range, those two live
+//! ranges are not considered to interfere. This is because most ISAs allow instructions to reuse
+//! an input register for an output value. If Cranelift gets support for inline assembly, we will
+//! need to handle *early clobbers* which are output registers that are not allowed to alias any
+//! input registers.
+//!
+//! If `i1 < i2 < i3` are program points, we have:
+//!
+//! - `i1-i2` and `i1-i3` interfere because the intervals overlap.
+//! - `i1-i2` and `i2-i3` don't interfere.
+//! - `i1-i3` and `i2-i2` do interfere because the dead def would clobber the register.
+//! - `i1-i2` and `i2-i2` don't interfere.
+//! - `i2-i3` and `i2-i2` do interfere.
+//!
+//! Because of this behavior around interval end points, live range interference is not completely
+//! equivalent to mathematical intersection of open or half-open intervals.
+//!
+//! # Implementation notes
+//!
+//! A few notes about the implementation of the live intervals field `liveins`. This should not
+//! concern someone only looking to use the public interface.
+//!
+//! ## Current representation
+//!
+//! Our current implementation uses a sorted array of compressed intervals, represented by their
+//! boundaries (Block, Inst), sorted by Block.
+//! This is a simple data structure that enables easy coalescing of intervals and shows some nice
+//! performance behavior. See https://github.com/bytecodealliance/cranelift/issues/1084 for
+//! benchmarks against using a `bforest::Map`.
+//!
+//! ## Block ordering
+//!
+//! The relative order of blocks is used to maintain a sorted list of live-in intervals and to
+//! coalesce adjacent live-in intervals when the prior interval covers the whole block. This
+//! doesn't depend on any property of the program order, so alternative orderings are possible:
+//!
+//! 1. The block layout order. This is what we currently use.
+//! 2. A topological order of the dominator tree. All the live-in intervals would come after the
+//!    def interval.
+//! 3. A numerical order by block number. Performant because it doesn't need to indirect through
+//!    the `ProgramOrder` for comparisons.
+//!
+//! These orderings will cause small differences in coalescing opportunities, but all of them
+//! would do a decent job of compressing a long live range. The numerical order might be
+//! preferable because:
+//!
+//! - It has better performance because block numbers can be compared directly without any table
+//!   lookups.
+//! - If block numbers are not reused, it is safe to allocate new blocks without getting spurious
+//!   live-in intervals from any coalesced representations that happen to cross a new block.
+//!
+//! For comparing instructions, the layout order is always what we want.
+//!
+//! ## Alternative representation
+//!
+//! Since a local live-in interval always begins at its block header, it is uniquely described by
+//! its end point instruction alone. We can use the layout to look up the block containing the end
+//! point. This means that a sorted `Vec<Inst>` would be enough to represent the set of live-in
+//! intervals.
+//!
+//! Coalescing is an important compression technique because some live ranges can span thousands
+//! of blocks. We can represent that by switching to a sorted `Vec<ProgramPoint>` representation
+//! where an `[Block, Inst]` pair represents a coalesced range, while an `Inst` entry without a
+//! preceding `Block` entry represents a single live-in interval.
+//!
+//! This representation is more compact for a live range with many uncoalesced live-in intervals.
+//! It is more complicated to work with, though, so it is probably not worth it. The performance
+//! benefits of switching to a numerical block order only appear if the binary search is doing
+//! block-block comparisons.
+//!
+//! A `BTreeMap<Block, Inst>` could have been used for the live-in intervals, but it doesn't
+//! provide the necessary API to make coalescing easy, nor does it optimize for our types' sizes.
+//!
+//! Even the specialized `bforest::Map<Block, Inst>` implementation is slower than a plain sorted
+//! array, see https://github.com/bytecodealliance/cranelift/issues/1084 for details.
+
+use crate::entity::SparseMapValue;
+use crate::ir::{Block, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value};
+use crate::regalloc::affinity::Affinity;
+use core::cmp::Ordering;
+use core::marker::PhantomData;
+use smallvec::SmallVec;
+
+/// Global live range of a single SSA value.
+///
+/// As [explained in the module documentation](index.html#local-live-ranges), the live range of an
+/// SSA value is the disjoint union of a set of intervals, each local to a single block, and with
+/// at most one interval per block. We further distinguish between:
+///
+/// 1. The *def interval* is the local interval in the block where the value is defined, and
+/// 2. The *live-in intervals* are the local intervals in the remaining blocks.
+///
+/// A live-in interval always begins at the block header, while the def interval can begin at the
+/// defining instruction, or at the block header for a block argument value.
+///
+/// All values have a def interval, but a large proportion of values don't have any live-in
+/// intervals. These are called *local live ranges*.
+///
+/// # Program order requirements
+///
+/// The internal representation of a `LiveRange` depends on a consistent `ProgramOrder` both for
+/// ordering instructions inside a block *and* for ordering blocks. The methods that depend on the
+/// ordering take an explicit `ProgramOrder` object, and it is the caller's responsibility to
+/// ensure that the provided ordering is consistent between calls.
+///
+/// In particular, changing the order of blocks or inserting new blocks will invalidate live
+/// ranges.
+///
+/// Inserting new instructions in the layout is safe, but removing instructions is not. Besides
+/// the instructions using or defining their value, `LiveRange` structs can contain references to
+/// branch and jump instructions.
+pub type LiveRange = GenericLiveRange<Layout>;
+
+// See the comment on `liveins` below.
+pub struct Interval {
+    begin: Block,
+    end: Inst,
+}
+
+/// Generic live range implementation.
+///
+/// The intended generic parameter is `PO=Layout`, but tests are simpler with a mock order.
+/// Use `LiveRange` instead of using this generic directly.
+pub struct GenericLiveRange<PO: ProgramOrder> {
+    /// The value described by this live range.
+    /// This member can't be modified in case the live range is stored in a `SparseMap`.
+    value: Value,
+
+    /// The preferred register allocation for this value.
+    pub affinity: Affinity,
+
+    /// The instruction or block header where this value is defined.
+    def_begin: ProgramPoint,
+
+    /// The end point of the def interval. This must always belong to the same block as
+    /// `def_begin`.
+    ///
+    /// We always have `def_begin <= def_end` with equality implying a dead def live range with no
+    /// uses.
+    def_end: ProgramPoint,
+
+    /// Additional live-in intervals sorted in program order.
+    ///
+    /// This vector is empty for most values which are only used in one block.
+    ///
+    /// An entry `block -> inst` means that the live range is live-in to `block`, continuing up to
+    /// `inst` which may belong to a later block in the program order.
+    ///
+    /// The entries are non-overlapping, and none of them overlap the block where the value is
+    /// defined.
+    liveins: SmallVec<[Interval; 2]>,
+
+    po: PhantomData<*const PO>,
+}
+
+/// A simple helper macro to make comparisons more natural to read.
+macro_rules! cmp {
+    ($order:ident, $a:ident > $b:expr) => {
+        $order.cmp($a, $b) == Ordering::Greater
+    };
+    ($order:ident, $a:ident >= $b:expr) => {
+        $order.cmp($a, $b) != Ordering::Less
+    };
+    ($order:ident, $a:ident < $b:expr) => {
+        $order.cmp($a, $b) == Ordering::Less
+    };
+    ($order:ident, $a:ident <= $b:expr) => {
+        $order.cmp($a, $b) != Ordering::Greater
+    };
+}
+
+impl<PO: ProgramOrder> GenericLiveRange<PO> {
+    /// Create a new live range for `value` defined at `def`.
+    ///
+    /// The live range will be created as dead, but it can be extended with `extend_in_block()`.
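+    ///
+    /// Illustrative construction (a sketch; `v0` and `def_inst` are made-up values):
+    ///
+    /// ```ignore
+    /// let lr = LiveRange::new(v0, def_inst.into(), Affinity::default());
+    /// assert!(lr.is_dead() && lr.is_local());
+    /// ```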
+    pub fn new(value: Value, def: ProgramPoint, affinity: Affinity) -> Self {
+        Self {
+            value,
+            affinity,
+            def_begin: def,
+            def_end: def,
+            liveins: SmallVec::new(),
+            po: PhantomData,
+        }
+    }
+
+    /// Finds an entry in the compressed set of live-in intervals that contains `block`, or
+    /// returns the position where such a new entry should be inserted.
+    fn lookup_entry_containing_block(&self, block: Block, order: &PO) -> Result<usize, usize> {
+        self.liveins
+            .binary_search_by(|interval| order.cmp(interval.begin, block))
+            .or_else(|n| {
+                // The previous interval's end might cover the searched block.
+                if n > 0 && cmp!(order, block <= self.liveins[n - 1].end) {
+                    Ok(n - 1)
+                } else {
+                    Err(n)
+                }
+            })
+    }
+
+    /// Extend the local interval for `block` so it reaches `to` which must belong to `block`.
+    /// Create a live-in interval if necessary.
+    ///
+    /// If the live range already has a local interval in `block`, extend its end point so it
+    /// includes `to`, and return false.
+    ///
+    /// If the live range did not previously have a local interval in `block`, add one so the value
+    /// is live-in to `block`, extending to `to`. Return true.
+    ///
+    /// The return value can be used to detect if we just learned that the value is live-in to
+    /// `block`. This can trigger recursive extensions in `block`'s CFG predecessor blocks.
+    pub fn extend_in_block(&mut self, block: Block, inst: Inst, order: &PO) -> bool {
+        // First check if we're extending the def interval.
+        //
+        // We're assuming here that `inst` never precedes `def_begin` in the same block, but we
+        // can't check it without a method for getting `inst`'s block.
+        if cmp!(order, block <= self.def_end) && cmp!(order, inst >= self.def_begin) {
+            let inst_pp = inst.into();
+            debug_assert_ne!(
+                inst_pp, self.def_begin,
+                "Can't use value in the defining instruction."
+            );
+            if cmp!(order, inst > self.def_end) {
+                self.def_end = inst_pp;
+            }
+            return false;
+        }
+
+        // Now check if we're extending any of the existing live-in intervals.
+        match self.lookup_entry_containing_block(block, order) {
+            Ok(n) => {
+                // We found one interval and might need to extend it.
+                if cmp!(order, inst <= self.liveins[n].end) {
+                    // Both interval parts are already included in a compressed interval.
+                    return false;
+                }
+
+                // If the instruction at the end is the last instruction before the next block,
+                // coalesce the two intervals:
+                // [ival.begin; ival.end] + [next.begin; next.end] = [ival.begin; next.end]
+                if let Some(next) = &self.liveins.get(n + 1) {
+                    if order.is_block_gap(inst, next.begin) {
+                        // At this point we can choose to remove the current interval or the next
+                        // one; remove the next one to avoid one memory move.
+                        let next_end = next.end;
+                        debug_assert!(cmp!(order, next_end > self.liveins[n].end));
+                        self.liveins[n].end = next_end;
+                        self.liveins.remove(n + 1);
+                        return false;
+                    }
+                }
+
+                // We can't coalesce, just extend the interval.
+                self.liveins[n].end = inst;
+                false
+            }
+
+            Err(n) => {
+                // No interval was found containing the current block: we need to insert a new
+                // one, unless there's a coalescing opportunity with the previous or next one.
+                let coalesce_next = self
+                    .liveins
+                    .get(n)
+                    .filter(|next| order.is_block_gap(inst, next.begin))
+                    .is_some();
+                let coalesce_prev = self
+                    .liveins
+                    .get(n.wrapping_sub(1))
+                    .filter(|prev| order.is_block_gap(prev.end, block))
+                    .is_some();
+
+                match (coalesce_prev, coalesce_next) {
+                    // The new interval is the missing hole between prev and next: we can merge
+                    // them all together.
+                    (true, true) => {
+                        let prev_end = self.liveins[n - 1].end;
+                        debug_assert!(cmp!(order, prev_end <= self.liveins[n].end));
+                        self.liveins[n - 1].end = self.liveins[n].end;
+                        self.liveins.remove(n);
+                    }
+
+                    // Coalesce only with the previous or next one.
+                    (true, false) => {
+                        debug_assert!(cmp!(order, inst >= self.liveins[n - 1].end));
+                        self.liveins[n - 1].end = inst;
+                    }
+                    (false, true) => {
+                        debug_assert!(cmp!(order, block <= self.liveins[n].begin));
+                        self.liveins[n].begin = block;
+                    }
+
+                    (false, false) => {
+                        // No coalescing opportunity, we have to insert.
+                        self.liveins.insert(
+                            n,
+                            Interval {
+                                begin: block,
+                                end: inst,
+                            },
+                        );
+                    }
+                }
+
+                true
+            }
+        }
+    }
+
+    /// Is this the live range of a dead value?
+    ///
+    /// A dead value has no uses, and its live range ends at the same program point where it is
+    /// defined.
+    pub fn is_dead(&self) -> bool {
+        self.def_begin == self.def_end
+    }
+
+    /// Is this a local live range?
+    ///
+    /// A local live range is only used in the same block where it was defined.
+    pub fn is_local(&self) -> bool {
+        self.liveins.is_empty()
+    }
+
+    /// Get the program point where this live range is defined.
+    ///
+    /// This will be a block header when the value is a block argument, otherwise it is the
+    /// defining instruction.
+    pub fn def(&self) -> ProgramPoint {
+        self.def_begin
+    }
+
+    /// Move the definition of this value to a new program point.
+    ///
+    /// It is only valid to move the definition within the same block, and it can't be moved beyond
+    /// `def_local_end()`.
+    pub fn move_def_locally(&mut self, def: ProgramPoint) {
+        self.def_begin = def;
+    }
+
+    /// Get the local end-point of this live range in the block where it is defined.
+    ///
+    /// This can be the block header itself in the case of a dead block argument.
+    /// Otherwise, it will be the last local use or branch/jump that can reach a use.
+    pub fn def_local_end(&self) -> ProgramPoint {
+        self.def_end
+    }
+
+    /// Get the local end-point of this live range in a block where it is live-in.
+    ///
+    /// If this live range is not live-in to `block`, return `None`. Otherwise, return the
+    /// end-point of this live range's local interval in `block`.
+    ///
+    /// If the live range is live through all of `block`, the terminator of `block` is a correct
+    /// answer, but it is also possible that an even later program point is returned. So don't
+    /// depend on the returned `Inst` to belong to `block`.
+    pub fn livein_local_end(&self, block: Block, order: &PO) -> Option<Inst> {
+        self.lookup_entry_containing_block(block, order)
+            .and_then(|i| {
+                let inst = self.liveins[i].end;
+                if cmp!(order, block < inst) {
+                    Ok(inst)
+                } else {
+                    // Can be any error type, really, since it's discarded by ok().
+                    Err(i)
+                }
+            })
+            .ok()
+    }
+
+    /// Is this value live-in to `block`?
+    ///
+    /// A block argument is not considered to be live in.
+    pub fn is_livein(&self, block: Block, order: &PO) -> bool {
+        self.livein_local_end(block, order).is_some()
+    }
+
+    /// Get all the live-in intervals.
+    ///
+    /// Note that the intervals are stored in a compressed form so each entry may span multiple
+    /// blocks where the value is live in.
+    pub fn liveins<'a>(&'a self) -> impl Iterator<Item = (Block, Inst)> + 'a {
+        self.liveins
+            .iter()
+            .map(|interval| (interval.begin, interval.end))
+    }
+
+    /// Check if this live range overlaps a definition in `block`.
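+    ///
+    /// For example (hypothetical program points, using the mock order from the tests below): a
+    /// range defined at `i11` and extended to `i13` overlaps a def at `i12`, but not one at `i14`.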
+    pub fn overlaps_def(&self, def: ExpandedProgramPoint, block: Block, order: &PO) -> bool {
+        // Two defs at the same program point always overlap, even if one is dead.
+        if def == self.def_begin.into() {
+            return true;
+        }
+
+        // Check for an overlap with the local range.
+        if cmp!(order, def >= self.def_begin) && cmp!(order, def < self.def_end) {
+            return true;
+        }
+
+        // Check for an overlap with a live-in range.
+        match self.livein_local_end(block, order) {
+            Some(inst) => cmp!(order, def < inst),
+            None => false,
+        }
+    }
+
+    /// Check if this live range reaches a use at `user` in `block`.
+    pub fn reaches_use(&self, user: Inst, block: Block, order: &PO) -> bool {
+        // Check for an overlap with the local range.
+        if cmp!(order, user > self.def_begin) && cmp!(order, user <= self.def_end) {
+            return true;
+        }
+
+        // Check for an overlap with a live-in range.
+        match self.livein_local_end(block, order) {
+            Some(inst) => cmp!(order, user <= inst),
+            None => false,
+        }
+    }
+
+    /// Check if this live range is killed at `user` in `block`.
+    pub fn killed_at(&self, user: Inst, block: Block, order: &PO) -> bool {
+        self.def_local_end() == user.into() || self.livein_local_end(block, order) == Some(user)
+    }
+}
+
+/// Allow a `LiveRange` to be stored in a `SparseMap` indexed by values.
+impl<PO: ProgramOrder> SparseMapValue<Value> for GenericLiveRange<PO> {
+    fn key(&self) -> Value {
+        self.value
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{GenericLiveRange, Interval};
+    use crate::entity::EntityRef;
+    use crate::ir::{Block, Inst, Value};
+    use crate::ir::{ExpandedProgramPoint, ProgramOrder};
+    use alloc::vec::Vec;
+    use core::cmp::Ordering;
+
+    // Dummy program order which simply compares indexes.
+    // It is assumed that blocks have indexes that are multiples of 10, and instructions have
+    // indexes in between. `is_block_gap` assumes that terminator instructions have indexes of the
+    // form block * 10 + 1. This is used in the coalesce test.
+    struct ProgOrder {}
+
+    impl ProgramOrder for ProgOrder {
+        fn cmp<A, B>(&self, a: A, b: B) -> Ordering
+        where
+            A: Into<ExpandedProgramPoint>,
+            B: Into<ExpandedProgramPoint>,
+        {
+            fn idx(pp: ExpandedProgramPoint) -> usize {
+                match pp {
+                    ExpandedProgramPoint::Inst(i) => i.index(),
+                    ExpandedProgramPoint::Block(e) => e.index(),
+                }
+            }
+
+            let ia = idx(a.into());
+            let ib = idx(b.into());
+            ia.cmp(&ib)
+        }
+
+        fn is_block_gap(&self, inst: Inst, block: Block) -> bool {
+            inst.index() % 10 == 1 && block.index() / 10 == inst.index() / 10 + 1
+        }
+    }
+
+    impl ProgOrder {
+        // Get the block corresponding to `inst`.
+        fn inst_block(&self, inst: Inst) -> Block {
+            let i = inst.index();
+            Block::new(i - i % 10)
+        }
+
+        // Get the block of a program point.
+        fn pp_block<PP: Into<ExpandedProgramPoint>>(&self, pp: PP) -> Block {
+            match pp.into() {
+                ExpandedProgramPoint::Inst(i) => self.inst_block(i),
+                ExpandedProgramPoint::Block(e) => e,
+            }
+        }
+
+        // Validate the live range invariants.
+        fn validate(&self, lr: &GenericLiveRange<Self>) {
+            // The def interval must cover a single block.
+            let def_block = self.pp_block(lr.def_begin);
+            assert_eq!(def_block, self.pp_block(lr.def_end));
+
+            // Check that the def interval isn't backwards.
+            match self.cmp(lr.def_begin, lr.def_end) {
+                Ordering::Equal => assert!(lr.liveins.is_empty()),
+                Ordering::Greater => {
+                    panic!("Backwards def interval: {}-{}", lr.def_begin, lr.def_end)
+                }
+                Ordering::Less => {}
+            }
+
+            // Check the live-in intervals.
+ let mut prev_end = None; + for Interval { begin, end } in lr.liveins.iter() { + let begin = *begin; + let end = *end; + + assert_eq!(self.cmp(begin, end), Ordering::Less); + if let Some(e) = prev_end { + assert_eq!(self.cmp(e, begin), Ordering::Less); + } + + assert!( + self.cmp(lr.def_end, begin) == Ordering::Less + || self.cmp(lr.def_begin, end) == Ordering::Greater, + "Interval can't overlap the def block" + ); + + // Save for next round. + prev_end = Some(end); + } + } + } + + // Singleton `ProgramOrder` for tests below. + const PO: &'static ProgOrder = &ProgOrder {}; + + #[test] + fn dead_def_range() { + let v0 = Value::new(0); + let e0 = Block::new(0); + let i1 = Inst::new(1); + let i2 = Inst::new(2); + let e2 = Block::new(2); + let lr = GenericLiveRange::new(v0, i1.into(), Default::default()); + assert!(lr.is_dead()); + assert!(lr.is_local()); + assert_eq!(lr.def(), i1.into()); + assert_eq!(lr.def_local_end(), i1.into()); + assert_eq!(lr.livein_local_end(e2, PO), None); + PO.validate(&lr); + + // A dead live range overlaps its own def program point. + assert!(lr.overlaps_def(i1.into(), e0, PO)); + assert!(!lr.overlaps_def(i2.into(), e0, PO)); + assert!(!lr.overlaps_def(e0.into(), e0, PO)); + } + + #[test] + fn dead_arg_range() { + let v0 = Value::new(0); + let e2 = Block::new(2); + let lr = GenericLiveRange::new(v0, e2.into(), Default::default()); + assert!(lr.is_dead()); + assert!(lr.is_local()); + assert_eq!(lr.def(), e2.into()); + assert_eq!(lr.def_local_end(), e2.into()); + // The def interval of an block argument does not count as live-in. + assert_eq!(lr.livein_local_end(e2, PO), None); + PO.validate(&lr); + } + + #[test] + fn local_def() { + let v0 = Value::new(0); + let e10 = Block::new(10); + let i11 = Inst::new(11); + let i12 = Inst::new(12); + let i13 = Inst::new(13); + let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default()); + + assert_eq!(lr.extend_in_block(e10, i13, PO), false); + PO.validate(&lr); + assert!(!lr.is_dead()); + assert!(lr.is_local()); + assert_eq!(lr.def(), i11.into()); + assert_eq!(lr.def_local_end(), i13.into()); + + // Extending to an already covered inst should not change anything. + assert_eq!(lr.extend_in_block(e10, i12, PO), false); + PO.validate(&lr); + assert_eq!(lr.def(), i11.into()); + assert_eq!(lr.def_local_end(), i13.into()); + } + + #[test] + fn local_arg() { + let v0 = Value::new(0); + let e10 = Block::new(10); + let i11 = Inst::new(11); + let i12 = Inst::new(12); + let i13 = Inst::new(13); + let mut lr = GenericLiveRange::new(v0, e10.into(), Default::default()); + + // Extending a dead block argument in its own block should not indicate that a live-in + // interval was created. + assert_eq!(lr.extend_in_block(e10, i12, PO), false); + PO.validate(&lr); + assert!(!lr.is_dead()); + assert!(lr.is_local()); + assert_eq!(lr.def(), e10.into()); + assert_eq!(lr.def_local_end(), i12.into()); + + // Extending to an already covered inst should not change anything. + assert_eq!(lr.extend_in_block(e10, i11, PO), false); + PO.validate(&lr); + assert_eq!(lr.def(), e10.into()); + assert_eq!(lr.def_local_end(), i12.into()); + + // Extending further. 
+        assert_eq!(lr.extend_in_block(e10, i13, PO), false);
+        PO.validate(&lr);
+        assert_eq!(lr.def(), e10.into());
+        assert_eq!(lr.def_local_end(), i13.into());
+    }
+
+    #[test]
+    fn global_def() {
+        let v0 = Value::new(0);
+        let e10 = Block::new(10);
+        let i11 = Inst::new(11);
+        let i12 = Inst::new(12);
+        let e20 = Block::new(20);
+        let i21 = Inst::new(21);
+        let i22 = Inst::new(22);
+        let i23 = Inst::new(23);
+        let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());
+
+        assert_eq!(lr.extend_in_block(e10, i12, PO), false);
+
+        // Adding a live-in interval.
+        assert_eq!(lr.extend_in_block(e20, i22, PO), true);
+        PO.validate(&lr);
+        assert_eq!(lr.livein_local_end(e20, PO), Some(i22));
+
+        // Extending to an already covered inst in the live-in should not change anything.
+        assert_eq!(lr.extend_in_block(e20, i21, PO), false);
+        assert_eq!(lr.livein_local_end(e20, PO), Some(i22));
+
+        // Extending the existing live-in.
+        assert_eq!(lr.extend_in_block(e20, i23, PO), false);
+        PO.validate(&lr);
+        assert_eq!(lr.livein_local_end(e20, PO), Some(i23));
+    }
+
+    #[test]
+    fn coalesce() {
+        let v0 = Value::new(0);
+        let i11 = Inst::new(11);
+        let e20 = Block::new(20);
+        let i21 = Inst::new(21);
+        let e30 = Block::new(30);
+        let i31 = Inst::new(31);
+        let e40 = Block::new(40);
+        let i41 = Inst::new(41);
+        let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());
+
+        assert_eq!(lr.extend_in_block(e30, i31, PO), true);
+        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e30, i31)]);
+
+        // Coalesce to previous
+        assert_eq!(lr.extend_in_block(e40, i41, PO), true);
+        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e30, i41)]);
+
+        // Coalesce to next
+        assert_eq!(lr.extend_in_block(e20, i21, PO), true);
+        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i41)]);
+
+        let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());
+
+        assert_eq!(lr.extend_in_block(e40, i41, PO), true);
+        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e40, i41)]);
+
+        assert_eq!(lr.extend_in_block(e20, i21, PO), true);
+        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i21), (e40, i41)]);
+
+        // Coalesce to previous and next
+        assert_eq!(lr.extend_in_block(e30, i31, PO), true);
+        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i41)]);
+    }
+}
diff --git a/cranelift/codegen/src/regalloc/mod.rs b/cranelift/codegen/src/regalloc/mod.rs
new file mode 100644
index 0000000000..37fcccb3b0
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/mod.rs
@@ -0,0 +1,26 @@
+//! Register allocation.
+//!
+//! This module contains data structures and algorithms used for register allocation.
+
+pub mod coloring;
+pub mod live_value_tracker;
+pub mod liveness;
+pub mod liverange;
+pub mod register_set;
+pub mod virtregs;
+
+mod affinity;
+mod branch_splitting;
+mod coalescing;
+mod context;
+mod diversion;
+mod pressure;
+mod reload;
+mod safepoint;
+mod solver;
+mod spilling;
+
+pub use self::context::Context;
+pub use self::diversion::{EntryRegDiversions, RegDiversions};
+pub use self::register_set::RegisterSet;
+pub use self::safepoint::emit_stackmaps;
diff --git a/cranelift/codegen/src/regalloc/pressure.rs b/cranelift/codegen/src/regalloc/pressure.rs
new file mode 100644
index 0000000000..3783a78e28
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/pressure.rs
@@ -0,0 +1,385 @@
+//! Register pressure tracking.
+//!
+//! SSA-based register allocation depends on a spilling phase that "lowers register pressure
+//! sufficiently". This module defines the data structures needed to measure register pressure
+//! accurately enough to guarantee that the coloring phase will not run out of registers.
+//!
+//! Ideally, measuring register pressure amounts to simply counting the number of live registers at +//! any given program point. This simplistic method has two problems: +//! +//! 1. Registers are not interchangeable. Most ISAs have separate integer and floating-point +//! register banks, so we need to at least count the number of live registers in each register +//! bank separately. +//! +//! 2. Some ISAs have complicated register aliasing properties. In particular, the 32-bit ARM +//! ISA has a floating-point register bank where two 32-bit registers alias one 64-bit register. +//! This makes it difficult to accurately measure register pressure. +//! +//! This module deals with the problems via *register banks* and *top-level register classes*. +//! Register classes in different register banks are completely independent, so we can count +//! registers in one bank without worrying about the other bank at all. +//! +//! All register classes have a unique top-level register class, and we will count registers for +//! each top-level register class individually. However, a register bank can have multiple +//! top-level register classes that interfere with each other, so all top-level counts need to +//! be considered when determining how many more registers can be allocated. +//! +//! Currently, the only register bank with multiple top-level registers is the `arm32` +//! floating-point register bank which has `S`, `D`, and `Q` top-level classes. +//! +//! # Base and transient counts +//! +//! We maintain two separate register counts per top-level register class: base counts and +//! transient counts. The base counts are adjusted with the `take` and `free` functions. The +//! transient counts are adjusted with `take_transient` and `free_transient`. + +// Remove once we're using the pressure tracker. +#![allow(dead_code)] + +use crate::isa::registers::{RegClass, RegClassMask, RegInfo}; +use crate::regalloc::RegisterSet; +use core::cmp::min; +use core::fmt; +use core::iter::ExactSizeIterator; +use cranelift_codegen_shared::constants::MAX_TRACKED_TOP_RCS; + +/// Information per top-level register class. +/// +/// Everything but the counts is static information computed from the constructor arguments. +#[derive(Default)] +struct TopRC { + /// Number of registers currently used from this register class. + base_count: u32, + transient_count: u32, + + /// Max number of registers that can be allocated. + limit: u32, + + /// Register units per register. + width: u8, + + /// The first aliasing top-level RC. + first_toprc: u8, + + /// The number of aliasing top-level RCs. + num_toprcs: u8, +} + +impl TopRC { + fn total_count(&self) -> u32 { + self.base_count + self.transient_count + } +} + +pub struct Pressure { + /// Bit mask of top-level register classes that are aliased by other top-level register classes. + /// Unaliased register classes can use a simpler interference algorithm. + aliased: RegClassMask, + + /// Current register counts per top-level register class. + toprc: [TopRC; MAX_TRACKED_TOP_RCS], +} + +impl Pressure { + /// Create a new register pressure tracker. + pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Self { + let mut p = Self { + aliased: 0, + toprc: Default::default(), + }; + + // Get the layout of aliasing top-level register classes from the register banks. 
+ for bank in reginfo.banks { + let first = bank.first_toprc; + let num = bank.num_toprcs; + + if bank.pressure_tracking { + for rc in &mut p.toprc[first..first + num] { + rc.first_toprc = first as u8; + rc.num_toprcs = num as u8; + } + + // Flag the top-level register classes with aliases. + if num > 1 { + p.aliased |= ((1 << num) - 1) << first; + } + } else { + // This bank has no pressure tracking, so its top-level register classes may exceed + // `MAX_TRACKED_TOPRCS`. Fill in dummy entries. + for rc in &mut p.toprc[first..min(first + num, MAX_TRACKED_TOP_RCS)] { + // These aren't used if we don't set the `aliased` bit. + rc.first_toprc = !0; + rc.limit = !0; + } + } + } + + // Compute per-class limits from `usable`. + for (toprc, rc) in p + .toprc + .iter_mut() + .take_while(|t| t.num_toprcs > 0) + .zip(reginfo.classes) + { + toprc.limit = usable.iter(rc).len() as u32; + toprc.width = rc.width; + } + + p + } + + /// Check for an available register in the register class `rc`. + /// + /// If it is possible to allocate one more register from `rc`'s top-level register class, + /// returns 0. + /// + /// If not, returns a bit-mask of top-level register classes that are interfering. Register + /// pressure should be eased in one of the returned top-level register classes before calling + /// `can_take()` to check again. + fn check_avail(&self, rc: RegClass) -> RegClassMask { + let entry = match self.toprc.get(rc.toprc as usize) { + None => return 0, // Not a pressure tracked bank. + Some(e) => e, + }; + let mask = 1 << rc.toprc; + if (self.aliased & mask) == 0 { + // This is a simple unaliased top-level register class. + if entry.total_count() < entry.limit { + 0 + } else { + mask + } + } else { + // This is the more complicated case. The top-level register class has aliases. + self.check_avail_aliased(entry) + } + } + + /// Check for an available register in a top-level register class that may have aliases. + /// + /// This is the out-of-line slow path for `check_avail()`. + fn check_avail_aliased(&self, entry: &TopRC) -> RegClassMask { + let first = usize::from(entry.first_toprc); + let num = usize::from(entry.num_toprcs); + let width = u32::from(entry.width); + let ulimit = entry.limit * width; + + // Count up the number of available register units. + let mut units = 0; + for (rc, rci) in self.toprc[first..first + num].iter().zip(first..) { + let rcw = u32::from(rc.width); + // If `rc.width` is smaller than `width`, each register in `rc` could potentially block + // one of ours. This is assuming that none of the smaller registers are straddling the + // bigger ones. + // + // If `rc.width` is larger than `width`, we are also assuming that the registers are + // aligned and `rc.width` is a multiple of `width`. + let u = if rcw < width { + // We can't take more than the total number of register units in the class. + // This matters for arm32 S-registers which can only ever lock out 16 D-registers. + min(rc.total_count() * width, rc.limit * rcw) + } else { + rc.total_count() * rcw + }; + + // If this top-level RC on its own is responsible for exceeding our limit, return it + // early to guarantee that registers here are spilled before spilling other registers + // unnecessarily. + if u >= ulimit { + return 1 << rci; + } + + units += u; + } + + // We've counted up the worst-case number of register units claimed by all aliasing + // classes. Compare to the unit limit in this class. + if units < ulimit { + 0 + } else { + // Registers need to be spilled from any one of the aliasing classes. 
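+            // For instance (hypothetical layout), with `first == 1` and `num == 3` this
+            // returns 0b1110: easing pressure in any of the three aliasing classes helps.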
+ ((1 << num) - 1) << first + } + } + + /// Take a register from `rc`. + /// + /// This does not check if there are enough registers available. + pub fn take(&mut self, rc: RegClass) { + if let Some(t) = self.toprc.get_mut(rc.toprc as usize) { + t.base_count += 1; + } + } + + /// Free a register in `rc`. + pub fn free(&mut self, rc: RegClass) { + if let Some(t) = self.toprc.get_mut(rc.toprc as usize) { + t.base_count -= 1; + } + } + + /// Reset all counts to 0, both base and transient. + pub fn reset(&mut self) { + for e in &mut self.toprc { + e.base_count = 0; + e.transient_count = 0; + } + } + + /// Try to increment a transient counter. + /// + /// This will fail if there are not enough registers available. + pub fn take_transient(&mut self, rc: RegClass) -> Result<(), RegClassMask> { + let mask = self.check_avail(rc); + if mask == 0 { + if let Some(t) = self.toprc.get_mut(rc.toprc as usize) { + t.transient_count += 1; + } + + Ok(()) + } else { + Err(mask) + } + } + + /// Reset all transient counts to 0. + pub fn reset_transient(&mut self) { + for e in &mut self.toprc { + e.transient_count = 0; + } + } + + /// Preserve the transient counts by transferring them to the base counts. + pub fn preserve_transient(&mut self) { + for e in &mut self.toprc { + e.base_count += e.transient_count; + e.transient_count = 0; + } + } +} + +impl fmt::Display for Pressure { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Pressure[")?; + for rc in &self.toprc { + if rc.limit > 0 && rc.limit < !0 { + write!(f, " {}+{}/{}", rc.base_count, rc.transient_count, rc.limit)?; + } + } + write!(f, " ]") + } +} + +#[cfg(test)] +#[cfg(feature = "arm32")] +mod tests { + use super::Pressure; + use crate::isa::{RegClass, TargetIsa}; + use crate::regalloc::RegisterSet; + use alloc::boxed::Box; + use core::borrow::Borrow; + use core::str::FromStr; + use target_lexicon::triple; + + // Make an arm32 `TargetIsa`, if possible. + fn arm32() -> Option> { + use crate::isa; + use crate::settings; + + let shared_builder = settings::builder(); + let shared_flags = settings::Flags::new(shared_builder); + + isa::lookup(triple!("arm")) + .ok() + .map(|b| b.finish(shared_flags)) + } + + // Get a register class by name. 
+    fn rc_by_name(isa: &dyn TargetIsa, name: &str) -> RegClass {
+        isa.register_info()
+            .classes
+            .iter()
+            .find(|rc| rc.name == name)
+            .expect("Can't find named register class.")
+    }
+
+    #[test]
+    fn basic_counting() {
+        let isa = arm32().expect("This test requires arm32 support");
+        let isa = isa.borrow();
+        let gpr = rc_by_name(isa, "GPR");
+        let s = rc_by_name(isa, "S");
+        let reginfo = isa.register_info();
+        let regs = RegisterSet::new();
+
+        let mut pressure = Pressure::new(&reginfo, &regs);
+        let mut count = 0;
+        while pressure.check_avail(gpr) == 0 {
+            pressure.take(gpr);
+            count += 1;
+        }
+        assert_eq!(count, 16);
+        assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
+        assert_eq!(pressure.check_avail(s), 0);
+        pressure.free(gpr);
+        assert_eq!(pressure.check_avail(gpr), 0);
+        pressure.take(gpr);
+        assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
+        assert_eq!(pressure.check_avail(s), 0);
+        pressure.reset();
+        assert_eq!(pressure.check_avail(gpr), 0);
+        assert_eq!(pressure.check_avail(s), 0);
+    }
+
+    #[test]
+    fn arm_float_bank() {
+        let isa = arm32().expect("This test requires arm32 support");
+        let isa = isa.borrow();
+        let s = rc_by_name(isa, "S");
+        let d = rc_by_name(isa, "D");
+        let q = rc_by_name(isa, "Q");
+        let reginfo = isa.register_info();
+        let regs = RegisterSet::new();
+
+        let mut pressure = Pressure::new(&reginfo, &regs);
+        assert_eq!(pressure.check_avail(s), 0);
+        assert_eq!(pressure.check_avail(d), 0);
+        assert_eq!(pressure.check_avail(q), 0);
+
+        // Allocating a single S-register should not affect availability.
+        pressure.take(s);
+        assert_eq!(pressure.check_avail(s), 0);
+        assert_eq!(pressure.check_avail(d), 0);
+        assert_eq!(pressure.check_avail(q), 0);
+
+        pressure.take(d);
+        assert_eq!(pressure.check_avail(s), 0);
+        assert_eq!(pressure.check_avail(d), 0);
+        assert_eq!(pressure.check_avail(q), 0);
+
+        pressure.take(q);
+        assert_eq!(pressure.check_avail(s), 0);
+        assert_eq!(pressure.check_avail(d), 0);
+        assert_eq!(pressure.check_avail(q), 0);
+
+        // Take a total of 16 S-regs.
+        for _ in 1..16 {
+            pressure.take(s);
+        }
+        assert_eq!(pressure.check_avail(s), 0);
+        assert_eq!(pressure.check_avail(d), 0);
+        assert_eq!(pressure.check_avail(q), 0);
+
+        // We've taken 16 S, 1 D, and 1 Q. There should be 6 more Qs.
+        for _ in 0..6 {
+            assert_eq!(pressure.check_avail(d), 0);
+            assert_eq!(pressure.check_avail(q), 0);
+            pressure.take(q);
+        }
+
+        // We've taken 16 S, 1 D, and 7 Qs.
+        assert!(pressure.check_avail(s) != 0);
+        assert_eq!(pressure.check_avail(d), 0);
+        assert!(pressure.check_avail(q) != 0);
+    }
+}
diff --git a/cranelift/codegen/src/regalloc/register_set.rs b/cranelift/codegen/src/regalloc/register_set.rs
new file mode 100644
index 0000000000..52b8a6fa0a
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/register_set.rs
@@ -0,0 +1,391 @@
+//! Set of allocatable registers as a bit vector of register units.
+//!
+//! While allocating registers, we need to keep track of which registers are available and which
+//! registers are in use. Since registers can alias in different ways, we track this via the
+//! "register unit" abstraction. Every register contains one or more register units. Registers that
+//! share a register unit can't be in use at the same time.
+
+use crate::isa::registers::{RegClass, RegInfo, RegUnit, RegUnitMask};
+use core::char;
+use core::fmt;
+use core::iter::ExactSizeIterator;
+use core::mem::size_of_val;
+
+/// Set of registers available for allocation.
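+///
+/// A minimal usage sketch (the register class `rc` and unit number are hypothetical):
+///
+/// ```text
+/// let mut regs = RegisterSet::new(); // all register units available
+/// regs.take(rc, 5);                  // allocate unit 5 from class `rc`
+/// assert!(!regs.is_avail(rc, 5));
+/// regs.free(rc, 5);                  // give it back
+/// ```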
+#[derive(Clone)] +pub struct RegisterSet { + avail: RegUnitMask, +} + +// Given a register class and a register unit in the class, compute a word index and a bit mask of +// register units representing that register. +// +// Note that a register is not allowed to straddle words. +fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) { + // Bit mask representing the register. It is `rc.width` consecutive units. + let width_bits = (1 << rc.width) - 1; + // Index into avail[] of the word containing `reg`. + let word_index = (reg / 32) as usize; + // The actual bits in the word that cover `reg`. + let reg_bits = width_bits << (reg % 32); + + (word_index, reg_bits) +} + +impl RegisterSet { + /// Create a new register set with all registers available. + /// + /// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of + /// allocatable registers where reserved registers have been filtered out. + pub fn new() -> Self { + Self { avail: [!0; 3] } + } + + /// Create a new register set with no registers available. + pub fn empty() -> Self { + Self { avail: [0; 3] } + } + + /// Returns `true` if the specified register is available. + pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool { + let (idx, bits) = bitmask(rc, reg); + (self.avail[idx] & bits) == bits + } + + /// Allocate `reg` from `rc` so it is no longer available. + /// + /// It is an error to take a register that doesn't have all of its register units available. + pub fn take(&mut self, rc: RegClass, reg: RegUnit) { + let (idx, bits) = bitmask(rc, reg); + debug_assert!( + (self.avail[idx] & bits) == bits, + "{}:{} not available in {}", + rc, + rc.info.display_regunit(reg), + self.display(rc.info) + ); + self.avail[idx] &= !bits; + } + + /// Return `reg` and all of its register units to the set of available registers. + pub fn free(&mut self, rc: RegClass, reg: RegUnit) { + let (idx, bits) = bitmask(rc, reg); + debug_assert!( + (self.avail[idx] & bits) == 0, + "{}:{} is already free in {}", + rc, + rc.info.display_regunit(reg), + self.display(rc.info) + ); + self.avail[idx] |= bits; + } + + /// Return an iterator over all available registers belonging to the register class `rc`. + /// + /// This doesn't allocate anything from the set; use `take()` for that. + pub fn iter(&self, rc: RegClass) -> RegSetIter { + // Start by copying the RC mask. It is a single set bit for each register in the class. + let mut rsi = RegSetIter { regs: rc.mask }; + + // Mask out the unavailable units. + for idx in 0..self.avail.len() { + // If a single unit in a register is unavailable, the whole register can't be used. If + // a register straddles a word boundary, it will be marked as unavailable. There's an + // assertion in `cranelift-codegen/meta/src/cdsl/regs.rs` to check for that. + for i in 0..rc.width { + rsi.regs[idx] &= self.avail[idx] >> i; + } + } + rsi + } + + /// Check if any register units allocated out of this set interferes with units allocated out + /// of `other`. + /// + /// This assumes that unused bits are 1. + pub fn interferes_with(&self, other: &Self) -> bool { + self.avail + .iter() + .zip(&other.avail) + .any(|(&x, &y)| (x | y) != !0) + } + + /// Intersect this set of registers with `other`. This has the effect of removing any register + /// units from this set that are not in `other`. 
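+    ///
+    /// For example, if unit 5 has been taken from `self` and unit 6 from `other`, then after
+    /// `self.intersect(&other)` both units are unavailable in `self` (the `interference` test
+    /// below exercises a similar sequence).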
+    pub fn intersect(&mut self, other: &Self) {
+        for (x, &y) in self.avail.iter_mut().zip(&other.avail) {
+            *x &= y;
+        }
+    }
+
+    /// Return an object that can display this register set, using the register info from the
+    /// target ISA.
+    pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayRegisterSet<'a> {
+        DisplayRegisterSet(self.clone(), regs.into())
+    }
+}
+
+/// Iterator over available registers in a register class.
+#[derive(Clone)]
+pub struct RegSetIter {
+    regs: RegUnitMask,
+}
+
+impl Iterator for RegSetIter {
+    type Item = RegUnit;
+
+    fn next(&mut self) -> Option<RegUnit> {
+        let mut unit_offset = 0;
+
+        // Find the first set bit in `self.regs`.
+        for word in &mut self.regs {
+            if *word != 0 {
+                // Compute the register unit number from the lowest set bit in the word.
+                let unit = unit_offset + word.trailing_zeros() as RegUnit;
+
+                // Clear that lowest bit so we won't find it again.
+                *word &= *word - 1;
+
+                return Some(unit);
+            }
+            // How many register units were there in the word? This is a constant 32 for `u32` etc.
+            unit_offset += 8 * size_of_val(word) as RegUnit;
+        }
+
+        // All of `self.regs` is 0.
+        None
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let bits = self.regs.iter().map(|&w| w.count_ones() as usize).sum();
+        (bits, Some(bits))
+    }
+}
+
+impl RegSetIter {
+    pub fn rnext(&mut self) -> Option<RegUnit> {
+        let num_words = self.regs.len();
+        let bits_per_word = 8 * size_of_val(&self.regs[0]);
+
+        // Find the last set bit in `self.regs`.
+        for i in 0..num_words {
+            let word_ix = num_words - 1 - i;
+
+            let word = &mut self.regs[word_ix];
+            if *word != 0 {
+                let lzeroes = word.leading_zeros() as usize;
+
+                // Clear that highest bit so we won't find it again.
+                *word &= !(1 << (bits_per_word - 1 - lzeroes));
+
+                return Some((word_ix * bits_per_word + bits_per_word - 1 - lzeroes) as RegUnit);
+            }
+        }
+
+        // All of `self.regs` is 0.
+        None
+    }
+}
+
+impl ExactSizeIterator for RegSetIter {}
+
+/// Displaying a `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
+pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>);
+
+impl<'a> fmt::Display for DisplayRegisterSet<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "[")?;
+        match self.1 {
+            None => {
+                for w in &self.0.avail {
+                    write!(f, " #{:08x}", w)?;
+                }
+            }
+            Some(reginfo) => {
+                let toprcs = reginfo
+                    .banks
+                    .iter()
+                    .map(|b| b.first_toprc + b.num_toprcs)
+                    .max()
+                    .expect("No register banks");
+                for rc in &reginfo.classes[0..toprcs] {
+                    if rc.width == 1 {
+                        let bank = &reginfo.banks[rc.bank as usize];
+                        write!(f, " {}: ", rc)?;
+                        for offset in 0..bank.units {
+                            let reg = bank.first_unit + offset;
+                            if !rc.contains(reg) {
+                                continue;
+                            }
+                            if !self.0.is_avail(rc, reg) {
+                                write!(f, "-")?;
+                                continue;
+                            }
+                            // Display individual registers as either the second letter of their
+                            // name or the last digit of their number.
+                            // This works for x86 (rax, rbx, ...) and for numbered regs.
+                            write!(
+                                f,
+                                "{}",
+                                bank.names
+                                    .get(offset as usize)
+                                    .and_then(|name| name.chars().nth(1))
+                                    .unwrap_or_else(|| char::from_digit(
+                                        u32::from(offset % 10),
+                                        10
+                                    )
+                                    .unwrap())
+                            )?;
+                        }
+                    }
+                }
+            }
+        }
+        write!(f, " ]")
+    }
+}
+
+impl fmt::Display for RegisterSet {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        self.display(None).fmt(f)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::isa::registers::{RegClass, RegClassData};
+    use alloc::vec::Vec;
+
+    // Register classes for testing.
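+    //
+    // `GPR` selects units 28..36: bits 28..32 of the first 32-bit mask word (0xf0000000)
+    // plus bits 0..4 of the second (0x0000000f). `DPR` covers the same range with two-unit
+    // registers, so its mask has every other bit set.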
+    const GPR: RegClass = &RegClassData {
+        name: "GPR",
+        index: 0,
+        width: 1,
+        bank: 0,
+        toprc: 0,
+        first: 28,
+        subclasses: 0,
+        mask: [0xf0000000, 0x0000000f, 0],
+        info: &INFO,
+        pinned_reg: None,
+    };
+
+    const DPR: RegClass = &RegClassData {
+        name: "DPR",
+        index: 0,
+        width: 2,
+        bank: 0,
+        toprc: 0,
+        first: 28,
+        subclasses: 0,
+        mask: [0x50000000, 0x0000000a, 0],
+        info: &INFO,
+        pinned_reg: None,
+    };
+
+    const INFO: RegInfo = RegInfo {
+        banks: &[],
+        classes: &[],
+    };
+
+    const RSI_1: RegSetIter = RegSetIter {
+        regs: [0x31415927, 0x27182818, 0x14141356],
+    };
+
+    const RSI_2: RegSetIter = RegSetIter {
+        regs: [0x00000000, 0x00000000, 0x00000000],
+    };
+
+    const RSI_3: RegSetIter = RegSetIter {
+        regs: [0xffffffff, 0xffffffff, 0xffffffff],
+    };
+
+    fn reverse_regset_iteration_work(rsi: &RegSetIter) {
+        // Check the reverse iterator by comparing its output with the forward iterator.
+        let rsi_f = (*rsi).clone();
+        let results_f = rsi_f.collect::<Vec<_>>();
+
+        let mut rsi_r = (*rsi).clone();
+        let mut results_r = Vec::<RegUnit>::new();
+        while let Some(r) = rsi_r.rnext() {
+            results_r.push(r);
+        }
+
+        let len_f = results_f.len();
+        let len_r = results_r.len();
+        assert_eq!(len_f, len_r);
+
+        for i in 0..len_f {
+            assert_eq!(results_f[i], results_r[len_f - 1 - i]);
+        }
+    }
+
+    #[test]
+    fn reverse_regset_iteration() {
+        reverse_regset_iteration_work(&RSI_1);
+        reverse_regset_iteration_work(&RSI_2);
+        reverse_regset_iteration_work(&RSI_3);
+    }
+
+    #[test]
+    fn put_and_take() {
+        let mut regs = RegisterSet::new();
+
+        // `GPR` has units 28-36.
+        assert_eq!(regs.iter(GPR).len(), 8);
+        assert_eq!(regs.iter(GPR).count(), 8);
+        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [28, 30, 33, 35]);
+
+        assert!(regs.is_avail(GPR, 29));
+        regs.take(GPR, 29);
+        assert!(!regs.is_avail(GPR, 29));
+
+        assert_eq!(regs.iter(GPR).count(), 7);
+        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
+
+        assert!(regs.is_avail(GPR, 30));
+        regs.take(GPR, 30);
+        assert!(!regs.is_avail(GPR, 30));
+
+        assert_eq!(regs.iter(GPR).count(), 6);
+        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
+
+        assert!(regs.is_avail(GPR, 32));
+        regs.take(GPR, 32);
+        assert!(!regs.is_avail(GPR, 32));
+
+        assert_eq!(regs.iter(GPR).count(), 5);
+        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
+
+        regs.free(GPR, 30);
+        assert!(regs.is_avail(GPR, 30));
+        assert!(!regs.is_avail(GPR, 29));
+        assert!(!regs.is_avail(GPR, 32));
+
+        assert_eq!(regs.iter(GPR).count(), 6);
+        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
+
+        regs.free(GPR, 32);
+        assert!(regs.is_avail(GPR, 31));
+        assert!(!regs.is_avail(GPR, 29));
+        assert!(regs.is_avail(GPR, 32));
+
+        assert_eq!(regs.iter(GPR).count(), 7);
+        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
+    }
+
+    #[test]
+    fn interference() {
+        let mut regs1 = RegisterSet::new();
+        let mut regs2 = RegisterSet::new();
+
+        assert!(!regs1.interferes_with(&regs2));
+        regs1.take(GPR, 32);
+        assert!(!regs1.interferes_with(&regs2));
+        regs2.take(GPR, 31);
+        assert!(!regs1.interferes_with(&regs2));
+        regs1.intersect(&regs2);
+        assert!(regs1.interferes_with(&regs2));
+    }
+}
diff --git a/cranelift/codegen/src/regalloc/reload.rs b/cranelift/codegen/src/regalloc/reload.rs
new file mode 100644
index 0000000000..cdafb68af8
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/reload.rs
@@ -0,0 +1,485 @@
+//! Reload pass
+//!
+//! The reload pass runs between the spilling and coloring passes. Its primary responsibility is to
+//! insert `spill` and `fill` instructions such that instruction operands expecting a register will
+//! get a value with register affinity, and operands expecting a stack slot will get a value with
+//! stack affinity.
+//!
+//! The secondary responsibility of the reload pass is to reuse values in registers as much as
+//! possible to minimize the number of `fill` instructions needed. This must not cause the register
+//! pressure limits to be exceeded.
+
+use crate::cursor::{Cursor, EncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::entity::{SparseMap, SparseMapValue};
+use crate::ir::{AbiParam, ArgumentLoc, InstBuilder};
+use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueLoc};
+use crate::isa::RegClass;
+use crate::isa::{ConstraintKind, EncInfo, Encoding, RecipeConstraints, TargetIsa};
+use crate::regalloc::affinity::Affinity;
+use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
+use crate::regalloc::liveness::Liveness;
+use crate::timing;
+use crate::topo_order::TopoOrder;
+use alloc::vec::Vec;
+use log::debug;
+
+/// Reusable data structures for the reload pass.
+pub struct Reload {
+    candidates: Vec<ReloadCandidate>,
+    reloads: SparseMap<Value, ReloadedValue>,
+}
+
+/// Context data structure that gets instantiated once per pass.
+struct Context<'a> {
+    cur: EncCursor<'a>,
+
+    // Cached ISA information.
+    // We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object.
+    encinfo: EncInfo,
+
+    // References to contextual data structures we need.
+    domtree: &'a DominatorTree,
+    liveness: &'a mut Liveness,
+    topo: &'a mut TopoOrder,
+
+    candidates: &'a mut Vec<ReloadCandidate>,
+    reloads: &'a mut SparseMap<Value, ReloadedValue>,
+}
+
+impl Reload {
+    /// Create a new blank reload pass.
+    pub fn new() -> Self {
+        Self {
+            candidates: Vec::new(),
+            reloads: SparseMap::new(),
+        }
+    }
+
+    /// Clear all data structures in this reload pass.
+    pub fn clear(&mut self) {
+        self.candidates.clear();
+        self.reloads.clear();
+    }
+
+    /// Run the reload algorithm over `func`.
+    pub fn run(
+        &mut self,
+        isa: &dyn TargetIsa,
+        func: &mut Function,
+        domtree: &DominatorTree,
+        liveness: &mut Liveness,
+        topo: &mut TopoOrder,
+        tracker: &mut LiveValueTracker,
+    ) {
+        let _tt = timing::ra_reload();
+        debug!("Reload for:\n{}", func.display(isa));
+        let mut ctx = Context {
+            cur: EncCursor::new(func, isa),
+            encinfo: isa.encoding_info(),
+            domtree,
+            liveness,
+            topo,
+            candidates: &mut self.candidates,
+            reloads: &mut self.reloads,
+        };
+        ctx.run(tracker)
+    }
+}
+
+/// A reload candidate.
+///
+/// This represents a stack value that is used by the current instruction where a register is
+/// needed.
+struct ReloadCandidate {
+    argidx: usize,
+    value: Value,
+    regclass: RegClass,
+}
+
+/// A reloaded value.
+///
+/// This represents a value that has been reloaded into a register value from the stack.
+struct ReloadedValue {
+    stack: Value,
+    reg: Value,
+}
+
+impl SparseMapValue<Value> for ReloadedValue {
+    fn key(&self) -> Value {
+        self.stack
+    }
+}
+
+impl<'a> Context<'a> {
+    fn run(&mut self, tracker: &mut LiveValueTracker) {
+        self.topo.reset(self.cur.func.layout.blocks());
+        while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) {
+            self.visit_block(block, tracker);
+        }
+    }
+
+    fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) {
+        debug!("Reloading {}:", block);
+        self.visit_block_header(block, tracker);
+        tracker.drop_dead_params();
+
+        // visit_block_header() places us at the first interesting instruction in the block.
+ while let Some(inst) = self.cur.current_inst() { + if !self.cur.func.dfg[inst].opcode().is_ghost() { + // This instruction either has an encoding or has ABI constraints, so visit it to + // insert spills and fills as needed. + let encoding = self.cur.func.encodings[inst]; + self.visit_inst(block, inst, encoding, tracker); + tracker.drop_dead(inst); + } else { + // This is a ghost instruction with no encoding and no extra constraints, so we can + // just skip over it. + self.cur.next_inst(); + } + } + } + + /// Process the block parameters. Move to the next instruction in the block to be processed + fn visit_block_header(&mut self, block: Block, tracker: &mut LiveValueTracker) { + let (liveins, args) = tracker.block_top( + block, + &self.cur.func.dfg, + self.liveness, + &self.cur.func.layout, + self.domtree, + ); + + if self.cur.func.layout.entry_block() == Some(block) { + debug_assert_eq!(liveins.len(), 0); + self.visit_entry_params(block, args); + } else { + self.visit_block_params(block, args); + } + } + + /// Visit the parameters on the entry block. + /// These values have ABI constraints from the function signature. + fn visit_entry_params(&mut self, block: Block, args: &[LiveValue]) { + debug_assert_eq!(self.cur.func.signature.params.len(), args.len()); + self.cur.goto_first_inst(block); + + for (arg_idx, arg) in args.iter().enumerate() { + let abi = self.cur.func.signature.params[arg_idx]; + match abi.location { + ArgumentLoc::Reg(_) => { + if arg.affinity.is_stack() { + // An incoming register parameter was spilled. Replace the parameter value + // with a temporary register value that is immediately spilled. + let reg = self + .cur + .func + .dfg + .replace_block_param(arg.value, abi.value_type); + let affinity = Affinity::abi(&abi, self.cur.isa); + self.liveness.create_dead(reg, block, affinity); + self.insert_spill(block, arg.value, reg); + } + } + ArgumentLoc::Stack(_) => { + debug_assert!(arg.affinity.is_stack()); + } + ArgumentLoc::Unassigned => panic!("Unexpected ABI location"), + } + } + } + + fn visit_block_params(&mut self, block: Block, _args: &[LiveValue]) { + self.cur.goto_first_inst(block); + } + + /// Process the instruction pointed to by `pos`, and advance the cursor to the next instruction + /// that needs processing. + fn visit_inst( + &mut self, + block: Block, + inst: Inst, + encoding: Encoding, + tracker: &mut LiveValueTracker, + ) { + self.cur.use_srcloc(inst); + + // Get the operand constraints for `inst` that we are trying to satisfy. + let constraints = self.encinfo.operand_constraints(encoding); + + // Identify reload candidates. + debug_assert!(self.candidates.is_empty()); + self.find_candidates(inst, constraints); + + // If we find a copy from a stack slot to the same stack slot, replace + // it with a `copy_nop` but otherwise ignore it. In particular, don't + // generate a reload immediately followed by a spill. The `copy_nop` + // has a zero-length encoding, so will disappear at emission time. 
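+        //
+        // Schematically (hypothetical values, both assigned to the same stack slot `ss0`):
+        //
+        //   v2 = copy v1     ; v1 @ ss0, v2 @ ss0
+        //
+        // becomes
+        //
+        //   v2 = copy_nop v1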
+ if let InstructionData::Unary { + opcode: Opcode::Copy, + arg, + } = self.cur.func.dfg[inst] + { + let dst_vals = self.cur.func.dfg.inst_results(inst); + if dst_vals.len() == 1 { + let dst_val = dst_vals[0]; + let can_transform = match ( + self.cur.func.locations[arg], + self.cur.func.locations[dst_val], + ) { + (ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => { + src_slot == dst_slot && { + let src_ty = self.cur.func.dfg.value_type(arg); + let dst_ty = self.cur.func.dfg.value_type(dst_val); + debug_assert!(src_ty == dst_ty); + // This limits the transformation to copies of the + // types: I128 I64 I32 I16 I8 F64 and F32, since that's + // the set of `copy_nop` encodings available. + src_ty.is_int() || src_ty.is_float() + } + } + _ => false, + }; + if can_transform { + // Convert the instruction into a `copy_nop`. + self.cur.func.dfg.replace(inst).copy_nop(arg); + let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok(); + debug_assert!(ok, "copy_nop encoding missing for this type"); + + // And move on to the next insn. + self.reloads.clear(); + let _ = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness); + self.cur.next_inst(); + self.candidates.clear(); + return; + } + } + } + + // Deal with all instructions not special-cased by the immediately + // preceding fragment. + if let InstructionData::Unary { + opcode: Opcode::Copy, + .. + } = self.cur.func.dfg[inst] + { + self.reload_copy_candidates(inst); + } else { + self.reload_inst_candidates(block, inst); + } + + // TODO: Reuse reloads for future instructions. + self.reloads.clear(); + + let (_throughs, _kills, defs) = + tracker.process_inst(inst, &self.cur.func.dfg, self.liveness); + + // Advance to the next instruction so we can insert any spills after the instruction. + self.cur.next_inst(); + + // Rewrite register defs that need to be spilled. + // + // Change: + // + // v2 = inst ... + // + // Into: + // + // v7 = inst ... + // v2 = spill v7 + // + // That way, we don't need to rewrite all future uses of v2. + if let Some(constraints) = constraints { + for (lv, op) in defs.iter().zip(constraints.outs) { + if lv.affinity.is_stack() && op.kind != ConstraintKind::Stack { + if let InstructionData::Unary { + opcode: Opcode::Copy, + arg, + } = self.cur.func.dfg[inst] + { + self.cur.func.dfg.replace(inst).spill(arg); + let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok(); + debug_assert!(ok); + } else { + let value_type = self.cur.func.dfg.value_type(lv.value); + let reg = self.cur.func.dfg.replace_result(lv.value, value_type); + self.liveness.create_dead(reg, inst, Affinity::new(op)); + self.insert_spill(block, lv.value, reg); + } + } + } + } + + // Same thing for spilled call return values. + let retvals = &defs[self.cur.func.dfg[inst] + .opcode() + .constraints() + .num_fixed_results()..]; + if !retvals.is_empty() { + let sig = self + .cur + .func + .dfg + .call_signature(inst) + .expect("Extra results on non-call instruction"); + for (i, lv) in retvals.iter().enumerate() { + let abi = self.cur.func.dfg.signatures[sig].returns[i]; + debug_assert!( + abi.location.is_reg(), + "expected reg; got {:?}", + abi.location + ); + if lv.affinity.is_stack() { + let reg = self.cur.func.dfg.replace_result(lv.value, abi.value_type); + self.liveness + .create_dead(reg, inst, Affinity::abi(&abi, self.cur.isa)); + self.insert_spill(block, lv.value, reg); + } + } + } + } + + // Reload the current candidates for the given `inst`. 
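+    //
+    // Schematically (hypothetical values), a use of the spilled value `v1` becomes
+    //
+    //   v5 = fill v1
+    //   v3 = iadd v5, v2   ; instead of v3 = iadd v1, v2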
+ fn reload_inst_candidates(&mut self, block: Block, inst: Inst) { + // Insert fill instructions before `inst` and replace `cand.value` with the filled value. + for cand in self.candidates.iter_mut() { + if let Some(reload) = self.reloads.get(cand.value) { + cand.value = reload.reg; + continue; + } + + let reg = self.cur.ins().fill(cand.value); + let fill = self.cur.built_inst(); + + self.reloads.insert(ReloadedValue { + stack: cand.value, + reg, + }); + cand.value = reg; + + // Create a live range for the new reload. + let affinity = Affinity::Reg(cand.regclass.into()); + self.liveness.create_dead(reg, fill, affinity); + self.liveness + .extend_locally(reg, block, inst, &self.cur.func.layout); + } + + // Rewrite instruction arguments. + // + // Only rewrite those arguments that were identified as candidates. This leaves block + // arguments on branches as-is without rewriting them. A spilled block argument needs to stay + // spilled because the matching block parameter is going to be in the same virtual register + // and therefore the same stack slot as the block argument value. + if !self.candidates.is_empty() { + let args = self.cur.func.dfg.inst_args_mut(inst); + while let Some(cand) = self.candidates.pop() { + args[cand.argidx] = cand.value; + } + } + } + + // Reload the current candidates for the given copy `inst`. + // + // As an optimization, replace a copy instruction where the argument has been spilled with + // a fill instruction. + fn reload_copy_candidates(&mut self, inst: Inst) { + // Copy instructions can only have one argument. + debug_assert!(self.candidates.is_empty() || self.candidates.len() == 1); + + if let Some(cand) = self.candidates.pop() { + self.cur.func.dfg.replace(inst).fill(cand.value); + let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok(); + debug_assert!(ok); + } + } + + // Find reload candidates for `inst` and add them to `self.candidates`. + // + // These are uses of spilled values where the operand constraint requires a register. + fn find_candidates(&mut self, inst: Inst, constraints: Option<&RecipeConstraints>) { + let args = self.cur.func.dfg.inst_args(inst); + + if let Some(constraints) = constraints { + for (argidx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() { + if op.kind != ConstraintKind::Stack && self.liveness[arg].affinity.is_stack() { + self.candidates.push(ReloadCandidate { + argidx, + value: arg, + regclass: op.regclass, + }) + } + } + } + + // If we only have the fixed arguments, we're done now. + let offset = self.cur.func.dfg[inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + if args.len() == offset { + return; + } + let var_args = &args[offset..]; + + // Handle ABI arguments. + if let Some(sig) = self.cur.func.dfg.call_signature(inst) { + handle_abi_args( + self.candidates, + &self.cur.func.dfg.signatures[sig].params, + var_args, + offset, + self.cur.isa, + self.liveness, + ); + } else if self.cur.func.dfg[inst].opcode().is_return() { + handle_abi_args( + self.candidates, + &self.cur.func.signature.returns, + var_args, + offset, + self.cur.isa, + self.liveness, + ); + } + } + + /// Insert a spill at `pos` and update data structures. + /// + /// - Insert `stack = spill reg` at `pos`, and assign an encoding. + /// - Move the `stack` live range starting point to the new instruction. + /// - Extend the `reg` live range to reach the new instruction. 
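+    ///
+    /// Schematically (hypothetical values), after `v7 = inst ...` this inserts
+    /// `v2 = spill v7` so that later uses of `v2` keep their stack affinity.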
+    fn insert_spill(&mut self, block: Block, stack: Value, reg: Value) {
+        self.cur.ins().with_result(stack).spill(reg);
+        let inst = self.cur.built_inst();
+
+        // Update live ranges.
+        self.liveness.move_def_locally(stack, inst);
+        self.liveness
+            .extend_locally(reg, block, inst, &self.cur.func.layout);
+    }
+}
+
+/// Find reload candidates in the instruction's ABI variable arguments. This handles both
+/// return values and call arguments.
+fn handle_abi_args(
+    candidates: &mut Vec<ReloadCandidate>,
+    abi_types: &[AbiParam],
+    var_args: &[Value],
+    offset: usize,
+    isa: &dyn TargetIsa,
+    liveness: &Liveness,
+) {
+    debug_assert_eq!(abi_types.len(), var_args.len());
+    for ((abi, &arg), argidx) in abi_types.iter().zip(var_args).zip(offset..) {
+        if abi.location.is_reg() {
+            let lv = liveness.get(arg).expect("Missing live range for ABI arg");
+            if lv.affinity.is_stack() {
+                candidates.push(ReloadCandidate {
+                    argidx,
+                    value: arg,
+                    regclass: isa.regclass_for_abi_type(abi.value_type),
+                });
+            }
+        }
+    }
+}
diff --git a/cranelift/codegen/src/regalloc/safepoint.rs b/cranelift/codegen/src/regalloc/safepoint.rs
new file mode 100644
index 0000000000..128900d360
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/safepoint.rs
@@ -0,0 +1,72 @@
+use crate::cursor::{Cursor, FuncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::ir::{Function, InstBuilder, InstructionData, Opcode, TrapCode};
+use crate::isa::TargetIsa;
+use crate::regalloc::live_value_tracker::LiveValueTracker;
+use crate::regalloc::liveness::Liveness;
+use alloc::vec::Vec;
+
+fn insert_and_encode_safepoint<'f>(
+    pos: &mut FuncCursor<'f>,
+    tracker: &LiveValueTracker,
+    isa: &dyn TargetIsa,
+) {
+    // Iterate through all live values, collect only the references.
+    let live_ref_values = tracker
+        .live()
+        .iter()
+        .filter(|live_value| pos.func.dfg.value_type(live_value.value).is_ref())
+        .map(|live_val| live_val.value)
+        .collect::<Vec<_>>();
+
+    if !live_ref_values.is_empty() {
+        pos.ins().safepoint(&live_ref_values);
+        // Move cursor to the new safepoint instruction to encode it.
+        if let Some(inst) = pos.prev_inst() {
+            let ok = pos.func.update_encoding(inst, isa).is_ok();
+            debug_assert!(ok);
+        }
+        // Restore cursor position.
+        pos.next_inst();
+    }
+}
+
+// The emit_stackmaps() function analyzes each instruction to retrieve the liveness of
+// the defs and operands by traversing a function's blocks in layout order.
+pub fn emit_stackmaps(
+    func: &mut Function,
+    domtree: &DominatorTree,
+    liveness: &Liveness,
+    tracker: &mut LiveValueTracker,
+    isa: &dyn TargetIsa,
+) {
+    let mut curr = func.layout.entry_block();
+
+    while let Some(block) = curr {
+        tracker.block_top(block, &func.dfg, liveness, &func.layout, domtree);
+        tracker.drop_dead_params();
+        let mut pos = FuncCursor::new(func);
+
+        // From the top of the block, step through the instructions.
+        pos.goto_top(block);
+
+        while let Some(inst) = pos.next_inst() {
+            if let InstructionData::Trap {
+                code: TrapCode::Interrupt,
+                ..
+            } = &pos.func.dfg[inst]
+            {
+                insert_and_encode_safepoint(&mut pos, tracker, isa);
+            } else if pos.func.dfg[inst].opcode().is_call() {
+                insert_and_encode_safepoint(&mut pos, tracker, isa);
+            } else if pos.func.dfg[inst].opcode() == Opcode::Safepoint {
+                panic!("safepoint instruction can only be used by the compiler!");
+            }
+
+            // Process the instruction and get rid of dead values.
+ tracker.process_inst(inst, &pos.func.dfg, liveness); + tracker.drop_dead(inst); + } + curr = func.layout.next_block(block); + } +} diff --git a/cranelift/codegen/src/regalloc/solver.rs b/cranelift/codegen/src/regalloc/solver.rs new file mode 100644 index 0000000000..7416ec9bc7 --- /dev/null +++ b/cranelift/codegen/src/regalloc/solver.rs @@ -0,0 +1,1396 @@ +//! Constraint solver for register coloring. +//! +//! The coloring phase of SSA-based register allocation is very simple in theory, but in practice +//! it is complicated by the various constraints imposed by individual instructions: +//! +//! - Call and return instructions have to satisfy ABI requirements for arguments and return +//! values. +//! - Values live across a call must be in a callee-saved register. +//! - Some instructions have operand constraints such as register sub-classes, fixed registers, or +//! tied operands. +//! +//! # The instruction register coloring problem +//! +//! The constraint solver addresses the problem of satisfying the constraints of a single +//! instruction. We have: +//! +//! - A set of values that are live in registers before the instruction, with current register +//! assignments. Some are used by the instruction, some are not. +//! - A subset of the live register values that are killed by the instruction. +//! - A set of new register values that are defined by the instruction. +//! +//! We are not concerned with stack values at all. The reload pass ensures that all values required +//! to be in a register by the instruction are already in a register. +//! +//! A solution to the register coloring problem consists of: +//! +//! - Register reassignment prescriptions for a subset of the live register values. +//! - Register assignments for the instruction's defined values. +//! +//! The solution ensures that when live registers are reassigned as prescribed before the +//! instruction, all its operand constraints are satisfied, and the definition assignments won't +//! conflict. +//! +//! # Register diversions and global interference +//! +//! We can divert register values temporarily to satisfy constraints, but we need to put the +//! values back into their originally assigned register locations before leaving the block. +//! Otherwise, values won't be in the right register at the entry point of other blocks. +//! +//! Some values are *local*, and we don't need to worry about putting those values back since they +//! are not used in any other blocks. +//! +//! When we assign register locations to defines, we are assigning both the register used locally +//! immediately after the instruction and the register used globally when the defined value is used +//! in a different block. We need to avoid interference both locally at the instruction and globally. +//! +//! We have multiple mappings of values to registers: +//! +//! 1. The initial local mapping before the instruction. This includes any diversions from previous +//! instructions in the block, but not diversions for the current instruction. +//! 2. The local mapping after applying the additional reassignments required to satisfy the +//! constraints of the current instruction. +//! 3. The local mapping after the instruction. This excludes values killed by the instruction and +//! includes values defined by the instruction. +//! 4. The global mapping after the instruction. This mapping only contains values with global live +//! ranges, and it does not include any diversions. +//! +//! All four mappings must be kept free of interference. +//! +//! 
+//! # Problems handled by previous passes
+//!
+//! The constraint solver can only reassign registers; it can't create spill code, so some
+//! constraints are handled by earlier passes:
+//!
+//! - There will be enough free registers available for the defines. Ensuring this is the primary
+//!   purpose of the spilling phase.
+//! - When the same value is used for multiple operands, the intersection of operand constraints is
+//!   non-empty. The spilling phase will insert copies to handle mutually incompatible constraints,
+//!   such as when the same value is bound to two different function arguments.
+//! - Values bound to tied operands must be killed by the instruction. Also enforced by the
+//!   spiller.
+//! - Values used by register operands are in registers, and values used by stack operands are in
+//!   stack slots. This is enforced by the reload pass.
+//!
+//! # Solver algorithm
+//!
+//! The goal of the solver is to satisfy the instruction constraints with a minimal number of
+//! register assignments before the instruction.
+//!
+//! 1. Compute the set of values used by operands with a fixed register constraint that isn't
+//!    already satisfied. These are mandatory predetermined reassignments.
+//! 2. Compute the set of values that don't satisfy their register class constraint. These are
+//!    mandatory reassignments that we need to solve.
+//! 3. Add the set of defines to the set of variables computed in 2. Exclude defines tied to an
+//!    input operand since their value is pre-determined.
+//!
+//! The sets of values computed in 2. and 3. are the *variables* for the solver. Given a set of
+//! variables, we can also compute a set of allocatable registers by removing the variables from
+//! the set of assigned registers before the instruction.
+//!
+//! 1. For each variable, compute its domain as the intersection of the allocatable registers and
+//!    its register class constraint.
+//! 2. Sort the variables in order of increasing domain size.
+//! 3. Search for a solution that assigns each variable a register from its domain without
+//!    interference between variables.
+//!
+//! If the search fails to find a solution, we may need to reassign more registers. Find an
+//! appropriate candidate among the set of live register values, add it as a variable, and start
+//! over.
+
+use super::RegisterSet;
+use crate::dbg::DisplayList;
+use crate::entity::{SparseMap, SparseMapValue};
+use crate::ir::Value;
+use crate::isa::{RegClass, RegUnit};
+use crate::regalloc::register_set::RegSetIter;
+use alloc::vec::Vec;
+use core::cmp;
+use core::fmt;
+use core::mem;
+use core::u16;
+use log::debug;
+
+/// A variable in the constraint problem.
+///
+/// Variables represent register values that can be assigned to any register unit within the
+/// constraint register class. This includes live register values that can be reassigned to a new
+/// register and values defined by the instruction which must be assigned to a register.
+///
+/// Besides satisfying the register class constraint, variables must also be mutually
+/// non-interfering in up to three contexts:
+///
+/// 1. Input side live registers, after applying all the reassignments.
+/// 2. Output side live registers, considering all the local register diversions.
+/// 3. Global live registers, not considering any local diversions.
+///
+pub struct Variable {
+    /// The value whose register assignment we're looking for.
+    pub value: Value,
+
+    /// Original register unit holding this live value before the instruction, or `None` for a
+    /// value that is defined by the instruction.
+    from: Option<RegUnit>,
+
+    /// Avoid interference on the input side.
+    is_input: bool,
+
+    /// Avoid interference on the output side.
+    is_output: bool,
+
+    /// Avoid interference with the global registers.
+    is_global: bool,
+
+    /// Number of registers available in the domain of this variable.
+    domain: u16,
+
+    /// The assigned register unit after a full solution was found.
+    pub solution: RegUnit,
+
+    /// Any solution must belong to the constraint register class.
+    constraint: RegClass,
+}
+
+impl Variable {
+    fn new_live(value: Value, constraint: RegClass, from: RegUnit, is_output: bool) -> Self {
+        Self {
+            value,
+            constraint,
+            from: Some(from),
+            is_input: true,
+            is_output,
+            is_global: false,
+            domain: 0,
+            solution: !0,
+        }
+    }
+
+    fn new_def(value: Value, constraint: RegClass, is_global: bool) -> Self {
+        Self {
+            value,
+            constraint,
+            from: None,
+            is_input: false,
+            is_output: true,
+            is_global,
+            domain: 0,
+            solution: !0,
+        }
+    }
+
+    /// Does this variable represent a value defined by the current instruction?
+    pub fn is_define(&self) -> bool {
+        self.from.is_none()
+    }
+
+    /// Get an iterator over possible register choices, given the available registers on the input
+    /// and output sides as well as the available global register set.
+    fn iter(&self, iregs: &RegisterSet, oregs: &RegisterSet, gregs: &RegisterSet) -> RegSetIter {
+        if !self.is_output {
+            debug_assert!(!self.is_global, "Global implies output");
+            debug_assert!(self.is_input, "Missing interference set");
+            return iregs.iter(self.constraint);
+        }
+
+        let mut r = oregs.clone();
+        if self.is_input {
+            r.intersect(iregs);
+        }
+        if self.is_global {
+            r.intersect(gregs);
+        }
+        r.iter(self.constraint)
+    }
+}
+
+impl fmt::Display for Variable {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}({}", self.value, self.constraint)?;
+        if let Some(reg) = self.from {
+            write!(f, ", from {}", self.constraint.info.display_regunit(reg))?;
+        }
+        if self.is_input {
+            write!(f, ", in")?;
+        }
+        if self.is_output {
+            write!(f, ", out")?;
+        }
+        if self.is_global {
+            write!(f, ", global")?;
+        }
+        if self.is_define() {
+            write!(f, ", def")?;
+        }
+        if self.domain > 0 {
+            write!(f, ", {}", self.domain)?;
+        }
+        write!(f, ")")
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct Assignment {
+    pub value: Value,
+    pub from: RegUnit,
+    pub to: RegUnit,
+    pub rc: RegClass,
+}
+
+impl SparseMapValue<Value> for Assignment {
+    fn key(&self) -> Value {
+        self.value
+    }
+}
+
+impl fmt::Display for Assignment {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let ri = self.rc.info;
+        write!(
+            f,
+            "{}:{}({} -> {})",
+            self.value,
+            self.rc,
+            ri.display_regunit(self.from),
+            ri.display_regunit(self.to)
+        )
+    }
+}
+
+/// A move operation between two registers or between a register and an emergency spill slot.
+#[derive(Clone, PartialEq)]
+pub enum Move {
+    Reg {
+        value: Value,
+        rc: RegClass,
+        from: RegUnit,
+        to: RegUnit,
+    },
+    #[allow(dead_code)] // rustc doesn't see it isn't dead.
+    Spill {
+        value: Value,
+        rc: RegClass,
+        from: RegUnit,
+        to_slot: usize,
+    },
+    Fill {
+        value: Value,
+        rc: RegClass,
+        from_slot: usize,
+        to: RegUnit,
+    },
+}
+
+impl Move {
+    /// Create a register move from an assignment, but not for identity assignments.
+    fn with_assignment(a: &Assignment) -> Option<Self> {
+        if a.from != a.to {
+            Some(Self::Reg {
+                value: a.value,
+                from: a.from,
+                to: a.to,
+                rc: a.rc,
+            })
+        } else {
+            None
+        }
+    }
+
+    /// Get the "from" register and register class, if possible.
+    #[cfg_attr(feature = "cargo-clippy", allow(clippy::wrong_self_convention))]
+    fn from_reg(&self) -> Option<(RegClass, RegUnit)> {
+        match *self {
+            Self::Reg { rc, from, .. } | Self::Spill { rc, from, .. } => Some((rc, from)),
+            Self::Fill { .. } => None,
+        }
+    }
+
+    /// Get the "to" register and register class, if possible.
+    fn to_reg(&self) -> Option<(RegClass, RegUnit)> {
+        match *self {
+            Self::Reg { rc, to, .. } | Self::Fill { rc, to, .. } => Some((rc, to)),
+            Self::Spill { .. } => None,
+        }
+    }
+
+    /// Replace the "to" register with `new` and return the old value.
+    fn replace_to_reg(&mut self, new: RegUnit) -> RegUnit {
+        mem::replace(
+            match *self {
+                Self::Reg { ref mut to, .. } | Self::Fill { ref mut to, .. } => to,
+                Self::Spill { .. } => panic!("No to register in a spill {}", self),
+            },
+            new,
+        )
+    }
+
+    /// Convert this `Reg` move to a spill to `slot` and return the old "to" register.
+    fn change_to_spill(&mut self, slot: usize) -> RegUnit {
+        match self.clone() {
+            Self::Reg {
+                value,
+                rc,
+                from,
+                to,
+            } => {
+                *self = Self::Spill {
+                    value,
+                    rc,
+                    from,
+                    to_slot: slot,
+                };
+                to
+            }
+            _ => panic!("Expected reg move: {}", self),
+        }
+    }
+
+    /// Get the value being moved.
+    fn value(&self) -> Value {
+        match *self {
+            Self::Reg { value, .. } | Self::Fill { value, .. } | Self::Spill { value, .. } => value,
+        }
+    }
+
+    /// Get the associated register class.
+    fn rc(&self) -> RegClass {
+        match *self {
+            Self::Reg { rc, .. } | Self::Fill { rc, .. } | Self::Spill { rc, .. } => rc,
+        }
+    }
+}
+
+impl fmt::Display for Move {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Self::Reg {
+                value,
+                from,
+                to,
+                rc,
+            } => write!(
+                f,
+                "{}:{}({} -> {})",
+                value,
+                rc,
+                rc.info.display_regunit(from),
+                rc.info.display_regunit(to)
+            ),
+            Self::Spill {
+                value,
+                from,
+                to_slot,
+                rc,
+            } => write!(
+                f,
+                "{}:{}({} -> slot {})",
+                value,
+                rc,
+                rc.info.display_regunit(from),
+                to_slot
+            ),
+            Self::Fill {
+                value,
+                from_slot,
+                to,
+                rc,
+            } => write!(
+                f,
+                "{}:{}(slot {} -> {})",
+                value,
+                rc,
+                from_slot,
+                rc.info.display_regunit(to)
+            ),
+        }
+    }
+}
+
+impl fmt::Debug for Move {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let as_display: &dyn fmt::Display = self;
+        as_display.fmt(f)
+    }
+}
+
+/// Constraint solver for register allocation around a single instruction.
+///
+/// Start by programming in the instruction constraints.
+///
+/// 1. Initialize the solver by calling `reset()` with the set of allocatable registers before the
+///    instruction.
+/// 2. Program the input side constraints: Call `reassign_in()` for all fixed register constraints,
+///    and `add_var()` for any input operands whose constraints are not already satisfied.
+/// 3. Check for conflicts between fixed input assignments and existing live values by calling
+///    `has_fixed_input_conflicts()`. Resolve any conflicts by calling `add_var()` with the
+///    conflicting values.
+/// 4. Prepare for adding output side constraints by calling `inputs_done()`.
+/// 5. Add any killed register values that no longer cause interference on the output side by
+///    calling `add_kill()`.
+/// 6. Program the output side constraints: Call `add_fixed_output()` for all fixed register
+///    constraints and `add_def()` for free defines. Resolve fixed output conflicts by calling
+///    `add_through_var()`.
+///
+pub struct Solver {
+    /// Register reassignments that are required or decided as part of a full solution.
+    /// This includes identity assignments for values that are already in the correct fixed
+    /// register.
+    assignments: SparseMap<Value, Assignment>,
+
+    /// Variables are the values that should be reassigned as part of a solution.
+    /// Values with fixed register constraints are not considered variables. They are represented
+    /// in the `assignments` vector if necessary.
+    vars: Vec<Variable>,
+
+    /// Are we finished adding input-side constraints? This changes the meaning of the `regs_in`
+    /// and `regs_out` register sets.
+    inputs_done: bool,
+
+    /// Available registers on the input side of the instruction.
+    ///
+    /// While we're adding input constraints (`!inputs_done`):
+    ///
+    /// - Live values on the input side are marked as unavailable.
+    /// - The 'from' registers of fixed input reassignments are marked as available as they are
+    ///   added.
+    /// - Input-side variables are marked as available.
+    ///
+    /// After finishing input constraints (`inputs_done`):
+    ///
+    /// - Live values on the input side are marked as unavailable.
+    /// - The 'to' registers of fixed input reassignments are marked as unavailable.
+    /// - Input-side variables are marked as available.
+    ///
+    regs_in: RegisterSet,
+
+    /// Available registers on the output side of the instruction / fixed input scratch space.
+    ///
+    /// While we're adding input constraints (`!inputs_done`):
+    ///
+    /// - The 'to' registers of fixed input reassignments are marked as unavailable.
+    ///
+    /// After finishing input constraints (`inputs_done`):
+    ///
+    /// - Live-through values are marked as unavailable.
+    /// - Fixed output assignments are marked as unavailable.
+    /// - Live-through variables are marked as available.
+    ///
+    regs_out: RegisterSet,
+
+    /// List of register moves scheduled to avoid conflicts.
+    ///
+    /// This is used as working space by the `schedule_moves()` function.
+    moves: Vec<Move>,
+
+    /// List of pending fill moves. This is only used during `schedule_moves()`.
+    fills: Vec<Move>,
+}
+
+/// Interface for programming the constraints into the solver.
+impl Solver {
+    /// Create a new empty solver.
+    pub fn new() -> Self {
+        Self {
+            assignments: SparseMap::new(),
+            vars: Vec::new(),
+            inputs_done: false,
+            regs_in: RegisterSet::new(),
+            regs_out: RegisterSet::new(),
+            moves: Vec::new(),
+            fills: Vec::new(),
+        }
+    }
+
+    /// Clear all data structures in this coloring pass.
+    pub fn clear(&mut self) {
+        self.assignments.clear();
+        self.vars.clear();
+        self.inputs_done = false;
+        self.regs_in = RegisterSet::new();
+        self.regs_out = RegisterSet::new();
+        self.moves.clear();
+        self.fills.clear();
+    }
+
+    /// Reset the solver state and prepare solving for a new instruction with an initial set of
+    /// allocatable registers.
+    ///
+    /// The `regs` set is the allocatable registers before any reassignments are applied.
+    pub fn reset(&mut self, regs: &RegisterSet) {
+        self.assignments.clear();
+        self.vars.clear();
+        self.inputs_done = false;
+        self.regs_in = regs.clone();
+        // Used for tracking fixed input assignments while `!inputs_done`:
+        self.regs_out = RegisterSet::new();
+        self.moves.clear();
+        self.fills.clear();
+    }
+
+    /// Add a fixed input reassignment of `value`.
+    ///
+    /// This means that `value` must be assigned to `to` and can't become a variable. Call with
+    /// `from == to` to ensure that `value` is not reassigned from its existing register location.
+ /// + /// In either case, `to` will not be available for variables on the input side of the + /// instruction. + pub fn reassign_in(&mut self, value: Value, rc: RegClass, from: RegUnit, to: RegUnit) { + debug!( + "reassign_in({}:{}, {} -> {})", + value, + rc, + rc.info.display_regunit(from), + rc.info.display_regunit(to) + ); + debug_assert!(!self.inputs_done); + if self.regs_in.is_avail(rc, from) { + // It looks like `value` was already removed from the register set. It must have been + // added as a variable previously. A fixed constraint beats a variable, so convert it. + if let Some(idx) = self.vars.iter().position(|v| v.value == value) { + let v = self.vars.remove(idx); + debug!("-> converting variable {} to a fixed constraint", v); + // The spiller is responsible for ensuring that all constraints on the uses of a + // value are compatible. + debug_assert!( + v.constraint.contains(to), + "Incompatible constraints for {}", + value + ); + } else { + panic!("Invalid from register for fixed {} constraint", value); + } + } + self.regs_in.free(rc, from); + self.regs_out.take(rc, to); + self.assignments.insert(Assignment { + value, + rc, + from, + to, + }); + } + + /// Add a variable representing an input side value with an existing register assignment. + /// + /// A variable is a value that should be reassigned to something in the `constraint` register + /// class. + /// + /// It is assumed initially that the value is also live on the output side of the instruction. + /// This can be changed by calling to `add_kill()`. + /// + /// This function can only be used before calling `inputs_done()`. Afterwards, more input-side + /// variables can be added by calling `add_killed_var()` and `add_through_var()` + pub fn add_var(&mut self, value: Value, constraint: RegClass, from: RegUnit) { + debug!( + "add_var({}:{}, from={})", + value, + constraint, + constraint.info.display_regunit(from) + ); + debug_assert!(!self.inputs_done); + self.add_live_var(value, constraint, from, true); + } + + /// Add an extra input-side variable representing a value that is killed by the current + /// instruction. + /// + /// This function should be called after `inputs_done()` only. Use `add_var()` before. + pub fn add_killed_var(&mut self, value: Value, rc: RegClass, from: RegUnit) { + debug!( + "add_killed_var({}:{}, from={})", + value, + rc, + rc.info.display_regunit(from) + ); + debug_assert!(self.inputs_done); + self.add_live_var(value, rc, from, false); + } + + /// Add an extra input-side variable representing a value that is live through the current + /// instruction. + /// + /// This function should be called after `inputs_done()` only. Use `add_var()` before. + pub fn add_through_var(&mut self, value: Value, rc: RegClass, from: RegUnit) { + debug!( + "add_through_var({}:{}, from={})", + value, + rc, + rc.info.display_regunit(from) + ); + debug_assert!(self.inputs_done); + self.add_live_var(value, rc, from, true); + } + + /// Shared code for `add_var`, `add_killed_var`, and `add_through_var`. + /// + /// Add a variable that is live before the instruction, and possibly live through. Merge + /// constraints if the value has already been added as a variable or fixed assignment. + fn add_live_var(&mut self, value: Value, rc: RegClass, from: RegUnit, live_through: bool) { + // Check for existing entries for this value. + if !self.can_add_var(rc, from) { + // There could be an existing variable entry. 
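+            // (For instance, the same value may appear as several register operands of the
+            // current instruction; the constraints for the entries are merged below.)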
+ if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) { + // We have an existing variable entry for `value`. Combine the constraints. + if let Some(rc) = v.constraint.intersect(rc) { + debug!("-> combining constraint with {} yields {}", v, rc); + v.constraint = rc; + return; + } else { + // The spiller should have made sure the same value is not used with disjoint + // constraints. + panic!("Incompatible constraints: {} + {}", rc, v) + } + } + + // No variable, then it must be a fixed reassignment. + if let Some(a) = self.assignments.get(value) { + debug!("-> already fixed assignment {}", a); + debug_assert!(rc.contains(a.to), "Incompatible constraints for {}", value); + return; + } + + debug!("{}", self); + panic!("Wrong from register for {}", value); + } + + let new_var = Variable::new_live(value, rc, from, live_through); + debug!("-> new var: {}", new_var); + + self.regs_in.free(rc, from); + if self.inputs_done && live_through { + self.regs_out.free(rc, from); + } + self.vars.push(new_var); + } + + /// Check for conflicts between fixed input assignments and existing live values. + /// + /// Returns true if one of the live values conflicts with a fixed input assignment. Such a + /// conflicting value must be turned into a variable. + pub fn has_fixed_input_conflicts(&self) -> bool { + debug_assert!(!self.inputs_done); + // The `from` side of the fixed input diversions are taken from `regs_out`. + self.regs_out.interferes_with(&self.regs_in) + } + + /// Check if `rc, reg` specifically conflicts with the fixed input assignments. + pub fn is_fixed_input_conflict(&self, rc: RegClass, reg: RegUnit) -> bool { + debug_assert!(!self.inputs_done); + !self.regs_out.is_avail(rc, reg) + } + + /// Finish adding input side constraints. + /// + /// Call this method to indicate that there will be no more fixed input reassignments added + /// and prepare for the output side constraints. + pub fn inputs_done(&mut self) { + debug_assert!(!self.has_fixed_input_conflicts()); + + // At this point, `regs_out` contains the `to` side of the input reassignments, and the + // `from` side has already been marked as available in `regs_in`. + // + // Remove the `to` assignments from `regs_in` so it now indicates the registers available + // to variables at the input side. + self.regs_in.intersect(&self.regs_out); + + // The meaning of `regs_out` now changes completely to indicate the registers available to + // variables on the output side. + // The initial mask will be modified by `add_kill()` and `add_fixed_output()`. + self.regs_out = self.regs_in.clone(); + + // Now we can't add more fixed input assignments, but `add_var()` is still allowed. + self.inputs_done = true; + } + + /// Record that an input register value is killed by the instruction. + /// + /// Even if a fixed reassignment has been added for the value, the `reg` argument should be the + /// original location before the reassignments. + /// + /// This means that the register is available on the output side. + pub fn add_kill(&mut self, value: Value, rc: RegClass, reg: RegUnit) { + debug_assert!(self.inputs_done); + + // If a fixed assignment is killed, the `to` register becomes available on the output side. + if let Some(a) = self.assignments.get(value) { + debug_assert_eq!(a.from, reg); + self.regs_out.free(a.rc, a.to); + return; + } + + // It's also possible that a variable is killed. That means it doesn't need to satisfy + // interference constraints on the output side. 
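+        // Clearing `is_output` widens the variable's domain: `Variable::iter` then only
+        // intersects the input-side register set.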
+        // Variables representing tied operands will get their `is_output` flag set again later.
+        if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) {
+            debug_assert!(v.is_input);
+            v.is_output = false;
+            return;
+        }
+
+        // Alright, this is just a boring value being killed by the instruction. Just reclaim
+        // the assigned register.
+        self.regs_out.free(rc, reg);
+    }
+
+    /// Record that an input register is tied to an output register.
+    ///
+    /// It is assumed that `add_kill` was called previously with the same arguments.
+    ///
+    /// The output value that must have the same register as the input value is not recorded in the
+    /// solver.
+    ///
+    /// If the value has already been assigned to a fixed register, return that.
+    pub fn add_tied_input(
+        &mut self,
+        value: Value,
+        rc: RegClass,
+        reg: RegUnit,
+        is_global: bool,
+    ) -> Option<RegUnit> {
+        debug_assert!(self.inputs_done);
+
+        // If a fixed assignment is tied, the `to` register is not available on the output side.
+        if let Some(a) = self.assignments.get(value) {
+            debug_assert_eq!(a.from, reg);
+            self.regs_out.take(a.rc, a.to);
+            return Some(a.to);
+        }
+
+        // Check if a variable was created.
+        if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) {
+            debug_assert!(v.is_input);
+            v.is_output = true;
+            v.is_global = is_global;
+            return None;
+        }
+
+        // No variable exists for `value` because its constraints are already satisfied.
+        // However, if the tied output value has a global live range, we must create a variable to
+        // avoid global interference too.
+        if is_global {
+            let mut new_var = Variable::new_live(value, rc, reg, true);
+            new_var.is_global = true;
+            debug!("add_tied_input: new tied-global value: {}", new_var);
+            self.vars.push(new_var);
+            self.regs_in.free(rc, reg);
+        } else {
+            self.regs_out.take(rc, reg);
+        }
+
+        None
+    }
+
+    /// Add a fixed output assignment.
+    ///
+    /// This means that `to` will not be available for variables on the output side of the
+    /// instruction.
+    ///
+    /// Returns `false` if a live value conflicts with `to`, so it couldn't be added. Find the
+    /// conflicting live-through value and turn it into a variable before calling this method
+    /// again.
+    #[allow(dead_code)]
+    pub fn add_fixed_output(&mut self, rc: RegClass, reg: RegUnit) -> bool {
+        debug_assert!(self.inputs_done);
+        if self.regs_out.is_avail(rc, reg) {
+            self.regs_out.take(rc, reg);
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Add a defined output value.
+    ///
+    /// This is similar to `add_var`, except the value doesn't have a prior register assignment.
+    pub fn add_def(&mut self, value: Value, constraint: RegClass, is_global: bool) {
+        debug_assert!(self.inputs_done);
+        self.vars
+            .push(Variable::new_def(value, constraint, is_global));
+    }
+
+    /// Clear the `is_global` flag on all solver variables.
+    ///
+    /// This is used when there are not enough global registers available, and global defines have
+    /// to be replaced with local defines followed by a copy.
+    pub fn clear_all_global_flags(&mut self) {
+        for v in &mut self.vars {
+            v.is_global = false;
+        }
+    }
+}
+
+/// Error reported when the solver fails to find a solution with the current constraints.
+///
+/// When no solution can be found, the error indicates how constraints could be loosened to help.
+pub enum SolverError {
+    /// There are no available registers in the given register class.
+    ///
+    /// This should be resolved by turning live-through values into variables so they can be moved
+    /// out of the way.
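+    /// (For example, every register in the class may currently be occupied by a live-through
+    /// value that has not been made a solver variable yet.)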
+    Divert(RegClass),
+
+    /// There are insufficient available registers in the global set to assign an `is_global`
+    /// variable with the given value.
+    ///
+    /// This should be resolved by converting the variable to a local one.
+    Global(Value),
+}
+
+/// Interface for searching for a solution.
+impl Solver {
+    /// Try a quick-and-dirty solution.
+    ///
+    /// This is expected to succeed for most instructions since the constraint problem is almost
+    /// always trivial.
+    ///
+    /// Returns `Ok(regs)` if a solution was found.
+    pub fn quick_solve(
+        &mut self,
+        global_regs: &RegisterSet,
+        is_reload: bool,
+    ) -> Result<RegisterSet, SolverError> {
+        self.find_solution(global_regs, is_reload)
+    }
+
+    /// Try harder to find a solution.
+    ///
+    /// Call this method after `quick_solve()` fails.
+    ///
+    /// This may return an error with a register class that has run out of registers. If registers
+    /// can be freed up in the starving class, this method can be called again after adding
+    /// variables for the freed registers.
+    pub fn real_solve(
+        &mut self,
+        global_regs: &RegisterSet,
+        is_reload: bool,
+    ) -> Result<RegisterSet, SolverError> {
+        // Compute domain sizes for all the variables given the current register sets.
+        for v in &mut self.vars {
+            let d = v.iter(&self.regs_in, &self.regs_out, global_regs).len();
+            v.domain = cmp::min(d, u16::MAX as usize) as u16;
+        }
+
+        // Solve for vars with small domains first to increase the chance of finding a solution.
+        //
+        // Also consider this case:
+        //
+        // v0: out, global
+        // v1: in
+        // v2: in+out
+        //
+        // If only %r0 and %r1 are available, the global constraint may cause us to assign:
+        //
+        // v0 -> %r1
+        // v1 -> %r0
+        // v2 -> !
+        //
+        // Usually in+out variables will have a smaller domain, but in the above case the domain
+        // size is the same, so we also prioritize in+out variables.
+        //
+        // Include the reversed previous solution for this variable partly as a stable tie breaker,
+        // partly to shake things up on a second attempt.
+        //
+        // Use the `from` register and value number as a tie breaker to get a stable sort.
+        self.vars.sort_unstable_by_key(|v| {
+            (
+                v.domain,
+                !(v.is_input && v.is_output),
+                !v.solution,
+                v.from.unwrap_or(0),
+                v.value,
+            )
+        });
+
+        debug!("real_solve for {}", self);
+        self.find_solution(global_regs, is_reload)
+    }
+
+    /// Search for a solution with the current list of variables.
+    ///
+    /// If a solution was found, returns `Ok(regs)` with the set of available registers on the
+    /// output side after the solution. If no solution could be found, returns `Err(rc)` with the
+    /// constraint register class that needs more available registers.
+    fn find_solution(
+        &mut self,
+        global_regs: &RegisterSet,
+        is_reload: bool,
+    ) -> Result<RegisterSet, SolverError> {
+        // Available registers on the input and output sides respectively.
+        let mut iregs = self.regs_in.clone();
+        let mut oregs = self.regs_out.clone();
+        let mut gregs = global_regs.clone();
+
+        for v in &mut self.vars {
+            let rc = v.constraint;
+
+            // Decide which register to assign. In order to try and keep registers holding
+            // reloaded values separate from all other registers to the extent possible, we choose
+            // the first available register in the normal case, but the last available one in the
+            // case of a reload. See "A side note on register choice heuristics" in
+            // src/redundant_reload_remover.rs for further details.
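+            // (`rnext()` takes from the high end of the register set iterator, i.e. the last
+            // available register rather than the first.)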
+            let mut reg_set_iter = v.iter(&iregs, &oregs, &gregs);
+            let maybe_reg = if is_reload {
+                reg_set_iter.rnext()
+            } else {
+                reg_set_iter.next()
+            };
+
+            let reg = match maybe_reg {
+                Some(reg) => reg,
+                None => {
+                    // If `v` must avoid global interference, there is no point in requesting
+                    // live registers be diverted. We need to make it a non-global value.
+                    if v.is_global && gregs.iter(rc).next().is_none() {
+                        return Err(SolverError::Global(v.value));
+                    }
+                    return Err(SolverError::Divert(rc));
+                }
+            };
+
+            v.solution = reg;
+            if v.is_input {
+                iregs.take(rc, reg);
+            }
+            if v.is_output {
+                oregs.take(rc, reg);
+            }
+            if v.is_global {
+                gregs.take(rc, reg);
+            }
+        }
+
+        Ok(oregs)
+    }
+
+    /// Get all the variables.
+    pub fn vars(&self) -> &[Variable] {
+        &self.vars
+    }
+
+    /// Check if `value` can be added as a variable to help find a solution.
+    pub fn can_add_var(&mut self, constraint: RegClass, from: RegUnit) -> bool {
+        !self.regs_in.is_avail(constraint, from)
+            && !self.vars.iter().any(|var| var.from == Some(from))
+    }
+}
+
+/// Interface for working with parallel copies once a solution has been found.
+impl Solver {
+    /// Collect all the register moves we need to execute.
+    fn collect_moves(&mut self) {
+        self.moves.clear();
+
+        // Collect moves from the chosen solution for all non-define variables.
+        for v in &self.vars {
+            if let Some(from) = v.from {
+                // Omit variable solutions that don't require the value to be moved.
+                if from != v.solution {
+                    self.moves.push(Move::Reg {
+                        value: v.value,
+                        from,
+                        to: v.solution,
+                        rc: v.constraint,
+                    });
+                }
+            }
+        }
+
+        // Convert all of the fixed register assignments into moves, but omit the ones that are
+        // already in the right register.
+        self.moves
+            .extend(self.assignments.values().filter_map(Move::with_assignment));
+
+        if !self.moves.is_empty() {
+            debug!("collect_moves: {}", DisplayList(&self.moves));
+        }
+    }
+
+    /// Try to schedule a sequence of `regmove` instructions that will shuffle registers into
+    /// place.
+    ///
+    /// This may require the use of additional available registers, and it can fail if no
+    /// additional registers are available.
+    ///
+    /// TODO: Handle failure by generating a sequence of register swaps, or by temporarily spilling
+    /// a register.
+    ///
+    /// Returns the number of spills that had to be emitted.
+    pub fn schedule_moves(&mut self, regs: &RegisterSet) -> usize {
+        self.collect_moves();
+        debug_assert!(self.fills.is_empty());
+
+        let mut num_spill_slots = 0;
+        let mut avail = regs.clone();
+        let mut i = 0;
+        while i < self.moves.len() + self.fills.len() {
+            // Don't even look at the fills until we've spent all the moves. Deferring these lets
+            // us potentially reuse the claimed registers to resolve multiple cycles.
+            if i >= self.moves.len() {
+                self.moves.append(&mut self.fills);
+            }
+
+            // Find the first move that can be executed now.
+            if let Some(j) = self.moves[i..].iter().position(|m| match m.to_reg() {
+                Some((rc, reg)) => avail.is_avail(rc, reg),
+                None => true,
+            }) {
+                // This move can be executed now.
+                self.moves.swap(i, i + j);
+                let m = &self.moves[i];
+                if let Some((rc, reg)) = m.to_reg() {
+                    avail.take(rc, reg);
+                }
+                if let Some((rc, reg)) = m.from_reg() {
+                    avail.free(rc, reg);
+                }
+                debug!("move #{}: {}", i, m);
+                i += 1;
+                continue;
+            }
+
+            // When we get here, none of the `moves[i..]` can be executed. This means there are
+            // only cycles remaining. The cycles can be broken in a few ways:
+            //
+            // 1. Grab an available register and use it to break a cycle.
+            // 2.
Move a value temporarily into a stack slot instead of a register. + // 3. Use swap instructions. + // + // TODO: So far we only implement 1 and 2. + + // Pick an assignment with the largest possible width. This is more likely to break up + // a cycle than an assignment with fewer register units. For example, it may be + // necessary to move two arm32 S-registers out of the way before a D-register can move + // into place. + // + // We use `min_by_key` and `!` instead of `max_by_key` because it preserves the + // existing order of moves with the same width. + let j = self.moves[i..] + .iter() + .enumerate() + .min_by_key(|&(_, m)| !m.rc().width) + .unwrap() + .0; + self.moves.swap(i, i + j); + + // Check the top-level register class for an available register. It is an axiom of the + // register allocator that we can move between all registers in the top-level RC. + let m = self.moves[i].clone(); + let toprc = m.rc().toprc(); + if let Some(reg) = avail.iter(toprc).next() { + debug!( + "breaking cycle at {} with available {} register {}", + m, + toprc, + toprc.info.display_regunit(reg) + ); + + // Alter the move so it is guaranteed to be picked up when we loop. It is important + // that this move is scheduled immediately, otherwise we would have multiple moves + // of the same value, and they would not be commutable. + let old_to_reg = self.moves[i].replace_to_reg(reg); + // Append a fixup move so we end up in the right place. This move will be scheduled + // later. That's ok because it is the single remaining move of `m.value` after the + // next iteration. + self.moves.push(Move::Reg { + value: m.value(), + rc: toprc, + from: reg, + to: old_to_reg, + }); + // TODO: What if allocating an extra register is not enough to break a cycle? This + // can happen when there are registers of different widths in a cycle. For ARM, we + // may have to move two S-registers out of the way before we can resolve a cycle + // involving a D-register. + continue; + } + + // It was impossible to free up a register in toprc, so use an emergency spill slot as + // a last resort. + let slot = num_spill_slots; + num_spill_slots += 1; + debug!("breaking cycle at {} with slot {}", m, slot); + let old_to_reg = self.moves[i].change_to_spill(slot); + self.fills.push(Move::Fill { + value: m.value(), + rc: toprc, + from_slot: slot, + to: old_to_reg, + }); + } + + num_spill_slots + } + + /// Borrow the scheduled set of register moves that was computed by `schedule_moves()`. + pub fn moves(&self) -> &[Move] { + &self.moves + } +} + +impl fmt::Display for Solver { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let reginfo = self.vars.first().map(|v| v.constraint.info); + writeln!(f, "Solver {{ inputs_done: {},", self.inputs_done)?; + writeln!(f, " in: {}", self.regs_in.display(reginfo))?; + writeln!(f, " out: {}", self.regs_out.display(reginfo))?; + writeln!( + f, + " assignments: {}", + DisplayList(self.assignments.as_slice()) + )?; + writeln!(f, " vars: {}", DisplayList(&self.vars))?; + writeln!(f, " moves: {}", DisplayList(&self.moves))?; + writeln!(f, "}}") + } +} + +#[cfg(test)] +#[cfg(feature = "arm32")] +mod tests { + use super::{Move, Solver}; + use crate::entity::EntityRef; + use crate::ir::Value; + use crate::isa::{RegClass, RegInfo, RegUnit, TargetIsa}; + use crate::regalloc::RegisterSet; + use alloc::boxed::Box; + use core::str::FromStr; + use target_lexicon::triple; + + // Make an arm32 `TargetIsa`, if possible. 
+    fn arm32() -> Option<Box<dyn TargetIsa>> {
+        use crate::isa;
+        use crate::settings;
+
+        let shared_builder = settings::builder();
+        let shared_flags = settings::Flags::new(shared_builder);
+
+        isa::lookup(triple!("arm"))
+            .ok()
+            .map(|b| b.finish(shared_flags))
+    }
+
+    // Get a register class by name.
+    fn rc_by_name(reginfo: &RegInfo, name: &str) -> RegClass {
+        reginfo
+            .classes
+            .iter()
+            .find(|rc| rc.name == name)
+            .expect("Can't find named register class.")
+    }
+
+    // Construct a register move.
+    fn mov(value: Value, rc: RegClass, from: RegUnit, to: RegUnit) -> Move {
+        Move::Reg {
+            value,
+            rc,
+            from,
+            to,
+        }
+    }
+
+    fn spill(value: Value, rc: RegClass, from: RegUnit, to_slot: usize) -> Move {
+        Move::Spill {
+            value,
+            rc,
+            from,
+            to_slot,
+        }
+    }
+
+    fn fill(value: Value, rc: RegClass, from_slot: usize, to: RegUnit) -> Move {
+        Move::Fill {
+            value,
+            rc,
+            from_slot,
+            to,
+        }
+    }
+
+    #[test]
+    fn simple_moves() {
+        let isa = arm32().expect("This test requires arm32 support");
+        let reginfo = isa.register_info();
+        let gpr = rc_by_name(&reginfo, "GPR");
+        let r0 = gpr.unit(0);
+        let r1 = gpr.unit(1);
+        let r2 = gpr.unit(2);
+        let gregs = RegisterSet::new();
+        let mut regs = RegisterSet::new();
+        let mut solver = Solver::new();
+        let v10 = Value::new(10);
+        let v11 = Value::new(11);
+
+        // As simple as it gets: Value is in r1, we want r0.
+        regs.take(gpr, r1);
+        solver.reset(&regs);
+        solver.reassign_in(v10, gpr, r1, r0);
+        solver.inputs_done();
+        assert!(solver.quick_solve(&gregs, false).is_ok());
+        assert_eq!(solver.schedule_moves(&regs), 0);
+        assert_eq!(solver.moves(), &[mov(v10, gpr, r1, r0)]);
+
+        // A bit harder: r0, r1 need to go in r1, r2.
+        regs.take(gpr, r0);
+        solver.reset(&regs);
+        solver.reassign_in(v10, gpr, r0, r1);
+        solver.reassign_in(v11, gpr, r1, r2);
+        solver.inputs_done();
+        assert!(solver.quick_solve(&gregs, false).is_ok());
+        assert_eq!(solver.schedule_moves(&regs), 0);
+        assert_eq!(
+            solver.moves(),
+            &[mov(v11, gpr, r1, r2), mov(v10, gpr, r0, r1)]
+        );
+
+        // Swap r0 and r1 in three moves using r2 as a scratch.
+        solver.reset(&regs);
+        solver.reassign_in(v10, gpr, r0, r1);
+        solver.reassign_in(v11, gpr, r1, r0);
+        solver.inputs_done();
+        assert!(solver.quick_solve(&gregs, false).is_ok());
+        assert_eq!(solver.schedule_moves(&regs), 0);
+        assert_eq!(
+            solver.moves(),
+            &[
+                mov(v10, gpr, r0, r2),
+                mov(v11, gpr, r1, r0),
+                mov(v10, gpr, r2, r1),
+            ]
+        );
+    }
+
+    #[test]
+    fn harder_move_cycles() {
+        let isa = arm32().expect("This test requires arm32 support");
+        let reginfo = isa.register_info();
+        let s = rc_by_name(&reginfo, "S");
+        let d = rc_by_name(&reginfo, "D");
+        let d0 = d.unit(0);
+        let d1 = d.unit(1);
+        let d2 = d.unit(2);
+        let s0 = s.unit(0);
+        let s1 = s.unit(1);
+        let s2 = s.unit(2);
+        let s3 = s.unit(3);
+        let gregs = RegisterSet::new();
+        let mut regs = RegisterSet::new();
+        let mut solver = Solver::new();
+        let v10 = Value::new(10);
+        let v11 = Value::new(11);
+        let v12 = Value::new(12);
+
+        // Not a simple cycle: Swap d0 <-> (s2, s3)
+        regs.take(d, d0);
+        regs.take(d, d1);
+        solver.reset(&regs);
+        solver.reassign_in(v10, d, d0, d1);
+        solver.reassign_in(v11, s, s2, s0);
+        solver.reassign_in(v12, s, s3, s1);
+        solver.inputs_done();
+        assert!(solver.quick_solve(&gregs, false).is_ok());
+        assert_eq!(solver.schedule_moves(&regs), 0);
+        assert_eq!(
+            solver.moves(),
+            &[
+                mov(v10, d, d0, d2),
+                mov(v11, s, s2, s0),
+                mov(v12, s, s3, s1),
+                mov(v10, d, d2, d1),
+            ]
+        );
+
+        // Same problem in the other direction: Swap (s0, s1) <-> d1.
+        //
+        // If we divert the moves in order, we will need to allocate *two* temporary S registers. A
+        // trivial algorithm might assume that allocating a single temp is enough.
+        solver.reset(&regs);
+        solver.reassign_in(v11, s, s0, s2);
+        solver.reassign_in(v12, s, s1, s3);
+        solver.reassign_in(v10, d, d1, d0);
+        solver.inputs_done();
+        assert!(solver.quick_solve(&gregs, false).is_ok());
+        assert_eq!(solver.schedule_moves(&regs), 0);
+        assert_eq!(
+            solver.moves(),
+            &[
+                mov(v10, d, d1, d2),
+                mov(v12, s, s1, s3),
+                mov(v11, s, s0, s2),
+                mov(v10, d, d2, d0),
+            ]
+        );
+    }
+
+    #[test]
+    fn emergency_spill() {
+        let isa = arm32().expect("This test requires arm32 support");
+        let reginfo = isa.register_info();
+        let gpr = rc_by_name(&reginfo, "GPR");
+        let r0 = gpr.unit(0);
+        let r1 = gpr.unit(1);
+        let r2 = gpr.unit(2);
+        let r3 = gpr.unit(3);
+        let r4 = gpr.unit(4);
+        let r5 = gpr.unit(5);
+        let gregs = RegisterSet::new();
+        let mut regs = RegisterSet::new();
+        let mut solver = Solver::new();
+        let v10 = Value::new(10);
+        let v11 = Value::new(11);
+        let v12 = Value::new(12);
+        let v13 = Value::new(13);
+        let v14 = Value::new(14);
+        let v15 = Value::new(15);
+
+        // Claim r0--r2 and r3--r15 for other values.
+        for i in 0..16 {
+            regs.take(gpr, gpr.unit(i));
+        }
+
+        // Request a permutation cycle.
+        solver.reset(&regs);
+        solver.reassign_in(v10, gpr, r0, r1);
+        solver.reassign_in(v11, gpr, r1, r2);
+        solver.reassign_in(v12, gpr, r2, r0);
+        solver.inputs_done();
+        assert!(solver.quick_solve(&gregs, false).is_ok());
+        assert_eq!(solver.schedule_moves(&regs), 1);
+        assert_eq!(
+            solver.moves(),
+            &[
+                spill(v10, gpr, r0, 0),
+                mov(v12, gpr, r2, r0),
+                mov(v11, gpr, r1, r2),
+                fill(v10, gpr, 0, r1),
+            ]
+        );
+
+        // Two cycles should only require a single spill.
+        solver.reset(&regs);
+        // Cycle 1.
+        solver.reassign_in(v10, gpr, r0, r1);
+        solver.reassign_in(v11, gpr, r1, r2);
+        solver.reassign_in(v12, gpr, r2, r0);
+        // Cycle 2.
+        solver.reassign_in(v13, gpr, r3, r4);
+        solver.reassign_in(v14, gpr, r4, r5);
+        solver.reassign_in(v15, gpr, r5, r3);
+
+        solver.inputs_done();
+        assert!(solver.quick_solve(&gregs, false).is_ok());
+        // We resolve two cycles with one spill.
+        assert_eq!(solver.schedule_moves(&regs), 1);
+        assert_eq!(
+            solver.moves(),
+            &[
+                spill(v10, gpr, r0, 0),
+                mov(v12, gpr, r2, r0),
+                mov(v11, gpr, r1, r2),
+                mov(v13, gpr, r3, r1), // Use available r1 to break cycle 2.
+                mov(v15, gpr, r5, r3),
+                mov(v14, gpr, r4, r5),
+                mov(v13, gpr, r1, r4),
+                fill(v10, gpr, 0, r1), // Finally complete cycle 1.
+            ]
+        );
+    }
+}
diff --git a/cranelift/codegen/src/regalloc/spilling.rs b/cranelift/codegen/src/regalloc/spilling.rs
new file mode 100644
index 0000000000..e515543260
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/spilling.rs
@@ -0,0 +1,640 @@
+//! Spilling pass.
+//!
+//! The spilling pass is the first to run after the liveness analysis. Its primary function is to
+//! ensure that the register pressure never exceeds the number of available registers by moving
+//! some SSA values to spill slots on the stack. This is encoded in the affinity of the value's
+//! live range.
+//!
+//! Some instruction operand constraints may require additional registers to resolve. Since this
+//! can cause spilling, the spilling pass is also responsible for resolving those constraints by
+//! inserting copies. The extra constraints are:
+//!
+//! 1. A value used by a tied operand must be killed by the instruction. This is resolved by
+//!    inserting a copy to a temporary value when necessary.
+//! 2. When the same value is used more than once by an instruction, the operand constraints must
+//!    be compatible. Otherwise, the value must be copied into a new register for some of the
+//!    operands.
+
+use crate::cursor::{Cursor, EncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::ir::{ArgumentLoc, Block, Function, Inst, InstBuilder, SigRef, Value, ValueLoc};
+use crate::isa::registers::{RegClass, RegClassIndex, RegClassMask, RegUnit};
+use crate::isa::{ConstraintKind, EncInfo, RecipeConstraints, RegInfo, TargetIsa};
+use crate::regalloc::affinity::Affinity;
+use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
+use crate::regalloc::liveness::Liveness;
+use crate::regalloc::pressure::Pressure;
+use crate::regalloc::virtregs::VirtRegs;
+use crate::timing;
+use crate::topo_order::TopoOrder;
+use alloc::vec::Vec;
+use core::fmt;
+use log::debug;
+
+/// Return a top-level register class which contains `unit`.
+fn toprc_containing_regunit(unit: RegUnit, reginfo: &RegInfo) -> RegClass {
+    let bank = reginfo.bank_containing_regunit(unit).unwrap();
+    reginfo.classes[bank.first_toprc..(bank.first_toprc + bank.num_toprcs)]
+        .iter()
+        .find(|&rc| rc.contains(unit))
+        .expect("reg unit should be in a toprc")
+}
+
+/// Persistent data structures for the spilling pass.
+pub struct Spilling {
+    spills: Vec<Value>,
+    reg_uses: Vec<RegUse>,
+}
+
+/// Context data structure that gets instantiated once per pass.
+struct Context<'a> {
+    // Current instruction as well as reference to function and ISA.
+    cur: EncCursor<'a>,
+
+    // Cached ISA information.
+    reginfo: RegInfo,
+    encinfo: EncInfo,
+
+    // References to contextual data structures we need.
+    domtree: &'a DominatorTree,
+    liveness: &'a mut Liveness,
+    virtregs: &'a VirtRegs,
+    topo: &'a mut TopoOrder,
+
+    // Current register pressure.
+    pressure: Pressure,
+
+    // Values spilled for the current instruction. These values have already been removed from the
+    // pressure tracker, but they are still present in the live value tracker and their affinity
+    // hasn't been changed yet.
+    spills: &'a mut Vec<Value>,
+
+    // Uses of register values in the current instruction.
+    reg_uses: &'a mut Vec<RegUse>,
+}
+
+impl Spilling {
+    /// Create a new spilling data structure.
+    pub fn new() -> Self {
+        Self {
+            spills: Vec::new(),
+            reg_uses: Vec::new(),
+        }
+    }
+
+    /// Clear all data structures in this spilling pass.
+    pub fn clear(&mut self) {
+        self.spills.clear();
+        self.reg_uses.clear();
+    }
+
+    /// Run the spilling algorithm over `func`.
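+    ///
+    /// On return, values that could not be kept in registers have had their live range affinity
+    /// changed to `Stack` and a spill slot assigned.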
+    pub fn run(
+        &mut self,
+        isa: &dyn TargetIsa,
+        func: &mut Function,
+        domtree: &DominatorTree,
+        liveness: &mut Liveness,
+        virtregs: &VirtRegs,
+        topo: &mut TopoOrder,
+        tracker: &mut LiveValueTracker,
+    ) {
+        let _tt = timing::ra_spilling();
+        debug!("Spilling for:\n{}", func.display(isa));
+        let reginfo = isa.register_info();
+        let usable_regs = isa.allocatable_registers(func);
+        let mut ctx = Context {
+            cur: EncCursor::new(func, isa),
+            reginfo: isa.register_info(),
+            encinfo: isa.encoding_info(),
+            domtree,
+            liveness,
+            virtregs,
+            topo,
+            pressure: Pressure::new(&reginfo, &usable_regs),
+            spills: &mut self.spills,
+            reg_uses: &mut self.reg_uses,
+        };
+        ctx.run(tracker)
+    }
+}
+
+impl<'a> Context<'a> {
+    fn run(&mut self, tracker: &mut LiveValueTracker) {
+        self.topo.reset(self.cur.func.layout.blocks());
+        while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) {
+            self.visit_block(block, tracker);
+        }
+    }
+
+    fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) {
+        debug!("Spilling {}:", block);
+        self.cur.goto_top(block);
+        self.visit_block_header(block, tracker);
+        tracker.drop_dead_params();
+        self.process_spills(tracker);
+
+        while let Some(inst) = self.cur.next_inst() {
+            if !self.cur.func.dfg[inst].opcode().is_ghost() {
+                self.visit_inst(inst, block, tracker);
+            } else {
+                let (_throughs, kills) = tracker.process_ghost(inst);
+                self.free_regs(kills);
+            }
+            tracker.drop_dead(inst);
+            self.process_spills(tracker);
+        }
+    }
+
+    // Take all live registers in `regs` from the pressure set.
+    // This doesn't cause any spilling, it is assumed there are enough registers.
+    fn take_live_regs(&mut self, regs: &[LiveValue]) {
+        for lv in regs {
+            if !lv.is_dead {
+                if let Affinity::Reg(rci) = lv.affinity {
+                    let rc = self.reginfo.rc(rci);
+                    self.pressure.take(rc);
+                }
+            }
+        }
+    }
+
+    // Free all registers in `kills` from the pressure set.
+    fn free_regs(&mut self, kills: &[LiveValue]) {
+        for lv in kills {
+            if let Affinity::Reg(rci) = lv.affinity {
+                if !self.spills.contains(&lv.value) {
+                    let rc = self.reginfo.rc(rci);
+                    self.pressure.free(rc);
+                }
+            }
+        }
+    }
+
+    // Free all dead registers in `regs` from the pressure set.
+    fn free_dead_regs(&mut self, regs: &[LiveValue]) {
+        for lv in regs {
+            if lv.is_dead {
+                if let Affinity::Reg(rci) = lv.affinity {
+                    if !self.spills.contains(&lv.value) {
+                        let rc = self.reginfo.rc(rci);
+                        self.pressure.free(rc);
+                    }
+                }
+            }
+        }
+    }
+
+    fn visit_block_header(&mut self, block: Block, tracker: &mut LiveValueTracker) {
+        let (liveins, params) = tracker.block_top(
+            block,
+            &self.cur.func.dfg,
+            self.liveness,
+            &self.cur.func.layout,
+            self.domtree,
+        );
+
+        // Count the live-in registers. These should already fit in registers; they did at the
+        // dominator.
+        self.pressure.reset();
+        self.take_live_regs(liveins);
+
+        // A block can have an arbitrary (up to 2^16...) number of parameters, so they are not
+        // guaranteed to fit in registers.
+        for lv in params {
+            if let Affinity::Reg(rci) = lv.affinity {
+                let rc = self.reginfo.rc(rci);
+                'try_take: while let Err(mask) = self.pressure.take_transient(rc) {
+                    debug!("Need {} reg for block param {}", rc, lv.value);
+                    match self.spill_candidate(mask, liveins) {
+                        Some(cand) => {
+                            debug!(
+                                "Spilling live-in {} to make room for {} block param {}",
+                                cand, rc, lv.value
+                            );
+                            self.spill_reg(cand);
+                        }
+                        None => {
+                            // We can't spill any of the live-in registers, so we have to spill a
+                            // block argument.
+                            // Since the current spill metric would consider all the block
+                            // arguments equal, just spill the present register.
+                            debug!("Spilling {} block argument {}", rc, lv.value);
+
+                            // Since `spill_reg` will free a register, add the current one here.
+                            self.pressure.take(rc);
+                            self.spill_reg(lv.value);
+                            break 'try_take;
+                        }
+                    }
+                }
+            }
+        }
+
+        // The transient pressure counts for the block arguments are accurate. Just preserve
+        // them.
+        self.pressure.preserve_transient();
+        self.free_dead_regs(params);
+    }
+
+    fn visit_inst(&mut self, inst: Inst, block: Block, tracker: &mut LiveValueTracker) {
+        debug!("Inst {}, {}", self.cur.display_inst(inst), self.pressure);
+        debug_assert_eq!(self.cur.current_inst(), Some(inst));
+        debug_assert_eq!(self.cur.current_block(), Some(block));
+
+        let constraints = self
+            .encinfo
+            .operand_constraints(self.cur.func.encodings[inst]);
+
+        // We may need to resolve register constraints if there are any noteworthy uses.
+        debug_assert!(self.reg_uses.is_empty());
+        self.collect_reg_uses(inst, block, constraints);
+
+        // Calls usually have fixed register uses.
+        let call_sig = self.cur.func.dfg.call_signature(inst);
+        if let Some(sig) = call_sig {
+            self.collect_abi_reg_uses(inst, sig);
+        }
+
+        if !self.reg_uses.is_empty() {
+            self.process_reg_uses(inst, tracker);
+        }
+
+        // Update the live value tracker with this instruction.
+        let (throughs, kills, defs) = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
+
+        // Remove kills from the pressure tracker.
+        self.free_regs(kills);
+
+        // If inst is a call, spill all register values that are live across the call.
+        // This means that we don't currently take advantage of callee-saved registers.
+        // TODO: Be more sophisticated.
+        let opcode = self.cur.func.dfg[inst].opcode();
+        if call_sig.is_some()
+            || opcode == crate::ir::Opcode::X86ElfTlsGetAddr
+            || opcode == crate::ir::Opcode::X86MachoTlsGetAddr
+        {
+            for lv in throughs {
+                if lv.affinity.is_reg() && !self.spills.contains(&lv.value) {
+                    self.spill_reg(lv.value);
+                }
+            }
+        }
+
+        // Make sure we have enough registers for the register defs.
+        // Dead defs are included here. They need a register too.
+        // No need to process call return values, they are in fixed registers.
+        if let Some(constraints) = constraints {
+            for op in constraints.outs {
+                if op.kind != ConstraintKind::Stack {
+                    // Add register def to pressure, spill if needed.
+                    while let Err(mask) = self.pressure.take_transient(op.regclass) {
+                        debug!("Need {} reg from {} throughs", op.regclass, throughs.len());
+                        match self.spill_candidate(mask, throughs) {
+                            Some(cand) => self.spill_reg(cand),
+                            None => panic!(
+                                "Ran out of {} registers for {}",
+                                op.regclass,
+                                self.cur.display_inst(inst)
+                            ),
+                        }
+                    }
+                }
+            }
+            self.pressure.reset_transient();
+        }
+
+        // Restore pressure state, compute pressure with affinities from `defs`.
+        // Exclude dead defs. Includes call return values.
+        // This won't cause spilling.
+        self.take_live_regs(defs);
+    }
+
+    // Collect register uses that are noteworthy in one of the following ways:
+    //
+    // 1. It's a fixed register constraint.
+    // 2. It's a use of a spilled value.
+    // 3. It's a tied register constraint and the value isn't killed.
+    //
+    // We are assuming here that if a value is used both by a fixed register operand and a register
+    // class operand, the two are compatible. We are also assuming that two register class
+    // operands are always compatible.
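+    // (Incompatible duplicate uses are the cases resolved by the copies inserted in
+    // `process_reg_uses` below; see point 2 of the module documentation.)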
+ fn collect_reg_uses( + &mut self, + inst: Inst, + block: Block, + constraints: Option<&RecipeConstraints>, + ) { + let args = self.cur.func.dfg.inst_args(inst); + let num_fixed_ins = if let Some(constraints) = constraints { + for (idx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() { + let mut reguse = RegUse::new(arg, idx, op.regclass.into()); + let lr = &self.liveness[arg]; + match op.kind { + ConstraintKind::Stack => continue, + ConstraintKind::FixedReg(_) => reguse.fixed = true, + ConstraintKind::Tied(_) => { + // A tied operand must kill the used value. + reguse.tied = !lr.killed_at(inst, block, &self.cur.func.layout); + } + ConstraintKind::FixedTied(_) => { + reguse.fixed = true; + reguse.tied = !lr.killed_at(inst, block, &self.cur.func.layout); + } + ConstraintKind::Reg => {} + } + if lr.affinity.is_stack() { + reguse.spilled = true; + } + + // Only collect the interesting register uses. + if reguse.fixed || reguse.tied || reguse.spilled { + debug!(" reguse: {}", reguse); + self.reg_uses.push(reguse); + } + } + constraints.ins.len() + } else { + // A non-ghost instruction with no constraints can't have any + // fixed operands. + 0 + }; + + // Similarly, for return instructions, collect uses of ABI-defined + // return values. + if self.cur.func.dfg[inst].opcode().is_return() { + debug_assert_eq!( + self.cur.func.dfg.inst_variable_args(inst).len(), + self.cur.func.signature.returns.len(), + "The non-fixed arguments in a return should follow the function's signature." + ); + for (ret_idx, (ret, &arg)) in + self.cur.func.signature.returns.iter().zip(args).enumerate() + { + let idx = num_fixed_ins + ret_idx; + let unit = match ret.location { + ArgumentLoc::Unassigned => { + panic!("function return signature should be legalized") + } + ArgumentLoc::Reg(unit) => unit, + ArgumentLoc::Stack(_) => continue, + }; + let toprc = toprc_containing_regunit(unit, &self.reginfo); + let mut reguse = RegUse::new(arg, idx, toprc.into()); + reguse.fixed = true; + + debug!(" reguse: {}", reguse); + self.reg_uses.push(reguse); + } + } + } + + // Collect register uses from the ABI input constraints. + fn collect_abi_reg_uses(&mut self, inst: Inst, sig: SigRef) { + let num_fixed_args = self.cur.func.dfg[inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + let args = self.cur.func.dfg.inst_variable_args(inst); + for (idx, (abi, &arg)) in self.cur.func.dfg.signatures[sig] + .params + .iter() + .zip(args) + .enumerate() + { + if abi.location.is_reg() { + let (rci, spilled) = match self.liveness[arg].affinity { + Affinity::Reg(rci) => (rci, false), + Affinity::Stack => ( + self.cur.isa.regclass_for_abi_type(abi.value_type).into(), + true, + ), + Affinity::Unassigned => panic!("Missing affinity for {}", arg), + }; + let mut reguse = RegUse::new(arg, num_fixed_args + idx, rci); + reguse.fixed = true; + reguse.spilled = spilled; + self.reg_uses.push(reguse); + } + } + } + + // Process multiple register uses to resolve potential conflicts. + // + // Look for multiple uses of the same value in `self.reg_uses` and insert copies as necessary. + // Trigger spilling if any of the temporaries cause the register pressure to become too high. + // + // Leave `self.reg_uses` empty. + fn process_reg_uses(&mut self, inst: Inst, tracker: &LiveValueTracker) { + // We're looking for multiple uses of the same value, so start by sorting by value. The + // secondary `opidx` key makes it possible to use an unstable (non-allocating) sort. 
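+        // After the sort, duplicate uses of a value are adjacent, so the fixed-use check below
+        // only has to look at the previous entry.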
+        self.reg_uses.sort_unstable_by_key(|u| (u.value, u.opidx));
+
+        self.cur.use_srcloc(inst);
+        for i in 0..self.reg_uses.len() {
+            let ru = self.reg_uses[i];
+
+            // Do we need to insert a copy for this use?
+            let need_copy = if ru.tied {
+                true
+            } else if ru.fixed {
+                // This is a fixed register use which doesn't necessarily require a copy.
+                // Make a copy only if this is not the first use of the value.
+                self.reg_uses
+                    .get(i.wrapping_sub(1))
+                    .map_or(false, |ru2| ru2.value == ru.value)
+            } else {
+                false
+            };
+
+            if need_copy {
+                let copy = self.insert_copy(ru.value, ru.rci);
+                self.cur.func.dfg.inst_args_mut(inst)[ru.opidx as usize] = copy;
+            }
+
+            // Even if we don't insert a copy, we may need to account for register pressure for the
+            // reload pass.
+            if need_copy || ru.spilled {
+                let rc = self.reginfo.rc(ru.rci);
+                while let Err(mask) = self.pressure.take_transient(rc) {
+                    debug!("Copy of {} reg causes spill", rc);
+                    // Spill a live register that is *not* used by the current instruction.
+                    // Spilling a use wouldn't help.
+                    //
+                    // Do allow spilling of block arguments on branches. This is safe since we spill
+                    // the whole virtual register which includes the matching block parameter value
+                    // at the branch destination. It is also necessary since there can be
+                    // arbitrarily many block arguments.
+                    match {
+                        let args = if self.cur.func.dfg[inst].opcode().is_branch() {
+                            self.cur.func.dfg.inst_fixed_args(inst)
+                        } else {
+                            self.cur.func.dfg.inst_args(inst)
+                        };
+                        self.spill_candidate(
+                            mask,
+                            tracker.live().iter().filter(|lv| !args.contains(&lv.value)),
+                        )
+                    } {
+                        Some(cand) => self.spill_reg(cand),
+                        None => panic!(
+                            "Ran out of {} registers when inserting copy before {}",
+                            rc,
+                            self.cur.display_inst(inst)
+                        ),
+                    }
+                }
+            }
+        }
+        self.pressure.reset_transient();
+        self.reg_uses.clear()
+    }
+
+    // Find a spill candidate from `candidates` whose top-level register class is in `mask`.
+    fn spill_candidate<'ii, II>(&self, mask: RegClassMask, candidates: II) -> Option<Value>
+    where
+        II: IntoIterator<Item = &'ii LiveValue>,
+    {
+        // Find the best viable spill candidate.
+        //
+        // The very simple strategy implemented here is to spill the value with the earliest def in
+        // the reverse post-order. This strategy depends on a good reload pass to generate good
+        // code.
+        //
+        // We know that all candidate defs dominate the current instruction, so one of them will
+        // dominate the others. That is the earliest def.
+        candidates
+            .into_iter()
+            .filter_map(|lv| {
+                // Viable candidates are registers in one of the `mask` classes, and not already in
+                // the spill set.
+                if let Affinity::Reg(rci) = lv.affinity {
+                    let rc = self.reginfo.rc(rci);
+                    if (mask & (1 << rc.toprc)) != 0 && !self.spills.contains(&lv.value) {
+                        // Here, `lv` is a viable spill candidate.
+                        return Some(lv.value);
+                    }
+                }
+                None
+            })
+            .min_by(|&a, &b| {
+                // Find the minimum candidate according to the RPO of their defs.
+                self.domtree.rpo_cmp(
+                    self.cur.func.dfg.value_def(a),
+                    self.cur.func.dfg.value_def(b),
+                    &self.cur.func.layout,
+                )
+            })
+    }
+
+    /// Spill `value` immediately by
+    ///
+    /// 1. Changing its affinity to `Stack` which marks the spill.
+    /// 2. Removing the value from the pressure tracker.
+    /// 3. Adding the value to `self.spills` for later reference by `process_spills`.
+    ///
+    /// Note that this does not update the cached affinity in the live value tracker. Call
+    /// `process_spills` to do that.
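+    ///
+    /// All values in the congruence class of `value` are spilled to a single shared stack slot,
+    /// as described in the `virtregs` module documentation.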
+ fn spill_reg(&mut self, value: Value) {
+ if let Affinity::Reg(rci) = self.liveness.spill(value) {
+ let rc = self.reginfo.rc(rci);
+ self.pressure.free(rc);
+ self.spills.push(value);
+ debug!("Spilled {}:{} -> {}", value, rc, self.pressure);
+ } else {
+ panic!("Cannot spill {} that was already on the stack", value);
+ }
+
+ // Assign a spill slot for the whole virtual register.
+ let ss = self
+ .cur
+ .func
+ .stack_slots
+ .make_spill_slot(self.cur.func.dfg.value_type(value));
+ for &v in self.virtregs.congruence_class(&value) {
+ self.liveness.spill(v);
+ self.cur.func.locations[v] = ValueLoc::Stack(ss);
+ }
+ }
+
+ /// Process any pending spills in the `self.spills` vector.
+ ///
+ /// It is assumed that spills are removed from the pressure tracker immediately, see
+ /// `spill_reg` above.
+ ///
+ /// We also need to update the live range affinity and remove spilled values from the live
+ /// value tracker.
+ fn process_spills(&mut self, tracker: &mut LiveValueTracker) {
+ if !self.spills.is_empty() {
+ tracker.process_spills(|v| self.spills.contains(&v));
+ self.spills.clear()
+ }
+ }
+
+ /// Insert a `copy value` before the current instruction and give it a live range extending to
+ /// the current instruction.
+ ///
+ /// Returns the new local value created.
+ fn insert_copy(&mut self, value: Value, rci: RegClassIndex) -> Value {
+ let copy = self.cur.ins().copy(value);
+ let inst = self.cur.built_inst();
+
+ // Update live ranges.
+ self.liveness.create_dead(copy, inst, Affinity::Reg(rci));
+ self.liveness.extend_locally(
+ copy,
+ self.cur.func.layout.pp_block(inst),
+ self.cur.current_inst().expect("must be at an instruction"),
+ &self.cur.func.layout,
+ );
+
+ copy
+ }
+}
+
+/// Struct representing a register use of a value.
+/// Used to detect multiple uses of the same value with incompatible register constraints.
+#[derive(Clone, Copy)]
+struct RegUse {
+ value: Value,
+ opidx: u16,
+
+ // Register class required by the use.
+ rci: RegClassIndex,
+
+ // A use with a fixed register constraint.
+ fixed: bool,
+
+ // A register use of a spilled value.
+ spilled: bool,
+
+ // A use with a tied register constraint *and* the used value is not killed.
+ tied: bool,
+}
+
+impl RegUse {
+ fn new(value: Value, idx: usize, rci: RegClassIndex) -> Self {
+ Self {
+ value,
+ opidx: idx as u16,
+ rci,
+ fixed: false,
+ spilled: false,
+ tied: false,
+ }
+ }
+}
+
+impl fmt::Display for RegUse {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}@op{}", self.value, self.opidx)?;
+ if self.fixed {
+ write!(f, "/fixed")?;
+ }
+ if self.spilled {
+ write!(f, "/spilled")?;
+ }
+ if self.tied {
+ write!(f, "/tied")?;
+ }
+ Ok(())
+ }
+}
diff --git a/cranelift/codegen/src/regalloc/virtregs.rs b/cranelift/codegen/src/regalloc/virtregs.rs
new file mode 100644
index 0000000000..28af9e22df
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/virtregs.rs
@@ -0,0 +1,505 @@
+//! Virtual registers.
+//!
+//! A virtual register is a set of related SSA values whose live ranges don't interfere. If all the
+//! values in a virtual register are assigned to the same location, fewer copies will result in the
+//! output.
+//!
+//! A virtual register is typically built by merging together SSA values that are "phi-related" -
+//! that is, one value is passed as a block argument to a branch and the other is the block
+//! parameter value itself.
+//!
+//! If any values in a virtual register are spilled, they will use the same stack slot. This avoids
+//! memory-to-memory copies when a spilled value is passed as a block argument.
+
+use crate::dbg::DisplayList;
+use crate::dominator_tree::DominatorTreePreorder;
+use crate::entity::entity_impl;
+use crate::entity::{EntityList, ListPool};
+use crate::entity::{Keys, PrimaryMap, SecondaryMap};
+use crate::ir::{Function, Value};
+use crate::packed_option::PackedOption;
+use alloc::vec::Vec;
+use core::cmp::Ordering;
+use core::fmt;
+use core::slice;
+use smallvec::SmallVec;
+
+/// A virtual register reference.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct VirtReg(u32);
+entity_impl!(VirtReg, "vreg");
+
+type ValueList = EntityList<Value>;
+
+/// Collection of virtual registers.
+///
+/// Each virtual register is a list of values. Also maintain a map from values to their unique
+/// virtual register, if any.
+pub struct VirtRegs {
+ /// Memory pool for the value lists.
+ pool: ListPool<Value>,
+
+ /// The primary table of virtual registers.
+ vregs: PrimaryMap<VirtReg, ValueList>,
+
+ /// Allocated virtual register numbers that are no longer in use.
+ unused_vregs: Vec<VirtReg>,
+
+ /// Each value belongs to at most one virtual register.
+ value_vregs: SecondaryMap<Value, PackedOption<VirtReg>>,
+
+ /// Table used during the union-find phase while `vregs` is empty.
+ union_find: SecondaryMap<Value, i32>,
+
+ /// Values that have been activated in the `union_find` table, but not yet added to any virtual
+ /// registers by the `finish_union_find()` function.
+ pending_values: Vec<Value>,
+}
+
+impl VirtRegs {
+ /// Create a new virtual register collection.
+ pub fn new() -> Self {
+ Self {
+ pool: ListPool::new(),
+ vregs: PrimaryMap::new(),
+ unused_vregs: Vec::new(),
+ value_vregs: SecondaryMap::new(),
+ union_find: SecondaryMap::new(),
+ pending_values: Vec::new(),
+ }
+ }
+
+ /// Clear all virtual registers.
+ pub fn clear(&mut self) {
+ self.vregs.clear();
+ self.unused_vregs.clear();
+ self.value_vregs.clear();
+ self.pool.clear();
+ self.union_find.clear();
+ self.pending_values.clear();
+ }
+
+ /// Get the virtual register containing `value`, if any.
+ pub fn get(&self, value: Value) -> Option<VirtReg> {
+ self.value_vregs[value].into()
+ }
+
+ /// Get the list of values in `vreg`.
+ pub fn values(&self, vreg: VirtReg) -> &[Value] {
+ self.vregs[vreg].as_slice(&self.pool)
+ }
+
+ /// Get an iterator over all virtual registers.
+ pub fn all_virtregs(&self) -> Keys<VirtReg> {
+ self.vregs.keys()
+ }
+
+ /// Get the congruence class of `value`.
+ ///
+ /// If `value` belongs to a virtual register, the congruence class is the values of the virtual
+ /// register. Otherwise it is just the value itself.
+ #[cfg_attr(feature = "cargo-clippy", allow(clippy::trivially_copy_pass_by_ref))]
+ pub fn congruence_class<'a, 'b>(&'a self, value: &'b Value) -> &'b [Value]
+ where
+ 'a: 'b,
+ {
+ self.get(*value)
+ .map_or_else(|| slice::from_ref(value), |vr| self.values(vr))
+ }
+
+ /// Check if `a` and `b` belong to the same congruence class.
+ pub fn same_class(&self, a: Value, b: Value) -> bool {
+ match (self.get(a), self.get(b)) {
+ (Some(va), Some(vb)) => va == vb,
+ _ => a == b,
+ }
+ }
+
+ /// Sort the values in `vreg` according to the dominator tree pre-order.
+ ///
+ /// Returns the slice of sorted values which `values(vreg)` will also return from now on.
+ pub fn sort_values(
+ &mut self,
+ vreg: VirtReg,
+ func: &Function,
+ preorder: &DominatorTreePreorder,
+ ) -> &[Value] {
+ let s = self.vregs[vreg].as_mut_slice(&mut self.pool);
+ s.sort_unstable_by(|&a, &b| preorder.pre_cmp_def(a, b, func));
+ s
+ }
+
+ /// Insert a single value into a sorted virtual register.
+ ///
+ /// It is assumed that the virtual register containing `big` is already sorted by
+ /// `sort_values()`, and that `single` does not already belong to a virtual register.
+ ///
+ /// If `big` is not part of a virtual register, one will be created.
+ pub fn insert_single(
+ &mut self,
+ big: Value,
+ single: Value,
+ func: &Function,
+ preorder: &DominatorTreePreorder,
+ ) -> VirtReg {
+ debug_assert_eq!(self.get(single), None, "Expected singleton {}", single);
+
+ // Make sure `big` has a vreg.
+ let vreg = self.get(big).unwrap_or_else(|| {
+ let vr = self.alloc();
+ self.vregs[vr].push(big, &mut self.pool);
+ self.value_vregs[big] = vr.into();
+ vr
+ });
+
+ // Determine the insertion position for `single`.
+ let index = match self
+ .values(vreg)
+ .binary_search_by(|&v| preorder.pre_cmp_def(v, single, func))
+ {
+ Ok(_) => panic!("{} already in {}", single, vreg),
+ Err(i) => i,
+ };
+ self.vregs[vreg].insert(index, single, &mut self.pool);
+ self.value_vregs[single] = vreg.into();
+ vreg
+ }
+
+ /// Remove a virtual register.
+ ///
+ /// The values in `vreg` become singletons, and the virtual register number may be reused in
+ /// the future.
+ pub fn remove(&mut self, vreg: VirtReg) {
+ // Start by reassigning all the values.
+ for &v in self.vregs[vreg].as_slice(&self.pool) {
+ let old = self.value_vregs[v].take();
+ debug_assert_eq!(old, Some(vreg));
+ }
+
+ self.vregs[vreg].clear(&mut self.pool);
+ self.unused_vregs.push(vreg);
+ }
+
+ /// Allocate a new empty virtual register.
+ fn alloc(&mut self) -> VirtReg {
+ self.unused_vregs
+ .pop()
+ .unwrap_or_else(|| self.vregs.push(Default::default()))
+ }
+
+ /// Unify `values` into a single virtual register.
+ ///
+ /// The values in the slice can be singletons or they can belong to a virtual register already.
+ /// If a value belongs to a virtual register, all of the values in that register must be
+ /// present.
+ ///
+ /// The values are assumed to already be in topological order.
+ pub fn unify(&mut self, values: &[Value]) -> VirtReg {
+ // Start by clearing all virtual registers involved.
+ let mut singletons = 0;
+ let mut cleared = 0;
+ for &val in values {
+ match self.get(val) {
+ None => singletons += 1,
+ Some(vreg) => {
+ if !self.vregs[vreg].is_empty() {
+ cleared += self.vregs[vreg].len(&self.pool);
+ self.vregs[vreg].clear(&mut self.pool);
+ self.unused_vregs.push(vreg);
+ }
+ }
+ }
+ }
+
+ debug_assert_eq!(
+ values.len(),
+ singletons + cleared,
+ "Can't unify partial virtual registers"
+ );
+
+ let vreg = self.alloc();
+ self.vregs[vreg].extend(values.iter().cloned(), &mut self.pool);
+ for &v in values {
+ self.value_vregs[v] = vreg.into();
+ }
+
+ vreg
+ }
+}
+
+impl fmt::Display for VirtRegs {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ for vreg in self.all_virtregs() {
+ write!(f, "\n{} = {}", vreg, DisplayList(self.values(vreg)))?;
+ }
+ Ok(())
+ }
+}
+
+/// Expanded version of a union-find table entry.
+enum UFEntry {
+ /// This value is a set leader. The embedded number is the set's rank.
+ Rank(u32),
+
+ /// This value belongs to the same set as the linked value.
+ Link(Value),
+}
+
+/// The `union_find` table contains `i32` entries that are interpreted as follows:
+///
+/// x = 0: The value belongs to its own singleton set.
+/// x > 0: The value is the leader of a set with rank x.
+/// x < 0: The value belongs to the same set as the value numbered !x.
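+///
+/// As a worked example (hypothetical values): after a single `union(v1, v2)` where both values
+/// were fresh, `v1` becomes the leader of a rank-1 set (`union_find[v1] == 1`) while `v2` links
+/// to it with `union_find[v2] == encode_link(v1) == !1 == -2`.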
+///
+/// The rank of a set is an upper bound on the number of links that must be followed from a member
+/// of the set to the set leader.
+///
+/// A singleton set is the same as a set with rank 0. It contains only the leader value.
+impl UFEntry {
+ /// Decode a table entry.
+ fn decode(x: i32) -> Self {
+ if x < 0 {
+ Self::Link(Value::from_u32((!x) as u32))
+ } else {
+ Self::Rank(x as u32)
+ }
+ }
+
+ /// Encode a link entry.
+ fn encode_link(v: Value) -> i32 {
+ !(v.as_u32() as i32)
+ }
+}
+
+/// Union-find algorithm for building virtual registers.
+///
+/// Before values are added to virtual registers, it is possible to use a union-find algorithm to
+/// construct virtual registers efficiently. The support implemented here is used as follows:
+///
+/// 1. Repeatedly call the `union(a, b)` method to request that `a` and `b` be placed in the same
+/// virtual register.
+/// 2. When done, call `finish_union_find()` to construct the virtual register sets based on the
+/// `union()` calls.
+///
+/// The values that were passed to `union(a, b)` must not belong to any existing virtual registers
+/// by the time `finish_union_find()` is called.
+///
+/// For more information on the algorithm implemented here, see Chapter 21 "Data Structures for
+/// Disjoint Sets" of Cormen, Leiserson, Rivest, Stein, "Introduction to Algorithms", 3rd Ed.
+///
+/// The [Wikipedia entry on disjoint-set data
+/// structures](https://en.wikipedia.org/wiki/Disjoint-set_data_structure) is also good.
+impl VirtRegs {
+ /// Find the leader value and rank of the set containing `v`.
+ /// Compress the path if needed.
+ fn find(&mut self, mut val: Value) -> (Value, u32) {
+ let mut val_stack = SmallVec::<[Value; 8]>::new();
+ let found = loop {
+ match UFEntry::decode(self.union_find[val]) {
+ UFEntry::Rank(rank) => break (val, rank),
+ UFEntry::Link(parent) => {
+ val_stack.push(val);
+ val = parent;
+ }
+ }
+ };
+ // Compress the path
+ while let Some(val) = val_stack.pop() {
+ self.union_find[val] = UFEntry::encode_link(found.0);
+ }
+ found
+ }
+
+ /// Union the two sets containing `a` and `b`.
+ ///
+ /// This ensures that `a` and `b` will belong to the same virtual register after calling
+ /// `finish_union_find()`.
+ pub fn union(&mut self, a: Value, b: Value) {
+ let (leader_a, rank_a) = self.find(a);
+ let (leader_b, rank_b) = self.find(b);
+
+ if leader_a == leader_b {
+ return;
+ }
+
+ // The first time we see a value, its rank will be 0. Add it to the list of pending values.
+ if rank_a == 0 {
+ debug_assert_eq!(a, leader_a);
+ self.pending_values.push(a);
+ }
+ if rank_b == 0 {
+ debug_assert_eq!(b, leader_b);
+ self.pending_values.push(b);
+ }
+
+ // Merge into the set with the greater rank. This preserves the invariant that the rank is
+ // an upper bound on the number of links to the leader.
+ match rank_a.cmp(&rank_b) {
+ Ordering::Less => {
+ self.union_find[leader_a] = UFEntry::encode_link(leader_b);
+ }
+ Ordering::Greater => {
+ self.union_find[leader_b] = UFEntry::encode_link(leader_a);
+ }
+ Ordering::Equal => {
+ // When the two sets have the same rank, we arbitrarily pick the a-set to preserve.
+ // We need to increase the rank by one since the elements in the b-set are now one
+ // link further away from the leader.
+ self.union_find[leader_a] += 1;
+ self.union_find[leader_b] = UFEntry::encode_link(leader_a);
+ }
+ }
+ }
+
+ /// Compute virtual registers based on previous calls to `union(a, b)`.
+ ///
+ /// This terminates the union-find algorithm, so the next time `union()` is called, it is for a
+ /// new independent batch of values.
+ ///
+ /// The values in each virtual register will be ordered according to when they were first
+ /// passed to `union()`, but backwards. It is expected that `sort_values()` will be used to
+ /// create a more sensible value order.
+ ///
+ /// The new virtual registers will be appended to `new_vregs`, if present.
+ pub fn finish_union_find(&mut self, mut new_vregs: Option<&mut Vec<VirtReg>>) {
+ debug_assert_eq!(
+ self.pending_values.iter().find(|&&v| self.get(v).is_some()),
+ None,
+ "Values participating in union-find must not belong to existing virtual registers"
+ );
+
+ while let Some(val) = self.pending_values.pop() {
+ let (leader, _) = self.find(val);
+
+ // Get the vreg for `leader`, or create it.
+ let vreg = self.get(leader).unwrap_or_else(|| {
+ // Allocate a vreg for `leader`, but leave it empty.
+ let vr = self.alloc();
+ if let Some(ref mut vec) = new_vregs {
+ vec.push(vr);
+ }
+ self.value_vregs[leader] = vr.into();
+ vr
+ });
+
+ // Push values in `pending_values` order, including when `val == leader`.
+ self.vregs[vreg].push(val, &mut self.pool);
+ self.value_vregs[val] = vreg.into();
+
+ // Clear the entry in the union-find table. The `find(val)` call may still look at this
+ // entry in a future iteration, but that is ok. It will return a rank 0 leader that has
+ // already been assigned to the correct virtual register.
+ self.union_find[val] = 0;
+ }
+
+ // We do *not* call `union_find.clear()` here because re-initializing the table for
+ // sparse use takes time linear in the number of values in the function. Instead we reset
+ // the entries that are known to be non-zero in the loop above.
+ } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::entity::EntityRef; + use crate::ir::Value; + + #[test] + fn empty_union_find() { + let mut vregs = VirtRegs::new(); + vregs.finish_union_find(None); + assert_eq!(vregs.all_virtregs().count(), 0); + } + + #[test] + fn union_self() { + let mut vregs = VirtRegs::new(); + let v1 = Value::new(1); + vregs.union(v1, v1); + vregs.finish_union_find(None); + assert_eq!(vregs.get(v1), None); + assert_eq!(vregs.all_virtregs().count(), 0); + } + + #[test] + fn union_pair() { + let mut vregs = VirtRegs::new(); + let v1 = Value::new(1); + let v2 = Value::new(2); + vregs.union(v1, v2); + vregs.finish_union_find(None); + assert_eq!(vregs.congruence_class(&v1), &[v2, v1]); + assert_eq!(vregs.congruence_class(&v2), &[v2, v1]); + assert_eq!(vregs.all_virtregs().count(), 1); + } + + #[test] + fn union_pair_backwards() { + let mut vregs = VirtRegs::new(); + let v1 = Value::new(1); + let v2 = Value::new(2); + vregs.union(v2, v1); + vregs.finish_union_find(None); + assert_eq!(vregs.congruence_class(&v1), &[v1, v2]); + assert_eq!(vregs.congruence_class(&v2), &[v1, v2]); + assert_eq!(vregs.all_virtregs().count(), 1); + } + + #[test] + fn union_tree() { + let mut vregs = VirtRegs::new(); + let v1 = Value::new(1); + let v2 = Value::new(2); + let v3 = Value::new(3); + let v4 = Value::new(4); + + vregs.union(v2, v4); + vregs.union(v3, v1); + // Leaders: v2, v3 + vregs.union(v4, v1); + vregs.finish_union_find(None); + assert_eq!(vregs.congruence_class(&v1), &[v1, v3, v4, v2]); + assert_eq!(vregs.congruence_class(&v2), &[v1, v3, v4, v2]); + assert_eq!(vregs.congruence_class(&v3), &[v1, v3, v4, v2]); + assert_eq!(vregs.congruence_class(&v4), &[v1, v3, v4, v2]); + assert_eq!(vregs.all_virtregs().count(), 1); + } + + #[test] + fn union_two() { + let mut vregs = VirtRegs::new(); + let v1 = Value::new(1); + let v2 = Value::new(2); + let v3 = Value::new(3); + let v4 = Value::new(4); + + vregs.union(v2, v4); + vregs.union(v3, v1); + // Leaders: v2, v3 + vregs.finish_union_find(None); + assert_eq!(vregs.congruence_class(&v1), &[v1, v3]); + assert_eq!(vregs.congruence_class(&v2), &[v4, v2]); + assert_eq!(vregs.congruence_class(&v3), &[v1, v3]); + assert_eq!(vregs.congruence_class(&v4), &[v4, v2]); + assert_eq!(vregs.all_virtregs().count(), 2); + } + + #[test] + fn union_uneven() { + let mut vregs = VirtRegs::new(); + let v1 = Value::new(1); + let v2 = Value::new(2); + let v3 = Value::new(3); + let v4 = Value::new(4); + + vregs.union(v2, v4); // Rank 0-0 + vregs.union(v3, v2); // Rank 0-1 + vregs.union(v2, v1); // Rank 1-0 + vregs.finish_union_find(None); + assert_eq!(vregs.congruence_class(&v1), &[v1, v3, v4, v2]); + assert_eq!(vregs.congruence_class(&v2), &[v1, v3, v4, v2]); + assert_eq!(vregs.congruence_class(&v3), &[v1, v3, v4, v2]); + assert_eq!(vregs.congruence_class(&v4), &[v1, v3, v4, v2]); + assert_eq!(vregs.all_virtregs().count(), 1); + } +} diff --git a/cranelift/codegen/src/result.rs b/cranelift/codegen/src/result.rs new file mode 100644 index 0000000000..7a577d09f2 --- /dev/null +++ b/cranelift/codegen/src/result.rs @@ -0,0 +1,36 @@ +//! Result and error types representing the outcome of compiling a function. + +use crate::verifier::VerifierErrors; +use thiserror::Error; + +/// A compilation error. +/// +/// When Cranelift fails to compile a function, it will return one of these error codes. +#[derive(Error, Debug, PartialEq, Eq)] +pub enum CodegenError { + /// A list of IR verifier errors. 
+ ///
+ /// This always represents a bug, either in the code that generated IR for Cranelift, or a bug
+ /// in Cranelift itself.
+ #[error("Verifier errors")]
+ Verifier(#[from] VerifierErrors),
+
+ /// An implementation limit was exceeded.
+ ///
+ /// Cranelift can compile very large and complicated functions, but the [implementation has
+ /// limits][limits] that cause compilation to fail when they are exceeded.
+ ///
+ /// [limits]: https://cranelift.readthedocs.io/en/latest/ir.html#implementation-limits
+ #[error("Implementation limit exceeded")]
+ ImplLimitExceeded,
+
+ /// The code size for the function is too large.
+ ///
+ /// Different target ISAs may impose a limit on the size of a compiled function. If that limit
+ /// is exceeded, compilation fails.
+ #[error("Code for function is too large")]
+ CodeTooLarge,
+}
+
+/// A convenient alias for a `Result` that uses `CodegenError` as the error type.
+pub type CodegenResult<T> = Result<T, CodegenError>;
diff --git a/cranelift/codegen/src/scoped_hash_map.rs b/cranelift/codegen/src/scoped_hash_map.rs
new file mode 100644
index 0000000000..809d22132a
--- /dev/null
+++ b/cranelift/codegen/src/scoped_hash_map.rs
@@ -0,0 +1,233 @@
+//! `ScopedHashMap`
+//!
+//! This module defines a struct `ScopedHashMap` which defines a `FxHashMap`-like
+//! container that has a concept of scopes that can be entered and exited, such that
+//! values inserted while inside a scope aren't visible outside the scope.
+
+use crate::fx::FxHashMap;
+use core::hash::Hash;
+use core::mem;
+
+#[cfg(not(feature = "std"))]
+use crate::fx::FxHasher;
+#[cfg(not(feature = "std"))]
+type Hasher = core::hash::BuildHasherDefault<FxHasher>;
+
+struct Val<K, V> {
+ value: V,
+ next_key: Option<K>,
+ depth: usize,
+}
+
+/// A view into an occupied entry in a `ScopedHashMap`. It is part of the `Entry` enum.
+pub struct OccupiedEntry<'a, K: 'a, V: 'a> {
+ #[cfg(feature = "std")]
+ entry: super::hash_map::OccupiedEntry<'a, K, Val<K, V>>,
+ #[cfg(not(feature = "std"))]
+ entry: super::hash_map::OccupiedEntry<'a, K, Val<K, V>, Hasher>,
+}
+
+impl<'a, K, V> OccupiedEntry<'a, K, V> {
+ /// Gets a reference to the value in the entry.
+ pub fn get(&self) -> &V {
+ &self.entry.get().value
+ }
+}
+
+/// A view into a vacant entry in a `ScopedHashMap`. It is part of the `Entry` enum.
+pub struct VacantEntry<'a, K: 'a, V: 'a> {
+ #[cfg(feature = "std")]
+ entry: super::hash_map::VacantEntry<'a, K, Val<K, V>>,
+ #[cfg(not(feature = "std"))]
+ entry: super::hash_map::VacantEntry<'a, K, Val<K, V>, Hasher>,
+ next_key: Option<K>,
+ depth: usize,
+}
+
+impl<'a, K: Hash, V> VacantEntry<'a, K, V> {
+ /// Sets the value of the entry with the `VacantEntry`'s key.
+ pub fn insert(self, value: V) {
+ self.entry.insert(Val {
+ value,
+ next_key: self.next_key,
+ depth: self.depth,
+ });
+ }
+}
+
+/// A view into a single entry in a map, which may either be vacant or occupied.
+///
+/// This enum is constructed from the `entry` method on `ScopedHashMap`.
+pub enum Entry<'a, K: 'a, V: 'a> {
+ Occupied(OccupiedEntry<'a, K, V>),
+ Vacant(VacantEntry<'a, K, V>),
+}
+
+/// A wrapper around a `FxHashMap` which adds the concept of scopes. Items inserted
+/// within a scope are removed when the scope is exited.
+///
+/// Shadowing, where one scope has entries with the same keys as a containing scope,
+/// is not supported in this implementation.
+pub struct ScopedHashMap<K, V> {
+ map: FxHashMap<K, Val<K, V>>,
+ last_insert: Option<K>,
+ current_depth: usize,
+}
+
+impl<K, V> ScopedHashMap<K, V>
+where
+ K: PartialEq + Eq + Hash + Clone,
+{
+ /// Creates an empty `ScopedHashMap`.
+ pub fn new() -> Self {
+ Self {
+ map: FxHashMap(),
+ last_insert: None,
+ current_depth: 0,
+ }
+ }
+
+ /// Similar to `FxHashMap::entry`, gets the given key's corresponding entry in the map for
+ /// in-place manipulation.
+ pub fn entry(&mut self, key: K) -> Entry<K, V> {
+ use super::hash_map::Entry::*;
+ match self.map.entry(key) {
+ Occupied(entry) => Entry::Occupied(OccupiedEntry { entry }),
+ Vacant(entry) => {
+ let clone_key = entry.key().clone();
+ Entry::Vacant(VacantEntry {
+ entry,
+ next_key: mem::replace(&mut self.last_insert, Some(clone_key)),
+ depth: self.current_depth,
+ })
+ }
+ }
+ }
+
+ /// Enter a new scope.
+ pub fn increment_depth(&mut self) {
+ // Increment the depth.
+ self.current_depth = self.current_depth.checked_add(1).unwrap();
+ }
+
+ /// Exit the current scope.
+ pub fn decrement_depth(&mut self) {
+ // Remove all elements inserted at the current depth.
+ while let Some(key) = self.last_insert.clone() {
+ use crate::hash_map::Entry::*;
+ match self.map.entry(key) {
+ Occupied(entry) => {
+ if entry.get().depth != self.current_depth {
+ break;
+ }
+ self.last_insert = entry.remove_entry().1.next_key;
+ }
+ Vacant(_) => panic!(),
+ }
+ }
+
+ // Decrement the depth.
+ self.current_depth = self.current_depth.checked_sub(1).unwrap();
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn basic() {
+ let mut map: ScopedHashMap<i32, i32> = ScopedHashMap::new();
+
+ match map.entry(0) {
+ Entry::Occupied(_entry) => panic!(),
+ Entry::Vacant(entry) => entry.insert(1),
+ }
+ match map.entry(2) {
+ Entry::Occupied(_entry) => panic!(),
+ Entry::Vacant(entry) => entry.insert(8),
+ }
+ match map.entry(2) {
+ Entry::Occupied(entry) => assert!(*entry.get() == 8),
+ Entry::Vacant(_entry) => panic!(),
+ }
+ map.increment_depth();
+ match map.entry(2) {
+ Entry::Occupied(entry) => assert!(*entry.get() == 8),
+ Entry::Vacant(_entry) => panic!(),
+ }
+ match map.entry(1) {
+ Entry::Occupied(_entry) => panic!(),
+ Entry::Vacant(entry) => entry.insert(3),
+ }
+ match map.entry(1) {
+ Entry::Occupied(entry) => assert!(*entry.get() == 3),
+ Entry::Vacant(_entry) => panic!(),
+ }
+ match map.entry(0) {
+ Entry::Occupied(entry) => assert!(*entry.get() == 1),
+ Entry::Vacant(_entry) => panic!(),
+ }
+ match map.entry(2) {
+ Entry::Occupied(entry) => assert!(*entry.get() == 8),
+ Entry::Vacant(_entry) => panic!(),
+ }
+ map.decrement_depth();
+ match map.entry(0) {
+ Entry::Occupied(entry) => assert!(*entry.get() == 1),
+ Entry::Vacant(_entry) => panic!(),
+ }
+ match map.entry(2) {
+ Entry::Occupied(entry) => assert!(*entry.get() == 8),
+ Entry::Vacant(_entry) => panic!(),
+ }
+ map.increment_depth();
+ match map.entry(2) {
+ Entry::Occupied(entry) => assert!(*entry.get() == 8),
+ Entry::Vacant(_entry) => panic!(),
+ }
+ match map.entry(1) {
+ Entry::Occupied(_entry) => panic!(),
+ Entry::Vacant(entry) => entry.insert(4),
+ }
+ match map.entry(1) {
+ Entry::Occupied(entry) => assert!(*entry.get() == 4),
+ Entry::Vacant(_entry) => panic!(),
+ }
+ match map.entry(2) {
+ Entry::Occupied(entry) => assert!(*entry.get() == 8),
+ Entry::Vacant(_entry) => panic!(),
+ }
+ map.decrement_depth();
+ map.increment_depth();
+ map.increment_depth();
+ map.increment_depth();
+ match map.entry(2) {
+ Entry::Occupied(entry) => assert!(*entry.get() == 8),
+ Entry::Vacant(_entry) => panic!(),
+ }
+ match map.entry(1) {
+ Entry::Occupied(_entry) => panic!(),
+ Entry::Vacant(entry) => entry.insert(5),
+ }
+ match map.entry(1) {
+ Entry::Occupied(entry) => assert!(*entry.get() == 5),
Entry::Vacant(_entry) => panic!(), + } + match map.entry(2) { + Entry::Occupied(entry) => assert!(*entry.get() == 8), + Entry::Vacant(_entry) => panic!(), + } + map.decrement_depth(); + map.decrement_depth(); + map.decrement_depth(); + match map.entry(2) { + Entry::Occupied(entry) => assert!(*entry.get() == 8), + Entry::Vacant(_entry) => panic!(), + } + match map.entry(1) { + Entry::Occupied(_entry) => panic!(), + Entry::Vacant(entry) => entry.insert(3), + } + } +} diff --git a/cranelift/codegen/src/settings.rs b/cranelift/codegen/src/settings.rs new file mode 100644 index 0000000000..57b9c18f89 --- /dev/null +++ b/cranelift/codegen/src/settings.rs @@ -0,0 +1,443 @@ +//! Shared settings module. +//! +//! This module defines data structures to access the settings defined in the meta language. +//! +//! Each settings group is translated to a `Flags` struct either in this module or in its +//! ISA-specific `settings` module. The struct provides individual getter methods for all of the +//! settings as well as computed predicate flags. +//! +//! The `Flags` struct is immutable once it has been created. A `Builder` instance is used to +//! create it. +//! +//! # Example +//! ``` +//! use cranelift_codegen::settings::{self, Configurable}; +//! +//! let mut b = settings::builder(); +//! b.set("opt_level", "speed_and_size"); +//! +//! let f = settings::Flags::new(b); +//! assert_eq!(f.opt_level(), settings::OptLevel::SpeedAndSize); +//! ``` + +use crate::constant_hash::{probe, simple_hash}; +use crate::isa::TargetIsa; +use alloc::boxed::Box; +use alloc::string::{String, ToString}; +use core::fmt; +use core::str; +use thiserror::Error; + +/// A string-based configurator for settings groups. +/// +/// The `Configurable` protocol allows settings to be modified by name before a finished `Flags` +/// struct is created. +pub trait Configurable { + /// Set the string value of any setting by name. + /// + /// This can set any type of setting whether it is numeric, boolean, or enumerated. + fn set(&mut self, name: &str, value: &str) -> SetResult<()>; + + /// Enable a boolean setting or apply a preset. + /// + /// If the identified setting isn't a boolean or a preset, a `BadType` error is returned. + fn enable(&mut self, name: &str) -> SetResult<()>; +} + +/// Collect settings values based on a template. +#[derive(Clone)] +pub struct Builder { + template: &'static detail::Template, + bytes: Box<[u8]>, +} + +impl Builder { + /// Create a new builder with defaults and names from the given template. + pub fn new(tmpl: &'static detail::Template) -> Self { + Self { + template: tmpl, + bytes: tmpl.defaults.into(), + } + } + + /// Extract contents of builder once everything is configured. + pub fn state_for(self, name: &str) -> Box<[u8]> { + assert_eq!(name, self.template.name); + self.bytes + } + + /// Set the value of a single bit. + fn set_bit(&mut self, offset: usize, bit: u8, value: bool) { + let byte = &mut self.bytes[offset]; + let mask = 1 << bit; + if value { + *byte |= mask; + } else { + *byte &= !mask; + } + } + + /// Apply a preset. The argument is a slice of (mask, value) bytes. + fn apply_preset(&mut self, values: &[(u8, u8)]) { + for (byte, &(mask, value)) in self.bytes.iter_mut().zip(values) { + *byte = (*byte & !mask) | value; + } + } + + /// Look up a descriptor by name. 
+ fn lookup(&self, name: &str) -> SetResult<(usize, detail::Detail)> {
+ match probe(self.template, name, simple_hash(name)) {
+ Err(_) => Err(SetError::BadName(name.to_string())),
+ Ok(entry) => {
+ let d = &self.template.descriptors[self.template.hash_table[entry] as usize];
+ Ok((d.offset as usize, d.detail))
+ }
+ }
+ }
+}
+
+fn parse_bool_value(value: &str) -> SetResult<bool> {
+ match value {
+ "true" | "on" | "yes" | "1" => Ok(true),
+ "false" | "off" | "no" | "0" => Ok(false),
+ _ => Err(SetError::BadValue("bool".to_string())),
+ }
+}
+
+fn parse_enum_value(value: &str, choices: &[&str]) -> SetResult<u8> {
+ match choices.iter().position(|&tag| tag == value) {
+ Some(idx) => Ok(idx as u8),
+ None => {
+ // TODO: Use `join` instead of this code, once
+ // https://github.com/rust-lang/rust/issues/27747 is resolved.
+ let mut all_choices = String::new();
+ let mut first = true;
+ for choice in choices {
+ if first {
+ first = false
+ } else {
+ all_choices += ", ";
+ }
+ all_choices += choice;
+ }
+ Err(SetError::BadValue(format!("any among {}", all_choices)))
+ }
+ }
+}
+
+impl Configurable for Builder {
+ fn enable(&mut self, name: &str) -> SetResult<()> {
+ use self::detail::Detail;
+ let (offset, detail) = self.lookup(name)?;
+ match detail {
+ Detail::Bool { bit } => {
+ self.set_bit(offset, bit, true);
+ Ok(())
+ }
+ Detail::Preset => {
+ self.apply_preset(&self.template.presets[offset..]);
+ Ok(())
+ }
+ _ => Err(SetError::BadType),
+ }
+ }
+
+ fn set(&mut self, name: &str, value: &str) -> SetResult<()> {
+ use self::detail::Detail;
+ let (offset, detail) = self.lookup(name)?;
+ match detail {
+ Detail::Bool { bit } => {
+ self.set_bit(offset, bit, parse_bool_value(value)?);
+ }
+ Detail::Num => {
+ self.bytes[offset] = value
+ .parse()
+ .map_err(|_| SetError::BadValue("number".to_string()))?;
+ }
+ Detail::Enum { last, enumerators } => {
+ self.bytes[offset] =
+ parse_enum_value(value, self.template.enums(last, enumerators))?;
+ }
+ Detail::Preset => return Err(SetError::BadName(name.to_string())),
+ }
+ Ok(())
+ }
+}
+
+/// An error produced when changing a setting.
+#[derive(Error, Debug, PartialEq, Eq)]
+pub enum SetError {
+ /// No setting by this name exists.
+ #[error("No existing setting named '{0}'")]
+ BadName(String),
+
+ /// Type mismatch for setting (e.g., setting an enum setting as a bool).
+ #[error("Trying to set a setting with the wrong type")]
+ BadType,
+
+ /// This is not a valid value for this setting.
+ #[error("Unexpected value for a setting, expected {0}")]
+ BadValue(String),
+}
+
+/// A result returned when changing a setting.
+pub type SetResult<T> = Result<T, SetError>;
+
+/// A reference to just the boolean predicates of a settings object.
+///
+/// The settings objects themselves are generated and appear in the `isa/*/settings.rs` modules.
+/// Each settings object provides a `predicate_view()` method that makes it possible to query
+/// ISA predicates by number.
+#[derive(Clone, Copy)]
+pub struct PredicateView<'a>(&'a [u8]);
+
+impl<'a> PredicateView<'a> {
+ /// Create a new view of a precomputed predicate vector.
+ ///
+ /// See the `predicate_view()` method on the various `Flags` types defined for each ISA.
+ pub fn new(bits: &'a [u8]) -> Self {
+ PredicateView(bits)
+ }
+
+ /// Check a numbered predicate.
+ pub fn test(self, p: usize) -> bool {
+ self.0[p / 8] & (1 << (p % 8)) != 0
+ }
+}
+
+/// Implementation details for generated code.
+///
+/// This module holds definitions that need to be public so they can be instantiated by generated
+/// code in other modules.
+pub mod detail {
+ use crate::constant_hash;
+ use core::fmt;
+
+ /// A settings group template.
+ pub struct Template {
+ /// Name of the settings group.
+ pub name: &'static str,
+ /// List of setting descriptors.
+ pub descriptors: &'static [Descriptor],
+ /// Union of all enumerators.
+ pub enumerators: &'static [&'static str],
+ /// Hash table of settings.
+ pub hash_table: &'static [u16],
+ /// Default values.
+ pub defaults: &'static [u8],
+ /// Pairs of (mask, value) for presets.
+ pub presets: &'static [(u8, u8)],
+ }
+
+ impl Template {
+ /// Get enumerators corresponding to a `Detail::Enum`.
+ pub fn enums(&self, last: u8, enumerators: u16) -> &[&'static str] {
+ let from = enumerators as usize;
+ let len = usize::from(last) + 1;
+ &self.enumerators[from..from + len]
+ }
+
+ /// Format a setting value as a TOML string. This is mostly for use by the generated
+ /// `Display` implementation.
+ pub fn format_toml_value(
+ &self,
+ detail: Detail,
+ byte: u8,
+ f: &mut fmt::Formatter,
+ ) -> fmt::Result {
+ match detail {
+ Detail::Bool { bit } => write!(f, "{}", (byte & (1 << bit)) != 0),
+ Detail::Num => write!(f, "{}", byte),
+ Detail::Enum { last, enumerators } => {
+ if byte <= last {
+ let tags = self.enums(last, enumerators);
+ write!(f, "\"{}\"", tags[usize::from(byte)])
+ } else {
+ write!(f, "{}", byte)
+ }
+ }
+ // Presets aren't printed. They are reflected in the other settings.
+ Detail::Preset { .. } => Ok(()),
+ }
+ }
+ }
+
+ /// The template contains a hash table for by-name lookup.
+ impl<'a> constant_hash::Table<&'a str> for Template {
+ fn len(&self) -> usize {
+ self.hash_table.len()
+ }
+
+ fn key(&self, idx: usize) -> Option<&'a str> {
+ let e = self.hash_table[idx] as usize;
+ if e < self.descriptors.len() {
+ Some(self.descriptors[e].name)
+ } else {
+ None
+ }
+ }
+ }
+
+ /// A setting descriptor holds the information needed to generically set and print a setting.
+ ///
+ /// Each settings group will be represented as a constant DESCRIPTORS array.
+ pub struct Descriptor {
+ /// Lower snake-case name of setting as defined in meta.
+ pub name: &'static str,
+
+ /// Offset of byte containing this setting.
+ pub offset: u32,
+
+ /// Additional details, depending on the kind of setting.
+ pub detail: Detail,
+ }
+
+ /// The different kinds of settings along with descriptor bits that depend on the kind.
+ #[derive(Clone, Copy)]
+ pub enum Detail {
+ /// A boolean setting only uses one bit, numbered from LSB.
+ Bool {
+ /// 0-7.
+ bit: u8,
+ },
+
+ /// A numerical setting uses the whole byte.
+ Num,
+
+ /// An Enum setting uses a range of enumerators.
+ Enum {
+ /// Numerical value of last enumerator, allowing for 1-256 enumerators.
+ last: u8,
+
+ /// First enumerator in the ENUMERATORS table.
+ enumerators: u16,
+ },
+
+ /// A preset is not an individual setting, it is a collection of settings applied at once.
+ ///
+ /// The `Descriptor::offset` field refers to the `PRESETS` table.
+ Preset,
+ }
+
+ impl Detail {
+ /// Check if a detail is a Detail::Preset. Useful because the Descriptor
+ /// offset field has a different meaning when the detail is a preset.
+ pub fn is_preset(self) -> bool {
+ match self {
+ Self::Preset => true,
+ _ => false,
+ }
+ }
+ }
+}
+
+// Include code generated by `meta/gen_settings.rs`. This file contains a public `Flags` struct
+// with an implementation for all of the settings defined in
+// `cranelift-codegen/meta/src/shared/settings.rs`.
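+//
+// As a sketch of the generated shape (not the literal output): a boolean setting such as
+// `enable_simd` becomes a `Flags::enable_simd(&self) -> bool` getter that reads one bit of the
+// settings byte vector, which is what the tests at the bottom of this file rely on.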
+include!(concat!(env!("OUT_DIR"), "/settings.rs")); + +/// Wrapper containing flags and optionally a `TargetIsa` trait object. +/// +/// A few passes need to access the flags but only optionally a target ISA. The `FlagsOrIsa` +/// wrapper can be used to pass either, and extract the flags so they are always accessible. +#[derive(Clone, Copy)] +pub struct FlagsOrIsa<'a> { + /// Flags are always present. + pub flags: &'a Flags, + + /// The ISA may not be present. + pub isa: Option<&'a dyn TargetIsa>, +} + +impl<'a> From<&'a Flags> for FlagsOrIsa<'a> { + fn from(flags: &'a Flags) -> FlagsOrIsa { + FlagsOrIsa { flags, isa: None } + } +} + +impl<'a> From<&'a dyn TargetIsa> for FlagsOrIsa<'a> { + fn from(isa: &'a dyn TargetIsa) -> FlagsOrIsa { + FlagsOrIsa { + flags: isa.flags(), + isa: Some(isa), + } + } +} + +#[cfg(test)] +mod tests { + use super::Configurable; + use super::SetError::*; + use super::{builder, Flags}; + use alloc::string::ToString; + + #[test] + fn display_default() { + let b = builder(); + let f = Flags::new(b); + assert_eq!( + f.to_string(), + "[shared]\n\ + opt_level = \"none\"\n\ + tls_model = \"none\"\n\ + libcall_call_conv = \"isa_default\"\n\ + baldrdash_prologue_words = 0\n\ + probestack_size_log2 = 12\n\ + enable_verifier = true\n\ + is_pic = false\n\ + use_colocated_libcalls = false\n\ + avoid_div_traps = false\n\ + enable_float = true\n\ + enable_nan_canonicalization = false\n\ + enable_pinned_reg = false\n\ + use_pinned_reg_as_heap_base = false\n\ + enable_simd = false\n\ + enable_atomics = true\n\ + enable_safepoints = false\n\ + emit_all_ones_funcaddrs = false\n\ + enable_probestack = true\n\ + probestack_func_adjusts_sp = false\n\ + enable_jump_tables = true\n" + ); + assert_eq!(f.opt_level(), super::OptLevel::None); + assert_eq!(f.enable_simd(), false); + assert_eq!(f.baldrdash_prologue_words(), 0); + } + + #[test] + fn modify_bool() { + let mut b = builder(); + assert_eq!(b.enable("not_there"), Err(BadName("not_there".to_string()))); + assert_eq!(b.enable("enable_simd"), Ok(())); + assert_eq!(b.set("enable_simd", "false"), Ok(())); + + let f = Flags::new(b); + assert_eq!(f.enable_simd(), false); + } + + #[test] + fn modify_string() { + let mut b = builder(); + assert_eq!( + b.set("not_there", "true"), + Err(BadName("not_there".to_string())) + ); + assert_eq!(b.set("enable_simd", ""), Err(BadValue("bool".to_string()))); + assert_eq!( + b.set("enable_simd", "best"), + Err(BadValue("bool".to_string())) + ); + assert_eq!( + b.set("opt_level", "true"), + Err(BadValue( + "any among none, speed, speed_and_size".to_string() + )) + ); + assert_eq!(b.set("opt_level", "speed"), Ok(())); + assert_eq!(b.set("enable_simd", "0"), Ok(())); + + let f = Flags::new(b); + assert_eq!(f.enable_simd(), false); + assert_eq!(f.opt_level(), super::OptLevel::Speed); + } +} diff --git a/cranelift/codegen/src/simple_gvn.rs b/cranelift/codegen/src/simple_gvn.rs new file mode 100644 index 0000000000..5351aced43 --- /dev/null +++ b/cranelift/codegen/src/simple_gvn.rs @@ -0,0 +1,152 @@ +//! A simple GVN pass. + +use crate::cursor::{Cursor, FuncCursor}; +use crate::dominator_tree::DominatorTree; +use crate::ir::{Function, Inst, InstructionData, Opcode, Type}; +use crate::scoped_hash_map::ScopedHashMap; +use crate::timing; +use alloc::vec::Vec; +use core::cell::{Ref, RefCell}; +use core::hash::{Hash, Hasher}; + +/// Test whether the given opcode is unsafe to even consider for GVN. 
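+///
+/// Calls, branches, traps, stores, and other side-effecting or flag-writing instructions can
+/// never be replaced by a previously computed value, so they are filtered out before any
+/// hashing takes place.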
+fn trivially_unsafe_for_gvn(opcode: Opcode) -> bool {
+ opcode.is_call()
+ || opcode.is_branch()
+ || opcode.is_terminator()
+ || opcode.is_return()
+ || opcode.can_trap()
+ || opcode.other_side_effects()
+ || opcode.can_store()
+ || opcode.writes_cpu_flags()
+}
+
+/// Test that, if the specified instruction is a load, it doesn't have the `readonly` memflag.
+fn is_load_and_not_readonly(inst_data: &InstructionData) -> bool {
+ match *inst_data {
+ InstructionData::Load { flags, .. } | InstructionData::LoadComplex { flags, .. } => {
+ !flags.readonly()
+ }
+ _ => inst_data.opcode().can_load(),
+ }
+}
+
+/// Wrapper around `InstructionData` which implements `Eq` and `Hash`
+#[derive(Clone)]
+struct HashKey<'a, 'f: 'a> {
+ inst: InstructionData,
+ ty: Type,
+ pos: &'a RefCell<FuncCursor<'f>>,
+}
+impl<'a, 'f: 'a> Hash for HashKey<'a, 'f> {
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ let pool = &self.pos.borrow().func.dfg.value_lists;
+ self.inst.hash(state, pool);
+ self.ty.hash(state);
+ }
+}
+impl<'a, 'f: 'a> PartialEq for HashKey<'a, 'f> {
+ fn eq(&self, other: &Self) -> bool {
+ let pool = &self.pos.borrow().func.dfg.value_lists;
+ self.inst.eq(&other.inst, pool) && self.ty == other.ty
+ }
+}
+impl<'a, 'f: 'a> Eq for HashKey<'a, 'f> {}
+
+/// Perform simple GVN on `func`.
+pub fn do_simple_gvn(func: &mut Function, domtree: &mut DominatorTree) {
+ let _tt = timing::gvn();
+ debug_assert!(domtree.is_valid());
+
+ // Visit blocks in a reverse post-order.
+ //
+ // The RefCell here is a bit ugly since the HashKeys in the ScopedHashMap
+ // need a reference to the function.
+ let pos = RefCell::new(FuncCursor::new(func));
+
+ let mut visible_values: ScopedHashMap<HashKey, Inst> = ScopedHashMap::new();
+ let mut scope_stack: Vec<Inst> = Vec::new();
+
+ for &block in domtree.cfg_postorder().iter().rev() {
+ {
+ // Pop any scopes that we just exited.
+ let layout = &pos.borrow().func.layout;
+ loop {
+ if let Some(current) = scope_stack.last() {
+ if domtree.dominates(*current, block, layout) {
+ break;
+ }
+ } else {
+ break;
+ }
+ scope_stack.pop();
+ visible_values.decrement_depth();
+ }
+
+ // Push a scope for the current block.
+ scope_stack.push(layout.first_inst(block).unwrap());
+ visible_values.increment_depth();
+ }
+
+ pos.borrow_mut().goto_top(block);
+ while let Some(inst) = {
+ let mut pos = pos.borrow_mut();
+ pos.next_inst()
+ } {
+ // Resolve aliases, particularly aliases we created earlier.
+ pos.borrow_mut().func.dfg.resolve_aliases_in_arguments(inst);
+
+ let func = Ref::map(pos.borrow(), |pos| &pos.func);
+
+ let opcode = func.dfg[inst].opcode();
+
+ if opcode.is_branch() && !opcode.is_terminator() {
+ scope_stack.push(func.layout.next_inst(inst).unwrap());
+ visible_values.increment_depth();
+ }
+
+ if trivially_unsafe_for_gvn(opcode) {
+ continue;
+ }
+
+ // These are split up to separate concerns.
+ if is_load_and_not_readonly(&func.dfg[inst]) {
+ continue;
+ }
+
+ let ctrl_typevar = func.dfg.ctrl_typevar(inst);
+ let key = HashKey {
+ inst: func.dfg[inst].clone(),
+ ty: ctrl_typevar,
+ pos: &pos,
+ };
+ use crate::scoped_hash_map::Entry::*;
+ match visible_values.entry(key) {
+ Occupied(entry) => {
+ #[allow(clippy::debug_assert_with_mut_call)]
+ {
+ // Clippy incorrectly believes `&func.layout` should not be used here:
+ // https://github.com/rust-lang/rust-clippy/issues/4737
+ debug_assert!(domtree.dominates(*entry.get(), inst, &func.layout));
+ }
+
+ // If the redundant instruction is representing the current
+ // scope, pick a new representative.
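+ //
+ // (Entries on `scope_stack` must remain valid instructions for the
+ // `dominates` checks above, so a soon-to-be-removed `inst` cannot be
+ // left there.)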
+ let old = scope_stack.last_mut().unwrap();
+ if *old == inst {
+ *old = func.layout.next_inst(inst).unwrap();
+ }
+ // Replace the redundant instruction and remove it.
+ drop(func);
+ let mut pos = pos.borrow_mut();
+ pos.func.dfg.replace_with_aliases(inst, *entry.get());
+ pos.remove_inst_and_step_back();
+ }
+ Vacant(entry) => {
+ entry.insert(inst);
+ }
+ }
+ }
+ }
+}
diff --git a/cranelift/codegen/src/simple_preopt.rs b/cranelift/codegen/src/simple_preopt.rs
new file mode 100644
index 0000000000..c5a02caea4
--- /dev/null
+++ b/cranelift/codegen/src/simple_preopt.rs
@@ -0,0 +1,967 @@
+//! A pre-legalization rewriting pass.
+//!
+//! This module provides early-stage optimizations. The optimizations here
+//! should be useful even for already well-optimized code. More general purpose
+//! early-stage optimizations can be found in the preopt crate.
+
+use crate::cursor::{Cursor, FuncCursor};
+use crate::divconst_magic_numbers::{magic_s32, magic_s64, magic_u32, magic_u64};
+use crate::divconst_magic_numbers::{MS32, MS64, MU32, MU64};
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::{
+ condcodes::{CondCode, IntCC},
+ dfg::ValueDef,
+ immediates,
+ instructions::{Opcode, ValueList},
+ types::{I16, I32, I64, I8},
+ Block, DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Type, Value,
+};
+use crate::isa::TargetIsa;
+use crate::timing;
+
+#[inline]
+/// Replaces the unique result of the instruction `inst` with an alias of the given value, and
+/// replaces the instruction with a nop. Can be used only on instructions producing one unique
+/// result; otherwise it will assert.
+fn replace_single_result_with_alias(dfg: &mut DataFlowGraph, inst: Inst, value: Value) {
+ // Replace the result value by an alias.
+ let results = dfg.detach_results(inst);
+ debug_assert!(results.len(&dfg.value_lists) == 1);
+ let result = results.get(0, &dfg.value_lists).unwrap();
+ dfg.change_to_alias(result, value);
+
+ // Replace instruction by a nop.
+ dfg.replace(inst).nop();
+}
+
+//----------------------------------------------------------------------
+//
+// Pattern-match helpers and transformation for div and rem by constants.
+
+// Simple math helpers
+
+/// If `x` is a power of two, or the negation thereof, return the power along
+/// with a boolean that indicates whether `x` is negative. Else return None.
+#[inline]
+fn i32_is_power_of_two(x: i32) -> Option<(bool, u32)> {
+ // We have to special-case this because abs(x) isn't representable.
+ if x == -0x8000_0000 {
+ return Some((true, 31));
+ }
+ let abs_x = i32::wrapping_abs(x) as u32;
+ if abs_x.is_power_of_two() {
+ return Some((x < 0, abs_x.trailing_zeros()));
+ }
+ None
+}
+
+/// Same comments as for i32_is_power_of_two apply.
+#[inline]
+fn i64_is_power_of_two(x: i64) -> Option<(bool, u32)> {
+ // We have to special-case this because abs(x) isn't representable.
+ if x == -0x8000_0000_0000_0000 {
+ return Some((true, 63));
+ }
+ let abs_x = i64::wrapping_abs(x) as u64;
+ if abs_x.is_power_of_two() {
+ return Some((x < 0, abs_x.trailing_zeros()));
+ }
+ None
+}
+
+/// Representation of an instruction that can be replaced by a single division/remainder operation
+/// between a left Value operand and a right immediate operand.
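+///
+/// For example (hypothetical values), `udiv_imm v0, 7` on an `i32` operand is represented as
+/// `DivU32(v0, 7)`, and `srem_imm v0, -3` on an `i64` operand as `RemS64(v0, -3)`.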
+#[derive(Debug)]
+enum DivRemByConstInfo {
+ DivU32(Value, u32),
+ DivU64(Value, u64),
+ DivS32(Value, i32),
+ DivS64(Value, i64),
+ RemU32(Value, u32),
+ RemU64(Value, u64),
+ RemS32(Value, i32),
+ RemS64(Value, i64),
+}
+
+/// Possibly create a DivRemByConstInfo from the given components, by figuring out which, if any,
+/// of the 8 cases apply, and also taking care to sanity-check the immediate.
+fn package_up_divrem_info(
+ value: Value,
+ value_type: Type,
+ imm_i64: i64,
+ is_signed: bool,
+ is_rem: bool,
+) -> Option<DivRemByConstInfo> {
+ let imm_u64 = imm_i64 as u64;
+
+ match (is_signed, value_type) {
+ (false, I32) => {
+ if imm_u64 < 0x1_0000_0000 {
+ if is_rem {
+ Some(DivRemByConstInfo::RemU32(value, imm_u64 as u32))
+ } else {
+ Some(DivRemByConstInfo::DivU32(value, imm_u64 as u32))
+ }
+ } else {
+ None
+ }
+ }
+
+ (false, I64) => {
+ // unsigned 64, no range constraint.
+ if is_rem {
+ Some(DivRemByConstInfo::RemU64(value, imm_u64))
+ } else {
+ Some(DivRemByConstInfo::DivU64(value, imm_u64))
+ }
+ }
+
+ (true, I32) => {
+ if imm_u64 <= 0x7fff_ffff || imm_u64 >= 0xffff_ffff_8000_0000 {
+ if is_rem {
+ Some(DivRemByConstInfo::RemS32(value, imm_u64 as i32))
+ } else {
+ Some(DivRemByConstInfo::DivS32(value, imm_u64 as i32))
+ }
+ } else {
+ None
+ }
+ }
+
+ (true, I64) => {
+ // signed 64, no range constraint.
+ if is_rem {
+ Some(DivRemByConstInfo::RemS64(value, imm_u64 as i64))
+ } else {
+ Some(DivRemByConstInfo::DivS64(value, imm_u64 as i64))
+ }
+ }
+
+ _ => None,
+ }
+}
+
+/// Examine `inst` to see if it is a div or rem by a constant, and if so return the operands,
+/// signedness, operation size and div-vs-rem-ness in a handy bundle.
+fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
+ if let InstructionData::BinaryImm { opcode, arg, imm } = dfg[inst] {
+ let (is_signed, is_rem) = match opcode {
+ Opcode::UdivImm => (false, false),
+ Opcode::UremImm => (false, true),
+ Opcode::SdivImm => (true, false),
+ Opcode::SremImm => (true, true),
+ _ => return None,
+ };
+ return package_up_divrem_info(arg, dfg.value_type(arg), imm.into(), is_signed, is_rem);
+ }
+
+ None
+}
+
+/// Actually do the transformation given a bundle containing the relevant information.
+/// `divrem_info` describes a div or rem by a constant that `pos` currently points at, and `inst`
+/// is the associated instruction. `inst` is replaced by a sequence of other operations that
+/// calculate the same result. Note that there are various `divrem_info` cases where we cannot do
+/// any transformation, in which case `inst` is left unchanged.
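+///
+/// As a sketch of the non-power-of-2 case: an unsigned 32-bit division by 7 is rewritten via
+/// `magic_u32(7)` into a `umulhi` by a magic constant plus a short fix-up sequence (subtract,
+/// shift, add, shift), trading the hardware divide for a multiply.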
+fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCursor, inst: Inst) { + let is_rem = match *divrem_info { + DivRemByConstInfo::DivU32(_, _) + | DivRemByConstInfo::DivU64(_, _) + | DivRemByConstInfo::DivS32(_, _) + | DivRemByConstInfo::DivS64(_, _) => false, + DivRemByConstInfo::RemU32(_, _) + | DivRemByConstInfo::RemU64(_, _) + | DivRemByConstInfo::RemS32(_, _) + | DivRemByConstInfo::RemS64(_, _) => true, + }; + + match *divrem_info { + // -------------------- U32 -------------------- + + // U32 div, rem by zero: ignore + DivRemByConstInfo::DivU32(_n1, 0) | DivRemByConstInfo::RemU32(_n1, 0) => {} + + // U32 div by 1: identity + // U32 rem by 1: zero + DivRemByConstInfo::DivU32(n1, 1) | DivRemByConstInfo::RemU32(n1, 1) => { + if is_rem { + pos.func.dfg.replace(inst).iconst(I32, 0); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); + } + } + + // U32 div, rem by a power-of-2 + DivRemByConstInfo::DivU32(n1, d) | DivRemByConstInfo::RemU32(n1, d) + if d.is_power_of_two() => + { + debug_assert!(d >= 2); + // compute k where d == 2^k + let k = d.trailing_zeros(); + debug_assert!(k >= 1 && k <= 31); + if is_rem { + let mask = (1u64 << k) - 1; + pos.func.dfg.replace(inst).band_imm(n1, mask as i64); + } else { + pos.func.dfg.replace(inst).ushr_imm(n1, k as i64); + } + } + + // U32 div, rem by non-power-of-2 + DivRemByConstInfo::DivU32(n1, d) | DivRemByConstInfo::RemU32(n1, d) => { + debug_assert!(d >= 3); + let MU32 { + mul_by, + do_add, + shift_by, + } = magic_u32(d); + let qf; // final quotient + let q0 = pos.ins().iconst(I32, mul_by as i64); + let q1 = pos.ins().umulhi(n1, q0); + if do_add { + debug_assert!(shift_by >= 1 && shift_by <= 32); + let t1 = pos.ins().isub(n1, q1); + let t2 = pos.ins().ushr_imm(t1, 1); + let t3 = pos.ins().iadd(t2, q1); + // I never found any case where shift_by == 1 here. + // So there's no attempt to fold out a zero shift. + debug_assert_ne!(shift_by, 1); + qf = pos.ins().ushr_imm(t3, (shift_by - 1) as i64); + } else { + debug_assert!(shift_by >= 0 && shift_by <= 31); + // Whereas there are known cases here for shift_by == 0. + if shift_by > 0 { + qf = pos.ins().ushr_imm(q1, shift_by as i64); + } else { + qf = q1; + } + } + // Now qf holds the final quotient. If necessary calculate the + // remainder instead. 
+ if is_rem { + let tt = pos.ins().imul_imm(qf, d as i64); + pos.func.dfg.replace(inst).isub(n1, tt); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, qf); + } + } + + // -------------------- U64 -------------------- + + // U64 div, rem by zero: ignore + DivRemByConstInfo::DivU64(_n1, 0) | DivRemByConstInfo::RemU64(_n1, 0) => {} + + // U64 div by 1: identity + // U64 rem by 1: zero + DivRemByConstInfo::DivU64(n1, 1) | DivRemByConstInfo::RemU64(n1, 1) => { + if is_rem { + pos.func.dfg.replace(inst).iconst(I64, 0); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); + } + } + + // U64 div, rem by a power-of-2 + DivRemByConstInfo::DivU64(n1, d) | DivRemByConstInfo::RemU64(n1, d) + if d.is_power_of_two() => + { + debug_assert!(d >= 2); + // compute k where d == 2^k + let k = d.trailing_zeros(); + debug_assert!(k >= 1 && k <= 63); + if is_rem { + let mask = (1u64 << k) - 1; + pos.func.dfg.replace(inst).band_imm(n1, mask as i64); + } else { + pos.func.dfg.replace(inst).ushr_imm(n1, k as i64); + } + } + + // U64 div, rem by non-power-of-2 + DivRemByConstInfo::DivU64(n1, d) | DivRemByConstInfo::RemU64(n1, d) => { + debug_assert!(d >= 3); + let MU64 { + mul_by, + do_add, + shift_by, + } = magic_u64(d); + let qf; // final quotient + let q0 = pos.ins().iconst(I64, mul_by as i64); + let q1 = pos.ins().umulhi(n1, q0); + if do_add { + debug_assert!(shift_by >= 1 && shift_by <= 64); + let t1 = pos.ins().isub(n1, q1); + let t2 = pos.ins().ushr_imm(t1, 1); + let t3 = pos.ins().iadd(t2, q1); + // I never found any case where shift_by == 1 here. + // So there's no attempt to fold out a zero shift. + debug_assert_ne!(shift_by, 1); + qf = pos.ins().ushr_imm(t3, (shift_by - 1) as i64); + } else { + debug_assert!(shift_by >= 0 && shift_by <= 63); + // Whereas there are known cases here for shift_by == 0. + if shift_by > 0 { + qf = pos.ins().ushr_imm(q1, shift_by as i64); + } else { + qf = q1; + } + } + // Now qf holds the final quotient. If necessary calculate the + // remainder instead. + if is_rem { + let tt = pos.ins().imul_imm(qf, d as i64); + pos.func.dfg.replace(inst).isub(n1, tt); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, qf); + } + } + + // -------------------- S32 -------------------- + + // S32 div, rem by zero or -1: ignore + DivRemByConstInfo::DivS32(_n1, -1) + | DivRemByConstInfo::RemS32(_n1, -1) + | DivRemByConstInfo::DivS32(_n1, 0) + | DivRemByConstInfo::RemS32(_n1, 0) => {} + + // S32 div by 1: identity + // S32 rem by 1: zero + DivRemByConstInfo::DivS32(n1, 1) | DivRemByConstInfo::RemS32(n1, 1) => { + if is_rem { + pos.func.dfg.replace(inst).iconst(I32, 0); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); + } + } + + DivRemByConstInfo::DivS32(n1, d) | DivRemByConstInfo::RemS32(n1, d) => { + if let Some((is_negative, k)) = i32_is_power_of_two(d) { + // k can be 31 only in the case that d is -2^31. + debug_assert!(k >= 1 && k <= 31); + let t1 = if k - 1 == 0 { + n1 + } else { + pos.ins().sshr_imm(n1, (k - 1) as i64) + }; + let t2 = pos.ins().ushr_imm(t1, (32 - k) as i64); + let t3 = pos.ins().iadd(n1, t2); + if is_rem { + // S32 rem by a power-of-2 + let t4 = pos.ins().band_imm(t3, i32::wrapping_neg(1 << k) as i64); + // Curiously, we don't care here what the sign of d is. 
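+ // (Truncating remainders satisfy `n % d == n % -d`, so only
+ // |d| = 2^k matters for the mask.)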
+ pos.func.dfg.replace(inst).isub(n1, t4); + } else { + // S32 div by a power-of-2 + let t4 = pos.ins().sshr_imm(t3, k as i64); + if is_negative { + pos.func.dfg.replace(inst).irsub_imm(t4, 0); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, t4); + } + } + } else { + // S32 div, rem by a non-power-of-2 + debug_assert!(d < -2 || d > 2); + let MS32 { mul_by, shift_by } = magic_s32(d); + let q0 = pos.ins().iconst(I32, mul_by as i64); + let q1 = pos.ins().smulhi(n1, q0); + let q2 = if d > 0 && mul_by < 0 { + pos.ins().iadd(q1, n1) + } else if d < 0 && mul_by > 0 { + pos.ins().isub(q1, n1) + } else { + q1 + }; + debug_assert!(shift_by >= 0 && shift_by <= 31); + let q3 = if shift_by == 0 { + q2 + } else { + pos.ins().sshr_imm(q2, shift_by as i64) + }; + let t1 = pos.ins().ushr_imm(q3, 31); + let qf = pos.ins().iadd(q3, t1); + // Now qf holds the final quotient. If necessary calculate + // the remainder instead. + if is_rem { + let tt = pos.ins().imul_imm(qf, d as i64); + pos.func.dfg.replace(inst).isub(n1, tt); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, qf); + } + } + } + + // -------------------- S64 -------------------- + + // S64 div, rem by zero or -1: ignore + DivRemByConstInfo::DivS64(_n1, -1) + | DivRemByConstInfo::RemS64(_n1, -1) + | DivRemByConstInfo::DivS64(_n1, 0) + | DivRemByConstInfo::RemS64(_n1, 0) => {} + + // S64 div by 1: identity + // S64 rem by 1: zero + DivRemByConstInfo::DivS64(n1, 1) | DivRemByConstInfo::RemS64(n1, 1) => { + if is_rem { + pos.func.dfg.replace(inst).iconst(I64, 0); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, n1); + } + } + + DivRemByConstInfo::DivS64(n1, d) | DivRemByConstInfo::RemS64(n1, d) => { + if let Some((is_negative, k)) = i64_is_power_of_two(d) { + // k can be 63 only in the case that d is -2^63. + debug_assert!(k >= 1 && k <= 63); + let t1 = if k - 1 == 0 { + n1 + } else { + pos.ins().sshr_imm(n1, (k - 1) as i64) + }; + let t2 = pos.ins().ushr_imm(t1, (64 - k) as i64); + let t3 = pos.ins().iadd(n1, t2); + if is_rem { + // S64 rem by a power-of-2 + let t4 = pos.ins().band_imm(t3, i64::wrapping_neg(1 << k)); + // Curiously, we don't care here what the sign of d is. + pos.func.dfg.replace(inst).isub(n1, t4); + } else { + // S64 div by a power-of-2 + let t4 = pos.ins().sshr_imm(t3, k as i64); + if is_negative { + pos.func.dfg.replace(inst).irsub_imm(t4, 0); + } else { + replace_single_result_with_alias(&mut pos.func.dfg, inst, t4); + } + } + } else { + // S64 div, rem by a non-power-of-2 + debug_assert!(d < -2 || d > 2); + let MS64 { mul_by, shift_by } = magic_s64(d); + let q0 = pos.ins().iconst(I64, mul_by); + let q1 = pos.ins().smulhi(n1, q0); + let q2 = if d > 0 && mul_by < 0 { + pos.ins().iadd(q1, n1) + } else if d < 0 && mul_by > 0 { + pos.ins().isub(q1, n1) + } else { + q1 + }; + debug_assert!(shift_by >= 0 && shift_by <= 63); + let q3 = if shift_by == 0 { + q2 + } else { + pos.ins().sshr_imm(q2, shift_by as i64) + }; + let t1 = pos.ins().ushr_imm(q3, 63); + let qf = pos.ins().iadd(q3, t1); + // Now qf holds the final quotient. If necessary calculate + // the remainder instead. 
+                if is_rem {
+                    let tt = pos.ins().imul_imm(qf, d);
+                    pos.func.dfg.replace(inst).isub(n1, tt);
+                } else {
+                    replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
+                }
+            }
+        }
+    }
+}
+
+#[inline]
+fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> {
+    if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) {
+        if let InstructionData::UnaryImm {
+            opcode: Opcode::Iconst,
+            imm,
+        } = dfg[candidate_inst]
+        {
+            return Some(imm);
+        }
+    }
+    None
+}
+
+/// Try to transform [(x << N) >> N] into an (un)signed-extending move.
+/// Returns true if the final instruction has been converted to such a move.
+fn try_fold_extended_move(
+    pos: &mut FuncCursor,
+    inst: Inst,
+    opcode: Opcode,
+    arg: Value,
+    imm: immediates::Imm64,
+) -> bool {
+    if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
+        if let InstructionData::BinaryImm {
+            opcode: Opcode::IshlImm,
+            arg: prev_arg,
+            imm: prev_imm,
+        } = &pos.func.dfg[arg_inst]
+        {
+            if imm != *prev_imm {
+                return false;
+            }
+
+            let dest_ty = pos.func.dfg.ctrl_typevar(inst);
+            if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() {
+                return false;
+            }
+
+            let imm_bits: i64 = imm.into();
+            let ireduce_ty = match (dest_ty.lane_bits() as i64).wrapping_sub(imm_bits) {
+                8 => I8,
+                16 => I16,
+                32 => I32,
+                _ => return false,
+            };
+            let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap();
+
+            // This becomes a no-op, since ireduce_ty has a smaller lane width than
+            // the argument type (also the destination type).
+            let arg = *prev_arg;
+            let narrower_arg = pos.ins().ireduce(ireduce_ty, arg);
+
+            if opcode == Opcode::UshrImm {
+                pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg);
+            } else {
+                pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg);
+            }
+            return true;
+        }
+    }
+    false
+}
+
+/// Apply basic simplifications.
+///
+/// This folds constants with arithmetic to form `_imm` instructions, and other minor
+/// simplifications.
+///
+/// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the
+/// controlling type's width of the instruction. This would result in an illegal instruction that
+/// would likely be expanded back into an instruction on smaller types with the same initial
+/// opcode, creating unnecessary churn.
+fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) {
+    match pos.func.dfg[inst] {
+        InstructionData::Binary { opcode, args } => {
+            if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) {
+                let new_opcode = match opcode {
+                    Opcode::Iadd => Opcode::IaddImm,
+                    Opcode::Imul => Opcode::ImulImm,
+                    Opcode::Sdiv => Opcode::SdivImm,
+                    Opcode::Udiv => Opcode::UdivImm,
+                    Opcode::Srem => Opcode::SremImm,
+                    Opcode::Urem => Opcode::UremImm,
+                    Opcode::Band => Opcode::BandImm,
+                    Opcode::Bor => Opcode::BorImm,
+                    Opcode::Bxor => Opcode::BxorImm,
+                    Opcode::Rotl => Opcode::RotlImm,
+                    Opcode::Rotr => Opcode::RotrImm,
+                    Opcode::Ishl => Opcode::IshlImm,
+                    Opcode::Ushr => Opcode::UshrImm,
+                    Opcode::Sshr => Opcode::SshrImm,
+                    Opcode::Isub => {
+                        imm = imm.wrapping_neg();
+                        Opcode::IaddImm
+                    }
+                    Opcode::Ifcmp => Opcode::IfcmpImm,
+                    _ => return,
+                };
+                let ty = pos.func.dfg.ctrl_typevar(inst);
+                if ty.bytes() <= native_word_width {
+                    pos.func
+                        .dfg
+                        .replace(inst)
+                        .BinaryImm(new_opcode, ty, imm, args[0]);
+
+                    // Repeat for BinaryImm simplification.
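+                    // (For example, `isub v1, v2` with a constant `v2` first becomes
+                    // `iadd_imm` with the negated constant; the recursive call can then
+                    // merge it with an earlier `iadd_imm` via the `BinaryImm` arm below.)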
+ simplify(pos, inst, native_word_width); + } + } else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) { + let new_opcode = match opcode { + Opcode::Iadd => Opcode::IaddImm, + Opcode::Imul => Opcode::ImulImm, + Opcode::Band => Opcode::BandImm, + Opcode::Bor => Opcode::BorImm, + Opcode::Bxor => Opcode::BxorImm, + Opcode::Isub => Opcode::IrsubImm, + _ => return, + }; + let ty = pos.func.dfg.ctrl_typevar(inst); + if ty.bytes() <= native_word_width { + pos.func + .dfg + .replace(inst) + .BinaryImm(new_opcode, ty, imm, args[1]); + } + } + } + + InstructionData::Unary { opcode, arg } => { + if let Opcode::AdjustSpDown = opcode { + if let Some(imm) = resolve_imm64_value(&pos.func.dfg, arg) { + // Note this works for both positive and negative immediate values. + pos.func.dfg.replace(inst).adjust_sp_down_imm(imm); + } + } + } + + InstructionData::BinaryImm { opcode, arg, imm } => { + let ty = pos.func.dfg.ctrl_typevar(inst); + + let mut arg = arg; + let mut imm = imm; + match opcode { + Opcode::IaddImm + | Opcode::ImulImm + | Opcode::BorImm + | Opcode::BandImm + | Opcode::BxorImm => { + // Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x) + if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) { + if let InstructionData::BinaryImm { + opcode: prev_opcode, + arg: prev_arg, + imm: prev_imm, + } = &pos.func.dfg[arg_inst] + { + if opcode == *prev_opcode && ty == pos.func.dfg.ctrl_typevar(arg_inst) { + let lhs: i64 = imm.into(); + let rhs: i64 = (*prev_imm).into(); + let new_imm = match opcode { + Opcode::BorImm => lhs | rhs, + Opcode::BandImm => lhs & rhs, + Opcode::BxorImm => lhs ^ rhs, + Opcode::IaddImm => lhs.wrapping_add(rhs), + Opcode::ImulImm => lhs.wrapping_mul(rhs), + _ => panic!("can't happen"), + }; + let new_imm = immediates::Imm64::from(new_imm); + let new_arg = *prev_arg; + pos.func + .dfg + .replace(inst) + .BinaryImm(opcode, ty, new_imm, new_arg); + imm = new_imm; + arg = new_arg; + } + } + } + } + + Opcode::UshrImm | Opcode::SshrImm => { + if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width + && try_fold_extended_move(pos, inst, opcode, arg, imm) + { + return; + } + } + + _ => {} + }; + + // Replace operations that are no-ops. + match (opcode, imm.into()) { + (Opcode::IaddImm, 0) + | (Opcode::ImulImm, 1) + | (Opcode::SdivImm, 1) + | (Opcode::UdivImm, 1) + | (Opcode::BorImm, 0) + | (Opcode::BandImm, -1) + | (Opcode::BxorImm, 0) + | (Opcode::RotlImm, 0) + | (Opcode::RotrImm, 0) + | (Opcode::IshlImm, 0) + | (Opcode::UshrImm, 0) + | (Opcode::SshrImm, 0) => { + // Alias the result value with the original argument. + replace_single_result_with_alias(&mut pos.func.dfg, inst, arg); + } + (Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => { + // Replace by zero. + pos.func.dfg.replace(inst).iconst(ty, 0); + } + (Opcode::BorImm, -1) => { + // Replace by minus one. + pos.func.dfg.replace(inst).iconst(ty, -1); + } + _ => {} + } + } + + InstructionData::IntCompare { opcode, cond, args } => { + debug_assert_eq!(opcode, Opcode::Icmp); + if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) { + if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width { + pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm); + } + } + } + + InstructionData::CondTrap { .. } + | InstructionData::Branch { .. } + | InstructionData::Ternary { + opcode: Opcode::Select, + .. + } => { + // Fold away a redundant `bint`. 
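+            // (`bint` materializes a boolean as an integer, but branches and selects can
+            // consume the boolean directly; after the rewrite an otherwise-unused `bint`
+            // can be removed later by dead code elimination.)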
+            let condition_def = {
+                let args = pos.func.dfg.inst_args(inst);
+                pos.func.dfg.value_def(args[0])
+            };
+            if let ValueDef::Result(def_inst, _) = condition_def {
+                if let InstructionData::Unary {
+                    opcode: Opcode::Bint,
+                    arg: bool_val,
+                } = pos.func.dfg[def_inst]
+                {
+                    let args = pos.func.dfg.inst_args_mut(inst);
+                    args[0] = bool_val;
+                }
+            }
+        }
+
+        _ => {}
+    }
+}
+
+struct BranchOptInfo {
+    br_inst: Inst,
+    cmp_arg: Value,
+    args: ValueList,
+    new_opcode: Opcode,
+}
+
+/// Fold comparisons into branch operations when possible.
+///
+/// This matches against operations which compare against zero, then use the
+/// result in a `brz` or `brnz` branch. It folds those two operations into a
+/// single `brz` or `brnz`.
+fn branch_opt(pos: &mut FuncCursor, inst: Inst) {
+    let mut info = if let InstructionData::Branch {
+        opcode: br_opcode,
+        args: ref br_args,
+        ..
+    } = pos.func.dfg[inst]
+    {
+        let first_arg = {
+            let args = pos.func.dfg.inst_args(inst);
+            args[0]
+        };
+
+        let icmp_inst = if let ValueDef::Result(icmp_inst, _) = pos.func.dfg.value_def(first_arg) {
+            icmp_inst
+        } else {
+            return;
+        };
+
+        if let InstructionData::IntCompareImm {
+            opcode: Opcode::IcmpImm,
+            arg: cmp_arg,
+            cond: cmp_cond,
+            imm: cmp_imm,
+        } = pos.func.dfg[icmp_inst]
+        {
+            let cmp_imm: i64 = cmp_imm.into();
+            if cmp_imm != 0 {
+                return;
+            }
+
+            // icmp_imm returns non-zero when the comparison is true. So, if
+            // we're branching on zero, we need to invert the condition.
+            let cond = match br_opcode {
+                Opcode::Brz => cmp_cond.inverse(),
+                Opcode::Brnz => cmp_cond,
+                _ => return,
+            };
+
+            let new_opcode = match cond {
+                IntCC::Equal => Opcode::Brz,
+                IntCC::NotEqual => Opcode::Brnz,
+                _ => return,
+            };
+
+            BranchOptInfo {
+                br_inst: inst,
+                cmp_arg,
+                args: br_args.clone(),
+                new_opcode,
+            }
+        } else {
+            return;
+        }
+    } else {
+        return;
+    };
+
+    info.args.as_mut_slice(&mut pos.func.dfg.value_lists)[0] = info.cmp_arg;
+    if let InstructionData::Branch { ref mut opcode, .. } = pos.func.dfg[info.br_inst] {
+        *opcode = info.new_opcode;
+    } else {
+        panic!();
+    }
+}
+
+enum BranchOrderKind {
+    BrzToBrnz(Value),
+    BrnzToBrz(Value),
+    InvertIcmpCond(IntCC, Value, Value),
+}
+
+/// Reorder branches to encourage fallthroughs.
+///
+/// When a block ends with a conditional branch followed by an unconditional
+/// branch, this will reorder them if one of them is branching to the next Block
+/// layout-wise. The unconditional jump can then become a fallthrough.
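+///
+/// For example, if `block1` immediately follows in the layout, then
+///
+///     brz v0, block1
+///     jump block2
+///
+/// is rewritten as `brnz v0, block2; jump block1`, so that the final jump can
+/// later be emitted as a fallthrough.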
+fn branch_order(pos: &mut FuncCursor, cfg: &mut ControlFlowGraph, block: Block, inst: Inst) { + let (term_inst, term_inst_args, term_dest, cond_inst, cond_inst_args, cond_dest, kind) = + match pos.func.dfg[inst] { + InstructionData::Jump { + opcode: Opcode::Jump, + destination, + ref args, + } => { + let next_block = if let Some(next_block) = pos.func.layout.next_block(block) { + next_block + } else { + return; + }; + + if destination == next_block { + return; + } + + let prev_inst = if let Some(prev_inst) = pos.func.layout.prev_inst(inst) { + prev_inst + } else { + return; + }; + + let prev_inst_data = &pos.func.dfg[prev_inst]; + + if let Some(prev_dest) = prev_inst_data.branch_destination() { + if prev_dest != next_block { + return; + } + } else { + return; + } + + match prev_inst_data { + InstructionData::Branch { + opcode, + args: ref prev_args, + destination: cond_dest, + } => { + let cond_arg = { + let args = pos.func.dfg.inst_args(prev_inst); + args[0] + }; + + let kind = match opcode { + Opcode::Brz => BranchOrderKind::BrzToBrnz(cond_arg), + Opcode::Brnz => BranchOrderKind::BrnzToBrz(cond_arg), + _ => panic!("unexpected opcode"), + }; + + ( + inst, + args.clone(), + destination, + prev_inst, + prev_args.clone(), + *cond_dest, + kind, + ) + } + InstructionData::BranchIcmp { + opcode: Opcode::BrIcmp, + cond, + destination: cond_dest, + args: ref prev_args, + } => { + let (x_arg, y_arg) = { + let args = pos.func.dfg.inst_args(prev_inst); + (args[0], args[1]) + }; + + ( + inst, + args.clone(), + destination, + prev_inst, + prev_args.clone(), + *cond_dest, + BranchOrderKind::InvertIcmpCond(*cond, x_arg, y_arg), + ) + } + _ => return, + } + } + + _ => return, + }; + + let cond_args = { cond_inst_args.as_slice(&pos.func.dfg.value_lists).to_vec() }; + let term_args = { term_inst_args.as_slice(&pos.func.dfg.value_lists).to_vec() }; + + match kind { + BranchOrderKind::BrnzToBrz(cond_arg) => { + pos.func + .dfg + .replace(term_inst) + .jump(cond_dest, &cond_args[1..]); + pos.func + .dfg + .replace(cond_inst) + .brz(cond_arg, term_dest, &term_args); + } + BranchOrderKind::BrzToBrnz(cond_arg) => { + pos.func + .dfg + .replace(term_inst) + .jump(cond_dest, &cond_args[1..]); + pos.func + .dfg + .replace(cond_inst) + .brnz(cond_arg, term_dest, &term_args); + } + BranchOrderKind::InvertIcmpCond(cond, x_arg, y_arg) => { + pos.func + .dfg + .replace(term_inst) + .jump(cond_dest, &cond_args[2..]); + pos.func.dfg.replace(cond_inst).br_icmp( + cond.inverse(), + x_arg, + y_arg, + term_dest, + &term_args, + ); + } + } + + cfg.recompute_block(pos.func, block); +} + +/// The main pre-opt pass. +pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) { + let _tt = timing::preopt(); + let mut pos = FuncCursor::new(func); + let native_word_width = isa.pointer_bytes(); + while let Some(block) = pos.next_block() { + while let Some(inst) = pos.next_inst() { + // Apply basic simplifications. + simplify(&mut pos, inst, native_word_width as u32); + + // Try to transform divide-by-constant into simpler operations. + if let Some(divrem_info) = get_div_info(inst, &pos.func.dfg) { + do_divrem_transformation(&divrem_info, &mut pos, inst); + continue; + } + + branch_opt(&mut pos, inst); + branch_order(&mut pos, cfg, block, inst); + } + } +} diff --git a/cranelift/codegen/src/stack_layout.rs b/cranelift/codegen/src/stack_layout.rs new file mode 100644 index 0000000000..55f3eb864f --- /dev/null +++ b/cranelift/codegen/src/stack_layout.rs @@ -0,0 +1,241 @@ +//! Computing stack layout. 
+
+use crate::ir::stackslot::{StackOffset, StackSize, StackSlotKind};
+use crate::ir::{StackLayoutInfo, StackSlots};
+use crate::result::{CodegenError, CodegenResult};
+use core::cmp::{max, min};
+
+/// Compute the stack frame layout.
+///
+/// Determine the total size of this stack frame and assign offsets to all `Spill` and `Explicit`
+/// stack slots.
+///
+/// The total frame size will be a multiple of `alignment` which must be a power of two, unless
+/// the function doesn't perform any calls.
+///
+/// Returns the total stack frame size which is also saved in `frame.frame_size`.
+///
+/// If the stack frame is too big, returns an `ImplLimitExceeded` error.
+pub fn layout_stack(
+    frame: &mut StackSlots,
+    is_leaf: bool,
+    alignment: StackSize,
+) -> CodegenResult<StackSize> {
+    // Each object and the whole stack frame must fit in 2 GB such that any relative offset within
+    // the frame fits in a `StackOffset`.
+    let max_size = StackOffset::max_value() as StackSize;
+    debug_assert!(alignment.is_power_of_two() && alignment <= max_size);
+
+    // We assume a stack that grows toward lower addresses as implemented by modern ISAs. The
+    // stack layout from high to low addresses will be:
+    //
+    // 1. incoming arguments.
+    // 2. spills + explicits + struct returns.
+    // 3. outgoing arguments.
+    //
+    // The incoming arguments can have both positive and negative offsets. An incoming argument
+    // with a negative offset is usually the x86 return address pushed by the call instruction,
+    // but it can also be a fixed stack slot pushed by an externally generated prologue.
+    //
+    // Both incoming and outgoing argument slots have fixed offsets that are treated as
+    // reserved zones by the layout algorithm.
+    //
+    // If a function only has incoming arguments and does not perform any calls, then it doesn't
+    // require the stack to be aligned.
+
+    let mut incoming_min = 0;
+    let mut incoming_max = 0;
+    let mut outgoing_max = 0;
+    let mut min_align = alignment;
+    let mut must_align = !is_leaf;
+
+    for slot in frame.values() {
+        if slot.size > max_size {
+            return Err(CodegenError::ImplLimitExceeded);
+        }
+
+        match slot.kind {
+            StackSlotKind::IncomingArg => {
+                incoming_min = min(incoming_min, slot.offset.unwrap());
+                incoming_max = max(incoming_max, slot.offset.unwrap() + slot.size as i32);
+            }
+            StackSlotKind::OutgoingArg => {
+                let offset = slot
+                    .offset
+                    .unwrap()
+                    .checked_add(slot.size as StackOffset)
+                    .ok_or(CodegenError::ImplLimitExceeded)?;
+                outgoing_max = max(outgoing_max, offset);
+                must_align = true;
+            }
+            StackSlotKind::StructReturnSlot
+            | StackSlotKind::SpillSlot
+            | StackSlotKind::ExplicitSlot
+            | StackSlotKind::EmergencySlot => {
+                // Determine the smallest alignment of any explicit or spill slot.
+                min_align = slot.alignment(min_align);
+                must_align = true;
+            }
+        }
+    }
+
+    // Lay out spill slots, struct return slots, and explicit slots below the
+    // incoming arguments. The offset is negative, growing downwards. Start with
+    // the smallest alignments for better packing.
+    let mut offset = incoming_min;
+    debug_assert!(min_align.is_power_of_two());
+    while min_align <= alignment {
+        for slot in frame.values_mut() {
+            // Pick out explicit and spill slots with exact alignment `min_align`.
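+            // (Each pass of the outer loop handles one alignment class; slots whose
+            // capped alignment differs are skipped here and placed in a later,
+            // more-aligned pass.)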
+ match slot.kind { + StackSlotKind::SpillSlot + | StackSlotKind::StructReturnSlot + | StackSlotKind::ExplicitSlot + | StackSlotKind::EmergencySlot => { + if slot.alignment(alignment) != min_align { + continue; + } + } + StackSlotKind::IncomingArg | StackSlotKind::OutgoingArg => continue, + } + + offset = offset + .checked_sub(slot.size as StackOffset) + .ok_or(CodegenError::ImplLimitExceeded)?; + + // Aligning the negative offset can never cause overflow. We're only clearing bits. + offset &= -(min_align as StackOffset); + slot.offset = Some(offset); + } + + // Move on to the next higher alignment. + min_align *= 2; + } + + // Finally, make room for the outgoing arguments. + offset = offset + .checked_sub(outgoing_max) + .ok_or(CodegenError::ImplLimitExceeded)?; + + if must_align { + offset &= -(alignment as StackOffset); + } + + // Set the computed layout information for the frame + let frame_size = (offset as StackSize).wrapping_neg(); + let inbound_args_size = incoming_max as u32; + frame.layout_info = Some(StackLayoutInfo { + frame_size, + inbound_args_size, + }); + + Ok(frame_size) +} + +#[cfg(test)] +mod tests { + use super::layout_stack; + use crate::ir::stackslot::StackOffset; + use crate::ir::types; + use crate::ir::{StackSlotData, StackSlotKind, StackSlots}; + use crate::result::CodegenError; + + #[test] + fn layout() { + let sss = &mut StackSlots::new(); + + // For all these test cases, assume it will call. + let is_leaf = false; + + // An empty layout should have 0-sized stack frame. + assert_eq!(layout_stack(sss, is_leaf, 1), Ok(0)); + assert_eq!(layout_stack(sss, is_leaf, 16), Ok(0)); + + // Same for incoming arguments with non-negative offsets. + let in0 = sss.make_incoming_arg(types::I64, 0); + let in1 = sss.make_incoming_arg(types::I64, 8); + + assert_eq!(layout_stack(sss, is_leaf, 1), Ok(0)); + assert_eq!(layout_stack(sss, is_leaf, 16), Ok(0)); + assert_eq!(sss[in0].offset, Some(0)); + assert_eq!(sss[in1].offset, Some(8)); + + // Add some spill slots. + let ss0 = sss.make_spill_slot(types::I64); + let ss1 = sss.make_spill_slot(types::I32); + + assert_eq!(layout_stack(sss, is_leaf, 1), Ok(12)); + assert_eq!(sss[in0].offset, Some(0)); + assert_eq!(sss[in1].offset, Some(8)); + assert_eq!(sss[ss0].offset, Some(-8)); + assert_eq!(sss[ss1].offset, Some(-12)); + + assert_eq!(layout_stack(sss, is_leaf, 16), Ok(16)); + assert_eq!(sss[in0].offset, Some(0)); + assert_eq!(sss[in1].offset, Some(8)); + assert_eq!(sss[ss0].offset, Some(-16)); + assert_eq!(sss[ss1].offset, Some(-4)); + + // An incoming argument with negative offset counts towards the total frame size, but it + // should still pack nicely with the spill slots. + let in2 = sss.make_incoming_arg(types::I32, -4); + + assert_eq!(layout_stack(sss, is_leaf, 1), Ok(16)); + assert_eq!(sss[in0].offset, Some(0)); + assert_eq!(sss[in1].offset, Some(8)); + assert_eq!(sss[in2].offset, Some(-4)); + assert_eq!(sss[ss0].offset, Some(-12)); + assert_eq!(sss[ss1].offset, Some(-16)); + + assert_eq!(layout_stack(sss, is_leaf, 16), Ok(16)); + assert_eq!(sss[in0].offset, Some(0)); + assert_eq!(sss[in1].offset, Some(8)); + assert_eq!(sss[in2].offset, Some(-4)); + assert_eq!(sss[ss0].offset, Some(-16)); + assert_eq!(sss[ss1].offset, Some(-8)); + + // Finally, make sure there is room for the outgoing args. 
+        let out0 = sss.get_outgoing_arg(types::I32, 0);
+
+        assert_eq!(layout_stack(sss, is_leaf, 1), Ok(20));
+        assert_eq!(sss[in0].offset, Some(0));
+        assert_eq!(sss[in1].offset, Some(8));
+        assert_eq!(sss[in2].offset, Some(-4));
+        assert_eq!(sss[ss0].offset, Some(-12));
+        assert_eq!(sss[ss1].offset, Some(-16));
+        assert_eq!(sss[out0].offset, Some(0));
+
+        assert_eq!(layout_stack(sss, is_leaf, 16), Ok(32));
+        assert_eq!(sss[in0].offset, Some(0));
+        assert_eq!(sss[in1].offset, Some(8));
+        assert_eq!(sss[in2].offset, Some(-4));
+        assert_eq!(sss[ss0].offset, Some(-16));
+        assert_eq!(sss[ss1].offset, Some(-8));
+        assert_eq!(sss[out0].offset, Some(0));
+
+        // Also test that an unsupported offset is rejected.
+        sss.get_outgoing_arg(types::I8, StackOffset::max_value() - 1);
+        assert_eq!(
+            layout_stack(sss, is_leaf, 1),
+            Err(CodegenError::ImplLimitExceeded)
+        );
+    }
+
+    #[test]
+    fn slot_kinds() {
+        let sss = &mut StackSlots::new();
+
+        // Add some slots of various kinds.
+        let ss0 = sss.make_spill_slot(types::I32);
+        let ss1 = sss.push(StackSlotData::new(
+            StackSlotKind::ExplicitSlot,
+            types::I32.bytes(),
+        ));
+        let ss2 = sss.get_emergency_slot(types::I32, &[]);
+
+        assert_eq!(layout_stack(sss, true, 1), Ok(12));
+        assert_eq!(sss[ss0].offset, Some(-4));
+        assert_eq!(sss[ss1].offset, Some(-8));
+        assert_eq!(sss[ss2].offset, Some(-12));
+    }
+}
diff --git a/cranelift/codegen/src/timing.rs b/cranelift/codegen/src/timing.rs
new file mode 100644
index 0000000000..3cfc67f79d
--- /dev/null
+++ b/cranelift/codegen/src/timing.rs
@@ -0,0 +1,262 @@
+//! Pass timing.
+//!
+//! This module provides facilities for timing the execution of individual compilation passes.
+
+use core::fmt;
+
+pub use self::details::{add_to_current, take_current, PassTimes, TimingToken};
+
+// Each pass that can be timed is predefined with the `define_passes!` macro. Each pass has a
+// snake_case name and a plain text description used when printing out the timing report.
+//
+// This macro defines:
+//
+// - A C-style enum containing all the pass names and a `None` variant.
+// - A usize constant with the number of defined passes.
+// - A const array of pass descriptions.
+// - A public function per pass used to start the timing of that pass.
macro_rules! define_passes {
+    { $enum:ident, $num_passes:ident, $descriptions:ident;
+      $($pass:ident: $desc:expr,)+
+    } => {
+        #[allow(non_camel_case_types)]
+        #[derive(Clone, Copy, Debug, PartialEq, Eq)]
+        enum $enum { $($pass,)+ None}
+
+        const $num_passes: usize = $enum::None as usize;
+
+        const $descriptions: [&str; $num_passes] = [ $($desc),+ ];
+
+        $(
+            #[doc=$desc]
+            pub fn $pass() -> TimingToken {
+                details::start_pass($enum::$pass)
+            }
+        )+
+    }
+}
+
+// Pass definitions.
+define_passes! {
+    Pass, NUM_PASSES, DESCRIPTIONS;
+
+    process_file: "Processing test file",
+    parse_text: "Parsing textual Cranelift IR",
+    wasm_translate_module: "Translate WASM module",
+    wasm_translate_function: "Translate WASM function",
+
+    verifier: "Verify Cranelift IR",
+    verify_cssa: "Verify CSSA",
+    verify_liveness: "Verify live ranges",
+    verify_locations: "Verify value locations",
+    verify_flags: "Verify CPU flags",
+
+    compile: "Compilation passes",
+    flowgraph: "Control flow graph",
+    domtree: "Dominator tree",
+    loop_analysis: "Loop analysis",
+    postopt: "Post-legalization rewriting",
+    preopt: "Pre-legalization rewriting",
+    dce: "Dead code elimination",
+    legalize: "Legalization",
+    gvn: "Global value numbering",
+    licm: "Loop invariant code motion",
+    unreachable_code: "Remove unreachable blocks",
+
+    regalloc: "Register allocation",
+    ra_liveness: "RA liveness analysis",
+    ra_cssa: "RA coalescing CSSA",
+    ra_spilling: "RA spilling",
+    ra_reload: "RA reloading",
+    ra_coloring: "RA coloring",
+
+    prologue_epilogue: "Prologue/epilogue insertion",
+    shrink_instructions: "Instruction encoding shrinking",
+    relax_branches: "Branch relaxation",
+    binemit: "Binary machine code emission",
+    layout_renumber: "Layout full renumbering",
+
+    canonicalize_nans: "Canonicalization of NaNs",
+}
+
+impl Pass {
+    pub fn idx(self) -> usize {
+        self as usize
+    }
+}
+
+impl fmt::Display for Pass {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match DESCRIPTIONS.get(self.idx()) {
+            Some(s) => f.write_str(s),
+            None => f.write_str("<no pass>"),
+        }
+    }
+}
+
+/// Implementation details.
+///
+/// This whole module can be gated on a `cfg` feature to provide a dummy implementation for
+/// performance-sensitive builds or restricted environments. The dummy implementation must provide
+/// the `TimingToken` and `PassTimes` types and the `take_current`, `add_to_current`, and
+/// `start_pass` functions.
+#[cfg(feature = "std")]
+mod details {
+    use super::{Pass, DESCRIPTIONS, NUM_PASSES};
+    use log::debug;
+    use std::cell::{Cell, RefCell};
+    use std::fmt;
+    use std::mem;
+    use std::time::{Duration, Instant};
+
+    /// A timing token is responsible for timing the currently running pass. Timing starts when it
+    /// is created and ends when it is dropped.
+    ///
+    /// Multiple passes can be active at the same time, but they must be started and stopped in a
+    /// LIFO fashion.
+    pub struct TimingToken {
+        /// Start time for this pass.
+        start: Instant,
+
+        // Pass being timed by this token.
+        pass: Pass,
+
+        // The previously active pass which will be restored when this token is dropped.
+        prev: Pass,
+    }
+
+    /// Accumulated timing information for a single pass.
+    #[derive(Default, Copy, Clone)]
+    struct PassTime {
+        /// Total time spent running this pass including children.
+        total: Duration,
+
+        /// Time spent running in child passes.
+        child: Duration,
+    }
+
+    /// Accumulated timing for all passes.
+    pub struct PassTimes {
+        pass: [PassTime; NUM_PASSES],
+    }
+
+    impl Default for PassTimes {
+        fn default() -> Self {
+            Self {
+                pass: [Default::default(); NUM_PASSES],
+            }
+        }
+    }
+
+    impl fmt::Display for PassTimes {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            writeln!(f, "======== ========  ==================================")?;
+            writeln!(f, "   Total     Self  Pass")?;
+            writeln!(f, "-------- --------  ----------------------------------")?;
+            for (time, desc) in self.pass.iter().zip(&DESCRIPTIONS[..]) {
+                // Omit passes that haven't run.
+                if time.total == Duration::default() {
+                    continue;
+                }
+
+                // Write a duration as secs.millis, trailing space.
+                fn fmtdur(mut dur: Duration, f: &mut fmt::Formatter) -> fmt::Result {
+                    // Round to nearest ms by adding 500us.
+                    dur += Duration::new(0, 500_000);
+                    let ms = dur.subsec_millis();
+                    write!(f, "{:4}.{:03} ", dur.as_secs(), ms)
+                }
+
+                fmtdur(time.total, f)?;
+                if let Some(s) = time.total.checked_sub(time.child) {
+                    fmtdur(s, f)?;
+                }
+                writeln!(f, " {}", desc)?;
+            }
+            writeln!(f, "======== ========  ==================================")
+        }
+    }
+
+    // Information about passes in a single thread.
+    thread_local! {
+        static CURRENT_PASS: Cell<Pass> = Cell::new(Pass::None);
+        static PASS_TIME: RefCell<PassTimes> = RefCell::new(Default::default());
+    }
+
+    /// Start timing `pass` as a child of the currently running pass, if any.
+    ///
+    /// This function is called by the publicly exposed pass functions.
+    pub(super) fn start_pass(pass: Pass) -> TimingToken {
+        let prev = CURRENT_PASS.with(|p| p.replace(pass));
+        debug!("timing: Starting {} (during {})", pass, prev);
+        TimingToken {
+            start: Instant::now(),
+            pass,
+            prev,
+        }
+    }
+
+    /// Dropping a timing token indicates the end of the pass.
+    impl Drop for TimingToken {
+        fn drop(&mut self) {
+            let duration = self.start.elapsed();
+            debug!("timing: Ending {}", self.pass);
+            let old_cur = CURRENT_PASS.with(|p| p.replace(self.prev));
+            debug_assert_eq!(self.pass, old_cur, "Timing tokens dropped out of order");
+            PASS_TIME.with(|rc| {
+                let mut table = rc.borrow_mut();
+                table.pass[self.pass.idx()].total += duration;
+                if let Some(parent) = table.pass.get_mut(self.prev.idx()) {
+                    parent.child += duration;
+                }
+            })
+        }
+    }
+
+    /// Take the current accumulated pass timings and reset the timings for the current thread.
+    pub fn take_current() -> PassTimes {
+        PASS_TIME.with(|rc| mem::replace(&mut *rc.borrow_mut(), Default::default()))
+    }
+
+    /// Add `times` to the accumulated timings for the current thread.
+    pub fn add_to_current(times: &PassTimes) {
+        PASS_TIME.with(|rc| {
+            for (a, b) in rc.borrow_mut().pass.iter_mut().zip(&times.pass[..]) {
+                a.total += b.total;
+                a.child += b.child;
+            }
+        })
+    }
+}
+
+/// Dummy `debug` implementation
+#[cfg(not(feature = "std"))]
+mod details {
+    use super::Pass;
+    /// Dummy `TimingToken`
+    pub struct TimingToken;
+    /// Dummy `PassTimes`
+    pub struct PassTimes;
+    /// Returns dummy `PassTimes`
+    pub fn take_current() -> PassTimes {
+        PassTimes
+    }
+    /// does nothing
+    pub fn add_to_current(_times: &PassTimes) {}
+
+    /// does nothing
+    pub(super) fn start_pass(_pass: Pass) -> TimingToken {
+        TimingToken
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use alloc::string::ToString;
+
+    #[test]
+    fn display() {
+        assert_eq!(Pass::None.to_string(), "<no pass>");
+        assert_eq!(Pass::regalloc.to_string(), "Register allocation");
+    }
+}
diff --git a/cranelift/codegen/src/topo_order.rs b/cranelift/codegen/src/topo_order.rs
new file mode 100644
index 0000000000..8d38e4f324
--- /dev/null
+++ b/cranelift/codegen/src/topo_order.rs
@@ -0,0 +1,138 @@
+//! Topological order of blocks, according to the dominator tree.
+
+use crate::dominator_tree::DominatorTree;
+use crate::entity::EntitySet;
+use crate::ir::{Block, Layout};
+use alloc::vec::Vec;
+
+/// Present blocks in a topological order such that all dominating blocks are guaranteed to be visited
+/// before the current block.
+///
+/// There are many topological orders of the blocks in a function, so it is possible to provide a
+/// preferred order, and the `TopoOrder` will present blocks in an order that is as close as possible
+/// to the preferred order.
+pub struct TopoOrder {
+    /// Preferred order of blocks to visit.
+    preferred: Vec<Block>,
+
+    /// Next entry to get from `preferred`.
+    next: usize,
+
+    /// Set of visited blocks.
+    visited: EntitySet<Block>,
+
+    /// Stack of blocks to be visited next, already in `visited`.
+    stack: Vec<Block>,
+}
+
+impl TopoOrder {
+    /// Create a new empty topological order.
+    pub fn new() -> Self {
+        Self {
+            preferred: Vec::new(),
+            next: 0,
+            visited: EntitySet::new(),
+            stack: Vec::new(),
+        }
+    }
+
+    /// Clear all data structures in this topological order.
+    pub fn clear(&mut self) {
+        self.preferred.clear();
+        self.next = 0;
+        self.visited.clear();
+        self.stack.clear();
+    }
+
+    /// Reset and initialize with a preferred sequence of blocks. The resulting topological order is
+    /// guaranteed to contain all of the blocks in `preferred` as well as any dominators.
+    pub fn reset<Blocks>(&mut self, preferred: Blocks)
+    where
+        Blocks: IntoIterator<Item = Block>,
+    {
+        self.preferred.clear();
+        self.preferred.extend(preferred);
+        self.next = 0;
+        self.visited.clear();
+        self.stack.clear();
+    }
+
+    /// Get the next block in the topological order.
+    ///
+    /// Two things are guaranteed about the blocks returned by this function:
+    ///
+    /// - All blocks in the `preferred` iterator given to `reset` will be returned.
+    /// - All dominators are visited before the block returned.
+    pub fn next(&mut self, layout: &Layout, domtree: &DominatorTree) -> Option<Block> {
+        self.visited.resize(layout.block_capacity());
+        // Any entries in `stack` should be returned immediately. They have already been added to
+        // `visited`.
+        while self.stack.is_empty() {
+            match self.preferred.get(self.next).cloned() {
+                None => return None,
+                Some(mut block) => {
+                    // We have the next block in the preferred order.
+                    self.next += 1;
+                    // Push it along with any non-visited dominators.
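+                    // (This walks up the immediate-dominator chain, pushing each block
+                    // not yet visited; popping the stack below then yields dominators
+                    // before the blocks they dominate.)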
+                    while self.visited.insert(block) {
+                        self.stack.push(block);
+                        match domtree.idom(block) {
+                            Some(idom) => {
+                                block = layout.inst_block(idom).expect("idom not in layout")
+                            }
+                            None => break,
+                        }
+                    }
+                }
+            }
+        }
+        self.stack.pop()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cursor::{Cursor, FuncCursor};
+    use crate::dominator_tree::DominatorTree;
+    use crate::flowgraph::ControlFlowGraph;
+    use crate::ir::{Function, InstBuilder};
+    use core::iter;
+
+    #[test]
+    fn empty() {
+        let func = Function::new();
+        let cfg = ControlFlowGraph::with_function(&func);
+        let domtree = DominatorTree::with_function(&func, &cfg);
+        let mut topo = TopoOrder::new();
+
+        assert_eq!(topo.next(&func.layout, &domtree), None);
+        topo.reset(func.layout.blocks());
+        assert_eq!(topo.next(&func.layout, &domtree), None);
+    }
+
+    #[test]
+    fn simple() {
+        let mut func = Function::new();
+        let block0 = func.dfg.make_block();
+        let block1 = func.dfg.make_block();
+
+        {
+            let mut cur = FuncCursor::new(&mut func);
+
+            cur.insert_block(block0);
+            cur.ins().jump(block1, &[]);
+            cur.insert_block(block1);
+            cur.ins().jump(block1, &[]);
+        }
+
+        let cfg = ControlFlowGraph::with_function(&func);
+        let domtree = DominatorTree::with_function(&func, &cfg);
+        let mut topo = TopoOrder::new();
+
+        topo.reset(iter::once(block1));
+        assert_eq!(topo.next(&func.layout, &domtree), Some(block0));
+        assert_eq!(topo.next(&func.layout, &domtree), Some(block1));
+        assert_eq!(topo.next(&func.layout, &domtree), None);
+    }
+}
diff --git a/cranelift/codegen/src/unreachable_code.rs b/cranelift/codegen/src/unreachable_code.rs
new file mode 100644
index 0000000000..63e3e230f8
--- /dev/null
+++ b/cranelift/codegen/src/unreachable_code.rs
@@ -0,0 +1,46 @@
+//! Unreachable code elimination.
+
+use crate::cursor::{Cursor, FuncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir;
+use crate::timing;
+use log::debug;
+
+/// Eliminate unreachable code.
+///
+/// This pass deletes whole blocks that can't be reached from the entry block. It does not delete
+/// individual instructions whose results are unused.
+///
+/// The reachability analysis is performed by the dominator tree analysis.
+pub fn eliminate_unreachable_code(
+    func: &mut ir::Function,
+    cfg: &mut ControlFlowGraph,
+    domtree: &DominatorTree,
+) {
+    let _tt = timing::unreachable_code();
+    let mut pos = FuncCursor::new(func);
+    while let Some(block) = pos.next_block() {
+        if domtree.is_reachable(block) {
+            continue;
+        }
+
+        debug!("Eliminating unreachable {}", block);
+        // Move the cursor out of the way and make sure the next loop iteration goes to the right
+        // block.
+        pos.prev_block();
+
+        // Remove all instructions from `block`.
+        while let Some(inst) = pos.func.layout.first_inst(block) {
+            debug!(" - {}", pos.func.dfg.display_inst(inst, None));
+            pos.func.layout.remove_inst(inst);
+        }
+
+        // Once the block is completely empty, we can update the CFG which removes it from any
+        // predecessor lists.
+        cfg.recompute_block(pos.func, block);
+
+        // Finally, remove the block from the layout.
+        pos.func.layout.remove_block(block);
+    }
+}
diff --git a/cranelift/codegen/src/value_label.rs b/cranelift/codegen/src/value_label.rs
new file mode 100644
index 0000000000..94e5c58171
--- /dev/null
+++ b/cranelift/codegen/src/value_label.rs
@@ -0,0 +1,275 @@
+use crate::ir::{Function, SourceLoc, Value, ValueLabel, ValueLabelAssignments, ValueLoc};
+use crate::isa::TargetIsa;
+use crate::regalloc::{Context, RegDiversions};
+use crate::HashMap;
+use alloc::collections::BTreeMap;
+use alloc::vec::Vec;
+use core::cmp::Ordering;
+use core::iter::Iterator;
+use core::ops::Bound::*;
+use core::ops::Deref;
+
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+
+/// Value location range.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct ValueLocRange {
+    /// The ValueLoc containing a ValueLabel during this range.
+    pub loc: ValueLoc,
+    /// The start of the range.
+    pub start: u32,
+    /// The end of the range.
+    pub end: u32,
+}
+
+/// Resulting map of Value labels and their ranges/locations.
+pub type ValueLabelsRanges = HashMap<ValueLabel, Vec<ValueLocRange>>;
+
+fn build_value_labels_index<T>(func: &Function) -> BTreeMap<T, (Value, ValueLabel)>
+where
+    T: From<SourceLoc> + Deref<Target = SourceLoc> + Ord + Copy,
+{
+    if func.dfg.values_labels.is_none() {
+        return BTreeMap::new();
+    }
+    let values_labels = func.dfg.values_labels.as_ref().unwrap();
+
+    // Index values_labels by srcloc/from
+    let mut sorted = BTreeMap::new();
+    for (val, assigns) in values_labels {
+        match assigns {
+            ValueLabelAssignments::Starts(labels) => {
+                for label in labels {
+                    if label.from.is_default() {
+                        continue;
+                    }
+                    let srcloc = T::from(label.from);
+                    let label = label.label;
+                    sorted.insert(srcloc, (*val, label));
+                }
+            }
+            ValueLabelAssignments::Alias { from, value } => {
+                if from.is_default() {
+                    continue;
+                }
+                let mut aliased_value = *value;
+                while let Some(ValueLabelAssignments::Alias { value, .. }) =
+                    values_labels.get(&aliased_value)
+                {
+                    // TODO check/limit recursion?
+                    aliased_value = *value;
+                }
+                let from = T::from(*from);
+                if let Some(ValueLabelAssignments::Starts(labels)) =
+                    values_labels.get(&aliased_value)
+                {
+                    for label in labels {
+                        let srcloc = if label.from.is_default() {
+                            from
+                        } else {
+                            from.max(T::from(label.from))
+                        };
+                        let label = label.label;
+                        sorted.insert(srcloc, (*val, label));
+                    }
+                }
+            }
+        }
+    }
+    sorted
+}
+
+/// Builds ranges and locations for the specified value labels.
+/// The labels are specified in the DataFlowGraph's `values_labels` collection.
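+///
+/// The type parameter `T` is the comparable source-location wrapper used to order label
+/// assignments; the `ComparableSourceLoc` type defined at the end of this file satisfies
+/// the required bounds.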
+pub fn build_value_labels_ranges<T>(
+    func: &Function,
+    regalloc: &Context,
+    isa: &dyn TargetIsa,
+) -> ValueLabelsRanges
+where
+    T: From<SourceLoc> + Deref<Target = SourceLoc> + Ord + Copy,
+{
+    let values_labels = build_value_labels_index::<T>(func);
+
+    let mut blocks = func.layout.blocks().collect::<Vec<_>>();
+    blocks.sort_by_key(|block| func.offsets[*block]); // Ensure inst offsets always increase
+    let encinfo = isa.encoding_info();
+    let values_locations = &func.locations;
+    let liveness_ranges = regalloc.liveness().ranges();
+
+    let mut ranges = HashMap::new();
+    let mut add_range = |label, range: (u32, u32), loc: ValueLoc| {
+        if range.0 >= range.1 || !loc.is_assigned() {
+            return;
+        }
+        ranges
+            .entry(label)
+            .or_insert_with(Vec::new)
+            .push(ValueLocRange {
+                loc,
+                start: range.0,
+                end: range.1,
+            });
+    };
+
+    let mut end_offset = 0;
+    let mut tracked_values: Vec<(Value, ValueLabel, u32, ValueLoc)> = Vec::new();
+    let mut divert = RegDiversions::new();
+    for block in blocks {
+        divert.at_block(&func.entry_diversions, block);
+        let mut last_srcloc: Option<T> = None;
+        for (offset, inst, size) in func.inst_offsets(block, &encinfo) {
+            divert.apply(&func.dfg[inst]);
+            end_offset = offset + size;
+            // Remove killed values.
+            tracked_values.retain(|(x, label, start_offset, last_loc)| {
+                let range = liveness_ranges.get(*x);
+                if range.expect("value").killed_at(inst, block, &func.layout) {
+                    add_range(*label, (*start_offset, end_offset), *last_loc);
+                    return false;
+                }
+                true
+            });
+
+            let srcloc = func.srclocs[inst];
+            if srcloc.is_default() {
+                // Don't process instructions without srcloc.
+                continue;
+            }
+            let srcloc = T::from(srcloc);
+
+            // Record and restart ranges if the Value location was changed.
+            for (val, label, start_offset, last_loc) in &mut tracked_values {
+                let new_loc = divert.get(*val, values_locations);
+                if new_loc == *last_loc {
+                    continue;
+                }
+                add_range(*label, (*start_offset, end_offset), *last_loc);
+                *start_offset = end_offset;
+                *last_loc = new_loc;
+            }
+
+            // New source locations range started: abandon all tracked values.
+            if last_srcloc.is_some() && last_srcloc.unwrap() > srcloc {
+                for (_, label, start_offset, last_loc) in &tracked_values {
+                    add_range(*label, (*start_offset, end_offset), *last_loc);
+                }
+                tracked_values.clear();
+                last_srcloc = None;
+            }
+
+            // Get non-processed Values based on srcloc
+            let range = (
+                match last_srcloc {
+                    Some(a) => Excluded(a),
+                    None => Unbounded,
+                },
+                Included(srcloc),
+            );
+            let active_values = values_labels.range(range);
+            let active_values = active_values.filter(|(_, (v, _))| {
+                // Ignore dead/inactive Values.
+                let range = liveness_ranges.get(*v);
+                match range {
+                    Some(r) => r.reaches_use(inst, block, &func.layout),
+                    None => false,
+                }
+            });
+            // Append new Values to the tracked_values.
+            for (_, (val, label)) in active_values {
+                let loc = divert.get(*val, values_locations);
+                tracked_values.push((*val, *label, end_offset, loc));
+            }
+
+            last_srcloc = Some(srcloc);
+        }
+        // Finish all started ranges.
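+        // (Values still being tracked at the end of the block are closed with a range
+        // ending at the last instruction's end offset.)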
+        for (_, label, start_offset, last_loc) in &tracked_values {
+            add_range(*label, (*start_offset, end_offset), *last_loc);
+        }
+    }
+
+    // Optimize ranges in-place
+    for (_, label_ranges) in ranges.iter_mut() {
+        assert!(!label_ranges.is_empty());
+        label_ranges.sort_by(|a, b| a.start.cmp(&b.start).then_with(|| a.end.cmp(&b.end)));
+
+        // Merge ranges
+        let mut i = 1;
+        let mut j = 0;
+        while i < label_ranges.len() {
+            assert!(label_ranges[j].start <= label_ranges[i].end);
+            if label_ranges[j].loc != label_ranges[i].loc {
+                // Different location
+                if label_ranges[j].end >= label_ranges[i].end {
+                    // Consumed by previous range, skipping
+                    i += 1;
+                    continue;
+                }
+                j += 1;
+                label_ranges[j] = label_ranges[i];
+                i += 1;
+                continue;
+            }
+            if label_ranges[j].end < label_ranges[i].start {
+                // Gap in the range location
+                j += 1;
+                label_ranges[j] = label_ranges[i];
+                i += 1;
+                continue;
+            }
+            // Merge i-th and j-th ranges
+            if label_ranges[j].end < label_ranges[i].end {
+                label_ranges[j].end = label_ranges[i].end;
+            }
+            i += 1;
+        }
+        label_ranges.truncate(j + 1);
+
+        // Cut/move the start position of the next range if two neighbor ranges intersect.
+        for i in 0..j {
+            if label_ranges[i].end > label_ranges[i + 1].start {
+                label_ranges[i + 1].start = label_ranges[i].end;
+                assert!(label_ranges[i + 1].start < label_ranges[i + 1].end);
+            }
+            assert!(label_ranges[i].end <= label_ranges[i + 1].start);
+        }
+    }
+    ranges
+}
+
+#[derive(Eq, Clone, Copy)]
+pub struct ComparableSourceLoc(SourceLoc);
+
+impl From<SourceLoc> for ComparableSourceLoc {
+    fn from(s: SourceLoc) -> Self {
+        Self(s)
+    }
+}
+
+impl Deref for ComparableSourceLoc {
+    type Target = SourceLoc;
+    fn deref(&self) -> &SourceLoc {
+        &self.0
+    }
+}
+
+impl PartialOrd for ComparableSourceLoc {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for ComparableSourceLoc {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.0.bits().cmp(&other.0.bits())
+    }
+}
+
+impl PartialEq for ComparableSourceLoc {
+    fn eq(&self, other: &Self) -> bool {
+        self.0 == other.0
+    }
+}
diff --git a/cranelift/codegen/src/verifier/cssa.rs b/cranelift/codegen/src/verifier/cssa.rs
new file mode 100644
index 0000000000..f1ff72597a
--- /dev/null
+++ b/cranelift/codegen/src/verifier/cssa.rs
@@ -0,0 +1,172 @@
+//! Verify conventional SSA form.
+
+use crate::dbg::DisplayList;
+use crate::dominator_tree::{DominatorTree, DominatorTreePreorder};
+use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
+use crate::ir::{ExpandedProgramPoint, Function};
+use crate::regalloc::liveness::Liveness;
+use crate::regalloc::virtregs::VirtRegs;
+use crate::timing;
+use crate::verifier::{VerifierErrors, VerifierStepResult};
+
+/// Verify conventional SSA form for `func`.
+///
+/// Conventional SSA form is represented in Cranelift with the help of virtual registers:
+///
+/// - Two values are said to be *PHI-related* if one is a block argument and the other is passed as
+///   a branch argument in a location that matches the first value.
+/// - PHI-related values must belong to the same virtual register.
+/// - Two values in the same virtual register must not have overlapping live ranges.
+///
+/// Additionally, we verify this property of virtual registers:
+///
+/// - The values in a virtual register are topologically ordered w.r.t. dominance.
+///
+/// We don't verify that virtual registers are minimal. Minimal CSSA is not required.
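+///
+/// For example, given a branch `brnz v1, block2(v5)` and a block header
+/// `block2(v9: i32)`, the values v5 and v9 are PHI-related: they must share a
+/// virtual register, and their live ranges must not overlap.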
+pub fn verify_cssa( + func: &Function, + cfg: &ControlFlowGraph, + domtree: &DominatorTree, + liveness: &Liveness, + virtregs: &VirtRegs, + errors: &mut VerifierErrors, +) -> VerifierStepResult<()> { + let _tt = timing::verify_cssa(); + + let mut preorder = DominatorTreePreorder::new(); + preorder.compute(domtree, &func.layout); + + let verifier = CssaVerifier { + func, + cfg, + domtree, + virtregs, + liveness, + preorder, + }; + verifier.check_virtregs(errors)?; + verifier.check_cssa(errors)?; + Ok(()) +} + +struct CssaVerifier<'a> { + func: &'a Function, + cfg: &'a ControlFlowGraph, + domtree: &'a DominatorTree, + virtregs: &'a VirtRegs, + liveness: &'a Liveness, + preorder: DominatorTreePreorder, +} + +impl<'a> CssaVerifier<'a> { + fn check_virtregs(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { + for vreg in self.virtregs.all_virtregs() { + let values = self.virtregs.values(vreg); + + for (idx, &val) in values.iter().enumerate() { + if !self.func.dfg.value_is_valid(val) { + return errors.fatal((val, format!("Invalid value in {}", vreg))); + } + if !self.func.dfg.value_is_attached(val) { + return errors.fatal((val, format!("Detached value in {}", vreg))); + } + if self.liveness.get(val).is_none() { + return errors.fatal((val, format!("Value in {} has no live range", vreg))); + }; + + // Check topological ordering with the previous values in the virtual register. + let def: ExpandedProgramPoint = self.func.dfg.value_def(val).into(); + let def_block = self.func.layout.pp_block(def); + for &prev_val in &values[0..idx] { + let prev_def: ExpandedProgramPoint = self.func.dfg.value_def(prev_val).into(); + let prev_block = self.func.layout.pp_block(prev_def); + + if prev_def == def { + return errors.fatal(( + val, + format!( + "Values {} and {} in {} = {} defined at the same program point", + prev_val, + val, + vreg, + DisplayList(values) + ), + )); + } + + // Enforce topological ordering of defs in the virtual register. + if self.preorder.dominates(def_block, prev_block) + && self.domtree.dominates(def, prev_def, &self.func.layout) + { + return errors.fatal(( + val, + format!( + "Value in {} = {} def dominates previous {}", + vreg, + DisplayList(values), + prev_val + ), + )); + } + } + + // Knowing that values are in topo order, we can check for interference this + // way. + // We only have to check against the nearest dominating value. + for &prev_val in values[0..idx].iter().rev() { + let prev_def: ExpandedProgramPoint = self.func.dfg.value_def(prev_val).into(); + let prev_block = self.func.layout.pp_block(prev_def); + + if self.preorder.dominates(prev_block, def_block) + && self.domtree.dominates(prev_def, def, &self.func.layout) + { + if self.liveness[prev_val].overlaps_def(def, def_block, &self.func.layout) { + return errors.fatal(( + val, + format!( + "Value def in {} = {} interferes with {}", + vreg, + DisplayList(values), + prev_val + ), + )); + } else { + break; + } + } + } + } + } + + Ok(()) + } + + fn check_cssa(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { + for block in self.func.layout.blocks() { + let block_params = self.func.dfg.block_params(block); + for BlockPredecessor { inst: pred, .. } in self.cfg.pred_iter(block) { + let pred_args = self.func.dfg.inst_variable_args(pred); + // This should have been caught by an earlier verifier pass. + assert_eq!( + block_params.len(), + pred_args.len(), + "Wrong arguments on branch." 
+                );
+
+                for (&block_param, &pred_arg) in block_params.iter().zip(pred_args) {
+                    if !self.virtregs.same_class(block_param, pred_arg) {
+                        return errors.fatal((
+                            pred,
+                            format!(
+                                "{} and {} must be in the same virtual register",
+                                block_param, pred_arg
+                            ),
+                        ));
+                    }
+                }
+            }
+        }
+
+        Ok(())
+    }
+}
diff --git a/cranelift/codegen/src/verifier/flags.rs b/cranelift/codegen/src/verifier/flags.rs
new file mode 100644
index 0000000000..1a20303d20
--- /dev/null
+++ b/cranelift/codegen/src/verifier/flags.rs
@@ -0,0 +1,180 @@
+//! Verify CPU flags values.
+
+use crate::entity::{EntitySet, SecondaryMap};
+use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
+use crate::ir;
+use crate::ir::instructions::BranchInfo;
+use crate::isa;
+use crate::packed_option::PackedOption;
+use crate::timing;
+use crate::verifier::{VerifierErrors, VerifierStepResult};
+
+/// Verify that CPU flags are used correctly.
+///
+/// The value types `iflags` and `fflags` represent CPU flags which usually live in a
+/// special-purpose register, so they can't be used as freely as other value types that can live in
+/// any register.
+///
+/// We verify the following conditions:
+///
+/// - At most one flags value can be live at a time.
+/// - A flags value cannot be live across an instruction that clobbers the flags.
+///
+pub fn verify_flags(
+    func: &ir::Function,
+    cfg: &ControlFlowGraph,
+    isa: Option<&dyn isa::TargetIsa>,
+    errors: &mut VerifierErrors,
+) -> VerifierStepResult<()> {
+    let _tt = timing::verify_flags();
+    let mut verifier = FlagsVerifier {
+        func,
+        cfg,
+        encinfo: isa.map(|isa| isa.encoding_info()),
+        livein: SecondaryMap::new(),
+    };
+    verifier.check(errors)
+}
+
+struct FlagsVerifier<'a> {
+    func: &'a ir::Function,
+    cfg: &'a ControlFlowGraph,
+    encinfo: Option<isa::EncInfo>,
+
+    /// The single live-in flags value (if any) for each block.
+    livein: SecondaryMap<ir::Block, PackedOption<ir::Value>>,
+}
+
+impl<'a> FlagsVerifier<'a> {
+    fn check(&mut self, errors: &mut VerifierErrors) -> VerifierStepResult<()> {
+        // List of blocks that need to be processed. Blocks may be re-added to this list when we
+        // detect that one of their successor blocks needs a live-in flags value.
+        let mut worklist = EntitySet::with_capacity(self.func.layout.block_capacity());
+        for block in self.func.layout.blocks() {
+            worklist.insert(block);
+        }
+
+        while let Some(block) = worklist.pop() {
+            if let Some(value) = self.visit_block(block, errors)? {
+                // The block has live-in flags. Check if the value changed.
+                match self.livein[block].expand() {
+                    // Revisit any predecessor blocks the first time we see a live-in for `block`.
+                    None => {
+                        self.livein[block] = value.into();
+                        for BlockPredecessor { block: pred, .. } in self.cfg.pred_iter(block) {
+                            worklist.insert(pred);
+                        }
+                    }
+                    Some(old) if old != value => {
+                        return errors.fatal((
+                            block,
+                            format!("conflicting live-in CPU flags: {} and {}", old, value),
+                        ));
+                    }
+                    x => assert_eq!(x, Some(value)),
+                }
+            } else {
+                // Existing live-in flags should never be able to disappear.
+                assert_eq!(self.livein[block].expand(), None);
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Check flags usage in `block` and return the live-in flags value, if any.
+    fn visit_block(
+        &self,
+        block: ir::Block,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<Option<ir::Value>> {
+        // The single currently live flags value.
+        let mut live_val = None;
+
+        // Visit instructions backwards so we can track liveness accurately.
+        for inst in self.func.layout.block_insts(block).rev() {
+            // Check if `inst` interferes with existing live flags.
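+            // (Scanning in reverse, `live_val` is the flags value live *below* the
+            // current instruction; once its def is reached it is no longer live.)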
+            if let Some(live) = live_val {
+                for &res in self.func.dfg.inst_results(inst) {
+                    if res == live {
+                        // We've reached the def of `live`, so it is no longer live above.
+                        live_val = None;
+                    } else if self.func.dfg.value_type(res).is_flags() {
+                        errors
+                            .report((inst, format!("{} clobbers live CPU flags in {}", res, live)));
+                        return Err(());
+                    }
+                }
+
+                // Does the instruction have an encoding that clobbers the CPU flags?
+                if self
+                    .encinfo
+                    .as_ref()
+                    .and_then(|ei| ei.operand_constraints(self.func.encodings[inst]))
+                    .map_or(false, |c| c.clobbers_flags)
+                    && live_val.is_some()
+                {
+                    errors.report((
+                        inst,
+                        format!("encoding clobbers live CPU flags in {}", live),
+                    ));
+                    return Err(());
+                }
+            }
+
+            // Now look for live ranges of CPU flags that end here.
+            for &arg in self.func.dfg.inst_args(inst) {
+                if self.func.dfg.value_type(arg).is_flags() {
+                    merge(&mut live_val, arg, inst, errors)?;
+                }
+            }
+
+            // Include live-in flags to successor blocks.
+            match self.func.dfg.analyze_branch(inst) {
+                BranchInfo::NotABranch => {}
+                BranchInfo::SingleDest(dest, _) => {
+                    if let Some(val) = self.livein[dest].expand() {
+                        merge(&mut live_val, val, inst, errors)?;
+                    }
+                }
+                BranchInfo::Table(jt, dest) => {
+                    if let Some(dest) = dest {
+                        if let Some(val) = self.livein[dest].expand() {
+                            merge(&mut live_val, val, inst, errors)?;
+                        }
+                    }
+                    for dest in self.func.jump_tables[jt].iter() {
+                        if let Some(val) = self.livein[*dest].expand() {
+                            merge(&mut live_val, val, inst, errors)?;
+                        }
+                    }
+                }
+            }
+        }
+
+        // Return the required live-in flags value.
+        Ok(live_val)
+    }
+}
+
+// Merge live flags values, or return an error on conflicting values.
+fn merge(
+    a: &mut Option<ir::Value>,
+    b: ir::Value,
+    inst: ir::Inst,
+    errors: &mut VerifierErrors,
+) -> VerifierStepResult<()> {
+    if let Some(va) = *a {
+        if b != va {
+            return errors.fatal((
+                inst,
+                format!("conflicting live CPU flags: {} and {}", va, b),
+            ));
+        }
+    } else {
+        *a = Some(b);
+    }
+
+    Ok(())
+}
diff --git a/cranelift/codegen/src/verifier/liveness.rs b/cranelift/codegen/src/verifier/liveness.rs
new file mode 100644
index 0000000000..921babc6a0
--- /dev/null
+++ b/cranelift/codegen/src/verifier/liveness.rs
@@ -0,0 +1,235 @@
+//! Liveness verifier.
+
+use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
+use crate::ir::entities::AnyEntity;
+use crate::ir::{ExpandedProgramPoint, Function, ProgramPoint, Value};
+use crate::isa::TargetIsa;
+use crate::regalloc::liveness::Liveness;
+use crate::regalloc::liverange::LiveRange;
+use crate::timing;
+use crate::verifier::{VerifierErrors, VerifierStepResult};
+
+/// Verify liveness information for `func`.
+///
+/// The provided control flow graph is assumed to be sound.
+///
+/// - All values in the program must have a live range.
+/// - The live range def point must match where the value is defined.
+/// - The live range must reach all uses.
+/// - When a live range is live-in to a block, it must be live at all the predecessors.
+/// - The live range affinity must be compatible with encoding constraints.
+///
+/// We don't verify that live ranges are minimal. This would require recomputing live ranges for
+/// all values.
+pub fn verify_liveness( + isa: &dyn TargetIsa, + func: &Function, + cfg: &ControlFlowGraph, + liveness: &Liveness, + errors: &mut VerifierErrors, +) -> VerifierStepResult<()> { + let _tt = timing::verify_liveness(); + let verifier = LivenessVerifier { + isa, + func, + cfg, + liveness, + }; + verifier.check_blocks(errors)?; + verifier.check_insts(errors)?; + Ok(()) +} + +struct LivenessVerifier<'a> { + isa: &'a dyn TargetIsa, + func: &'a Function, + cfg: &'a ControlFlowGraph, + liveness: &'a Liveness, +} + +impl<'a> LivenessVerifier<'a> { + /// Check all block arguments. + fn check_blocks(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { + for block in self.func.layout.blocks() { + for &val in self.func.dfg.block_params(block) { + let lr = match self.liveness.get(val) { + Some(lr) => lr, + None => { + return errors + .fatal((block, format!("block arg {} has no live range", val))) + } + }; + self.check_lr(block.into(), val, lr, errors)?; + } + } + Ok(()) + } + + /// Check all instructions. + fn check_insts(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { + for block in self.func.layout.blocks() { + for inst in self.func.layout.block_insts(block) { + let encoding = self.func.encodings[inst]; + + // Check the defs. + for &val in self.func.dfg.inst_results(inst) { + let lr = match self.liveness.get(val) { + Some(lr) => lr, + None => return errors.fatal((inst, format!("{} has no live range", val))), + }; + self.check_lr(inst.into(), val, lr, errors)?; + + if encoding.is_legal() { + // A legal instruction is not allowed to define ghost values. + if lr.affinity.is_unassigned() { + return errors.fatal(( + inst, + format!( + "{} is a ghost value defined by a real [{}] instruction", + val, + self.isa.encoding_info().display(encoding) + ), + )); + } + } else if !lr.affinity.is_unassigned() { + // A non-encoded instruction can only define ghost values. + return errors.fatal(( + inst, + format!( + "{} is a real {} value defined by a ghost instruction", + val, + lr.affinity.display(&self.isa.register_info()) + ), + )); + } + } + + // Check the uses. + for &val in self.func.dfg.inst_args(inst) { + let lr = match self.liveness.get(val) { + Some(lr) => lr, + None => return errors.fatal((inst, format!("{} has no live range", val))), + }; + + debug_assert!(self.func.layout.inst_block(inst).unwrap() == block); + if !lr.reaches_use(inst, block, &self.func.layout) { + return errors.fatal((inst, format!("{} is not live at this use", val))); + } + + // A legal instruction is not allowed to depend on ghost values. + if encoding.is_legal() && lr.affinity.is_unassigned() { + return errors.fatal(( + inst, + format!( + "{} is a ghost value used by a real [{}] instruction", + val, + self.isa.encoding_info().display(encoding), + ), + )); + } + } + } + } + Ok(()) + } + + /// Check the integrity of the live range `lr`. 
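+    ///
+    /// This checks that the live range's def point matches `def`, that a dead range is
+    /// local, that the def-local interval ends inside the defining block, and that every
+    /// live-in interval only covers blocks present in the layout, with the value live at
+    /// all predecessors of each such block.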
+    fn check_lr(
+        &self,
+        def: ProgramPoint,
+        val: Value,
+        lr: &LiveRange,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        let l = &self.func.layout;
+
+        let loc: AnyEntity = match def.into() {
+            ExpandedProgramPoint::Block(e) => e.into(),
+            ExpandedProgramPoint::Inst(i) => i.into(),
+        };
+        if lr.def() != def {
+            return errors.fatal((
+                loc,
+                format!("Wrong live range def ({}) for {}", lr.def(), val),
+            ));
+        }
+        if lr.is_dead() {
+            if !lr.is_local() {
+                return errors.fatal((loc, format!("Dead live range {} should be local", val)));
+            } else {
+                return Ok(());
+            }
+        }
+        let def_block = match def.into() {
+            ExpandedProgramPoint::Block(e) => e,
+            ExpandedProgramPoint::Inst(i) => l.inst_block(i).unwrap(),
+        };
+        match lr.def_local_end().into() {
+            ExpandedProgramPoint::Block(e) => {
+                return errors.fatal((
+                    loc,
+                    format!("Def local range for {} can't end at {}", val, e),
+                ));
+            }
+            ExpandedProgramPoint::Inst(i) => {
+                if self.func.layout.inst_block(i) != Some(def_block) {
+                    return errors
+                        .fatal((loc, format!("Def local end for {} in wrong block", val)));
+                }
+            }
+        }
+
+        // Now check the live-in intervals against the CFG.
+        for (mut block, end) in lr.liveins() {
+            if !l.is_block_inserted(block) {
+                return errors.fatal((
+                    loc,
+                    format!("{} livein at {} which is not in the layout", val, block),
+                ));
+            }
+            let end_block = match l.inst_block(end) {
+                Some(e) => e,
+                None => {
+                    return errors.fatal((
+                        loc,
+                        format!(
+                            "{} livein for {} ends at {} which is not in the layout",
+                            val, block, end
+                        ),
+                    ));
+                }
+            };
+
+            // Check all the blocks in the interval independently.
+            loop {
+                // If `val` is live-in at `block`, it must be live at all the predecessors.
+                for BlockPredecessor { inst: pred, block } in self.cfg.pred_iter(block) {
+                    if !lr.reaches_use(pred, block, &self.func.layout) {
+                        return errors.fatal((
+                            pred,
+                            format!(
+                                "{} is live in to {} but not live at predecessor",
+                                val, block
+                            ),
+                        ));
+                    }
+                }
+
+                if block == end_block {
+                    break;
+                }
+                block = match l.next_block(block) {
+                    Some(e) => e,
+                    None => {
+                        return errors.fatal((
+                            loc,
+                            format!("end of {} livein ({}) never reached", val, end_block),
+                        ));
+                    }
+                };
+            }
+        }
+
+        Ok(())
+    }
+}
diff --git a/cranelift/codegen/src/verifier/locations.rs b/cranelift/codegen/src/verifier/locations.rs
new file mode 100644
index 0000000000..287413a412
--- /dev/null
+++ b/cranelift/codegen/src/verifier/locations.rs
@@ -0,0 +1,399 @@
+//! Verify value locations.
+
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir;
+use crate::isa;
+use crate::regalloc::liveness::Liveness;
+use crate::regalloc::RegDiversions;
+use crate::timing;
+use crate::verifier::{VerifierErrors, VerifierStepResult};
+
+/// Verify value locations for `func`.
+///
+/// After register allocation, every value must be assigned to a location - either a register or a
+/// stack slot. These locations must be compatible with the constraints described by the
+/// instruction encoding recipes.
+///
+/// Values can be temporarily diverted to a different location by using the `regmove`, `regspill`,
+/// and `regfill` instructions, but only inside a block.
+///
+/// If a liveness analysis is provided, it is used to verify that there are no active register
+/// diversions across control flow edges.
+pub fn verify_locations( + isa: &dyn isa::TargetIsa, + func: &ir::Function, + cfg: &ControlFlowGraph, + liveness: Option<&Liveness>, + errors: &mut VerifierErrors, +) -> VerifierStepResult<()> { + let _tt = timing::verify_locations(); + let verifier = LocationVerifier { + isa, + func, + reginfo: isa.register_info(), + encinfo: isa.encoding_info(), + cfg, + liveness, + }; + verifier.check_constraints(errors)?; + Ok(()) +} + +struct LocationVerifier<'a> { + isa: &'a dyn isa::TargetIsa, + func: &'a ir::Function, + reginfo: isa::RegInfo, + encinfo: isa::EncInfo, + cfg: &'a ControlFlowGraph, + liveness: Option<&'a Liveness>, +} + +impl<'a> LocationVerifier<'a> { + /// Check that the assigned value locations match the operand constraints of their uses. + fn check_constraints(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { + let dfg = &self.func.dfg; + let mut divert = RegDiversions::new(); + + for block in self.func.layout.blocks() { + divert.at_block(&self.func.entry_diversions, block); + + let mut is_after_branch = false; + for inst in self.func.layout.block_insts(block) { + let enc = self.func.encodings[inst]; + + if enc.is_legal() { + self.check_enc_constraints(inst, enc, &divert, errors)? + } else { + self.check_ghost_results(inst, errors)?; + } + + if let Some(sig) = dfg.call_signature(inst) { + self.check_call_abi(inst, sig, &divert, errors)?; + } + + let opcode = dfg[inst].opcode(); + if opcode.is_return() { + self.check_return_abi(inst, &divert, errors)?; + } else if opcode.is_branch() && !divert.is_empty() { + self.check_cfg_edges(inst, &mut divert, is_after_branch, errors)?; + } + + self.update_diversions(inst, &mut divert, errors)?; + is_after_branch = opcode.is_branch(); + } + } + + Ok(()) + } + + /// Check encoding constraints against the current value locations. + fn check_enc_constraints( + &self, + inst: ir::Inst, + enc: isa::Encoding, + divert: &RegDiversions, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + let constraints = self + .encinfo + .operand_constraints(enc) + .expect("check_enc_constraints requires a legal encoding"); + + if constraints.satisfied(inst, divert, self.func) { + return Ok(()); + } + + // TODO: We could give a better error message here. + errors.fatal(( + inst, + format!( + "{} constraints not satisfied in: {}\n{}", + self.encinfo.display(enc), + self.func.dfg.display_inst(inst, self.isa), + self.func.display(self.isa), + ), + )) + } + + /// Check that the result values produced by a ghost instruction are not assigned a value + /// location. + fn check_ghost_results( + &self, + inst: ir::Inst, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + let results = self.func.dfg.inst_results(inst); + + for &res in results { + let loc = self.func.locations[res]; + if loc.is_assigned() { + return errors.fatal(( + inst, + format!( + "ghost result {} value must not have a location ({}).", + res, + loc.display(&self.reginfo) + ), + )); + } + } + + Ok(()) + } + + /// Check the ABI argument and result locations for a call. 
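+    ///
+    /// Arguments are checked against the callee signature's parameter
+    /// locations (honoring any active diversions); results are checked
+    /// against its return value locations.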
+ fn check_call_abi( + &self, + inst: ir::Inst, + sig: ir::SigRef, + divert: &RegDiversions, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + let sig = &self.func.dfg.signatures[sig]; + let varargs = self.func.dfg.inst_variable_args(inst); + let results = self.func.dfg.inst_results(inst); + + for (abi, &value) in sig.params.iter().zip(varargs) { + self.check_abi_location( + inst, + value, + abi, + divert.get(value, &self.func.locations), + ir::StackSlotKind::OutgoingArg, + errors, + )?; + } + + for (abi, &value) in sig.returns.iter().zip(results) { + self.check_abi_location( + inst, + value, + abi, + self.func.locations[value], + ir::StackSlotKind::OutgoingArg, + errors, + )?; + } + + Ok(()) + } + + /// Check the ABI argument locations for a return. + fn check_return_abi( + &self, + inst: ir::Inst, + divert: &RegDiversions, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + let sig = &self.func.signature; + let varargs = self.func.dfg.inst_variable_args(inst); + + for (abi, &value) in sig.returns.iter().zip(varargs) { + self.check_abi_location( + inst, + value, + abi, + divert.get(value, &self.func.locations), + ir::StackSlotKind::IncomingArg, + errors, + )?; + } + + Ok(()) + } + + /// Check a single ABI location. + fn check_abi_location( + &self, + inst: ir::Inst, + value: ir::Value, + abi: &ir::AbiParam, + loc: ir::ValueLoc, + want_kind: ir::StackSlotKind, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + match abi.location { + ir::ArgumentLoc::Unassigned => {} + ir::ArgumentLoc::Reg(reg) => { + if loc != ir::ValueLoc::Reg(reg) { + return errors.fatal(( + inst, + format!( + "ABI expects {} in {}, got {}", + value, + abi.location.display(&self.reginfo), + loc.display(&self.reginfo), + ), + )); + } + } + ir::ArgumentLoc::Stack(offset) => { + if let ir::ValueLoc::Stack(ss) = loc { + let slot = &self.func.stack_slots[ss]; + if slot.kind != want_kind { + return errors.fatal(( + inst, + format!( + "call argument {} should be in a {} slot, but {} is {}", + value, want_kind, ss, slot.kind + ), + )); + } + if slot.offset.unwrap() != offset { + return errors.fatal(( + inst, + format!( + "ABI expects {} at stack offset {}, but {} is at {}", + value, + offset, + ss, + slot.offset.unwrap() + ), + )); + } + } else { + return errors.fatal(( + inst, + format!( + "ABI expects {} at stack offset {}, got {}", + value, + offset, + loc.display(&self.reginfo) + ), + )); + } + } + } + + Ok(()) + } + + /// Update diversions to reflect the current instruction and check their consistency. + fn update_diversions( + &self, + inst: ir::Inst, + divert: &mut RegDiversions, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + let (arg, src) = match self.func.dfg[inst] { + ir::InstructionData::RegMove { arg, src, .. } + | ir::InstructionData::RegSpill { arg, src, .. } => (arg, ir::ValueLoc::Reg(src)), + ir::InstructionData::RegFill { arg, src, .. } => (arg, ir::ValueLoc::Stack(src)), + _ => return Ok(()), + }; + + if let Some(d) = divert.diversion(arg) { + if d.to != src { + return errors.fatal(( + inst, + format!( + "inconsistent with current diversion to {}", + d.to.display(&self.reginfo) + ), + )); + } + } else if self.func.locations[arg] != src { + return errors.fatal(( + inst, + format!( + "inconsistent with global location {} ({})", + self.func.locations[arg].display(&self.reginfo), + self.func.dfg.display_inst(inst, None) + ), + )); + } + + divert.apply(&self.func.dfg[inst]); + + Ok(()) + } + + /// We have active diversions before a branch. 
Make sure none of the diverted values are live
+    /// on the outgoing CFG edges.
+    fn check_cfg_edges(
+        &self,
+        inst: ir::Inst,
+        divert: &mut RegDiversions,
+        is_after_branch: bool,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        use crate::ir::instructions::BranchInfo::*;
+        let dfg = &self.func.dfg;
+        let branch_kind = dfg.analyze_branch(inst);
+
+        // We can only check CFG edges if we have a liveness analysis.
+        let liveness = match self.liveness {
+            Some(l) => l,
+            None => return Ok(()),
+        };
+
+        match branch_kind {
+            NotABranch => panic!(
+                "No branch information for {}",
+                dfg.display_inst(inst, self.isa)
+            ),
+            SingleDest(block, _) => {
+                let unique_predecessor = self.cfg.pred_iter(block).count() == 1;
+                let mut val_to_remove = vec![];
+                for (&value, d) in divert.iter() {
+                    let lr = &liveness[value];
+                    if is_after_branch && unique_predecessor {
+                        // Forward diversions based on the targeted branch.
+                        if !lr.is_livein(block, &self.func.layout) {
+                            val_to_remove.push(value)
+                        }
+                    } else if lr.is_livein(block, &self.func.layout) {
+                        return errors.fatal((
+                            inst,
+                            format!(
+                                "SingleDest: {} is diverted to {} and live in to {}",
+                                value,
+                                d.to.display(&self.reginfo),
+                                block,
+                            ),
+                        ));
+                    }
+                }
+                if is_after_branch && unique_predecessor {
+                    for val in val_to_remove.into_iter() {
+                        divert.remove(val);
+                    }
+                    debug_assert!(divert.check_block_entry(&self.func.entry_diversions, block));
+                }
+            }
+            Table(jt, block) => {
+                for (&value, d) in divert.iter() {
+                    let lr = &liveness[value];
+                    if let Some(block) = block {
+                        if lr.is_livein(block, &self.func.layout) {
+                            return errors.fatal((
+                                inst,
+                                format!(
+                                    "Table.default: {} is diverted to {} and live in to {}",
+                                    value,
+                                    d.to.display(&self.reginfo),
+                                    block,
+                                ),
+                            ));
+                        }
+                    }
+                    for block in self.func.jump_tables[jt].iter() {
+                        if lr.is_livein(*block, &self.func.layout) {
+                            return errors.fatal((
+                                inst,
+                                format!(
+                                    "Table.case: {} is diverted to {} and live in to {}",
+                                    value,
+                                    d.to.display(&self.reginfo),
+                                    block,
+                                ),
+                            ));
+                        }
+                    }
+                }
+            }
+        }
+
+        Ok(())
+    }
+}
diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs
new file mode 100644
index 0000000000..58dbe259c8
--- /dev/null
+++ b/cranelift/codegen/src/verifier/mod.rs
@@ -0,0 +1,2139 @@
+//! A verifier for ensuring that functions are well formed.
+//! It verifies:
+//!
+//! Block integrity
+//!
+//! - All instructions reached from the `block_insts` iterator must belong to
+//!   the block as reported by `inst_block()`.
+//! - Every block must end in a terminator instruction, and no other instruction
+//!   can be a terminator.
+//! - Every value in the `block_params` iterator belongs to the block as reported by `value_block`.
+//!
+//! Instruction integrity
+//!
+//! - The instruction format must match the opcode.
+//! - All result values must be created for multi-valued instructions.
+//! - All referenced entities must exist. (Values, blocks, stack slots, ...)
+//! - Instructions must not reference (e.g. branch to) the entry block.
+//!
+//! SSA form
+//!
+//! - Values must be defined by an instruction that exists and that is inserted in
+//!   a block, or be an argument of an existing block.
+//! - Values used by an instruction must dominate the instruction.
+//!
+//! Control flow graph and dominator tree integrity
+//!
+//! - All predecessors in the CFG must be branches to the block.
+//! - All branches to a block must be present in the CFG.
+//! - A recomputed dominator tree is identical to the existing one.
+//!
+//! Type checking
+//!
+//! - Compare input and output values against the opcode's type constraints.
+//!   For polymorphic opcodes, determine the controlling type variable first.
+//! - Branches and jumps must pass arguments to destination blocks that match the
+//!   expected types exactly. The number of arguments must match.
+//! - All blocks in a jump table must take no arguments.
+//! - Function calls are type checked against their signature.
+//! - The entry block must take arguments that match the signature of the current
+//!   function.
+//! - All return instructions must have return value operands matching the current
+//!   function signature.
+//!
+//! Global values
+//!
+//! - Detect cycles in global values.
+//! - Detect use of 'vmctx' global value when no corresponding parameter is defined.
+//!
+//! TODO:
+//! Ad hoc checking
+//!
+//! - Stack slot loads and stores must be in-bounds.
+//! - Immediate constraints for certain opcodes, like `udiv_imm v3, 0`.
+//! - `Insertlane` and `extractlane` instructions have immediate lane numbers that must be in
+//!   range for their polymorphic type.
+//! - Swizzle and shuffle instructions take a variable number of lane arguments. The number
+//!   of arguments must match the destination type, and the lane indexes must be in range.
+
+use self::flags::verify_flags;
+use crate::dbg::DisplayList;
+use crate::dominator_tree::DominatorTree;
+use crate::entity::SparseSet;
+use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
+use crate::ir;
+use crate::ir::entities::AnyEntity;
+use crate::ir::instructions::{BranchInfo, CallInfo, InstructionFormat, ResolvedConstraint};
+use crate::ir::{
+    types, ArgumentLoc, Block, FuncRef, Function, GlobalValue, Inst, InstructionData, JumpTable,
+    Opcode, SigRef, StackSlot, StackSlotKind, Type, Value, ValueDef, ValueList, ValueLoc,
+};
+use crate::isa::TargetIsa;
+use crate::iterators::IteratorExtras;
+use crate::print_errors::pretty_verifier_error;
+use crate::settings::FlagsOrIsa;
+use crate::timing;
+use alloc::collections::BTreeSet;
+use alloc::string::{String, ToString};
+use alloc::vec::Vec;
+use core::cmp::Ordering;
+use core::fmt::{self, Display, Formatter, Write};
+use log::debug;
+use thiserror::Error;
+
+pub use self::cssa::verify_cssa;
+pub use self::liveness::verify_liveness;
+pub use self::locations::verify_locations;
+
+mod cssa;
+mod flags;
+mod liveness;
+mod locations;
+
+/// A verifier error.
+#[derive(Error, Debug, PartialEq, Eq, Clone)]
+#[error("{}{}: {}", .location, format_context(.context), .message)]
+pub struct VerifierError {
+    /// The entity causing the verifier error.
+    pub location: AnyEntity,
+    /// Optionally provide some context for the given location; e.g., for `inst42` provide
+    /// `Some("v3 = iconst.i32 0")` for more comprehensible errors.
+    pub context: Option<String>,
+    /// The error message.
+    pub message: String,
+}
+
+/// Helper for formatting Verifier::Error context.
+fn format_context(context: &Option<String>) -> String {
+    match context {
+        None => "".to_string(),
+        Some(c) => format!(" ({})", c),
+    }
+}
+
+/// Convenience converter for making error-reporting less verbose.
+///
+/// Converts a tuple of `(location, context, message)` to a `VerifierError`.
+/// ```
+/// use cranelift_codegen::verifier::VerifierErrors;
+/// use cranelift_codegen::ir::Inst;
+/// let mut errors = VerifierErrors::new();
+/// errors.report((Inst::from_u32(42), "v3 = iadd v1, v2", "iadd cannot be used with values of this type"));
+/// // note the double parentheses to use this syntax
+/// ```
+impl<L, C, M> From<(L, C, M)> for VerifierError
+where
+    L: Into<AnyEntity>,
+    C: Into<String>,
+    M: Into<String>,
+{
+    fn from(items: (L, C, M)) -> Self {
+        let (location, context, message) = items;
+        Self {
+            location: location.into(),
+            context: Some(context.into()),
+            message: message.into(),
+        }
+    }
+}
+
+/// Convenience converter for making error-reporting less verbose.
+///
+/// Same as above but without `context`.
+impl<L, M> From<(L, M)> for VerifierError
+where
+    L: Into<AnyEntity>,
+    M: Into<String>,
+{
+    fn from(items: (L, M)) -> Self {
+        let (location, message) = items;
+        Self {
+            location: location.into(),
+            context: None,
+            message: message.into(),
+        }
+    }
+}
+
+/// Result of a step in the verification process.
+///
+/// Functions that return `VerifierStepResult<()>` should also take a
+/// mutable reference to `VerifierErrors` as argument in order to report
+/// errors.
+///
+/// Here, `Ok` represents a step that **did not lead to a fatal error**,
+/// meaning that the verification process may continue. However, other (non-fatal)
+/// errors might have been reported through the previously mentioned `VerifierErrors`
+/// argument.
+pub type VerifierStepResult<T> = Result<T, ()>;
+
+/// Result of a verification operation.
+///
+/// Unlike `VerifierStepResult<()>` which may be `Ok` while still having reported
+/// errors, this type always returns `Err` if an error (fatal or not) was reported.
+pub type VerifierResult<T> = Result<T, VerifierErrors>;
+
+/// List of verifier errors.
+#[derive(Error, Debug, Default, PartialEq, Eq, Clone)]
+pub struct VerifierErrors(pub Vec<VerifierError>);
+
+impl VerifierErrors {
+    /// Return a new `VerifierErrors` struct.
+    #[inline]
+    pub fn new() -> Self {
+        Self(Vec::new())
+    }
+
+    /// Return whether no errors were reported.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+
+    /// Return whether one or more errors were reported.
+    #[inline]
+    pub fn has_error(&self) -> bool {
+        !self.0.is_empty()
+    }
+
+    /// Return a `VerifierStepResult` that is fatal if at least one error was reported,
+    /// and non-fatal otherwise.
+    #[inline]
+    pub fn as_result(&self) -> VerifierStepResult<()> {
+        if self.is_empty() {
+            Ok(())
+        } else {
+            Err(())
+        }
+    }
+
+    /// Report an error, adding it to the list of errors.
+    pub fn report(&mut self, error: impl Into<VerifierError>) {
+        self.0.push(error.into());
+    }
+
+    /// Report a fatal error and return `Err`.
+    pub fn fatal(&mut self, error: impl Into<VerifierError>) -> VerifierStepResult<()> {
+        self.report(error);
+        Err(())
+    }
+
+    /// Report a non-fatal error and return `Ok`.
+    pub fn nonfatal(&mut self, error: impl Into<VerifierError>) -> VerifierStepResult<()> {
+        self.report(error);
+        Ok(())
+    }
+}
+
+impl From<Vec<VerifierError>> for VerifierErrors {
+    fn from(v: Vec<VerifierError>) -> Self {
+        Self(v)
+    }
+}
+
+impl Into<Vec<VerifierError>> for VerifierErrors {
+    fn into(self) -> Vec<VerifierError> {
+        self.0
+    }
+}
+
+impl Into<VerifierResult<()>> for VerifierErrors {
+    fn into(self) -> VerifierResult<()> {
+        if self.is_empty() {
+            Ok(())
+        } else {
+            Err(self)
+        }
+    }
+}
+
+impl Display for VerifierErrors {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        for err in &self.0 {
+            writeln!(f, "- {}", err)?;
+        }
+        Ok(())
+    }
+}
+
+/// Verify `func`.
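+///
+/// A minimal usage sketch, mirroring the `empty` unit test at the bottom of
+/// this file: a freshly created function contains nothing to reject, so it
+/// verifies cleanly under default flags.
+///
+/// ```
+/// use cranelift_codegen::ir::Function;
+/// use cranelift_codegen::settings;
+/// use cranelift_codegen::verifier::verify_function;
+///
+/// let func = Function::new();
+/// let flags = settings::Flags::new(settings::builder());
+/// verify_function(&func, &flags).expect("an empty function is well formed");
+/// ```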
+pub fn verify_function<'a, FOI: Into<FlagsOrIsa<'a>>>(
+    func: &Function,
+    fisa: FOI,
+) -> VerifierResult<()> {
+    let _tt = timing::verifier();
+    let mut errors = VerifierErrors::default();
+    let verifier = Verifier::new(func, fisa.into());
+    let result = verifier.run(&mut errors);
+    if errors.is_empty() {
+        result.unwrap();
+        Ok(())
+    } else {
+        Err(errors)
+    }
+}
+
+/// Verify `func` after checking the integrity of associated context data structures `cfg` and
+/// `domtree`.
+pub fn verify_context<'a, FOI: Into<FlagsOrIsa<'a>>>(
+    func: &Function,
+    cfg: &ControlFlowGraph,
+    domtree: &DominatorTree,
+    fisa: FOI,
+    errors: &mut VerifierErrors,
+) -> VerifierStepResult<()> {
+    let _tt = timing::verifier();
+    let verifier = Verifier::new(func, fisa.into());
+    if cfg.is_valid() {
+        verifier.cfg_integrity(cfg, errors)?;
+    }
+    if domtree.is_valid() {
+        verifier.domtree_integrity(domtree, errors)?;
+    }
+    verifier.run(errors)
+}
+
+struct Verifier<'a> {
+    func: &'a Function,
+    expected_cfg: ControlFlowGraph,
+    expected_domtree: DominatorTree,
+    isa: Option<&'a dyn TargetIsa>,
+}
+
+impl<'a> Verifier<'a> {
+    pub fn new(func: &'a Function, fisa: FlagsOrIsa<'a>) -> Self {
+        let expected_cfg = ControlFlowGraph::with_function(func);
+        let expected_domtree = DominatorTree::with_function(func, &expected_cfg);
+        Self {
+            func,
+            expected_cfg,
+            expected_domtree,
+            isa: fisa.isa,
+        }
+    }
+
+    /// Determine a contextual error string for an instruction.
+    #[inline]
+    fn context(&self, inst: Inst) -> String {
+        self.func.dfg.display_inst(inst, self.isa).to_string()
+    }
+
+    // Check for:
+    // - cycles in the global value declarations.
+    // - use of 'vmctx' when no special parameter declares it.
+    fn verify_global_values(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> {
+        let mut cycle_seen = false;
+        let mut seen = SparseSet::new();
+
+        'gvs: for gv in self.func.global_values.keys() {
+            seen.clear();
+            seen.insert(gv);
+
+            let mut cur = gv;
+            loop {
+                match self.func.global_values[cur] {
+                    ir::GlobalValueData::Load { base, .. }
+                    | ir::GlobalValueData::IAddImm { base, .. } => {
+                        if seen.insert(base).is_some() {
+                            if !cycle_seen {
+                                errors.report((
+                                    gv,
+                                    format!("global value cycle: {}", DisplayList(seen.as_slice())),
+                                ));
+                                // ensures we don't report the cycle multiple times
+                                cycle_seen = true;
+                            }
+                            continue 'gvs;
+                        }
+
+                        cur = base;
+                    }
+                    _ => break,
+                }
+            }
+
+            match self.func.global_values[gv] {
+                ir::GlobalValueData::VMContext { .. } => {
+                    if self
+                        .func
+                        .special_param(ir::ArgumentPurpose::VMContext)
+                        .is_none()
+                    {
+                        errors.report((gv, format!("undeclared vmctx reference {}", gv)));
+                    }
+                }
+                ir::GlobalValueData::IAddImm {
+                    base, global_type, ..
+                } => {
+                    if !global_type.is_int() {
+                        errors.report((
+                            gv,
+                            format!("iadd_imm global value with non-int type {}", global_type),
+                        ));
+                    } else if let Some(isa) = self.isa {
+                        let base_type = self.func.global_values[base].global_type(isa);
+                        if global_type != base_type {
+                            errors.report((
+                                gv,
+                                format!(
+                                    "iadd_imm type {} differs from operand type {}",
+                                    global_type, base_type
+                                ),
+                            ));
+                        }
+                    }
+                }
+                ir::GlobalValueData::Load { base, ..
} => { + if let Some(isa) = self.isa { + let base_type = self.func.global_values[base].global_type(isa); + let pointer_type = isa.pointer_type(); + if base_type != pointer_type { + errors.report(( + gv, + format!( + "base {} has type {}, which is not the pointer type {}", + base, base_type, pointer_type + ), + )); + } + } + } + _ => {} + } + } + + // Invalid global values shouldn't stop us from verifying the rest of the function + Ok(()) + } + + fn verify_heaps(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { + if let Some(isa) = self.isa { + for (heap, heap_data) in &self.func.heaps { + let base = heap_data.base; + if !self.func.global_values.is_valid(base) { + return errors.nonfatal((heap, format!("invalid base global value {}", base))); + } + + let pointer_type = isa.pointer_type(); + let base_type = self.func.global_values[base].global_type(isa); + if base_type != pointer_type { + errors.report(( + heap, + format!( + "heap base has type {}, which is not the pointer type {}", + base_type, pointer_type + ), + )); + } + + if let ir::HeapStyle::Dynamic { bound_gv, .. } = heap_data.style { + if !self.func.global_values.is_valid(bound_gv) { + return errors + .nonfatal((heap, format!("invalid bound global value {}", bound_gv))); + } + + let index_type = heap_data.index_type; + let bound_type = self.func.global_values[bound_gv].global_type(isa); + if index_type != bound_type { + errors.report(( + heap, + format!( + "heap index type {} differs from the type of its bound, {}", + index_type, bound_type + ), + )); + } + } + } + } + + Ok(()) + } + + fn verify_tables(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { + if let Some(isa) = self.isa { + for (table, table_data) in &self.func.tables { + let base = table_data.base_gv; + if !self.func.global_values.is_valid(base) { + return errors.nonfatal((table, format!("invalid base global value {}", base))); + } + + let pointer_type = isa.pointer_type(); + let base_type = self.func.global_values[base].global_type(isa); + if base_type != pointer_type { + errors.report(( + table, + format!( + "table base has type {}, which is not the pointer type {}", + base_type, pointer_type + ), + )); + } + + let bound_gv = table_data.bound_gv; + if !self.func.global_values.is_valid(bound_gv) { + return errors + .nonfatal((table, format!("invalid bound global value {}", bound_gv))); + } + + let index_type = table_data.index_type; + let bound_type = self.func.global_values[bound_gv].global_type(isa); + if index_type != bound_type { + errors.report(( + table, + format!( + "table index type {} differs from the type of its bound, {}", + index_type, bound_type + ), + )); + } + } + } + + Ok(()) + } + + fn verify_jump_tables(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { + for (jt, jt_data) in &self.func.jump_tables { + for &block in jt_data.iter() { + self.verify_block(jt, block, errors)?; + } + } + Ok(()) + } + + /// Check that the given block can be encoded as a BB, by checking that only + /// branching instructions are ending the block. 
+ fn encodable_as_bb(&self, block: Block, errors: &mut VerifierErrors) -> VerifierStepResult<()> { + match self.func.is_block_basic(block) { + Ok(()) => Ok(()), + Err((inst, message)) => errors.fatal((inst, self.context(inst), message)), + } + } + + fn block_integrity( + &self, + block: Block, + inst: Inst, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + let is_terminator = self.func.dfg[inst].opcode().is_terminator(); + let is_last_inst = self.func.layout.last_inst(block) == Some(inst); + + if is_terminator && !is_last_inst { + // Terminating instructions only occur at the end of blocks. + return errors.fatal(( + inst, + self.context(inst), + format!( + "a terminator instruction was encountered before the end of {}", + block + ), + )); + } + if is_last_inst && !is_terminator { + return errors.fatal((block, "block does not end in a terminator instruction")); + } + + // Instructions belong to the correct block. + let inst_block = self.func.layout.inst_block(inst); + if inst_block != Some(block) { + return errors.fatal(( + inst, + self.context(inst), + format!("should belong to {} not {:?}", block, inst_block), + )); + } + + // Parameters belong to the correct block. + for &arg in self.func.dfg.block_params(block) { + match self.func.dfg.value_def(arg) { + ValueDef::Param(arg_block, _) => { + if block != arg_block { + return errors.fatal((arg, format!("does not belong to {}", block))); + } + } + _ => { + return errors.fatal((arg, "expected an argument, found a result")); + } + } + } + + Ok(()) + } + + fn instruction_integrity( + &self, + inst: Inst, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + let inst_data = &self.func.dfg[inst]; + let dfg = &self.func.dfg; + + // The instruction format matches the opcode + if inst_data.opcode().format() != InstructionFormat::from(inst_data) { + return errors.fatal(( + inst, + self.context(inst), + "instruction opcode doesn't match instruction format", + )); + } + + let num_fixed_results = inst_data.opcode().constraints().num_fixed_results(); + // var_results is 0 if we aren't a call instruction + let var_results = dfg + .call_signature(inst) + .map_or(0, |sig| dfg.signatures[sig].returns.len()); + let total_results = num_fixed_results + var_results; + + // All result values for multi-valued instructions are created + let got_results = dfg.inst_results(inst).len(); + if got_results != total_results { + return errors.fatal(( + inst, + self.context(inst), + format!( + "expected {} result values, found {}", + total_results, got_results, + ), + )); + } + + self.verify_entity_references(inst, errors) + } + + fn verify_entity_references( + &self, + inst: Inst, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + use crate::ir::instructions::InstructionData::*; + + for &arg in self.func.dfg.inst_args(inst) { + self.verify_inst_arg(inst, arg, errors)?; + + // All used values must be attached to something. + let original = self.func.dfg.resolve_aliases(arg); + if !self.func.dfg.value_is_attached(original) { + errors.report(( + inst, + self.context(inst), + format!("argument {} -> {} is not attached", arg, original), + )); + } + } + + for &res in self.func.dfg.inst_results(inst) { + self.verify_inst_result(inst, res, errors)?; + } + + match self.func.dfg[inst] { + MultiAry { ref args, .. } => { + self.verify_value_list(inst, args, errors)?; + } + Jump { + destination, + ref args, + .. + } + | Branch { + destination, + ref args, + .. + } + | BranchInt { + destination, + ref args, + .. 
+ } + | BranchFloat { + destination, + ref args, + .. + } + | BranchIcmp { + destination, + ref args, + .. + } => { + self.verify_block(inst, destination, errors)?; + self.verify_value_list(inst, args, errors)?; + } + BranchTable { + table, destination, .. + } => { + self.verify_block(inst, destination, errors)?; + self.verify_jump_table(inst, table, errors)?; + } + BranchTableBase { table, .. } + | BranchTableEntry { table, .. } + | IndirectJump { table, .. } => { + self.verify_jump_table(inst, table, errors)?; + } + Call { + func_ref, ref args, .. + } => { + self.verify_func_ref(inst, func_ref, errors)?; + self.verify_value_list(inst, args, errors)?; + } + CallIndirect { + sig_ref, ref args, .. + } => { + self.verify_sig_ref(inst, sig_ref, errors)?; + self.verify_value_list(inst, args, errors)?; + } + FuncAddr { func_ref, .. } => { + self.verify_func_ref(inst, func_ref, errors)?; + } + StackLoad { stack_slot, .. } | StackStore { stack_slot, .. } => { + self.verify_stack_slot(inst, stack_slot, errors)?; + } + UnaryGlobalValue { global_value, .. } => { + self.verify_global_value(inst, global_value, errors)?; + } + HeapAddr { heap, .. } => { + self.verify_heap(inst, heap, errors)?; + } + TableAddr { table, .. } => { + self.verify_table(inst, table, errors)?; + } + RegSpill { dst, .. } => { + self.verify_stack_slot(inst, dst, errors)?; + } + RegFill { src, .. } => { + self.verify_stack_slot(inst, src, errors)?; + } + LoadComplex { ref args, .. } => { + self.verify_value_list(inst, args, errors)?; + } + StoreComplex { ref args, .. } => { + self.verify_value_list(inst, args, errors)?; + } + + NullAry { + opcode: Opcode::GetPinnedReg, + } + | Unary { + opcode: Opcode::SetPinnedReg, + .. + } => { + if let Some(isa) = &self.isa { + if !isa.flags().enable_pinned_reg() { + return errors.fatal(( + inst, + self.context(inst), + "GetPinnedReg/SetPinnedReg cannot be used without enable_pinned_reg", + )); + } + } else { + return errors.fatal(( + inst, + self.context(inst), + "GetPinnedReg/SetPinnedReg need an ISA!", + )); + } + } + + Unary { + opcode: Opcode::Bitcast, + arg, + } => { + self.verify_bitcast(inst, arg, errors)?; + } + + // Exhaustive list so we can't forget to add new formats + Unary { .. } + | UnaryImm { .. } + | UnaryIeee32 { .. } + | UnaryIeee64 { .. } + | UnaryBool { .. } + | Binary { .. } + | BinaryImm { .. } + | Ternary { .. } + | InsertLane { .. } + | ExtractLane { .. } + | UnaryConst { .. } + | Shuffle { .. } + | IntCompare { .. } + | IntCompareImm { .. } + | IntCond { .. } + | FloatCompare { .. } + | FloatCond { .. } + | IntSelect { .. } + | Load { .. } + | Store { .. } + | RegMove { .. } + | CopySpecial { .. } + | CopyToSsa { .. } + | Trap { .. } + | CondTrap { .. } + | IntCondTrap { .. } + | FloatCondTrap { .. } + | NullAry { .. 
} => {}
+        }
+
+        Ok(())
+    }
+
+    fn verify_block(
+        &self,
+        loc: impl Into<AnyEntity>,
+        e: Block,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        if !self.func.dfg.block_is_valid(e) || !self.func.layout.is_block_inserted(e) {
+            return errors.fatal((loc, format!("invalid block reference {}", e)));
+        }
+        if let Some(entry_block) = self.func.layout.entry_block() {
+            if e == entry_block {
+                return errors.fatal((loc, format!("invalid reference to entry block {}", e)));
+            }
+        }
+        Ok(())
+    }
+
+    fn verify_sig_ref(
+        &self,
+        inst: Inst,
+        s: SigRef,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        if !self.func.dfg.signatures.is_valid(s) {
+            errors.fatal((
+                inst,
+                self.context(inst),
+                format!("invalid signature reference {}", s),
+            ))
+        } else {
+            Ok(())
+        }
+    }
+
+    fn verify_func_ref(
+        &self,
+        inst: Inst,
+        f: FuncRef,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        if !self.func.dfg.ext_funcs.is_valid(f) {
+            errors.nonfatal((
+                inst,
+                self.context(inst),
+                format!("invalid function reference {}", f),
+            ))
+        } else {
+            Ok(())
+        }
+    }
+
+    fn verify_stack_slot(
+        &self,
+        inst: Inst,
+        ss: StackSlot,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        if !self.func.stack_slots.is_valid(ss) {
+            errors.nonfatal((
+                inst,
+                self.context(inst),
+                format!("invalid stack slot {}", ss),
+            ))
+        } else {
+            Ok(())
+        }
+    }
+
+    fn verify_global_value(
+        &self,
+        inst: Inst,
+        gv: GlobalValue,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        if !self.func.global_values.is_valid(gv) {
+            errors.nonfatal((
+                inst,
+                self.context(inst),
+                format!("invalid global value {}", gv),
+            ))
+        } else {
+            Ok(())
+        }
+    }
+
+    fn verify_heap(
+        &self,
+        inst: Inst,
+        heap: ir::Heap,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        if !self.func.heaps.is_valid(heap) {
+            errors.nonfatal((inst, self.context(inst), format!("invalid heap {}", heap)))
+        } else {
+            Ok(())
+        }
+    }
+
+    fn verify_table(
+        &self,
+        inst: Inst,
+        table: ir::Table,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        if !self.func.tables.is_valid(table) {
+            errors.nonfatal((inst, self.context(inst), format!("invalid table {}", table)))
+        } else {
+            Ok(())
+        }
+    }
+
+    fn verify_value_list(
+        &self,
+        inst: Inst,
+        l: &ValueList,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        if !l.is_valid(&self.func.dfg.value_lists) {
+            errors.nonfatal((
+                inst,
+                self.context(inst),
+                format!("invalid value list reference {:?}", l),
+            ))
+        } else {
+            Ok(())
+        }
+    }
+
+    fn verify_jump_table(
+        &self,
+        inst: Inst,
+        j: JumpTable,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        if !self.func.jump_tables.is_valid(j) {
+            errors.nonfatal((
+                inst,
+                self.context(inst),
+                format!("invalid jump table reference {}", j),
+            ))
+        } else {
+            Ok(())
+        }
+    }
+
+    fn verify_value(
+        &self,
+        loc_inst: Inst,
+        v: Value,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        let dfg = &self.func.dfg;
+        if !dfg.value_is_valid(v) {
+            errors.nonfatal((
+                loc_inst,
+                self.context(loc_inst),
+                format!("invalid value reference {}", v),
+            ))
+        } else {
+            Ok(())
+        }
+    }
+
+    fn verify_inst_arg(
+        &self,
+        loc_inst: Inst,
+        v: Value,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        self.verify_value(loc_inst, v, errors)?;
+
+        let dfg = &self.func.dfg;
+        let loc_block = self.func.layout.pp_block(loc_inst);
+        let is_reachable = self.expected_domtree.is_reachable(loc_block);
+
+        // SSA form
+        match dfg.value_def(v) {
ValueDef::Result(def_inst, _) => {
+                // Value is defined by an instruction that exists.
+                if !dfg.inst_is_valid(def_inst) {
+                    return errors.fatal((
+                        loc_inst,
+                        self.context(loc_inst),
+                        format!("{} is defined by invalid instruction {}", v, def_inst),
+                    ));
+                }
+                // Defining instruction is inserted in a block.
+                if self.func.layout.inst_block(def_inst) == None {
+                    return errors.fatal((
+                        loc_inst,
+                        self.context(loc_inst),
+                        format!("{} is defined by {} which has no block", v, def_inst),
+                    ));
+                }
+                // Defining instruction dominates the instruction that uses the value.
+                if is_reachable {
+                    if !self
+                        .expected_domtree
+                        .dominates(def_inst, loc_inst, &self.func.layout)
+                    {
+                        return errors.fatal((
+                            loc_inst,
+                            self.context(loc_inst),
+                            format!("uses value {} from non-dominating {}", v, def_inst),
+                        ));
+                    }
+                    if def_inst == loc_inst {
+                        return errors.fatal((
+                            loc_inst,
+                            self.context(loc_inst),
+                            format!("uses value {} from itself", v),
+                        ));
+                    }
+                }
+            }
+            ValueDef::Param(block, _) => {
+                // Value is defined by an existing block.
+                if !dfg.block_is_valid(block) {
+                    return errors.fatal((
+                        loc_inst,
+                        self.context(loc_inst),
+                        format!("{} is defined by invalid block {}", v, block),
+                    ));
+                }
+                // Defining block is inserted in the layout
+                if !self.func.layout.is_block_inserted(block) {
+                    return errors.fatal((
+                        loc_inst,
+                        self.context(loc_inst),
+                        format!("{} is defined by {} which is not in the layout", v, block),
+                    ));
+                }
+                // The defining block dominates the instruction using this value.
+                if is_reachable
+                    && !self
+                        .expected_domtree
+                        .dominates(block, loc_inst, &self.func.layout)
+                {
+                    return errors.fatal((
+                        loc_inst,
+                        self.context(loc_inst),
+                        format!("uses value arg from non-dominating {}", block),
+                    ));
+                }
+            }
+        }
+        Ok(())
+    }
+
+    fn verify_inst_result(
+        &self,
+        loc_inst: Inst,
+        v: Value,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        self.verify_value(loc_inst, v, errors)?;
+
+        match self.func.dfg.value_def(v) {
+            ValueDef::Result(def_inst, _) => {
+                if def_inst != loc_inst {
+                    errors.fatal((
+                        loc_inst,
+                        self.context(loc_inst),
+                        format!("instruction result {} is not defined by the instruction", v),
+                    ))
+                } else {
+                    Ok(())
+                }
+            }
+            ValueDef::Param(_, _) => errors.fatal((
+                loc_inst,
+                self.context(loc_inst),
+                format!("instruction result {} is not defined by the instruction", v),
+            )),
+        }
+    }
+
+    fn verify_bitcast(
+        &self,
+        inst: Inst,
+        arg: Value,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        let typ = self.func.dfg.ctrl_typevar(inst);
+        let value_type = self.func.dfg.value_type(arg);
+
+        if typ.lane_bits() < value_type.lane_bits() {
+            errors.fatal((
+                inst,
+                format!(
+                    "The bitcast argument {} doesn't fit in a type of {} bits",
+                    arg,
+                    typ.lane_bits()
+                ),
+            ))
+        } else {
+            Ok(())
+        }
+    }
+
+    fn domtree_integrity(
+        &self,
+        domtree: &DominatorTree,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        // We consider two `DominatorTree`s to be equal if they return the same immediate
+        // dominator for each block. Therefore the current domtree is valid if it matches the freshly
+        // computed one.
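+        // Three checks follow: idom agreement for every block, an identical
+        // CFG postorder, and `rpo_cmp` consistency on adjacent postorder pairs.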
+        for block in self.func.layout.blocks() {
+            let expected = self.expected_domtree.idom(block);
+            let got = domtree.idom(block);
+            if got != expected {
+                return errors.fatal((
+                    block,
+                    format!(
+                        "invalid domtree, expected idom({}) = {:?}, got {:?}",
+                        block, expected, got
+                    ),
+                ));
+            }
+        }
+        // We also verify that the postorder defined by `DominatorTree` is sane
+        if domtree.cfg_postorder().len() != self.expected_domtree.cfg_postorder().len() {
+            return errors.fatal((
+                AnyEntity::Function,
+                "incorrect number of blocks in postorder traversal",
+            ));
+        }
+        for (index, (&test_block, &true_block)) in domtree
+            .cfg_postorder()
+            .iter()
+            .zip(self.expected_domtree.cfg_postorder().iter())
+            .enumerate()
+        {
+            if test_block != true_block {
+                return errors.fatal((
+                    test_block,
+                    format!(
+                        "invalid domtree, postorder block number {} should be {}, got {}",
+                        index, true_block, test_block
+                    ),
+                ));
+            }
+        }
+        // We verify rpo_cmp on pairs of adjacent blocks in the postorder
+        for (&prev_block, &next_block) in domtree.cfg_postorder().iter().adjacent_pairs() {
+            if self
+                .expected_domtree
+                .rpo_cmp(prev_block, next_block, &self.func.layout)
+                != Ordering::Greater
+            {
+                return errors.fatal((
+                    next_block,
+                    format!(
+                        "invalid domtree, rpo_cmp does not say {} is greater than {}",
+                        prev_block, next_block
+                    ),
+                ));
+            }
+        }
+        Ok(())
+    }
+
+    fn typecheck_entry_block_params(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> {
+        if let Some(block) = self.func.layout.entry_block() {
+            let expected_types = &self.func.signature.params;
+            let block_param_count = self.func.dfg.num_block_params(block);
+
+            if block_param_count != expected_types.len() {
+                return errors.fatal((
+                    block,
+                    format!(
+                        "entry block parameters ({}) must match function signature ({})",
+                        block_param_count,
+                        expected_types.len()
+                    ),
+                ));
+            }
+
+            for (i, &arg) in self.func.dfg.block_params(block).iter().enumerate() {
+                let arg_type = self.func.dfg.value_type(arg);
+                if arg_type != expected_types[i].value_type {
+                    errors.report((
+                        block,
+                        format!(
+                            "entry block parameter {} expected to have type {}, got {}",
+                            i, expected_types[i], arg_type
+                        ),
+                    ));
+                }
+            }
+        }
+
+        errors.as_result()
+    }
+
+    fn typecheck(&self, inst: Inst, errors: &mut VerifierErrors) -> VerifierStepResult<()> {
+        let inst_data = &self.func.dfg[inst];
+        let constraints = inst_data.opcode().constraints();
+
+        let ctrl_type = if let Some(value_typeset) = constraints.ctrl_typeset() {
+            // For polymorphic opcodes, determine the controlling type variable first.
+            let ctrl_type = self.func.dfg.ctrl_typevar(inst);
+
+            if !value_typeset.contains(ctrl_type) {
+                errors.report((
+                    inst,
+                    self.context(inst),
+                    format!("has an invalid controlling type {}", ctrl_type),
+                ));
+            }
+
+            ctrl_type
+        } else {
+            // Non-polymorphic instructions don't check the controlling type variable, so `Option`
+            // is unnecessary and we can just make it `INVALID`.
+ types::INVALID + }; + + // Typechecking instructions is never fatal + let _ = self.typecheck_results(inst, ctrl_type, errors); + let _ = self.typecheck_fixed_args(inst, ctrl_type, errors); + let _ = self.typecheck_variable_args(inst, errors); + let _ = self.typecheck_return(inst, errors); + let _ = self.typecheck_special(inst, ctrl_type, errors); + + // Misuses of copy_nop instructions are fatal + self.typecheck_copy_nop(inst, errors)?; + + Ok(()) + } + + fn typecheck_results( + &self, + inst: Inst, + ctrl_type: Type, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + let mut i = 0; + for &result in self.func.dfg.inst_results(inst) { + let result_type = self.func.dfg.value_type(result); + let expected_type = self.func.dfg.compute_result_type(inst, i, ctrl_type); + if let Some(expected_type) = expected_type { + if result_type != expected_type { + errors.report(( + inst, + self.context(inst), + format!( + "expected result {} ({}) to have type {}, found {}", + i, result, expected_type, result_type + ), + )); + } + } else { + return errors.nonfatal(( + inst, + self.context(inst), + "has more result values than expected", + )); + } + i += 1; + } + + // There aren't any more result types left. + if self.func.dfg.compute_result_type(inst, i, ctrl_type) != None { + return errors.nonfatal(( + inst, + self.context(inst), + "has fewer result values than expected", + )); + } + Ok(()) + } + + fn typecheck_fixed_args( + &self, + inst: Inst, + ctrl_type: Type, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + let constraints = self.func.dfg[inst].opcode().constraints(); + + for (i, &arg) in self.func.dfg.inst_fixed_args(inst).iter().enumerate() { + let arg_type = self.func.dfg.value_type(arg); + match constraints.value_argument_constraint(i, ctrl_type) { + ResolvedConstraint::Bound(expected_type) => { + if arg_type != expected_type { + errors.report(( + inst, + self.context(inst), + format!( + "arg {} ({}) has type {}, expected {}", + i, arg, arg_type, expected_type + ), + )); + } + } + ResolvedConstraint::Free(type_set) => { + if !type_set.contains(arg_type) { + errors.report(( + inst, + self.context(inst), + format!( + "arg {} ({}) with type {} failed to satisfy type set {:?}", + i, arg, arg_type, type_set + ), + )); + } + } + } + } + Ok(()) + } + + fn typecheck_variable_args( + &self, + inst: Inst, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + match self.func.dfg.analyze_branch(inst) { + BranchInfo::SingleDest(block, _) => { + let iter = self + .func + .dfg + .block_params(block) + .iter() + .map(|&v| self.func.dfg.value_type(v)); + self.typecheck_variable_args_iterator(inst, iter, errors)?; + } + BranchInfo::Table(table, block) => { + if let Some(block) = block { + let arg_count = self.func.dfg.num_block_params(block); + if arg_count != 0 { + return errors.nonfatal(( + inst, + self.context(inst), + format!( + "takes no arguments, but had target {} with {} arguments", + block, arg_count, + ), + )); + } + } + for block in self.func.jump_tables[table].iter() { + let arg_count = self.func.dfg.num_block_params(*block); + if arg_count != 0 { + return errors.nonfatal(( + inst, + self.context(inst), + format!( + "takes no arguments, but had target {} with {} arguments", + block, arg_count, + ), + )); + } + } + } + BranchInfo::NotABranch => {} + } + + match self.func.dfg[inst].analyze_call(&self.func.dfg.value_lists) { + CallInfo::Direct(func_ref, _) => { + let sig_ref = self.func.dfg.ext_funcs[func_ref].signature; + let arg_types = 
self.func.dfg.signatures[sig_ref]
+                    .params
+                    .iter()
+                    .map(|a| a.value_type);
+                self.typecheck_variable_args_iterator(inst, arg_types, errors)?;
+                self.check_outgoing_args(inst, sig_ref, errors)?;
+            }
+            CallInfo::Indirect(sig_ref, _) => {
+                let arg_types = self.func.dfg.signatures[sig_ref]
+                    .params
+                    .iter()
+                    .map(|a| a.value_type);
+                self.typecheck_variable_args_iterator(inst, arg_types, errors)?;
+                self.check_outgoing_args(inst, sig_ref, errors)?;
+            }
+            CallInfo::NotACall => {}
+        }
+        Ok(())
+    }
+
+    fn typecheck_variable_args_iterator<I: Iterator<Item = Type>>(
+        &self,
+        inst: Inst,
+        iter: I,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        let variable_args = self.func.dfg.inst_variable_args(inst);
+        let mut i = 0;
+
+        for expected_type in iter {
+            if i >= variable_args.len() {
+                // Result count mismatch handled below, we want the full argument count first though
+                i += 1;
+                continue;
+            }
+            let arg = variable_args[i];
+            let arg_type = self.func.dfg.value_type(arg);
+            if expected_type != arg_type {
+                errors.report((
+                    inst,
+                    self.context(inst),
+                    format!(
+                        "arg {} ({}) has type {}, expected {}",
+                        i, variable_args[i], arg_type, expected_type
+                    ),
+                ));
+            }
+            i += 1;
+        }
+        if i != variable_args.len() {
+            return errors.nonfatal((
+                inst,
+                self.context(inst),
+                format!(
+                    "mismatched argument count for `{}`: got {}, expected {}",
+                    self.func.dfg.display_inst(inst, None),
+                    variable_args.len(),
+                    i,
+                ),
+            ));
+        }
+        Ok(())
+    }
+
+    /// Check the locations assigned to outgoing call arguments.
+    ///
+    /// When a signature has been legalized, all values passed as outgoing arguments on the stack
+    /// must be assigned to a matching `OutgoingArg` stack slot.
+    fn check_outgoing_args(
+        &self,
+        inst: Inst,
+        sig_ref: SigRef,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        let sig = &self.func.dfg.signatures[sig_ref];
+
+        let args = self.func.dfg.inst_variable_args(inst);
+        let expected_args = &sig.params[..];
+
+        for (&arg, &abi) in args.iter().zip(expected_args) {
+            // Value types have already been checked by `typecheck_variable_args_iterator()`.
+            if let ArgumentLoc::Stack(offset) = abi.location {
+                let arg_loc = self.func.locations[arg];
+                if let ValueLoc::Stack(ss) = arg_loc {
+                    // Argument value is assigned to a stack slot as expected.
+ self.verify_stack_slot(inst, ss, errors)?; + let slot = &self.func.stack_slots[ss]; + if slot.kind != StackSlotKind::OutgoingArg { + return errors.fatal(( + inst, + self.context(inst), + format!( + "Outgoing stack argument {} in wrong stack slot: {} = {}", + arg, ss, slot, + ), + )); + } + if slot.offset != Some(offset) { + return errors.fatal(( + inst, + self.context(inst), + format!( + "Outgoing stack argument {} should have offset {}: {} = {}", + arg, offset, ss, slot, + ), + )); + } + if slot.size != abi.value_type.bytes() { + return errors.fatal(( + inst, + self.context(inst), + format!( + "Outgoing stack argument {} wrong size for {}: {} = {}", + arg, abi.value_type, ss, slot, + ), + )); + } + } else { + let reginfo = self.isa.map(|i| i.register_info()); + return errors.fatal(( + inst, + self.context(inst), + format!( + "Outgoing stack argument {} in wrong location: {}", + arg, + arg_loc.display(reginfo.as_ref()) + ), + )); + } + } + } + Ok(()) + } + + fn typecheck_return(&self, inst: Inst, errors: &mut VerifierErrors) -> VerifierStepResult<()> { + if self.func.dfg[inst].opcode().is_return() { + let args = self.func.dfg.inst_variable_args(inst); + let expected_types = &self.func.signature.returns; + if args.len() != expected_types.len() { + return errors.nonfatal(( + inst, + self.context(inst), + "arguments of return must match function signature", + )); + } + for (i, (&arg, &expected_type)) in args.iter().zip(expected_types).enumerate() { + let arg_type = self.func.dfg.value_type(arg); + if arg_type != expected_type.value_type { + errors.report(( + inst, + self.context(inst), + format!( + "arg {} ({}) has type {}, must match function signature of {}", + i, arg, arg_type, expected_type + ), + )); + } + } + } + Ok(()) + } + + // Check special-purpose type constraints that can't be expressed in the normal opcode + // constraints. + fn typecheck_special( + &self, + inst: Inst, + ctrl_type: Type, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + match self.func.dfg[inst] { + ir::InstructionData::Unary { opcode, arg } => { + let arg_type = self.func.dfg.value_type(arg); + match opcode { + Opcode::Bextend | Opcode::Uextend | Opcode::Sextend | Opcode::Fpromote => { + if arg_type.lane_count() != ctrl_type.lane_count() { + return errors.nonfatal(( + inst, + self.context(inst), + format!( + "input {} and output {} must have same number of lanes", + arg_type, ctrl_type, + ), + )); + } + if arg_type.lane_bits() >= ctrl_type.lane_bits() { + return errors.nonfatal(( + inst, + self.context(inst), + format!( + "input {} must be smaller than output {}", + arg_type, ctrl_type, + ), + )); + } + } + Opcode::Breduce | Opcode::Ireduce | Opcode::Fdemote => { + if arg_type.lane_count() != ctrl_type.lane_count() { + return errors.nonfatal(( + inst, + self.context(inst), + format!( + "input {} and output {} must have same number of lanes", + arg_type, ctrl_type, + ), + )); + } + if arg_type.lane_bits() <= ctrl_type.lane_bits() { + return errors.nonfatal(( + inst, + self.context(inst), + format!( + "input {} must be larger than output {}", + arg_type, ctrl_type, + ), + )); + } + } + _ => {} + } + } + ir::InstructionData::HeapAddr { heap, arg, .. 
} => {
+                let index_type = self.func.dfg.value_type(arg);
+                let heap_index_type = self.func.heaps[heap].index_type;
+                if index_type != heap_index_type {
+                    return errors.nonfatal((
+                        inst,
+                        self.context(inst),
+                        format!(
+                            "index type {} differs from heap index type {}",
+                            index_type, heap_index_type,
+                        ),
+                    ));
+                }
+            }
+            ir::InstructionData::TableAddr { table, arg, .. } => {
+                let index_type = self.func.dfg.value_type(arg);
+                let table_index_type = self.func.tables[table].index_type;
+                if index_type != table_index_type {
+                    return errors.nonfatal((
+                        inst,
+                        self.context(inst),
+                        format!(
+                            "index type {} differs from table index type {}",
+                            index_type, table_index_type,
+                        ),
+                    ));
+                }
+            }
+            ir::InstructionData::UnaryGlobalValue { global_value, .. } => {
+                if let Some(isa) = self.isa {
+                    let inst_type = self.func.dfg.value_type(self.func.dfg.first_result(inst));
+                    let global_type = self.func.global_values[global_value].global_type(isa);
+                    if inst_type != global_type {
+                        return errors.nonfatal((
+                            inst, self.context(inst),
+                            format!(
+                                "global_value instruction with type {} references global value with type {}",
+                                inst_type, global_type
+                            )),
+                        );
+                    }
+                }
+            }
+            _ => {}
+        }
+        Ok(())
+    }
+
+    fn typecheck_copy_nop(
+        &self,
+        inst: Inst,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        if let InstructionData::Unary {
+            opcode: Opcode::CopyNop,
+            arg,
+        } = self.func.dfg[inst]
+        {
+            let dst_vals = self.func.dfg.inst_results(inst);
+            if dst_vals.len() != 1 {
+                return errors.fatal((
+                    inst,
+                    self.context(inst),
+                    "copy_nop must produce exactly one result",
+                ));
+            }
+            let dst_val = dst_vals[0];
+            if self.func.dfg.value_type(dst_val) != self.func.dfg.value_type(arg) {
+                return errors.fatal((
+                    inst,
+                    self.context(inst),
+                    "copy_nop src and dst types must be the same",
+                ));
+            }
+            let src_loc = self.func.locations[arg];
+            let dst_loc = self.func.locations[dst_val];
+            let locs_ok = match (src_loc, dst_loc) {
+                (ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => src_slot == dst_slot,
+                _ => false,
+            };
+            if !locs_ok {
+                return errors.fatal((
+                    inst,
+                    self.context(inst),
+                    format!(
+                        "copy_nop must refer to identical stack slots, but found {:?} vs {:?}",
+                        src_loc, dst_loc,
+                    ),
+                ));
+            }
+        }
+        Ok(())
+    }
+
+    fn cfg_integrity(
+        &self,
+        cfg: &ControlFlowGraph,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        let mut expected_succs = BTreeSet::<Block>::new();
+        let mut got_succs = BTreeSet::<Block>::new();
+        let mut expected_preds = BTreeSet::<Inst>::new();
+        let mut got_preds = BTreeSet::<Inst>::new();
+
+        for block in self.func.layout.blocks() {
+            expected_succs.extend(self.expected_cfg.succ_iter(block));
+            got_succs.extend(cfg.succ_iter(block));
+
+            let missing_succs: Vec<Block> =
+                expected_succs.difference(&got_succs).cloned().collect();
+            if !missing_succs.is_empty() {
+                errors.report((
+                    block,
+                    format!("cfg lacked the following successor(s) {:?}", missing_succs),
+                ));
+                continue;
+            }
+
+            let excess_succs: Vec<Block> = got_succs.difference(&expected_succs).cloned().collect();
+            if !excess_succs.is_empty() {
+                errors.report((
+                    block,
+                    format!("cfg had unexpected successor(s) {:?}", excess_succs),
+                ));
+                continue;
+            }
+
+            expected_preds.extend(
+                self.expected_cfg
+                    .pred_iter(block)
+                    .map(|BlockPredecessor { inst, .. }| inst),
+            );
+            got_preds.extend(
+                cfg.pred_iter(block)
+                    .map(|BlockPredecessor { inst, ..
}| inst),
+            );
+
+            let missing_preds: Vec<Inst> = expected_preds.difference(&got_preds).cloned().collect();
+            if !missing_preds.is_empty() {
+                errors.report((
+                    block,
+                    format!(
+                        "cfg lacked the following predecessor(s) {:?}",
+                        missing_preds
+                    ),
+                ));
+                continue;
+            }
+
+            let excess_preds: Vec<Inst> = got_preds.difference(&expected_preds).cloned().collect();
+            if !excess_preds.is_empty() {
+                errors.report((
+                    block,
+                    format!("cfg had unexpected predecessor(s) {:?}", excess_preds),
+                ));
+                continue;
+            }
+
+            expected_succs.clear();
+            got_succs.clear();
+            expected_preds.clear();
+            got_preds.clear();
+        }
+        errors.as_result()
+    }
+
+    /// If the verifier has been set up with an ISA, make sure that the recorded encoding for the
+    /// instruction (if any) matches how the ISA would encode it.
+    fn verify_encoding(&self, inst: Inst, errors: &mut VerifierErrors) -> VerifierStepResult<()> {
+        // When the encodings table is empty, we don't require any instructions to be encoded.
+        //
+        // Once some instructions are encoded, we require all side-effecting instructions to have a
+        // legal encoding.
+        if self.func.encodings.is_empty() {
+            return Ok(());
+        }
+
+        let isa = match self.isa {
+            Some(isa) => isa,
+            None => return Ok(()),
+        };
+
+        let encoding = self.func.encodings[inst];
+        if encoding.is_legal() {
+            if self.func.dfg[inst].opcode().is_ghost() {
+                return errors.nonfatal((
+                    inst,
+                    self.context(inst),
+                    format!(
+                        "Ghost instruction has an encoding: {}",
+                        isa.encoding_info().display(encoding),
+                    ),
+                ));
+            }
+
+            let mut encodings = isa
+                .legal_encodings(
+                    &self.func,
+                    &self.func.dfg[inst],
+                    self.func.dfg.ctrl_typevar(inst),
+                )
+                .peekable();
+
+            if encodings.peek().is_none() {
+                return errors.nonfatal((
+                    inst,
+                    self.context(inst),
+                    format!(
+                        "Instruction failed to re-encode {}",
+                        isa.encoding_info().display(encoding),
+                    ),
+                ));
+            }
+
+            let has_valid_encoding = encodings.any(|possible_enc| encoding == possible_enc);
+
+            if !has_valid_encoding {
+                let mut possible_encodings = String::new();
+                let mut multiple_encodings = false;
+
+                for enc in isa.legal_encodings(
+                    &self.func,
+                    &self.func.dfg[inst],
+                    self.func.dfg.ctrl_typevar(inst),
+                ) {
+                    if !possible_encodings.is_empty() {
+                        possible_encodings.push_str(", ");
+                        multiple_encodings = true;
+                    }
+                    possible_encodings
+                        .write_fmt(format_args!("{}", isa.encoding_info().display(enc)))
+                        .unwrap();
+                }
+
+                return errors.nonfatal((
+                    inst,
+                    self.context(inst),
+                    format!(
+                        "encoding {} should be {}{}",
+                        isa.encoding_info().display(encoding),
+                        if multiple_encodings { "one of: " } else { "" },
+                        possible_encodings,
+                    ),
+                ));
+            }
+            return Ok(());
+        }
+
+        // Instruction is not encoded, so it is a ghost instruction.
+        // Instructions with side effects are not allowed to be ghost instructions.
+        let opcode = self.func.dfg[inst].opcode();
+
+        // The `fallthrough`, `fallthrough_return`, and `safepoint` instructions are not required
+        // to have an encoding.
+        if opcode == Opcode::Fallthrough
+            || opcode == Opcode::FallthroughReturn
+            || opcode == Opcode::Safepoint
+        {
+            return Ok(());
+        }
+
+        // Check if this opcode must be encoded.
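+        // Branches, calls, returns, stores, traps, and other side-effecting
+        // opcodes may not remain unencoded ghost instructions.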
+        let mut needs_enc = None;
+        if opcode.is_branch() {
+            needs_enc = Some("Branch");
+        } else if opcode.is_call() {
+            needs_enc = Some("Call");
+        } else if opcode.is_return() {
+            needs_enc = Some("Return");
+        } else if opcode.can_store() {
+            needs_enc = Some("Store");
+        } else if opcode.can_trap() {
+            needs_enc = Some("Trapping instruction");
+        } else if opcode.other_side_effects() {
+            needs_enc = Some("Instruction with side effects");
+        }
+
+        if let Some(text) = needs_enc {
+            // This instruction needs an encoding, so generate an error.
+            // Provide the ISA default encoding as a hint.
+            match self.func.encode(inst, isa) {
+                Ok(enc) => {
+                    return errors.nonfatal((
+                        inst,
+                        self.context(inst),
+                        format!(
+                            "{} must have an encoding (e.g., {})",
+                            text,
+                            isa.encoding_info().display(enc),
+                        ),
+                    ));
+                }
+                Err(_) => {
+                    return errors.nonfatal((
+                        inst,
+                        self.context(inst),
+                        format!("{} must have an encoding", text),
+                    ))
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    fn immediate_constraints(
+        &self,
+        inst: Inst,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        let inst_data = &self.func.dfg[inst];
+
+        match *inst_data {
+            ir::InstructionData::Store { flags, .. }
+            | ir::InstructionData::StoreComplex { flags, .. } => {
+                if flags.readonly() {
+                    errors.fatal((
+                        inst,
+                        self.context(inst),
+                        "A store instruction cannot have the `readonly` MemFlag",
+                    ))
+                } else {
+                    Ok(())
+                }
+            }
+            ir::InstructionData::ExtractLane {
+                opcode: ir::instructions::Opcode::Extractlane,
+                lane,
+                arg,
+                ..
+            }
+            | ir::InstructionData::InsertLane {
+                opcode: ir::instructions::Opcode::Insertlane,
+                lane,
+                args: [arg, _],
+                ..
+            } => {
+                // We must be specific about the opcodes above because other instructions are using
+                // the ExtractLane/InsertLane formats.
+                let ty = self.func.dfg.value_type(arg);
+                if u16::from(lane) >= ty.lane_count() {
+                    errors.fatal((
+                        inst,
+                        self.context(inst),
+                        format!("The lane {} does not index into the type {}", lane, ty,),
+                    ))
+                } else {
+                    Ok(())
+                }
+            }
+            _ => Ok(()),
+        }
+    }
+
+    fn verify_safepoint_unused(
+        &self,
+        inst: Inst,
+        errors: &mut VerifierErrors,
+    ) -> VerifierStepResult<()> {
+        if let Some(isa) = self.isa {
+            if !isa.flags().enable_safepoints() && self.func.dfg[inst].opcode() == Opcode::Safepoint
+            {
+                return errors.fatal((
+                    inst,
+                    self.context(inst),
+                    "safepoint instruction cannot be used when it is not enabled.",
+                ));
+            }
+        }
+        Ok(())
+    }
+
+    fn typecheck_function_signature(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> {
+        self.func
+            .signature
+            .params
+            .iter()
+            .enumerate()
+            .filter(|(_, &param)| param.value_type == types::INVALID)
+            .for_each(|(i, _)| {
+                errors.report((
+                    AnyEntity::Function,
+                    format!("Parameter at position {} has an invalid type", i),
+                ));
+            });
+
+        self.func
+            .signature
+            .returns
+            .iter()
+            .enumerate()
+            .filter(|(_, &ret)| ret.value_type == types::INVALID)
+            .for_each(|(i, _)| {
+                errors.report((
+                    AnyEntity::Function,
+                    format!("Return value at position {} has an invalid type", i),
+                ))
+            });
+
+        if errors.has_error() {
+            Err(())
+        } else {
+            Ok(())
+        }
+    }
+
+    pub fn run(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> {
+        self.verify_global_values(errors)?;
+        self.verify_heaps(errors)?;
+        self.verify_tables(errors)?;
+        self.verify_jump_tables(errors)?;
+        self.typecheck_entry_block_params(errors)?;
+        self.typecheck_function_signature(errors)?;
+
+        for block in self.func.layout.blocks() {
+            if self.func.layout.first_inst(block).is_none() {
+                return errors.fatal((block, format!("{} cannot be
empty", block))); + } + for inst in self.func.layout.block_insts(block) { + self.block_integrity(block, inst, errors)?; + self.instruction_integrity(inst, errors)?; + self.verify_safepoint_unused(inst, errors)?; + self.typecheck(inst, errors)?; + self.verify_encoding(inst, errors)?; + self.immediate_constraints(inst, errors)?; + } + + self.encodable_as_bb(block, errors)?; + } + + verify_flags(self.func, &self.expected_cfg, self.isa, errors)?; + + if !errors.is_empty() { + debug!( + "Found verifier errors in function:\n{}", + pretty_verifier_error(self.func, None, None, errors.clone()) + ); + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::{Verifier, VerifierError, VerifierErrors}; + use crate::entity::EntityList; + use crate::ir::instructions::{InstructionData, Opcode}; + use crate::ir::{types, AbiParam, Function}; + use crate::settings; + + macro_rules! assert_err_with_msg { + ($e:expr, $msg:expr) => { + match $e.0.get(0) { + None => panic!("Expected an error"), + Some(&VerifierError { ref message, .. }) => { + if !message.contains($msg) { + #[cfg(feature = "std")] + panic!(format!( + "'{}' did not contain the substring '{}'", + message, $msg + )); + #[cfg(not(feature = "std"))] + panic!("error message did not contain the expected substring"); + } + } + } + }; + } + + #[test] + fn empty() { + let func = Function::new(); + let flags = &settings::Flags::new(settings::builder()); + let verifier = Verifier::new(&func, flags.into()); + let mut errors = VerifierErrors::default(); + + assert_eq!(verifier.run(&mut errors), Ok(())); + assert!(errors.0.is_empty()); + } + + #[test] + fn bad_instruction_format() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + func.layout.append_block(block0); + let nullary_with_bad_opcode = func.dfg.make_inst(InstructionData::UnaryImm { + opcode: Opcode::F32const, + imm: 0.into(), + }); + func.layout.append_inst(nullary_with_bad_opcode, block0); + func.layout.append_inst( + func.dfg.make_inst(InstructionData::Jump { + opcode: Opcode::Jump, + destination: block0, + args: EntityList::default(), + }), + block0, + ); + let flags = &settings::Flags::new(settings::builder()); + let verifier = Verifier::new(&func, flags.into()); + let mut errors = VerifierErrors::default(); + + let _ = verifier.run(&mut errors); + + assert_err_with_msg!(errors, "instruction format"); + } + + #[test] + fn test_function_invalid_param() { + let mut func = Function::new(); + func.signature.params.push(AbiParam::new(types::INVALID)); + + let mut errors = VerifierErrors::default(); + let flags = &settings::Flags::new(settings::builder()); + let verifier = Verifier::new(&func, flags.into()); + + let _ = verifier.typecheck_function_signature(&mut errors); + assert_err_with_msg!(errors, "Parameter at position 0 has an invalid type"); + } + + #[test] + fn test_function_invalid_return_value() { + let mut func = Function::new(); + func.signature.returns.push(AbiParam::new(types::INVALID)); + + let mut errors = VerifierErrors::default(); + let flags = &settings::Flags::new(settings::builder()); + let verifier = Verifier::new(&func, flags.into()); + + let _ = verifier.typecheck_function_signature(&mut errors); + assert_err_with_msg!(errors, "Return value at position 0 has an invalid type"); + } + + #[test] + fn test_printing_contextual_errors() { + // Build function. 
+ let mut func = Function::new(); + let block0 = func.dfg.make_block(); + func.layout.append_block(block0); + + // Build instruction: v0, v1 = iconst 42 + let inst = func.dfg.make_inst(InstructionData::UnaryImm { + opcode: Opcode::Iconst, + imm: 42.into(), + }); + func.dfg.append_result(inst, types::I32); + func.dfg.append_result(inst, types::I32); + func.layout.append_inst(inst, block0); + + // Setup verifier. + let mut errors = VerifierErrors::default(); + let flags = &settings::Flags::new(settings::builder()); + let verifier = Verifier::new(&func, flags.into()); + + // Now the error message, when printed, should contain the instruction sequence causing the + // error (i.e. v0, v1 = iconst.i32 42) and not only its entity value (i.e. inst0) + let _ = verifier.typecheck_results(inst, types::I32, &mut errors); + assert_eq!( + format!("{}", errors.0[0]), + "inst0 (v0, v1 = iconst.i32 42): has more result values than expected" + ) + } + + #[test] + fn test_empty_block() { + let mut func = Function::new(); + let block0 = func.dfg.make_block(); + func.layout.append_block(block0); + + let flags = &settings::Flags::new(settings::builder()); + let verifier = Verifier::new(&func, flags.into()); + let mut errors = VerifierErrors::default(); + let _ = verifier.run(&mut errors); + + assert_err_with_msg!(errors, "block0 cannot be empty"); + } +} diff --git a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs new file mode 100644 index 0000000000..6d109f4c04 --- /dev/null +++ b/cranelift/codegen/src/write.rs @@ -0,0 +1,843 @@ +//! Converting Cranelift IR to text. +//! +//! The `write` module provides the `write_function` function which converts an IR `Function` to an +//! equivalent textual form. This textual form can be read back by the `cranelift-reader` crate. + +use crate::entity::SecondaryMap; +use crate::ir::entities::AnyEntity; +use crate::ir::{ + Block, DataFlowGraph, DisplayFunctionAnnotations, Function, Inst, SigRef, Type, Value, + ValueDef, ValueLoc, +}; +use crate::isa::{RegInfo, TargetIsa}; +use crate::packed_option::ReservedValue; +use crate::value_label::ValueLabelsRanges; +use crate::HashSet; +use alloc::string::String; +use alloc::vec::Vec; +use core::fmt::{self, Write}; + +/// A `FuncWriter` used to decorate functions during printing. +pub trait FuncWriter { + /// Write the basic block header for the current function. + fn write_block_header( + &mut self, + w: &mut dyn Write, + func: &Function, + isa: Option<&dyn TargetIsa>, + block: Block, + indent: usize, + ) -> fmt::Result; + + /// Write the given `inst` to `w`. + fn write_instruction( + &mut self, + w: &mut dyn Write, + func: &Function, + aliases: &SecondaryMap<Value, Vec<Value>>, + isa: Option<&dyn TargetIsa>, + inst: Inst, + indent: usize, + ) -> fmt::Result; + + /// Write the preamble to `w`. By default, this uses `write_entity_definition`.
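+ /// Returns `true` if anything was written (used by `decorate_function` + /// below to decide whether a separating blank line is needed before the body).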
+ fn write_preamble( + &mut self, + w: &mut dyn Write, + func: &Function, + regs: Option<&RegInfo>, + ) -> Result<bool, fmt::Error> { + self.super_preamble(w, func, regs) + } + + /// Default impl of `write_preamble` + fn super_preamble( + &mut self, + w: &mut dyn Write, + func: &Function, + regs: Option<&RegInfo>, + ) -> Result<bool, fmt::Error> { + let mut any = false; + + for (ss, slot) in func.stack_slots.iter() { + any = true; + self.write_entity_definition(w, func, ss.into(), slot)?; + } + + for (gv, gv_data) in &func.global_values { + any = true; + self.write_entity_definition(w, func, gv.into(), gv_data)?; + } + + for (heap, heap_data) in &func.heaps { + if !heap_data.index_type.is_invalid() { + any = true; + self.write_entity_definition(w, func, heap.into(), heap_data)?; + } + } + + for (table, table_data) in &func.tables { + if !table_data.index_type.is_invalid() { + any = true; + self.write_entity_definition(w, func, table.into(), table_data)?; + } + } + + // Write out all signatures before functions since function declarations can refer to + // signatures. + for (sig, sig_data) in &func.dfg.signatures { + any = true; + self.write_entity_definition(w, func, sig.into(), &sig_data.display(regs))?; + } + + for (fnref, ext_func) in &func.dfg.ext_funcs { + if ext_func.signature != SigRef::reserved_value() { + any = true; + self.write_entity_definition(w, func, fnref.into(), ext_func)?; + } + } + + for (jt, jt_data) in &func.jump_tables { + any = true; + self.write_entity_definition(w, func, jt.into(), jt_data)?; + } + + Ok(any) + } + + /// Write an entity definition defined in the preamble to `w`. + fn write_entity_definition( + &mut self, + w: &mut dyn Write, + func: &Function, + entity: AnyEntity, + value: &dyn fmt::Display, + ) -> fmt::Result { + self.super_entity_definition(w, func, entity, value) + } + + /// Default impl of `write_entity_definition` + #[allow(unused_variables)] + fn super_entity_definition( + &mut self, + w: &mut dyn Write, + func: &Function, + entity: AnyEntity, + value: &dyn fmt::Display, + ) -> fmt::Result { + writeln!(w, " {} = {}", entity, value) + } +} + +/// A `PlainWriter` that doesn't decorate the function. +pub struct PlainWriter; + +impl FuncWriter for PlainWriter { + fn write_instruction( + &mut self, + w: &mut dyn Write, + func: &Function, + aliases: &SecondaryMap<Value, Vec<Value>>, + isa: Option<&dyn TargetIsa>, + inst: Inst, + indent: usize, + ) -> fmt::Result { + write_instruction(w, func, aliases, isa, inst, indent) + } + + fn write_block_header( + &mut self, + w: &mut dyn Write, + func: &Function, + isa: Option<&dyn TargetIsa>, + block: Block, + indent: usize, + ) -> fmt::Result { + write_block_header(w, func, isa, block, indent) + } +} + +/// Write `func` to `w` as equivalent text. +/// Use `isa` to emit ISA-dependent annotations. +pub fn write_function( + w: &mut dyn Write, + func: &Function, + annotations: &DisplayFunctionAnnotations, +) -> fmt::Result { + decorate_function(&mut PlainWriter, w, func, annotations) +} + +/// Create a reverse-alias map from a value to all aliases having that value as a direct target +fn alias_map(func: &Function) -> SecondaryMap<Value, Vec<Value>> { + let mut aliases = SecondaryMap::<_, Vec<_>>::new(); + for v in func.dfg.values() { + // VADFS returns the immediate target of an alias + if let Some(k) = func.dfg.value_alias_dest_for_serialization(v) { + aliases[k].push(v); + } + } + aliases +} + +/// Writes `func` to `w` as text. +/// `write_function_plain` is passed as a 'closure' to print instructions as text. +/// `pretty_function_error` is passed as a 'closure' to add error decoration.
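+/// +/// A minimal usage sketch (assuming `DisplayFunctionAnnotations` has only the +/// `isa` and `value_ranges` fields used below, and `func` is a built `Function`): +/// +/// ```ignore +/// let mut text = String::new(); +/// decorate_function( +/// &mut PlainWriter, +/// &mut text, +/// &func, +/// &DisplayFunctionAnnotations { isa: None, value_ranges: None }, +/// ) +/// .unwrap(); +/// ```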
+pub fn decorate_function<FW: FuncWriter>( + func_w: &mut FW, + w: &mut dyn Write, + func: &Function, + annotations: &DisplayFunctionAnnotations, +) -> fmt::Result { + let regs = annotations.isa.map(TargetIsa::register_info); + let regs = regs.as_ref(); + + write!(w, "function ")?; + write_spec(w, func, regs)?; + writeln!(w, " {{")?; + let aliases = alias_map(func); + let mut any = func_w.write_preamble(w, func, regs)?; + for block in &func.layout { + if any { + writeln!(w)?; + } + decorate_block(func_w, w, func, &aliases, annotations, block)?; + any = true; + } + writeln!(w, "}}") +} + +//---------------------------------------------------------------------- +// +// Function spec. + +fn write_spec(w: &mut dyn Write, func: &Function, regs: Option<&RegInfo>) -> fmt::Result { + write!(w, "{}{}", func.name, func.signature.display(regs)) +} + +//---------------------------------------------------------------------- +// +// Basic blocks + +fn write_arg( + w: &mut dyn Write, + func: &Function, + regs: Option<&RegInfo>, + arg: Value, +) -> fmt::Result { + write!(w, "{}: {}", arg, func.dfg.value_type(arg))?; + let loc = func.locations[arg]; + if loc.is_assigned() { + write!(w, " [{}]", loc.display(regs))? + } + + Ok(()) +} + +/// Write out the basic block header, outdented: +/// +/// block1: +/// block1(v1: i32): +/// block10(v4: f64, v5: b1): +/// +pub fn write_block_header( + w: &mut dyn Write, + func: &Function, + isa: Option<&dyn TargetIsa>, + block: Block, + indent: usize, +) -> fmt::Result { + // The `indent` is the instruction indentation. block headers are 4 spaces out from that. + write!(w, "{1:0$}{2}", indent - 4, "", block)?; + + let regs = isa.map(TargetIsa::register_info); + let regs = regs.as_ref(); + + let mut args = func.dfg.block_params(block).iter().cloned(); + match args.next() { + None => return writeln!(w, ":"), + Some(arg) => { + write!(w, "(")?; + write_arg(w, func, regs, arg)?; + } + } + // Remaining arguments. + for arg in args { + write!(w, ", ")?; + write_arg(w, func, regs, arg)?; + } + writeln!(w, "):") +} + +fn write_valueloc(w: &mut dyn Write, loc: ValueLoc, regs: &RegInfo) -> fmt::Result { + match loc { + ValueLoc::Reg(r) => write!(w, "{}", regs.display_regunit(r)), + ValueLoc::Stack(ss) => write!(w, "{}", ss), + ValueLoc::Unassigned => write!(w, "?"), + } +} + +fn write_value_range_markers( + w: &mut dyn Write, + val_ranges: &ValueLabelsRanges, + regs: &RegInfo, + offset: u32, + indent: usize, +) -> fmt::Result { + let mut result = String::new(); + let mut shown = HashSet::new(); + for (val, rng) in val_ranges { + for i in (0..rng.len()).rev() { + if rng[i].start == offset { + write!(&mut result, " {}@", val)?; + write_valueloc(&mut result, rng[i].loc, regs)?; + shown.insert(val); + break; + } + } + } + for (val, rng) in val_ranges { + for i in (0..rng.len()).rev() { + if rng[i].end == offset && !shown.contains(val) { + write!(&mut result, " {}\u{2620}", val)?; + break; + } + } + } + if !result.is_empty() { + writeln!(w, ";{1:0$}; {2}", indent + 24, "", result)?; + } + Ok(()) +} + +fn decorate_block<FW: FuncWriter>( + func_w: &mut FW, + w: &mut dyn Write, + func: &Function, + aliases: &SecondaryMap<Value, Vec<Value>>, + annotations: &DisplayFunctionAnnotations, + block: Block, +) -> fmt::Result { + // Indent all instructions if any encodings are present.
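+ // The wide 36-column indent leaves room for the srcloc/encoding/value-location + // prefix that `write_instruction` builds below; without encodings or source + // locations, the usual 4-space indent is used.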
+ let indent = if func.encodings.is_empty() && func.srclocs.is_empty() { + 4 + } else { + 36 + }; + let isa = annotations.isa; + + func_w.write_block_header(w, func, isa, block, indent)?; + for a in func.dfg.block_params(block).iter().cloned() { + write_value_aliases(w, aliases, a, indent)?; + } + + if let Some(isa) = isa { + if !func.offsets.is_empty() { + let encinfo = isa.encoding_info(); + let regs = &isa.register_info(); + for (offset, inst, size) in func.inst_offsets(block, &encinfo) { + func_w.write_instruction(w, func, aliases, Some(isa), inst, indent)?; + if size > 0 { + if let Some(val_ranges) = annotations.value_ranges { + write_value_range_markers(w, val_ranges, regs, offset + size, indent)?; + } + } + } + return Ok(()); + } + } + + for inst in func.layout.block_insts(block) { + func_w.write_instruction(w, func, aliases, isa, inst, indent)?; + } + + Ok(()) +} + +//---------------------------------------------------------------------- +// +// Instructions + +// Should `inst` be printed with a type suffix? +// +// Polymorphic instructions may need a suffix indicating the value of the controlling type variable +// if it can't be trivially inferred. +// +fn type_suffix(func: &Function, inst: Inst) -> Option<Type> { + let inst_data = &func.dfg[inst]; + let constraints = inst_data.opcode().constraints(); + + if !constraints.is_polymorphic() { + return None; + } + + // If the controlling type variable can be inferred from the type of the designated value input + // operand, we don't need the type suffix. + if constraints.use_typevar_operand() { + let ctrl_var = inst_data.typevar_operand(&func.dfg.value_lists).unwrap(); + let def_block = match func.dfg.value_def(ctrl_var) { + ValueDef::Result(instr, _) => func.layout.inst_block(instr), + ValueDef::Param(block, _) => Some(block), + }; + if def_block.is_some() && def_block == func.layout.inst_block(inst) { + return None; + } + } + + let rtype = func.dfg.ctrl_typevar(inst); + assert!( + !rtype.is_invalid(), + "Polymorphic instruction must produce a result" + ); + Some(rtype) +} + +/// Write out any aliases to the given target, including indirect aliases +fn write_value_aliases( + w: &mut dyn Write, + aliases: &SecondaryMap<Value, Vec<Value>>, + target: Value, + indent: usize, +) -> fmt::Result { + let mut todo_stack = vec![target]; + while let Some(target) = todo_stack.pop() { + for &a in &aliases[target] { + writeln!(w, "{1:0$}{2} -> {3}", indent, "", a, target)?; + todo_stack.push(a); + } + } + + Ok(()) +} + +fn write_instruction( + w: &mut dyn Write, + func: &Function, + aliases: &SecondaryMap<Value, Vec<Value>>, + isa: Option<&dyn TargetIsa>, + inst: Inst, + indent: usize, +) -> fmt::Result { + // Prefix containing source location, encoding, and value locations. + let mut s = String::with_capacity(16); + + // Source location goes first. + let srcloc = func.srclocs[inst]; + if !srcloc.is_default() { + write!(s, "{} ", srcloc)?; + } + + // Write out encoding info. + if let Some(enc) = func.encodings.get(inst).cloned() { + if let Some(isa) = isa { + write!(s, "[{}", isa.encoding_info().display(enc))?; + // Write value locations, if we have them. + if !func.locations.is_empty() { + let regs = isa.register_info(); + for &r in func.dfg.inst_results(inst) { + write!(s, ",{}", func.locations[r].display(&regs))? + } + } + write!(s, "] ")?; + } else { + write!(s, "[{}] ", enc)?; + } + } + + // Write out prefix and indent the instruction. + write!(w, "{1:0$}", indent, s)?; + + // Write out the result values, if any.
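+ // e.g. the `v0, v1` in `v0, v1 = iconst.i32 42`: comma-separated results, + // followed by ` = ` whenever there is at least one.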
+ let mut has_results = false; + for r in func.dfg.inst_results(inst) { + if !has_results { + has_results = true; + write!(w, "{}", r)?; + } else { + write!(w, ", {}", r)?; + } + } + if has_results { + write!(w, " = ")?; + } + + // Then the opcode, possibly with a '.type' suffix. + let opcode = func.dfg[inst].opcode(); + + match type_suffix(func, inst) { + Some(suf) => write!(w, "{}.{}", opcode, suf)?, + None => write!(w, "{}", opcode)?, + } + + write_operands(w, &func.dfg, isa, inst)?; + writeln!(w)?; + + // Value aliases come out on lines after the instruction defining the referent. + for r in func.dfg.inst_results(inst) { + write_value_aliases(w, aliases, *r, indent)?; + } + Ok(()) +} + +/// Write the operands of `inst` to `w` with a prepended space. +pub fn write_operands( + w: &mut dyn Write, + dfg: &DataFlowGraph, + isa: Option<&dyn TargetIsa>, + inst: Inst, +) -> fmt::Result { + let pool = &dfg.value_lists; + use crate::ir::instructions::InstructionData::*; + match dfg[inst] { + Unary { arg, .. } => write!(w, " {}", arg), + UnaryImm { imm, .. } => write!(w, " {}", imm), + UnaryIeee32 { imm, .. } => write!(w, " {}", imm), + UnaryIeee64 { imm, .. } => write!(w, " {}", imm), + UnaryBool { imm, .. } => write!(w, " {}", imm), + UnaryGlobalValue { global_value, .. } => write!(w, " {}", global_value), + Binary { args, .. } => write!(w, " {}, {}", args[0], args[1]), + BinaryImm { arg, imm, .. } => write!(w, " {}, {}", arg, imm), + Ternary { args, .. } => write!(w, " {}, {}, {}", args[0], args[1], args[2]), + MultiAry { ref args, .. } => { + if args.is_empty() { + write!(w, "") + } else { + write!(w, " {}", DisplayValues(args.as_slice(pool))) + } + } + NullAry { .. } => write!(w, " "), + InsertLane { lane, args, .. } => write!(w, " {}, {}, {}", args[0], lane, args[1]), + ExtractLane { lane, arg, .. } => write!(w, " {}, {}", arg, lane), + UnaryConst { + constant_handle, .. + } => { + let constant_data = dfg.constants.get(constant_handle); + write!(w, " {}", constant_data) + } + Shuffle { mask, args, .. } => { + let data = dfg.immediates.get(mask).expect( + "Expected the shuffle mask to already be inserted into the immediates table", + ); + write!(w, " {}, {}, {}", args[0], args[1], data) + } + IntCompare { cond, args, .. } => write!(w, " {} {}, {}", cond, args[0], args[1]), + IntCompareImm { cond, arg, imm, .. } => write!(w, " {} {}, {}", cond, arg, imm), + IntCond { cond, arg, .. } => write!(w, " {} {}", cond, arg), + FloatCompare { cond, args, .. } => write!(w, " {} {}, {}", cond, args[0], args[1]), + FloatCond { cond, arg, .. } => write!(w, " {} {}", cond, arg), + IntSelect { cond, args, .. } => { + write!(w, " {} {}, {}, {}", cond, args[0], args[1], args[2]) + } + Jump { + destination, + ref args, + .. + } => { + write!(w, " {}", destination)?; + write_block_args(w, args.as_slice(pool)) + } + Branch { + destination, + ref args, + .. + } => { + let args = args.as_slice(pool); + write!(w, " {}, {}", args[0], destination)?; + write_block_args(w, &args[1..]) + } + BranchInt { + cond, + destination, + ref args, + .. + } => { + let args = args.as_slice(pool); + write!(w, " {} {}, {}", cond, args[0], destination)?; + write_block_args(w, &args[1..]) + } + BranchFloat { + cond, + destination, + ref args, + .. + } => { + let args = args.as_slice(pool); + write!(w, " {} {}, {}", cond, args[0], destination)?; + write_block_args(w, &args[1..]) + } + BranchIcmp { + cond, + destination, + ref args, + .. 
+ } => { + let args = args.as_slice(pool); + write!(w, " {} {}, {}, {}", cond, args[0], args[1], destination)?; + write_block_args(w, &args[2..]) + } + BranchTable { + arg, + destination, + table, + .. + } => write!(w, " {}, {}, {}", arg, destination, table), + BranchTableBase { table, .. } => write!(w, " {}", table), + BranchTableEntry { + args, imm, table, .. + } => write!(w, " {}, {}, {}, {}", args[0], args[1], imm, table), + IndirectJump { arg, table, .. } => write!(w, " {}, {}", arg, table), + Call { + func_ref, ref args, .. + } => write!(w, " {}({})", func_ref, DisplayValues(args.as_slice(pool))), + CallIndirect { + sig_ref, ref args, .. + } => { + let args = args.as_slice(pool); + write!( + w, + " {}, {}({})", + sig_ref, + args[0], + DisplayValues(&args[1..]) + ) + } + FuncAddr { func_ref, .. } => write!(w, " {}", func_ref), + StackLoad { + stack_slot, offset, .. + } => write!(w, " {}{}", stack_slot, offset), + StackStore { + arg, + stack_slot, + offset, + .. + } => write!(w, " {}, {}{}", arg, stack_slot, offset), + HeapAddr { heap, arg, imm, .. } => write!(w, " {}, {}, {}", heap, arg, imm), + TableAddr { table, arg, .. } => write!(w, " {}, {}", table, arg), + Load { + flags, arg, offset, .. + } => write!(w, "{} {}{}", flags, arg, offset), + LoadComplex { + flags, + ref args, + offset, + .. + } => { + let args = args.as_slice(pool); + write!( + w, + "{} {}{}", + flags, + DisplayValuesWithDelimiter(&args, '+'), + offset + ) + } + Store { + flags, + args, + offset, + .. + } => write!(w, "{} {}, {}{}", flags, args[0], args[1], offset), + StoreComplex { + flags, + ref args, + offset, + .. + } => { + let args = args.as_slice(pool); + write!( + w, + "{} {}, {}{}", + flags, + args[0], + DisplayValuesWithDelimiter(&args[1..], '+'), + offset + ) + } + RegMove { arg, src, dst, .. } => { + if let Some(isa) = isa { + let regs = isa.register_info(); + write!( + w, + " {}, {} -> {}", + arg, + regs.display_regunit(src), + regs.display_regunit(dst) + ) + } else { + write!(w, " {}, %{} -> %{}", arg, src, dst) + } + } + CopySpecial { src, dst, .. } => { + if let Some(isa) = isa { + let regs = isa.register_info(); + write!( + w, + " {} -> {}", + regs.display_regunit(src), + regs.display_regunit(dst) + ) + } else { + write!(w, " %{} -> %{}", src, dst) + } + } + CopyToSsa { src, .. } => { + if let Some(isa) = isa { + let regs = isa.register_info(); + write!(w, " {}", regs.display_regunit(src)) + } else { + write!(w, " %{}", src) + } + } + RegSpill { arg, src, dst, .. } => { + if let Some(isa) = isa { + let regs = isa.register_info(); + write!(w, " {}, {} -> {}", arg, regs.display_regunit(src), dst) + } else { + write!(w, " {}, %{} -> {}", arg, src, dst) + } + } + RegFill { arg, src, dst, .. } => { + if let Some(isa) = isa { + let regs = isa.register_info(); + write!(w, " {}, {} -> {}", arg, src, regs.display_regunit(dst)) + } else { + write!(w, " {}, {} -> %{}", arg, src, dst) + } + } + Trap { code, .. } => write!(w, " {}", code), + CondTrap { arg, code, .. } => write!(w, " {}, {}", arg, code), + IntCondTrap { + cond, arg, code, .. + } => write!(w, " {} {}, {}", cond, arg, code), + FloatCondTrap { + cond, arg, code, .. + } => write!(w, " {} {}, {}", cond, arg, code), + } +} + +/// Write block args using optional parentheses. +fn write_block_args(w: &mut dyn Write, args: &[Value]) -> fmt::Result { + if args.is_empty() { + Ok(()) + } else { + write!(w, "({})", DisplayValues(args)) + } +} + +/// Displayable slice of values.
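+/// Formats as a comma-separated list, e.g. `v0, v1, v2`.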
+struct DisplayValues<'a>(&'a [Value]); + +impl<'a> fmt::Display for DisplayValues<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for (i, val) in self.0.iter().enumerate() { + if i == 0 { + write!(f, "{}", val)?; + } else { + write!(f, ", {}", val)?; + } + } + Ok(()) + } +} + +struct DisplayValuesWithDelimiter<'a>(&'a [Value], char); + +impl<'a> fmt::Display for DisplayValuesWithDelimiter<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for (i, val) in self.0.iter().enumerate() { + if i == 0 { + write!(f, "{}", val)?; + } else { + write!(f, "{}{}", self.1, val)?; + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use crate::cursor::{Cursor, CursorPosition, FuncCursor}; + use crate::ir::types; + use crate::ir::{ExternalName, Function, InstBuilder, StackSlotData, StackSlotKind}; + use alloc::string::ToString; + + #[test] + fn basic() { + let mut f = Function::new(); + assert_eq!(f.to_string(), "function u0:0() fast {\n}\n"); + + f.name = ExternalName::testcase("foo"); + assert_eq!(f.to_string(), "function %foo() fast {\n}\n"); + + f.create_stack_slot(StackSlotData::new(StackSlotKind::ExplicitSlot, 4)); + assert_eq!( + f.to_string(), + "function %foo() fast {\n ss0 = explicit_slot 4\n}\n" + ); + + let block = f.dfg.make_block(); + f.layout.append_block(block); + assert_eq!( + f.to_string(), + "function %foo() fast {\n ss0 = explicit_slot 4\n\nblock0:\n}\n" + ); + + f.dfg.append_block_param(block, types::I8); + assert_eq!( + f.to_string(), + "function %foo() fast {\n ss0 = explicit_slot 4\n\nblock0(v0: i8):\n}\n" + ); + + f.dfg.append_block_param(block, types::F32.by(4).unwrap()); + assert_eq!( + f.to_string(), + "function %foo() fast {\n ss0 = explicit_slot 4\n\nblock0(v0: i8, v1: f32x4):\n}\n" + ); + + { + let mut cursor = FuncCursor::new(&mut f); + cursor.set_position(CursorPosition::After(block)); + cursor.ins().return_(&[]) + }; + assert_eq!( + f.to_string(), + "function %foo() fast {\n ss0 = explicit_slot 4\n\nblock0(v0: i8, v1: f32x4):\n return\n}\n" + ); + } + + #[test] + fn aliases() { + use crate::ir::InstBuilder; + + let mut func = Function::new(); + { + let block0 = func.dfg.make_block(); + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + + // make some detached values for change_to_alias + let v0 = pos.func.dfg.append_block_param(block0, types::I32); + let v1 = pos.func.dfg.append_block_param(block0, types::I32); + let v2 = pos.func.dfg.append_block_param(block0, types::I32); + pos.func.dfg.detach_block_params(block0); + + // alias to a param--will be printed at beginning of block defining param + let v3 = pos.func.dfg.append_block_param(block0, types::I32); + pos.func.dfg.change_to_alias(v0, v3); + + // alias to an alias--should print attached to alias, not ultimate target + pos.func.dfg.make_value_alias_for_serialization(v0, v2); // v0 <- v2 + + // alias to a result--will be printed after instruction producing result + let _dummy0 = pos.ins().iconst(types::I32, 42); + let v4 = pos.ins().iadd(v0, v0); + pos.func.dfg.change_to_alias(v1, v4); + let _dummy1 = pos.ins().iconst(types::I32, 23); + let _v7 = pos.ins().iadd(v1, v1); + } + assert_eq!( + func.to_string(), + "function u0:0() fast {\nblock0(v3: i32):\n v0 -> v3\n v2 -> v0\n v4 = iconst.i32 42\n v5 = iadd v0, v0\n v1 -> v5\n v6 = iconst.i32 23\n v7 = iadd v1, v1\n}\n" + ); + } +} diff --git a/cranelift/docs/.gitignore b/cranelift/docs/.gitignore new file mode 100644 index 0000000000..e35d8850c9 --- /dev/null +++ b/cranelift/docs/.gitignore @@ -0,0 +1 @@ +_build diff 
--git a/cranelift/docs/Makefile b/cranelift/docs/Makefile new file mode 100644 index 0000000000..dbb8bd0e2c --- /dev/null +++ b/cranelift/docs/Makefile @@ -0,0 +1,24 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SPHINXABUILD = sphinx-autobuild +SPHINXPROJ = cranelift +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +autohtml: html + $(SPHINXABUILD) -z ../cranelift-codegen/meta-python --ignore '.*' -b html -E $(ALLSPHINXOPTS) $(BUILDDIR)/html + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/cranelift/docs/callex.clif b/cranelift/docs/callex.clif new file mode 100644 index 0000000000..5b3ba2fe3d --- /dev/null +++ b/cranelift/docs/callex.clif @@ -0,0 +1,16 @@ +test verifier + +function %gcd(i32 uext, i32 uext) -> i32 uext system_v { + fn0 = %divmod(i32 uext, i32 uext) -> i32 uext, i32 uext + +block1(v0: i32, v1: i32): + brz v1, block3 + jump block2 + +block2: + v2, v3 = call fn0(v0, v1) + return v2 + +block3: + return v0 +} diff --git a/cranelift/docs/clif_lexer.py b/cranelift/docs/clif_lexer.py new file mode 100644 index 0000000000..2bcde35348 --- /dev/null +++ b/cranelift/docs/clif_lexer.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +# +# Pygments lexer for Cranelift. +from __future__ import absolute_import + +from pygments.lexer import RegexLexer, bygroups, words +from pygments.token import Comment, String, Keyword, Whitespace, Number, Name +from pygments.token import Operator, Punctuation, Text + + +def keywords(*args): + return words(args, prefix=r'\b', suffix=r'\b') + + +class CraneliftLexer(RegexLexer): + name = 'Cranelift' + aliases = ['clif'] + filenames = ['*.clif'] + + tokens = { + 'root': [ + # Test header lines. + (r'^(test|isa|set)(?:( +)([-\w]+)' + + r'(?:(=)(?:(\d+)|(yes|no|true|false|on|off)|(\w+)))?)*' + + r'( *)$', + bygroups(Keyword.Namespace, Whitespace, Name.Attribute, + Operator, Number.Integer, Keyword.Constant, + Name.Constant, Whitespace)), + # Comments with filecheck or other test directive. + (r'(; *)([a-z]+:)(.*?)$', + bygroups(Comment.Single, Comment.Special, Comment.Single)), + # Plain comments. + (r';.*?$', Comment.Single), + # Strings are prefixed by % or # with hex. + (r'%\w+|#[0-9a-fA-F]*', String), + # Numbers. + (r'[-+]?0[xX][0-9a-fA-F_]+', Number.Hex), + (r'[-+]?0[xX][0-9a-fA-F_]*\.[0-9a-fA-F_]*([pP]\d+)?', Number.Hex), + (r'[-+]?([0-9_]+\.[0-9_]+([eE]\d+)?|s?NaN|Inf)', Number.Float), + (r'[-+]?[0-9_]+', Number.Integer), + # Known attributes. + (keywords('uext', 'sext'), Name.Attribute), + # Well known value types. + (r'\b(b\d+|i\d+|f32|f64)(x\d+)?\b', Keyword.Type), + # v = value + # ss = stack slot + # jt = jump table + (r'(v|ss|gv|jt|fn|sig|heap)\d+', Name.Variable), + # ebb = extended basic block + (r'(ebb)\d+', Name.Label), + # Match instruction names in context. + (r'(=)( *)([a-z]\w*)', + bygroups(Operator, Whitespace, Name.Function)), + (r'^( *)([a-z]\w*\b)(?! 
*[,=])', + bygroups(Whitespace, Name.Function)), + # Other names: results and arguments + (r'[a-z]\w*', Name), + (r'->|=|:', Operator), + (r'[{}(),.]', Punctuation), + (r'[ \t]+', Text), + ], + } + + +def setup(app): + """Setup Sphinx extension.""" + app.add_lexer('clif', CraneliftLexer()) + + return {'version': '0.1'} diff --git a/cranelift/docs/compare-llvm.rst b/cranelift/docs/compare-llvm.rst new file mode 100644 index 0000000000..ad0e71eaf4 --- /dev/null +++ b/cranelift/docs/compare-llvm.rst @@ -0,0 +1,210 @@ +************************** +Cranelift compared to LLVM +************************** + +`LLVM <https://llvm.org>`_ is a collection of compiler components implemented as +a set of C++ libraries. It can be used to build both JIT compilers and static +compilers like `Clang <https://clang.llvm.org>`_, and it is deservedly very +popular. `Chris Lattner's chapter about LLVM +<http://www.aosabook.org/en/llvm.html>`_ in the `Architecture of Open Source +Applications <http://aosabook.org>`_ book gives an excellent +overview of the architecture and design of LLVM. + +Cranelift and LLVM are superficially similar projects, so it is worth +highlighting some of the differences and similarities. Both projects: + +- Use an ISA-agnostic input language in order to mostly abstract away the + differences between target instruction set architectures. +- Depend extensively on SSA form. +- Have both textual and in-memory forms of their primary intermediate + representation. (LLVM also has a binary bitcode format; Cranelift doesn't.) +- Can target multiple ISAs. +- Can cross-compile by default without rebuilding the code generator. + +However, there are also some major differences, described in the following sections. + +Intermediate representations +============================ + +LLVM uses multiple intermediate representations as it translates a program to +binary machine code: + +`LLVM IR <https://llvm.org/docs/LangRef.html>`_ + This is the primary intermediate representation which has textual, binary, and + in-memory forms. It serves two main purposes: + + - An ISA-agnostic, stable(ish) input language that front ends can generate + easily. + - Intermediate representation for common mid-level optimizations. A large + library of code analysis and transformation passes operate on LLVM IR. + +`SelectionDAG <https://llvm.org/docs/CodeGenerator.html>`_ + A graph-based representation of the code in a single basic block is used by + the instruction selector. It has both ISA-agnostic and ISA-specific + opcodes. These main passes are run on the SelectionDAG representation: + + - Type legalization eliminates all value types that don't have a + representation in the target ISA registers. + - Operation legalization eliminates all opcodes that can't be mapped to + target ISA instructions. + - DAG-combine cleans up redundant code after the legalization passes. + - Instruction selection translates ISA-agnostic expressions to ISA-specific + instructions. + + The SelectionDAG representation automatically eliminates common + subexpressions and dead code. + +`MachineInstr <https://llvm.org/docs/CodeGenerator.html>`_ + A linear representation of ISA-specific instructions that initially is in + SSA form, but it can also represent non-SSA form during and after register + allocation. Many low-level optimizations run on MI code. The most important + passes are: + + - Scheduling. + - Register allocation. + +`MC <https://llvm.org/docs/CodeGenerator.html>`_ + MC serves as the output abstraction layer and is the basis for LLVM's + integrated assembler. It is used for: + + - Branch relaxation. + - Emitting assembly or binary object code. + - Assemblers. + - Disassemblers.
+ +There is an ongoing "global instruction selection" project to replace the +SelectionDAG representation with ISA-agnostic opcodes on the MachineInstr +representation. Some target ISAs have a fast instruction selector that can +translate simple code directly to MachineInstrs, bypassing SelectionDAG when +possible. + +:doc:`Cranelift <ir>` uses a single intermediate representation to cover +these levels of abstraction. This is possible in part because of Cranelift's +smaller scope. + +- Cranelift does not provide assemblers and disassemblers, so it is not + necessary to be able to represent every weird instruction in an ISA. Only + those instructions that the code generator emits have a representation. +- Cranelift's opcodes are ISA-agnostic, but after legalization / instruction + selection, each instruction is annotated with an ISA-specific encoding which + represents a native instruction. +- SSA form is preserved throughout. After register allocation, each SSA value + is annotated with an assigned ISA register or stack slot. + +The Cranelift intermediate representation is similar to LLVM IR, but at a slightly +lower level of abstraction, to allow it to be used all the way through the +codegen process. + +This design tradeoff does mean that Cranelift IR is less friendly for mid-level +optimizations. Cranelift doesn't currently perform mid-level optimizations; +however, if it should grow to where this becomes important, the vision is that +Cranelift would add a separate IR layer, or possibly a separate IR, to support +this. Instead of frontends producing optimizer IR which is then translated to +codegen IR, Cranelift would have frontends producing codegen IR, which can be +translated to optimizer IR and back. + +This biases the overall system towards fast compilation when mid-level +optimization is not needed, such as when emitting unoptimized code, or when +low-level optimizations are sufficient. + +And it removes some constraints in the mid-level optimizer IR design space, +making it more feasible to consider ideas such as using a +`VSDG-based IR `_. + +Program structure +----------------- + +In LLVM IR, the largest representable unit is the *module* which corresponds +more or less to a C translation unit. It is a collection of functions and +global variables that may contain references to external symbols too. + +In `Cranelift's IR `_, +used by the `cranelift-codegen <https://docs.rs/cranelift-codegen>`_ crate, +functions are self-contained, allowing them to be compiled independently. At +this level, there is no explicit module that contains the functions. + +Module functionality in Cranelift is provided as an optional library layer, in +the `cranelift-module <https://docs.rs/cranelift-module>`_ crate. It provides +facilities for working with modules, which can contain multiple functions as +well as data objects, and it links them together. + +An LLVM IR function is a graph of *basic blocks*. A Cranelift IR function is a +graph of *extended basic blocks* that may contain internal branch instructions. +The main difference is that an LLVM conditional branch instruction has two +target basic blocks---a true and a false edge. A Cranelift branch instruction +only has a single target and falls through to the next instruction when its +condition is false. The Cranelift representation is closer to how machine code +works; LLVM's representation is more abstract. + +LLVM uses `phi instructions +<https://llvm.org/docs/LangRef.html#phi-instruction>`_ in its SSA +representation. Cranelift passes arguments to EBBs instead.
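+ +For example, where LLVM would merge two incoming definitions with a phi, a +Cranelift frontend passes them as arguments along each incoming branch. A rough +sketch using the in-memory builder API exercised by this change's test suite +(``pos`` is a ``FuncCursor`` positioned in a predecessor block, ``v1`` a +previously defined value; the names are illustrative only): + +.. code-block:: rust + + // Destination block with one parameter standing in for the phi. + let merge = pos.func.dfg.make_block(); + let x = pos.func.dfg.append_block_param(merge, types::I32); + // Each predecessor supplies its own argument for `x`: + pos.ins().jump(merge, &[v1]); + // ... and another predecessor would pass, say, `v2` the same way.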
The two +representations are equivalent, but the EBB arguments are better suited to +handle EBBs that may contain multiple branches to the same destination block +with different arguments. Passing arguments to an EBB looks a lot like passing +arguments to a function call, and the register allocator treats them very +similarly. Arguments are assigned to registers or stack locations. + +Value types +----------- + +:ref:`Cranelift's type system <value-types>` is mostly a subset of LLVM's type +system. It is less abstract and closer to the types that common ISA registers +can hold. + +- Integer types are limited to powers of two from :clif:type:`i8` to + :clif:type:`i64`. LLVM can represent integer types of arbitrary bit width. +- Floating point types are limited to :clif:type:`f32` and :clif:type:`f64` + which is what WebAssembly provides. It is possible that 16-bit and 128-bit + types will be added in the future. +- Addresses are represented as integers---there are no Cranelift pointer types. + LLVM currently has rich pointer types that include the pointee type. It may + move to a simpler 'address' type in the future. Cranelift may add a single + address type too. +- SIMD vector types are limited to a power-of-two number of vector lanes up to + 256. LLVM allows an arbitrary number of SIMD lanes. +- Cranelift has no aggregate types. LLVM has named and anonymous struct types as + well as array types. + +Cranelift has multiple boolean types, whereas LLVM simply uses `i1`. The sized +Cranelift boolean types are used to represent SIMD vector masks like ``b32x4`` +where each lane is either all 0 or all 1 bits. + +Cranelift instructions and function calls can return multiple result values. LLVM +instead models this by returning a single value of an aggregate type. + +Instruction set +--------------- + +LLVM has a small well-defined basic instruction set and a large number of +intrinsics, some of which are ISA-specific. Cranelift has a larger instruction +set and no intrinsics. Some Cranelift instructions are ISA-specific. + +Since Cranelift instructions are used all the way until the binary machine code +is emitted, there are opcodes for every native instruction that can be +generated. There is a lot of overlap between different ISAs, so for example the +:clif:inst:`iadd_imm` instruction is used by every ISA that can add an +immediate integer to a register. A simple RISC ISA like RISC-V can be defined +with only shared instructions, while x86 needs a number of specific +instructions to model addressing modes. + +Undefined behavior +================== + +Cranelift does not generally exploit undefined behavior in its optimizations. +LLVM's mid-level optimizations do, but it should be noted that LLVM's low-level code +generator rarely needs to make use of undefined behavior either. + +LLVM provides ``nsw`` and ``nuw`` flags for its arithmetic that invoke +undefined behavior on overflow. Cranelift does not provide this functionality. +Its arithmetic instructions either produce a value or a trap. + +LLVM has an ``unreachable`` instruction which is used to indicate impossible +code paths. Cranelift only has an explicit :clif:inst:`trap` instruction. + +Cranelift does make assumptions about aliasing. For example, it assumes that it +has full control of the stack objects in a function, and that they can only be +modified by function calls if their address has escaped. It is quite likely +that Cranelift will admit more detailed aliasing annotations on load/store +instructions in the future.
When these annotations are incorrect, undefined +behavior ensues. diff --git a/cranelift/docs/conf.py b/cranelift/docs/conf.py new file mode 100644 index 0000000000..05be11238a --- /dev/null +++ b/cranelift/docs/conf.py @@ -0,0 +1,169 @@ +# -*- coding: utf-8 -*- +# +# cranelift documentation build configuration file, created by +# sphinx-quickstart on Fri Mar 2 12:49:24 2018. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. + +from __future__ import absolute_import +import os +import sys +sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# We don't support Sphinx versions before 1.4 since the format of index +# tuples has changed. +needs_sphinx = '1.4' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.todo', + 'sphinx.ext.mathjax', + 'sphinx.ext.ifconfig', + 'sphinx.ext.graphviz', + 'sphinx.ext.inheritance_diagram', + 'clif_lexer', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'cranelift' +copyright = u'2019, Cranelift Developers' +author = u'Cranelift Developers' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = u'0.0' +# The full version, including alpha/beta/rc tags. +release = u'0.0' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = True + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. 
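+# For example, a couple of sphinx_rtd_theme options (the values here are +# illustrative only, shown commented out like the other defaults in this file): +# +# html_theme_options = {'collapse_navigation': False, 'navigation_depth': 3}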
+# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# +# html_static_path = ['_static'] + + +# -- Options for HTMLHelp output ------------------------------------------ + +# Output file base name for HTML help builder. +htmlhelp_basename = 'craneliftdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'cranelift.tex', u'cranelift Documentation', + author, 'manual'), +] + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'cranelift', u'cranelift Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'cranelift', u'cranelift Documentation', + author, 'cranelift', 'One line description of project.', + 'Miscellaneous'), +] + + +# -- Options for Graphviz ------------------------------------------------- + +graphviz_output_format = 'svg' + +inheritance_graph_attrs = dict(rankdir='TD') diff --git a/cranelift/docs/example.c b/cranelift/docs/example.c new file mode 100644 index 0000000000..0523123301 --- /dev/null +++ b/cranelift/docs/example.c @@ -0,0 +1,10 @@ +#include <stddef.h> /* for size_t */ + +float +average(const float *array, size_t count) +{ + double sum = 0; + for (size_t i = 0; i < count; i++) + sum += array[i]; + return sum / count; +} diff --git a/cranelift/docs/example.clif b/cranelift/docs/example.clif new file mode 100644 index 0000000000..a465c85d0b --- /dev/null +++ b/cranelift/docs/example.clif @@ -0,0 +1,39 @@ +test verifier + +function %average(i32, i32) -> f32 system_v { + ss0 = explicit_slot 8 ; Stack slot for ``sum``. + +block1(v0: i32, v1: i32): + v2 = f64const 0x0.0 + stack_store v2, ss0 + brz v1, block5 ; Handle count == 0. + jump block2 + +block2: + v3 = iconst.i32 0 + jump block3(v3) + +block3(v4: i32): + v5 = imul_imm v4, 4 + v6 = iadd v0, v5 + v7 = load.f32 v6 ; array[i] + v8 = fpromote.f64 v7 + v9 = stack_load.f64 ss0 + v10 = fadd v8, v9 + stack_store v10, ss0 + v11 = iadd_imm v4, 1 + v12 = icmp ult v11, v1 + brnz v12, block3(v11) ; Loop backedge.
+ jump block4 + +block4: + v13 = stack_load.f64 ss0 + v14 = fcvt_from_uint.f64 v1 + v15 = fdiv v13, v14 + v16 = fdemote.f32 v15 + return v16 + +block5: + v100 = f32const +NaN + return v100 +} diff --git a/cranelift/docs/heapex-dyn.clif b/cranelift/docs/heapex-dyn.clif new file mode 100644 index 0000000000..161bb4887a --- /dev/null +++ b/cranelift/docs/heapex-dyn.clif @@ -0,0 +1,16 @@ +test verifier + +function %add_members(i32, i64 vmctx) -> f32 baldrdash_system_v { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+64 + gv2 = load.i32 notrap aligned gv0+72 + heap0 = dynamic gv1, min 0x1000, bound gv2, offset_guard 0 + +block0(v0: i32, v6: i64): + v1 = heap_addr.i64 heap0, v0, 20 + v2 = load.f32 v1+16 + v3 = heap_addr.i64 heap0, v0, 24 + v4 = load.f32 v3+20 + v5 = fadd v2, v4 + return v5 +} diff --git a/cranelift/docs/heapex-sm32.clif b/cranelift/docs/heapex-sm32.clif new file mode 100644 index 0000000000..ce8fffb914 --- /dev/null +++ b/cranelift/docs/heapex-sm32.clif @@ -0,0 +1,15 @@ +test verifier + +function %add_members(i32, i32 vmctx) -> f32 baldrdash_system_v { + gv0 = vmctx + gv1 = load.i32 notrap aligned gv0+64 + heap0 = static gv1, min 0x1000, bound 0x10_0000, offset_guard 0x1000 + +block0(v0: i32, v5: i32): + v1 = heap_addr.i32 heap0, v0, 1 + v2 = load.f32 v1+16 + v3 = load.f32 v1+20 + v4 = fadd v2, v3 + return v4 +} + diff --git a/cranelift/docs/heapex-sm64.clif b/cranelift/docs/heapex-sm64.clif new file mode 100644 index 0000000000..89c2df841b --- /dev/null +++ b/cranelift/docs/heapex-sm64.clif @@ -0,0 +1,14 @@ +test verifier + +function %add_members(i32, i64 vmctx) -> f32 baldrdash_system_v { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+64 + heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v5: i64): + v1 = heap_addr.i64 heap0, v0, 1 + v2 = load.f32 v1+16 + v3 = load.f32 v1+20 + v4 = fadd v2, v3 + return v4 +} diff --git a/cranelift/docs/index.rst b/cranelift/docs/index.rst new file mode 100644 index 0000000000..457cd3bacf --- /dev/null +++ b/cranelift/docs/index.rst @@ -0,0 +1,73 @@ +Cranelift Code Generator +======================== + +Contents: + +.. toctree:: + :maxdepth: 1 + + ir + meta + testing + regalloc + compare-llvm + +Rust Crate Documentation +======================== + +`cranelift <https://docs.rs/cranelift>`_ + This is an umbrella crate that re-exports the codegen and frontend crates, + to make them easier to use. + +`cranelift-codegen <https://docs.rs/cranelift-codegen>`_ + This is the core code generator crate. It takes Cranelift IR as input + and emits encoded machine instructions, along with symbolic relocations, + as output. + +`cranelift-codegen-meta <https://docs.rs/cranelift-codegen-meta>`_ + This crate contains the meta-language utilities and descriptions used by the + code generator. + +`cranelift-wasm <https://docs.rs/cranelift-wasm>`_ + This crate translates WebAssembly code into Cranelift IR. + +`cranelift-frontend <https://docs.rs/cranelift-frontend>`_ + This crate provides utilities for translating code into Cranelift IR. + +`cranelift-native <https://docs.rs/cranelift-native>`_ + This crate performs auto-detection of the host, allowing Cranelift to + generate code optimized for the machine it's running on. + +`cranelift-reader <https://docs.rs/cranelift-reader>`_ + This crate translates from Cranelift IR's text format into Cranelift IR's + in-memory data structures. + +`cranelift-module <https://docs.rs/cranelift-module>`_ + This crate manages compiling multiple functions and data objects + together. + +`cranelift-object <https://docs.rs/cranelift-object>`_ + This crate provides an object-based backend for `cranelift-module`, which + emits native object files using the + `object <https://github.com/gimli-rs/object>`_ library.
+ +`cranelift-faerie <https://docs.rs/cranelift-faerie>`_ + This crate provides a faerie-based backend for `cranelift-module`, which + emits native object files using the + `faerie <https://github.com/m4b/faerie>`_ library. + +`cranelift-simplejit <https://docs.rs/cranelift-simplejit>`_ + This crate provides a simple JIT backend for `cranelift-module`, which + emits code and data into memory. + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + +Todo list +========= + +.. todolist:: diff --git a/cranelift/docs/ir.rst b/cranelift/docs/ir.rst new file mode 100644 index 0000000000..dca2399991 --- /dev/null +++ b/cranelift/docs/ir.rst @@ -0,0 +1,1028 @@ +********************** +Cranelift IR Reference +********************** + +.. default-domain:: clif +.. highlight:: clif + +.. todo:: Update the IR reference + + This document is likely to be outdated and missing some important + information. It is recommended to look at the list of instructions as + documented in the `InstBuilder` documentation: + https://docs.rs/cranelift-codegen/latest/cranelift_codegen/ir/trait.InstBuilder.html + +The Cranelift intermediate representation (:term:`IR`) has two primary forms: +an *in-memory data structure* used by the code generator library, and a +*text format* which is used for test cases and debug output. +Files containing Cranelift textual IR have the ``.clif`` filename extension. + +This reference uses the text format to describe IR semantics but glosses over +the finer details of the lexical and syntactic structure of the format. + + +Overall structure +================= + +Cranelift compiles functions independently. A ``.clif`` IR file may contain +multiple functions, and the programmatic API can create multiple function +handles at the same time, but the functions don't share any data or reference +each other directly. + +This is a simple C function that computes the average of an array of floats: + +.. literalinclude:: example.c + :language: c + +Here is the same function compiled into Cranelift IR: + +.. literalinclude:: example.clif + :language: clif + :lines: 2- + +The first line of a function definition provides the function *name* and +the :term:`function signature` which declares the parameter and return types. +Then follows the :term:`function preamble` which declares a number of entities +that can be referenced inside the function. In the example above, the preamble +declares a single explicit stack slot, ``ss0``. + +After the preamble follows the :term:`function body` which consists of +:term:`extended basic block`\s (EBBs), the first of which is the +:term:`entry block`. Every EBB ends with a :term:`terminator instruction`, so +execution can never fall through to the next EBB without an explicit branch. + +A ``.clif`` file consists of a sequence of independent function definitions: + +.. productionlist:: + function_list : { function } + function : "function" function_name signature "{" preamble function_body "}" + preamble : { preamble_decl } + function_body : { extended_basic_block } + +Static single assignment form +----------------------------- + +The instructions in the function body use and produce *values* in SSA form. This +means that every value is defined exactly once, and every use of a value must be +dominated by the definition. + +Cranelift does not have phi instructions but uses :term:`EBB parameter`\s +instead. An EBB can be defined with a list of typed parameters. Whenever control +is transferred to the EBB, argument values for the parameters must be provided.
+When entering a function, the incoming function parameters are passed as +arguments to the entry EBB's parameters. + +Instructions define zero, one, or more result values. All SSA values are either +EBB parameters or instruction results. + +In the example above, the loop induction variable ``i`` is represented as three +SSA values: ``v3`` is the initial value, defined in ``block2``. In the loop block +``block3``, the EBB parameter ``v4`` represents the value of the induction +variable during each iteration. Finally, ``v11`` is computed as the induction +variable value for the next iteration. + +The `cranelift_frontend` crate contains utilities for translating from programs +containing multiple assignments to the same variables into SSA form for +Cranelift :term:`IR`. + +Such variables can also be presented to Cranelift as :term:`stack slot`\s. +Stack slots are accessed with the `stack_store` and `stack_load` instructions, +and can have their address taken with `stack_addr`, which supports C-like +programming languages where local variables can have their address taken. + +.. _value-types: + +Value types +=========== + +All SSA values have a type which determines the size and shape (for SIMD +vectors) of the value. Many instructions are polymorphic -- they can operate on +different types. + +Boolean types +------------- + +Boolean values are either true or false. + +The `b1` type represents an abstract boolean value. It can only exist as +an SSA value, and can't be directly stored in memory. It can, however, be +converted into an integer with value 0 or 1 by the `bint` instruction (and +converted back with an `icmp_imm` against 0). + +Several larger boolean types are also defined, primarily to be used as SIMD +element types. They can be stored in memory, and are represented as either all +zero bits or all one bits. + +- b1 +- b8 +- b16 +- b32 +- b64 + +Integer types +------------- + +Integer values have a fixed size and can be interpreted as either signed or +unsigned. Some instructions will interpret an operand as a signed or unsigned +number, others don't care. + +The support for i8 and i16 arithmetic is incomplete, and its use could lead to bugs. + +- i8 +- i16 +- i32 +- i64 + +Floating point types +-------------------- + +The floating point types have the IEEE 754 semantics that are supported by most +hardware, except that non-default rounding modes, unmasked exceptions, and +exception flags are not currently supported. + +There is currently no support for higher-precision types like quad-precision, +double-double, or extended-precision, nor for narrower-precision types like +half-precision. + +NaNs are encoded following the IEEE 754-2008 recommendation, with quiet NaN +being encoded with the MSB of the trailing significand set to 1, and signaling +NaNs being indicated by the MSB of the trailing significand set to 0. + +Except for bitwise and memory instructions, NaNs returned from arithmetic +instructions are encoded as follows: + +- If all NaN inputs to an instruction are quiet NaNs with all bits of the + trailing significand other than the MSB set to 0, the result is a quiet + NaN with a nondeterministic sign bit and all bits of the trailing + significand other than the MSB set to 0. +- Otherwise the result is a quiet NaN with a nondeterministic sign bit + and all bits of the trailing significand other than the MSB set to + nondeterministic values. + +- f32 +- f64 + +CPU flags types +--------------- + +Some target ISAs use CPU flags to represent the result of a comparison.
+
+Integer types
+-------------
+
+Integer values have a fixed size and can be interpreted as either signed or
+unsigned. Some instructions will interpret an operand as a signed or unsigned
+number; others don't care.
+
+The support for i8 and i16 arithmetic is incomplete, and using these types
+could lead to bugs.
+
+- i8
+- i16
+- i32
+- i64
+
+Floating point types
+--------------------
+
+The floating point types have the IEEE 754 semantics that are supported by most
+hardware, except that non-default rounding modes, unmasked exceptions, and
+exception flags are not currently supported.
+
+There is currently no support for higher-precision types like quad-precision,
+double-double, or extended-precision, nor for narrower-precision types like
+half-precision.
+
+NaNs are encoded following the IEEE 754-2008 recommendation, with quiet NaNs
+being encoded with the MSB of the trailing significand set to 1, and signaling
+NaNs being indicated by the MSB of the trailing significand set to 0.
+
+Except for bitwise and memory instructions, NaNs returned from arithmetic
+instructions are encoded as follows:
+
+- If all NaN inputs to an instruction are quiet NaNs with all bits of the
+  trailing significand other than the MSB set to 0, the result is a quiet
+  NaN with a nondeterministic sign bit and all bits of the trailing
+  significand other than the MSB set to 0.
+- Otherwise the result is a quiet NaN with a nondeterministic sign bit
+  and all bits of the trailing significand other than the MSB set to
+  nondeterministic values.
+
+- f32
+- f64
+
+CPU flags types
+---------------
+
+Some target ISAs use CPU flags to represent the result of a comparison. These
+CPU flags are represented as two value types depending on the type of values
+compared.
+
+Since some ISAs don't have CPU flags, these value types should not be used
+until the legalization phase of compilation where the code is adapted to fit
+the target ISA. Use instructions like `icmp` instead.
+
+The CPU flags types are also restricted such that two flags values cannot be
+live at the same time. After legalization, some instruction encodings will
+clobber the flags, and flags values are not allowed to be live across such
+instructions either. The verifier enforces these rules.
+
+- iflags
+- fflags
+
+SIMD vector types
+-----------------
+
+A SIMD vector type represents a vector of values from one of the scalar types
+(boolean, integer, and floating point). Each scalar value in a SIMD type is
+called a *lane*. The number of lanes must be a power of two in the range 2-256.
+
+i%Bx%N
+    A SIMD vector of integers. The lane type `iB` is one of the integer
+    types `i8` ... `i64`.
+
+    Some concrete integer vector types are `i32x4`, `i64x8`, and
+    `i16x4`.
+
+    The size of a SIMD integer vector in memory is :math:`N B\over 8` bytes.
+
+f32x%N
+    A SIMD vector of single precision floating point numbers.
+
+    Some concrete `f32` vector types are: `f32x2`, `f32x4`,
+    and `f32x8`.
+
+    The size of a `f32` vector in memory is :math:`4N` bytes.
+
+f64x%N
+    A SIMD vector of double precision floating point numbers.
+
+    Some concrete `f64` vector types are: `f64x2`, `f64x4`,
+    and `f64x8`.
+
+    The size of a `f64` vector in memory is :math:`8N` bytes.
+
+b1x%N
+    A boolean SIMD vector.
+
+    Boolean vectors are used when comparing SIMD vectors. For example,
+    comparing two `i32x4` values would produce a `b1x4` result.
+
+    Like the `b1` type, a boolean vector cannot be stored in memory.
+
+Pseudo-types and type classes
+-----------------------------
+
+These are not concrete types, but convenient names used to refer to real types
+in this reference.
+
+iAddr
+    A pointer-sized integer representing an address.
+
+    This is either `i32`, or `i64`, depending on whether the target
+    platform has 32-bit or 64-bit pointers.
+
+iB
+    Any of the scalar integer types `i8` -- `i64`.
+
+Int
+    Any scalar *or vector* integer type: `iB` or `iBxN`.
+
+fB
+    Either of the floating point scalar types: `f32` or `f64`.
+
+Float
+    Any scalar *or vector* floating point type: `fB` or `fBxN`.
+
+%Tx%N
+    Any SIMD vector type.
+
+Mem
+    Any type that can be stored in memory: `Int` or `Float`.
+
+Testable
+    Either `b1` or `iN`.
+
+Immediate operand types
+-----------------------
+
+These types are not part of the normal SSA type system. They are used to
+indicate the different kinds of immediate operands on an instruction.
+
+imm64
+    A 64-bit immediate integer. The value of this operand is interpreted as a
+    signed two's complement integer. Instruction encodings may limit the valid
+    range.
+
+    In the textual format, `imm64` immediates appear as decimal or
+    hexadecimal literals using the same syntax as C.
+
+offset32
+    A signed 32-bit immediate address offset.
+
+    In the textual format, `offset32` immediates always have an explicit
+    sign, and a 0 offset may be omitted.
+
+ieee32
+    A 32-bit immediate floating point number in the IEEE 754-2008 binary32
+    interchange format. All bit patterns are allowed.
+
+ieee64
+    A 64-bit immediate floating point number in the IEEE 754-2008 binary64
+    interchange format. All bit patterns are allowed.
+
+bool
+    A boolean immediate value, either false or true.
+
+    In the textual format, `bool` immediates appear as 'false'
+    and 'true'.
+
+intcc
+    An integer condition code. See the `icmp` instruction for details.
+
+floatcc
+    A floating point condition code. See the `fcmp` instruction for details.
+
+The two IEEE floating point immediate types `ieee32` and `ieee64`
+are displayed as hexadecimal floating point literals in the textual :term:`IR`
+format (see the example below). Decimal floating point literals are not allowed
+because some computer systems can round differently when converting to binary.
+The hexadecimal floating point format is mostly the same as the one used by
+C99, but extended to represent all NaN bit patterns:
+
+Normal numbers
+    Compatible with C99: ``-0x1.Tpe`` where ``T`` are the trailing
+    significand bits encoded as hexadecimal, and ``e`` is the unbiased exponent
+    as a decimal number. `ieee32` has 23 trailing significand bits. They
+    are padded with an extra LSB to produce 6 hexadecimal digits. This is not
+    necessary for `ieee64` which has 52 trailing significand bits
+    forming 13 hexadecimal digits with no padding.
+
+Zeros
+    Positive and negative zero are displayed as ``0.0`` and ``-0.0`` respectively.
+
+Subnormal numbers
+    Compatible with C99: ``-0x0.Tpemin`` where ``T`` are the trailing
+    significand bits encoded as hexadecimal, and ``emin`` is the minimum exponent
+    as a decimal number.
+
+Infinities
+    Either ``-Inf`` or ``Inf``.
+
+Quiet NaNs
+    Quiet NaNs have the MSB of the trailing significand set. If the remaining
+    bits of the trailing significand are all zero, the value is displayed as
+    ``-NaN`` or ``NaN``. Otherwise, ``-NaN:0xT`` where ``T`` are the trailing
+    significand bits encoded as hexadecimal.
+
+Signaling NaNs
+    Displayed as ``-sNaN:0xT``.
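+
+For example (a minimal sketch), the `f64` value 3.25 and a default quiet NaN
+would be written as::
+
+    v0 = f64const 0x1.a000000000000p1
+    v1 = f64const NaN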
+
+Control flow
+============
+
+Branches transfer control to a new EBB and provide values for the target EBB's
+arguments, if it has any. Conditional branches only take the branch if their
+condition is satisfied; otherwise execution continues at the following
+instruction in the EBB.
+
+JT = jump_table [EBB0, EBB1, ..., EBBn]
+    Declare a jump table in the :term:`function preamble`.
+
+    This declares a jump table for use by the `br_table` indirect branch
+    instruction. Entries in the table are EBB names (see the example below).
+
+    The EBBs listed must belong to the current function, and they can't have
+    any arguments.
+
+    :arg EBB0: Target EBB when ``x = 0``.
+    :arg EBB1: Target EBB when ``x = 1``.
+    :arg EBBn: Target EBB when ``x = n``.
+    :result: A jump table identifier. (Not an SSA value).
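+
+For example (a sketch, assuming the `br_table` form that takes a default
+destination EBB)::
+
+    jt0 = jump_table [ebb1, ebb2, ebb3]  ; declared in the preamble
+    br_table v0, ebb4, jt0               ; branches to ebb4 if v0 is out of range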
+
+Traps stop the program because something went wrong. The exact behavior depends
+on the target instruction set architecture and operating system. There are
+explicit trap instructions defined below, but some instructions may also cause
+traps for certain input values. For example, `udiv` traps when the divisor
+is zero.
+
+
+Function calls
+==============
+
+A function call needs a target function and a :term:`function signature`. The
+target function may be determined dynamically at runtime, but the signature must
+be known when the function call is compiled. The function signature describes
+how to call the function, including parameters, return values, and the calling
+convention:
+
+.. productionlist::
+    signature : "(" [paramlist] ")" ["->" retlist] [callconv]
+    paramlist : param { "," param }
+    retlist : paramlist
+    param : type [paramext] [paramspecial]
+    paramext : "uext" | "sext"
+    paramspecial : "sret" | "link" | "fp" | "csr" | "vmctx" | "sigid" | "stack_limit"
+    callconv : "fast" | "cold" | "system_v" | "fastcall" | "baldrdash_system_v" | "baldrdash_windows"
+
+A function's calling convention determines exactly how arguments and return
+values are passed, and how stack frames are managed. Since all of these details
+depend on both the instruction set architecture and possibly the operating
+system, a function's calling convention is only fully determined by a
+`(TargetIsa, CallConv)` tuple.
+
+=========== ================================================================
+Name        Description
+=========== ================================================================
+sret        pointer to a return value in memory
+link        return address
+fp          the initial value of the frame pointer
+csr         callee-saved register
+vmctx       VM context pointer, which may contain pointers to heaps etc.
+sigid       signature id, for checking caller/callee signature compatibility
+stack_limit limit value for the size of the stack
+=========== ================================================================
+
+================== ==========================================================
+Name               Description
+================== ==========================================================
+fast               not-ABI-stable convention for best performance
+cold               not-ABI-stable convention for infrequently executed code
+system_v           System V-style convention used on many platforms
+fastcall           Windows "fastcall" convention, also used for x64 and ARM
+baldrdash_system_v SpiderMonkey WebAssembly convention on platforms natively
+                   using System V.
+baldrdash_windows  SpiderMonkey WebAssembly convention on platforms natively
+                   using Windows.
+================== ==========================================================
+
+The "not-ABI-stable" conventions do not follow an external specification and
+may change between versions of Cranelift.
+
+The "fastcall" convention is not yet implemented.
+
+Parameters and return values have flags whose meaning is mostly
+target-dependent. These flags support interfacing with code produced by other
+compilers.
+
+Functions that are called directly must be declared in the :term:`function
+preamble`:
+
+FN = [colocated] NAME signature
+    Declare a function so it can be called directly.
+
+    If the colocated keyword is present, the symbol's definition will be
+    defined along with the current function, such that it can use more
+    efficient addressing.
+
+    :arg NAME: Name of the function, passed to the linker for resolution.
+    :arg signature: Function signature. See below.
+    :result FN: A function identifier that can be used with `call`.
+
+This simple example illustrates direct function calls and signatures:
+
+.. literalinclude:: callex.clif
+    :language: clif
+    :lines: 3-
+
+Indirect function calls use a signature declared in the preamble.
+
+.. _memory:
+
+Memory
+======
+
+Cranelift provides fully general `load` and `store` instructions for accessing
+memory, as well as :ref:`extending loads and truncating stores
+<extload-truncstore>`.
+
+If the memory at the given address is not :term:`addressable`, the behavior of
+these instructions is undefined. If it is addressable but not
+:term:`accessible`, they :term:`trap`.
+
+There are also more restricted operations for accessing specific types of memory
+objects.
+
+Additionally, instructions are provided for handling multi-register addressing.
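+
+For example (a minimal sketch; ``v0`` is assumed to hold an address), a plain
+load, an extending load, and a truncating store::
+
+    v1 = load.i64 v0
+    v2 = sload16.i64 v0+8
+    istore8 v2, v0+16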
+
+Memory operation flags
+----------------------
+
+Loads and stores can have flags that loosen their semantics in order to enable
+optimizations.
+
+======== ===============================================
+Flag     Description
+======== ===============================================
+notrap   Memory is assumed to be :term:`accessible`.
+aligned  Trapping allowed for misaligned accesses.
+readonly The data at the specified address will not be
+         modified between when this function is
+         called and exited.
+======== ===============================================
+
+When the ``notrap`` flag is set, the behavior is undefined if the memory
+is not :term:`accessible`.
+
+Loads and stores are *misaligned* if the resultant address is not a multiple of
+the expected alignment. By default, misaligned loads and stores are allowed,
+but when the ``aligned`` flag is set, a misaligned memory access is allowed to
+:term:`trap`.
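+
+In the textual format, the flags appear between the opcode and the address
+operand (a minimal sketch)::
+
+    v1 = load.i32 notrap aligned readonly v0+12
+    store aligned v1, v0+16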
+
+Explicit Stack Slots
+--------------------
+
+One set of restricted memory operations access the current function's stack
+frame. The stack frame is divided into fixed-size stack slots that are
+allocated in the :term:`function preamble`. Stack slots are not typed; they
+simply represent a contiguous sequence of :term:`accessible` bytes in the stack
+frame.
+
+SS = explicit_slot Bytes, Flags...
+    Allocate a stack slot in the preamble.
+
+    If no alignment is specified, Cranelift will pick an appropriate alignment
+    for the stack slot based on its size and access patterns.
+
+    :arg Bytes: Stack slot size in bytes.
+    :flag align(N): Request at least N bytes alignment.
+    :result SS: Stack slot index.
+
+The dedicated stack access instructions are easy for the compiler to reason
+about because stack slots and offsets are fixed at compile time. For example,
+the alignment of these stack memory accesses can be inferred from the offsets
+and stack slot alignments.
+
+It's also possible to obtain the address of a stack slot, which can be used
+in :ref:`unrestricted loads and stores <memory>`.
+
+The `stack_addr` instruction can be used to macro-expand the stack access
+instructions before instruction selection::
+
+    v0 = stack_load.f64 ss3, 16
+    ; Expands to:
+    v1 = stack_addr ss3, 16
+    v0 = load.f64 v1
+
+When Cranelift code is running in a sandbox, it can also be necessary to include
+stack overflow checks in the prologue.
+
+Global values
+-------------
+
+A *global value* is an object whose value is not known at compile time. The
+value is computed at runtime by `global_value`, possibly using
+information provided by the linker via relocations. There are multiple
+kinds of global values using different methods for determining their value.
+Cranelift does not track the types of global values; they are simply values
+stored in non-stack memory.
+
+When Cranelift is generating code for a virtual machine environment, globals can
+be used to access data structures in the VM's runtime. This requires functions
+to have access to a *VM context pointer* which is used as the base address.
+Typically, the VM context pointer is passed as a hidden function argument to
+Cranelift functions.
+
+Chains of global value expressions are possible, but cycles are not allowed.
+They will be caught by the IR verifier.
+
+GV = vmctx
+    Declare a global value of the address of the VM context struct.
+
+    This declares a global value which is the VM context pointer which may
+    be passed as a hidden argument to functions JIT-compiled for a VM.
+
+    Typically, the VM context is a `#[repr(C, packed)]` struct.
+
+    :result GV: Global value.
+
+A global value can also be derived by treating another global variable as a
+struct pointer and loading from one of its fields. This makes it possible to
+chase pointers into VM runtime data structures.
+
+GV = load.Type BaseGV [Offset]
+    Declare a global value pointed to by BaseGV plus Offset, with type Type.
+
+    It is assumed that BaseGV plus Offset resides in accessible memory with the
+    appropriate alignment for storing a value with type Type.
+
+    :arg BaseGV: Global value providing the base pointer.
+    :arg Offset: Offset added to the base before loading.
+    :result GV: Global value.
+
+GV = iadd_imm BaseGV, Offset
+    Declare a global value which has the value of BaseGV offset by Offset.
+
+    :arg BaseGV: Global value providing the base value.
+    :arg Offset: Offset added to the base value.
+
+GV = [colocated] symbol Name
+    Declare a symbolic address global value.
+
+    The value of GV is symbolic and will be assigned a relocation, so that
+    it can be resolved by a later linking phase.
+
+    If the colocated keyword is present, the symbol's definition will be
+    defined along with the current function, such that it can use more
+    efficient addressing.
+
+    :arg Name: External name.
+    :result GV: Global value.
+
+Heaps
+-----
+
+Code compiled from WebAssembly or asm.js runs in a sandbox where it can't access
+all process memory. Instead, it is given a small set of memory areas to work
+in, and all accesses are bounds checked. Cranelift models this through the
+concept of *heaps*.
+
+A heap is declared in the function preamble and can be accessed with the
+`heap_addr` instruction that :term:`traps` on out-of-bounds accesses or
+returns a pointer that is guaranteed to trap. Heap addresses can be smaller than
+the native pointer size, for example unsigned `i32` offsets on a 64-bit
+architecture.
+
+.. digraph:: static
+    :align: center
+    :caption: Heap address space layout
+
+    node [
+        shape=record,
+        fontsize=10,
+        fontname="Vera Sans, DejaVu Sans, Liberation Sans, Arial, Helvetica, sans"
+    ]
+    "static" [label="mapped\npages|unmapped\npages|offset_guard\npages"]
+
+A heap appears as three consecutive ranges of address space:
+
+1. The *mapped pages* are the :term:`accessible` memory range in the heap. A
+   heap may have a minimum guaranteed size which means that some mapped pages
+   are always present.
+2. The *unmapped pages* are a possibly empty range of address space that may be
+   mapped in the future when the heap is grown. They are :term:`addressable` but
+   not :term:`accessible`.
+3. The *offset-guard pages* are a range of address space that is guaranteed to
+   always cause a trap when accessed. They are used to optimize bounds checking
+   for heap accesses with a shared base pointer. They are :term:`addressable` but
+   not :term:`accessible`.
+
+The *heap bound* is the total size of the mapped and unmapped pages. This is
+the bound that `heap_addr` checks against. Memory accesses inside the
+heap bounds can trap if they hit an unmapped page (which is not
+:term:`accessible`).
+
+Two styles of heaps are supported: *static* and *dynamic*. They behave
+differently when resized.
+
+Static heaps
+~~~~~~~~~~~~
+
+A *static heap* starts out with all the address space it will ever need, so it
+never moves to a different address. At the base address is a number of mapped
+pages corresponding to the heap's current size. Then follows a number of
+unmapped pages where the heap can grow up to its maximum size. 
After the +unmapped pages follow the offset-guard pages which are also guaranteed to +generate a trap when accessed. + +H = static Base, min MinBytes, bound BoundBytes, offset_guard OffsetGuardBytes + Declare a static heap in the preamble. + + :arg Base: Global value holding the heap's base address. + :arg MinBytes: Guaranteed minimum heap size in bytes. Accesses below this + size will never trap. + :arg BoundBytes: Fixed heap bound in bytes. This defines the amount of + address space reserved for the heap, not including the offset-guard + pages. + :arg OffsetGuardBytes: Size of the offset-guard pages in bytes. + +Dynamic heaps +~~~~~~~~~~~~~ + +A *dynamic heap* can be relocated to a different base address when it is +resized, and its bound can move dynamically. The offset-guard pages move when +the heap is resized. The bound of a dynamic heap is stored in a global value. + +H = dynamic Base, min MinBytes, bound BoundGV, offset_guard OffsetGuardBytes + Declare a dynamic heap in the preamble. + + :arg Base: Global value holding the heap's base address. + :arg MinBytes: Guaranteed minimum heap size in bytes. Accesses below this + size will never trap. + :arg BoundGV: Global value containing the current heap bound in bytes. + :arg OffsetGuardBytes: Size of the offset-guard pages in bytes. + +Heap examples +~~~~~~~~~~~~~ + +The SpiderMonkey VM prefers to use fixed heaps with a 4 GB bound and 2 GB of +offset-guard pages when running WebAssembly code on 64-bit CPUs. The combination +of a 4 GB fixed bound and 1-byte bounds checks means that no code needs to be +generated for bounds checks at all: + +.. literalinclude:: heapex-sm64.clif + :language: clif + :lines: 2- + +A static heap can also be used for 32-bit code when the WebAssembly module +declares a small upper bound on its memory. A 1 MB static bound with a single 4 +KB offset-guard page still has opportunities for sharing bounds checking code: + +.. literalinclude:: heapex-sm32.clif + :language: clif + :lines: 2- + +If the upper bound on the heap size is too large, a dynamic heap is required +instead. + +Finally, a runtime environment that simply allocates a heap with +:c:func:`malloc()` may not have any offset-guard pages at all. In that case, +full bounds checking is required for each access: + +.. literalinclude:: heapex-dyn.clif + :language: clif + :lines: 2- + + +Tables +------ + +Code compiled from WebAssembly often needs access to objects outside of its +linear memory. WebAssembly uses *tables* to allow programs to refer to opaque +values through integer indices. + +A table is declared in the function preamble and can be accessed with the +`table_addr` instruction that :term:`traps` on out-of-bounds accesses. +Table addresses can be smaller than the native pointer size, for example +unsigned `i32` offsets on a 64-bit architecture. + +A table appears as a consecutive range of address space, conceptually +divided into elements of fixed sizes, which are identified by their index. +The memory is :term:`accessible`. + +The *table bound* is the number of elements currently in the table. This is +the bound that `table_addr` checks against. + +A table can be relocated to a different base address when it is resized, and +its bound can move dynamically. The bound of a table is stored in a global +value. + +T = dynamic Base, min MinElements, bound BoundGV, element_size ElementSize + Declare a table in the preamble. + + :arg Base: Global value holding the table's base address. + :arg MinElements: Guaranteed minimum table size in elements. 
+    :arg BoundGV: Global value containing the current table bound in elements.
+    :arg ElementSize: Size of each element.
+
+Constant materialization
+------------------------
+
+A few instructions have variants that take immediate operands, but in general
+an instruction is required to load a constant into an SSA value: `iconst`,
+`f32const`, `f64const` and `bconst` serve this purpose.
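+
+For example (a minimal sketch)::
+
+    v0 = iconst.i32 42
+    v1 = f64const 0x1.8p1   ; 3.0
+    v2 = bconst.b1 true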
+
+Bitwise operations
+------------------
+
+The bitwise operations operate on any value type: integers, floating point
+numbers, and booleans. When operating on integer or floating point types, the
+bitwise operations work on the binary representation of the values. When
+operating on boolean values, the bitwise operations work as logical operators.
+
+The shift and rotate operations only work on integer types (scalar and vector).
+The shift amount does not have to be the same type as the value being shifted.
+Only the low `B` bits of the shift amount are significant.
+
+When operating on an integer vector type, the shift amount is still a scalar
+type, and all the lanes are shifted the same amount. The shift amount is masked
+to the number of bits in a *lane*, not the full size of the vector type.
+
+The bit-counting instructions are scalar only.
+
+Floating point operations
+-------------------------
+
+These operations generally follow IEEE 754-2008 semantics.
+
+Sign bit manipulations
+~~~~~~~~~~~~~~~~~~~~~~
+
+The sign manipulating instructions work as bitwise operations, so they don't
+have special behavior for signaling NaN operands. The exponent and trailing
+significand bits are always preserved.
+
+Minimum and maximum
+~~~~~~~~~~~~~~~~~~~
+
+These instructions return the larger or smaller of their operands. Note that
+unlike the IEEE 754-2008 `minNum` and `maxNum` operations, these instructions
+return NaN when either input is NaN.
+
+When comparing zeroes, these instructions behave as if :math:`-0.0 < 0.0`.
+
+Rounding
+~~~~~~~~
+
+These instructions round their argument to a nearby integral value, still
+represented as a floating point number.
+
+Conversion operations
+---------------------
+
+.. _extload-truncstore:
+
+Extending loads and truncating stores
+-------------------------------------
+
+Most ISAs provide instructions that load an integer value smaller than a register
+and extend it to the width of the register. Similarly, store instructions that
+only write the low bits of an integer register are common.
+
+In addition to the normal `load` and `store` instructions, Cranelift
+provides extending loads and truncating stores for 8, 16, and 32-bit memory
+accesses.
+
+These instructions succeed, trap, or have undefined behavior, under the same
+conditions as :ref:`normal loads and stores <memory>`.
+
+ISA-specific instructions
+=========================
+
+Target ISAs can define supplemental instructions that do not make sense to
+support generally.
+
+x86
+-----
+
+Instructions that can only be used by the x86 target ISA.
+
+Codegen implementation instructions
+===================================
+
+Frontends don't need to emit the instructions in this section themselves;
+Cranelift will generate them automatically as needed.
+
+Legalization operations
+-----------------------
+
+These instructions are used as helpers when legalizing types and operations for
+the target ISA.
+
+Special register operations
+---------------------------
+
+The prologue and epilogue of a function need to manipulate special registers
+like the stack pointer and the frame pointer. These instructions should not be
+used in regular code.
+
+CPU flag operations
+-------------------
+
+These operations are for working with the "flags" registers of some CPU
+architectures.
+
+Live range splitting
+--------------------
+
+Cranelift's register allocator assigns each SSA value to a register or a spill
+slot on the stack for its entire live range. Since the live range of an SSA
+value can be quite large, it is sometimes beneficial to split the live range
+into smaller parts.
+
+A live range is split by creating new SSA values that are copies of the
+original value or of each other. The copies are created by inserting `copy`,
+`spill`, or `fill` instructions, depending on whether the values
+are assigned to registers or stack slots.
+
+This approach permits SSA form to be preserved throughout the register
+allocation pass and beyond.
+
+Register values can be temporarily diverted to other registers by the
+`regmove` instruction, and to and from stack slots by `regspill`
+and `regfill`.
+
+Instruction groups
+==================
+
+All of the shared instructions are part of the `base` instruction
+group.
+
+Target ISAs may define further instructions in their own instruction groups.
+
+Implementation limits
+=====================
+
+Cranelift's intermediate representation imposes some limits on the size of
+functions and the number of entities allowed. If these limits are exceeded, the
+implementation will panic.
+
+Number of instructions in a function
+    At most :math:`2^{31} - 1`.
+
+Number of EBBs in a function
+    At most :math:`2^{31} - 1`.
+
+    Every EBB needs at least a terminator instruction anyway.
+
+Number of secondary values in a function
+    At most :math:`2^{31} - 1`.
+
+    Secondary values are any SSA values that are not the first result of an
+    instruction.
+
+Other entities declared in the preamble
+    At most :math:`2^{32} - 1`.
+
+    This covers things like stack slots, jump tables, external functions, and
+    function signatures.
+
+Number of arguments to an EBB
+    At most :math:`2^{16}`.
+
+Number of arguments to a function
+    At most :math:`2^{16}`.
+
+    This follows from the limit on arguments to the entry EBB. Note that
+    Cranelift may add a handful of ABI register arguments as function signatures
+    are lowered. This is for representing things like the link register, the
+    incoming frame pointer, and callee-saved registers that are saved in the
+    prologue.
+
+Size of function call arguments on the stack
+    At most :math:`2^{32} - 1` bytes.
+
+    This is probably not possible to achieve given the limit on the number of
+    arguments, except by requiring extremely large offsets for stack arguments.
+
+Glossary
+========
+
+.. glossary::
+
+    addressable
+        Memory in which loads and stores have defined behavior. They either
+        succeed or :term:`trap`, depending on whether the memory is
+        :term:`accessible`.
+
+    accessible
+        :term:`Addressable` memory in which loads and stores always succeed
+        without :term:`trapping`, except where specified otherwise (e.g. with
+        the `aligned` flag). Heaps, globals, tables, and the stack may contain
+        accessible, merely addressable, and outright unaddressable regions.
+        There may also be additional regions of addressable and/or accessible
+        memory not explicitly declared.
+
+    basic block
+        A maximal sequence of instructions that can only be entered from the
+        top, and that contains no branch or terminator instructions except for
+        the last instruction.
+
+    entry block
+        The :term:`EBB` that is executed first in a function. 
Currently, a + Cranelift function must have exactly one entry block which must be the + first block in the function. The types of the entry block arguments must + match the types of arguments in the function signature. + + extended basic block + EBB + A maximal sequence of instructions that can only be entered from the + top, and that contains no :term:`terminator instruction`\s except for + the last one. An EBB can contain conditional branches that can fall + through to the following instructions in the block, but only the first + instruction in the EBB can be a branch target. + + The last instruction in an EBB must be a :term:`terminator instruction`, + so execution cannot flow through to the next EBB in the function. (But + there may be a branch to the next EBB.) + + Note that some textbooks define an EBB as a maximal *subtree* in the + control flow graph where only the root can be a join node. This + definition is not equivalent to Cranelift EBBs. + + EBB parameter + A formal parameter for an EBB is an SSA value that dominates everything + in the EBB. For each parameter declared by an EBB, a corresponding + argument value must be passed when branching to the EBB. The function's + entry EBB has parameters that correspond to the function's parameters. + + EBB argument + Similar to function arguments, EBB arguments must be provided when + branching to an EBB that declares formal parameters. When execution + begins at the top of an EBB, the formal parameters have the values of + the arguments passed in the branch. + + function signature + A function signature describes how to call a function. It consists of: + + - The calling convention. + - The number of arguments and return values. (Functions can return + multiple values.) + - Type and flags of each argument. + - Type and flags of each return value. + + Not all function attributes are part of the signature. For example, a + function that never returns could be marked as ``noreturn``, but that + is not necessary to know when calling it, so it is just an attribute, + and not part of the signature. + + function preamble + A list of declarations of entities that are used by the function body. + Some of the entities that can be declared in the preamble are: + + - Stack slots. + - Functions that are called directly. + - Function signatures for indirect function calls. + - Function flags and attributes that are not part of the signature. + + function body + The extended basic blocks which contain all the executable code in a + function. The function body follows the function preamble. + + intermediate representation + IR + The language used to describe functions to Cranelift. This reference + describes the syntax and semantics of Cranelift IR. The IR has two + forms: Textual, and an in-memory data structure. + + stack slot + A fixed size memory allocation in the current function's activation + frame. These include :term:`explicit stack slot`\s and + :term:`spill stack slot`\s. + + explicit stack slot + A fixed size memory allocation in the current function's activation + frame. These differ from :term:`spill stack slot`\s in that they can + be created by frontends and they may have their addresses taken. + + spill stack slot + A fixed size memory allocation in the current function's activation + frame. These differ from :term:`explicit stack slot`\s in that they are + only created during register allocation, and they may not have their + address taken. 
+ + terminator instruction + A control flow instruction that unconditionally directs the flow of + execution somewhere else. Execution never continues at the instruction + following a terminator instruction. + + The basic terminator instructions are `br`, `return`, and + `trap`. Conditional branches and instructions that trap + conditionally are not terminator instructions. + + trap + traps + trapping + Terminates execution of the current thread. The specific behavior after + a trap depends on the underlying OS. For example, a common behavior is + delivery of a signal, with the specific signal depending on the event + that triggered it. diff --git a/cranelift/docs/langref.rst b/cranelift/docs/langref.rst new file mode 100644 index 0000000000..904584ab13 --- /dev/null +++ b/cranelift/docs/langref.rst @@ -0,0 +1,8 @@ +:orphan: + +**************** +Redirection Page +**************** + +Cranelift's IR is documented in :doc:`ir`. Please update links to point to +this new page. diff --git a/cranelift/docs/make.bat b/cranelift/docs/make.bat new file mode 100644 index 0000000000..1b9759a7b5 --- /dev/null +++ b/cranelift/docs/make.bat @@ -0,0 +1,36 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build +set SPHINXPROJ=cranelift + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/cranelift/docs/meta.rst b/cranelift/docs/meta.rst new file mode 100644 index 0000000000..5bc000a800 --- /dev/null +++ b/cranelift/docs/meta.rst @@ -0,0 +1,386 @@ +********************************* +Cranelift Meta Language Reference +********************************* + +.. default-domain:: py +.. highlight:: python + +The Cranelift meta language is used to define instructions for Cranelift. It is a +domain specific language embedded in Rust. + +.. todo:: Point to the Rust documentation of the meta crate here. + + This document is very out-of-date. Instead, you can have a look at the + work-in-progress documentation of the `meta` crate there: + https://docs.rs/cranelift-codegen-meta/0.34.0/cranelift_codegen_meta/. + +This document describes the Python modules that form the embedded DSL. + +The meta language descriptions are Python modules under the +`cranelift-codegen/meta-python` directory. The descriptions are processed in two +steps: + +1. The Python modules are imported. This has the effect of building static data + structures in global values in the modules. These static data structures + in the `base` and `isa` packages use the classes in the + `cdsl` package to describe instruction sets and other properties. + +2. The static data structures are processed to produce Rust source code and + constant tables. 
+ +The main driver for this source code generation process is the +`cranelift-codegen/meta-python/build.py` script which is invoked as part of the build +process if anything in the `cranelift-codegen/meta-python` directory has changed +since the last build. + +Settings +======== + +Settings are used by the environment embedding Cranelift to control the details +of code generation. Each setting is defined in the meta language so a compact +and consistent Rust representation can be generated. Shared settings are defined +in the `base.settings` module. Some settings are specific to a target ISA, +and defined in a `settings.py` module under the appropriate +`cranelift-codegen/meta-python/isa/*` directory. + +Settings can take boolean on/off values, small numbers, or explicitly enumerated +symbolic values. + +All settings must belong to a *group*, represented by a :class:`SettingGroup` object. + +Normally, a setting group corresponds to all settings defined in a module. Such +a module looks like this:: + + group = SettingGroup('example') + + foo = BoolSetting('use the foo') + bar = BoolSetting('enable bars', True) + opt = EnumSetting('optimization level', 'Debug', 'Release') + + group.close(globals()) + +Instruction descriptions +======================== + +New instructions are defined as instances of the :class:`Instruction` +class. As instruction instances are created, they are added to the currently +open :class:`InstructionGroup`. + +The basic Cranelift instruction set described in :doc:`ir` is defined by the +Python module `base.instructions`. This module has a global value +`base.instructions.GROUP` which is an :class:`InstructionGroup` instance +containing all the base instructions. + +An instruction is defined with a set of distinct input and output operands which +must be instances of the :class:`Operand` class. + +Cranelift uses two separate type systems for operand kinds and SSA values. + +Type variables +-------------- + +Instruction descriptions can be made polymorphic by using +:class:`cdsl.operands.Operand` instances that refer to a *type variable* +instead of a concrete value type. Polymorphism only works for SSA value +operands. Other operands have a fixed operand kind. + +If multiple operands refer to the same type variable they will be required to +have the same concrete type. For example, this defines an integer addition +instruction:: + + Int = TypeVar('Int', 'A scalar or vector integer type', ints=True, simd=True) + a = Operand('a', Int) + x = Operand('x', Int) + y = Operand('y', Int) + + iadd = Instruction('iadd', 'Integer addition', ins=(x, y), outs=a) + +The type variable `Int` is allowed to vary over all scalar and vector integer +value types, but in a given instance of the `iadd` instruction, the two +operands must have the same type, and the result will be the same type as the +inputs. + +There are some practical restrictions on the use of type variables, see +:ref:`restricted-polymorphism`. + +Immediate operands +------------------ + +Immediate instruction operands don't correspond to SSA values, but have values +that are encoded directly in the instruction. Immediate operands don't +have types from the :class:`cdsl.types.ValueType` type system; they often have +enumerated values of a specific type. The type of an immediate operand is +indicated with an instance of :class:`ImmediateKind`. + +Entity references +----------------- + +Instruction operands can also refer to other entities in the same function. 
This +can be extended basic blocks, or entities declared in the function preamble. + +Value types +----------- + +Concrete value types are represented as instances of :class:`ValueType`. There +are subclasses to represent scalar and vector types. + +There are no predefined vector types, but they can be created as needed with +the :func:`LaneType.by` function. + +Instruction representation +========================== + +The Rust in-memory representation of instructions is derived from the +instruction descriptions. Part of the representation is generated, and part is +written as Rust code in the ``cranelift.instructions`` module. The instruction +representation depends on the input operand kinds and whether the instruction +can produce multiple results. + +Since all SSA value operands are represented as a `Value` in Rust code, value +types don't affect the representation. + +When an instruction description is created, it is automatically assigned a +predefined instruction format which is an instance of +:class:`InstructionFormat`. + +.. _restricted-polymorphism: + +Restricted polymorphism +----------------------- + +The instruction format strictly controls the kinds of operands on an +instruction, but it does not constrain value types at all. A given instruction +description typically does constrain the allowed value types for its value +operands. The type variables give a lot of freedom in describing the value type +constraints, in practice more freedom than what is needed for normal instruction +set architectures. In order to simplify the Rust representation of value type +constraints, some restrictions are imposed on the use of type variables. + +A polymorphic instruction has a single *controlling type variable*. For a given +opcode, this type variable must be the type of the first result or the type of +the input value operand designated by the `typevar_operand` argument to the +:py:class:`InstructionFormat` constructor. By default, this is the first value +operand, which works most of the time. + +The value types of instruction results must be one of the following: + +1. A concrete value type. +2. The controlling type variable. +3. A type variable derived from the controlling type variable. + +This means that all result types can be computed from the controlling type +variable. + +Input values to the instruction are allowed a bit more freedom. Input value +types must be one of: + +1. A concrete value type. +2. The controlling type variable. +3. A type variable derived from the controlling type variable. +4. A free type variable that is not used by any other operands. + +This means that the type of an input operand can either be computed from the +controlling type variable, or it can vary independently of the other operands. + + +Encodings +========= + +Encodings describe how Cranelift instructions are mapped to binary machine code +for the target architecture. After the legalization pass, all remaining +instructions are expected to map 1-1 to native instruction encodings. Cranelift +instructions that can't be encoded for the current architecture are called +:term:`illegal instruction`\s. + +Some instruction set architectures have different :term:`CPU mode`\s with +incompatible encodings. For example, a modern ARMv8 CPU might support three +different CPU modes: *A64* where instructions are encoded in 32 bits, *A32* +where all instructions are 32 bits, and *T32* which has a mix of 16-bit and +32-bit instruction encodings. 
These are incompatible encoding spaces, and while
+an `iadd` instruction can be encoded in 32 bits in each of them, it's
+not the same 32 bits. It's a judgement call if CPU modes should be modelled as
+separate targets, or as sub-modes of the same target. In the ARMv8 case, the
+different register banks mean that it makes sense to model A64 as a separate
+target architecture, while A32 and T32 are CPU modes of the 32-bit ARM target.
+
+In a given CPU mode, there may be multiple valid encodings of the same
+instruction. Both RISC-V and ARMv8's T32 mode have 32-bit encodings of all
+instructions with 16-bit encodings available for some opcodes if certain
+constraints are satisfied.
+
+Encodings are guarded by :term:`sub-target predicate`\s. For example, the RISC-V
+"C" extension which specifies the compressed encodings may not be supported, and
+a predicate would be used to disable all of the 16-bit encodings in that case.
+This can also affect whether an instruction is legal. For example, x86 has a
+predicate that controls the SSE 4.1 instruction encodings. When that predicate
+is false, the SSE 4.1 instructions are not available.
+
+Encodings also have an :term:`instruction predicate` which depends on the
+specific values of the instruction's immediate fields. This is used to ensure
+that immediate address offsets are within range, for example. The instructions
+in the base Cranelift instruction set can often represent a wider range of
+immediates than any specific encoding. The fixed-size RISC-style encodings tend
+to have more range limitations than CISC-style variable length encodings like
+x86.
+
+The diagram below shows the relationship between the classes involved in
+specifying instruction encodings:
+
+.. digraph:: encoding
+
+    node [shape=record]
+    EncRecipe -> SubtargetPred
+    EncRecipe -> InstrFormat
+    EncRecipe -> InstrPred
+    Encoding [label="{Encoding|Opcode+TypeVars}"]
+    Encoding -> EncRecipe [label="+EncBits"]
+    Encoding -> CPUMode
+    Encoding -> SubtargetPred
+    Encoding -> InstrPred
+    Encoding -> Opcode
+    Opcode -> InstrFormat
+    CPUMode -> Target
+
+An :py:class:`Encoding` instance specifies the encoding of a concrete
+instruction. The following properties are used to select instructions to be
+encoded:
+
+- An opcode, e.g. `iadd_imm`, that must match the instruction's
+  opcode.
+- Values for any type variables if the opcode represents a polymorphic
+  instruction.
+- An :term:`instruction predicate` that must be satisfied by the instruction's
+  immediate operands.
+- The CPU mode that must be active.
+- A :term:`sub-target predicate` that must be satisfied by the currently active
+  sub-target.
+
+An encoding specifies an *encoding recipe* along with some *encoding bits* that
+the recipe can use for native opcode fields etc. The encoding recipe has
+additional constraints that must be satisfied:
+
+- An :py:class:`InstructionFormat` that must match the format required by the
+  opcodes of any encodings that use this recipe.
+- An additional :term:`instruction predicate`.
+- An additional :term:`sub-target predicate`.
+
+The additional predicates in the :py:class:`EncRecipe` are merged with the
+per-encoding predicates when generating the encoding matcher code. Often
+encodings only need the recipe predicates.
+
+Register constraints
+====================
+
+After an encoding recipe has been chosen for an instruction, it is the register
+allocator's job to make sure that the recipe's :term:`Register constraint`\s
+are satisfied. 
Most ISAs have separate integer and floating point registers,
+and instructions can usually only use registers from one of the banks. Some
+instruction encodings are even more constrained and can only use a subset of
+the registers in a bank. These constraints are expressed in terms of register
+classes.
+
+Sometimes the result of an instruction is placed in a register that must be the
+same as one of the input registers. Some instructions even use a fixed register
+for inputs or results.
+
+Each encoding recipe specifies separate constraints for its value operands and
+result. These constraints are separate from the instruction predicate which can
+only evaluate the instruction's immediate operands.
+
+Register class constraints
+--------------------------
+
+The most common type of register constraint is the register class. It specifies
+that an operand or result must be allocated one of the registers from the given
+register class::
+
+    IntRegs = RegBank('IntRegs', ISA, 'General purpose registers', units=16, prefix='r')
+    GPR = RegClass(IntRegs)
+    R = EncRecipe('R', Binary, ins=(GPR, GPR), outs=GPR)
+
+This defines an encoding recipe for the ``Binary`` instruction format where
+both input operands must be allocated from the ``GPR`` register class.
+
+Tied register operands
+----------------------
+
+In more compact machine code encodings, it is common to require that the result
+register is the same as one of the inputs. This is represented with tied
+operands::
+
+    CR = EncRecipe('CR', Binary, ins=(GPR, GPR), outs=0)
+
+This indicates that the result value must be allocated to the same register as
+the first input value. Tied operand constraints can only be used for result
+values, so the number always refers to one of the input values.
+
+Fixed register operands
+-----------------------
+
+Some instructions use hard-coded input and output registers for some value
+operands. An example is the ``pblendvb`` x86 SSE instruction which takes one
+of its three value operands in the hard-coded ``%xmm0`` register::
+
+    XMM0 = FPR[0]
+    SSE66_XMM0 = EncRecipe('SSE66_XMM0', Ternary, ins=(FPR, FPR, XMM0), outs=0)
+
+The syntax ``FPR[0]`` selects the first register from the ``FPR`` register
+class which consists of all the XMM registers.
+
+Stack operands
+--------------
+
+Cranelift's register allocator can assign an SSA value to a stack slot if there
+aren't enough registers. It will insert `spill` and `fill`
+instructions as needed to satisfy instruction operand constraints, but it is
+also possible to have instructions that can access stack slots directly::
+
+    CSS = EncRecipe('CSS', Unary, ins=GPR, outs=Stack(GPR))
+
+An output stack value implies a store to the stack, and an input value implies
+a load.
+
+Targets
+=======
+
+Cranelift can be compiled with support for multiple target instruction set
+architectures. Each ISA is represented by a :py:class:`cdsl.isa.TargetISA` instance.
+
+The definitions for each supported target live in a package under
+`cranelift-codegen/meta-python/isa`.
+
+Glossary
+========
+
+.. glossary::
+
+    Illegal instruction
+        An instruction is considered illegal if there is no encoding available
+        for the current CPU mode. The legality of an instruction depends on the
+        value of :term:`sub-target predicate`\s, so it can't always be
+        determined ahead of time.
+
+    CPU mode
+        Every target defines one or more CPU modes that determine how the CPU
+        decodes binary instructions. 
Some CPUs can switch modes dynamically with + a branch instruction (like ARM/Thumb), while other modes are + process-wide (like x86 32/64-bit). + + Sub-target predicate + A predicate that depends on the current sub-target configuration. + Examples are "Use SSE 4.1 instructions", "Use RISC-V compressed + encodings". Sub-target predicates can depend on both detected CPU + features and configuration settings. + + Instruction predicate + A predicate that depends on the immediate fields of an instruction. An + example is "the load address offset must be a 10-bit signed integer". + Instruction predicates do not depend on the registers selected for value + operands. + + Register constraint + Value operands and results correspond to machine registers. Encodings may + constrain operands to either a fixed register or a register class. There + may also be register constraints between operands, for example some + encodings require that the result register is one of the input + registers. diff --git a/cranelift/docs/regalloc.rst b/cranelift/docs/regalloc.rst new file mode 100644 index 0000000000..da227586a1 --- /dev/null +++ b/cranelift/docs/regalloc.rst @@ -0,0 +1,332 @@ +******************************** +Register Allocation in Cranelift +******************************** + +.. default-domain:: clif +.. highlight:: clif + +Cranelift uses a *decoupled, SSA-based* register allocator. Decoupled means that +register allocation is split into two primary phases: *spilling* and +*coloring*. SSA-based means that the code stays in SSA form throughout the +register allocator, and in fact is still in SSA form after register allocation. + +Before the register allocator is run, all instructions in the function must be +*legalized*, which means that every instruction has an entry in the +``encodings`` table. The encoding entries also provide register class +constraints on the instruction's operands that the register allocator must +satisfy. + +After the register allocator has run, the ``locations`` table provides a +register or stack slot location for all SSA values used by the function. The +register allocator may have inserted :inst:`spill`, :inst:`fill`, and +:inst:`copy` instructions to make that possible. + +SSA-based register allocation +============================= + +The phases of the SSA-based register allocator are: + +Liveness analysis + For each SSA value, determine exactly where it is live. + +Coalescing + Form *virtual registers* which are sets of SSA values that should be + assigned to the same location. Split live ranges such that values that + belong to the same virtual register don't have interfering live ranges. + +Spilling + The process of deciding which SSA values go in a stack slot and which + values go in a register. The spilling phase can also split live ranges by + inserting :inst:`copy` instructions, or transform the code in other ways to + reduce the number of values kept in registers. + + After spilling, the number of live register values never exceeds the number + of available registers. + +Reload + Insert :inst:`spill` and :inst:`fill` instructions as necessary such that + instructions that expect their operands in registers won't see values that + live on the stack and vice versa. + + Reuse registers containing values loaded from the stack as much as possible + without exceeding the maximum allowed register pressure. + +Coloring + The process of assigning specific registers to the live values. 
It's a + property of SSA form that this can be done in a linear scan of the + dominator tree without causing any additional spills. + + Make sure that specific register operand constraints are satisfied. + +The contract between the spilling and coloring phases is that the number of +values in registers never exceeds the number of available registers. This +sounds simple enough in theory, but in practice there are some complications. + +Real-world complications to SSA coloring +---------------------------------------- + +In practice, instruction set architectures don't have "K interchangeable +registers", and register pressure can't be measured with a single number. There +are complications: + +Different register banks + Most ISAs separate integer registers from floating point registers, and + instructions require their operands to come from a specific bank. This is a + fairly simple problem to deal with since the register banks are completely + disjoint. We simply count the number of integer and floating-point values + that are live independently, and make sure that each number does not exceed + the size of their respective register banks. + +Instructions with fixed operands + Some instructions use a fixed register for an operand. This happens on the + x86 ISAs: + + - Dynamic shift and rotate instructions take the shift amount in CL. + - Division instructions use RAX and RDX for both input and output operands. + - Wide multiply instructions use fixed RAX and RDX registers for input and + output operands. + - A few SSE variable blend instructions use a hardwired XMM0 input operand. + +Operands constrained to register subclasses + Some instructions can only use a subset of the registers for some operands. + For example, the ARM NEON vmla (scalar) instruction requires the scalar + operand to be located in D0-15 or even D0-7, depending on the data type. + The other operands can be from the full D0-31 register set. + +ABI boundaries + Before making a function call, arguments must be placed in specific + registers and stack locations determined by the ABI, and return values + appear in fixed registers. + + Some registers can be clobbered by the call and some are saved by the + callee. In some cases, only the low bits of a register are saved by the + callee. For example, ARM64 callees save only the low 64 bits of v8-15, and + Win64 callees only save the low 128 bits of AVX registers. + + ABI boundaries also affect the location of arguments to the entry block and + return values passed to the :inst:`return` instruction. + +Aliasing registers + Different registers sometimes share the same bits in the register bank. + This can make it difficult to measure register pressure. For example, the + x86 registers RAX, EAX, AX, AL, and AH overlap. + + If only one of the aliasing registers can be used at a time, the aliasing + doesn't cause problems since the registers can simply be counted as one + unit. + +Early clobbers + Sometimes an instruction requires that the register used for an output + operand does not alias any of the input operands. This happens for inline + assembly and in some other special cases. + + +Liveness Analysis +================= + +All the register allocator passes need to know exactly where SSA values are +live. The liveness analysis computes this information. + +The data structure representing the live range of a value uses the linear +layout of the function. All instructions and EBB headers are assigned a +*program position*. 
A starting point for a live range can be one of the
+following:
+
+- The instruction where the value is defined.
+- The EBB header where the value is an EBB parameter.
+- An EBB header where the value is live-in because it was defined in a
+  dominating block.
+
+The ending point of a live range can be:
+
+- The last instruction to use the value.
+- A branch or jump to an EBB where the value is live-in.
+
+When all the EBBs in a function are laid out linearly, the live range of a
+value doesn't have to be a contiguous interval, although it will be in a
+majority of cases. There can be holes in the linear live range.
+
+The part of a value's live range that falls inside a single EBB will always be
+an interval without any holes. This follows from the dominance requirements of
+SSA. A live range is represented as:
+
+- The interval inside the EBB where the value is defined.
+- A set of intervals for EBBs where the value is live-in.
+
+Any value that is only used inside a single EBB will have an empty set of
+live-in intervals. Some values are live across large parts of the function, and
+this can often be represented with coalesced live-in intervals covering many
+EBBs. It is important that the live range data structure doesn't have to grow
+linearly with the number of EBBs covered by a live range.
+
+This representation is very similar to LLVM's ``LiveInterval`` data structure
+with a few important differences:
+
+- The Cranelift ``LiveRange`` only covers a single SSA value, while LLVM's
+  ``LiveInterval`` represents the union of multiple related SSA values in a
+  virtual register. This makes Cranelift's representation smaller because
+  individual segments don't have to be annotated with a value number.
+- Cranelift stores the def-interval separately from a list of coalesced live-in
+  intervals, while LLVM stores an array of segments. The two representations
+  are equivalent, but Cranelift optimizes for the common case of a value that is
+  only used locally.
+- It is simpler to check if two live ranges are overlapping. The dominance
+  properties of SSA form mean that it is only necessary to check the
+  def-interval of each live range against the intervals of the other range. It
+  is not necessary to check for overlap between the two sets of live-in
+  intervals. This makes the overlap check logarithmic in the number of live-in
+  intervals instead of linear.
+- LLVM represents a program point as ``SlotIndex`` which holds a pointer to a
+  32-byte ``IndexListEntry`` struct. The entries are organized in a doubly
+  linked list that mirrors the ordering of instructions in a basic block. This
+  allows 'tombstone' program points corresponding to instructions that have
+  been deleted.
+
+  Cranelift uses a 32-bit program point representation that encodes an
+  instruction or EBB number directly. There are no 'tombstones' for deleted
+  instructions, and no mirrored linked list of instructions. Live ranges must
+  be updated when instructions are deleted.
+
+A consequence of Cranelift's more compact representation is that two program
+points can't be compared without the context of a function layout.
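+
+As a rough sketch (hypothetical code, not the output of any particular pass),
+a value defined in one EBB and used in a dominated EBB has a def-interval in
+the first EBB and a live-in interval in the second::
+
+    ebb0(v0: i32):
+        v1 = iadd_imm v0, 1    ; def of v1: its def-interval starts here
+        jump ebb1              ; v1 is live-out of ebb0
+
+    ebb1:
+        v2 = imul v1, v1       ; last use: v1's live-in interval ends here
+        return v2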
+
+Coalescing algorithm
+====================
+
+Unconstrained SSA form is not well suited to register allocation because of
+the problems that can arise around EBB parameters and arguments. Consider this
+simple example::
+
+    function %interference(i32, i32) -> i32 {
+    ebb0(v0: i32, v1: i32):
+        brz v0, ebb1(v1)
+        jump ebb1(v0)
+
+    ebb1(v2: i32):
+        v3 = iadd v1, v2
+        return v3
+    }
+
+Here, the value ``v1`` is both passed as an argument to ``ebb1`` *and* it is
+live into the EBB because it is used by the :inst:`iadd` instruction. Since
+EBB arguments on the :inst:`brz` instruction need to be in the same register as
+the corresponding EBB parameter ``v2``, there is going to be interference
+between ``v1`` and ``v2`` in the ``ebb1`` block.
+
+The interference can be resolved by isolating the SSA values passed as EBB
+arguments::
+
+    function %coalesced(i32, i32) -> i32 {
+    ebb0(v0: i32, v1: i32):
+        v5 = copy v1
+        brz v0, ebb1(v5)
+        v6 = copy v0
+        jump ebb1(v6)
+
+    ebb1(v2: i32):
+        v3 = iadd.i32 v1, v2
+        return v3
+    }
+
+Now the EBB argument is ``v5`` which is *not* itself live into ``ebb1``,
+resolving the interference.
+
+The coalescing pass groups the SSA values into sets called *virtual registers*
+and inserts copies such that:
+
+1. Whenever a value is passed as an EBB argument, the corresponding EBB
+   parameter value belongs to the same virtual register as the passed argument
+   value.
+2. The live ranges of values belonging to the same virtual register do not
+   interfere, i.e., they don't overlap anywhere.
+
+Most virtual registers contain only a single isolated SSA value because most
+SSA values are never passed as EBB arguments. The ``VirtRegs`` data structure
+doesn't store any information about these singleton virtual registers; it only
+tracks larger virtual registers and assumes that any value it doesn't know
+about is its own singleton virtual register.
+
+Once the values have been partitioned into interference-free virtual registers,
+the code is said to be in *conventional SSA form* (CSSA). A program in CSSA
+form can be register allocated correctly by assigning all the values in a
+virtual register to the same stack or register location.
+
+Conventional SSA form and the virtual registers are maintained through all the
+register allocator passes.
+
+
+Spilling algorithm
+==================
+
+The spilling pass is responsible for lowering the register pressure enough that
+the coloring pass is guaranteed to be able to find a coloring solution. It does
+this by assigning whole virtual registers to stack slots.
+
+Besides just counting registers, the spiller also has to look at each
+instruction's operand constraints, because satisfying the constraints can
+sometimes require extra registers, raising the register pressure:
+
+- If a single value is used more than once by an instruction, and the operands
+  have conflicting constraints, two registers must be used. The most common
+  case is when a single value is passed as two separate arguments to a
+  function call.
+- If an instruction has a *tied operand constraint* where one of the input
+  operands must use the same register as the output operand, the spiller makes
+  sure that the tied input value doesn't interfere with the output value by
+  inserting a copy if needed.
+
+The spilling heuristic used by Cranelift is very simple (a small sketch
+follows below). Whenever the spiller determines that the register pressure is
+too high at some instruction, it picks the live SSA value whose definition is
+farthest away as the spill candidate. Then it spills all values in the
+corresponding virtual register to the same spill slot. It is important that
+all values in a virtual register get the same spill slot; otherwise we could
+need memory-to-memory copies when passing spilled arguments to a spilled EBB
+parameter.
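+
+As a sketch with illustrative types (this is not the actual spiller code),
+picking the candidate whose definition is farthest away could look like::
+
+    type Value = u32;
+
+    /// Pick the spill candidate among the values live at `current`, where
+    /// `def_pos(v)` is the linear program position of `v`'s definition.
+    /// The definition farthest above `current` wins.
+    fn spill_candidate(
+        live: &[Value],
+        current: u32,
+        def_pos: impl Fn(Value) -> u32,
+    ) -> Value {
+        *live
+            .iter()
+            .max_by_key(|&&v| current.saturating_sub(def_pos(v)))
+            .expect("excess register pressure implies live values")
+    }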
+
+This simple heuristic tends to spill values with long live ranges, and it
+depends on the reload pass to do a good job of reusing registers reloaded from
+spill slots if the spilled value gets used a lot. The idea is to minimize stack
+*write* traffic with the spilling heuristic and to minimize stack *read*
+traffic with the reload pass.
+
+Coloring algorithm
+==================
+
+The SSA coloring algorithm is based on a single observation: if two SSA values
+interfere, one of the values must be live where the other value is defined.
+
+We visit the EBBs in a topological order such that all dominating EBBs are
+visited before the current EBB. The instructions in an EBB are visited in a
+top-down order, and each value defined by the instruction is assigned an
+available register. With this iteration order, every value that is live at an
+instruction has already been assigned to a register.
+
+This coloring algorithm works if the following condition holds:
+
+    At every instruction, consider the values live through the instruction. No
+    matter how the live values have been assigned to registers, there must be
+    registers of the right register classes available for the values defined
+    by the instruction.
+
+We'll need to modify this condition in order to deal with the real-world
+complications.
+
+The coloring algorithm needs to keep track of the set of live values at each
+instruction. At the top of an EBB, this set can be computed as the union of:
+
+- The set of live values before the immediately dominating branch or jump
+  instruction. The topological iteration order guarantees that this set is
+  available. Values whose live ranges indicate that they are not live-in to the
+  current EBB should be filtered out.
+- The set of parameters of the EBB. These values should all be live-in, although
+  it is possible that some are dead and never used anywhere.
+
+For each live value, we also track its kill point in the current EBB. This is
+the last instruction to use the value in the EBB. Values that are live-out
+through the EBB terminator don't have a kill point. Note that the kill point
+can be a branch to another EBB that uses the value, so the kill instruction
+doesn't have to be a use of the value.
+
+When advancing past an instruction, the live set is updated (a sketch in Rust
+follows the list):
+
+- Any values whose kill point is the current instruction are removed.
+- Any values defined by the instruction are added, unless their kill point is
+  the current instruction. This corresponds to a dead def which has no uses.
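+
+As a small sketch with illustrative types (``Value`` and ``Inst`` as plain
+integers; this is not Cranelift's actual API), the live set update can be
+written like this::
+
+    use std::collections::{HashMap, HashSet};
+
+    type Value = u32;
+    type Inst = u32;
+
+    /// Advance the live set past `inst`, given each live value's kill point
+    /// within the current EBB and the values defined by `inst`.
+    fn advance_live_set(
+        live: &mut HashSet<Value>,
+        inst: Inst,
+        kills: &HashMap<Value, Inst>,
+        defs: &[Value],
+    ) {
+        // Remove values whose kill point is this instruction.
+        live.retain(|v| kills.get(v) != Some(&inst));
+        // Add values defined here, unless they die immediately (dead defs).
+        for &d in defs {
+            if kills.get(&d) != Some(&inst) {
+                live.insert(d);
+            }
+        }
+    }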
diff --git a/cranelift/docs/testing.rst b/cranelift/docs/testing.rst
new file mode 100644
index 0000000000..b0d3361dc8
--- /dev/null
+++ b/cranelift/docs/testing.rst
@@ -0,0 +1,399 @@
+*****************
+Testing Cranelift
+*****************
+
+Cranelift is tested at multiple levels of abstraction and integration. When
+possible, Rust unit tests are used to verify single functions and types. When
+testing the interaction between compiler passes, file-level tests are
+appropriate.
+
+The top-level shell script :file:`test-all.sh` runs all of the tests in the
+Cranelift repository.
+
+Rust tests
+==========
+
+.. highlight:: rust
+
+Rust and Cargo have good support for testing. Cranelift uses unit tests, doc
+tests, and integration tests where appropriate.
+
+Unit tests
+----------
+
+Unit tests live in a ``tests`` sub-module of the code they are testing::
+
+    pub fn add(x: u32, y: u32) -> u32 {
+        x + y
+    }
+
+    #[cfg(test)]
+    mod tests {
+        use super::add;
+
+        #[test]
+        fn check_add() {
+            assert_eq!(add(2, 2), 4);
+        }
+    }
+
+Since sub-modules have access to non-public items in a Rust module, unit tests
+can be used to test module-internal functions and types too.
+
+Doc tests
+---------
+
+Documentation comments can contain code snippets which are also compiled and
+tested::
+
+    //! The `Flags` struct is immutable once it has been created. A `Builder` instance is used to
+    //! create it.
+    //!
+    //! # Example
+    //! ```
+    //! use cranelift_codegen::settings::{self, Configurable};
+    //!
+    //! let mut b = settings::builder();
+    //! b.set("opt_level", "fastest");
+    //!
+    //! let f = settings::Flags::new(&b);
+    //! assert_eq!(f.opt_level(), settings::OptLevel::Fastest);
+    //! ```
+
+These tests are useful for demonstrating how to use an API, and running them
+regularly makes sure that they stay up to date. Documentation tests are not
+appropriate for lots of assertions; use unit tests for that.
+
+Integration tests
+-----------------
+
+Integration tests are Rust source files that are compiled and linked
+individually. They are used to exercise the external API of the crates under
+test.
+
+These tests are usually found in the :file:`tests` top-level directory, where
+they have access to all the crates in the Cranelift repository. The
+:file:`cranelift-codegen` and :file:`cranelift-reader` crates have no external
+dependencies, which can make testing tedious. Integration tests that don't need
+to depend on other crates can be placed in :file:`cranelift-codegen/tests` and
+:file:`cranelift-reader/tests`.
+
+File tests
+==========
+
+.. highlight:: clif
+
+Compilers work with large data structures representing programs, and it quickly
+gets unwieldy to generate test data programmatically. File-level tests make it
+easier to provide substantial input functions for the compiler tests.
+
+File tests are :file:`*.clif` files in the :file:`filetests/` directory
+hierarchy. Each file has a header describing what to test followed by a number
+of input functions in the :doc:`Cranelift textual intermediate representation
+<ir>`:
+
+.. productionlist::
+    test_file : test_header `function_list`
+    test_header : test_commands (`isa_specs` | `settings`)
+    test_commands : test_command { test_command }
+    test_command : "test" test_name { option } "\n"
+
+The available test commands are described below.
+
+Many test commands only make sense in the context of a target instruction set
+architecture. These tests require one or more ISA specifications in the test
+header:
+
+.. productionlist::
+    isa_specs : { [`settings`] isa_spec }
+    isa_spec : "isa" isa_name { `option` } "\n"
+
+The options given on the ``isa`` line modify the ISA-specific settings defined
+in :file:`cranelift-codegen/meta-python/isa/*/settings.py`.
+
+All types of tests allow shared Cranelift settings to be modified:
+
+.. productionlist::
+    settings : { setting }
+    setting : "set" { option } "\n"
+    option : flag | setting "=" value
+
+The shared settings available for all target ISAs are defined in
+:file:`cranelift-codegen/meta-python/base/settings.py`.
+
+The ``set`` lines apply settings cumulatively::
+
+    test legalizer
+    set opt_level=best
+    set is_pic=1
+    isa riscv64
+    set is_pic=0
+    isa riscv32 supports_m=false
+
+    function %foo() {}
+
+This example will run the legalizer test twice.
+Both runs will have
+``opt_level=best``, but they will have different ``is_pic`` settings. The
+32-bit run will also have the RISC-V specific flag ``supports_m`` disabled.
+
+The filetests are run automatically as part of ``cargo test``, and they can
+also be run manually with the :command:`clif-util test` command.
+
+Filecheck
+---------
+
+Many of the test commands described below use *filecheck* to verify their
+output. Filecheck is a Rust implementation of the LLVM tool of the same name.
+See the ``filecheck`` crate's documentation for details of its syntax.
+
+Comments in :file:`.clif` files are associated with the entity they follow.
+This typically means an instruction or the whole function. Those tests that
+use filecheck will extract comments associated with each function (or its
+entities) and scan them for filecheck directives. The test output for each
+function is then matched against the filecheck directives for that function.
+
+Comments appearing before the first function in a file apply to every function.
+This is useful for defining common regular expression variables with the
+``regex:`` directive, for example.
+
+Note that LLVM's file tests don't separate filecheck directives by their
+associated function; they verify the concatenated output against all filecheck
+directives in the test file. LLVM's :command:`FileCheck` command has a
+``CHECK-LABEL:`` directive to help separate the output from different
+functions. Cranelift's tests don't need this.
+
+`test cat`
+----------
+
+This is one of the simplest file tests, used for testing the conversion to and
+from textual IR. The ``test cat`` command simply parses each function and
+converts it back to text again. The text of each function is then matched
+against the associated filecheck directives.
+
+Example::
+
+    function %r1() -> i32, f32 {
+    ebb1:
+        v10 = iconst.i32 3
+        v20 = f32const 0.0
+        return v10, v20
+    }
+    ; sameln: function %r1() -> i32, f32 {
+    ; nextln: ebb0:
+    ; nextln:     v10 = iconst.i32 3
+    ; nextln:     v20 = f32const 0.0
+    ; nextln:     return v10, v20
+    ; nextln: }
+
+`test verifier`
+---------------
+
+Run each function through the IR verifier and check that it produces the
+expected error messages.
+
+Expected error messages are indicated with an ``error:`` directive *on the
+instruction that produces the verifier error*. Both the error message and the
+reported location of the error are verified::
+
+    test verifier
+
+    function %test(i32) {
+    ebb0(v0: i32):
+        jump ebb1       ; error: terminator
+        return
+    }
+
+This example test passes if the verifier fails with an error message containing
+the sub-string ``"terminator"`` *and* the error is reported for the ``jump``
+instruction.
+
+If a function contains no ``error:`` annotations, the test passes if the
+function verifies correctly.
+
+`test print-cfg`
+----------------
+
+Print the control flow graph of each function as a Graphviz graph, and run
+filecheck over the result. See also the :command:`clif-util print-cfg`
+command::
+
+    ; For testing cfg generation. This code is nonsense.
+    test print-cfg
+    test verifier
+
+    function %nonsense(i32, i32) -> f32 {
+    ; check: digraph %nonsense {
+    ; regex: I=\binst\d+\b
+    ; check: label="{ebb0 | <$(BRZ=$I)>brz ebb2 | <$(JUMP=$I)>jump ebb1}"]
+
+    ebb0(v0: i32, v1: i32):
+        brz v1, ebb2        ; unordered: ebb0:$BRZ -> ebb2
+        v2 = iconst.i32 0
+        jump ebb1(v2)       ; unordered: ebb0:$JUMP -> ebb1
+
+    ebb1(v5: i32):
+        return v0
+
+    ebb2:
+        v100 = f32const 0.0
+        return v100
+    }
+
+`test domtree`
+--------------
+
+Compute the dominator tree of each function and validate it against the
+``dominates:`` annotations::
+
+    test domtree
+
+    function %test(i32) {
+    ebb0(v0: i32):
+        jump ebb1       ; dominates: ebb1
+    ebb1:
+        brz v0, ebb3    ; dominates: ebb3
+        jump ebb2       ; dominates: ebb2
+    ebb2:
+        jump ebb3
+    ebb3:
+        return
+    }
+
+Every reachable extended basic block except for the entry block has an
+*immediate dominator* which is a jump or branch instruction. This test passes
+if the ``dominates:`` annotations on the immediate dominator instructions are
+both correct and complete.
+
+This test also sends the computed CFG post-order through filecheck.
+
+`test legalizer`
+----------------
+
+Legalize each function for the specified target ISA and run the resulting
+function through filecheck. This test command can be used to validate the
+encodings selected for legal instructions as well as the instruction
+transformations performed by the legalizer.
+
+`test regalloc`
+---------------
+
+Test the register allocator.
+
+First, each function is legalized for the specified target ISA. This is
+required for register allocation since the instruction encodings provide
+register class constraints to the register allocator.
+
+Second, the register allocator is run on the function, inserting spill code and
+assigning registers and stack slots to all values.
+
+The resulting function is then run through filecheck.
+
+`test binemit`
+--------------
+
+Test the emission of binary machine code.
+
+The functions must contain instructions that are annotated with both encodings
+and value locations (registers or stack slots). For instructions that are
+annotated with a ``bin:`` directive, the emitted hexadecimal machine code for
+that instruction is compared to the directive::
+
+    test binemit
+    isa riscv
+
+    function %int32() {
+    ebb0:
+        [-,%x5]         v0 = iconst.i32 1
+        [-,%x6]         v1 = iconst.i32 2
+        [R#0c,%x7]      v10 = iadd v0, v1       ; bin: 006283b3
+        [R#200c,%x8]    v11 = isub v0, v1       ; bin: 40628433
+        return
+    }
+
+If any instructions are unencoded (indicated with a ``[-]`` encoding field),
+they will be encoded using the same mechanism as the legalizer uses. However,
+illegal instructions for the ISA won't be expanded into other instruction
+sequences; instead, the test will fail.
+
+Value locations must be present if they are required to compute the binary
+bits. Missing value locations will cause the test to crash.
+
+`test simple-gvn`
+-----------------
+
+Test the simple GVN pass.
+
+The simple GVN pass is run on each function, and then results are run
+through filecheck.
+
+`test licm`
+-----------------
+
+Test the LICM pass.
+
+The LICM pass is run on each function, and then results are run
+through filecheck.
+
+`test dce`
+-----------------
+
+Test the DCE pass.
+
+The DCE pass is run on each function, and then results are run
+through filecheck.
+
+`test shrink`
+-----------------
+
+Test the instruction shrinking pass.
+
+The shrink pass is run on each function, and then results are run
+through filecheck.
+
+`test preopt`
+-----------------
+
+Test the preopt pass.
+ +The preopt pass is run on each function, and then results are run +through filecheck. + +`test postopt` +----------------- + +Test the postopt pass. + +The postopt pass is run on each function, and then results are run +through filecheck. + +`test compile` +-------------- + +Test the whole code generation pipeline. + +Each function is passed through the full ``Context::compile()`` function +which is normally used to compile code. This type of test often depends +on assertions or verifier errors, but it is also possible to use +filecheck directives which will be matched against the final form of the +Cranelift IR right before binary machine code emission. + +`test run` +---------- + +Compile and execute a function. + +Add a ``; run`` directive after each function that should be executed. These +functions must have the signature ``() -> bNN`` where ``bNN`` is some sort of +boolean, e.g. ``b1`` or ``b32``. A ``true`` value is interpreted as a successful +test execution, whereas a ``false`` value is interpreted as a failed test. + +Example:: + + test run + + function %trivial_test() -> b1 { + ebb0: + v0 = bconst.b1 true + return v0 + } + ; run diff --git a/cranelift/entity/Cargo.toml b/cranelift/entity/Cargo.toml new file mode 100644 index 0000000000..a0c5706803 --- /dev/null +++ b/cranelift/entity/Cargo.toml @@ -0,0 +1,22 @@ +[package] +authors = ["The Cranelift Project Developers"] +name = "cranelift-entity" +version = "0.59.0" +description = "Data structures using entity references as mapping keys" +license = "Apache-2.0 WITH LLVM-exception" +documentation = "https://cranelift.readthedocs.io/" +repository = "https://github.com/bytecodealliance/cranelift" +categories = ["no-std"] +readme = "README.md" +keywords = ["entity", "set", "map"] +edition = "2018" + +[dependencies] +serde = { version = "1.0.94", features = ["derive"], optional = true } + +[features] +enable-serde = ["serde"] + +[badges] +maintenance = { status = "experimental" } +travis-ci = { repository = "bytecodealliance/cranelift" } diff --git a/cranelift/entity/LICENSE b/cranelift/entity/LICENSE new file mode 100644 index 0000000000..f9d81955f4 --- /dev/null +++ b/cranelift/entity/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
+
diff --git a/cranelift/entity/README.md b/cranelift/entity/README.md
new file mode 100644
index 0000000000..f840b142e6
--- /dev/null
+++ b/cranelift/entity/README.md
@@ -0,0 +1,40 @@
+This crate contains array-based data structures used by the core Cranelift code
+generator which use densely numbered entity references as mapping keys.
+
+One major difference between this crate and crates like [slotmap], [slab],
+and [generational-arena] is that this crate currently provides no way to delete
+entities. This limits its use to situations where deleting isn't important;
+however, this also makes it more efficient, because it doesn't need extra
+bookkeeping state to reuse the storage for deleted objects, or to ensure that
+new objects always have unique keys (e.g. slotmap's and generational-arena's
+versioning).
+
+Another major difference is that this crate protects against using a key from
+one map to access an element in another. Where `SlotMap`, `Slab`, and `Arena`
+have a value type parameter, `PrimaryMap` has a key type parameter and a value
+type parameter. The crate also provides the `entity_impl` macro which makes it
+easy to declare new unique types for use as keys. Any attempt to use a key in
+a map it's not intended for is diagnosed with a type error.
+
+Another is that this crate has two core map types, `PrimaryMap` and
+`SecondaryMap`, which serve complementary purposes. A `PrimaryMap` creates its
+own keys when elements are inserted, while a `SecondaryMap` reuses the keys
+of a `PrimaryMap`, conceptually storing additional data in the same
+index space. `SecondaryMap`'s values must implement `Default`, and all elements
+in a `SecondaryMap` initially have the value of `default()`.
+
+A common way to implement `Default` is to wrap a type in `Option`; however,
+this crate also provides the `PackedOption` utility which can use less memory
+in some cases.
+
+Additional utilities provided by this crate include:
+ - `EntityList`: for allocating many small arrays (such as instruction operand
+   lists in a compiler code generator).
+ - `SparseMap`: an alternative to `SecondaryMap` which can use less memory
+   in some situations.
+ - `EntitySet`: a specialized form of `SecondaryMap` using a bitvector to
+   record which entities are members of the set.
+
+[slotmap]: https://crates.io/crates/slotmap
+[slab]: https://crates.io/crates/slab
+[generational-arena]: https://crates.io/crates/generational-arena
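+
+As a quick illustration of how the pieces fit together (a sketch against the
+API described above; the `Node` entity type and the values are made up):
+
+```rust
+use cranelift_entity::{entity_impl, PrimaryMap, SecondaryMap};
+
+// A new-type key; `entity_impl!` provides the `EntityRef` implementation.
+#[derive(Clone, Copy, PartialEq, Eq)]
+struct Node(u32);
+entity_impl!(Node);
+
+fn main() {
+    // The primary map allocates the keys...
+    let mut names: PrimaryMap<Node, &str> = PrimaryMap::new();
+    let a = names.push("a");
+    let b = names.push("b");
+
+    // ...and a secondary map attaches more data in the same index space.
+    // Entities that were never written read back as `Default::default()`.
+    let mut weight: SecondaryMap<Node, u32> = SecondaryMap::new();
+    weight[a] = 10;
+
+    assert_eq!(names[b], "b");
+    assert_eq!(weight[a], 10);
+    assert_eq!(weight[b], 0);
+}
+```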
diff --git a/cranelift/entity/src/boxed_slice.rs b/cranelift/entity/src/boxed_slice.rs
new file mode 100644
index 0000000000..3b3b39155b
--- /dev/null
+++ b/cranelift/entity/src/boxed_slice.rs
@@ -0,0 +1,316 @@
+//! Boxed slices for `PrimaryMap`.
+
+use crate::iter::{Iter, IterMut};
+use crate::keys::Keys;
+use crate::EntityRef;
+use alloc::boxed::Box;
+use core::marker::PhantomData;
+use core::ops::{Index, IndexMut};
+use core::slice;
+
+/// A slice mapping `K -> V` allocating dense entity references.
+///
+/// The `BoxedSlice` data structure uses the dense index space to implement a map with a boxed
+/// slice.
+#[derive(Debug, Clone)]
+pub struct BoxedSlice<K, V>
+where
+    K: EntityRef,
+{
+    elems: Box<[V]>,
+    unused: PhantomData<K>,
+}
+
+impl<K, V> BoxedSlice<K, V>
+where
+    K: EntityRef,
+{
+    /// Create a new slice from a raw pointer. A safer way to create slices is
+    /// to use `PrimaryMap::into_boxed_slice()`.
+    ///
+    /// # Safety
+    ///
+    /// This relies on `raw` pointing to a valid slice of `V`s.
+    pub unsafe fn from_raw(raw: *mut [V]) -> Self {
+        Self {
+            elems: Box::from_raw(raw),
+            unused: PhantomData,
+        }
+    }
+
+    /// Check if `k` is a valid key in the map.
+    pub fn is_valid(&self, k: K) -> bool {
+        k.index() < self.elems.len()
+    }
+
+    /// Get the element at `k` if it exists.
+    pub fn get(&self, k: K) -> Option<&V> {
+        self.elems.get(k.index())
+    }
+
+    /// Get the element at `k` if it exists, mutable version.
+    pub fn get_mut(&mut self, k: K) -> Option<&mut V> {
+        self.elems.get_mut(k.index())
+    }
+
+    /// Is this map completely empty?
+    pub fn is_empty(&self) -> bool {
+        self.elems.is_empty()
+    }
+
+    /// Get the total number of entity references created.
+    pub fn len(&self) -> usize {
+        self.elems.len()
+    }
+
+    /// Iterate over all the keys in this map.
+    pub fn keys(&self) -> Keys<K> {
+        Keys::with_len(self.elems.len())
+    }
+
+    /// Iterate over all the values in this map.
+    pub fn values(&self) -> slice::Iter<V> {
+        self.elems.iter()
+    }
+
+    /// Iterate over all the values in this map, mutable edition.
+    pub fn values_mut(&mut self) -> slice::IterMut<V> {
+        self.elems.iter_mut()
+    }
+
+    /// Iterate over all the keys and values in this map.
+    pub fn iter(&self) -> Iter<K, V> {
+        Iter::new(self.elems.iter())
+    }
+
+    /// Iterate over all the keys and values in this map, mutable edition.
+    pub fn iter_mut(&mut self) -> IterMut<K, V> {
+        IterMut::new(self.elems.iter_mut())
+    }
+
+    /// Returns the last element that was inserted in the map.
+    pub fn last(&self) -> Option<&V> {
+        self.elems.last()
+    }
+}
+
+/// Immutable indexing into a `BoxedSlice`.
+/// The indexed value must be in the map.
+impl<K, V> Index<K> for BoxedSlice<K, V>
+where
+    K: EntityRef,
+{
+    type Output = V;
+
+    fn index(&self, k: K) -> &V {
+        &self.elems[k.index()]
+    }
+}
+
+/// Mutable indexing into a `BoxedSlice`.
+impl<K, V> IndexMut<K> for BoxedSlice<K, V>
+where
+    K: EntityRef,
+{
+    fn index_mut(&mut self, k: K) -> &mut V {
+        &mut self.elems[k.index()]
+    }
+}
+
+impl<'a, K, V> IntoIterator for &'a BoxedSlice<K, V>
+where
+    K: EntityRef,
+{
+    type Item = (K, &'a V);
+    type IntoIter = Iter<'a, K, V>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        Iter::new(self.elems.iter())
+    }
+}
+
+impl<'a, K, V> IntoIterator for &'a mut BoxedSlice<K, V>
+where
+    K: EntityRef,
+{
+    type Item = (K, &'a mut V);
+    type IntoIter = IterMut<'a, K, V>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        IterMut::new(self.elems.iter_mut())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::primary::PrimaryMap;
+    use alloc::vec::Vec;
+
+    // `EntityRef` impl for testing.
+    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
+    struct E(u32);
+
+    impl EntityRef for E {
+        fn new(i: usize) -> Self {
+            E(i as u32)
+        }
+        fn index(self) -> usize {
+            self.0 as usize
+        }
+    }
+
+    #[test]
+    fn basic() {
+        let r0 = E(0);
+        let r1 = E(1);
+        let p = PrimaryMap::<E, usize>::new();
+        let m = p.into_boxed_slice();
+
+        let v: Vec<E> = m.keys().collect();
+        assert_eq!(v, []);
+
+        assert!(!m.is_valid(r0));
+        assert!(!m.is_valid(r1));
+    }
+
+    #[test]
+    fn iter() {
+        let mut p: PrimaryMap<E, usize> = PrimaryMap::new();
+        p.push(12);
+        p.push(33);
+        let mut m = p.into_boxed_slice();
+
+        let mut i = 0;
+        for (key, value) in &m {
+            assert_eq!(key.index(), i);
+            match i {
+                0 => assert_eq!(*value, 12),
+                1 => assert_eq!(*value, 33),
+                _ => panic!(),
+            }
+            i += 1;
+        }
+        i = 0;
+        for (key_mut, value_mut) in m.iter_mut() {
+            assert_eq!(key_mut.index(), i);
+            match i {
+                0 => assert_eq!(*value_mut, 12),
+                1 => assert_eq!(*value_mut, 33),
+                _ => panic!(),
+            }
+            i += 1;
+        }
+    }
+
+    #[test]
+    fn iter_rev() {
+        let mut p: PrimaryMap<E, usize> = PrimaryMap::new();
+        p.push(12);
+        p.push(33);
+        let mut m = p.into_boxed_slice();
+
+        let mut i = 2;
+        for (key, value) in m.iter().rev() {
+            i -= 1;
+            assert_eq!(key.index(), i);
+            match i {
+                0 => assert_eq!(*value, 12),
+                1 => assert_eq!(*value, 33),
+                _ => panic!(),
+            }
+        }
+
+        i = 2;
+        for (key, value) in m.iter_mut().rev() {
+            i -= 1;
+            assert_eq!(key.index(), i);
+            match i {
+                0 => assert_eq!(*value, 12),
+                1 => assert_eq!(*value, 33),
+                _ => panic!(),
+            }
+        }
+    }
+    #[test]
+    fn keys() {
+        let mut p: PrimaryMap<E, usize> = PrimaryMap::new();
+        p.push(12);
+        p.push(33);
+        let m = p.into_boxed_slice();
+
+        let mut i = 0;
+        for key in m.keys() {
+            assert_eq!(key.index(), i);
+            i += 1;
+        }
+    }
+
+    #[test]
+    fn keys_rev() {
+        let mut p: PrimaryMap<E, usize> = PrimaryMap::new();
+        p.push(12);
+        p.push(33);
+        let m = p.into_boxed_slice();
+
+        let mut i = 2;
+        for key in m.keys().rev() {
+            i -= 1;
+            assert_eq!(key.index(), i);
+        }
+    }
+
+    #[test]
+    fn values() {
+        let mut p: PrimaryMap<E, usize> = PrimaryMap::new();
+        p.push(12);
+        p.push(33);
+        let mut m = p.into_boxed_slice();
+
+        let mut i = 0;
+        for value in m.values() {
+            match i {
+                0 => assert_eq!(*value, 12),
+                1 => assert_eq!(*value, 33),
+                _ => panic!(),
+            }
+            i += 1;
+        }
+        i = 0;
+        for value_mut in m.values_mut() {
+            match i {
+                0 => assert_eq!(*value_mut, 12),
+                1 => assert_eq!(*value_mut, 33),
+                _ => panic!(),
+            }
+            i += 1;
+        }
+    }
+
+    #[test]
+    fn values_rev() {
+        let mut p: PrimaryMap<E, usize> = PrimaryMap::new();
+        p.push(12);
+        p.push(33);
+        let mut m = p.into_boxed_slice();
+
+        let mut i = 2;
+        for value in m.values().rev() {
+            i -= 1;
+            match i {
+                0 => assert_eq!(*value, 12),
+                1 => assert_eq!(*value, 33),
+                _ => panic!(),
+            }
+        }
+        i = 2;
+        for value_mut in m.values_mut().rev() {
+            i -= 1;
+            match i {
+                0 => assert_eq!(*value_mut, 12),
+                1 => assert_eq!(*value_mut, 33),
+                _ => panic!(),
+            }
+        }
+    }
+}
diff --git a/cranelift/entity/src/iter.rs b/cranelift/entity/src/iter.rs
new file mode 100644
index 0000000000..8c681023d2
--- /dev/null
+++ b/cranelift/entity/src/iter.rs
@@ -0,0 +1,86 @@
+//! A double-ended iterator over entity references and entities.
+
+use crate::EntityRef;
+use core::iter::Enumerate;
+use core::marker::PhantomData;
+use core::slice;
+
+/// Iterate over all keys in order.
+pub struct Iter<'a, K: EntityRef, V>
+where
+    V: 'a,
+{
+    enumerate: Enumerate<slice::Iter<'a, V>>,
+    unused: PhantomData<K>,
+}
+
+impl<'a, K: EntityRef, V> Iter<'a, K, V> {
+    /// Create an `Iter` iterator that visits the `PrimaryMap` keys and values
+    /// of `iter`.
+    pub fn new(iter: slice::Iter<'a, V>) -> Self {
+        Self {
+            enumerate: iter.enumerate(),
+            unused: PhantomData,
+        }
+    }
+}
+
+impl<'a, K: EntityRef, V> Iterator for Iter<'a, K, V> {
+    type Item = (K, &'a V);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.enumerate.next().map(|(i, v)| (K::new(i), v))
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.enumerate.size_hint()
+    }
+}
+
+impl<'a, K: EntityRef, V> DoubleEndedIterator for Iter<'a, K, V> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.enumerate.next_back().map(|(i, v)| (K::new(i), v))
+    }
+}
+
+impl<'a, K: EntityRef, V> ExactSizeIterator for Iter<'a, K, V> {}
+
+/// Iterate over all keys in order.
+pub struct IterMut<'a, K: EntityRef, V>
+where
+    V: 'a,
+{
+    enumerate: Enumerate<slice::IterMut<'a, V>>,
+    unused: PhantomData<K>,
+}
+
+impl<'a, K: EntityRef, V> IterMut<'a, K, V> {
+    /// Create an `IterMut` iterator that visits the `PrimaryMap` keys and values
+    /// of `iter`.
+    pub fn new(iter: slice::IterMut<'a, V>) -> Self {
+        Self {
+            enumerate: iter.enumerate(),
+            unused: PhantomData,
+        }
+    }
+}
+
+impl<'a, K: EntityRef, V> Iterator for IterMut<'a, K, V> {
+    type Item = (K, &'a mut V);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.enumerate.next().map(|(i, v)| (K::new(i), v))
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.enumerate.size_hint()
+    }
+}
+
+impl<'a, K: EntityRef, V> DoubleEndedIterator for IterMut<'a, K, V> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.enumerate.next_back().map(|(i, v)| (K::new(i), v))
+    }
+}
+
+impl<'a, K: EntityRef, V> ExactSizeIterator for IterMut<'a, K, V> {}
diff --git a/cranelift/entity/src/keys.rs b/cranelift/entity/src/keys.rs
new file mode 100644
index 0000000000..bfbaa0cb90
--- /dev/null
+++ b/cranelift/entity/src/keys.rs
@@ -0,0 +1,58 @@
+//! A double-ended iterator over entity references.
+//!
+//! When `core::iter::Step` is stabilized, `Keys` could be implemented as a wrapper around
+//! `core::ops::Range`, but for now, we implement it manually.
+
+use crate::EntityRef;
+use core::marker::PhantomData;
+
+/// Iterate over all keys in order.
+pub struct Keys<K: EntityRef> {
+    pos: usize,
+    rev_pos: usize,
+    unused: PhantomData<K>,
+}
+
+impl<K: EntityRef> Keys<K> {
+    /// Create a `Keys` iterator that visits `len` entities starting from 0.
+    pub fn with_len(len: usize) -> Self {
+        Self {
+            pos: 0,
+            rev_pos: len,
+            unused: PhantomData,
+        }
+    }
+}
+
+impl<K: EntityRef> Iterator for Keys<K> {
+    type Item = K;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.pos < self.rev_pos {
+            let k = K::new(self.pos);
+            self.pos += 1;
+            Some(k)
+        } else {
+            None
+        }
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let size = self.rev_pos - self.pos;
+        (size, Some(size))
+    }
+}
+
+impl<K: EntityRef> DoubleEndedIterator for Keys<K> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if self.rev_pos > self.pos {
+            let k = K::new(self.rev_pos - 1);
+            self.rev_pos -= 1;
+            Some(k)
+        } else {
+            None
+        }
+    }
+}
+
+impl<K: EntityRef> ExactSizeIterator for Keys<K> {}
diff --git a/cranelift/entity/src/lib.rs b/cranelift/entity/src/lib.rs
new file mode 100644
index 0000000000..f9062a8c51
--- /dev/null
+++ b/cranelift/entity/src/lib.rs
@@ -0,0 +1,143 @@
+//! Array-based data structures using densely numbered entity references as mapping keys.
+//!
+//! This crate defines a number of data structures based on arrays. The arrays are not indexed by
+//! `usize` as usual, but by *entity references* which are integers wrapped in new-types. This has
+//! a couple of advantages:
+//!
+//! - Improved type safety. The various map and set types accept a specific key type, so there is
+//!   no confusion about the meaning of an array index, as there is with plain arrays.
+//! - Smaller indexes. The normal `usize` index is often 64 bits which is way too large for most
+//!   purposes. The entity reference types can be smaller, allowing for more compact data
+//!   structures.
+//!
+//! The `EntityRef` trait should be implemented by types to be used as indexes. The `entity_impl!`
+//! macro provides convenient defaults for types wrapping `u32` which is common.
+//!
+//! - [`PrimaryMap`](struct.PrimaryMap.html) is used to keep track of a vector of entities,
+//!   assigning a unique entity reference to each.
+//! - [`SecondaryMap`](struct.SecondaryMap.html) is used to associate secondary information to an
+//!   entity. The map is implemented as a simple vector, so it does not keep track of which
+//!   entities have been inserted. Instead, any unknown entities map to the default value.
+//! - [`SparseMap`](struct.SparseMap.html) is used to associate secondary information to a small
+//!   number of entities. It tracks accurately which entities have been inserted. This is a
+//!   specialized data structure which can use a lot of memory, so read the documentation before
+//!   using it.
+//! - [`EntitySet`](struct.EntitySet.html) is used to represent a secondary set of entities.
+//!   The set is implemented as a simple vector, so it does not keep track of which entities have
+//!   been inserted into the primary map. Instead, any unknown entities are not in the set.
+//! - [`EntityList`](struct.EntityList.html) is a compact representation of lists of entity
+//!   references allocated from an associated memory pool. It has a much smaller footprint than
+//!   `Vec`.
+
+#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)]
+#![warn(unused_import_braces)]
+#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))]
+#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))]
+#![cfg_attr(
+    feature = "cargo-clippy",
+    warn(
+        clippy::float_arithmetic,
+        clippy::mut_mut,
+        clippy::nonminimal_bool,
+        clippy::option_map_unwrap_or,
+        clippy::option_map_unwrap_or_else,
+        clippy::print_stdout,
+        clippy::unicode_not_nfc,
+        clippy::use_self
+    )
+)]
+#![no_std]
+
+extern crate alloc;
+
+// Re-export core so that the macros work with both std and no_std crates.
+#[doc(hidden)]
+pub extern crate core as __core;
+
+/// A type wrapping a small integer index should implement `EntityRef` so it can be used as the key
+/// of a `SecondaryMap` or `SparseMap`.
+pub trait EntityRef: Copy + Eq {
+    /// Create a new entity reference from a small integer.
+    /// This should crash if the requested index is not representable.
+    fn new(_: usize) -> Self;
+
+    /// Get the index that was used to create this entity reference.
+    fn index(self) -> usize;
+}
+
+/// Macro which provides the common implementation of a 32-bit entity reference.
+#[macro_export]
+macro_rules! entity_impl {
+    // Basic traits.
+    ($entity:ident) => {
+        impl $crate::EntityRef for $entity {
+            fn new(index: usize) -> Self {
+                debug_assert!(index < ($crate::__core::u32::MAX as usize));
+                $entity(index as u32)
+            }
+
+            fn index(self) -> usize {
+                self.0 as usize
+            }
+        }
+
+        impl $crate::packed_option::ReservedValue for $entity {
+            fn reserved_value() -> $entity {
+                $entity($crate::__core::u32::MAX)
+            }
+        }
+
+        impl $entity {
+            /// Create a new instance from a `u32`.
+            #[allow(dead_code)]
+            pub fn from_u32(x: u32) -> Self {
+                debug_assert!(x < $crate::__core::u32::MAX);
+                $entity(x)
+            }
+
+            /// Return the underlying index value as a `u32`.
+            #[allow(dead_code)]
+            pub fn as_u32(self) -> u32 {
+                self.0
+            }
+        }
+    };
+
+    // Include basic `Display` impl using the given display prefix.
+    // Display a `Block` reference as "block12".
+    ($entity:ident, $display_prefix:expr) => {
+        entity_impl!($entity);
+
+        impl $crate::__core::fmt::Display for $entity {
+            fn fmt(&self, f: &mut $crate::__core::fmt::Formatter) -> $crate::__core::fmt::Result {
+                write!(f, concat!($display_prefix, "{}"), self.0)
+            }
+        }
+
+        impl $crate::__core::fmt::Debug for $entity {
+            fn fmt(&self, f: &mut $crate::__core::fmt::Formatter) -> $crate::__core::fmt::Result {
+                (self as &dyn $crate::__core::fmt::Display).fmt(f)
+            }
+        }
+    };
+}
+
+pub mod packed_option;
+
+mod boxed_slice;
+mod iter;
+mod keys;
+mod list;
+mod map;
+mod primary;
+mod set;
+mod sparse;
+
+pub use self::boxed_slice::BoxedSlice;
+pub use self::iter::{Iter, IterMut};
+pub use self::keys::Keys;
+pub use self::list::{EntityList, ListPool};
+pub use self::map::SecondaryMap;
+pub use self::primary::PrimaryMap;
+pub use self::set::EntitySet;
+pub use self::sparse::{SparseMap, SparseMapValue, SparseSet};
diff --git a/cranelift/entity/src/list.rs b/cranelift/entity/src/list.rs
new file mode 100644
index 0000000000..68cab8166b
--- /dev/null
+++ b/cranelift/entity/src/list.rs
@@ -0,0 +1,707 @@
+//! Small lists of entity references.
+use crate::packed_option::ReservedValue;
+use crate::EntityRef;
+use alloc::vec::Vec;
+use core::marker::PhantomData;
+use core::mem;
+
+/// A small list of entity references allocated from a pool.
+///
+/// An `EntityList` type provides similar functionality to `Vec`, but with some important
+/// differences in the implementation:
+///
+/// 1. Memory is allocated from a `ListPool` instead of the global heap.
+/// 2. The footprint of an entity list is 4 bytes, compared with the 24 bytes for `Vec`.
+/// 3. An entity list doesn't implement `Drop`, leaving it to the pool to manage memory.
+///
+/// The list pool is intended to be used as a LIFO allocator. After building up a larger data
+/// structure with many list references, the whole thing can be discarded quickly by clearing the
+/// pool.
+///
+/// # Safety
+///
+/// Entity lists are not as safe to use as `Vec`, but they never jeopardize Rust's memory safety
+/// guarantees. These are the problems to be aware of:
+///
+/// - If you lose track of an entity list, its memory won't be recycled until the pool is cleared.
+///   This can cause the pool to grow very large with leaked lists.
+/// - If entity lists are used after their pool is cleared, they may contain garbage data, and
+///   modifying them may corrupt other lists in the pool.
+/// - If an entity list is used with two different pool instances, both pools are likely to become
+///   corrupted.
+///
+/// Entity lists can be cloned, but that operation should only be used as part of cloning the whole
+/// function they belong to. *Cloning an entity list does not allocate new memory for the clone*.
+/// It creates an alias of the same memory.
+///
+/// Entity lists cannot be hashed and compared for equality because it's not possible to compare the
+/// contents of the list without the pool reference.
+///
+/// # Implementation
+///
+/// The `EntityList` itself is designed to have the smallest possible footprint. This is important
+/// because it is used inside very compact data structures like `InstructionData`. The list
+/// contains only a 32-bit index into the pool's memory vector, pointing to the first element of
+/// the list.
+///
+/// The pool is just a single `Vec<T>` containing all of the allocated lists. Each list is
+/// represented as three contiguous parts:
+///
+/// 1. The number of elements in the list.
+/// 2. The list elements.
+/// 3. Excess capacity elements.
+///
+/// The total size of the three parts is always a power of two, and the excess capacity is always
+/// as small as possible. This means that shrinking a list may cause the excess capacity to shrink
+/// if a smaller power-of-two size becomes available.
+///
+/// Both growing and shrinking a list may cause it to be reallocated in the pool vector.
+///
+/// The index stored in an `EntityList` points to part 2, the list elements. The value 0 is
+/// reserved for the empty list which isn't allocated in the vector.
+#[derive(Clone, Debug)]
+pub struct EntityList<T: EntityRef + ReservedValue> {
+    index: u32,
+    unused: PhantomData<T>,
+}
+
+/// Create an empty list.
+impl<T: EntityRef + ReservedValue> Default for EntityList<T> {
+    fn default() -> Self {
+        Self {
+            index: 0,
+            unused: PhantomData,
+        }
+    }
+}
+
+/// A memory pool for storing lists of `T`.
+#[derive(Clone, Debug)]
+pub struct ListPool<T: EntityRef + ReservedValue> {
+    // The main array containing the lists.
+    data: Vec<T>,
+
+    // Heads of the free lists, one for each size class.
+    free: Vec<usize>,
+}
+
+/// Lists are allocated in sizes that are powers of two, starting from 4.
+/// Each power of two is assigned a size class number, so the size is `4 << SizeClass`.
+type SizeClass = u8;
+
+/// Get the size of a given size class. The size includes the length field, so the maximum list
+/// length is one less than the class size.
+fn sclass_size(sclass: SizeClass) -> usize {
+    4 << sclass
+}
+
+/// Get the size class to use for a given list length.
+/// This always leaves room for the length element in addition to the list elements.
+fn sclass_for_length(len: usize) -> SizeClass {
+    30 - (len as u32 | 3).leading_zeros() as SizeClass
+}
+
+/// Is `len` the minimum length in its size class?
+fn is_sclass_min_length(len: usize) -> bool {
+    len > 3 && len.is_power_of_two()
+}
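+
+// Worked example of the size-class arithmetic above (illustrative only):
+//
+//   len 1..=3   -> sclass_for_length == 0 -> block of sclass_size(0) == 4
+//   len 4..=7   -> sclass_for_length == 1 -> block of sclass_size(1) == 8
+//   len 8..=15  -> sclass_for_length == 2 -> block of sclass_size(2) == 16
+//
+// A list of length 5 therefore lives in an 8-slot block laid out as
+// [len=5, e0, e1, e2, e3, e4, spare, spare]. `is_sclass_min_length(4)` is
+// true because a list shrinking below 4 elements fits a 4-slot block again.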
+
+impl<T: EntityRef + ReservedValue> ListPool<T> {
+    /// Create a new list pool.
+    pub fn new() -> Self {
+        Self {
+            data: Vec::new(),
+            free: Vec::new(),
+        }
+    }
+
+    /// Clear the pool, forgetting about all lists that use it.
+    ///
+    /// This invalidates any existing entity lists that used this pool to allocate memory.
+    ///
+    /// The pool's memory is not released to the operating system, but kept around for faster
+    /// allocation in the future.
+    pub fn clear(&mut self) {
+        self.data.clear();
+        self.free.clear();
+    }
+
+    /// Read the length of a list field, if it exists.
+    fn len_of(&self, list: &EntityList<T>) -> Option<usize> {
+        let idx = list.index as usize;
+        // `idx` points at the list elements. The list length is encoded in the element immediately
+        // before the list elements.
+        //
+        // The `wrapping_sub` handles the special case 0, which is the empty list. This way, the
+        // cost of the bounds check that we have to pay anyway is co-opted to handle the special
+        // case of the empty list.
+        self.data.get(idx.wrapping_sub(1)).map(|len| len.index())
+    }
+
+    /// Allocate a storage block with a size given by `sclass`.
+    ///
+    /// Returns the first index of an available segment of `self.data` containing
+    /// `sclass_size(sclass)` elements. The allocated memory is filled with reserved
+    /// values.
+    fn alloc(&mut self, sclass: SizeClass) -> usize {
+        // First try the free list for this size class.
+        match self.free.get(sclass as usize).cloned() {
+            Some(head) if head > 0 => {
+                // The free list pointers are offset by 1, using 0 to terminate the list.
+                // A block on the free list has two entries: `[ 0, next ]`.
+                // The 0 is where the length field would be stored for a block in use.
+                // The free list heads and the next pointer point at the `next` field.
+                self.free[sclass as usize] = self.data[head].index();
+                head - 1
+            }
+            _ => {
+                // Nothing on the free list. Allocate more memory.
+                let offset = self.data.len();
+                self.data
+                    .resize(offset + sclass_size(sclass), T::reserved_value());
+                offset
+            }
+        }
+    }
+
+    /// Free a storage block with a size given by `sclass`.
+    ///
+    /// This must be a block that was previously allocated by `alloc()` with the same size class.
+    fn free(&mut self, block: usize, sclass: SizeClass) {
+        let sclass = sclass as usize;
+
+        // Make sure we have a free-list head for `sclass`.
+        if self.free.len() <= sclass {
+            self.free.resize(sclass + 1, 0);
+        }
+
+        // Make sure the length field is cleared.
+        self.data[block] = T::new(0);
+        // Insert the block on the free list which is a singly linked list.
+        self.data[block + 1] = T::new(self.free[sclass]);
+        self.free[sclass] = block + 1
+    }
+
+    /// Returns two mutable slices representing the two requested blocks.
+    ///
+    /// The two returned slices can be longer than the blocks. Each block is located at the front
+    /// of the respective slice.
+    fn mut_slices(&mut self, block0: usize, block1: usize) -> (&mut [T], &mut [T]) {
+        if block0 < block1 {
+            let (s0, s1) = self.data.split_at_mut(block1);
+            (&mut s0[block0..], s1)
+        } else {
+            let (s1, s0) = self.data.split_at_mut(block0);
+            (s0, &mut s1[block1..])
+        }
+    }
+
+    /// Reallocate a block to a different size class.
+    ///
+    /// Copy `elems_to_copy` elements from the old to the new block.
+    fn realloc(
+        &mut self,
+        block: usize,
+        from_sclass: SizeClass,
+        to_sclass: SizeClass,
+        elems_to_copy: usize,
+    ) -> usize {
+        debug_assert!(elems_to_copy <= sclass_size(from_sclass));
+        debug_assert!(elems_to_copy <= sclass_size(to_sclass));
+        let new_block = self.alloc(to_sclass);
+
+        if elems_to_copy > 0 {
+            let (old, new) = self.mut_slices(block, new_block);
+            (&mut new[0..elems_to_copy]).copy_from_slice(&old[0..elems_to_copy]);
+        }
+
+        self.free(block, from_sclass);
+        new_block
+    }
+}
+
+impl<T: EntityRef + ReservedValue> EntityList<T> {
+    /// Create a new empty list.
+    pub fn new() -> Self {
+        Default::default()
+    }
+
+    /// Create a new list with the contents initialized from a slice.
+    pub fn from_slice(slice: &[T], pool: &mut ListPool<T>) -> Self {
+        let len = slice.len();
+        if len == 0 {
+            return Self::new();
+        }
+
+        let block = pool.alloc(sclass_for_length(len));
+        pool.data[block] = T::new(len);
+        pool.data[block + 1..=block + len].copy_from_slice(slice);
+
+        Self {
+            index: (block + 1) as u32,
+            unused: PhantomData,
+        }
+    }
+
+    /// Returns `true` if the list has a length of 0.
+    pub fn is_empty(&self) -> bool {
+        // 0 is a magic value for the empty list. Any list in the pool array must have a positive
+        // length.
+        self.index == 0
+    }
+
+    /// Get the number of elements in the list.
+    pub fn len(&self, pool: &ListPool<T>) -> usize {
+        // Both the empty list and any invalidated old lists will return `None`.
+ pool.len_of(self).unwrap_or(0) + } + + /// Returns `true` if the list is valid + pub fn is_valid(&self, pool: &ListPool) -> bool { + // We consider an empty list to be valid + self.is_empty() || pool.len_of(self) != None + } + + /// Get the list as a slice. + pub fn as_slice<'a>(&'a self, pool: &'a ListPool) -> &'a [T] { + let idx = self.index as usize; + match pool.len_of(self) { + None => &[], + Some(len) => &pool.data[idx..idx + len], + } + } + + /// Get a single element from the list. + pub fn get(&self, index: usize, pool: &ListPool) -> Option { + self.as_slice(pool).get(index).cloned() + } + + /// Get the first element from the list. + pub fn first(&self, pool: &ListPool) -> Option { + if self.is_empty() { + None + } else { + Some(pool.data[self.index as usize]) + } + } + + /// Get the list as a mutable slice. + pub fn as_mut_slice<'a>(&'a mut self, pool: &'a mut ListPool) -> &'a mut [T] { + let idx = self.index as usize; + match pool.len_of(self) { + None => &mut [], + Some(len) => &mut pool.data[idx..idx + len], + } + } + + /// Get a mutable reference to a single element from the list. + pub fn get_mut<'a>(&'a mut self, index: usize, pool: &'a mut ListPool) -> Option<&'a mut T> { + self.as_mut_slice(pool).get_mut(index) + } + + /// Removes all elements from the list. + /// + /// The memory used by the list is put back in the pool. + pub fn clear(&mut self, pool: &mut ListPool) { + let idx = self.index as usize; + match pool.len_of(self) { + None => debug_assert_eq!(idx, 0, "Invalid pool"), + Some(len) => pool.free(idx - 1, sclass_for_length(len)), + } + // Switch back to the empty list representation which has no storage. + self.index = 0; + } + + /// Take all elements from this list and return them as a new list. Leave this list empty. + /// + /// This is the equivalent of `Option::take()`. + pub fn take(&mut self) -> Self { + mem::replace(self, Default::default()) + } + + /// Appends an element to the back of the list. + /// Returns the index where the element was inserted. + pub fn push(&mut self, element: T, pool: &mut ListPool) -> usize { + let idx = self.index as usize; + match pool.len_of(self) { + None => { + // This is an empty list. Allocate a block and set length=1. + debug_assert_eq!(idx, 0, "Invalid pool"); + let block = pool.alloc(sclass_for_length(1)); + pool.data[block] = T::new(1); + pool.data[block + 1] = element; + self.index = (block + 1) as u32; + 0 + } + Some(len) => { + // Do we need to reallocate? + let new_len = len + 1; + let block; + if is_sclass_min_length(new_len) { + // Reallocate, preserving length + all old elements. + let sclass = sclass_for_length(len); + block = pool.realloc(idx - 1, sclass, sclass + 1, len + 1); + self.index = (block + 1) as u32; + } else { + block = idx - 1; + } + pool.data[block + new_len] = element; + pool.data[block] = T::new(new_len); + len + } + } + } + + /// Grow list by adding `count` reserved-value elements at the end. + /// + /// Returns a mutable slice representing the whole list. + fn grow<'a>(&'a mut self, count: usize, pool: &'a mut ListPool) -> &'a mut [T] { + let idx = self.index as usize; + let new_len; + let block; + match pool.len_of(self) { + None => { + // This is an empty list. Allocate a block. + debug_assert_eq!(idx, 0, "Invalid pool"); + if count == 0 { + return &mut []; + } + new_len = count; + block = pool.alloc(sclass_for_length(new_len)); + self.index = (block + 1) as u32; + } + Some(len) => { + // Do we need to reallocate? 
+ let sclass = sclass_for_length(len); + new_len = len + count; + let new_sclass = sclass_for_length(new_len); + if new_sclass != sclass { + block = pool.realloc(idx - 1, sclass, new_sclass, len + 1); + self.index = (block + 1) as u32; + } else { + block = idx - 1; + } + } + } + pool.data[block] = T::new(new_len); + &mut pool.data[block + 1..block + 1 + new_len] + } + + /// Appends multiple elements to the back of the list. + pub fn extend(&mut self, elements: I, pool: &mut ListPool) + where + I: IntoIterator, + { + // TODO: use `size_hint()` to reduce reallocations. + for x in elements { + self.push(x, pool); + } + } + + /// Inserts an element at position `index` in the list, shifting all elements after it to the + /// right. + pub fn insert(&mut self, index: usize, element: T, pool: &mut ListPool) { + // Increase size by 1. + self.push(element, pool); + + // Move tail elements. + let seq = self.as_mut_slice(pool); + if index < seq.len() { + let tail = &mut seq[index..]; + for i in (1..tail.len()).rev() { + tail[i] = tail[i - 1]; + } + tail[0] = element; + } else { + debug_assert_eq!(index, seq.len()); + } + } + + /// Removes the element at position `index` from the list. Potentially linear complexity. + pub fn remove(&mut self, index: usize, pool: &mut ListPool) { + let len; + { + let seq = self.as_mut_slice(pool); + len = seq.len(); + debug_assert!(index < len); + + // Copy elements down. + for i in index..len - 1 { + seq[i] = seq[i + 1]; + } + } + + // Check if we deleted the last element. + if len == 1 { + self.clear(pool); + return; + } + + // Do we need to reallocate to a smaller size class? + let mut block = self.index as usize - 1; + if is_sclass_min_length(len) { + let sclass = sclass_for_length(len); + block = pool.realloc(block, sclass, sclass - 1, len); + self.index = (block + 1) as u32; + } + + // Finally adjust the length. + pool.data[block] = T::new(len - 1); + } + + /// Removes the element at `index` in constant time by switching it with the last element of + /// the list. + pub fn swap_remove(&mut self, index: usize, pool: &mut ListPool) { + let len = self.len(pool); + debug_assert!(index < len); + if index == len - 1 { + self.remove(index, pool); + } else { + { + let seq = self.as_mut_slice(pool); + seq.swap(index, len - 1); + } + self.remove(len - 1, pool); + } + } + + /// Grow the list by inserting `count` elements at `index`. + /// + /// The new elements are not initialized; they will contain whatever happened to be in memory. + /// Since the memory comes from the pool, this will be either zero entity references or + /// whatever was in a previously deallocated list. + pub fn grow_at(&mut self, index: usize, count: usize, pool: &mut ListPool) { + let data = self.grow(count, pool); + + // Copy elements after `index` up. + for i in (index + count..data.len()).rev() { + data[i] = data[i - count]; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use super::{sclass_for_length, sclass_size}; + use crate::EntityRef; + + /// An opaque reference to an instruction in a function.
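The tests that follow exercise this API through an `Inst` reference type; as a quick orientation, here is a minimal usage sketch. The `Ent` type is hypothetical, and the crate-root re-exports plus the `packed_option` module path are assumptions.

```rust
// Hypothetical entity type; the crate's `entity_impl!` macro would normally
// generate these impls (including the u32::MAX reserved value).
use cranelift_entity::packed_option::ReservedValue;
use cranelift_entity::{EntityList, EntityRef, ListPool};

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Ent(u32);

impl EntityRef for Ent {
    fn new(i: usize) -> Self { Ent(i as u32) }
    fn index(self) -> usize { self.0 as usize }
}

impl ReservedValue for Ent {
    fn reserved_value() -> Self { Ent(u32::MAX) }
}

fn main() {
    let mut pool = ListPool::<Ent>::new();
    let mut list = EntityList::<Ent>::new();

    // Lists carry no pool pointer; every operation takes the pool explicitly.
    list.push(Ent(7), &mut pool);
    list.push(Ent(8), &mut pool);
    assert_eq!(list.len(&pool), 2);
    assert_eq!(list.as_slice(&pool), &[Ent(7), Ent(8)]);

    // Returns the block to the pool's free list for reuse.
    list.clear(&mut pool);
    assert!(list.is_empty());
}
```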
+ #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] + pub struct Inst(u32); + entity_impl!(Inst, "inst"); + + #[test] + fn size_classes() { + assert_eq!(sclass_size(0), 4); + assert_eq!(sclass_for_length(0), 0); + assert_eq!(sclass_for_length(1), 0); + assert_eq!(sclass_for_length(2), 0); + assert_eq!(sclass_for_length(3), 0); + assert_eq!(sclass_for_length(4), 1); + assert_eq!(sclass_for_length(7), 1); + assert_eq!(sclass_for_length(8), 2); + assert_eq!(sclass_size(1), 8); + for l in 0..300 { + assert!(sclass_size(sclass_for_length(l)) >= l + 1); + } + } + + #[test] + fn block_allocator() { + let mut pool = ListPool::::new(); + let b1 = pool.alloc(0); + let b2 = pool.alloc(1); + let b3 = pool.alloc(0); + assert_ne!(b1, b2); + assert_ne!(b1, b3); + assert_ne!(b2, b3); + pool.free(b2, 1); + let b2a = pool.alloc(1); + let b2b = pool.alloc(1); + assert_ne!(b2a, b2b); + // One of these should reuse the freed block. + assert!(b2a == b2 || b2b == b2); + + // Check the free lists for a size class smaller than the largest seen so far. + pool.free(b1, 0); + pool.free(b3, 0); + let b1a = pool.alloc(0); + let b3a = pool.alloc(0); + assert_ne!(b1a, b3a); + assert!(b1a == b1 || b1a == b3); + assert!(b3a == b1 || b3a == b3); + } + + #[test] + fn empty_list() { + let pool = &mut ListPool::::new(); + let mut list = EntityList::::default(); + { + let ilist = &list; + assert!(ilist.is_empty()); + assert_eq!(ilist.len(pool), 0); + assert_eq!(ilist.as_slice(pool), &[]); + assert_eq!(ilist.get(0, pool), None); + assert_eq!(ilist.get(100, pool), None); + } + assert_eq!(list.as_mut_slice(pool), &[]); + assert_eq!(list.get_mut(0, pool), None); + assert_eq!(list.get_mut(100, pool), None); + + list.clear(pool); + assert!(list.is_empty()); + assert_eq!(list.len(pool), 0); + assert_eq!(list.as_slice(pool), &[]); + assert_eq!(list.first(pool), None); + } + + #[test] + fn from_slice() { + let pool = &mut ListPool::::new(); + + let list = EntityList::::from_slice(&[Inst(0), Inst(1)], pool); + assert!(!list.is_empty()); + assert_eq!(list.len(pool), 2); + assert_eq!(list.as_slice(pool), &[Inst(0), Inst(1)]); + assert_eq!(list.get(0, pool), Some(Inst(0))); + assert_eq!(list.get(100, pool), None); + + let list = EntityList::::from_slice(&[], pool); + assert!(list.is_empty()); + assert_eq!(list.len(pool), 0); + assert_eq!(list.as_slice(pool), &[]); + assert_eq!(list.get(0, pool), None); + assert_eq!(list.get(100, pool), None); + } + + #[test] + fn push() { + let pool = &mut ListPool::::new(); + let mut list = EntityList::::default(); + + let i1 = Inst::new(1); + let i2 = Inst::new(2); + let i3 = Inst::new(3); + let i4 = Inst::new(4); + + assert_eq!(list.push(i1, pool), 0); + assert_eq!(list.len(pool), 1); + assert!(!list.is_empty()); + assert_eq!(list.as_slice(pool), &[i1]); + assert_eq!(list.first(pool), Some(i1)); + assert_eq!(list.get(0, pool), Some(i1)); + assert_eq!(list.get(1, pool), None); + + assert_eq!(list.push(i2, pool), 1); + assert_eq!(list.len(pool), 2); + assert!(!list.is_empty()); + assert_eq!(list.as_slice(pool), &[i1, i2]); + assert_eq!(list.first(pool), Some(i1)); + assert_eq!(list.get(0, pool), Some(i1)); + assert_eq!(list.get(1, pool), Some(i2)); + assert_eq!(list.get(2, pool), None); + + assert_eq!(list.push(i3, pool), 2); + assert_eq!(list.len(pool), 3); + assert!(!list.is_empty()); + assert_eq!(list.as_slice(pool), &[i1, i2, i3]); + assert_eq!(list.first(pool), Some(i1)); + assert_eq!(list.get(0, pool), Some(i1)); + assert_eq!(list.get(1, pool), Some(i2)); + assert_eq!(list.get(2, pool), 
Some(i3)); + assert_eq!(list.get(3, pool), None); + + // This triggers a reallocation. + assert_eq!(list.push(i4, pool), 3); + assert_eq!(list.len(pool), 4); + assert!(!list.is_empty()); + assert_eq!(list.as_slice(pool), &[i1, i2, i3, i4]); + assert_eq!(list.first(pool), Some(i1)); + assert_eq!(list.get(0, pool), Some(i1)); + assert_eq!(list.get(1, pool), Some(i2)); + assert_eq!(list.get(2, pool), Some(i3)); + assert_eq!(list.get(3, pool), Some(i4)); + assert_eq!(list.get(4, pool), None); + + list.extend([i1, i1, i2, i2, i3, i3, i4, i4].iter().cloned(), pool); + assert_eq!(list.len(pool), 12); + assert_eq!( + list.as_slice(pool), + &[i1, i2, i3, i4, i1, i1, i2, i2, i3, i3, i4, i4] + ); + } + + #[test] + fn insert_remove() { + let pool = &mut ListPool::::new(); + let mut list = EntityList::::default(); + + let i1 = Inst::new(1); + let i2 = Inst::new(2); + let i3 = Inst::new(3); + let i4 = Inst::new(4); + + list.insert(0, i4, pool); + assert_eq!(list.as_slice(pool), &[i4]); + + list.insert(0, i3, pool); + assert_eq!(list.as_slice(pool), &[i3, i4]); + + list.insert(2, i2, pool); + assert_eq!(list.as_slice(pool), &[i3, i4, i2]); + + list.insert(2, i1, pool); + assert_eq!(list.as_slice(pool), &[i3, i4, i1, i2]); + + list.remove(3, pool); + assert_eq!(list.as_slice(pool), &[i3, i4, i1]); + + list.remove(2, pool); + assert_eq!(list.as_slice(pool), &[i3, i4]); + + list.remove(0, pool); + assert_eq!(list.as_slice(pool), &[i4]); + + list.remove(0, pool); + assert_eq!(list.as_slice(pool), &[]); + assert!(list.is_empty()); + } + + #[test] + fn growing() { + let pool = &mut ListPool::::new(); + let mut list = EntityList::::default(); + + let i1 = Inst::new(1); + let i2 = Inst::new(2); + let i3 = Inst::new(3); + let i4 = Inst::new(4); + + // This is not supposed to change the list. + list.grow_at(0, 0, pool); + assert_eq!(list.len(pool), 0); + assert!(list.is_empty()); + + list.grow_at(0, 2, pool); + assert_eq!(list.len(pool), 2); + + list.as_mut_slice(pool).copy_from_slice(&[i2, i3]); + + list.grow_at(1, 0, pool); + assert_eq!(list.as_slice(pool), &[i2, i3]); + + list.grow_at(1, 1, pool); + list.as_mut_slice(pool)[1] = i1; + assert_eq!(list.as_slice(pool), &[i2, i1, i3]); + + // Append nothing at the end. + list.grow_at(3, 0, pool); + assert_eq!(list.as_slice(pool), &[i2, i1, i3]); + + // Append something at the end. + list.grow_at(3, 1, pool); + list.as_mut_slice(pool)[3] = i4; + assert_eq!(list.as_slice(pool), &[i2, i1, i3, i4]); + } +} diff --git a/cranelift/entity/src/map.rs b/cranelift/entity/src/map.rs new file mode 100644 index 0000000000..7eb889e8b4 --- /dev/null +++ b/cranelift/entity/src/map.rs @@ -0,0 +1,309 @@ +//! Densely numbered entity references as mapping keys. + +use crate::iter::{Iter, IterMut}; +use crate::keys::Keys; +use crate::EntityRef; +use alloc::vec::Vec; +use core::cmp::min; +use core::marker::PhantomData; +use core::ops::{Index, IndexMut}; +use core::slice; +#[cfg(feature = "enable-serde")] +use serde::{ + de::{Deserializer, SeqAccess, Visitor}, + ser::{SerializeSeq, Serializer}, + Deserialize, Serialize, +}; + +/// A mapping `K -> V` for densely indexed entity references. +/// +/// The `SecondaryMap` data structure uses the dense index space to implement a map with a vector. +/// Unlike `PrimaryMap`, an `SecondaryMap` can't be used to allocate entity references. It is used +/// to associate secondary information with entities. +/// +/// The map does not track if an entry for a key has been inserted or not. 
Instead it behaves as if +/// all keys have a default entry from the beginning. +#[derive(Debug, Clone)] +pub struct SecondaryMap +where + K: EntityRef, + V: Clone, +{ + elems: Vec, + default: V, + unused: PhantomData, +} + +/// Shared `SecondaryMap` implementation for all value types. +impl SecondaryMap +where + K: EntityRef, + V: Clone, +{ + /// Create a new empty map. + pub fn new() -> Self + where + V: Default, + { + Self { + elems: Vec::new(), + default: Default::default(), + unused: PhantomData, + } + } + + /// Create a new, empty map with the specified capacity. + /// + /// The map will be able to hold exactly `capacity` elements without reallocating. + pub fn with_capacity(capacity: usize) -> Self + where + V: Default, + { + Self { + elems: Vec::with_capacity(capacity), + default: Default::default(), + unused: PhantomData, + } + } + + /// Create a new empty map with a specified default value. + /// + /// This constructor does not require `V` to implement `Default`. + pub fn with_default(default: V) -> Self { + Self { + elems: Vec::new(), + default, + unused: PhantomData, + } + } + + /// Returns the number of elements the map can hold without reallocating. + pub fn capacity(&self) -> usize { + self.elems.capacity() + } + + /// Get the element at `k` if it exists. + #[inline(always)] + pub fn get(&self, k: K) -> Option<&V> { + self.elems.get(k.index()) + } + + /// Is this map completely empty? + #[inline(always)] + pub fn is_empty(&self) -> bool { + self.elems.is_empty() + } + + /// Remove all entries from this map. + #[inline(always)] + pub fn clear(&mut self) { + self.elems.clear() + } + + /// Iterate over all the keys and values in this map. + pub fn iter(&self) -> Iter { + Iter::new(self.elems.iter()) + } + + /// Iterate over all the keys and values in this map, mutable edition. + pub fn iter_mut(&mut self) -> IterMut { + IterMut::new(self.elems.iter_mut()) + } + + /// Iterate over all the keys in this map. + pub fn keys(&self) -> Keys { + Keys::with_len(self.elems.len()) + } + + /// Iterate over all the values in this map. + pub fn values(&self) -> slice::Iter { + self.elems.iter() + } + + /// Iterate over all the values in this map, mutable edition. + pub fn values_mut(&mut self) -> slice::IterMut { + self.elems.iter_mut() + } + + /// Resize the map to have `n` entries by adding default entries as needed. + pub fn resize(&mut self, n: usize) { + self.elems.resize(n, self.default.clone()); + } +} + +/// Immutable indexing into a `SecondaryMap`. +/// +/// All keys are permitted. Untouched entries have the default value. +impl Index for SecondaryMap +where + K: EntityRef, + V: Clone, +{ + type Output = V; + + #[inline(always)] + fn index(&self, k: K) -> &V { + self.elems.get(k.index()).unwrap_or(&self.default) + } +} + +/// Mutable indexing into a `SecondaryMap`. +/// +/// The map grows as needed to accommodate new keys.
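Those two indexing behaviors (read falls back to the default, write grows the backing vector) look like this in practice; a hedged sketch with a hypothetical `E` entity type, assuming the crate-root re-export of `SecondaryMap`:

```rust
use cranelift_entity::{EntityRef, SecondaryMap};

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct E(u32);

impl EntityRef for E {
    fn new(i: usize) -> Self { E(i as u32) }
    fn index(self) -> usize { self.0 as usize }
}

fn main() {
    let mut m: SecondaryMap<E, u32> = SecondaryMap::new();

    // Immutable indexing never panics: untouched keys read as the default.
    assert_eq!(m[E(42)], 0);

    // Mutable indexing grows the backing vector on demand.
    m[E(5)] = 7;
    assert_eq!(m[E(5)], 7);
    assert_eq!(m[E(3)], 0); // filled in with the default while growing
}
```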
+impl IndexMut for SecondaryMap +where + K: EntityRef, + V: Clone, +{ + #[inline(always)] + fn index_mut(&mut self, k: K) -> &mut V { + let i = k.index(); + if i >= self.elems.len() { + self.elems.resize(i + 1, self.default.clone()); + } + &mut self.elems[i] + } +} + +impl PartialEq for SecondaryMap +where + K: EntityRef, + V: Clone + PartialEq, +{ + fn eq(&self, other: &Self) -> bool { + let min_size = min(self.elems.len(), other.elems.len()); + self.default == other.default + && self.elems[..min_size] == other.elems[..min_size] + && self.elems[min_size..].iter().all(|e| *e == self.default) + && other.elems[min_size..].iter().all(|e| *e == other.default) + } +} + +impl Eq for SecondaryMap +where + K: EntityRef, + V: Clone + PartialEq + Eq, +{ +} + +#[cfg(feature = "enable-serde")] +impl Serialize for SecondaryMap +where + K: EntityRef, + V: Clone + PartialEq + Serialize, +{ + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + // TODO: bincode encodes option as "byte for Some/None" and then optionally the content + // TODO: we can actually optimize it by encoding manually bitmask, then elements + let mut elems_cnt = self.elems.len(); + while elems_cnt > 0 && self.elems[elems_cnt - 1] == self.default { + elems_cnt -= 1; + } + let mut seq = serializer.serialize_seq(Some(1 + elems_cnt))?; + seq.serialize_element(&Some(self.default.clone()))?; + for e in self.elems.iter().take(elems_cnt) { + let some_e = Some(e); + seq.serialize_element(if *e == self.default { &None } else { &some_e })?; + } + seq.end() + } +} + +#[cfg(feature = "enable-serde")] +impl<'de, K, V> Deserialize<'de> for SecondaryMap +where + K: EntityRef, + V: Clone + Deserialize<'de>, +{ + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + use alloc::fmt; + struct SecondaryMapVisitor { + unused: PhantomData V>, + } + + impl<'de, K, V> Visitor<'de> for SecondaryMapVisitor + where + K: EntityRef, + V: Clone + Deserialize<'de>, + { + type Value = SecondaryMap; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("struct SecondaryMap") + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: SeqAccess<'de>, + { + match seq.next_element()? { + Some(Some(default_val)) => { + let default_val: V = default_val; // compiler can't infer the type + let mut m = SecondaryMap::with_default(default_val.clone()); + let mut idx = 0; + while let Some(val) = seq.next_element()? { + let val: Option<_> = val; // compiler can't infer the type + m[K::new(idx)] = val.unwrap_or_else(|| default_val.clone()); + idx += 1; + } + Ok(m) + } + _ => Err(serde::de::Error::custom("Default value required")), + } + } + } + + deserializer.deserialize_seq(SecondaryMapVisitor { + unused: PhantomData {}, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // `EntityRef` impl for testing. 
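One subtlety of the `PartialEq` impl above deserves a note: two maps compare equal even when their backing vectors have different physical lengths, as long as every element past the shorter one is the default. A hedged sketch (hypothetical `E` type again):

```rust
use cranelift_entity::{EntityRef, SecondaryMap};

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct E(u32);

impl EntityRef for E {
    fn new(i: usize) -> Self { E(i as u32) }
    fn index(self) -> usize { self.0 as usize }
}

fn main() {
    let mut a: SecondaryMap<E, u32> = SecondaryMap::new();
    let mut b: SecondaryMap<E, u32> = SecondaryMap::new();

    a[E(0)] = 1; // a's vector has length 1
    b[E(0)] = 1;
    b[E(9)] = 0; // b's vector has length 10, but the tail is all defaults

    // Logically the same mapping, so they compare equal.
    assert_eq!(a, b);
}
```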
+ #[derive(Clone, Copy, Debug, PartialEq, Eq)] + struct E(u32); + + impl EntityRef for E { + fn new(i: usize) -> Self { + E(i as u32) + } + fn index(self) -> usize { + self.0 as usize + } + } + + #[test] + fn basic() { + let r0 = E(0); + let r1 = E(1); + let r2 = E(2); + let mut m = SecondaryMap::new(); + + let v: Vec = m.keys().collect(); + assert_eq!(v, []); + + m[r2] = 3; + m[r1] = 5; + + assert_eq!(m[r1], 5); + assert_eq!(m[r2], 3); + + let v: Vec = m.keys().collect(); + assert_eq!(v, [r0, r1, r2]); + + let shared = &m; + assert_eq!(shared[r0], 0); + assert_eq!(shared[r1], 5); + assert_eq!(shared[r2], 3); + } +} diff --git a/cranelift/entity/src/packed_option.rs b/cranelift/entity/src/packed_option.rs new file mode 100644 index 0000000000..63764406c0 --- /dev/null +++ b/cranelift/entity/src/packed_option.rs @@ -0,0 +1,157 @@ +//! Compact representation of `Option` for types with a reserved value. +//! +//! Small Cranelift types like the 32-bit entity references are often used in tables and linked +//! lists where an `Option` is needed. Unfortunately, that would double the size of the tables +//! because `Option` is twice as big as `T`. +//! +//! This module provides a `PackedOption` for types that have a reserved value that can be used +//! to represent `None`. + +use core::fmt; +use core::mem; + +/// Types that have a reserved value which can't be created any other way. +pub trait ReservedValue: Eq { + /// Create an instance of the reserved value. + fn reserved_value() -> Self; +} + +/// Packed representation of `Option`. +#[derive(Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +pub struct PackedOption(T); + +impl PackedOption { + /// Returns `true` if the packed option is a `None` value. + pub fn is_none(&self) -> bool { + self.0 == T::reserved_value() + } + + /// Returns `true` if the packed option is a `Some` value. + pub fn is_some(&self) -> bool { + self.0 != T::reserved_value() + } + + /// Expand the packed option into a normal `Option`. + pub fn expand(self) -> Option { + if self.is_none() { + None + } else { + Some(self.0) + } + } + + /// Maps a `PackedOption` to `Option` by applying a function to a contained value. + pub fn map(self, f: F) -> Option + where + F: FnOnce(T) -> U, + { + self.expand().map(f) + } + + /// Unwrap a packed `Some` value or panic. + pub fn unwrap(self) -> T { + self.expand().unwrap() + } + + /// Unwrap a packed `Some` value or panic. + pub fn expect(self, msg: &str) -> T { + self.expand().expect(msg) + } + + /// Takes the value out of the packed option, leaving a `None` in its place. + pub fn take(&mut self) -> Option { + mem::replace(self, None.into()).expand() + } +} + +impl Default for PackedOption { + /// Create a default packed option representing `None`. + fn default() -> Self { + Self(T::reserved_value()) + } +} + +impl From for PackedOption { + /// Convert `t` into a packed `Some(x)`. + fn from(t: T) -> Self { + debug_assert!( + t != T::reserved_value(), + "Can't make a PackedOption from the reserved value." + ); + Self(t) + } +} + +impl From> for PackedOption { + /// Convert an option into its packed equivalent. 
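The payoff of this design is that a `PackedOption<T>` occupies exactly the same space as `T`, since `None` is encoded as the reserved value rather than a discriminant. A hedged round-trip sketch (the `Ent` type is hypothetical, with `u32::MAX` as its reserved value):

```rust
use std::mem::size_of;
use cranelift_entity::packed_option::{PackedOption, ReservedValue};

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Ent(u32);

impl ReservedValue for Ent {
    fn reserved_value() -> Self { Ent(u32::MAX) }
}

fn main() {
    // No discriminant needed: `None` is the reserved value in disguise.
    assert_eq!(size_of::<PackedOption<Ent>>(), size_of::<Ent>());

    let some: PackedOption<Ent> = Ent(5).into();
    let none: PackedOption<Ent> = None.into();
    assert_eq!(some.expand(), Some(Ent(5)));
    assert!(none.is_none());
}
```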
+ fn from(opt: Option) -> Self { + match opt { + None => Self::default(), + Some(t) => t.into(), + } + } +} + +impl Into> for PackedOption { + fn into(self) -> Option { + self.expand() + } +} + +impl fmt::Debug for PackedOption +where + T: ReservedValue + fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.is_none() { + write!(f, "None") + } else { + write!(f, "Some({:?})", self.0) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // Dummy entity class, with no Copy or Clone. + #[derive(Debug, PartialEq, Eq)] + struct NoC(u32); + + impl ReservedValue for NoC { + fn reserved_value() -> Self { + NoC(13) + } + } + + #[test] + fn moves() { + let x = NoC(3); + let somex: PackedOption = x.into(); + assert!(!somex.is_none()); + assert_eq!(somex.expand(), Some(NoC(3))); + + let none: PackedOption = None.into(); + assert!(none.is_none()); + assert_eq!(none.expand(), None); + } + + // Dummy entity class, with Copy. + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + struct Ent(u32); + + impl ReservedValue for Ent { + fn reserved_value() -> Self { + Ent(13) + } + } + + #[test] + fn copies() { + let x = Ent(2); + let some: PackedOption = x.into(); + assert_eq!(some.expand(), x.into()); + assert_eq!(some, x.into()); + } +} diff --git a/cranelift/entity/src/primary.rs b/cranelift/entity/src/primary.rs new file mode 100644 index 0000000000..974033addd --- /dev/null +++ b/cranelift/entity/src/primary.rs @@ -0,0 +1,404 @@ +//! Densely numbered entity references as mapping keys. +use crate::boxed_slice::BoxedSlice; +use crate::iter::{Iter, IterMut}; +use crate::keys::Keys; +use crate::EntityRef; +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::iter::FromIterator; +use core::marker::PhantomData; +use core::ops::{Index, IndexMut}; +use core::slice; +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; + +/// A primary mapping `K -> V` allocating dense entity references. +/// +/// The `PrimaryMap` data structure uses the dense index space to implement a map with a vector. +/// +/// A primary map contains the main definition of an entity, and it can be used to allocate new +/// entity references with the `push` method. +/// +/// There should only be a single `PrimaryMap` instance for a given `EntityRef` type, otherwise +/// conflicting references will be created. Using unknown keys for indexing will cause a panic. +/// +/// Note that `PrimaryMap` doesn't implement `Deref` or `DerefMut`, which would allow +/// `&PrimaryMap` to convert to `&[V]`. One of the main advantages of `PrimaryMap` is +/// that it only allows indexing with the distinct `EntityRef` key type, so converting to a +/// plain slice would make it easier to use incorrectly. To make a slice of a `PrimaryMap`, use +/// `into_boxed_slice`. +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct PrimaryMap +where + K: EntityRef, +{ + elems: Vec, + unused: PhantomData, +} + +impl PrimaryMap +where + K: EntityRef, +{ + /// Create a new empty map. + pub fn new() -> Self { + Self { + elems: Vec::new(), + unused: PhantomData, + } + } + + /// Create a new empty map with the given capacity. + pub fn with_capacity(capacity: usize) -> Self { + Self { + elems: Vec::with_capacity(capacity), + unused: PhantomData, + } + } + + /// Check if `k` is a valid key in the map. + pub fn is_valid(&self, k: K) -> bool { + k.index() < self.elems.len() + } + + /// Get the element at `k` if it exists. 
+ pub fn get(&self, k: K) -> Option<&V> { + self.elems.get(k.index()) + } + + /// Get the element at `k` if it exists, mutable version. + pub fn get_mut(&mut self, k: K) -> Option<&mut V> { + self.elems.get_mut(k.index()) + } + + /// Is this map completely empty? + pub fn is_empty(&self) -> bool { + self.elems.is_empty() + } + + /// Get the total number of entity references created. + pub fn len(&self) -> usize { + self.elems.len() + } + + /// Iterate over all the keys in this map. + pub fn keys(&self) -> Keys { + Keys::with_len(self.elems.len()) + } + + /// Iterate over all the values in this map. + pub fn values(&self) -> slice::Iter { + self.elems.iter() + } + + /// Iterate over all the values in this map, mutable edition. + pub fn values_mut(&mut self) -> slice::IterMut { + self.elems.iter_mut() + } + + /// Iterate over all the keys and values in this map. + pub fn iter(&self) -> Iter { + Iter::new(self.elems.iter()) + } + + /// Iterate over all the keys and values in this map, mutable edition. + pub fn iter_mut(&mut self) -> IterMut { + IterMut::new(self.elems.iter_mut()) + } + + /// Remove all entries from this map. + pub fn clear(&mut self) { + self.elems.clear() + } + + /// Get the key that will be assigned to the next pushed value. + pub fn next_key(&self) -> K { + K::new(self.elems.len()) + } + + /// Append `v` to the mapping, assigning a new key which is returned. + pub fn push(&mut self, v: V) -> K { + let k = self.next_key(); + self.elems.push(v); + k + } + + /// Returns the last element that was inserted in the map. + pub fn last(&self) -> Option<&V> { + self.elems.last() + } + + /// Reserves capacity for at least `additional` more elements to be inserted. + pub fn reserve(&mut self, additional: usize) { + self.elems.reserve(additional) + } + + /// Reserves the minimum capacity for exactly `additional` more elements to be inserted. + pub fn reserve_exact(&mut self, additional: usize) { + self.elems.reserve_exact(additional) + } + + /// Shrinks the capacity of the `PrimaryMap` as much as possible. + pub fn shrink_to_fit(&mut self) { + self.elems.shrink_to_fit() + } + + /// Consumes this `PrimaryMap` and produces a `BoxedSlice`. + pub fn into_boxed_slice(self) -> BoxedSlice { + unsafe { BoxedSlice::::from_raw(Box::<[V]>::into_raw(self.elems.into_boxed_slice())) } + } +} + +/// Immutable indexing into an `PrimaryMap`. +/// The indexed value must be in the map. +impl Index for PrimaryMap +where + K: EntityRef, +{ + type Output = V; + + fn index(&self, k: K) -> &V { + &self.elems[k.index()] + } +} + +/// Mutable indexing into an `PrimaryMap`. +impl IndexMut for PrimaryMap +where + K: EntityRef, +{ + fn index_mut(&mut self, k: K) -> &mut V { + &mut self.elems[k.index()] + } +} + +impl<'a, K, V> IntoIterator for &'a PrimaryMap +where + K: EntityRef, +{ + type Item = (K, &'a V); + type IntoIter = Iter<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + Iter::new(self.elems.iter()) + } +} + +impl<'a, K, V> IntoIterator for &'a mut PrimaryMap +where + K: EntityRef, +{ + type Item = (K, &'a mut V); + type IntoIter = IterMut<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + IterMut::new(self.elems.iter_mut()) + } +} + +impl FromIterator for PrimaryMap +where + K: EntityRef, +{ + fn from_iter(iter: T) -> Self + where + T: IntoIterator, + { + Self { + elems: Vec::from_iter(iter), + unused: PhantomData, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // `EntityRef` impl for testing. 
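The tests below define their own `E` reference type; for orientation, here is a hedged sketch of typical `PrimaryMap` usage (assuming the crate-root re-exports; in real code `entity_impl!` would generate the `EntityRef` impl):

```rust
use cranelift_entity::{EntityRef, PrimaryMap};

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct E(u32);

impl EntityRef for E {
    fn new(i: usize) -> Self { E(i as u32) }
    fn index(self) -> usize { self.0 as usize }
}

fn main() {
    let mut m: PrimaryMap<E, &str> = PrimaryMap::new();

    // `push` is the only way to mint new keys, so keys stay dense and unique.
    let k0 = m.push("foo");
    let k1 = m.push("bar");
    assert_eq!(m[k0], "foo");
    assert_eq!(m[k1], "bar");
    assert_eq!(m.next_key(), E(2));
}
```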
+ #[derive(Clone, Copy, Debug, PartialEq, Eq)] + struct E(u32); + + impl EntityRef for E { + fn new(i: usize) -> Self { + E(i as u32) + } + fn index(self) -> usize { + self.0 as usize + } + } + + #[test] + fn basic() { + let r0 = E(0); + let r1 = E(1); + let m = PrimaryMap::::new(); + + let v: Vec = m.keys().collect(); + assert_eq!(v, []); + + assert!(!m.is_valid(r0)); + assert!(!m.is_valid(r1)); + } + + #[test] + fn push() { + let mut m = PrimaryMap::new(); + let k0: E = m.push(12); + let k1 = m.push(33); + + assert_eq!(m[k0], 12); + assert_eq!(m[k1], 33); + + let v: Vec = m.keys().collect(); + assert_eq!(v, [k0, k1]); + } + + #[test] + fn iter() { + let mut m: PrimaryMap = PrimaryMap::new(); + m.push(12); + m.push(33); + + let mut i = 0; + for (key, value) in &m { + assert_eq!(key.index(), i); + match i { + 0 => assert_eq!(*value, 12), + 1 => assert_eq!(*value, 33), + _ => panic!(), + } + i += 1; + } + i = 0; + for (key_mut, value_mut) in m.iter_mut() { + assert_eq!(key_mut.index(), i); + match i { + 0 => assert_eq!(*value_mut, 12), + 1 => assert_eq!(*value_mut, 33), + _ => panic!(), + } + i += 1; + } + } + + #[test] + fn iter_rev() { + let mut m: PrimaryMap = PrimaryMap::new(); + m.push(12); + m.push(33); + + let mut i = 2; + for (key, value) in m.iter().rev() { + i -= 1; + assert_eq!(key.index(), i); + match i { + 0 => assert_eq!(*value, 12), + 1 => assert_eq!(*value, 33), + _ => panic!(), + } + } + + i = 2; + for (key, value) in m.iter_mut().rev() { + i -= 1; + assert_eq!(key.index(), i); + match i { + 0 => assert_eq!(*value, 12), + 1 => assert_eq!(*value, 33), + _ => panic!(), + } + } + } + #[test] + fn keys() { + let mut m: PrimaryMap = PrimaryMap::new(); + m.push(12); + m.push(33); + + let mut i = 0; + for key in m.keys() { + assert_eq!(key.index(), i); + i += 1; + } + } + + #[test] + fn keys_rev() { + let mut m: PrimaryMap = PrimaryMap::new(); + m.push(12); + m.push(33); + + let mut i = 2; + for key in m.keys().rev() { + i -= 1; + assert_eq!(key.index(), i); + } + } + + #[test] + fn values() { + let mut m: PrimaryMap = PrimaryMap::new(); + m.push(12); + m.push(33); + + let mut i = 0; + for value in m.values() { + match i { + 0 => assert_eq!(*value, 12), + 1 => assert_eq!(*value, 33), + _ => panic!(), + } + i += 1; + } + i = 0; + for value_mut in m.values_mut() { + match i { + 0 => assert_eq!(*value_mut, 12), + 1 => assert_eq!(*value_mut, 33), + _ => panic!(), + } + i += 1; + } + } + + #[test] + fn values_rev() { + let mut m: PrimaryMap = PrimaryMap::new(); + m.push(12); + m.push(33); + + let mut i = 2; + for value in m.values().rev() { + i -= 1; + match i { + 0 => assert_eq!(*value, 12), + 1 => assert_eq!(*value, 33), + _ => panic!(), + } + } + i = 2; + for value_mut in m.values_mut().rev() { + i -= 1; + match i { + 0 => assert_eq!(*value_mut, 12), + 1 => assert_eq!(*value_mut, 33), + _ => panic!(), + } + } + } + + #[test] + fn from_iter() { + let mut m: PrimaryMap = PrimaryMap::new(); + m.push(12); + m.push(33); + + let n = m.values().collect::>(); + assert!(m.len() == n.len()); + for (me, ne) in m.values().zip(n.values()) { + assert!(*me == **ne); + } + } +} diff --git a/cranelift/entity/src/set.rs b/cranelift/entity/src/set.rs new file mode 100644 index 0000000000..ac8b156be2 --- /dev/null +++ b/cranelift/entity/src/set.rs @@ -0,0 +1,246 @@ +//! Densely numbered entity references as set keys. + +use crate::keys::Keys; +use crate::EntityRef; +use alloc::vec::Vec; +use core::marker::PhantomData; + +/// A set of `K` for densely indexed entity references. 
+/// +/// The `EntitySet` data structure uses the dense index space to implement a set with a bitvector. +/// Like `SecondaryMap`, an `EntitySet` is used to associate secondary information with entities. +#[derive(Debug, Clone)] +pub struct EntitySet +where + K: EntityRef, +{ + elems: Vec, + len: usize, + unused: PhantomData, +} + +/// Shared `EntitySet` implementation for all value types. +impl EntitySet +where + K: EntityRef, +{ + /// Create a new empty set. + pub fn new() -> Self { + Self { + elems: Vec::new(), + len: 0, + unused: PhantomData, + } + } + + /// Creates a new empty set with the specified capacity. + pub fn with_capacity(capacity: usize) -> Self { + Self { + elems: Vec::with_capacity((capacity + 7) / 8), + ..Self::new() + } + } + + /// Check if the set contains `k`. + pub fn contains(&self, k: K) -> bool { + let index = k.index(); + if index < self.len { + (self.elems[index / 8] & (1 << (index % 8))) != 0 + } else { + false + } + } + + /// Is this set completely empty? + pub fn is_empty(&self) -> bool { + if self.len != 0 { + false + } else { + self.elems.iter().all(|&e| e == 0) + } + } + + /// Returns the cardinality of the set. More precisely, it returns the number of calls to + /// `insert` with distinct key values that have happened since the set was most recently + /// `clear`ed or created with `new`. + pub fn cardinality(&self) -> usize { + let mut n: usize = 0; + for byte_ix in 0..self.len / 8 { + n += self.elems[byte_ix].count_ones() as usize; + } + for bit_ix in (self.len / 8) * 8..self.len { + if (self.elems[bit_ix / 8] & (1 << (bit_ix % 8))) != 0 { + n += 1; + } + } + n + } + + /// Remove all entries from this set. + pub fn clear(&mut self) { + self.len = 0; + self.elems.clear() + } + + /// Iterate over all the keys in this set. + pub fn keys(&self) -> Keys { + Keys::with_len(self.len) + } + + /// Resize the set to have `n` entries by adding default entries as needed. + pub fn resize(&mut self, n: usize) { + self.elems.resize((n + 7) / 8, 0); + self.len = n + } + + /// Insert the element at `k`. + pub fn insert(&mut self, k: K) -> bool { + let index = k.index(); + if index >= self.len { + self.resize(index + 1) + } + let result = !self.contains(k); + self.elems[index / 8] |= 1 << (index % 8); + result + } + + /// Removes and returns the entity with the highest index in the set, if any. + pub fn pop(&mut self) -> Option { + if self.len == 0 { + return None; + } + + // Clear the last known entity in the list. + let last_index = self.len - 1; + self.elems[last_index / 8] &= !(1 << (last_index % 8)); + + // Set the length to the next last stored entity or zero if we popped + // the last entity. + self.len = self + .elems + .iter() + .enumerate() + .rev() + .find(|(_, &byte)| byte != 0) + // Map `i` from byte index to bit level index. + // `(i + 1) * 8` = Last bit in byte. + // `last - byte.leading_zeros()` = last set bit in byte. + // `as usize` won't ever truncate as the potential range is `0..=8`. + .map_or(0, |(i, byte)| ((i + 1) * 8) - byte.leading_zeros() as usize); + + Some(K::new(last_index)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use core::u32; + + // `EntityRef` impl for testing.
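The byte-and-bit addressing used by `contains`, `insert`, and `cardinality` above is worth seeing in isolation. This standalone sketch (illustrative only, not the crate's code) reproduces the mapping from a dense key index onto the bitvector:

```rust
// Illustrative: one byte covers eight consecutive keys.
fn byte_and_mask(index: usize) -> (usize, u8) {
    (index / 8, 1 << (index % 8))
}

fn main() {
    let mut elems = vec![0u8; 2]; // room for keys 0..16
    for &index in &[1usize, 8, 15] {
        let (byte, mask) = byte_and_mask(index);
        elems[byte] |= mask; // insert
    }
    let (byte, mask) = byte_and_mask(8);
    assert!(elems[byte] & mask != 0); // contains(8)

    // cardinality(): count set bits byte by byte.
    let n: u32 = elems.iter().map(|b| b.count_ones()).sum();
    assert_eq!(n, 3);
}
```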
+ #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] + struct E(u32); + + impl EntityRef for E { + fn new(i: usize) -> Self { + E(i as u32) + } + fn index(self) -> usize { + self.0 as usize + } + } + + #[test] + fn basic() { + let r0 = E(0); + let r1 = E(1); + let r2 = E(2); + let mut m = EntitySet::new(); + + let v: Vec = m.keys().collect(); + assert_eq!(v, []); + assert!(m.is_empty()); + + m.insert(r2); + m.insert(r1); + + assert!(!m.contains(r0)); + assert!(m.contains(r1)); + assert!(m.contains(r2)); + assert!(!m.contains(E(3))); + assert!(!m.is_empty()); + + let v: Vec = m.keys().collect(); + assert_eq!(v, [r0, r1, r2]); + + m.resize(20); + assert!(!m.contains(E(3))); + assert!(!m.contains(E(4))); + assert!(!m.contains(E(8))); + assert!(!m.contains(E(15))); + assert!(!m.contains(E(19))); + + m.insert(E(8)); + m.insert(E(15)); + assert!(!m.contains(E(3))); + assert!(!m.contains(E(4))); + assert!(m.contains(E(8))); + assert!(!m.contains(E(9))); + assert!(!m.contains(E(14))); + assert!(m.contains(E(15))); + assert!(!m.contains(E(16))); + assert!(!m.contains(E(19))); + assert!(!m.contains(E(20))); + assert!(!m.contains(E(u32::MAX))); + + m.clear(); + assert!(m.is_empty()); + } + + #[test] + fn pop_ordered() { + let r0 = E(0); + let r1 = E(1); + let r2 = E(2); + let mut m = EntitySet::new(); + m.insert(r0); + m.insert(r1); + m.insert(r2); + + assert_eq!(r2, m.pop().unwrap()); + assert_eq!(r1, m.pop().unwrap()); + assert_eq!(r0, m.pop().unwrap()); + assert!(m.pop().is_none()); + assert!(m.pop().is_none()); + } + + #[test] + fn pop_unordered() { + let mut blocks = [ + E(0), + E(1), + E(6), + E(7), + E(5), + E(9), + E(10), + E(2), + E(3), + E(11), + E(12), + ]; + + let mut m = EntitySet::new(); + for &block in &blocks { + m.insert(block); + } + assert_eq!(m.len, 13); + blocks.sort(); + + for &block in blocks.iter().rev() { + assert_eq!(block, m.pop().unwrap()); + } + + assert!(m.is_empty()); + } +} diff --git a/cranelift/entity/src/sparse.rs b/cranelift/entity/src/sparse.rs new file mode 100644 index 0000000000..57d971b281 --- /dev/null +++ b/cranelift/entity/src/sparse.rs @@ -0,0 +1,364 @@ +//! Sparse mapping of entity references to larger value types. +//! +//! This module provides a `SparseMap` data structure which implements a sparse mapping from an +//! `EntityRef` key to a value type that may be on the larger side. This implementation is based on +//! the paper: +//! +//! > Briggs, Torczon, *An efficient representation for sparse sets*, +//! ACM Letters on Programming Languages and Systems, Volume 2, Issue 1-4, March-Dec. 1993. + +use crate::map::SecondaryMap; +use crate::EntityRef; +use alloc::vec::Vec; +use core::mem; +use core::slice; +use core::u32; + +/// Trait for extracting keys from values stored in a `SparseMap`. +/// +/// All values stored in a `SparseMap` must keep track of their own key in the map and implement +/// this trait to provide access to the key. +pub trait SparseMapValue { + /// Get the key of this sparse map value. This key is not allowed to change while the value + /// is a member of the map. + fn key(&self) -> K; +} + +/// A sparse mapping of entity references. +/// +/// A `SparseMap` map provides: +/// +/// - Memory usage equivalent to `SecondaryMap` + `Vec`, so much smaller than +/// `SecondaryMap` for sparse mappings of larger `V` types. +/// - Constant time lookup, slightly slower than `SecondaryMap`. +/// - A very fast, constant time `clear()` operation. +/// - Fast insert and erase operations. +/// - Stable iteration that is as fast as a `Vec`. 
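The trick from the Briggs-Torczon paper is that the sparse array may contain stale garbage; a lookup is trusted only when it round-trips through the dense array. A minimal sketch of that invariant, simplified to a set of `u32` keys rather than the crate's actual types:

```rust
// Simplified Briggs-Torczon sparse set over small integer keys.
struct SparseSet {
    sparse: Vec<u32>, // may contain stale indices; never trusted alone
    dense: Vec<u32>,  // the authoritative list of members
}

impl SparseSet {
    fn new(universe: usize) -> Self {
        Self { sparse: vec![0; universe], dense: Vec::new() }
    }

    fn contains(&self, key: u32) -> bool {
        // A stale `sparse` entry fails the round-trip check, which is what
        // lets `clear()` below be O(1) without wiping `sparse`.
        match self.dense.get(self.sparse[key as usize] as usize) {
            Some(&k) => k == key,
            None => false,
        }
    }

    fn insert(&mut self, key: u32) {
        if !self.contains(key) {
            self.sparse[key as usize] = self.dense.len() as u32;
            self.dense.push(key);
        }
    }

    fn clear(&mut self) {
        self.dense.clear(); // constant time; `sparse` keeps its garbage
    }
}

fn main() {
    let mut s = SparseSet::new(16);
    s.insert(3);
    assert!(s.contains(3) && !s.contains(4));
    s.clear();
    assert!(!s.contains(3)); // stale sparse[3] no longer round-trips
}
```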
+/// +/// # Compared to `SecondaryMap` +/// +/// When should we use a `SparseMap` instead of a `SecondaryMap`? First of all, +/// `SparseMap` does not provide the functionality of a `PrimaryMap` which can allocate and assign +/// entity references to objects as they are pushed onto the map. It is only the secondary entity +/// maps that can be replaced with a `SparseMap`. +/// +/// - A secondary entity map assigns a default mapping to all keys. It doesn't distinguish between +/// an unmapped key and one that maps to the default value. `SparseMap` does not require +/// `Default` values, and it tracks accurately if a key has been mapped or not. +/// - Iterating over the contents of a `SecondaryMap` is linear in the size of the *key space*, +/// while iterating over a `SparseMap` is linear in the number of elements in the mapping. This +/// is an advantage precisely when the mapping is sparse. +/// - `SparseMap::clear()` is constant time and super-fast. `SecondaryMap::clear()` is linear in +/// the size of the key space. (Or rather, the `resize()` call that must follow the `clear()` +/// is.) +/// - `SparseMap` requires the values to implement `SparseMapValue` which means that they must +/// contain their own key. +pub struct SparseMap +where + K: EntityRef, + V: SparseMapValue, +{ + sparse: SecondaryMap, + dense: Vec, +} + +impl SparseMap +where + K: EntityRef, + V: SparseMapValue, +{ + /// Create a new empty mapping. + pub fn new() -> Self { + Self { + sparse: SecondaryMap::new(), + dense: Vec::new(), + } + } + + /// Returns the number of elements in the map. + pub fn len(&self) -> usize { + self.dense.len() + } + + /// Returns `true` if the map contains no elements. + pub fn is_empty(&self) -> bool { + self.dense.is_empty() + } + + /// Remove all elements from the mapping. + pub fn clear(&mut self) { + self.dense.clear(); + } + + /// Returns a reference to the value corresponding to the key. + pub fn get(&self, key: K) -> Option<&V> { + if let Some(idx) = self.sparse.get(key).cloned() { + if let Some(entry) = self.dense.get(idx as usize) { + if entry.key() == key { + return Some(entry); + } + } + } + None + } + + /// Returns a mutable reference to the value corresponding to the key. + /// + /// Note that the returned value must not be mutated in a way that would change its key. This + /// would invalidate the sparse set data structure. + pub fn get_mut(&mut self, key: K) -> Option<&mut V> { + if let Some(idx) = self.sparse.get(key).cloned() { + if let Some(entry) = self.dense.get_mut(idx as usize) { + if entry.key() == key { + return Some(entry); + } + } + } + None + } + + /// Return the index into `dense` of the value corresponding to `key`. + fn index(&self, key: K) -> Option { + if let Some(idx) = self.sparse.get(key).cloned() { + let idx = idx as usize; + if let Some(entry) = self.dense.get(idx) { + if entry.key() == key { + return Some(idx); + } + } + } + None + } + + /// Return `true` if the map contains a value corresponding to `key`. + pub fn contains_key(&self, key: K) -> bool { + self.get(key).is_some() + } + + /// Insert a value into the map. + /// + /// If the map did not have this key present, `None` is returned. + /// + /// If the map did have this key present, the value is updated, and the old value is returned. + /// + /// It is not necessary to provide a key since the value knows its own key already. + pub fn insert(&mut self, value: V) -> Option { + let key = value.key(); + + // Replace the existing entry for `key` if there is one.
+ if let Some(entry) = self.get_mut(key) { + return Some(mem::replace(entry, value)); + } + + // There was no previous entry for `key`. Add it to the end of `dense`. + let idx = self.dense.len(); + debug_assert!(idx <= u32::MAX as usize, "SparseMap overflow"); + self.dense.push(value); + self.sparse[key] = idx as u32; + None + } + + /// Remove a value from the map and return it. + pub fn remove(&mut self, key: K) -> Option { + if let Some(idx) = self.index(key) { + let back = self.dense.pop().unwrap(); + + // Are we popping the back of `dense`? + if idx == self.dense.len() { + return Some(back); + } + + // We're removing an element from the middle of `dense`. + // Replace the element at `idx` with the back of `dense`. + // Repair `sparse` first. + self.sparse[back.key()] = idx as u32; + return Some(mem::replace(&mut self.dense[idx], back)); + } + + // Nothing to remove. + None + } + + /// Remove the last value from the map. + pub fn pop(&mut self) -> Option { + self.dense.pop() + } + + /// Get an iterator over the values in the map. + /// + /// The iteration order is entirely determined by the preceding sequence of `insert` and + /// `remove` operations. In particular, if no elements were removed, this is the insertion + /// order. + pub fn values(&self) -> slice::Iter { + self.dense.iter() + } + + /// Get the values as a slice. + pub fn as_slice(&self) -> &[V] { + self.dense.as_slice() + } +} + +/// Iterating over the elements of a set. +impl<'a, K, V> IntoIterator for &'a SparseMap +where + K: EntityRef, + V: SparseMapValue, +{ + type Item = &'a V; + type IntoIter = slice::Iter<'a, V>; + + fn into_iter(self) -> Self::IntoIter { + self.values() + } +} + +/// Any `EntityRef` can be used as a sparse map value representing itself. +impl SparseMapValue for T +where + T: EntityRef, +{ + fn key(&self) -> Self { + *self + } +} + +/// A sparse set of entity references. +/// +/// Any type that implements `EntityRef` can be used as a sparse set value too. +pub type SparseSet = SparseMap; + +#[cfg(test)] +mod tests { + use super::*; + use crate::EntityRef; + + /// An opaque reference to an instruction in a function. + #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] + pub struct Inst(u32); + entity_impl!(Inst, "inst"); + + // Mock key-value object for testing. 
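Since any `EntityRef` is its own `SparseMapValue`, the `SparseSet` alias is the easiest way to exercise this API. A hedged sketch (hypothetical `E` type; the crate-root re-export of `SparseSet` is an assumption):

```rust
use cranelift_entity::{EntityRef, SparseSet};

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct E(u32);

impl EntityRef for E {
    fn new(i: usize) -> Self { E(i as u32) }
    fn index(self) -> usize { self.0 as usize }
}

fn main() {
    let mut set: SparseSet<E> = SparseSet::new();

    assert_eq!(set.insert(E(4)), None);       // new member
    assert_eq!(set.insert(E(4)), Some(E(4))); // replaces the old entry
    assert!(set.contains_key(E(4)));

    set.clear(); // constant time, regardless of key-space size
    assert!(set.is_empty());
}
```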
+ #[derive(PartialEq, Eq, Debug)] + struct Obj(Inst, &'static str); + + impl SparseMapValue for Obj { + fn key(&self) -> Inst { + self.0 + } + } + + #[test] + fn empty_immutable_map() { + let i1 = Inst::new(1); + let map: SparseMap = SparseMap::new(); + + assert!(map.is_empty()); + assert_eq!(map.len(), 0); + assert_eq!(map.get(i1), None); + assert_eq!(map.values().count(), 0); + } + + #[test] + fn single_entry() { + let i0 = Inst::new(0); + let i1 = Inst::new(1); + let i2 = Inst::new(2); + let mut map = SparseMap::new(); + + assert!(map.is_empty()); + assert_eq!(map.len(), 0); + assert_eq!(map.get(i1), None); + assert_eq!(map.get_mut(i1), None); + assert_eq!(map.remove(i1), None); + + assert_eq!(map.insert(Obj(i1, "hi")), None); + assert!(!map.is_empty()); + assert_eq!(map.len(), 1); + assert_eq!(map.get(i0), None); + assert_eq!(map.get(i1), Some(&Obj(i1, "hi"))); + assert_eq!(map.get(i2), None); + assert_eq!(map.get_mut(i0), None); + assert_eq!(map.get_mut(i1), Some(&mut Obj(i1, "hi"))); + assert_eq!(map.get_mut(i2), None); + + assert_eq!(map.remove(i0), None); + assert_eq!(map.remove(i2), None); + assert_eq!(map.remove(i1), Some(Obj(i1, "hi"))); + assert_eq!(map.len(), 0); + assert_eq!(map.get(i1), None); + assert_eq!(map.get_mut(i1), None); + assert_eq!(map.remove(i0), None); + assert_eq!(map.remove(i1), None); + assert_eq!(map.remove(i2), None); + } + + #[test] + fn multiple_entries() { + let i0 = Inst::new(0); + let i1 = Inst::new(1); + let i2 = Inst::new(2); + let i3 = Inst::new(3); + let mut map = SparseMap::new(); + + assert_eq!(map.insert(Obj(i2, "foo")), None); + assert_eq!(map.insert(Obj(i1, "bar")), None); + assert_eq!(map.insert(Obj(i0, "baz")), None); + + // Iteration order = insertion order when nothing has been removed yet. + assert_eq!( + map.values().map(|obj| obj.1).collect::>(), + ["foo", "bar", "baz"] + ); + + assert_eq!(map.len(), 3); + assert_eq!(map.get(i0), Some(&Obj(i0, "baz"))); + assert_eq!(map.get(i1), Some(&Obj(i1, "bar"))); + assert_eq!(map.get(i2), Some(&Obj(i2, "foo"))); + assert_eq!(map.get(i3), None); + + // Remove front object, causing back to be swapped down. + assert_eq!(map.remove(i1), Some(Obj(i1, "bar"))); + assert_eq!(map.len(), 2); + assert_eq!(map.get(i0), Some(&Obj(i0, "baz"))); + assert_eq!(map.get(i1), None); + assert_eq!(map.get(i2), Some(&Obj(i2, "foo"))); + assert_eq!(map.get(i3), None); + + // Reinsert something at a previously used key. + assert_eq!(map.insert(Obj(i1, "barbar")), None); + assert_eq!(map.len(), 3); + assert_eq!(map.get(i0), Some(&Obj(i0, "baz"))); + assert_eq!(map.get(i1), Some(&Obj(i1, "barbar"))); + assert_eq!(map.get(i2), Some(&Obj(i2, "foo"))); + assert_eq!(map.get(i3), None); + + // Replace an entry. + assert_eq!(map.insert(Obj(i0, "bazbaz")), Some(Obj(i0, "baz"))); + assert_eq!(map.len(), 3); + assert_eq!(map.get(i0), Some(&Obj(i0, "bazbaz"))); + assert_eq!(map.get(i1), Some(&Obj(i1, "barbar"))); + assert_eq!(map.get(i2), Some(&Obj(i2, "foo"))); + assert_eq!(map.get(i3), None); + + // Check the reference `IntoIter` impl. 
+ let mut v = Vec::new(); + for i in &map { + v.push(i.1); + } + assert_eq!(v.len(), map.len()); + } + + #[test] + fn entity_set() { + let i0 = Inst::new(0); + let i1 = Inst::new(1); + let mut set = SparseSet::new(); + + assert_eq!(set.insert(i0), None); + assert_eq!(set.insert(i0), Some(i0)); + assert_eq!(set.insert(i1), None); + assert_eq!(set.get(i0), Some(&i0)); + assert_eq!(set.get(i1), Some(&i1)); + } +} diff --git a/cranelift/faerie/Cargo.toml b/cranelift/faerie/Cargo.toml new file mode 100644 index 0000000000..63d26264e3 --- /dev/null +++ b/cranelift/faerie/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "cranelift-faerie" +version = "0.59.0" +authors = ["The Cranelift Project Developers"] +description = "Emit Cranelift output to native object files with Faerie" +repository = "https://github.com/bytecodealliance/cranelift" +documentation = "https://cranelift.readthedocs.io/" +license = "Apache-2.0 WITH LLVM-exception" +readme = "README.md" +edition = "2018" + +[dependencies] +cranelift-module = { path = "../module", version = "0.59.0" } +faerie = "0.14.0" +goblin = "0.1.0" +anyhow = "1.0" +target-lexicon = "0.10" + +[dependencies.cranelift-codegen] +path = "../codegen" +version = "0.59.0" +default-features = false +features = ["std"] + +[badges] +maintenance = { status = "experimental" } +travis-ci = { repository = "bytecodealliance/cranelift" } diff --git a/cranelift/faerie/LICENSE b/cranelift/faerie/LICENSE new file mode 100644 index 0000000000..f9d81955f4 --- /dev/null +++ b/cranelift/faerie/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
+
diff --git a/cranelift/faerie/README.md b/cranelift/faerie/README.md
new file mode 100644
index 0000000000..666d2db59d
--- /dev/null
+++ b/cranelift/faerie/README.md
@@ -0,0 +1,4 @@
+This crate contains a library that enables
+[Cranelift](https://crates.io/crates/cranelift)
+to emit native object (".o") files, using the
+[Faerie](https://crates.io/crates/faerie) library.
diff --git a/cranelift/faerie/src/backend.rs b/cranelift/faerie/src/backend.rs
new file mode 100644
index 0000000000..40cca8eddc
--- /dev/null
+++ b/cranelift/faerie/src/backend.rs
@@ -0,0 +1,503 @@
+//! Defines `FaerieBackend`.
+
+use crate::container;
+use crate::traps::{FaerieTrapManifest, FaerieTrapSink};
+use anyhow::Error;
+use cranelift_codegen::binemit::{
+    Addend, CodeOffset, NullStackmapSink, NullTrapSink, Reloc, RelocSink, Stackmap, StackmapSink,
+};
+use cranelift_codegen::isa::TargetIsa;
+use cranelift_codegen::{self, binemit, ir};
+use cranelift_module::{
+    Backend, DataContext, DataDescription, DataId, FuncId, Init, Linkage, ModuleError,
+    ModuleNamespace, ModuleResult, TrapSite,
+};
+use faerie;
+use std::convert::TryInto;
+use std::fs::File;
+use target_lexicon::Triple;
+
+#[derive(Debug)]
+/// Setting to enable collection of traps. Setting this to `Enabled` in
+/// `FaerieBuilder` means that a `FaerieTrapManifest` will be present
+/// in the `FaerieProduct`.
+pub enum FaerieTrapCollection {
+    /// `FaerieProduct::trap_manifest` will be `None`
+    Disabled,
+    /// `FaerieProduct::trap_manifest` will be `Some`
+    Enabled,
+}
+
+/// A builder for `FaerieBackend`.
+pub struct FaerieBuilder {
+    isa: Box<dyn TargetIsa>,
+    name: String,
+    collect_traps: FaerieTrapCollection,
+    libcall_names: Box<dyn Fn(ir::LibCall) -> String>,
+}
+
+impl FaerieBuilder {
+    /// Create a new `FaerieBuilder` using the given Cranelift target that
+    /// can be passed to
+    /// [`Module::new`](cranelift_module::Module::new).
+    ///
+    /// Faerie output requires that the `TargetIsa` have PIC (Position
+    /// Independent Code) enabled.
+    ///
+    /// The `collect_traps` setting determines whether trap information is
+    /// collected in a `FaerieTrapManifest` available in the `FaerieProduct`.
+    ///
+    /// The `libcall_names` function provides a way to translate `cranelift_codegen`'s `ir::LibCall`
+    /// enum to symbols. LibCalls are inserted in the IR as part of the legalization of certain
+    /// floating-point instructions, and for stack probes. If you don't know what to use for this
+    /// argument, use `cranelift_module::default_libcall_names()`.
+    pub fn new(
+        isa: Box<dyn TargetIsa>,
+        name: String,
+        collect_traps: FaerieTrapCollection,
+        libcall_names: Box<dyn Fn(ir::LibCall) -> String>,
+    ) -> ModuleResult<Self> {
+        if !isa.flags().is_pic() {
+            return Err(ModuleError::Backend(
+                "faerie requires TargetIsa be PIC".to_owned(),
+            ));
+        }
+        Ok(Self {
+            isa,
+            name,
+            collect_traps,
+            libcall_names,
+        })
+    }
+}
+
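[Editor's note: a minimal usage sketch of the builder documented above; not
part of this diff. It assumes the cranelift-codegen `settings`/`isa` APIs and
cranelift-module's `Module` and `default_libcall_names` roughly as they stood
around version 0.59, so exact paths and signatures may differ.]

    use cranelift_codegen::{isa, settings};
    use cranelift_module::{default_libcall_names, Module};
    use std::str::FromStr;
    use target_lexicon::Triple;

    // Faerie requires position-independent code, so enable it up front.
    let mut flag_builder = settings::builder();
    flag_builder.enable("is_pic").unwrap();
    let isa = isa::lookup(Triple::from_str("x86_64-unknown-linux-gnu").unwrap())
        .unwrap()
        .finish(settings::Flags::new(flag_builder));

    let builder = FaerieBuilder::new(
        isa,
        "example.o".to_owned(),
        FaerieTrapCollection::Disabled,
        default_libcall_names(),
    )
    .unwrap();
    let mut module = Module::<FaerieBackend>::new(builder);
    // ... declare and define functions and data, then:
    let product = module.finish();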
+/// A `FaerieBackend` implements `Backend` and emits ".o" files using the `faerie` library.
+///
+/// See the `FaerieBuilder` for a convenient way to construct `FaerieBackend` instances.
+pub struct FaerieBackend {
+    isa: Box<dyn TargetIsa>,
+    artifact: faerie::Artifact,
+    trap_manifest: Option<FaerieTrapManifest>,
+    libcall_names: Box<dyn Fn(ir::LibCall) -> String>,
+}
+
+pub struct FaerieCompiledFunction {
+    code_length: u32,
+}
+
+impl FaerieCompiledFunction {
+    pub fn code_length(&self) -> u32 {
+        self.code_length
+    }
+}
+
+pub struct FaerieCompiledData {}
+
+impl Backend for FaerieBackend {
+    type Builder = FaerieBuilder;
+
+    type CompiledFunction = FaerieCompiledFunction;
+    type CompiledData = FaerieCompiledData;
+
+    // There's no need to return individual artifacts; we're writing them into
+    // the output file instead.
+    type FinalizedFunction = ();
+    type FinalizedData = ();
+
+    /// The returned value here provides functions for emitting object files
+    /// to memory and files.
+    type Product = FaerieProduct;
+
+    /// Create a new `FaerieBackend` using the given Cranelift target.
+    fn new(builder: FaerieBuilder) -> Self {
+        Self {
+            artifact: faerie::Artifact::new(builder.isa.triple().clone(), builder.name),
+            isa: builder.isa,
+            trap_manifest: match builder.collect_traps {
+                FaerieTrapCollection::Enabled => Some(FaerieTrapManifest::new()),
+                FaerieTrapCollection::Disabled => None,
+            },
+            libcall_names: builder.libcall_names,
+        }
+    }
+
+    fn isa(&self) -> &dyn TargetIsa {
+        &*self.isa
+    }
+
+    fn declare_function(&mut self, _id: FuncId, name: &str, linkage: Linkage) {
+        self.artifact
+            .declare(name, translate_function_linkage(linkage))
+            .expect("inconsistent declarations");
+    }
+
+    fn declare_data(
+        &mut self,
+        _id: DataId,
+        name: &str,
+        linkage: Linkage,
+        writable: bool,
+        tls: bool,
+        align: Option<u8>,
+    ) {
+        assert!(!tls, "Faerie doesn't yet support TLS");
+        self.artifact
+            .declare(name, translate_data_linkage(linkage, writable, align))
+            .expect("inconsistent declarations");
+    }
+
+    fn define_function(
+        &mut self,
+        _id: FuncId,
+        name: &str,
+        ctx: &cranelift_codegen::Context,
+        namespace: &ModuleNamespace<Self>,
+        total_size: u32,
+    ) -> ModuleResult<FaerieCompiledFunction> {
+        let mut code: Vec<u8> = vec![0; total_size as usize];
+        // TODO: Replace this with FaerieStackmapSink once it is implemented.
+        let mut stackmap_sink = NullStackmapSink {};
+
+        // Non-lexical lifetimes would obviate the braces here.
+        {
+            let mut reloc_sink = FaerieRelocSink {
+                triple: self.isa.triple().clone(),
+                artifact: &mut self.artifact,
+                name,
+                namespace,
+                libcall_names: &*self.libcall_names,
+            };
+
+            if let Some(ref mut trap_manifest) = self.trap_manifest {
+                let mut trap_sink = FaerieTrapSink::new(name, total_size);
+                unsafe {
+                    ctx.emit_to_memory(
+                        &*self.isa,
+                        code.as_mut_ptr(),
+                        &mut reloc_sink,
+                        &mut trap_sink,
+                        &mut stackmap_sink,
+                    )
+                };
+                trap_manifest.add_sink(trap_sink);
+            } else {
+                let mut trap_sink = NullTrapSink {};
+                unsafe {
+                    ctx.emit_to_memory(
+                        &*self.isa,
+                        code.as_mut_ptr(),
+                        &mut reloc_sink,
+                        &mut trap_sink,
+                        &mut stackmap_sink,
+                    )
+                };
+            }
+        }
+
+        // Because `define` takes ownership of `code`, this is our last chance
+        // to read its length.
+        let code_length = code.len() as u32;
+
+        self.artifact
+            .define(name, code)
+            .expect("inconsistent declaration");
+
+        Ok(FaerieCompiledFunction { code_length })
+    }
+
+    fn define_function_bytes(
+        &mut self,
+        _id: FuncId,
+        name: &str,
+        bytes: &[u8],
+        _namespace: &ModuleNamespace<Self>,
+        traps: Vec<TrapSite>,
+    ) -> ModuleResult<FaerieCompiledFunction> {
+        let code_length: u32 = match bytes.len().try_into() {
+            Ok(code_length) => code_length,
+            _ => Err(ModuleError::FunctionTooLarge(name.to_string()))?,
+        };
+
+        if let Some(ref mut trap_manifest) = self.trap_manifest {
+            let trap_sink = FaerieTrapSink::new_with_sites(name, code_length, traps);
+            trap_manifest.add_sink(trap_sink);
+        }
+
+        self.artifact
+            .define(name, bytes.to_vec())
+            .expect("inconsistent declaration");
+
+        Ok(FaerieCompiledFunction { code_length })
+    }
+
+    fn define_data(
+        &mut self,
+        _id: DataId,
+        name: &str,
+        _writable: bool,
+        tls: bool,
+        _align: Option<u8>,
+        data_ctx: &DataContext,
+        namespace: &ModuleNamespace<Self>,
+    ) -> ModuleResult<FaerieCompiledData> {
+        assert!(!tls, "Faerie doesn't yet support TLS");
+        let &DataDescription {
+            ref init,
+            ref function_decls,
+            ref data_decls,
+            ref function_relocs,
+            ref data_relocs,
+        } = data_ctx.description();
+
+        for &(offset, id) in function_relocs {
+            let to = &namespace.get_function_decl(&function_decls[id]).name;
+            self.artifact
+                .link(faerie::Link {
+                    from: name,
+                    to,
+                    at: u64::from(offset),
+                })
+                .map_err(|e| ModuleError::Backend(e.to_string()))?;
+        }
+        for &(offset, id, addend) in data_relocs {
+            debug_assert_eq!(
+                addend, 0,
+                "faerie doesn't support addends in data section relocations yet"
+            );
+            let to = &namespace.get_data_decl(&data_decls[id]).name;
+            self.artifact
+                .link(faerie::Link {
+                    from: name,
+                    to,
+                    at: u64::from(offset),
+                })
+                .map_err(|e| ModuleError::Backend(e.to_string()))?;
+        }
+
+        match *init {
+            Init::Uninitialized => {
+                panic!("data is not initialized yet");
+            }
+            Init::Zeros { size } => {
+                self.artifact
+                    .define_zero_init(name, size)
+                    .expect("inconsistent declaration");
+            }
+            Init::Bytes { ref contents } => {
+                self.artifact
+                    .define(name, contents.to_vec())
+                    .expect("inconsistent declaration");
+            }
+        }
+
+        Ok(FaerieCompiledData {})
+    }
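[Editor's note: a hedged sketch of how `define_data` above is normally
reached through the cranelift-module API; `module` is the hypothetical
`Module<FaerieBackend>` from the earlier note, and the `declare_data`
parameters are assumed from this diff's `Backend` impl rather than checked
against a released cranelift-module.]

    use cranelift_module::{DataContext, Linkage};

    let mut data_ctx = DataContext::new();
    // Initialize with concrete bytes; this becomes `Init::Bytes` above.
    data_ctx.define(Box::new([1u8, 2, 3, 4]));
    let id = module
        .declare_data("my_data", Linkage::Export, /*writable*/ true, /*tls*/ false, None)
        .unwrap();
    module.define_data(id, &data_ctx).unwrap();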
+    fn write_data_funcaddr(
+        &mut self,
+        _data: &mut FaerieCompiledData,
+        _offset: usize,
+        _what: ir::FuncRef,
+    ) {
+        unimplemented!()
+    }
+
+    fn write_data_dataaddr(
+        &mut self,
+        _data: &mut FaerieCompiledData,
+        _offset: usize,
+        _what: ir::GlobalValue,
+        _usize: binemit::Addend,
+    ) {
+        unimplemented!()
+    }
+
+    fn finalize_function(
+        &mut self,
+        _id: FuncId,
+        _func: &FaerieCompiledFunction,
+        _namespace: &ModuleNamespace<Self>,
+    ) {
+        // Nothing to do.
+    }
+
+    fn get_finalized_function(&self, _func: &FaerieCompiledFunction) {
+        // Nothing to do.
+    }
+
+    fn finalize_data(
+        &mut self,
+        _id: DataId,
+        _data: &FaerieCompiledData,
+        _namespace: &ModuleNamespace<Self>,
+    ) {
+        // Nothing to do.
+    }
+
+    fn get_finalized_data(&self, _data: &FaerieCompiledData) {
+        // Nothing to do.
+    }
+
+    fn publish(&mut self) {
+        // Nothing to do.
+    }
+
+    fn finish(self, _namespace: &ModuleNamespace<Self>) -> FaerieProduct {
+        FaerieProduct {
+            artifact: self.artifact,
+            trap_manifest: self.trap_manifest,
+        }
+    }
+}
+
+/// This is the output of `Module`'s
+/// [`finish`](../cranelift_module/struct.Module.html#method.finish) function.
+/// It provides functions for writing out the object file to memory or a file.
+#[derive(Debug)]
+pub struct FaerieProduct {
+    /// Faerie artifact with all functions, data, and links from the module defined
+    pub artifact: faerie::Artifact,
+    /// Optional trap manifest. Contains `FaerieTrapManifest` when `FaerieBuilder.collect_traps` is
+    /// set to `FaerieTrapCollection::Enabled`.
+    pub trap_manifest: Option<FaerieTrapManifest>,
+}
+
+impl FaerieProduct {
+    /// Return the name of the output file. This is the name passed into `new`.
+    pub fn name(&self) -> &str {
+        &self.artifact.name
+    }
+
+    /// Call `emit` on the faerie `Artifact`, producing bytes in memory.
+    pub fn emit(&self) -> Result<Vec<u8>, Error> {
+        self.artifact.emit()
+    }
+
+    /// Call `write` on the faerie `Artifact`, writing to a file.
+    pub fn write(&self, sink: File) -> Result<(), Error> {
+        self.artifact.write(sink)
+    }
+}
+
+fn translate_function_linkage(linkage: Linkage) -> faerie::Decl {
+    match linkage {
+        Linkage::Import => faerie::Decl::function_import().into(),
+        Linkage::Local => faerie::Decl::function().into(),
+        Linkage::Export => faerie::Decl::function().global().into(),
+        Linkage::Preemptible => faerie::Decl::function().weak().into(),
+    }
+}
+
+fn translate_data_linkage(linkage: Linkage, writable: bool, align: Option<u8>) -> faerie::Decl {
+    let align = align.map(u64::from);
+    match linkage {
+        Linkage::Import => faerie::Decl::data_import().into(),
+        Linkage::Local => faerie::Decl::data()
+            .with_writable(writable)
+            .with_align(align)
+            .into(),
+        Linkage::Export => faerie::Decl::data()
+            .global()
+            .with_writable(writable)
+            .with_align(align)
+            .into(),
+        Linkage::Preemptible => faerie::Decl::data()
+            .weak()
+            .with_writable(writable)
+            .with_align(align)
+            .into(),
+    }
+}
+
+struct FaerieRelocSink<'a> {
+    triple: Triple,
+    artifact: &'a mut faerie::Artifact,
+    name: &'a str,
+    namespace: &'a ModuleNamespace<'a, FaerieBackend>,
+    libcall_names: &'a dyn Fn(ir::LibCall) -> String,
+}
+
+impl<'a> RelocSink for FaerieRelocSink<'a> {
+    fn reloc_block(&mut self, _offset: CodeOffset, _reloc: Reloc, _block_offset: CodeOffset) {
+        unimplemented!();
+    }
+
+    fn reloc_external(
+        &mut self,
+        offset: CodeOffset,
+        reloc: Reloc,
+        name: &ir::ExternalName,
+        addend: Addend,
+    ) {
+        let ref_name: String = match *name {
+            ir::ExternalName::User { .. } => {
+                if self.namespace.is_function(name) {
+                    self.namespace.get_function_decl(name).name.clone()
+                } else {
+                    self.namespace.get_data_decl(name).name.clone()
+                }
+            }
+            ir::ExternalName::LibCall(ref libcall) => {
+                let sym = (self.libcall_names)(*libcall);
+                self.artifact
+                    .declare(sym.clone(), faerie::Decl::function_import())
+                    .expect("faerie declaration of libcall");
+                sym
+            }
+            _ => panic!("invalid ExternalName {}", name),
+        };
+        let (raw_reloc, raw_addend) = container::raw_relocation(reloc, &self.triple);
+        // TODO: Handle overflow.
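[Editor's note: one way to discharge the TODO above with checked arithmetic;
a sketch, not part of this diff. `addend` and `raw_addend` are both `i64`,
and `TryInto` is already imported at the top of the file.]

    let final_addend = addend
        .checked_add(raw_addend)
        .expect("relocation addend overflowed i64");
    let addend_i32: i32 = final_addend
        .try_into()
        .expect("relocation addend does not fit in i32");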
+ let final_addend = addend + raw_addend; + let addend_i32 = final_addend as i32; + debug_assert!(i64::from(addend_i32) == final_addend); + self.artifact + .link_with( + faerie::Link { + from: self.name, + to: &ref_name, + at: u64::from(offset), + }, + faerie::Reloc::Raw { + reloc: raw_reloc, + addend: addend_i32, + }, + ) + .expect("faerie relocation error"); + } + + fn reloc_jt(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::JumpTable) { + match reloc { + Reloc::X86PCRelRodata4 => { + // Not necessary to record this unless we are going to split apart code and its + // jumptbl/rodata. + } + _ => { + panic!("Unhandled reloc"); + } + } + } + + fn reloc_constant(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::ConstantOffset) { + match reloc { + Reloc::X86PCRelRodata4 => { + // Not necessary to record this unless we are going to split apart code and its + // jumptbl/rodata. + } + _ => { + panic!("Unhandled reloc"); + } + } + } +} + +#[allow(dead_code)] +struct FaerieStackmapSink<'a> { + artifact: &'a mut faerie::Artifact, + namespace: &'a ModuleNamespace<'a, FaerieBackend>, +} + +/// Faerie is currently not used in SpiderMonkey. Methods are unimplemented. +impl<'a> StackmapSink for FaerieStackmapSink<'a> { + fn add_stackmap(&mut self, _: CodeOffset, _: Stackmap) { + unimplemented!("faerie support for stackmaps"); + } +} diff --git a/cranelift/faerie/src/container.rs b/cranelift/faerie/src/container.rs new file mode 100644 index 0000000000..51355f4435 --- /dev/null +++ b/cranelift/faerie/src/container.rs @@ -0,0 +1,65 @@ +//! Utilities for working with Faerie container formats. + +use cranelift_codegen::binemit::Reloc; +use target_lexicon::{Architecture, BinaryFormat, Triple}; + +/// An object file format. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum Format { + /// The ELF object file format. + ELF, + /// The Mach-O object file format. + MachO, +} + +/// Translate from a Cranelift `Reloc` to a raw object-file-format-specific +/// relocation code and relocation-implied addend. +pub fn raw_relocation(reloc: Reloc, triple: &Triple) -> (u32, i64) { + match triple.binary_format { + BinaryFormat::Elf => { + use goblin::elf; + ( + match triple.architecture { + Architecture::X86_64 => { + match reloc { + Reloc::Abs4 => elf::reloc::R_X86_64_32, + Reloc::Abs8 => elf::reloc::R_X86_64_64, + Reloc::X86PCRel4 | Reloc::X86CallPCRel4 => elf::reloc::R_X86_64_PC32, + // TODO: Get Cranelift to tell us when we can use + // R_X86_64_GOTPCRELX/R_X86_64_REX_GOTPCRELX. + Reloc::X86CallPLTRel4 => elf::reloc::R_X86_64_PLT32, + Reloc::X86GOTPCRel4 => elf::reloc::R_X86_64_GOTPCREL, + _ => unimplemented!(), + } + } + _ => unimplemented!("unsupported architecture: {}", triple), + }, + // Most ELF relocations do not include an implicit addend. + 0, + ) + } + BinaryFormat::Macho => { + use goblin::mach; + match triple.architecture { + Architecture::X86_64 => { + match reloc { + Reloc::Abs8 => (u32::from(mach::relocation::R_ABS), 0), + // Mach-O doesn't need us to distinguish between PC-relative calls + // and PLT calls, but it does need us to distinguish between calls + // and non-calls. And, it includes the 4-byte addend implicitly. 
+ Reloc::X86PCRel4 => (u32::from(mach::relocation::X86_64_RELOC_SIGNED), 4), + Reloc::X86CallPCRel4 | Reloc::X86CallPLTRel4 => { + (u32::from(mach::relocation::X86_64_RELOC_BRANCH), 4) + } + Reloc::X86GOTPCRel4 => { + (u32::from(mach::relocation::X86_64_RELOC_GOT_LOAD), 4) + } + _ => unimplemented!("unsupported mach-o reloc: {}", reloc), + } + } + _ => unimplemented!("unsupported architecture: {}", triple), + } + } + _ => unimplemented!("unsupported format"), + } +} diff --git a/cranelift/faerie/src/lib.rs b/cranelift/faerie/src/lib.rs new file mode 100644 index 0000000000..a6098ff8f7 --- /dev/null +++ b/cranelift/faerie/src/lib.rs @@ -0,0 +1,36 @@ +//! Top-level lib.rs for `cranelift_faerie`. +//! +//! Users of this module should not have to depend on faerie directly. + +#![deny( + missing_docs, + trivial_numeric_casts, + unused_extern_crates, + unstable_features +)] +#![warn(unused_import_braces)] +#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))] +#![cfg_attr( + feature = "cargo-clippy", + warn( + clippy::float_arithmetic, + clippy::mut_mut, + clippy::nonminimal_bool, + clippy::option_map_unwrap_or, + clippy::option_map_unwrap_or_else, + clippy::print_stdout, + clippy::unicode_not_nfc, + clippy::use_self + ) +)] + +mod backend; +mod container; +pub mod traps; + +pub use crate::backend::{FaerieBackend, FaerieBuilder, FaerieProduct, FaerieTrapCollection}; +pub use crate::container::Format; + +/// Version number of this crate. +pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/cranelift/faerie/src/traps.rs b/cranelift/faerie/src/traps.rs new file mode 100644 index 0000000000..b84f171955 --- /dev/null +++ b/cranelift/faerie/src/traps.rs @@ -0,0 +1,65 @@ +//! Faerie trap manifests record every `TrapCode` that cranelift outputs during code generation, +//! for every function in the module. This data may be useful at runtime. 
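[Editor's note: a hedged sketch of consuming this data after compilation.
It assumes the `FaerieProduct` returned by `Module::finish` when the module
was built with `FaerieTrapCollection::Enabled`, and that `ir::TrapCode`
implements `Display`.]

    let product: FaerieProduct = module.finish();
    if let Some(manifest) = &product.trap_manifest {
        for sink in &manifest.sinks {
            for site in &sink.sites {
                println!("{}+{:#x}: {}", sink.name, site.offset, site.code);
            }
        }
    }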
+
+use cranelift_codegen::{binemit, ir};
+use cranelift_module::TrapSite;
+
+/// Record of the trap sites for a given function
+#[derive(Debug)]
+pub struct FaerieTrapSink {
+    /// Name of the function
+    pub name: String,
+    /// Total code size of the function
+    pub code_size: u32,
+    /// All trap sites collected in the function
+    pub sites: Vec<TrapSite>,
+}
+
+impl FaerieTrapSink {
+    /// Create an empty `FaerieTrapSink`
+    pub fn new(name: &str, code_size: u32) -> Self {
+        Self {
+            sites: Vec::new(),
+            name: name.to_owned(),
+            code_size,
+        }
+    }
+
+    /// Create a `FaerieTrapSink` pre-populated with `traps`
+    pub fn new_with_sites(name: &str, code_size: u32, traps: Vec<TrapSite>) -> Self {
+        Self {
+            sites: traps,
+            name: name.to_owned(),
+            code_size,
+        }
+    }
+}
+
+impl binemit::TrapSink for FaerieTrapSink {
+    fn trap(&mut self, offset: binemit::CodeOffset, srcloc: ir::SourceLoc, code: ir::TrapCode) {
+        self.sites.push(TrapSite {
+            offset,
+            srcloc,
+            code,
+        });
+    }
+}
+
+/// Collection of all `FaerieTrapSink`s for the module
+#[derive(Debug)]
+pub struct FaerieTrapManifest {
+    /// All `FaerieTrapSink`s for the module
+    pub sinks: Vec<FaerieTrapSink>,
+}
+
+impl FaerieTrapManifest {
+    /// Create an empty `FaerieTrapManifest`
+    pub fn new() -> Self {
+        Self { sinks: Vec::new() }
+    }
+
+    /// Put a `FaerieTrapSink` into the manifest
+    pub fn add_sink(&mut self, sink: FaerieTrapSink) {
+        self.sinks.push(sink);
+    }
+}
diff --git a/cranelift/filetests/Cargo.toml b/cranelift/filetests/Cargo.toml
new file mode 100644
index 0000000000..1ed5461341
--- /dev/null
+++ b/cranelift/filetests/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "cranelift-filetests"
+authors = ["The Cranelift Project Developers"]
+version = "0.59.0"
+description = "Test driver and implementations of the filetest commands"
+license = "Apache-2.0 WITH LLVM-exception"
+documentation = "https://cranelift.readthedocs.io/en/latest/testing.html#file-tests"
+repository = "https://github.com/bytecodealliance/cranelift"
+publish = false
+edition = "2018"
+
+[dependencies]
+cranelift-codegen = { path = "../codegen", version = "0.59.0", features = ["testing_hooks"] }
+cranelift-native = { path = "../native", version = "0.59.0" }
+cranelift-reader = { path = "../reader", version = "0.59.0" }
+cranelift-preopt = { path = "../preopt", version = "0.59.0" }
+file-per-thread-logger = "0.1.2"
+filecheck = "0.4.0"
+gimli = { version = "0.20.0", default-features = false, features = ["read"] }
+log = "0.4.6"
+memmap = "0.7.0"
+num_cpus = "1.8.0"
+region = "2.1.2"
+byteorder = { version = "1.3.2", default-features = false }
diff --git a/cranelift/filetests/LICENSE b/cranelift/filetests/LICENSE
new file mode 100644
index 0000000000..f9d81955f4
--- /dev/null
+++ b/cranelift/filetests/LICENSE
@@ -0,0 +1,220 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity.
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + diff --git a/cranelift/filetests/filetests/cfg/loop.clif b/cranelift/filetests/filetests/cfg/loop.clif new file mode 100644 index 0000000000..a18de9dc31 --- /dev/null +++ b/cranelift/filetests/filetests/cfg/loop.clif @@ -0,0 +1,56 @@ +; For testing cfg generation. This code is nonsense. +test print-cfg +test verifier + +function %nonsense(i32, i32) -> f32 { +; regex: I=\binst\d+\b +; check: digraph "%nonsense" { +; check: block0 [shape=record, label="{block0(v1: i32, v2: i32): +; check: | <$(BRZ=$I)>brz v2, block2 +; nextln: | <$(JUMP0=$I)>jump block3 +; nextln: }"] +; nextln: block3 [shape=record, label="{block3: +; check: | <$(JUMP3=$I)>jump block1(v4) +; nextln: }"] +; nextln: block1 [shape=record, label="{block1(v5: i32): +; check: | <$(BRNZ1=$I)>brnz v13, block1(v12) +; nextln: | <$(JUMP1=$I)>jump block4 +; nextln: }"] +; nextln: block4 [shape=record, label="{block4: +; check: | <$I>return v17 +; nextln: }"] +; nextln: block2 [shape=record, label="{block2: +; check: | <$I>return v100 +; check:}"] +block0(v1: i32, v2: i32): + v3 = f64const 0x0.0 + brz v2, block2 ; unordered: block0:$BRZ -> block2 + jump block3 ; unordered: block0:$JUMP0 -> block3 + +block3: + v4 = iconst.i32 0 + jump block1(v4) ; unordered: block3:$JUMP3 -> block1 + +block1(v5: i32): + v6 = imul_imm v5, 4 + v7 = iadd v1, v6 + v8 = f32const 0.0 + v9 = f32const 0.0 + v10 = f32const 0.0 + v11 = fadd v9, v10 + v12 = iadd_imm v5, 1 + v13 = icmp ult v12, v2 + brnz v13, block1(v12) ; unordered: block1:$BRNZ1 -> block1 + jump block4 ; unordered: block1:$JUMP1 -> block4 + +block4: + v14 = f64const 0.0 + v15 = f64const 0.0 + v16 = fdiv v14, v15 + v17 = f32const 0.0 + return v17 + +block2: + v100 = f32const 0.0 + return v100 +} diff --git a/cranelift/filetests/filetests/cfg/traps_early.clif b/cranelift/filetests/filetests/cfg/traps_early.clif new file mode 100644 index 0000000000..33de056e4c --- /dev/null +++ b/cranelift/filetests/filetests/cfg/traps_early.clif @@ -0,0 +1,21 @@ +; For testing cfg generation. This code explores the implications of encountering +; a terminating instruction before any connections have been made. 
+test print-cfg +test verifier + +function %nonsense(i32) { +; check: digraph "%nonsense" { + +block0(v1: i32): + trap user0 ; error: terminator instruction was encountered before the end + brnz v1, block2 ; unordered: block0:inst1 -> block2 + jump block1 ; unordered: block0:inst2 -> block1 + +block1: + v2 = iconst.i32 0 + v3 = iadd v1, v3 + jump block0(v3) ; unordered: block1:inst5 -> block0 + +block2: + return v1 +} diff --git a/cranelift/filetests/filetests/cfg/unused_node.clif b/cranelift/filetests/filetests/cfg/unused_node.clif new file mode 100644 index 0000000000..41f98073fd --- /dev/null +++ b/cranelift/filetests/filetests/cfg/unused_node.clif @@ -0,0 +1,27 @@ +; For testing cfg generation where some block is never reached. +test print-cfg + +function %not_reached(i32) -> i32 { +; check: digraph "%not_reached" { +; check: block0 [shape=record, label="{block0(v0: i32): +; check: | brnz v0, block2 +; check: | trap user0 +; check: }"] +; check: block1 [shape=record, label="{block1: +; check: | jump block0(v2) +; check: }"] +; check: block2 [shape=record, label="{block2: +; check: | return v0 +; check: }"] +block0(v0: i32): + brnz v0, block2 ; unordered: block0:inst0 -> block2 + trap user0 + +block1: + v1 = iconst.i32 1 + v2 = iadd v0, v1 + jump block0(v2) ; unordered: block1:inst4 -> block0 + +block2: + return v0 +} diff --git a/cranelift/filetests/filetests/dce/basic.clif b/cranelift/filetests/filetests/dce/basic.clif new file mode 100644 index 0000000000..0c94926584 --- /dev/null +++ b/cranelift/filetests/filetests/dce/basic.clif @@ -0,0 +1,46 @@ +test dce + +function %simple() -> i32 { +block0: + v2 = iconst.i32 2 + v3 = iconst.i32 3 + return v3 +} +; sameln: function %simple +; nextln: block0: +; nextln: v3 = iconst.i32 3 +; nextln: return v3 +; nextln: } + +function %some_branching(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v3 = iconst.i32 70 + v4 = iconst.i32 71 + v5 = iconst.i32 72 + v8 = iconst.i32 73 + brz v0, block1 + jump block2(v8) + +block1: + v2 = iadd v0, v3 + return v0 + +block2(v9: i32): + v6 = iadd v1, v4 + v7 = iadd v6, v9 + return v7 +} +; sameln: function %some_branching +; nextln: block0(v0: i32, v1: i32): +; nextln: v4 = iconst.i32 71 +; nextln: v8 = iconst.i32 73 +; nextln: brz v0, block1 +; nextln: jump block2(v8) +; nextln: +; nextln: block1: +; nextln: return v0 +; nextln: +; nextln: block2(v9: i32): +; nextln: v6 = iadd.i32 v1, v4 +; nextln: v7 = iadd v6, v9 +; nextln: return v7 diff --git a/cranelift/filetests/filetests/domtree/basic.clif b/cranelift/filetests/filetests/domtree/basic.clif new file mode 100644 index 0000000000..2960ab0e62 --- /dev/null +++ b/cranelift/filetests/filetests/domtree/basic.clif @@ -0,0 +1,25 @@ +test domtree + +function %test(i32) { + block0(v0: i32): + jump block1 ; dominates: block1 + block1: + brz v0, block3 ; dominates: block3 + jump block2 ; dominates: block2 + block2: + jump block3 + block3: + return +} +; check: cfg_postorder: +; sameln: block2 +; sameln: block3 +; sameln: block1 +; sameln: block0 + +; check: domtree_preorder { +; nextln: block0: block1 +; nextln: block1: block3 block2 +; nextln: block3: +; nextln: block2: +; nextln: } diff --git a/cranelift/filetests/filetests/domtree/loops.clif b/cranelift/filetests/filetests/domtree/loops.clif new file mode 100644 index 0000000000..a2a334e3fa --- /dev/null +++ b/cranelift/filetests/filetests/domtree/loops.clif @@ -0,0 +1,118 @@ +test domtree + +function %test(i32) { + block0(v0: i32): + brz v0, block1 ; dominates: block1 block3 block4 block5 + jump block2 ; 
dominates: block2 + block1: + jump block3 + block2: + brz v0, block4 + jump block5 + block3: + jump block4 + block4: + brz v0, block3 + jump block5 + block5: + brz v0, block4 + jump block6 ; dominates: block6 + block6: + return +} +; Fall-through-first, prune-at-source DFT: +; +; block0 { +; block0:brz v0, block1 { +; block0:jump block2 { +; block2 { +; block2:brz v2, block2 - +; block2:brz v3, block1 - +; block2:brz v4, block4 { +; block2: jump block5 { +; block5: jump block6 { +; block6 {} +; } +; } +; block4 {} +; } +; } block2 +; } +; block1 { +; block1:jump block3 { +; block3 {} +; } +; } block1 +; } +; } block0 +; +; check: cfg_postorder: +; sameln: block6 +; sameln: block5 +; sameln: block3 +; sameln: block4 +; sameln: block2 +; sameln: block1 +; sameln: block0 + +; check: domtree_preorder { +; nextln: block0: block1 block2 block4 block3 block5 +; nextln: block1: +; nextln: block2: +; nextln: block4: +; nextln: block3: +; nextln: block5: block6 +; nextln: block6: +; nextln: } + +function %loop2(i32) system_v { + block0(v0: i32): + brz v0, block1 ; dominates: block1 block3 block4 block5 + jump block2 ; dominates: block2 + block1: + jump block3 + block2: + brz v0, block4 + jump block5 + block3: + jump block4 + block4: + brz v0, block3 + jump block8 ; dominates: block8 + block8: + brnz v0, block5 + jump block6 ; dominates: block6 + block5: + brz v0, block4 + jump block9 ; dominates: block9 + block9: + trap user0 + block6: + jump block7 ; dominates: block7 + block7: + return +} +; check: cfg_postorder: +; sameln: block9 +; sameln: block5 +; sameln: block7 +; sameln: block6 +; sameln: block8 +; sameln: block3 +; sameln: block4 +; sameln: block2 +; sameln: block1 +; sameln: block0 + +; check: domtree_preorder { +; nextln: block0: block1 block2 block4 block3 block5 +; nextln: block1: +; nextln: block2: +; nextln: block4: block8 +; nextln: block8: block6 +; nextln: block6: block7 +; nextln: block7: +; nextln: block3: +; nextln: block5: block9 +; nextln: block9: +; nextln: } diff --git a/cranelift/filetests/filetests/domtree/loops2.clif b/cranelift/filetests/filetests/domtree/loops2.clif new file mode 100644 index 0000000000..140916bafb --- /dev/null +++ b/cranelift/filetests/filetests/domtree/loops2.clif @@ -0,0 +1,92 @@ +test domtree + +function %loop1(i32) { + block0(v0: i32): + brz v0, block1 ; dominates: block1 block6 + jump block10 ; dominates: block10 + block10: + brnz v0, block2 ; dominates: block2 block9 + jump block3 ; dominates: block3 + block1: + jump block6 + block2: + brz v0, block4 ; dominates: block4 block7 block8 + jump block5 ; dominates: block5 + block3: + jump block9 + block4: + brz v0, block4 + jump block11 ; dominates: block11 + block11: + brnz v0, block6 + jump block7 + block5: + brz v0, block7 + jump block12 ; dominates: block12 + block12: + brnz v0, block8 + jump block9 + block6: + return + block7: + jump block8 + block8: + return + block9: + return +} + +; check: domtree_preorder { +; nextln: block0: block1 block10 block6 +; nextln: block1: +; nextln: block10: block2 block3 block9 +; nextln: block2: block4 block5 block7 block8 +; nextln: block4: block11 +; nextln: block11: +; nextln: block5: block12 +; nextln: block12: +; nextln: block7: +; nextln: block8: +; nextln: block3: +; nextln: block9: +; nextln: block6: +; nextln: } + +function %loop2(i32) system_v { + block0(v0: i32): + brz v0, block1 ; dominates: block1 block3 block4 block5 + jump block2 ; dominates: block2 + block1: + jump block3 + block2: + brz v0, block4 + jump block5 + block3: + jump block4 + block4: 
+ brz v0, block3 + jump block5 + block5: + brz v0, block4 + jump block6 ; dominates: block6 + block6: + return +} +; check: cfg_postorder: +; sameln: block6 +; sameln: block5 +; sameln: block3 +; sameln: block4 +; sameln: block2 +; sameln: block1 +; sameln: block0 + +; check: domtree_preorder { +; nextln: block0: block1 block2 block4 block3 block5 +; nextln: block1: +; nextln: block2: +; nextln: block4: +; nextln: block3: +; nextln: block5: block6 +; nextln: block6: +; nextln: } diff --git a/cranelift/filetests/filetests/domtree/tall-tree.clif b/cranelift/filetests/filetests/domtree/tall-tree.clif new file mode 100644 index 0000000000..436edc643b --- /dev/null +++ b/cranelift/filetests/filetests/domtree/tall-tree.clif @@ -0,0 +1,54 @@ +test domtree + +function %test(i32) { + block0(v0: i32): + brz v0, block1 ; dominates: block1 + jump block12 ; dominates: block12 + block12: + brnz v0, block2 ; dominates: block2 block5 + jump block3 ; dominates: block3 + block1: + jump block4 ; dominates: block4 + block2: + jump block5 + block3: + jump block5 + block4: + brz v0, block6 ; dominates: block6 block10 + jump block7 ; dominates: block7 + block5: + return + block6: + brz v0, block8 ; dominates: block11 block8 + jump block13 ; dominates: block13 + block13: + brnz v0, block9 ; dominates: block9 + jump block10 + block7: + jump block10 + block8: + jump block11 + block9: + jump block11 + block10: + return + block11: + return +} + +; check: domtree_preorder { +; nextln: block0: block1 block12 +; nextln: block1: block4 +; nextln: block4: block6 block7 block10 +; nextln: block6: block8 block13 block11 +; nextln: block8: +; nextln: block13: block9 +; nextln: block9: +; nextln: block11: +; nextln: block7: +; nextln: block10: +; nextln: block12: block2 block3 block5 +; nextln: block2: +; nextln: block3: +; nextln: block5: +; nextln: } diff --git a/cranelift/filetests/filetests/domtree/wide-tree.clif b/cranelift/filetests/filetests/domtree/wide-tree.clif new file mode 100644 index 0000000000..e118e684f0 --- /dev/null +++ b/cranelift/filetests/filetests/domtree/wide-tree.clif @@ -0,0 +1,73 @@ +test domtree + +function %test(i32) { + block0(v0: i32): + brz v0, block13 ; dominates: block13 + jump block1 ; dominates: block1 + block1: + brz v0, block2 ; dominates: block2 block7 + jump block20 ; dominates: block20 + block20: + brnz v0, block3 ; dominates: block3 + jump block21 ; dominates: block21 + block21: + brz v0, block4 ; dominates: block4 + jump block22 ; dominates: block22 + block22: + brnz v0, block5 ; dominates: block5 + jump block6 ; dominates: block6 + block2: + jump block7 + block3: + jump block7 + block4: + jump block7 + block5: + jump block7 + block6: + jump block7 + block7: + brnz v0, block8 ; dominates: block8 block12 + jump block23 ; dominates: block23 + block23: + brz v0, block9 ; dominates: block9 + jump block24 ; dominates: block24 + block24: + brnz v0, block10 ; dominates: block10 + jump block11 ; dominates: block11 + block8: + jump block12 + block9: + jump block12 + block10: + brz v0, block13 + jump block12 + block11: + jump block13 + block12: + return + block13: + return +} + +; check: domtree_preorder { +; nextln: block0: block13 block1 +; nextln: block13: +; nextln: block1: block2 block20 block7 +; nextln: block2: +; nextln: block20: block3 block21 +; nextln: block3: +; nextln: block21: block4 block22 +; nextln: block4: +; nextln: block22: block5 block6 +; nextln: block5: +; nextln: block6: +; nextln: block7: block8 block23 block12 +; nextln: block8: +; nextln: block23: block9 block24 +; 
nextln: block9: +; nextln: block24: block10 block11 +; nextln: block10: +; nextln: block11: +; nextln: block12: +; nextln: } diff --git a/cranelift/filetests/filetests/isa/riscv/abi-e.clif b/cranelift/filetests/filetests/isa/riscv/abi-e.clif new file mode 100644 index 0000000000..fcd762ee81 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv/abi-e.clif @@ -0,0 +1,14 @@ +; Test the legalization of function signatures for RV32E. +test legalizer +target riscv32 enable_e + +; regex: V=v\d+ + +function %f() { + ; Spilling into the stack args after %x15 since %16 and up are not + ; available in RV32E. + sig0 = (i64, i64, i64, i64) -> i64 system_v + ; check: sig0 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [0], i32 [4]) -> i32 [%x10], i32 [%x11] system_v +block0: + return +} diff --git a/cranelift/filetests/filetests/isa/riscv/abi.clif b/cranelift/filetests/filetests/isa/riscv/abi.clif new file mode 100644 index 0000000000..d9469f490e --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv/abi.clif @@ -0,0 +1,32 @@ +; Test the legalization of function signatures. +test legalizer +target riscv32 + +; regex: V=v\d+ + +function %f() { + sig0 = (i32) -> i32 system_v + ; check: sig0 = (i32 [%x10]) -> i32 [%x10] system_v + + sig1 = (i64) -> b1 system_v + ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] system_v + + ; The i64 argument must go in an even-odd register pair. + sig2 = (f32, i64) -> f64 system_v + ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] system_v + + ; Spilling into the stack args. + sig3 = (f64, f64, f64, f64, f64, f64, f64, i64) -> f64 system_v + ; check: sig3 = (f64 [%f10], f64 [%f11], f64 [%f12], f64 [%f13], f64 [%f14], f64 [%f15], f64 [%f16], i32 [0], i32 [4]) -> f64 [%f10] system_v + + ; Splitting vectors. + sig4 = (i32x4) system_v + ; check: sig4 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13]) system_v + + ; Splitting vectors, then splitting ints. + sig5 = (i64x4) system_v + ; check: sig5 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [%x16], i32 [%x17]) system_v + +block0: + return +} diff --git a/cranelift/filetests/filetests/isa/riscv/binary32.clif b/cranelift/filetests/filetests/isa/riscv/binary32.clif new file mode 100644 index 0000000000..5a69c4289b --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv/binary32.clif @@ -0,0 +1,189 @@ +; Binary emission of 32-bit code. +test binemit +target riscv32 + +function %RV32I(i32 link [%x1]) -> i32 link [%x1] { + sig0 = () + fn0 = %foo() + +block0(v9999: i32): + [-,%x10] v1 = iconst.i32 1 + [-,%x21] v2 = iconst.i32 2 + + ; Integer Register-Register Operations. 
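; Editorial note (not in the original test): each "bin:" value is a
; standard RV32I encoding. For example, the first iadd below assembles
; as the R-type instruction `add x7, x10, x21`:
;   funct7=0000000 | rs2=x21(10101) | rs1=x10(01010) | funct3=000
;   | rd=x7(00111) | opcode=0110011  =>  0x015503b3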
+ ; add + [-,%x7] v10 = iadd v1, v2 ; bin: 015503b3 + [-,%x16] v11 = iadd v2, v1 ; bin: 00aa8833 + ; sub + [-,%x7] v12 = isub v1, v2 ; bin: 415503b3 + [-,%x16] v13 = isub v2, v1 ; bin: 40aa8833 + ; and + [-,%x7] v20 = band v1, v2 ; bin: 015573b3 + [-,%x16] v21 = band v2, v1 ; bin: 00aaf833 + ; or + [-,%x7] v22 = bor v1, v2 ; bin: 015563b3 + [-,%x16] v23 = bor v2, v1 ; bin: 00aae833 + ; xor + [-,%x7] v24 = bxor v1, v2 ; bin: 015543b3 + [-,%x16] v25 = bxor v2, v1 ; bin: 00aac833 + ; sll + [-,%x7] v30 = ishl v1, v2 ; bin: 015513b3 + [-,%x16] v31 = ishl v2, v1 ; bin: 00aa9833 + ; srl + [-,%x7] v32 = ushr v1, v2 ; bin: 015553b3 + [-,%x16] v33 = ushr v2, v1 ; bin: 00aad833 + ; sra + [-,%x7] v34 = sshr v1, v2 ; bin: 415553b3 + [-,%x16] v35 = sshr v2, v1 ; bin: 40aad833 + ; slt + [-,%x7] v42 = icmp slt v1, v2 ; bin: 015523b3 + [-,%x16] v43 = icmp slt v2, v1 ; bin: 00aaa833 + ; sltu + [-,%x7] v44 = icmp ult v1, v2 ; bin: 015533b3 + [-,%x16] v45 = icmp ult v2, v1 ; bin: 00aab833 + + ; Integer Register-Immediate Instructions + + ; addi + [-,%x7] v100 = iadd_imm v1, 1000 ; bin: 3e850393 + [-,%x16] v101 = iadd_imm v2, -905 ; bin: c77a8813 + ; andi + [-,%x7] v110 = band_imm v1, 1000 ; bin: 3e857393 + [-,%x16] v111 = band_imm v2, -905 ; bin: c77af813 + ; ori + [-,%x7] v112 = bor_imm v1, 1000 ; bin: 3e856393 + [-,%x16] v113 = bor_imm v2, -905 ; bin: c77ae813 + ; xori + [-,%x7] v114 = bxor_imm v1, 1000 ; bin: 3e854393 + [-,%x16] v115 = bxor_imm v2, -905 ; bin: c77ac813 + + ; slli + [-,%x7] v120 = ishl_imm v1, 31 ; bin: 01f51393 + [-,%x16] v121 = ishl_imm v2, 8 ; bin: 008a9813 + ; srli + [-,%x7] v122 = ushr_imm v1, 31 ; bin: 01f55393 + [-,%x16] v123 = ushr_imm v2, 8 ; bin: 008ad813 + ; srai + [-,%x7] v124 = sshr_imm v1, 31 ; bin: 41f55393 + [-,%x16] v125 = sshr_imm v2, 8 ; bin: 408ad813 + + ; slti + [-,%x7] v130 = icmp_imm slt v1, 1000 ; bin: 3e852393 + [-,%x16] v131 = icmp_imm slt v2, -905 ; bin: c77aa813 + ; sltiu + [-,%x7] v132 = icmp_imm ult v1, 1000 ; bin: 3e853393 + [-,%x16] v133 = icmp_imm ult v2, -905 ; bin: c77ab813 + + ; lui + [-,%x7] v140 = iconst.i32 0x12345000 ; bin: 123453b7 + [-,%x16] v141 = iconst.i32 0xffffffff_fedcb000 ; bin: fedcb837 + ; addi + [-,%x7] v142 = iconst.i32 1000 ; bin: 3e800393 + [-,%x16] v143 = iconst.i32 -905 ; bin: c7700813 + + ; Copies alias to iadd_imm. + [-,%x7] v150 = copy v1 ; bin: 00050393 + [-,%x16] v151 = copy v2 ; bin: 000a8813 + + ; Control Transfer Instructions + + ; jal %x1, fn0 + call fn0() ; bin: Call(%foo) 000000ef + + ; jalr %x1, %x10 + call_indirect sig0, v1() ; bin: 000500e7 + call_indirect sig0, v2() ; bin: 000a80e7 + + brz v1, block3 + fallthrough block4 + +block4: + brnz v1, block1 + fallthrough block5 + +block5: + ; jalr %x0, %x1, 0 + return v9999 ; bin: 00008067 + +block1: + ; beq 0x000 + br_icmp eq v1, v2, block1 ; bin: 01550063 + fallthrough block100 + +block100: + ; bne 0xffc + br_icmp ne v1, v2, block1 ; bin: ff551ee3 + fallthrough block101 + +block101: + ; blt 0xff8 + br_icmp slt v1, v2, block1 ; bin: ff554ce3 + fallthrough block102 + +block102: + ; bge 0xff4 + br_icmp sge v1, v2, block1 ; bin: ff555ae3 + fallthrough block103 + +block103: + ; bltu 0xff0 + br_icmp ult v1, v2, block1 ; bin: ff5568e3 + fallthrough block104 + +block104: + ; bgeu 0xfec + br_icmp uge v1, v2, block1 ; bin: ff5576e3 + fallthrough block105 + +block105: + + ; Forward branches. 
+ fallthrough block106 + +block106: + ; beq 0x018 + br_icmp eq v2, v1, block2 ; bin: 00aa8c63 + fallthrough block107 + +block107: + ; bne 0x014 + br_icmp ne v2, v1, block2 ; bin: 00aa9a63 + fallthrough block108 + +block108: + ; blt 0x010 + br_icmp slt v2, v1, block2 ; bin: 00aac863 + fallthrough block109 + +block109: + ; bge 0x00c + br_icmp sge v2, v1, block2 ; bin: 00aad663 + fallthrough block110 + +block110: + ; bltu 0x008 + br_icmp ult v2, v1, block2 ; bin: 00aae463 + fallthrough block111 + +block111: + ; bgeu 0x004 + br_icmp uge v2, v1, block2 ; bin: 00aaf263 + + fallthrough block2 + +block2: + ; jal %x0, 0x00000 + jump block2 ; bin: 0000006f + +block3: + ; beq x, %x0 + brz v1, block3 ; bin: 00050063 + fallthrough block6 + +block6: + ; bne x, %x0 + brnz v1, block3 ; bin: fe051ee3 + + ; jal %x0, 0x1ffff4 + jump block2 ; bin: ff5ff06f +} diff --git a/cranelift/filetests/filetests/isa/riscv/encoding.clif b/cranelift/filetests/filetests/isa/riscv/encoding.clif new file mode 100644 index 0000000000..b8c991f52e --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv/encoding.clif @@ -0,0 +1,21 @@ +test legalizer +target riscv32 supports_m=1 + +function %int32(i32, i32) { +block0(v1: i32, v2: i32): + v10 = iadd v1, v2 + ; check: [R#0c] + ; sameln: v10 = iadd + + v11 = isub v1, v2 + ; check: [R#200c] + ; sameln: v11 = isub + + v12 = imul v1, v2 + ; check: [R#10c] + ; sameln: v12 = imul + + return + ; check: [Iret#19] + ; sameln: return +} diff --git a/cranelift/filetests/filetests/isa/riscv/expand-i32.clif b/cranelift/filetests/filetests/isa/riscv/expand-i32.clif new file mode 100644 index 0000000000..ee62bc093f --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv/expand-i32.clif @@ -0,0 +1,37 @@ +; Test the legalization of i32 instructions that don't have RISC-V versions. +test legalizer + +target riscv32 supports_m=1 + +target riscv64 supports_m=1 + +; regex: V=v\d+ + +function %carry_out(i32, i32) -> i32, b1 { +block0(v1: i32, v2: i32): + v3, v4 = iadd_cout v1, v2 + return v3, v4 +} +; check: v3 = iadd v1, v2 +; check: v4 = icmp ult v3, v1 +; check: return v3, v4 + +; Expanding illegal immediate constants. +; Note that at some point we'll probably expand the iconst as well. +function %large_imm(i32) -> i32 { +block0(v0: i32): + v1 = iadd_imm v0, 1000000000 + return v1 +} +; check: $(cst=$V) = iconst.i32 0x3b9a_ca00 +; check: v1 = iadd v0, $cst +; check: return v1 + +function %bitclear(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = band_not v0, v1 + ; check: iconst.i32 -1 + ; check: bxor + ; check: band + return v2 +} diff --git a/cranelift/filetests/filetests/isa/riscv/legalize-abi.clif b/cranelift/filetests/filetests/isa/riscv/legalize-abi.clif new file mode 100644 index 0000000000..0a5fb801a3 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv/legalize-abi.clif @@ -0,0 +1,134 @@ +; Test legalizer's handling of ABI boundaries. 
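; Editorial note (not part of the original file): on riscv32 the legalizer
; rewrites every i64 value as a (low, high) pair of i32s and appends the
; return address as a trailing i32 argument, which is why the checks below
; expect split values plus an extra $link parameter in each signature.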
+test legalizer +target riscv32 + +; regex: V=v\d+ +; regex: SS=ss\d+ +; regex: WS=\s+ + +function %int_split_args(i64) -> i64 { +block0(v0: i64): + ; check: block0($(v0l=$V): i32, $(v0h=$V): i32, $(link=$V): i32): + ; check: v0 = iconcat $v0l, $v0h + v1 = iadd_imm v0, 1 + ; check: $(v1l=$V), $(v1h=$V) = isplit v1 + ; check: return $v1l, $v1h, $link + return v1 +} + +function %split_call_arg(i32) { + fn1 = %foo(i64) + fn2 = %foo(i32, i64) +block0(v0: i32): + v1 = uextend.i64 v0 + call fn1(v1) + ; check: $(v1l=$V), $(v1h=$V) = isplit v1 + ; check: call fn1($v1l, $v1h) + call fn2(v0, v1) + ; check: call fn2(v0, $V, $V) + return +} + +function %split_ret_val() { + fn1 = %foo() -> i64 +block0: + v1 = call fn1() + ; check: block0($(link=$V): i32): + ; nextln: $(v1l=$V), $(v1h=$V) = call fn1() + ; check: v1 = iconcat $v1l, $v1h + jump block1(v1) + ; check: jump block1(v1) + +block1(v10: i64): + jump block1(v10) +} + +; First return value is fine, second one is expanded. +function %split_ret_val2() { + fn1 = %foo() -> i32, i64 +block0: + v1, v2 = call fn1() + ; check: block0($(link=$V): i32): + ; nextln: v1, $(v2l=$V), $(v2h=$V) = call fn1() + ; check: v2 = iconcat $v2l, $v2h + jump block1(v1, v2) + ; check: jump block1(v1, v2) + +block1(v9: i32, v10: i64): + jump block1(v9, v10) +} + +function %int_ext(i8, i8 sext, i8 uext) -> i8 uext { +block0(v1: i8, v2: i8, v3: i8): + ; check: block0(v1: i8, $(v2x=$V): i32, $(v3x=$V): i32, $(link=$V): i32): + ; check: v2 = ireduce.i8 $v2x + ; check: v3 = ireduce.i8 $v3x + ; check: $(v1x=$V) = uextend.i32 v1 + ; check: return $v1x, $link + return v1 +} + +; Function produces single return value, still need to copy. +function %ext_ret_val() { + fn1 = %foo() -> i8 sext +block0: + v1 = call fn1() + ; check: block0($V: i32): + ; nextln: $(rv=$V) = call fn1() + ; check: v1 = ireduce.i8 $rv + jump block1(v1) + ; check: jump block1(v1) + +block1(v10: i8): + jump block1(v10) +} + +function %vector_split_args(i64x4) -> i64x4 { +block0(v0: i64x4): + ; check: block0($(v0al=$V): i32, $(v0ah=$V): i32, $(v0bl=$V): i32, $(v0bh=$V): i32, $(v0cl=$V): i32, $(v0ch=$V): i32, $(v0dl=$V): i32, $(v0dh=$V): i32, $(link=$V): i32): + ; check: $(v0a=$V) = iconcat $v0al, $v0ah + ; check: $(v0b=$V) = iconcat $v0bl, $v0bh + ; check: $(v0ab=$V) = vconcat $v0a, $v0b + ; check: $(v0c=$V) = iconcat $v0cl, $v0ch + ; check: $(v0d=$V) = iconcat $v0dl, $v0dh + ; check: $(v0cd=$V) = vconcat $v0c, $v0d + ; check: v0 = vconcat $v0ab, $v0cd + v1 = bxor v0, v0 + ; check: $(v1ab=$V), $(v1cd=$V) = vsplit v1 + ; check: $(v1a=$V), $(v1b=$V) = vsplit $v1ab + ; check: $(v1al=$V), $(v1ah=$V) = isplit $v1a + ; check: $(v1bl=$V), $(v1bh=$V) = isplit $v1b + ; check: $(v1c=$V), $(v1d=$V) = vsplit $v1cd + ; check: $(v1cl=$V), $(v1ch=$V) = isplit $v1c + ; check: $(v1dl=$V), $(v1dh=$V) = isplit $v1d + ; check: return $v1al, $v1ah, $v1bl, $v1bh, $v1cl, $v1ch, $v1dl, $v1dh, $link + return v1 +} + +function %indirect(i32) { + sig1 = () system_v +block0(v0: i32): + call_indirect sig1, v0() + return +} + +; The first argument to call_indirect doesn't get altered. +function %indirect_arg(i32, f32x2) { + sig1 = (f32x2) system_v +block0(v0: i32, v1: f32x2): + call_indirect sig1, v0(v1) + ; check: call_indirect sig1, v0($V, $V) + return +} + +; Call a function that takes arguments on the stack. 
+function %stack_args(i32) { + ; check: $(ss0=$SS) = outgoing_arg 4 + fn1 = %foo(i64, i64, i64, i64, i32) +block0(v0: i32): + v1 = iconst.i64 1 + call fn1(v1, v1, v1, v1, v0) + ; check: [GPsp#48,$ss0]$WS $(v0s=$V) = spill v0 + ; check: call fn1($(=.*), $v0s) + return +} diff --git a/cranelift/filetests/filetests/isa/riscv/legalize-i64.clif b/cranelift/filetests/filetests/isa/riscv/legalize-i64.clif new file mode 100644 index 0000000000..11b31218be --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv/legalize-i64.clif @@ -0,0 +1,64 @@ +; Test the legalization of i64 arithmetic instructions. +test legalizer +target riscv32 supports_m=1 + +; regex: V=v\d+ + +function %bitwise_and(i64, i64) -> i64 { +block0(v1: i64, v2: i64): + v3 = band v1, v2 + return v3 +} +; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): +; check: [R#ec +; sameln: $(v3l=$V) = band $v1l, $v2l +; check: [R#ec +; sameln: $(v3h=$V) = band $v1h, $v2h +; check: v3 = iconcat $v3l, $v3h +; check: return $v3l, $v3h, $link + +function %bitwise_or(i64, i64) -> i64 { +block0(v1: i64, v2: i64): + v3 = bor v1, v2 + return v3 +} +; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): +; check: [R#cc +; sameln: $(v3l=$V) = bor $v1l, $v2l +; check: [R#cc +; sameln: $(v3h=$V) = bor $v1h, $v2h +; check: v3 = iconcat $v3l, $v3h +; check: return $v3l, $v3h, $link + +function %bitwise_xor(i64, i64) -> i64 { +block0(v1: i64, v2: i64): + v3 = bxor v1, v2 + return v3 +} +; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): +; check: [R#8c +; sameln: $(v3l=$V) = bxor $v1l, $v2l +; check: [R#8c +; sameln: $(v3h=$V) = bxor $v1h, $v2h +; check: v3 = iconcat $v3l, $v3h +; check: return $v3l, $v3h, $link + +function %arith_add(i64, i64) -> i64 { +; Legalizing iadd.i64 requires two steps: +; 1. Narrow to iadd_cout.i32, then +; 2. Expand iadd_cout.i32 since RISC-V has no carry flag. 
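+;
+; The expected expansion, sketched with the names used in the checks below:
+;   v3l = iadd v1l, v2l
+;   c   = icmp ult v3l, v1l   ; carry out of the low word
+;   v3h = iadd (iadd v1h, v2h), (bint c)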
+block0(v1: i64, v2: i64): + v3 = iadd v1, v2 + return v3 +} +; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): +; check: [R#0c +; sameln: $(v3l=$V) = iadd $v1l, $v2l +; check: $(c=$V) = icmp ult $v3l, $v1l +; check: [R#0c +; sameln: $(v3h1=$V) = iadd $v1h, $v2h +; check: $(c_int=$V) = bint.i32 $c +; check: [R#0c +; sameln: $(v3h=$V) = iadd $v3h1, $c_int +; check: v3 = iconcat $v3l, $v3h +; check: return $v3l, $v3h, $link diff --git a/cranelift/filetests/filetests/isa/riscv/legalize-icmp_imm-i64.clif b/cranelift/filetests/filetests/isa/riscv/legalize-icmp_imm-i64.clif new file mode 100644 index 0000000000..d7250cb3af --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv/legalize-icmp_imm-i64.clif @@ -0,0 +1,55 @@ +test legalizer +target riscv32 + +; regex: V=v\d+ + +function %icmp_imm_eq(i64) -> b1 { +block0(v0: i64): + v1 = icmp_imm eq v0, 0x20202020_10101010 + return v1 +} +; check: block0($(v0l=$V): i32, $(v0h=$V): i32, $(link=$V): i32): +; nextln: $(v2l=$V) -> $(v0l) +; nextln: $(v2h=$V) -> $(v0h) +; nextln: v0 = iconcat $(v0l), $(v0h) +; nextln: $(imm_low=$V) = iconst.i32 0x1010_1010 +; nextln: $(imm_high=$V) = iconst.i32 0x2020_2020 +; nextln: $(v3=$V) = icmp eq $(v2l), $(imm_low) +; nextln: $(v4=$V) = icmp eq $(v2h), $(imm_high) +; nextln: v1 = band $(v3), $(v4) +; nextln: return v1, $(link) + +function %icmp_imm_ne(i64) -> b1 { +block0(v0: i64): + v1 = icmp_imm ne v0, 0x33333333_44444444 + return v1 +} +; check: block0($(v0l=$V): i32, $(v0h=$V): i32, $(link=$V): i32): +; nextln: $(v2l=$V) -> $(v0l) +; nextln: $(v2h=$V) -> $(v0h) +; nextln: v0 = iconcat $(v0l), $(v0h) +; nextln: $(imm_low=$V) = iconst.i32 0x4444_4444 +; nextln: $(imm_high=$V) = iconst.i32 0x3333_3333 +; nextln: $(v3=$V) = icmp ne $(v2l), $(imm_low) +; nextln: $(v4=$V) = icmp ne $(v2h), $(imm_high) +; nextln: v1 = bor $(v3), $(v4) +; nextln: return v1, $(link) + +function %icmp_imm_sge(i64) -> b1 { +block0(v0: i64): + v1 = icmp_imm sge v0, 0x01020304_05060708 + return v1 +} +; check: block0($(v0l=$V): i32, $(v0h=$V): i32, $(link=$V): i32): +; nextln: $(v2l=$V) -> $(v0l) +; nextln: $(v2h=$V) -> $(v0h) +; nextln: v0 = iconcat $(v0l), $(v0h) +; nextln: $(imm_low=$V) = iconst.i32 0x0506_0708 +; nextln: $(imm_high=$V) = iconst.i32 0x0102_0304 +; nextln: $(v3=$V) = icmp sgt $(v2h), $(imm_high) +; nextln: $(v4=$V) = icmp slt $(v2h), $(imm_high) +; nextln: $(v5=$V) = icmp uge $(v2l), $(imm_low) +; nextln: $(v6=$V) = bnot $v4 +; nextln: $(v7=$V) = band $v6, $v5 +; nextln: v1 = bor $(v3), $(v7) +; nextln: return v1, $(link) diff --git a/cranelift/filetests/filetests/isa/riscv/parse-encoding.clif b/cranelift/filetests/filetests/isa/riscv/parse-encoding.clif new file mode 100644 index 0000000000..21cd828b8a --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv/parse-encoding.clif @@ -0,0 +1,36 @@ +; Test the parser's support for encoding annotations. 
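+;
+; In a signature, an annotation like [%x5] pins a value to a register,
+; [0] and [4] pin it to a stack offset, and `link` marks the return-address
+; slot; the checks assert that all of these round-trip through the parser
+; unchanged.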
+test legalizer +target riscv32 + +function %parse_encoding(i32 [%x5]) -> i32 [%x10] { + ; check: function %parse_encoding(i32 [%x5], i32 link [%x1]) -> i32 [%x10], i32 link [%x1] fast { + + sig0 = (i32 [%x10]) -> i32 [%x10] system_v + ; check: sig0 = (i32 [%x10]) -> i32 [%x10] system_v + + sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] system_v + ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] system_v + + sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] system_v + ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] system_v + + ; Arguments on stack where not necessary + sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] system_v + ; check: sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] system_v + + ; Stack argument before register argument + sig4 = (f32 [72], i32 [%x10]) system_v + ; check: sig4 = (f32 [72], i32 [%x10]) system_v + + ; Return value on stack + sig5 = () -> f32 [0] system_v + ; check: sig5 = () -> f32 [0] system_v + + ; function + signature + fn0 = %bar(i32 [%x10]) -> b1 [%x10] system_v + ; check: sig6 = (i32 [%x10]) -> b1 [%x10] system_v + ; nextln: fn0 = %bar sig6 + +block0(v0: i32): + return v0 +} diff --git a/cranelift/filetests/filetests/isa/riscv/regmove.clif b/cranelift/filetests/filetests/isa/riscv/regmove.clif new file mode 100644 index 0000000000..f1509e8178 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv/regmove.clif @@ -0,0 +1,15 @@ +; Test tracking of register moves. +test binemit +target riscv32 + +function %regmoves(i32 link [%x1]) -> i32 link [%x1] { +block0(v9999: i32): + [-,%x10] v1 = iconst.i32 1 + [-,%x7] v2 = iadd_imm v1, 1000 ; bin: 3e850393 + regmove v1, %x10 -> %x11 ; bin: 00050593 + [-,%x7] v3 = iadd_imm v1, 1000 ; bin: 3e858393 + regmove v1, %x11 -> %x10 ; bin: 00058513 + [-,%x7] v4 = iadd_imm v1, 1000 ; bin: 3e850393 + + return v9999 +} diff --git a/cranelift/filetests/filetests/isa/riscv/split-args.clif b/cranelift/filetests/filetests/isa/riscv/split-args.clif new file mode 100644 index 0000000000..9f4b3e9268 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv/split-args.clif @@ -0,0 +1,55 @@ +; Test the legalization of block arguments that are split. 
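+;
+; A jump passing an i64 should be rewritten to pass the two i32 halves,
+; e.g. `jump block1(v1)` becoming `jump block1(v1l, v1h)`, with the target
+; block's parameter list split to match.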
+test legalizer
+target riscv32
+
+; regex: V=v\d+
+
+function %simple(i64, i64) -> i64 {
+block0(v1: i64, v2: i64):
+; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32):
+    jump block1(v1)
+    ; check: jump block1($v1l, $v1h)
+
+block1(v3: i64):
+; check: block1($(v3l=$V): i32, $(v3h=$V): i32):
+    v4 = band v3, v2
+    ; check: $(v4l=$V) = band $v3l, $v2l
+    ; check: $(v4h=$V) = band $v3h, $v2h
+    return v4
+    ; check: return $v4l, $v4h, $link
+}
+
+function %multi(i64) -> i64 {
+block1(v1: i64):
+; check: block1($(v1l=$V): i32, $(v1h=$V): i32, $(link=$V): i32):
+    jump block2(v1, v1)
+    ; check: jump block2($v1l, $v1l, $v1h, $v1h)
+
+block2(v2: i64, v3: i64):
+; check: block2($(v2l=$V): i32, $(v3l=$V): i32, $(v2h=$V): i32, $(v3h=$V): i32):
+    jump block3(v2)
+    ; check: jump block3($v2l, $v2h)
+
+block3(v4: i64):
+; check: block3($(v4l=$V): i32, $(v4h=$V): i32):
+    v5 = band v4, v3
+    ; check: $(v5l=$V) = band $v4l, $v3l
+    ; check: $(v5h=$V) = band $v4h, $v3h
+    return v5
+    ; check: return $v5l, $v5h, $link
+}
+
+function %loop(i64, i64) -> i64 {
+block0(v1: i64, v2: i64):
+; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32):
+    jump block1(v1)
+    ; check: jump block1($v1l, $v1h)
+
+block1(v3: i64):
+; check: block1($(v3l=$V): i32, $(v3h=$V): i32):
+    v4 = band v3, v2
+    ; check: $(v4l=$V) = band $v3l, $v2l
+    ; check: $(v4h=$V) = band $v3h, $v2h
+    jump block1(v4)
+    ; check: jump block1($v4l, $v4h)
+}
diff --git a/cranelift/filetests/filetests/isa/riscv/verify-encoding.clif b/cranelift/filetests/filetests/isa/riscv/verify-encoding.clif
new file mode 100644
index 0000000000..1d29b86da9
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv/verify-encoding.clif
@@ -0,0 +1,21 @@
+test verifier
+target riscv32
+
+function %RV32I(i32 link [%x1]) -> i32 link [%x1] {
+    fn0 = %foo()
+
+block0(v9999: i32):
+    ; iconst.i32 needs legalizing, so it should throw a re-encoding error.
+    [R#0,-] v1 = iconst.i32 0xf0f0f0f0f0 ; error: Instruction failed to re-encode
+    [Iret#19] return v9999
+}
+
+function %RV32I(i32 link [%x1]) -> i32 link [%x1] {
+    fn0 = %foo()
+
+block0(v9999: i32):
+    v1 = iconst.i32 1
+    v2 = iconst.i32 2
+    [R#0,-] v3 = iadd v1, v2 ; error: encoding R#00 should be R#0c
+    [Iret#19] return v9999
+}
diff --git a/cranelift/filetests/filetests/isa/x86/abcd.clif b/cranelift/filetests/filetests/isa/x86/abcd.clif
new file mode 100644
index 0000000000..658ba66ca4
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/abcd.clif
@@ -0,0 +1,13 @@
+test regalloc
+target i686
+
+; %rdi can't be used in a movsbl instruction, so test that the register
+; allocator can move it to a register that can be.
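+; (In 32-bit mode only %al, %bl, %cl and %dl are encodable as the byte
+; source of movsbl, hence the expected regmove into %rax below.)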
+ +function %test(i32 [%rdi]) -> i32 system_v { +block0(v0: i32 [%rdi]): + v1 = ireduce.i8 v0 + v2 = sextend.i32 v1 + return v2 +} +; check: regmove v1, %rdi -> %rax diff --git a/cranelift/filetests/filetests/isa/x86/abi-bool.clif b/cranelift/filetests/filetests/isa/x86/abi-bool.clif new file mode 100644 index 0000000000..2083250a91 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/abi-bool.clif @@ -0,0 +1,19 @@ +test compile +target x86_64 haswell + +function %foo(i64, i64, i64, i32) -> b1 system_v { +block3(v0: i64, v1: i64, v2: i64, v3: i32): + v5 = icmp ne v2, v2 + v8 = iconst.i64 0 + jump block2(v8, v3, v5) + +block2(v10: i64, v30: i32, v37: b1): + v18 = load.i32 notrap aligned v2 + v27 = iadd.i64 v10, v10 + v31 = icmp eq v30, v30 + brz v31, block2(v27, v30, v37) + jump block0(v37) + +block0(v35: b1): + return v35 +} diff --git a/cranelift/filetests/filetests/isa/x86/abi32.clif b/cranelift/filetests/filetests/isa/x86/abi32.clif new file mode 100644 index 0000000000..155d0efc4c --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/abi32.clif @@ -0,0 +1,20 @@ +; Test the legalization of function signatures. +test legalizer +target i686 + +; regex: V=v\d+ + +function %f() { + sig0 = (i32) -> i32 system_v + ; check: sig0 = (i32 [0]) -> i32 [%rax] system_v + + sig1 = (i64) -> b1 system_v + ; check: sig1 = (i32 [0], i32 [4]) -> b1 [%rax] system_v + + sig2 = (f32, i64) -> f64 system_v + ; check: sig2 = (f32 [0], i32 [4], i32 [8]) -> f64 [%xmm0] system_v + +block0: + return +} + diff --git a/cranelift/filetests/filetests/isa/x86/abi64.clif b/cranelift/filetests/filetests/isa/x86/abi64.clif new file mode 100644 index 0000000000..9494e78c67 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/abi64.clif @@ -0,0 +1,31 @@ +; Test the legalization of function signatures. +test legalizer +target x86_64 + +; regex: V=v\d+ + +function %f() { + sig0 = (i32) -> i32 system_v + ; check: sig0 = (i32 [%rdi]) -> i32 [%rax] system_v + + sig1 = (i64) -> b1 system_v + ; check: sig1 = (i64 [%rdi]) -> b1 [%rax] system_v + + sig2 = (f32, i64) -> f64 system_v + ; check: sig2 = (f32 [%xmm0], i64 [%rdi]) -> f64 [%xmm0] system_v + +block0: + return +} + +function %pass_stack_int64(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 vmctx) baldrdash_system_v { + sig0 = (i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 vmctx) baldrdash_system_v + fn0 = u0:0 sig0 + +block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64, v9: i64, v10: i64, v11: i64, v12: i64, v13: i64, v14: i64, v15: i64, v16: i64, v17: i64, v18: i64, v19: i64, v20: i64): + call fn0(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) + jump block1 + +block1: + return +} diff --git a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif new file mode 100644 index 0000000000..744b936c83 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif @@ -0,0 +1,25 @@ +; binary emission of 32-bit code. +test binemit +set opt_level=speed_and_size +set emit_all_ones_funcaddrs +target i686 haswell + +; The binary encodings can be verified with the command: +; +; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/allones_funcaddrs32.clif | llvm-mc -show-encoding -triple=i386 +; + +; Tests from binary32.clif affected by emit_all_ones_funcaddrs. 
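+; The expected encodings match binary32.clif except that the address
+; immediate is emitted as all ones (ffffffff) rather than zeros.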
+function %I32() { + sig0 = () + fn0 = %foo() + +block0: + + ; asm: movl $-1, %ecx + [-,%rcx] v400 = func_addr.i32 fn0 ; bin: b9 Abs4(%foo) ffffffff + ; asm: movl $-1, %esi + [-,%rsi] v401 = func_addr.i32 fn0 ; bin: be Abs4(%foo) ffffffff + + return ; bin: c3 +} diff --git a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif new file mode 100644 index 0000000000..623e96c9d3 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif @@ -0,0 +1,27 @@ +; binary emission of 64-bit code. +test binemit +set opt_level=speed_and_size +set emit_all_ones_funcaddrs +target x86_64 haswell + +; The binary encodings can be verified with the command: +; +; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/allones_funcaddrs64.clif | llvm-mc -show-encoding -triple=x86_64 +; + +; Tests from binary64.clif affected by emit_all_ones_funcaddrs. +function %I64() { + sig0 = () + fn0 = %foo() + +block0: + + ; asm: movabsq $-1, %rcx + [-,%rcx] v400 = func_addr.i64 fn0 ; bin: 48 b9 Abs8(%foo) ffffffffffffffff + ; asm: movabsq $-1, %rsi + [-,%rsi] v401 = func_addr.i64 fn0 ; bin: 48 be Abs8(%foo) ffffffffffffffff + ; asm: movabsq $-1, %r10 + [-,%r10] v402 = func_addr.i64 fn0 ; bin: 49 ba Abs8(%foo) ffffffffffffffff + + return ; bin: c3 +} diff --git a/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif b/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif new file mode 100644 index 0000000000..e8dc4393ca --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif @@ -0,0 +1,14 @@ +test compile +set enable_probestack=false +target i686 + +function u0:0(i32 vmctx) baldrdash_system_v { + sig0 = (i32 vmctx, i32 sigid) baldrdash_system_v + +block0(v0: i32): + v2 = iconst.i32 0 + v8 = iconst.i32 0 + v9 = iconst.i32 0 + call_indirect sig0, v8(v9, v2) + trap user0 +} diff --git a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif new file mode 100644 index 0000000000..7aca619d09 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif @@ -0,0 +1,92 @@ +test compile +target x86_64 baseline + + +; clz/ctz on 64 bit operands + +function %i64_clz(i64) -> i64 { +block0(v10: i64): + v11 = clz v10 + ; check: x86_bsr + ; check: selectif.i64 + return v11 +} + +function %i64_ctz(i64) -> i64 { +block1(v20: i64): + v21 = ctz v20 + ; check: x86_bsf + ; check: selectif.i64 + return v21 +} + + +; clz/ctz on 32 bit operands + +function %i32_clz(i32) -> i32 { +block0(v10: i32): + v11 = clz v10 + ; check: x86_bsr + ; check: selectif.i32 + return v11 +} + +function %i32_ctz(i32) -> i32 { +block1(v20: i32): + v21 = ctz v20 + ; check: x86_bsf + ; check: selectif.i32 + return v21 +} + + +; popcount on 64 bit operands + +function %i64_popcount(i64) -> i64 { +block0(v30: i64): + v31 = popcnt v30; + ; check: ushr_imm + ; check: iconst.i64 + ; check: band + ; check: isub + ; check: ushr_imm + ; check: band + ; check: isub + ; check: ushr_imm + ; check: band + ; check: isub + ; check: ushr_imm + ; check: iadd + ; check: iconst.i64 + ; check: band + ; check: iconst.i64 + ; check: imul + ; check: ushr_imm + return v31; +} + + +; popcount on 32 bit operands + +function %i32_popcount(i32) -> i32 { +block0(v40: i32): + v41 = popcnt v40; + ; check: ushr_imm + ; check: iconst.i32 + ; check: band + ; check: isub + ; check: ushr_imm + ; check: band + ; check: isub + ; check: ushr_imm + ; 
check: band + ; check: isub + ; check: ushr_imm + ; check: iadd + ; check: iconst.i32 + ; check: band + ; check: iconst.i32 + ; check: imul + ; check: ushr_imm + return v41; +} diff --git a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif new file mode 100644 index 0000000000..cbe18d904c --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif @@ -0,0 +1,87 @@ +test binemit +set opt_level=speed_and_size +target x86_64 baseline + +; The binary encodings can be verified with the command: +; +; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif | llvm-mc -show-encoding -triple=x86_64 +; + +function %Foo() { +block0: + ; 64-bit wide bsf + + [-,%r11] v10 = iconst.i64 0x1234 + ; asm: bsfq %r11, %rcx + [-,%rcx,%rflags] v11, v12 = x86_bsf v10 ; bin: 49 0f bc cb + + [-,%rdx] v14 = iconst.i64 0x5678 + ; asm: bsfq %rdx, %r12 + [-,%r12,%rflags] v15, v16 = x86_bsf v14 ; bin: 4c 0f bc e2 + + ; asm: bsfq %rdx, %rdi + [-,%rdi,%rflags] v17, v18 = x86_bsf v14 ; bin: 48 0f bc fa + + + ; 32-bit wide bsf + + [-,%r11] v20 = iconst.i32 0x1234 + ; asm: bsfl %r11d, %ecx + [-,%rcx,%rflags] v21, v22 = x86_bsf v20 ; bin: 41 0f bc cb + + [-,%rdx] v24 = iconst.i32 0x5678 + ; asm: bsfl %edx, %r12d + [-,%r12,%rflags] v25, v26 = x86_bsf v24 ; bin: 44 0f bc e2 + + ; asm: bsfl %edx, %esi + [-,%rsi,%rflags] v27, v28 = x86_bsf v24 ; bin: 0f bc f2 + + + ; 64-bit wide bsr + + [-,%r11] v30 = iconst.i64 0x1234 + ; asm: bsrq %r11, %rcx + [-,%rcx,%rflags] v31, v32 = x86_bsr v30 ; bin: 49 0f bd cb + + [-,%rdx] v34 = iconst.i64 0x5678 + ; asm: bsrq %rdx, %r12 + [-,%r12,%rflags] v35, v36 = x86_bsr v34 ; bin: 4c 0f bd e2 + + ; asm: bsrq %rdx, %rdi + [-,%rdi,%rflags] v37, v38 = x86_bsr v34 ; bin: 48 0f bd fa + + + ; 32-bit wide bsr + + [-,%r11] v40 = iconst.i32 0x1234 + ; asm: bsrl %r11d, %ecx + [-,%rcx,%rflags] v41, v42 = x86_bsr v40 ; bin: 41 0f bd cb + + [-,%rdx] v44 = iconst.i32 0x5678 + ; asm: bsrl %edx, %r12d + [-,%r12,%rflags] v45, v46 = x86_bsr v44 ; bin: 44 0f bd e2 + + ; asm: bsrl %edx, %esi + [-,%rsi,%rflags] v47, v48 = x86_bsr v44 ; bin: 0f bd f2 + + + ; 64-bit wide cmov + + ; asm: cmoveq %r11, %rdx + [-,%rdx] v51 = selectif.i64 eq v48, v30, v34 ; bin: 49 0f 44 d3 + + ; asm: cmoveq %rdi, %rdx + [-,%rdx] v52 = selectif.i64 eq v48, v37, v34 ; bin: 48 0f 44 d7 + + + ; 32-bit wide cmov + + ; asm: cmovnel %r11d, %edx + [-,%rdx] v60 = selectif.i32 ne v48, v40, v44 ; bin: 41 0f 45 d3 + + ; asm: cmovlel %esi, %edx + [-,%rdx] v61 = selectif.i32 sle v48, v27, v44 ; bin: 0f 4e d6 + + + trap user0 +} diff --git a/cranelift/filetests/filetests/isa/x86/binary32-float.clif b/cranelift/filetests/filetests/isa/x86/binary32-float.clif new file mode 100644 index 0000000000..dc65a1f234 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/binary32-float.clif @@ -0,0 +1,557 @@ +; Binary emission of 32-bit floating point code. 
+test binemit +target i686 haswell + +; The binary encodings can be verified with the command: +; +; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary32-float.clif | llvm-mc -show-encoding -triple=i386 +; + +function %F32() { + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + +block0: + [-,%rcx] v0 = iconst.i32 1 + [-,%rsi] v1 = iconst.i32 2 + + ; asm: cvtsi2ss %ecx, %xmm5 + [-,%xmm5] v10 = fcvt_from_sint.f32 v0 ; bin: f3 0f 2a e9 + ; asm: cvtsi2ss %esi, %xmm2 + [-,%xmm2] v11 = fcvt_from_sint.f32 v1 ; bin: f3 0f 2a d6 + + ; asm: cvtss2sd %xmm2, %xmm5 + [-,%xmm5] v12 = fpromote.f64 v11 ; bin: f3 0f 5a ea + ; asm: cvtss2sd %xmm5, %xmm2 + [-,%xmm2] v13 = fpromote.f64 v10 ; bin: f3 0f 5a d5 + + ; asm: movd %ecx, %xmm5 + [-,%xmm5] v14 = bitcast.f32 v0 ; bin: 66 0f 6e e9 + ; asm: movd %esi, %xmm2 + [-,%xmm2] v15 = bitcast.f32 v1 ; bin: 66 0f 6e d6 + + ; asm: movd %xmm5, %ecx + [-,%rcx] v16 = bitcast.i32 v10 ; bin: 66 0f 7e e9 + ; asm: movd %xmm2, %esi + [-,%rsi] v17 = bitcast.i32 v11 ; bin: 66 0f 7e d6 + + ; asm: movaps %xmm2, %xmm5 + [-,%xmm5] v18 = copy v11 ; bin: 0f 28 ea + ; asm: movaps %xmm5, %xmm2 + [-,%xmm2] v19 = copy v10 ; bin: 0f 28 d5 + + ; asm: movaps %xmm2, %xmm5 + regmove v19, %xmm2 -> %xmm5 ; bin: 0f 28 ea + ; asm: movaps %xmm5, %xmm2 + regmove v19, %xmm5 -> %xmm2 ; bin: 0f 28 d5 + + ; Binary arithmetic. + + ; asm: addss %xmm2, %xmm5 + [-,%xmm5] v20 = fadd v10, v11 ; bin: f3 0f 58 ea + ; asm: addss %xmm5, %xmm2 + [-,%xmm2] v21 = fadd v11, v10 ; bin: f3 0f 58 d5 + + ; asm: subss %xmm2, %xmm5 + [-,%xmm5] v22 = fsub v10, v11 ; bin: f3 0f 5c ea + ; asm: subss %xmm5, %xmm2 + [-,%xmm2] v23 = fsub v11, v10 ; bin: f3 0f 5c d5 + + ; asm: mulss %xmm2, %xmm5 + [-,%xmm5] v24 = fmul v10, v11 ; bin: f3 0f 59 ea + ; asm: mulss %xmm5, %xmm2 + [-,%xmm2] v25 = fmul v11, v10 ; bin: f3 0f 59 d5 + + ; asm: divss %xmm2, %xmm5 + [-,%xmm5] v26 = fdiv v10, v11 ; bin: f3 0f 5e ea + ; asm: divss %xmm5, %xmm2 + [-,%xmm2] v27 = fdiv v11, v10 ; bin: f3 0f 5e d5 + + ; Bitwise ops. + ; We use the *ps SSE instructions for everything because they are smaller. + + ; asm: andps %xmm2, %xmm5 + [-,%xmm5] v30 = band v10, v11 ; bin: 0f 54 ea + ; asm: andps %xmm5, %xmm2 + [-,%xmm2] v31 = band v11, v10 ; bin: 0f 54 d5 + + ; asm: andnps %xmm2, %xmm5 + [-,%xmm5] v32 = band_not v11, v10 ; bin: 0f 55 ea + ; asm: andnps %xmm5, %xmm2 + [-,%xmm2] v33 = band_not v10, v11 ; bin: 0f 55 d5 + + ; asm: orps %xmm2, %xmm5 + [-,%xmm5] v34 = bor v10, v11 ; bin: 0f 56 ea + ; asm: orps %xmm5, %xmm2 + [-,%xmm2] v35 = bor v11, v10 ; bin: 0f 56 d5 + + ; asm: xorps %xmm2, %xmm5 + [-,%xmm5] v36 = bxor v10, v11 ; bin: 0f 57 ea + ; asm: xorps %xmm5, %xmm2 + [-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5 + + ; Convert float to int. (No i64 dest on i386). + + ; asm: cvttss2si %xmm5, %ecx + [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd + ; asm: cvttss2si %xmm2, %esi + [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 0f 2c f2 + + ; Min/max. + + ; asm: minss %xmm2, %xmm5 + [-,%xmm5] v42 = x86_fmin v10, v11 ; bin: f3 0f 5d ea + ; asm: minss %xmm5, %xmm2 + [-,%xmm2] v43 = x86_fmin v11, v10 ; bin: f3 0f 5d d5 + ; asm: maxss %xmm2, %xmm5 + [-,%xmm5] v44 = x86_fmax v10, v11 ; bin: f3 0f 5f ea + ; asm: maxss %xmm5, %xmm2 + [-,%xmm2] v45 = x86_fmax v11, v10 ; bin: f3 0f 5f d5 + + ; Unary arithmetic. 
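+    ; The roundss immediate picks the rounding mode ($0 nearest, $1 down,
+    ; $2 up, $3 toward zero), which is how nearest/floor/ceil/trunc below
+    ; all share a single opcode.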
+ + ; asm: sqrtss %xmm5, %xmm2 + [-,%xmm2] v50 = sqrt v10 ; bin: f3 0f 51 d5 + ; asm: sqrtss %xmm2, %xmm5 + [-,%xmm5] v51 = sqrt v11 ; bin: f3 0f 51 ea + + ; asm: roundss $0, %xmm5, %xmm4 + [-,%xmm4] v52 = nearest v10 ; bin: 66 0f 3a 0a e5 00 + ; asm: roundss $0, %xmm2, %xmm5 + [-,%xmm5] v53 = nearest v11 ; bin: 66 0f 3a 0a ea 00 + ; asm: roundss $0, %xmm5, %xmm2 + [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0a d5 00 + + ; asm: roundss $1, %xmm5, %xmm4 + [-,%xmm4] v55 = floor v10 ; bin: 66 0f 3a 0a e5 01 + ; asm: roundss $1, %xmm2, %xmm5 + [-,%xmm5] v56 = floor v11 ; bin: 66 0f 3a 0a ea 01 + ; asm: roundss $1, %xmm5, %xmm2 + [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0a d5 01 + + ; asm: roundss $2, %xmm5, %xmm4 + [-,%xmm4] v58 = ceil v10 ; bin: 66 0f 3a 0a e5 02 + ; asm: roundss $2, %xmm2, %xmm5 + [-,%xmm5] v59 = ceil v11 ; bin: 66 0f 3a 0a ea 02 + ; asm: roundss $2, %xmm5, %xmm2 + [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0a d5 02 + + ; asm: roundss $3, %xmm5, %xmm4 + [-,%xmm4] v61 = trunc v10 ; bin: 66 0f 3a 0a e5 03 + ; asm: roundss $3, %xmm2, %xmm5 + [-,%xmm5] v62 = trunc v11 ; bin: 66 0f 3a 0a ea 03 + ; asm: roundss $3, %xmm5, %xmm2 + [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0a d5 03 + + ; Load/Store + + ; asm: movss (%ecx), %xmm5 + [-,%xmm5] v100 = load.f32 v0 ; bin: heap_oob f3 0f 10 29 + ; asm: movss (%esi), %xmm2 + [-,%xmm2] v101 = load.f32 v1 ; bin: heap_oob f3 0f 10 16 + ; asm: movss 50(%ecx), %xmm5 + [-,%xmm5] v110 = load.f32 v0+50 ; bin: heap_oob f3 0f 10 69 32 + ; asm: movss -50(%esi), %xmm2 + [-,%xmm2] v111 = load.f32 v1-50 ; bin: heap_oob f3 0f 10 56 ce + ; asm: movss 10000(%ecx), %xmm5 + [-,%xmm5] v120 = load.f32 v0+10000 ; bin: heap_oob f3 0f 10 a9 00002710 + ; asm: movss -10000(%esi), %xmm2 + [-,%xmm2] v121 = load.f32 v1-10000 ; bin: heap_oob f3 0f 10 96 ffffd8f0 + + ; asm: movss %xmm5, (%ecx) + [-] store.f32 v100, v0 ; bin: heap_oob f3 0f 11 29 + ; asm: movss %xmm2, (%esi) + [-] store.f32 v101, v1 ; bin: heap_oob f3 0f 11 16 + ; asm: movss %xmm5, 50(%ecx) + [-] store.f32 v100, v0+50 ; bin: heap_oob f3 0f 11 69 32 + ; asm: movss %xmm2, -50(%esi) + [-] store.f32 v101, v1-50 ; bin: heap_oob f3 0f 11 56 ce + ; asm: movss %xmm5, 10000(%ecx) + [-] store.f32 v100, v0+10000 ; bin: heap_oob f3 0f 11 a9 00002710 + ; asm: movss %xmm2, -10000(%esi) + [-] store.f32 v101, v1-10000 ; bin: heap_oob f3 0f 11 96 ffffd8f0 + + ; Spill / Fill. + + ; asm: movss %xmm5, 1032(%esp) + [-,ss1] v200 = spill v100 ; bin: stk_ovf f3 0f 11 ac 24 00000408 + ; asm: movss %xmm2, 1032(%esp) + [-,ss1] v201 = spill v101 ; bin: stk_ovf f3 0f 11 94 24 00000408 + + ; asm: movss 1032(%esp), %xmm5 + [-,%xmm5] v210 = fill v200 ; bin: f3 0f 10 ac 24 00000408 + ; asm: movss 1032(%esp), %xmm2 + [-,%xmm2] v211 = fill v201 ; bin: f3 0f 10 94 24 00000408 + + ; asm: movss %xmm5, 1032(%esp) + regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f3 0f 11 ac 24 00000408 + ; asm: movss 1032(%esp), %xmm5 + regfill v100, ss1 -> %xmm5 ; bin: f3 0f 10 ac 24 00000408 + + ; Comparisons. + ; + ; Only `supported_floatccs` are tested here. Others are handled by + ; legalization patterns. 
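+    ; ucomiss sets PF on unordered operands, so `ord` maps to setnp and
+    ; `uno` to setp; conditions such as plain eq, which a bare setcc would
+    ; get wrong for NaN, are presumably rewritten by legalization first.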
+ + ; asm: ucomiss %xmm2, %xmm5 + ; asm: setnp %bl + [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 0f 2e ea 0f 9b c3 + ; asm: ucomiss %xmm5, %xmm2 + ; asm: setp %bl + [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 0f 2e d5 0f 9a c3 + ; asm: ucomiss %xmm2, %xmm5 + ; asm: setne %dl + [-,%rdx] v302 = fcmp one v10, v11 ; bin: 0f 2e ea 0f 95 c2 + ; asm: ucomiss %xmm5, %xmm2 + ; asm: sete %dl + [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 0f 2e d5 0f 94 c2 + ; asm: ucomiss %xmm2, %xmm5 + ; asm: seta %bl + [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 0f 2e ea 0f 97 c3 + ; asm: ucomiss %xmm5, %xmm2 + ; asm: setae %bl + [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 0f 2e d5 0f 93 c3 + ; asm: ucomiss %xmm2, %xmm5 + ; asm: setb %dl + [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 0f 2e ea 0f 92 c2 + ; asm: ucomiss %xmm5, %xmm2 + ; asm: setbe %dl + [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 0f 2e d5 0f 96 c2 + + ; asm: ucomiss %xmm2, %xmm5 + [-,%rflags] v310 = ffcmp v10, v11 ; bin: 0f 2e ea + ; asm: ucomiss %xmm2, %xmm5 + [-,%rflags] v311 = ffcmp v11, v10 ; bin: 0f 2e d5 + ; asm: ucomiss %xmm5, %xmm5 + [-,%rflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed + + ; Load/Store Complex + + [-,%rax] v350 = iconst.i32 1 + [-,%rbx] v351 = iconst.i32 2 + + ; asm: movss (%rax,%rbx,1),%xmm5 + [-,%xmm5] v352 = load_complex.f32 v350+v351 ; bin: heap_oob f3 0f 10 2c 18 + ; asm: movss 0x32(%rax,%rbx,1),%xmm5 + [-,%xmm5] v353 = load_complex.f32 v350+v351+50 ; bin: heap_oob f3 0f 10 6c 18 32 + ; asm: movss -0x32(%rax,%rbx,1),%xmm5 + [-,%xmm5] v354 = load_complex.f32 v350+v351-50 ; bin: heap_oob f3 0f 10 6c 18 ce + ; asm: movss 0x2710(%rax,%rbx,1),%xmm5 + [-,%xmm5] v355 = load_complex.f32 v350+v351+10000 ; bin: heap_oob f3 0f 10 ac 18 00002710 + ; asm: movss -0x2710(%rax,%rbx,1),%xmm5 + [-,%xmm5] v356 = load_complex.f32 v350+v351-10000 ; bin: heap_oob f3 0f 10 ac 18 ffffd8f0 + ; asm: movss %xmm5,(%rax,%rbx,1) + [-] store_complex.f32 v100, v350+v351 ; bin: heap_oob f3 0f 11 2c 18 + ; asm: movss %xmm5,0x32(%rax,%rbx,1) + [-] store_complex.f32 v100, v350+v351+50 ; bin: heap_oob f3 0f 11 6c 18 32 + ; asm: movss %xmm2,-0x32(%rax,%rbx,1) + [-] store_complex.f32 v101, v350+v351-50 ; bin: heap_oob f3 0f 11 54 18 ce + ; asm: movss %xmm5,0x2710(%rax,%rbx,1) + [-] store_complex.f32 v100, v350+v351+10000 ; bin: heap_oob f3 0f 11 ac 18 00002710 + ; asm: movss %xmm2,-0x2710(%rax,%rbx,1) + [-] store_complex.f32 v101, v350+v351-10000 ; bin: heap_oob f3 0f 11 94 18 ffffd8f0 + + return +} + +function %F64() { + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + +block0: + [-,%rcx] v0 = iconst.i32 1 + [-,%rsi] v1 = iconst.i32 2 + + ; asm: cvtsi2sd %ecx, %xmm5 + [-,%xmm5] v10 = fcvt_from_sint.f64 v0 ; bin: f2 0f 2a e9 + ; asm: cvtsi2sd %esi, %xmm2 + [-,%xmm2] v11 = fcvt_from_sint.f64 v1 ; bin: f2 0f 2a d6 + + ; asm: cvtsd2ss %xmm2, %xmm5 + [-,%xmm5] v12 = fdemote.f32 v11 ; bin: f2 0f 5a ea + ; asm: cvtsd2ss %xmm5, %xmm2 + [-,%xmm2] v13 = fdemote.f32 v10 ; bin: f2 0f 5a d5 + + ; No i64 <-> f64 bitcasts in 32-bit mode. + + ; asm: movaps %xmm2, %xmm5 + [-,%xmm5] v18 = copy v11 ; bin: 0f 28 ea + ; asm: movaps %xmm5, %xmm2 + [-,%xmm2] v19 = copy v10 ; bin: 0f 28 d5 + + ; asm: movaps %xmm2, %xmm5 + regmove v19, %xmm2 -> %xmm5 ; bin: 0f 28 ea + ; asm: movaps %xmm5, %xmm2 + regmove v19, %xmm5 -> %xmm2 ; bin: 0f 28 d5 + + ; Binary arithmetic. 
+ + ; asm: addsd %xmm2, %xmm5 + [-,%xmm5] v20 = fadd v10, v11 ; bin: f2 0f 58 ea + ; asm: addsd %xmm5, %xmm2 + [-,%xmm2] v21 = fadd v11, v10 ; bin: f2 0f 58 d5 + + ; asm: subsd %xmm2, %xmm5 + [-,%xmm5] v22 = fsub v10, v11 ; bin: f2 0f 5c ea + ; asm: subsd %xmm5, %xmm2 + [-,%xmm2] v23 = fsub v11, v10 ; bin: f2 0f 5c d5 + + ; asm: mulsd %xmm2, %xmm5 + [-,%xmm5] v24 = fmul v10, v11 ; bin: f2 0f 59 ea + ; asm: mulsd %xmm5, %xmm2 + [-,%xmm2] v25 = fmul v11, v10 ; bin: f2 0f 59 d5 + + ; asm: divsd %xmm2, %xmm5 + [-,%xmm5] v26 = fdiv v10, v11 ; bin: f2 0f 5e ea + ; asm: divsd %xmm5, %xmm2 + [-,%xmm2] v27 = fdiv v11, v10 ; bin: f2 0f 5e d5 + + ; Bitwise ops. + ; We use the *ps SSE instructions for everything because they are smaller. + + ; asm: andps %xmm2, %xmm5 + [-,%xmm5] v30 = band v10, v11 ; bin: 0f 54 ea + ; asm: andps %xmm5, %xmm2 + [-,%xmm2] v31 = band v11, v10 ; bin: 0f 54 d5 + + ; asm: andnps %xmm2, %xmm5 + [-,%xmm5] v32 = band_not v11, v10 ; bin: 0f 55 ea + ; asm: andnps %xmm5, %xmm2 + [-,%xmm2] v33 = band_not v10, v11 ; bin: 0f 55 d5 + + ; asm: orps %xmm2, %xmm5 + [-,%xmm5] v34 = bor v10, v11 ; bin: 0f 56 ea + ; asm: orps %xmm5, %xmm2 + [-,%xmm2] v35 = bor v11, v10 ; bin: 0f 56 d5 + + ; asm: xorps %xmm2, %xmm5 + [-,%xmm5] v36 = bxor v10, v11 ; bin: 0f 57 ea + ; asm: xorps %xmm5, %xmm2 + [-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5 + + ; Convert float to int. (No i64 dest on i386). + + ; asm: cvttsd2si %xmm5, %ecx + [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd + ; asm: cvttsd2si %xmm2, %esi + [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 0f 2c f2 + + ; Min/max. + + ; asm: minsd %xmm2, %xmm5 + [-,%xmm5] v42 = x86_fmin v10, v11 ; bin: f2 0f 5d ea + ; asm: minsd %xmm5, %xmm2 + [-,%xmm2] v43 = x86_fmin v11, v10 ; bin: f2 0f 5d d5 + ; asm: maxsd %xmm2, %xmm5 + [-,%xmm5] v44 = x86_fmax v10, v11 ; bin: f2 0f 5f ea + ; asm: maxsd %xmm5, %xmm2 + [-,%xmm2] v45 = x86_fmax v11, v10 ; bin: f2 0f 5f d5 + + ; Unary arithmetic. 
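+    ; Same shape as the f32 section: roundsd differs from roundss only in
+    ; its opcode byte (0b rather than 0a) plus the same $0-$3 mode immediate.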
+ + ; asm: sqrtsd %xmm5, %xmm2 + [-,%xmm2] v50 = sqrt v10 ; bin: f2 0f 51 d5 + ; asm: sqrtsd %xmm2, %xmm5 + [-,%xmm5] v51 = sqrt v11 ; bin: f2 0f 51 ea + + ; asm: roundsd $0, %xmm5, %xmm4 + [-,%xmm4] v52 = nearest v10 ; bin: 66 0f 3a 0b e5 00 + ; asm: roundsd $0, %xmm2, %xmm5 + [-,%xmm5] v53 = nearest v11 ; bin: 66 0f 3a 0b ea 00 + ; asm: roundsd $0, %xmm5, %xmm2 + [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0b d5 00 + + ; asm: roundsd $1, %xmm5, %xmm4 + [-,%xmm4] v55 = floor v10 ; bin: 66 0f 3a 0b e5 01 + ; asm: roundsd $1, %xmm2, %xmm5 + [-,%xmm5] v56 = floor v11 ; bin: 66 0f 3a 0b ea 01 + ; asm: roundsd $1, %xmm5, %xmm2 + [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0b d5 01 + + ; asm: roundsd $2, %xmm5, %xmm4 + [-,%xmm4] v58 = ceil v10 ; bin: 66 0f 3a 0b e5 02 + ; asm: roundsd $2, %xmm2, %xmm5 + [-,%xmm5] v59 = ceil v11 ; bin: 66 0f 3a 0b ea 02 + ; asm: roundsd $2, %xmm5, %xmm2 + [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0b d5 02 + + ; asm: roundsd $3, %xmm5, %xmm4 + [-,%xmm4] v61 = trunc v10 ; bin: 66 0f 3a 0b e5 03 + ; asm: roundsd $3, %xmm2, %xmm5 + [-,%xmm5] v62 = trunc v11 ; bin: 66 0f 3a 0b ea 03 + ; asm: roundsd $3, %xmm5, %xmm2 + [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0b d5 03 + + ; Load/Store + + ; asm: movsd (%ecx), %xmm5 + [-,%xmm5] v100 = load.f64 v0 ; bin: heap_oob f2 0f 10 29 + ; asm: movsd (%esi), %xmm2 + [-,%xmm2] v101 = load.f64 v1 ; bin: heap_oob f2 0f 10 16 + ; asm: movsd 50(%ecx), %xmm5 + [-,%xmm5] v110 = load.f64 v0+50 ; bin: heap_oob f2 0f 10 69 32 + ; asm: movsd -50(%esi), %xmm2 + [-,%xmm2] v111 = load.f64 v1-50 ; bin: heap_oob f2 0f 10 56 ce + ; asm: movsd 10000(%ecx), %xmm5 + [-,%xmm5] v120 = load.f64 v0+10000 ; bin: heap_oob f2 0f 10 a9 00002710 + ; asm: movsd -10000(%esi), %xmm2 + [-,%xmm2] v121 = load.f64 v1-10000 ; bin: heap_oob f2 0f 10 96 ffffd8f0 + + ; asm: movsd %xmm5, (%ecx) + [-] store.f64 v100, v0 ; bin: heap_oob f2 0f 11 29 + ; asm: movsd %xmm2, (%esi) + [-] store.f64 v101, v1 ; bin: heap_oob f2 0f 11 16 + ; asm: movsd %xmm5, 50(%ecx) + [-] store.f64 v100, v0+50 ; bin: heap_oob f2 0f 11 69 32 + ; asm: movsd %xmm2, -50(%esi) + [-] store.f64 v101, v1-50 ; bin: heap_oob f2 0f 11 56 ce + ; asm: movsd %xmm5, 10000(%ecx) + [-] store.f64 v100, v0+10000 ; bin: heap_oob f2 0f 11 a9 00002710 + ; asm: movsd %xmm2, -10000(%esi) + [-] store.f64 v101, v1-10000 ; bin: heap_oob f2 0f 11 96 ffffd8f0 + + ; Spill / Fill. + + ; asm: movsd %xmm5, 1032(%esp) + [-,ss1] v200 = spill v100 ; bin: stk_ovf f2 0f 11 ac 24 00000408 + ; asm: movsd %xmm2, 1032(%esp) + [-,ss1] v201 = spill v101 ; bin: stk_ovf f2 0f 11 94 24 00000408 + + ; asm: movsd 1032(%esp), %xmm5 + [-,%xmm5] v210 = fill v200 ; bin: f2 0f 10 ac 24 00000408 + ; asm: movsd 1032(%esp), %xmm2 + [-,%xmm2] v211 = fill v201 ; bin: f2 0f 10 94 24 00000408 + + ; asm: movsd %xmm5, 1032(%esp) + regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f2 0f 11 ac 24 00000408 + ; asm: movsd 1032(%esp), %xmm5 + regfill v100, ss1 -> %xmm5 ; bin: f2 0f 10 ac 24 00000408 + + ; Comparisons. + ; + ; Only `supported_floatccs` are tested here. Others are handled by + ; legalization patterns. 
+ + ; asm: ucomisd %xmm2, %xmm5 + ; asm: setnp %bl + [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 66 0f 2e ea 0f 9b c3 + ; asm: ucomisd %xmm5, %xmm2 + ; asm: setp %bl + [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 66 0f 2e d5 0f 9a c3 + ; asm: ucomisd %xmm2, %xmm5 + ; asm: setne %dl + [-,%rdx] v302 = fcmp one v10, v11 ; bin: 66 0f 2e ea 0f 95 c2 + ; asm: ucomisd %xmm5, %xmm2 + ; asm: sete %dl + [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 66 0f 2e d5 0f 94 c2 + ; asm: ucomisd %xmm2, %xmm5 + ; asm: seta %bl + [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 66 0f 2e ea 0f 97 c3 + ; asm: ucomisd %xmm5, %xmm2 + ; asm: setae %bl + [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 66 0f 2e d5 0f 93 c3 + ; asm: ucomisd %xmm2, %xmm5 + ; asm: setb %dl + [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 66 0f 2e ea 0f 92 c2 + ; asm: ucomisd %xmm5, %xmm2 + ; asm: setbe %dl + [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 0f 2e d5 0f 96 c2 + + ; asm: ucomisd %xmm2, %xmm5 + [-,%rflags] v310 = ffcmp v10, v11 ; bin: 66 0f 2e ea + ; asm: ucomisd %xmm2, %xmm5 + [-,%rflags] v311 = ffcmp v11, v10 ; bin: 66 0f 2e d5 + ; asm: ucomisd %xmm5, %xmm5 + [-,%rflags] v312 = ffcmp v10, v10 ; bin: 66 0f 2e ed + + return +} + +function %cpuflags_float(f32 [%xmm0]) { +block0(v0: f32 [%xmm0]): + ; asm: ucomiss %xmm0, %xmm0 + [-,%rflags] v1 = ffcmp v0, v0 ; bin: 0f 2e c0 + + jump block1 + +block1: + ; asm: jnp block1 + brff ord v1, block1 ; bin: 7b fe + jump block2 + +block2: + ; asm: jp block1 + brff uno v1, block1 ; bin: 7a fc + jump block3 + +block3: + ; asm: jne block1 + brff one v1, block1 ; bin: 75 fa + jump block4 + +block4: + ; asm: je block1 + brff ueq v1, block1 ; bin: 74 f8 + jump block5 + +block5: + ; asm: ja block1 + brff gt v1, block1 ; bin: 77 f6 + jump block6 + +block6: + ; asm: jae block1 + brff ge v1, block1 ; bin: 73 f4 + jump block7 + +block7: + ; asm: jb block1 + brff ult v1, block1 ; bin: 72 f2 + jump block8 + +block8: + ; asm: jbe block1 + brff ule v1, block1 ; bin: 76 f0 + jump block9 + +block9: + ; asm: jp .+4; ud2 + trapff ord v1, user0 ; bin: 7a 02 user0 0f 0b + ; asm: jnp .+4; ud2 + trapff uno v1, user0 ; bin: 7b 02 user0 0f 0b + ; asm: je .+4; ud2 + trapff one v1, user0 ; bin: 74 02 user0 0f 0b + ; asm: jne .+4; ud2 + trapff ueq v1, user0 ; bin: 75 02 user0 0f 0b + ; asm: jna .+4; ud2 + trapff gt v1, user0 ; bin: 76 02 user0 0f 0b + ; asm: jnae .+4; ud2 + trapff ge v1, user0 ; bin: 72 02 user0 0f 0b + ; asm: jnb .+4; ud2 + trapff ult v1, user0 ; bin: 73 02 user0 0f 0b + ; asm: jnbe .+4; ud2 + trapff ule v1, user0 ; bin: 77 02 user0 0f 0b + + ; asm: setnp %bl + [-,%rbx] v10 = trueff ord v1 ; bin: 0f 9b c3 + ; asm: setp %bl + [-,%rbx] v11 = trueff uno v1 ; bin: 0f 9a c3 + ; asm: setne %dl + [-,%rdx] v12 = trueff one v1 ; bin: 0f 95 c2 + ; asm: sete %dl + [-,%rdx] v13 = trueff ueq v1 ; bin: 0f 94 c2 + ; asm: seta %al + [-,%rax] v14 = trueff gt v1 ; bin: 0f 97 c0 + ; asm: setae %al + [-,%rax] v15 = trueff ge v1 ; bin: 0f 93 c0 + ; asm: setb %cl + [-,%rcx] v16 = trueff ult v1 ; bin: 0f 92 c1 + ; asm: setbe %cl + [-,%rcx] v17 = trueff ule v1 ; bin: 0f 96 c1 + + return +} diff --git a/cranelift/filetests/filetests/isa/x86/binary32.clif b/cranelift/filetests/filetests/isa/x86/binary32.clif new file mode 100644 index 0000000000..abe99ce0ea --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/binary32.clif @@ -0,0 +1,721 @@ +; binary emission of x86-32 code. 
+test binemit
+set opt_level=speed_and_size
+target i686 haswell
+
+; The binary encodings can be verified with the command:
+;
+;   sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary32.clif | llvm-mc -show-encoding -triple=i386
+;
+
+function %I32() {
+    sig0 = ()
+    fn0 = %foo()
+
+    gv0 = symbol %some_gv
+
+    ss0 = incoming_arg 8, offset 0
+    ss1 = incoming_arg 1024, offset -1024
+    ss2 = incoming_arg 1024, offset -2048
+    ss3 = incoming_arg 8, offset -2056
+
+block0:
+    ; asm: movl $1, %ecx
+    [-,%rcx] v1 = iconst.i32 1 ; bin: b9 00000001
+    ; asm: movl $2, %esi
+    [-,%rsi] v2 = iconst.i32 2 ; bin: be 00000002
+
+    ; asm: movl $1, %ecx
+    [-,%rcx] v9007 = bconst.b1 true ; bin: b9 00000001
+
+    ; Integer Register-Register Operations.
+
+    ; asm: addl %esi, %ecx
+    [-,%rcx] v10 = iadd v1, v2 ; bin: 01 f1
+    ; asm: addl %ecx, %esi
+    [-,%rsi] v11 = iadd v2, v1 ; bin: 01 ce
+    ; asm: subl %esi, %ecx
+    [-,%rcx] v12 = isub v1, v2 ; bin: 29 f1
+    ; asm: subl %ecx, %esi
+    [-,%rsi] v13 = isub v2, v1 ; bin: 29 ce
+
+    ; asm: andl %esi, %ecx
+    [-,%rcx] v14 = band v1, v2 ; bin: 21 f1
+    ; asm: andl %ecx, %esi
+    [-,%rsi] v15 = band v2, v1 ; bin: 21 ce
+    ; asm: orl %esi, %ecx
+    [-,%rcx] v16 = bor v1, v2 ; bin: 09 f1
+    ; asm: orl %ecx, %esi
+    [-,%rsi] v17 = bor v2, v1 ; bin: 09 ce
+    ; asm: xorl %esi, %ecx
+    [-,%rcx] v18 = bxor v1, v2 ; bin: 31 f1
+    ; asm: xorl %ecx, %esi
+    [-,%rsi] v19 = bxor v2, v1 ; bin: 31 ce
+
+    ; Dynamic shifts take the shift amount in %rcx.
+
+    ; asm: shll %cl, %esi
+    [-,%rsi] v20 = ishl v2, v1 ; bin: d3 e6
+    ; asm: shll %cl, %ecx
+    [-,%rcx] v21 = ishl v1, v1 ; bin: d3 e1
+    ; asm: shrl %cl, %esi
+    [-,%rsi] v22 = ushr v2, v1 ; bin: d3 ee
+    ; asm: shrl %cl, %ecx
+    [-,%rcx] v23 = ushr v1, v1 ; bin: d3 e9
+    ; asm: sarl %cl, %esi
+    [-,%rsi] v24 = sshr v2, v1 ; bin: d3 fe
+    ; asm: sarl %cl, %ecx
+    [-,%rcx] v25 = sshr v1, v1 ; bin: d3 f9
+    ; asm: roll %cl, %esi
+    [-,%rsi] v26 = rotl v2, v1 ; bin: d3 c6
+    ; asm: roll %cl, %ecx
+    [-,%rcx] v27 = rotl v1, v1 ; bin: d3 c1
+    ; asm: rorl %cl, %esi
+    [-,%rsi] v28 = rotr v2, v1 ; bin: d3 ce
+    ; asm: rorl %cl, %ecx
+    [-,%rcx] v29 = rotr v1, v1 ; bin: d3 c9
+
+    ; Integer Register - Immediate 8-bit operations.
+    ; The 8-bit immediate is sign-extended.
+
+    ; asm: addl $-128, %ecx
+    [-,%rcx] v30 = iadd_imm v1, -128 ; bin: 83 c1 80
+    ; asm: addl $10, %esi
+    [-,%rsi] v31 = iadd_imm v2, 10 ; bin: 83 c6 0a
+
+    ; asm: andl $-128, %ecx
+    [-,%rcx] v32 = band_imm v1, -128 ; bin: 83 e1 80
+    ; asm: andl $10, %esi
+    [-,%rsi] v33 = band_imm v2, 10 ; bin: 83 e6 0a
+    ; asm: orl $-128, %ecx
+    [-,%rcx] v34 = bor_imm v1, -128 ; bin: 83 c9 80
+    ; asm: orl $10, %esi
+    [-,%rsi] v35 = bor_imm v2, 10 ; bin: 83 ce 0a
+    ; asm: xorl $-128, %ecx
+    [-,%rcx] v36 = bxor_imm v1, -128 ; bin: 83 f1 80
+    ; asm: xorl $10, %esi
+    [-,%rsi] v37 = bxor_imm v2, 10 ; bin: 83 f6 0a
+
+    ; Integer Register - Immediate 32-bit operations.
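+    ; Immediates that don't fit a sign-extended byte fall back from the
+    ; short 83 /r ib form used above to 81 /r id, costing three extra
+    ; immediate bytes.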
+ + ; asm: addl $-128000, %ecx + [-,%rcx] v40 = iadd_imm v1, -128000 ; bin: 81 c1 fffe0c00 + ; asm: addl $1000000, %esi + [-,%rsi] v41 = iadd_imm v2, 1000000 ; bin: 81 c6 000f4240 + + ; asm: andl $-128000, %ecx + [-,%rcx] v42 = band_imm v1, -128000 ; bin: 81 e1 fffe0c00 + ; asm: andl $1000000, %esi + [-,%rsi] v43 = band_imm v2, 1000000 ; bin: 81 e6 000f4240 + ; asm: orl $-128000, %ecx + [-,%rcx] v44 = bor_imm v1, -128000 ; bin: 81 c9 fffe0c00 + ; asm: orl $1000000, %esi + [-,%rsi] v45 = bor_imm v2, 1000000 ; bin: 81 ce 000f4240 + ; asm: xorl $-128000, %ecx + [-,%rcx] v46 = bxor_imm v1, -128000 ; bin: 81 f1 fffe0c00 + ; asm: xorl $1000000, %esi + [-,%rsi] v47 = bxor_imm v2, 1000000 ; bin: 81 f6 000f4240 + + ; More arithmetic. + + ; asm: imull %esi, %ecx + [-,%rcx] v50 = imul v1, v2 ; bin: 0f af ce + ; asm: imull %ecx, %esi + [-,%rsi] v51 = imul v2, v1 ; bin: 0f af f1 + + ; asm: movl $1, %eax + [-,%rax] v52 = iconst.i32 1 ; bin: b8 00000001 + ; asm: movl $2, %edx + [-,%rdx] v53 = iconst.i32 2 ; bin: ba 00000002 + ; asm: idivl %ecx + [-,%rax,%rdx] v54, v55 = x86_sdivmodx v52, v53, v1 ; bin: int_divz f7 f9 + ; asm: idivl %esi + [-,%rax,%rdx] v56, v57 = x86_sdivmodx v52, v53, v2 ; bin: int_divz f7 fe + ; asm: divl %ecx + [-,%rax,%rdx] v58, v59 = x86_udivmodx v52, v53, v1 ; bin: int_divz f7 f1 + ; asm: divl %esi + [-,%rax,%rdx] v60, v61 = x86_udivmodx v52, v53, v2 ; bin: int_divz f7 f6 + + ; Register copies. + + ; asm: movl %esi, %ecx + [-,%rcx] v80 = copy v2 ; bin: 89 f1 + ; asm: movl %ecx, %esi + [-,%rsi] v81 = copy v1 ; bin: 89 ce + + ; Copy Special + ; asm: movl %esp, %ebp + copy_special %rsp -> %rbp ; bin: 89 e5 + ; asm: movl %ebp, %esp + copy_special %rbp -> %rsp ; bin: 89 ec + + + ; Load/Store instructions. + + ; Register indirect addressing with no displacement. + + ; asm: movl %ecx, (%esi) + store v1, v2 ; bin: heap_oob 89 0e + ; asm: movl %esi, (%ecx) + store v2, v1 ; bin: heap_oob 89 31 + ; asm: movw %cx, (%esi) + istore16 v1, v2 ; bin: heap_oob 66 89 0e + ; asm: movw %si, (%ecx) + istore16 v2, v1 ; bin: heap_oob 66 89 31 + ; asm: movb %cl, (%esi) + istore8 v1, v2 ; bin: heap_oob 88 0e + ; Can't store %sil in 32-bit mode (needs REX prefix). + + ; asm: movl (%ecx), %edi + [-,%rdi] v100 = load.i32 v1 ; bin: heap_oob 8b 39 + ; asm: movl (%esi), %edx + [-,%rdx] v101 = load.i32 v2 ; bin: heap_oob 8b 16 + ; asm: movzwl (%ecx), %edi + [-,%rdi] v102 = uload16.i32 v1 ; bin: heap_oob 0f b7 39 + ; asm: movzwl (%esi), %edx + [-,%rdx] v103 = uload16.i32 v2 ; bin: heap_oob 0f b7 16 + ; asm: movswl (%ecx), %edi + [-,%rdi] v104 = sload16.i32 v1 ; bin: heap_oob 0f bf 39 + ; asm: movswl (%esi), %edx + [-,%rdx] v105 = sload16.i32 v2 ; bin: heap_oob 0f bf 16 + ; asm: movzbl (%ecx), %edi + [-,%rdi] v106 = uload8.i32 v1 ; bin: heap_oob 0f b6 39 + ; asm: movzbl (%esi), %edx + [-,%rdx] v107 = uload8.i32 v2 ; bin: heap_oob 0f b6 16 + ; asm: movsbl (%ecx), %edi + [-,%rdi] v108 = sload8.i32 v1 ; bin: heap_oob 0f be 39 + ; asm: movsbl (%esi), %edx + [-,%rdx] v109 = sload8.i32 v2 ; bin: heap_oob 0f be 16 + + ; Register-indirect with 8-bit signed displacement. 
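+    ; Displacements in [-128, 127] use the ModRM mod=01 (disp8) form;
+    ; anything larger falls through to the 32-bit displacement encodings
+    ; further down.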
+ + ; asm: movl %ecx, 100(%esi) + store v1, v2+100 ; bin: heap_oob 89 4e 64 + ; asm: movl %esi, -100(%ecx) + store v2, v1-100 ; bin: heap_oob 89 71 9c + ; asm: movw %cx, 100(%esi) + istore16 v1, v2+100 ; bin: heap_oob 66 89 4e 64 + ; asm: movw %si, -100(%ecx) + istore16 v2, v1-100 ; bin: heap_oob 66 89 71 9c + ; asm: movb %cl, 100(%esi) + istore8 v1, v2+100 ; bin: heap_oob 88 4e 64 + + ; asm: movl 50(%ecx), %edi + [-,%rdi] v110 = load.i32 v1+50 ; bin: heap_oob 8b 79 32 + ; asm: movl -50(%esi), %edx + [-,%rdx] v111 = load.i32 v2-50 ; bin: heap_oob 8b 56 ce + ; asm: movzwl 50(%ecx), %edi + [-,%rdi] v112 = uload16.i32 v1+50 ; bin: heap_oob 0f b7 79 32 + ; asm: movzwl -50(%esi), %edx + [-,%rdx] v113 = uload16.i32 v2-50 ; bin: heap_oob 0f b7 56 ce + ; asm: movswl 50(%ecx), %edi + [-,%rdi] v114 = sload16.i32 v1+50 ; bin: heap_oob 0f bf 79 32 + ; asm: movswl -50(%esi), %edx + [-,%rdx] v115 = sload16.i32 v2-50 ; bin: heap_oob 0f bf 56 ce + ; asm: movzbl 50(%ecx), %edi + [-,%rdi] v116 = uload8.i32 v1+50 ; bin: heap_oob 0f b6 79 32 + ; asm: movzbl -50(%esi), %edx + [-,%rdx] v117 = uload8.i32 v2-50 ; bin: heap_oob 0f b6 56 ce + ; asm: movsbl 50(%ecx), %edi + [-,%rdi] v118 = sload8.i32 v1+50 ; bin: heap_oob 0f be 79 32 + ; asm: movsbl -50(%esi), %edx + [-,%rdx] v119 = sload8.i32 v2-50 ; bin: heap_oob 0f be 56 ce + + ; Register-indirect with 32-bit signed displacement. + + ; asm: movl %ecx, 10000(%esi) + store v1, v2+10000 ; bin: heap_oob 89 8e 00002710 + ; asm: movl %esi, -10000(%ecx) + store v2, v1-10000 ; bin: heap_oob 89 b1 ffffd8f0 + ; asm: movw %cx, 10000(%esi) + istore16 v1, v2+10000 ; bin: heap_oob 66 89 8e 00002710 + ; asm: movw %si, -10000(%ecx) + istore16 v2, v1-10000 ; bin: heap_oob 66 89 b1 ffffd8f0 + ; asm: movb %cl, 10000(%esi) + istore8 v1, v2+10000 ; bin: heap_oob 88 8e 00002710 + + ; asm: movl 50000(%ecx), %edi + [-,%rdi] v120 = load.i32 v1+50000 ; bin: heap_oob 8b b9 0000c350 + ; asm: movl -50000(%esi), %edx + [-,%rdx] v121 = load.i32 v2-50000 ; bin: heap_oob 8b 96 ffff3cb0 + ; asm: movzwl 50000(%ecx), %edi + [-,%rdi] v122 = uload16.i32 v1+50000 ; bin: heap_oob 0f b7 b9 0000c350 + ; asm: movzwl -50000(%esi), %edx + [-,%rdx] v123 = uload16.i32 v2-50000 ; bin: heap_oob 0f b7 96 ffff3cb0 + ; asm: movswl 50000(%ecx), %edi + [-,%rdi] v124 = sload16.i32 v1+50000 ; bin: heap_oob 0f bf b9 0000c350 + ; asm: movswl -50000(%esi), %edx + [-,%rdx] v125 = sload16.i32 v2-50000 ; bin: heap_oob 0f bf 96 ffff3cb0 + ; asm: movzbl 50000(%ecx), %edi + [-,%rdi] v126 = uload8.i32 v1+50000 ; bin: heap_oob 0f b6 b9 0000c350 + ; asm: movzbl -50000(%esi), %edx + [-,%rdx] v127 = uload8.i32 v2-50000 ; bin: heap_oob 0f b6 96 ffff3cb0 + ; asm: movsbl 50000(%ecx), %edi + [-,%rdi] v128 = sload8.i32 v1+50000 ; bin: heap_oob 0f be b9 0000c350 + ; asm: movsbl -50000(%esi), %edx + [-,%rdx] v129 = sload8.i32 v2-50000 ; bin: heap_oob 0f be 96 ffff3cb0 + + ; Bit-counting instructions. + + ; asm: popcntl %esi, %ecx + [-,%rcx] v200 = popcnt v2 ; bin: f3 0f b8 ce + ; asm: popcntl %ecx, %esi + [-,%rsi] v201 = popcnt v1 ; bin: f3 0f b8 f1 + + ; asm: lzcntl %esi, %ecx + [-,%rcx] v202 = clz v2 ; bin: f3 0f bd ce + ; asm: lzcntl %ecx, %esi + [-,%rsi] v203 = clz v1 ; bin: f3 0f bd f1 + + ; asm: tzcntl %esi, %ecx + [-,%rcx] v204 = ctz v2 ; bin: f3 0f bc ce + ; asm: tzcntl %ecx, %esi + [-,%rsi] v205 = ctz v1 ; bin: f3 0f bc f1 + + ; Integer comparisons. 
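+    ; Each icmp becomes a cmpl (opcode 39) followed by a setcc (0f 9x)
+    ; into a byte register; only the setcc opcode varies by condition.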
+ + ; asm: cmpl %esi, %ecx + ; asm: sete %bl + [-,%rbx] v300 = icmp eq v1, v2 ; bin: 39 f1 0f 94 c3 + ; asm: cmpl %ecx, %esi + ; asm: sete %dl + [-,%rdx] v301 = icmp eq v2, v1 ; bin: 39 ce 0f 94 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setne %bl + [-,%rbx] v302 = icmp ne v1, v2 ; bin: 39 f1 0f 95 c3 + ; asm: cmpl %ecx, %esi + ; asm: setne %dl + [-,%rdx] v303 = icmp ne v2, v1 ; bin: 39 ce 0f 95 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setl %bl + [-,%rbx] v304 = icmp slt v1, v2 ; bin: 39 f1 0f 9c c3 + ; asm: cmpl %ecx, %esi + ; asm: setl %dl + [-,%rdx] v305 = icmp slt v2, v1 ; bin: 39 ce 0f 9c c2 + + ; asm: cmpl %esi, %ecx + ; asm: setge %bl + [-,%rbx] v306 = icmp sge v1, v2 ; bin: 39 f1 0f 9d c3 + ; asm: cmpl %ecx, %esi + ; asm: setge %dl + [-,%rdx] v307 = icmp sge v2, v1 ; bin: 39 ce 0f 9d c2 + + ; asm: cmpl %esi, %ecx + ; asm: setg %bl + [-,%rbx] v308 = icmp sgt v1, v2 ; bin: 39 f1 0f 9f c3 + ; asm: cmpl %ecx, %esi + ; asm: setg %dl + [-,%rdx] v309 = icmp sgt v2, v1 ; bin: 39 ce 0f 9f c2 + + ; asm: cmpl %esi, %ecx + ; asm: setle %bl + [-,%rbx] v310 = icmp sle v1, v2 ; bin: 39 f1 0f 9e c3 + ; asm: cmpl %ecx, %esi + ; asm: setle %dl + [-,%rdx] v311 = icmp sle v2, v1 ; bin: 39 ce 0f 9e c2 + + ; asm: cmpl %esi, %ecx + ; asm: setb %bl + [-,%rbx] v312 = icmp ult v1, v2 ; bin: 39 f1 0f 92 c3 + ; asm: cmpl %ecx, %esi + ; asm: setb %dl + [-,%rdx] v313 = icmp ult v2, v1 ; bin: 39 ce 0f 92 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setae %bl + [-,%rbx] v314 = icmp uge v1, v2 ; bin: 39 f1 0f 93 c3 + ; asm: cmpl %ecx, %esi + ; asm: setae %dl + [-,%rdx] v315 = icmp uge v2, v1 ; bin: 39 ce 0f 93 c2 + + ; asm: cmpl %esi, %ecx + ; asm: seta %bl + [-,%rbx] v316 = icmp ugt v1, v2 ; bin: 39 f1 0f 97 c3 + ; asm: cmpl %ecx, %esi + ; asm: seta %dl + [-,%rdx] v317 = icmp ugt v2, v1 ; bin: 39 ce 0f 97 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setbe %bl + [-,%rbx] v318 = icmp ule v1, v2 ; bin: 39 f1 0f 96 c3 + ; asm: cmpl %ecx, %esi + ; asm: setbe %dl + [-,%rdx] v319 = icmp ule v2, v1 ; bin: 39 ce 0f 96 c2 + + ; Bool-to-int conversions. + + ; asm: movzbl %bl, %ecx + [-,%rcx] v350 = bint.i32 v300 ; bin: 0f b6 cb + ; asm: movzbl %dl, %esi + [-,%rsi] v351 = bint.i32 v301 ; bin: 0f b6 f2 + + ; asm: call foo + call fn0() ; bin: stk_ovf e8 CallPCRel4(%foo-4) 00000000 + + ; asm: movl $0, %ecx + [-,%rcx] v400 = func_addr.i32 fn0 ; bin: b9 Abs4(%foo) 00000000 + ; asm: movl $0, %esi + [-,%rsi] v401 = func_addr.i32 fn0 ; bin: be Abs4(%foo) 00000000 + + ; asm: call *%ecx + call_indirect sig0, v400() ; bin: stk_ovf ff d1 + ; asm: call *%esi + call_indirect sig0, v401() ; bin: stk_ovf ff d6 + + ; asm: movl $0, %ecx + [-,%rcx] v450 = symbol_value.i32 gv0 ; bin: b9 Abs4(%some_gv) 00000000 + ; asm: movl $0, %esi + [-,%rsi] v451 = symbol_value.i32 gv0 ; bin: be Abs4(%some_gv) 00000000 + + ; Spill / Fill. 
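+    ; The 1032(%esp) operand below is ss1's offset -1024 rebased on what
+    ; is presumably a 2056-byte frame (ss3 ends at -2056): 2056 - 1024 = 1032.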
+
+    ; asm: movl %ecx, 1032(%esp)
+    [-,ss1] v500 = spill v1 ; bin: stk_ovf 89 8c 24 00000408
+    ; asm: movl %esi, 1032(%esp)
+    [-,ss1] v501 = spill v2 ; bin: stk_ovf 89 b4 24 00000408
+
+    ; asm: movl 1032(%esp), %ecx
+    [-,%rcx] v510 = fill v500 ; bin: 8b 8c 24 00000408
+    ; asm: movl 1032(%esp), %esi
+    [-,%rsi] v511 = fill v501 ; bin: 8b b4 24 00000408
+
+    ; asm: movl %ecx, 1032(%esp)
+    regspill v1, %rcx -> ss1 ; bin: stk_ovf 89 8c 24 00000408
+    ; asm: movl 1032(%esp), %ecx
+    regfill v1, ss1 -> %rcx ; bin: 8b 8c 24 00000408
+
+    ; Push and Pop
+    ; asm: pushl %ecx
+    x86_push v1 ; bin: stk_ovf 51
+    ; asm: popl %ecx
+    [-,%rcx] v512 = x86_pop.i32 ; bin: 59
+
+    ; Adjust Stack Pointer Up
+    ; asm: addl $64, %esp
+    adjust_sp_up_imm 64 ; bin: 83 c4 40
+    ; asm: addl $-64, %esp
+    adjust_sp_up_imm -64 ; bin: 83 c4 c0
+    ; asm: addl $1024, %esp
+    adjust_sp_up_imm 1024 ; bin: 81 c4 00000400
+    ; asm: addl $-1024, %esp
+    adjust_sp_up_imm -1024 ; bin: 81 c4 fffffc00
+    ; asm: addl $2147483647, %esp
+    adjust_sp_up_imm 2147483647 ; bin: 81 c4 7fffffff
+    ; asm: addl $-2147483648, %esp
+    adjust_sp_up_imm -2147483648 ; bin: 81 c4 80000000
+
+    ; Adjust Stack Pointer Down
+    ; asm: subl %ecx, %esp
+    adjust_sp_down v1 ; bin: 29 cc
+    ; asm: subl %esi, %esp
+    adjust_sp_down v2 ; bin: 29 f4
+    ; asm: subl $64, %esp
+    adjust_sp_down_imm 64 ; bin: 83 ec 40
+    ; asm: subl $-64, %esp
+    adjust_sp_down_imm -64 ; bin: 83 ec c0
+    ; asm: subl $1024, %esp
+    adjust_sp_down_imm 1024 ; bin: 81 ec 00000400
+    ; asm: subl $-1024, %esp
+    adjust_sp_down_imm -1024 ; bin: 81 ec fffffc00
+    ; asm: subl $2147483647, %esp
+    adjust_sp_down_imm 2147483647 ; bin: 81 ec 7fffffff
+    ; asm: subl $-2147483648, %esp
+    adjust_sp_down_imm -2147483648 ; bin: 81 ec 80000000
+
+    ; Shift immediates
+    ; asm: shll $2, %esi
+    [-,%rsi] v513 = ishl_imm v2, 2 ; bin: c1 e6 02
+    ; asm: sarl $5, %esi
+    [-,%rsi] v514 = sshr_imm v2, 5 ; bin: c1 fe 05
+    ; asm: shrl $8, %esi
+    [-,%rsi] v515 = ushr_imm v2, 8 ; bin: c1 ee 08
+
+    ; Rotate immediates
+    ; asm: roll $12, %esi
+    [-,%rsi] v5101 = rotl_imm v2, 12 ; bin: c1 c6 0c
+    ; asm: rorl $5, %esi
+    [-,%rsi] v5103 = rotr_imm v2, 5 ; bin: c1 ce 05
+
+    ; Load Complex
+    [-,%rax] v521 = iconst.i32 1
+    [-,%rbx] v522 = iconst.i32 1
+    ; asm: movl (%eax,%ebx,1), %ecx
+    [-,%rcx] v526 = load_complex.i32 v521+v522 ; bin: heap_oob 8b 0c 18
+    ; asm: movl 1(%eax,%ebx,1), %ecx
+    [-,%rcx] v528 = load_complex.i32 v521+v522+1 ; bin: heap_oob 8b 4c 18 01
+    ; asm: mov 0x1000(%eax,%ebx,1),%ecx
+    [-,%rcx] v530 = load_complex.i32 v521+v522+0x1000 ; bin: heap_oob 8b 8c 18 00001000
+    ; asm: movzbl (%eax,%ebx,1),%ecx
+    [-,%rcx] v532 = uload8_complex.i32 v521+v522 ; bin: heap_oob 0f b6 0c 18
+    ; asm: movsbl (%eax,%ebx,1),%ecx
+    [-,%rcx] v534 = sload8_complex.i32 v521+v522 ; bin: heap_oob 0f be 0c 18
+    ; asm: movzwl (%eax,%ebx,1),%ecx
+    [-,%rcx] v536 = uload16_complex.i32 v521+v522 ; bin: heap_oob 0f b7 0c 18
+    ; asm: movswl (%eax,%ebx,1),%ecx
+    [-,%rcx] v538 = sload16_complex.i32 v521+v522 ; bin: heap_oob 0f bf 0c 18
+
+    ; Store Complex
+    [-,%rcx] v601 = iconst.i32 1
+    ; asm: mov %ecx,(%eax,%ebx,1)
+    store_complex v601, v521+v522 ; bin: heap_oob 89 0c 18
+    ; asm: mov %ecx,0x1(%eax,%ebx,1)
+    store_complex v601, v521+v522+1 ; bin: heap_oob 89 4c 18 01
+    ; asm: mov %ecx,0x1000(%eax,%ebx,1)
+    store_complex v601, v521+v522+0x1000 ; bin: heap_oob 89 8c 18 00001000
+    ; asm: mov %cx,(%eax,%ebx,1)
+    istore16_complex v601, v521+v522 ; bin: heap_oob 66 89 0c 18
+    ; asm: mov %cl,(%eax,%ebx,1)
+    istore8_complex v601, v521+v522 ; bin: heap_oob 88 0c 18
+
+    ; Carry 
Addition + ; asm: addl %esi, %ecx + [-,%rcx,%rflags] v701, v702 = iadd_ifcout v1, v2 ; bin: 01 f1 + ; asm: adcl %esi, %ecx + [-,%rcx] v703 = iadd_ifcin v1, v2, v702 ; bin: 11 f1 + ; asm: adcl %esi, %ecx + [-,%rcx,%rflags] v704, v705 = iadd_ifcarry v1, v2, v702 ; bin: 11 f1 + + ; Borrow Subtraction + ; asm: subl %esi, %ecx + [-,%rcx,%rflags] v706, v707 = isub_ifbout v1, v2 ; bin: 29 f1 + ; asm: sbbl %esi, %ecx + [-,%rcx] v708 = isub_ifbin v1, v2, v707 ; bin: 19 f1 + ; asm: sbbl %esi, %ecx + [-,%rcx,%rflags] v709, v710 = isub_ifborrow v1, v2, v707 ; bin: 19 f1 + + ; asm: testl %ecx, %ecx + ; asm: je block1 + brz v1, block1 ; bin: 85 c9 74 0e + fallthrough block3 + +block3: + ; asm: testl %esi, %esi + ; asm: je block1 + brz v2, block1 ; bin: 85 f6 74 0a + fallthrough block4 + +block4: + ; asm: testl %ecx, %ecx + ; asm: jne block1 + brnz v1, block1 ; bin: 85 c9 75 06 + fallthrough block5 + +block5: + ; asm: testl %esi, %esi + ; asm: jne block1 + brnz v2, block1 ; bin: 85 f6 75 02 + + ; asm: jmp block2 + jump block2 ; bin: eb 01 + + ; asm: block1: +block1: + ; asm: ret + return ; bin: c3 + + ; asm: block2: +block2: + trap user0 ; bin: user0 0f 0b +} + +; Special branch encodings only for I32 mode. +function %special_branches() { +block0: + [-,%rcx] v1 = iconst.i32 1 + [-,%rsi] v2 = iconst.i32 2 + [-,%rdi] v3 = icmp eq v1, v2 + [-,%rbx] v4 = icmp ugt v1, v2 + + ; asm: testl $0xff, %edi + ; asm: je block1 + brz v3, block1 ; bin: f7 c7 000000ff 0f 84 00000015 + fallthrough block2 + +block2: + ; asm: testb %bl, %bl + ; asm: je block1 + brz v4, block1 ; bin: 84 db 74 11 + fallthrough block3 + +block3: + ; asm: testl $0xff, %edi + ; asm: jne block1 + brnz v3, block1 ; bin: f7 c7 000000ff 0f 85 00000005 + fallthrough block4 + +block4: + ; asm: testb %bl, %bl + ; asm: jne block1 + brnz v4, block1 ; bin: 84 db 75 01 + fallthrough block5 + +block5: + return + +block1: + return +} + +; CPU flag instructions. 
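+; ifcmp materializes a comparison in %rflags so that brif, trueif and
+; trapif can consume the flags directly instead of re-comparing.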
+function %cpu_flags() { +block0: + [-,%rcx] v1 = iconst.i32 1 + [-,%rsi] v2 = iconst.i32 2 + jump block1 + +block1: + ; asm: cmpl %esi, %ecx + [-,%rflags] v10 = ifcmp v1, v2 ; bin: 39 f1 + ; asm: cmpl %ecx, %esi + [-,%rflags] v11 = ifcmp v2, v1 ; bin: 39 ce + + ; asm: je block1 + brif eq v11, block1 ; bin: 74 fa + jump block2 + +block2: + ; asm: jne block1 + brif ne v11, block1 ; bin: 75 f8 + jump block3 + +block3: + ; asm: jl block1 + brif slt v11, block1 ; bin: 7c f6 + jump block4 + +block4: + ; asm: jge block1 + brif sge v11, block1 ; bin: 7d f4 + jump block5 + +block5: + ; asm: jg block1 + brif sgt v11, block1 ; bin: 7f f2 + jump block6 + +block6: + ; asm: jle block1 + brif sle v11, block1 ; bin: 7e f0 + jump block7 + +block7: + ; asm: jb block1 + brif ult v11, block1 ; bin: 72 ee + jump block8 + +block8: + ; asm: jae block1 + brif uge v11, block1 ; bin: 73 ec + jump block9 + +block9: + ; asm: ja block1 + brif ugt v11, block1 ; bin: 77 ea + jump block10 + +block10: + ; asm: jbe block1 + brif ule v11, block1 ; bin: 76 e8 + jump block11 + +block11: + + ; asm: sete %bl + [-,%rbx] v20 = trueif eq v11 ; bin: 0f 94 c3 + ; asm: setne %bl + [-,%rbx] v21 = trueif ne v11 ; bin: 0f 95 c3 + ; asm: setl %dl + [-,%rdx] v22 = trueif slt v11 ; bin: 0f 9c c2 + ; asm: setge %dl + [-,%rdx] v23 = trueif sge v11 ; bin: 0f 9d c2 + ; asm: setg %bl + [-,%rbx] v24 = trueif sgt v11 ; bin: 0f 9f c3 + ; asm: setle %bl + [-,%rbx] v25 = trueif sle v11 ; bin: 0f 9e c3 + ; asm: setb %dl + [-,%rdx] v26 = trueif ult v11 ; bin: 0f 92 c2 + ; asm: setae %dl + [-,%rdx] v27 = trueif uge v11 ; bin: 0f 93 c2 + ; asm: seta %bl + [-,%rbx] v28 = trueif ugt v11 ; bin: 0f 97 c3 + ; asm: setbe %bl + [-,%rbx] v29 = trueif ule v11 ; bin: 0f 96 c3 + + ; The trapif instructions are encoded as macros: a conditional jump over a ud2. + ; asm: jne .+4; ud2 + trapif eq v11, user0 ; bin: 75 02 user0 0f 0b + ; asm: je .+4; ud2 + trapif ne v11, user0 ; bin: 74 02 user0 0f 0b + ; asm: jnl .+4; ud2 + trapif slt v11, user0 ; bin: 7d 02 user0 0f 0b + ; asm: jnge .+4; ud2 + trapif sge v11, user0 ; bin: 7c 02 user0 0f 0b + ; asm: jng .+4; ud2 + trapif sgt v11, user0 ; bin: 7e 02 user0 0f 0b + ; asm: jnle .+4; ud2 + trapif sle v11, user0 ; bin: 7f 02 user0 0f 0b + ; asm: jnb .+4; ud2 + trapif ult v11, user0 ; bin: 73 02 user0 0f 0b + ; asm: jnae .+4; ud2 + trapif uge v11, user0 ; bin: 72 02 user0 0f 0b + ; asm: jna .+4; ud2 + trapif ugt v11, user0 ; bin: 76 02 user0 0f 0b + ; asm: jnbe .+4; ud2 + trapif ule v11, user0 ; bin: 77 02 user0 0f 0b + ; asm: jo .+4; ud2 + trapif of v11, user0 ; bin: 71 02 user0 0f 0b + ; asm: jno .+4; ud2 + trapif nof v11, user0 ; bin: 70 02 user0 0f 0b + + ; Stack check. + ; asm: cmpl %esp, %ecx + [-,%rflags] v40 = ifcmp_sp v1 ; bin: 39 e1 + ; asm: cmpl %esp, %esi + [-,%rflags] v41 = ifcmp_sp v2 ; bin: 39 e6 + + ; asm: cmpl $-100, %ecx + [-,%rflags] v42 = ifcmp_imm v1, -100 ; bin: 83 f9 9c + ; asm: cmpl $100, %esi + [-,%rflags] v43 = ifcmp_imm v2, 100 ; bin: 83 fe 64 + + ; asm: cmpl $-10000, %ecx + [-,%rflags] v44 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0 + ; asm: cmpl $10000, %esi + [-,%rflags] v45 = ifcmp_imm v2, 10000 ; bin: 81 fe 00002710 + + return +} + +; Tests for i32/i8 conversion instructions. 
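+; ireduce should be a pure type change with an empty encoding; only the
+; extends (movsbl/movzbl and their 16-bit variants) emit any bytes.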
+function %I32_I8() { +block0: + [-,%rcx] v1 = iconst.i32 1 + + [-,%rcx] v11 = ireduce.i8 v1 ; bin: + + ; asm: movsbl %cl, %esi + [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f be f1 + + ; asm: movzbl %cl, %esi + [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b6 f1 + + trap user0 ; bin: user0 0f 0b +} + +; Tests for i32/i16 conversion instructions. +function %I32_I16() { +block0: + [-,%rcx] v1 = iconst.i32 1 + + [-,%rcx] v11 = ireduce.i16 v1 ; bin: + + ; asm: movswl %cx, %esi + [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f bf f1 + + ; asm: movzwl %cx, %esi + [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b7 f1 + + trap user0 ; bin: user0 0f 0b +} diff --git a/cranelift/filetests/filetests/isa/x86/binary64-float.clif b/cranelift/filetests/filetests/isa/x86/binary64-float.clif new file mode 100644 index 0000000000..2ec733b2c0 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/binary64-float.clif @@ -0,0 +1,638 @@ +; Binary emission of 64-bit floating point code. +test binemit +set opt_level=speed_and_size +target x86_64 haswell + +; The binary encodings can be verified with the command: +; +; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary64-float.clif | llvm-mc -show-encoding -triple=x86_64 +; + +function %F32() { + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + +block0: + [-,%r11] v0 = iconst.i32 1 + [-,%rsi] v1 = iconst.i32 2 + [-,%rax] v2 = iconst.i64 11 + [-,%r14] v3 = iconst.i64 12 + [-,%r13] v4 = iconst.i64 13 + + ; asm: cvtsi2ssl %r11d, %xmm5 + [-,%xmm5] v10 = fcvt_from_sint.f32 v0 ; bin: f3 41 0f 2a eb + ; asm: cvtsi2ssl %esi, %xmm10 + [-,%xmm10] v11 = fcvt_from_sint.f32 v1 ; bin: f3 44 0f 2a d6 + + ; asm: cvtsi2ssq %rax, %xmm5 + [-,%xmm5] v12 = fcvt_from_sint.f32 v2 ; bin: f3 48 0f 2a e8 + ; asm: cvtsi2ssq %r14, %xmm10 + [-,%xmm10] v13 = fcvt_from_sint.f32 v3 ; bin: f3 4d 0f 2a d6 + + ; asm: cvtss2sd %xmm10, %xmm5 + [-,%xmm5] v14 = fpromote.f64 v11 ; bin: f3 41 0f 5a ea + ; asm: cvtss2sd %xmm5, %xmm10 + [-,%xmm10] v15 = fpromote.f64 v10 ; bin: f3 44 0f 5a d5 + + ; asm: movd %r11d, %xmm5 + [-,%xmm5] v16 = bitcast.f32 v0 ; bin: 66 41 0f 6e eb + ; asm: movd %esi, %xmm10 + [-,%xmm10] v17 = bitcast.f32 v1 ; bin: 66 44 0f 6e d6 + + ; asm: movd %xmm5, %ecx + [-,%rcx] v18 = bitcast.i32 v10 ; bin: 66 0f 7e e9 + ; asm: movd %xmm10, %esi + [-,%rsi] v19 = bitcast.i32 v11 ; bin: 66 44 0f 7e d6 + + ; Binary arithmetic. + + ; asm: addss %xmm10, %xmm5 + [-,%xmm5] v20 = fadd v10, v11 ; bin: f3 41 0f 58 ea + ; asm: addss %xmm5, %xmm10 + [-,%xmm10] v21 = fadd v11, v10 ; bin: f3 44 0f 58 d5 + + ; asm: subss %xmm10, %xmm5 + [-,%xmm5] v22 = fsub v10, v11 ; bin: f3 41 0f 5c ea + ; asm: subss %xmm5, %xmm10 + [-,%xmm10] v23 = fsub v11, v10 ; bin: f3 44 0f 5c d5 + + ; asm: mulss %xmm10, %xmm5 + [-,%xmm5] v24 = fmul v10, v11 ; bin: f3 41 0f 59 ea + ; asm: mulss %xmm5, %xmm10 + [-,%xmm10] v25 = fmul v11, v10 ; bin: f3 44 0f 59 d5 + + ; asm: divss %xmm10, %xmm5 + [-,%xmm5] v26 = fdiv v10, v11 ; bin: f3 41 0f 5e ea + ; asm: divss %xmm5, %xmm10 + [-,%xmm10] v27 = fdiv v11, v10 ; bin: f3 44 0f 5e d5 + + ; Bitwise ops. + ; We use the *ps SSE instructions for everything because they are smaller. 
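+ ; The *ps opcodes (0f 54/55/56/57) carry no mandatory prefix, so they are + ; one byte shorter than the 66-prefixed *pd forms, and a bitwise operation + ; is indifferent to the lane type, so the same encodings serve f32 and f64.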
+ + ; asm: andps %xmm10, %xmm5 + [-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea + ; asm: andps %xmm5, %xmm10 + [-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5 + + ; asm: andnps %xmm10, %xmm5 + [-,%xmm5] v32 = band_not v11, v10 ; bin: 41 0f 55 ea + ; asm: andnps %xmm5, %xmm10 + [-,%xmm10] v33 = band_not v10, v11 ; bin: 44 0f 55 d5 + + ; asm: orps %xmm10, %xmm5 + [-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea + ; asm: orps %xmm5, %xmm10 + [-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5 + + ; asm: xorps %xmm10, %xmm5 + [-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea + ; asm: xorps %xmm5, %xmm10 + [-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5 + + ; asm: movaps %xmm10, %xmm5 + [-,%xmm5] v38 = copy v11 ; bin: 41 0f 28 ea + ; asm: movaps %xmm5, %xmm10 + [-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5 + + ; Copy to SSA + + ; asm: movsd %xmm0, %xmm15 + [-,%xmm15] v400 = copy_to_ssa.f64 %xmm0 ; bin: f2 44 0f 10 f8 + ; asm: movsd %xmm15, %xmm0 + [-,%xmm0] v401 = copy_to_ssa.f64 %xmm15 ; bin: f2 41 0f 10 c7 + ; asm: movsd %xmm7, %xmm6. Unfortunately we get a redundant REX prefix. + [-,%xmm6] v402 = copy_to_ssa.f64 %xmm7 ; bin: f2 40 0f 10 f7 + ; asm: movsd %xmm11, %xmm14 + [-,%xmm14] v403 = copy_to_ssa.f64 %xmm11 ; bin: f2 45 0f 10 f3 + + ; asm: movss %xmm0, %xmm15 + [-,%xmm15] v404 = copy_to_ssa.f32 %xmm0 ; bin: f3 44 0f 10 f8 + ; asm: movss %xmm15, %xmm0 + [-,%xmm0] v405 = copy_to_ssa.f32 %xmm15 ; bin: f3 41 0f 10 c7 + ; asm: movss %xmm7, %xmm6. Unfortunately we get a redundant REX prefix. + [-,%xmm6] v406 = copy_to_ssa.f32 %xmm7 ; bin: f3 40 0f 10 f7 + ; asm: movss %xmm11, %xmm14 + [-,%xmm14] v407 = copy_to_ssa.f32 %xmm11 ; bin: f3 45 0f 10 f3 + + ; Convert float to int. + + ; asm: cvttss2si %xmm5, %ecx + [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd + ; asm: cvttss2si %xmm10, %esi + [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 41 0f 2c f2 + + ; asm: cvttss2si %xmm5, %rcx + [-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f3 48 0f 2c cd + ; asm: cvttss2si %xmm10, %rsi + [-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f3 49 0f 2c f2 + + ; Min/max. + + ; asm: minss %xmm10, %xmm5 + [-,%xmm5] v44 = x86_fmin v10, v11 ; bin: f3 41 0f 5d ea + ; asm: minss %xmm5, %xmm10 + [-,%xmm10] v45 = x86_fmin v11, v10 ; bin: f3 44 0f 5d d5 + ; asm: maxss %xmm10, %xmm5 + [-,%xmm5] v46 = x86_fmax v10, v11 ; bin: f3 41 0f 5f ea + ; asm: maxss %xmm5, %xmm10 + [-,%xmm10] v47 = x86_fmax v11, v10 ; bin: f3 44 0f 5f d5 + + ; Unary arithmetic. 
+ + ; asm: sqrtss %xmm5, %xmm10 + [-,%xmm10] v50 = sqrt v10 ; bin: f3 44 0f 51 d5 + ; asm: sqrtss %xmm10, %xmm5 + [-,%xmm5] v51 = sqrt v11 ; bin: f3 41 0f 51 ea + + ; asm: roundss $0, %xmm5, %xmm10 + [-,%xmm10] v52 = nearest v10 ; bin: 66 44 0f 3a 0a d5 00 + ; asm: roundss $0, %xmm10, %xmm5 + [-,%xmm5] v53 = nearest v11 ; bin: 66 41 0f 3a 0a ea 00 + ; asm: roundss $0, %xmm5, %xmm2 + [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0a d5 00 + + ; asm: roundss $1, %xmm5, %xmm10 + [-,%xmm10] v55 = floor v10 ; bin: 66 44 0f 3a 0a d5 01 + ; asm: roundss $1, %xmm10, %xmm5 + [-,%xmm5] v56 = floor v11 ; bin: 66 41 0f 3a 0a ea 01 + ; asm: roundss $1, %xmm5, %xmm2 + [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0a d5 01 + + ; asm: roundss $2, %xmm5, %xmm10 + [-,%xmm10] v58 = ceil v10 ; bin: 66 44 0f 3a 0a d5 02 + ; asm: roundss $2, %xmm10, %xmm5 + [-,%xmm5] v59 = ceil v11 ; bin: 66 41 0f 3a 0a ea 02 + ; asm: roundss $2, %xmm5, %xmm2 + [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0a d5 02 + + ; asm: roundss $3, %xmm5, %xmm10 + [-,%xmm10] v61 = trunc v10 ; bin: 66 44 0f 3a 0a d5 03 + ; asm: roundss $3, %xmm10, %xmm5 + [-,%xmm5] v62 = trunc v11 ; bin: 66 41 0f 3a 0a ea 03 + ; asm: roundss $3, %xmm5, %xmm2 + [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0a d5 03 + + ; Load/Store + + ; asm: movss (%r14), %xmm5 + [-,%xmm5] v100 = load.f32 v3 ; bin: heap_oob f3 41 0f 10 2e + ; asm: movss (%rax), %xmm10 + [-,%xmm10] v101 = load.f32 v2 ; bin: heap_oob f3 44 0f 10 10 + ; asm: movss 50(%r14), %xmm5 + [-,%xmm5] v110 = load.f32 v3+50 ; bin: heap_oob f3 41 0f 10 6e 32 + ; asm: movss -50(%rax), %xmm10 + [-,%xmm10] v111 = load.f32 v2-50 ; bin: heap_oob f3 44 0f 10 50 ce + ; asm: movss 10000(%r14), %xmm5 + [-,%xmm5] v120 = load.f32 v3+10000 ; bin: heap_oob f3 41 0f 10 ae 00002710 + ; asm: movss -10000(%rax), %xmm10 + [-,%xmm10] v121 = load.f32 v2-10000 ; bin: heap_oob f3 44 0f 10 90 ffffd8f0 + + ; asm: movss %xmm5, (%r14) + [-] store.f32 v100, v3 ; bin: heap_oob f3 41 0f 11 2e + ; asm: movss %xmm10, (%rax) + [-] store.f32 v101, v2 ; bin: heap_oob f3 44 0f 11 10 + ; asm: movss %xmm5, (%r13) + [-] store.f32 v100, v4 ; bin: heap_oob f3 41 0f 11 6d 00 + ; asm: movss %xmm10, (%r13) + [-] store.f32 v101, v4 ; bin: heap_oob f3 45 0f 11 55 00 + ; asm: movss %xmm5, 50(%r14) + [-] store.f32 v100, v3+50 ; bin: heap_oob f3 41 0f 11 6e 32 + ; asm: movss %xmm10, -50(%rax) + [-] store.f32 v101, v2-50 ; bin: heap_oob f3 44 0f 11 50 ce + ; asm: movss %xmm5, 10000(%r14) + [-] store.f32 v100, v3+10000 ; bin: heap_oob f3 41 0f 11 ae 00002710 + ; asm: movss %xmm10, -10000(%rax) + [-] store.f32 v101, v2-10000 ; bin: heap_oob f3 44 0f 11 90 ffffd8f0 + + ; Spill / Fill. + + ; asm: movss %xmm5, 1032(%rsp) + [-,ss1] v200 = spill v100 ; bin: stk_ovf f3 0f 11 ac 24 00000408 + ; asm: movss %xmm10, 1032(%rsp) + [-,ss1] v201 = spill v101 ; bin: stk_ovf f3 44 0f 11 94 24 00000408 + + ; asm: movss 1032(%rsp), %xmm5 + [-,%xmm5] v210 = fill v200 ; bin: f3 0f 10 ac 24 00000408 + ; asm: movss 1032(%rsp), %xmm10 + [-,%xmm10] v211 = fill v201 ; bin: f3 44 0f 10 94 24 00000408 + + ; asm: movss %xmm5, 1032(%rsp) + regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f3 0f 11 ac 24 00000408 + ; asm: movss 1032(%rsp), %xmm5 + regfill v100, ss1 -> %xmm5 ; bin: f3 0f 10 ac 24 00000408 + + ; Comparisons. + ; + ; Only `supported_floatccs` are tested here. Others are handled by + ; legalization patterns. 
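+ ; Each fcmp below is a ucomiss followed by a setcc. ucomiss sets + ; ZF=PF=CF=1 for unordered operands, CF=1 for less-than, and ZF=1 for + ; equal, so PF distinguishes NaNs: ord reads PF=0 (setnp), uno reads PF=1 + ; (setp), and the remaining codes map onto the unsigned-style conditions + ; visible in the encodings below.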
+ + ; asm: ucomiss %xmm10, %xmm5 + ; asm: setnp %bl + [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 41 0f 2e ea 0f 9b c3 + ; asm: ucomiss %xmm5, %xmm10 + ; asm: setp %bl + [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 44 0f 2e d5 0f 9a c3 + ; asm: ucomiss %xmm10, %xmm5 + ; asm: setne %dl + [-,%rdx] v302 = fcmp one v10, v11 ; bin: 41 0f 2e ea 0f 95 c2 + ; asm: ucomiss %xmm5, %xmm10 + ; asm: sete %dl + [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 44 0f 2e d5 0f 94 c2 + ; asm: ucomiss %xmm10, %xmm5 + ; asm: seta %bl + [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 41 0f 2e ea 0f 97 c3 + ; asm: ucomiss %xmm5, %xmm10 + ; asm: setae %bl + [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 44 0f 2e d5 0f 93 c3 + ; asm: ucomiss %xmm10, %xmm5 + ; asm: setb %dl + [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 41 0f 2e ea 0f 92 c2 + ; asm: ucomiss %xmm5, %xmm10 + ; asm: setbe %dl + [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 44 0f 2e d5 0f 96 c2 + + ; asm: ucomiss %xmm10, %xmm5 + [-,%rflags] v310 = ffcmp v10, v11 ; bin: 41 0f 2e ea + ; asm: ucomiss %xmm10, %xmm5 + [-,%rflags] v311 = ffcmp v11, v10 ; bin: 44 0f 2e d5 + ; asm: ucomiss %xmm5, %xmm5 + [-,%rflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed + + + ; Load/Store Complex + + [-,%rax] v350 = iconst.i64 1 + [-,%rbx] v351 = iconst.i64 2 + + ; asm: movss (%rax,%rbx,1),%xmm5 + [-,%xmm5] v352 = load_complex.f32 v350+v351 ; bin: heap_oob f3 0f 10 2c 18 + ; asm: movss 0x32(%rax,%rbx,1),%xmm5 + [-,%xmm5] v353 = load_complex.f32 v350+v351+50 ; bin: heap_oob f3 0f 10 6c 18 32 + ; asm: movss -0x32(%rax,%rbx,1),%xmm10 + [-,%xmm10] v354 = load_complex.f32 v350+v351-50 ; bin: heap_oob f3 44 0f 10 54 18 ce + ; asm: movss 0x2710(%rax,%rbx,1),%xmm5 + [-,%xmm5] v355 = load_complex.f32 v350+v351+10000 ; bin: heap_oob f3 0f 10 ac 18 00002710 + ; asm: movss -0x2710(%rax,%rbx,1),%xmm10 + [-,%xmm10] v356 = load_complex.f32 v350+v351-10000 ; bin: heap_oob f3 44 0f 10 94 18 ffffd8f0 + + ; asm: movss %xmm5, (%rax,%rbx,1) + [-] store_complex.f32 v100, v350+v351 ; bin: heap_oob f3 0f 11 2c 18 + ; asm: movss %xmm5, 50(%rax,%rbx,1) + [-] store_complex.f32 v100, v350+v351+50 ; bin: heap_oob f3 0f 11 6c 18 32 + ; asm: movss %xmm10, -50(%rax,%rbx,1) + [-] store_complex.f32 v101, v350+v351-50 ; bin: heap_oob f3 44 0f 11 54 18 ce + ; asm: movss %xmm5, 10000(%rax,%rbx,1) + [-] store_complex.f32 v100, v350+v351+10000 ; bin: heap_oob f3 0f 11 ac 18 00002710 + ; asm: movss %xmm10, -10000(%rax,%rbx,1) + [-] store_complex.f32 v101, v350+v351-10000 ; bin: heap_oob f3 44 0f 11 94 18 ffffd8f0 + + return +} + +function %F64() { + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + +block0: + [-,%r11] v0 = iconst.i32 1 + [-,%rsi] v1 = iconst.i32 2 + [-,%rax] v2 = iconst.i64 11 + [-,%r14] v3 = iconst.i64 12 + [-,%r13] v4 = iconst.i64 13 + + ; asm: cvtsi2sdl %r11d, %xmm5 + [-,%xmm5] v10 = fcvt_from_sint.f64 v0 ; bin: f2 41 0f 2a eb + ; asm: cvtsi2sdl %esi, %xmm10 + [-,%xmm10] v11 = fcvt_from_sint.f64 v1 ; bin: f2 44 0f 2a d6 + + ; asm: cvtsi2sdq %rax, %xmm5 + [-,%xmm5] v12 = fcvt_from_sint.f64 v2 ; bin: f2 48 0f 2a e8 + ; asm: cvtsi2sdq %r14, %xmm10 + [-,%xmm10] v13 = fcvt_from_sint.f64 v3 ; bin: f2 4d 0f 2a d6 + + ; asm: cvtsd2ss %xmm10, %xmm5 + [-,%xmm5] v14 = fdemote.f32 v11 ; bin: f2 41 0f 5a ea + ; asm: cvtsd2ss %xmm5, %xmm10 + [-,%xmm10] v15 = fdemote.f32 v10 ; bin: f2 44 0f 5a d5 + + ; asm: movq %rax, %xmm5 + [-,%xmm5] v16 = bitcast.f64 v2 ; bin: 66 48 0f 6e e8 + ; asm: movq %r14, %xmm10 + [-,%xmm10] v17 = bitcast.f64 v3 ; bin: 66 4d 0f 6e d6 +
+ ; asm: movq %xmm5, %rcx + [-,%rcx] v18 = bitcast.i64 v10 ; bin: 66 48 0f 7e e9 + ; asm: movq %xmm10, %rsi + [-,%rsi] v19 = bitcast.i64 v11 ; bin: 66 4c 0f 7e d6 + + ; Binary arithmetic. + + ; asm: addsd %xmm10, %xmm5 + [-,%xmm5] v20 = fadd v10, v11 ; bin: f2 41 0f 58 ea + ; asm: addsd %xmm5, %xmm10 + [-,%xmm10] v21 = fadd v11, v10 ; bin: f2 44 0f 58 d5 + + ; asm: subsd %xmm10, %xmm5 + [-,%xmm5] v22 = fsub v10, v11 ; bin: f2 41 0f 5c ea + ; asm: subsd %xmm5, %xmm10 + [-,%xmm10] v23 = fsub v11, v10 ; bin: f2 44 0f 5c d5 + + ; asm: mulsd %xmm10, %xmm5 + [-,%xmm5] v24 = fmul v10, v11 ; bin: f2 41 0f 59 ea + ; asm: mulsd %xmm5, %xmm10 + [-,%xmm10] v25 = fmul v11, v10 ; bin: f2 44 0f 59 d5 + + ; asm: divsd %xmm10, %xmm5 + [-,%xmm5] v26 = fdiv v10, v11 ; bin: f2 41 0f 5e ea + ; asm: divsd %xmm5, %xmm10 + [-,%xmm10] v27 = fdiv v11, v10 ; bin: f2 44 0f 5e d5 + + ; Bitwise ops. + ; We use the *ps SSE instructions for everything because they are smaller. + + ; asm: andps %xmm10, %xmm5 + [-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea + ; asm: andps %xmm5, %xmm10 + [-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5 + + ; asm: andnps %xmm10, %xmm5 + [-,%xmm5] v32 = band_not v11, v10 ; bin: 41 0f 55 ea + ; asm: andnps %xmm5, %xmm10 + [-,%xmm10] v33 = band_not v10, v11 ; bin: 44 0f 55 d5 + + ; asm: orps %xmm10, %xmm5 + [-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea + ; asm: orps %xmm5, %xmm10 + [-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5 + + ; asm: xorps %xmm10, %xmm5 + [-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea + ; asm: xorps %xmm5, %xmm10 + [-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5 + + ; asm: movaps %xmm10, %xmm5 + [-,%xmm5] v38 = copy v11 ; bin: 41 0f 28 ea + ; asm: movaps %xmm5, %xmm10 + [-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5 + + ; Convert float to int. + + ; asm: cvttsd2si %xmm5, %ecx + [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd + ; asm: cvttsd2si %xmm10, %esi + [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 41 0f 2c f2 + + ; asm: cvttsd2si %xmm5, %rcx + [-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f2 48 0f 2c cd + ; asm: cvttsd2si %xmm10, %rsi + [-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f2 49 0f 2c f2 + + ; Min/max. + + ; asm: minsd %xmm10, %xmm5 + [-,%xmm5] v44 = x86_fmin v10, v11 ; bin: f2 41 0f 5d ea + ; asm: minsd %xmm5, %xmm10 + [-,%xmm10] v45 = x86_fmin v11, v10 ; bin: f2 44 0f 5d d5 + ; asm: maxsd %xmm10, %xmm5 + [-,%xmm5] v46 = x86_fmax v10, v11 ; bin: f2 41 0f 5f ea + ; asm: maxsd %xmm5, %xmm10 + [-,%xmm10] v47 = x86_fmax v11, v10 ; bin: f2 44 0f 5f d5 + + ; Unary arithmetic. 
+ + ; asm: sqrtsd %xmm5, %xmm10 + [-,%xmm10] v50 = sqrt v10 ; bin: f2 44 0f 51 d5 + ; asm: sqrtsd %xmm10, %xmm5 + [-,%xmm5] v51 = sqrt v11 ; bin: f2 41 0f 51 ea + + ; asm: roundsd $0, %xmm5, %xmm10 + [-,%xmm10] v52 = nearest v10 ; bin: 66 44 0f 3a 0b d5 00 + ; asm: roundsd $0, %xmm10, %xmm5 + [-,%xmm5] v53 = nearest v11 ; bin: 66 41 0f 3a 0b ea 00 + ; asm: roundsd $0, %xmm5, %xmm2 + [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0b d5 00 + + ; asm: roundsd $1, %xmm5, %xmm10 + [-,%xmm10] v55 = floor v10 ; bin: 66 44 0f 3a 0b d5 01 + ; asm: roundsd $1, %xmm10, %xmm5 + [-,%xmm5] v56 = floor v11 ; bin: 66 41 0f 3a 0b ea 01 + ; asm: roundsd $1, %xmm5, %xmm2 + [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0b d5 01 + + ; asm: roundsd $2, %xmm5, %xmm10 + [-,%xmm10] v58 = ceil v10 ; bin: 66 44 0f 3a 0b d5 02 + ; asm: roundsd $2, %xmm10, %xmm5 + [-,%xmm5] v59 = ceil v11 ; bin: 66 41 0f 3a 0b ea 02 + ; asm: roundsd $2, %xmm5, %xmm2 + [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0b d5 02 + + ; asm: roundsd $3, %xmm5, %xmm10 + [-,%xmm10] v61 = trunc v10 ; bin: 66 44 0f 3a 0b d5 03 + ; asm: roundsd $3, %xmm10, %xmm5 + [-,%xmm5] v62 = trunc v11 ; bin: 66 41 0f 3a 0b ea 03 + ; asm: roundsd $3, %xmm5, %xmm2 + [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0b d5 03 + + ; Load/Store + + ; asm: movsd (%r14), %xmm5 + [-,%xmm5] v100 = load.f64 v3 ; bin: heap_oob f2 41 0f 10 2e + ; asm: movsd (%rax), %xmm10 + [-,%xmm10] v101 = load.f64 v2 ; bin: heap_oob f2 44 0f 10 10 + ; asm: movsd 50(%r14), %xmm5 + [-,%xmm5] v110 = load.f64 v3+50 ; bin: heap_oob f2 41 0f 10 6e 32 + ; asm: movsd -50(%rax), %xmm10 + [-,%xmm10] v111 = load.f64 v2-50 ; bin: heap_oob f2 44 0f 10 50 ce + ; asm: movsd 10000(%r14), %xmm5 + [-,%xmm5] v120 = load.f64 v3+10000 ; bin: heap_oob f2 41 0f 10 ae 00002710 + ; asm: movsd -10000(%rax), %xmm10 + [-,%xmm10] v121 = load.f64 v2-10000 ; bin: heap_oob f2 44 0f 10 90 ffffd8f0 + + ; asm: movsd %xmm5, (%r14) + [-] store.f64 v100, v3 ; bin: heap_oob f2 41 0f 11 2e + ; asm: movsd %xmm10, (%rax) + [-] store.f64 v101, v2 ; bin: heap_oob f2 44 0f 11 10 + ; asm: movsd %xmm5, (%r13) + [-] store.f64 v100, v4 ; bin: heap_oob f2 41 0f 11 6d 00 + ; asm: movsd %xmm10, (%r13) + [-] store.f64 v101, v4 ; bin: heap_oob f2 45 0f 11 55 00 + ; asm: movsd %xmm5, 50(%r14) + [-] store.f64 v100, v3+50 ; bin: heap_oob f2 41 0f 11 6e 32 + ; asm: movsd %xmm10, -50(%rax) + [-] store.f64 v101, v2-50 ; bin: heap_oob f2 44 0f 11 50 ce + ; asm: movsd %xmm5, 10000(%r14) + [-] store.f64 v100, v3+10000 ; bin: heap_oob f2 41 0f 11 ae 00002710 + ; asm: movsd %xmm10, -10000(%rax) + [-] store.f64 v101, v2-10000 ; bin: heap_oob f2 44 0f 11 90 ffffd8f0 + + ; Spill / Fill. + + ; asm: movsd %xmm5, 1032(%rsp) + [-,ss1] v200 = spill v100 ; bin: stk_ovf f2 0f 11 ac 24 00000408 + ; asm: movsd %xmm10, 1032(%rsp) + [-,ss1] v201 = spill v101 ; bin: stk_ovf f2 44 0f 11 94 24 00000408 + + ; asm: movsd 1032(%rsp), %xmm5 + [-,%xmm5] v210 = fill v200 ; bin: f2 0f 10 ac 24 00000408 + ; asm: movsd 1032(%rsp), %xmm10 + [-,%xmm10] v211 = fill v201 ; bin: f2 44 0f 10 94 24 00000408 + + ; asm: movsd %xmm5, 1032(%rsp) + regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f2 0f 11 ac 24 00000408 + ; asm: movsd 1032(%rsp), %xmm5 + regfill v100, ss1 -> %xmm5 ; bin: f2 0f 10 ac 24 00000408 + + ; Comparisons. + ; + ; Only `supported_floatccs` are tested here. Others are handled by + ; legalization patterns. 
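+ ; These f64 comparisons are identical to the f32 sequences above except + ; for the mandatory 66 prefix, which selects ucomisd instead of ucomiss.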
+ + ; asm: ucomisd %xmm10, %xmm5 + ; asm: setnp %bl + [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 66 41 0f 2e ea 0f 9b c3 + ; asm: ucomisd %xmm5, %xmm10 + ; asm: setp %bl + [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 66 44 0f 2e d5 0f 9a c3 + ; asm: ucomisd %xmm10, %xmm5 + ; asm: setne %dl + [-,%rdx] v302 = fcmp one v10, v11 ; bin: 66 41 0f 2e ea 0f 95 c2 + ; asm: ucomisd %xmm5, %xmm10 + ; asm: sete %dl + [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 66 44 0f 2e d5 0f 94 c2 + ; asm: ucomisd %xmm10, %xmm5 + ; asm: seta %bl + [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 66 41 0f 2e ea 0f 97 c3 + ; asm: ucomisd %xmm5, %xmm10 + ; asm: setae %bl + [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 66 44 0f 2e d5 0f 93 c3 + ; asm: ucomisd %xmm10, %xmm5 + ; asm: setb %dl + [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 66 41 0f 2e ea 0f 92 c2 + ; asm: ucomisd %xmm5, %xmm10 + ; asm: setbe %dl + [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 44 0f 2e d5 0f 96 c2 + + ; asm: ucomisd %xmm10, %xmm5 + [-,%rflags] v310 = ffcmp v10, v11 ; bin: 66 41 0f 2e ea + ; asm: ucomisd %xmm10, %xmm5 + [-,%rflags] v311 = ffcmp v11, v10 ; bin: 66 44 0f 2e d5 + ; asm: ucomisd %xmm5, %xmm5 + [-,%rflags] v312 = ffcmp v10, v10 ; bin: 66 0f 2e ed + + ; Load/Store Complex + + [-,%rax] v350 = iconst.i64 1 + [-,%rbx] v351 = iconst.i64 2 + ; asm: movsd (%rax,%rbx,1),%xmm5 + [-,%xmm5] v352 = load_complex.f64 v350+v351 ; bin: heap_oob f2 0f 10 2c 18 + ; asm: movsd 0x32(%rax,%rbx,1),%xmm5 + [-,%xmm5] v353 = load_complex.f64 v350+v351+50 ; bin: heap_oob f2 0f 10 6c 18 32 + ; asm: movsd -0x32(%rax,%rbx,1),%xmm10 + [-,%xmm10] v354 = load_complex.f64 v350+v351-50 ; bin: heap_oob f2 44 0f 10 54 18 ce + ; asm: movsd 0x2710(%rax,%rbx,1),%xmm5 + [-,%xmm5] v355 = load_complex.f64 v350+v351+10000 ; bin: heap_oob f2 0f 10 ac 18 00002710 + ; asm: movsd -0x2710(%rax,%rbx,1),%xmm10 + [-,%xmm10] v356 = load_complex.f64 v350+v351-10000 ; bin: heap_oob f2 44 0f 10 94 18 ffffd8f0 + + ; asm: movsd %xmm5, (%rax,%rbx,1) + [-] store_complex.f64 v100, v350+v351 ; bin: heap_oob f2 0f 11 2c 18 + ; asm: movsd %xmm5, 50(%rax,%rbx,1) + [-] store_complex.f64 v100, v350+v351+50 ; bin: heap_oob f2 0f 11 6c 18 32 + ; asm: movsd %xmm10, -50(%rax,%rbx,1) + [-] store_complex.f64 v101, v350+v351-50 ; bin: heap_oob f2 44 0f 11 54 18 ce + ; asm: movsd %xmm5, 10000(%rax,%rbx,1) + [-] store_complex.f64 v100, v350+v351+10000 ; bin: heap_oob f2 0f 11 ac 18 00002710 + ; asm: movsd %xmm10, -10000(%rax,%rbx,1) + [-] store_complex.f64 v101, v350+v351-10000 ; bin: heap_oob f2 44 0f 11 94 18 ffffd8f0 + + return +} + +function %cpuflags_float(f32 [%xmm0]) { +block0(v0: f32 [%xmm0]): + ; asm: ucomiss %xmm0, %xmm0 + [-,%rflags] v1 = ffcmp v0, v0 ; bin: 0f 2e c0 + + jump block1 + +block1: + ; asm: jnp block1 + brff ord v1, block1 ; bin: 7b fe + jump block2 + +block2: + ; asm: jp block1 + brff uno v1, block1 ; bin: 7a fc + jump block3 + +block3: + ; asm: jne block1 + brff one v1, block1 ; bin: 75 fa + jump block4 + +block4: + ; asm: je block1 + brff ueq v1, block1 ; bin: 74 f8 + jump block5 + +block5: + ; asm: ja block1 + brff gt v1, block1 ; bin: 77 f6 + jump block6 + +block6: + ; asm: jae block1 + brff ge v1, block1 ; bin: 73 f4 + jump block7 + +block7: + ; asm: jb block1 + brff ult v1, block1 ; bin: 72 f2 + jump block8 + +block8: + ; asm: jbe block1 + brff ule v1, block1 ; bin: 76 f0 + jump block9 + +block9: + ; asm: jp .+4; ud2 + trapff ord v1, user0 ; bin: 7a 02 user0 0f 0b + ; asm: jnp .+4; ud2 + trapff uno v1, user0 ; bin: 7b 02 user0 0f 0b + ; asm: je .+4; ud2 + trapff one v1, user0 ; bin: 74 02 user0 0f 0b + 
; asm: jne .+4; ud2 + trapff ueq v1, user0 ; bin: 75 02 user0 0f 0b + ; asm: jna .+4; ud2 + trapff gt v1, user0 ; bin: 76 02 user0 0f 0b + ; asm: jnae .+4; ud2 + trapff ge v1, user0 ; bin: 72 02 user0 0f 0b + ; asm: jnb .+4; ud2 + trapff ult v1, user0 ; bin: 73 02 user0 0f 0b + ; asm: jnbe .+4; ud2 + trapff ule v1, user0 ; bin: 77 02 user0 0f 0b + + ; asm: setnp %bl + [-,%rbx] v10 = trueff ord v1 ; bin: 0f 9b c3 + ; asm: setp %bl + [-,%rbx] v11 = trueff uno v1 ; bin: 0f 9a c3 + ; asm: setne %dl + [-,%rdx] v12 = trueff one v1 ; bin: 0f 95 c2 + ; asm: sete %dl + [-,%rdx] v13 = trueff ueq v1 ; bin: 0f 94 c2 + ; asm: seta %r10b + [-,%r10] v14 = trueff gt v1 ; bin: 41 0f 97 c2 + ; asm: setae %r10b + [-,%r10] v15 = trueff ge v1 ; bin: 41 0f 93 c2 + ; asm: setb %r14b + [-,%r14] v16 = trueff ult v1 ; bin: 41 0f 92 c6 + ; asm: setbe %r14b + [-,%r14] v17 = trueff ule v1 ; bin: 41 0f 96 c6 + + return +} diff --git a/cranelift/filetests/filetests/isa/x86/binary64-pic.clif b/cranelift/filetests/filetests/isa/x86/binary64-pic.clif new file mode 100644 index 0000000000..5a2443adce --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/binary64-pic.clif @@ -0,0 +1,83 @@ +; binary emission of 64-bit code. +test binemit +set opt_level=speed_and_size +set is_pic +target x86_64 haswell + +; The binary encodings can be verified with the command: +; +; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary64-pic.clif | llvm-mc -show-encoding -triple=x86_64 +; + +; Tests for i64 instructions. +function %I64() { + sig0 = () + fn0 = %foo() + fn1 = colocated %bar() + + gv0 = symbol %some_gv + gv1 = symbol colocated %some_gv + + ; Use incoming_arg stack slots because they won't be relocated by the frame + ; layout. + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + +block0: + + ; Colocated functions. + + ; asm: call bar + call fn1() ; bin: stk_ovf e8 CallPCRel4(%bar-4) 00000000 + + ; asm: lea 0x0(%rip), %rax + [-,%rax] v0 = func_addr.i64 fn1 ; bin: 48 8d 05 PCRel4(%bar-4) 00000000 + ; asm: lea 0x0(%rip), %rsi + [-,%rsi] v1 = func_addr.i64 fn1 ; bin: 48 8d 35 PCRel4(%bar-4) 00000000 + ; asm: lea 0x0(%rip), %r10 + [-,%r10] v2 = func_addr.i64 fn1 ; bin: 4c 8d 15 PCRel4(%bar-4) 00000000 + + ; asm: call *%rax + call_indirect sig0, v0() ; bin: stk_ovf ff d0 + ; asm: call *%rsi + call_indirect sig0, v1() ; bin: stk_ovf ff d6 + ; asm: call *%r10 + call_indirect sig0, v2() ; bin: stk_ovf 41 ff d2 + + ; Non-colocated functions.
+ + ; asm: call foo@PLT + call fn0() ; bin: stk_ovf e8 CallPLTRel4(%foo-4) 00000000 + + ; asm: mov 0x0(%rip), %rax + [-,%rax] v100 = func_addr.i64 fn0 ; bin: 48 8b 05 GOTPCRel4(%foo-4) 00000000 + ; asm: mov 0x0(%rip), %rsi + [-,%rsi] v101 = func_addr.i64 fn0 ; bin: 48 8b 35 GOTPCRel4(%foo-4) 00000000 + ; asm: mov 0x0(%rip), %r10 + [-,%r10] v102 = func_addr.i64 fn0 ; bin: 4c 8b 15 GOTPCRel4(%foo-4) 00000000 + + ; asm: call *%rax + call_indirect sig0, v100() ; bin: stk_ovf ff d0 + ; asm: call *%rsi + call_indirect sig0, v101() ; bin: stk_ovf ff d6 + ; asm: call *%r10 + call_indirect sig0, v102() ; bin: stk_ovf 41 ff d2 + + ; asm: mov 0x0(%rip), %rcx + [-,%rcx] v3 = symbol_value.i64 gv0 ; bin: 48 8b 0d GOTPCRel4(%some_gv-4) 00000000 + ; asm: mov 0x0(%rip), %rsi + [-,%rsi] v4 = symbol_value.i64 gv0 ; bin: 48 8b 35 GOTPCRel4(%some_gv-4) 00000000 + ; asm: mov 0x0(%rip), %r10 + [-,%r10] v5 = symbol_value.i64 gv0 ; bin: 4c 8b 15 GOTPCRel4(%some_gv-4) 00000000 + + ; asm: lea 0x0(%rip), %rcx + [-,%rcx] v6 = symbol_value.i64 gv1 ; bin: 48 8d 0d PCRel4(%some_gv-4) 00000000 + ; asm: lea 0x0(%rip), %rsi + [-,%rsi] v7 = symbol_value.i64 gv1 ; bin: 48 8d 35 PCRel4(%some_gv-4) 00000000 + ; asm: lea 0x0(%rip), %r10 + [-,%r10] v8 = symbol_value.i64 gv1 ; bin: 4c 8d 15 PCRel4(%some_gv-4) 00000000 + + return +} diff --git a/cranelift/filetests/filetests/isa/x86/binary64-run.clif b/cranelift/filetests/filetests/isa/x86/binary64-run.clif new file mode 100644 index 0000000000..6e6e1071c5 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/binary64-run.clif @@ -0,0 +1,10 @@ +test run +target x86_64 + +; this verifies that returning b64 immediates does not result in a segmentation fault, see https://github.com/bytecodealliance/cranelift/issues/911 +function %test_b64() -> b64 { +block0: +[-, %r10] v0 = bconst.b64 true + return v0 +} +; run diff --git a/cranelift/filetests/filetests/isa/x86/binary64.clif b/cranelift/filetests/filetests/isa/x86/binary64.clif new file mode 100644 index 0000000000..ab5d516b40 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/binary64.clif @@ -0,0 +1,1692 @@ +; binary emission of x86-64 code. +test binemit +set opt_level=speed_and_size +target x86_64 haswell + +; The binary encodings can be verified with the command: +; +; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary64.clif | llvm-mc -show-encoding -triple=x86_64 +; + +; Tests for i64 instructions. +function %I64() { + sig0 = () + fn0 = %foo() + fn1 = colocated %bar() + + gv0 = symbol %some_gv + + ; Use incoming_arg stack slots because they won't be relocated by the frame + ; layout. + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + +block0: + + ; Integer Constants. + + ; asm: movq $0x01020304f1f2f3f4, %rcx + [-,%rcx] v1 = iconst.i64 0x0102_0304_f1f2_f3f4 ; bin: 48 b9 01020304f1f2f3f4 + ; asm: movq $0x11020304f1f2f3f4, %rsi + [-,%rsi] v2 = iconst.i64 0x1102_0304_f1f2_f3f4 ; bin: 48 be 11020304f1f2f3f4 + ; asm: movq $0x21020304f1f2f3f4, %r10 + [-,%r10] v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4 + ; asm: movl $0xff001122, %r8d # 32-bit zero-extended constant. + [-,%r8] v4 = iconst.i64 0xff00_1122 ; bin: 41 b8 ff001122 + ; asm: movq $0xffffffff88001122, %r14 # 32-bit sign-extended constant. 
[-,%r14] v5 = iconst.i64 0xffff_ffff_8800_1122 ; bin: 49 c7 c6 88001122 + + ; asm: movl $1, %ecx + [-,%rcx] v9007 = bconst.b1 true ; bin: b9 00000001 + ; asm: movl $1, %r10d + [-,%r10] v9008 = bconst.b1 true ; bin: 41 ba 00000001 + + ; Integer Register Operations. + + ; asm: notq %rcx + [-,%rcx] v4000 = bnot v1 ; bin: 48 f7 d1 + ; asm: notq %rsi + [-,%rsi] v4001 = bnot v2 ; bin: 48 f7 d6 + ; asm: notq %r10 + [-,%r10] v4002 = bnot v3 ; bin: 49 f7 d2 + + ; Integer Register-Register Operations. + + ; asm: addq %rsi, %rcx + [-,%rcx] v10 = iadd v1, v2 ; bin: 48 01 f1 + ; asm: addq %r10, %rsi + [-,%rsi] v11 = iadd v2, v3 ; bin: 4c 01 d6 + ; asm: addq %rcx, %r10 + [-,%r10] v12 = iadd v3, v1 ; bin: 49 01 ca + + ; asm: subq %rsi, %rcx + [-,%rcx] v20 = isub v1, v2 ; bin: 48 29 f1 + ; asm: subq %r10, %rsi + [-,%rsi] v21 = isub v2, v3 ; bin: 4c 29 d6 + ; asm: subq %rcx, %r10 + [-,%r10] v22 = isub v3, v1 ; bin: 49 29 ca + + ; asm: andq %rsi, %rcx + [-,%rcx] v30 = band v1, v2 ; bin: 48 21 f1 + ; asm: andq %r10, %rsi + [-,%rsi] v31 = band v2, v3 ; bin: 4c 21 d6 + ; asm: andq %rcx, %r10 + [-,%r10] v32 = band v3, v1 ; bin: 49 21 ca + + ; asm: orq %rsi, %rcx + [-,%rcx] v40 = bor v1, v2 ; bin: 48 09 f1 + ; asm: orq %r10, %rsi + [-,%rsi] v41 = bor v2, v3 ; bin: 4c 09 d6 + ; asm: orq %rcx, %r10 + [-,%r10] v42 = bor v3, v1 ; bin: 49 09 ca + + ; asm: xorq %rsi, %rcx + [-,%rcx] v50 = bxor v1, v2 ; bin: 48 31 f1 + ; asm: xorq %r10, %rsi + [-,%rsi] v51 = bxor v2, v3 ; bin: 4c 31 d6 + ; asm: xorq %rcx, %r10 + [-,%r10] v52 = bxor v3, v1 ; bin: 49 31 ca + + ; asm: shlq %cl, %rsi + [-,%rsi] v60 = ishl v2, v1 ; bin: 48 d3 e6 + ; asm: shlq %cl, %r10 + [-,%r10] v61 = ishl v3, v1 ; bin: 49 d3 e2 + ; asm: sarq %cl, %rsi + [-,%rsi] v62 = sshr v2, v1 ; bin: 48 d3 fe + ; asm: sarq %cl, %r10 + [-,%r10] v63 = sshr v3, v1 ; bin: 49 d3 fa + ; asm: shrq %cl, %rsi + [-,%rsi] v64 = ushr v2, v1 ; bin: 48 d3 ee + ; asm: shrq %cl, %r10 + [-,%r10] v65 = ushr v3, v1 ; bin: 49 d3 ea + + ; asm: rolq %cl, %rsi + [-,%rsi] v66 = rotl v2, v1 ; bin: 48 d3 c6 + ; asm: rolq %cl, %r10 + [-,%r10] v67 = rotl v3, v1 ; bin: 49 d3 c2 + ; asm: rorq %cl, %rsi + [-,%rsi] v68 = rotr v2, v1 ; bin: 48 d3 ce + ; asm: rorq %cl, %r10 + [-,%r10] v69 = rotr v3, v1 ; bin: 49 d3 ca + + ; Integer Register-Immediate Operations. + ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits. + ; Some take 8-bit immediates that are sign-extended to 64 bits.
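+ ; An immediate that fits in a signed byte selects the short form, opcode + ; 83 with an 8-bit immediate (e.g. addq $100, %r8 is 49 83 c0 64 below); + ; wider values use opcode 81 with a 32-bit immediate (e.g. addq $100000, + ; %rsi is 48 81 c6 000186a0).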
+ + ; asm: addq $-100000, %rcx + [-,%rcx] v70 = iadd_imm v1, -100000 ; bin: 48 81 c1 fffe7960 + ; asm: addq $100000, %rsi + [-,%rsi] v71 = iadd_imm v2, 100000 ; bin: 48 81 c6 000186a0 + ; asm: addq $0x7fffffff, %r10 + [-,%r10] v72 = iadd_imm v3, 0x7fff_ffff ; bin: 49 81 c2 7fffffff + ; asm: addq $100, %r8 + [-,%r8] v73 = iadd_imm v4, 100 ; bin: 49 83 c0 64 + ; asm: addq $-100, %r14 + [-,%r14] v74 = iadd_imm v5, -100 ; bin: 49 83 c6 9c + + ; asm: andq $-100000, %rcx + [-,%rcx] v80 = band_imm v1, -100000 ; bin: 48 81 e1 fffe7960 + ; asm: andq $100000, %rsi + [-,%rsi] v81 = band_imm v2, 100000 ; bin: 48 81 e6 000186a0 + ; asm: andq $0x7fffffff, %r10 + [-,%r10] v82 = band_imm v3, 0x7fff_ffff ; bin: 49 81 e2 7fffffff + ; asm: andq $100, %r8 + [-,%r8] v83 = band_imm v4, 100 ; bin: 49 83 e0 64 + ; asm: andq $-100, %r14 + [-,%r14] v84 = band_imm v5, -100 ; bin: 49 83 e6 9c + + ; asm: orq $-100000, %rcx + [-,%rcx] v90 = bor_imm v1, -100000 ; bin: 48 81 c9 fffe7960 + ; asm: orq $100000, %rsi + [-,%rsi] v91 = bor_imm v2, 100000 ; bin: 48 81 ce 000186a0 + ; asm: orq $0x7fffffff, %r10 + [-,%r10] v92 = bor_imm v3, 0x7fff_ffff ; bin: 49 81 ca 7fffffff + ; asm: orq $100, %r8 + [-,%r8] v93 = bor_imm v4, 100 ; bin: 49 83 c8 64 + ; asm: orq $-100, %r14 + [-,%r14] v94 = bor_imm v5, -100 ; bin: 49 83 ce 9c + + ; asm: xorq $-100000, %rcx + [-,%rcx] v100 = bxor_imm v1, -100000 ; bin: 48 81 f1 fffe7960 + ; asm: xorq $100000, %rsi + [-,%rsi] v101 = bxor_imm v2, 100000 ; bin: 48 81 f6 000186a0 + ; asm: xorq $0x7fffffff, %r10 + [-,%r10] v102 = bxor_imm v3, 0x7fff_ffff ; bin: 49 81 f2 7fffffff + ; asm: xorq $100, %r8 + [-,%r8] v103 = bxor_imm v4, 100 ; bin: 49 83 f0 64 + ; asm: xorq $-100, %r14 + [-,%r14] v104 = bxor_imm v5, -100 ; bin: 49 83 f6 9c + + ; Register copies. + + ; asm: movq %rsi, %rcx + [-,%rcx] v110 = copy v2 ; bin: 48 89 f1 + ; asm: movq %r10, %rsi + [-,%rsi] v111 = copy v3 ; bin: 4c 89 d6 + ; asm: movq %rcx, %r10 + [-,%r10] v112 = copy v1 ; bin: 49 89 ca + + ; Copy Special + ; asm: movq %rsp, %rbp + copy_special %rsp -> %rbp ; bin: 48 89 e5 + ; asm: movq %r10, %r11 + copy_special %r10 -> %r11 ; bin: 4d 89 d3 + ; asm: movq %rsp, %r11 + copy_special %rsp -> %r11 ; bin: 49 89 e3 + ; asm: movq %r10, %rsp + copy_special %r10 -> %rsp ; bin: 4c 89 d4 + + ; Copy to SSA + + ; asm: movq %rax, %r15 + [-,%r15] v700 = copy_to_ssa.i64 %rax ; bin: 49 89 c7 + ; asm: movq %r15, %rax + [-,%rax] v701 = copy_to_ssa.i64 %r15 ; bin: 4c 89 f8 + ; asm: movq %rdi, %rsi + [-,%rsi] v702 = copy_to_ssa.i64 %rdi ; bin: 48 89 fe + ; asm: movq %r11, %r14 + [-,%r14] v703 = copy_to_ssa.i64 %r11 ; bin: 4d 89 de + + ; asm: movl %eax, %r15d + [-,%r15] v704 = copy_to_ssa.i32 %rax ; bin: 41 89 c7 + ; asm: movl %r15d, %eax + [-,%rax] v705 = copy_to_ssa.i32 %r15 ; bin: 44 89 f8 + ; asm: movl %edi, %esi. Unfortunately we get a redundant REX prefix. + [-,%rsi] v706 = copy_to_ssa.i32 %rdi ; bin: 40 89 fe + ; asm: movl %r11d, %r14d + [-,%r14] v707 = copy_to_ssa.i32 %r11 ; bin: 45 89 de + + ; Load/Store instructions. + + ; Register indirect addressing with no displacement.
+ + ; asm: movq %rcx, (%r10) + store v1, v3 ; bin: heap_oob 49 89 0a + ; asm: movq %r10, (%rcx) + store v3, v1 ; bin: heap_oob 4c 89 11 + ; asm: movl %ecx, (%r10) + istore32 v1, v3 ; bin: heap_oob 41 89 0a + ; asm: movl %r10d, (%rcx) + istore32 v3, v1 ; bin: heap_oob 44 89 11 + ; asm: movw %cx, (%r10) + istore16 v1, v3 ; bin: heap_oob 66 41 89 0a + ; asm: movw %r10w, (%rcx) + istore16 v3, v1 ; bin: heap_oob 66 44 89 11 + ; asm: movb %cl, (%r10) + istore8 v1, v3 ; bin: heap_oob 41 88 0a + ; asm: movb %r10b, (%rcx) + istore8 v3, v1 ; bin: heap_oob 44 88 11 + + ; asm: movq (%rcx), %r14 + [-,%r14] v120 = load.i64 v1 ; bin: heap_oob 4c 8b 31 + ; asm: movq (%r10), %rdx + [-,%rdx] v121 = load.i64 v3 ; bin: heap_oob 49 8b 12 + ; asm: movl (%rcx), %r14d + [-,%r14] v122 = uload32.i64 v1 ; bin: heap_oob 44 8b 31 + ; asm: movl (%r10), %edx + [-,%rdx] v123 = uload32.i64 v3 ; bin: heap_oob 41 8b 12 + ; asm: movslq (%rcx), %r14 + [-,%r14] v124 = sload32.i64 v1 ; bin: heap_oob 4c 63 31 + ; asm: movslq (%r10), %rdx + [-,%rdx] v125 = sload32.i64 v3 ; bin: heap_oob 49 63 12 + ; asm: movzwq (%rcx), %r14 + [-,%r14] v126 = uload16.i64 v1 ; bin: heap_oob 4c 0f b7 31 + ; asm: movzwq (%r10), %rdx + [-,%rdx] v127 = uload16.i64 v3 ; bin: heap_oob 49 0f b7 12 + ; asm: movswq (%rcx), %r14 + [-,%r14] v128 = sload16.i64 v1 ; bin: heap_oob 4c 0f bf 31 + ; asm: movswq (%r10), %rdx + [-,%rdx] v129 = sload16.i64 v3 ; bin: heap_oob 49 0f bf 12 + ; asm: movzbq (%rcx), %r14 + [-,%r14] v130 = uload8.i64 v1 ; bin: heap_oob 4c 0f b6 31 + ; asm: movzbq (%r10), %rdx + [-,%rdx] v131 = uload8.i64 v3 ; bin: heap_oob 49 0f b6 12 + ; asm: movsbq (%rcx), %r14 + [-,%r14] v132 = sload8.i64 v1 ; bin: heap_oob 4c 0f be 31 + ; asm: movsbq (%r10), %rdx + [-,%rdx] v133 = sload8.i64 v3 ; bin: heap_oob 49 0f be 12 + + ; Register-indirect with 8-bit signed displacement. 
+ + ; asm: movq %rcx, 100(%r10) + store v1, v3+100 ; bin: heap_oob 49 89 4a 64 + ; asm: movq %r10, -100(%rcx) + store v3, v1-100 ; bin: heap_oob 4c 89 51 9c + ; asm: movl %ecx, 100(%r10) + istore32 v1, v3+100 ; bin: heap_oob 41 89 4a 64 + ; asm: movl %r10d, -100(%rcx) + istore32 v3, v1-100 ; bin: heap_oob 44 89 51 9c + ; asm: movw %cx, 100(%r10) + istore16 v1, v3+100 ; bin: heap_oob 66 41 89 4a 64 + ; asm: movw %r10w, -100(%rcx) + istore16 v3, v1-100 ; bin: heap_oob 66 44 89 51 9c + ; asm: movb %cl, 100(%r10) + istore8 v1, v3+100 ; bin: heap_oob 41 88 4a 64 + ; asm: movb %r10b, 100(%rcx) + istore8 v3, v1+100 ; bin: heap_oob 44 88 51 64 + + ; asm: movq 50(%rcx), %r10 + [-,%r10] v140 = load.i64 v1+50 ; bin: heap_oob 4c 8b 51 32 + ; asm: movq -50(%r10), %rdx + [-,%rdx] v141 = load.i64 v3-50 ; bin: heap_oob 49 8b 52 ce + ; asm: movl 50(%rcx), %edi + [-,%rdi] v142 = uload32.i64 v1+50 ; bin: heap_oob 8b 79 32 + ; asm: movl -50(%rsi), %edx + [-,%rdx] v143 = uload32.i64 v2-50 ; bin: heap_oob 8b 56 ce + ; asm: movslq 50(%rcx), %rdi + [-,%rdi] v144 = sload32.i64 v1+50 ; bin: heap_oob 48 63 79 32 + ; asm: movslq -50(%rsi), %rdx + [-,%rdx] v145 = sload32.i64 v2-50 ; bin: heap_oob 48 63 56 ce + ; asm: movzwq 50(%rcx), %rdi + [-,%rdi] v146 = uload16.i64 v1+50 ; bin: heap_oob 48 0f b7 79 32 + ; asm: movzwq -50(%rsi), %rdx + [-,%rdx] v147 = uload16.i64 v2-50 ; bin: heap_oob 48 0f b7 56 ce + ; asm: movswq 50(%rcx), %rdi + [-,%rdi] v148 = sload16.i64 v1+50 ; bin: heap_oob 48 0f bf 79 32 + ; asm: movswq -50(%rsi), %rdx + [-,%rdx] v149 = sload16.i64 v2-50 ; bin: heap_oob 48 0f bf 56 ce + ; asm: movzbq 50(%rcx), %rdi + [-,%rdi] v150 = uload8.i64 v1+50 ; bin: heap_oob 48 0f b6 79 32 + ; asm: movzbq -50(%rsi), %rdx + [-,%rdx] v151 = uload8.i64 v2-50 ; bin: heap_oob 48 0f b6 56 ce + ; asm: movsbq 50(%rcx), %rdi + [-,%rdi] v152 = sload8.i64 v1+50 ; bin: heap_oob 48 0f be 79 32 + ; asm: movsbq -50(%rsi), %rdx + [-,%rdx] v153 = sload8.i64 v2-50 ; bin: heap_oob 48 0f be 56 ce + + ; Register-indirect with 32-bit signed displacement. 
+ + ; asm: movq %rcx, 10000(%r10) + store v1, v3+10000 ; bin: heap_oob 49 89 8a 00002710 + ; asm: movq %r10, -10000(%rcx) + store v3, v1-10000 ; bin: heap_oob 4c 89 91 ffffd8f0 + ; asm: movl %ecx, 10000(%rsi) + istore32 v1, v2+10000 ; bin: heap_oob 89 8e 00002710 + ; asm: movl %esi, -10000(%rcx) + istore32 v2, v1-10000 ; bin: heap_oob 89 b1 ffffd8f0 + ; asm: movw %cx, 10000(%rsi) + istore16 v1, v2+10000 ; bin: heap_oob 66 89 8e 00002710 + ; asm: movw %si, -10000(%rcx) + istore16 v2, v1-10000 ; bin: heap_oob 66 89 b1 ffffd8f0 + ; asm: movb %cl, 10000(%rsi) + istore8 v1, v2+10000 ; bin: heap_oob 88 8e 00002710 + ; asm: movb %sil, 10000(%rcx) + istore8 v2, v1+10000 ; bin: heap_oob 40 88 b1 00002710 + + ; asm: movq 50000(%rcx), %r10 + [-,%r10] v160 = load.i64 v1+50000 ; bin: heap_oob 4c 8b 91 0000c350 + ; asm: movq -50000(%r10), %rdx + [-,%rdx] v161 = load.i64 v3-50000 ; bin: heap_oob 49 8b 92 ffff3cb0 + ; asm: movl 50000(%rcx), %edi + [-,%rdi] v162 = uload32.i64 v1+50000 ; bin: heap_oob 8b b9 0000c350 + ; asm: movl -50000(%rsi), %edx + [-,%rdx] v163 = uload32.i64 v2-50000 ; bin: heap_oob 8b 96 ffff3cb0 + ; asm: movslq 50000(%rcx), %rdi + [-,%rdi] v164 = sload32.i64 v1+50000 ; bin: heap_oob 48 63 b9 0000c350 + ; asm: movslq -50000(%rsi), %rdx + [-,%rdx] v165 = sload32.i64 v2-50000 ; bin: heap_oob 48 63 96 ffff3cb0 + ; asm: movzwq 50000(%rcx), %rdi + [-,%rdi] v166 = uload16.i64 v1+50000 ; bin: heap_oob 48 0f b7 b9 0000c350 + ; asm: movzwq -50000(%rsi), %rdx + [-,%rdx] v167 = uload16.i64 v2-50000 ; bin: heap_oob 48 0f b7 96 ffff3cb0 + ; asm: movswq 50000(%rcx), %rdi + [-,%rdi] v168 = sload16.i64 v1+50000 ; bin: heap_oob 48 0f bf b9 0000c350 + ; asm: movswq -50000(%rsi), %rdx + [-,%rdx] v169 = sload16.i64 v2-50000 ; bin: heap_oob 48 0f bf 96 ffff3cb0 + ; asm: movzbq 50000(%rcx), %rdi + [-,%rdi] v170 = uload8.i64 v1+50000 ; bin: heap_oob 48 0f b6 b9 0000c350 + ; asm: movzbq -50000(%rsi), %rdx + [-,%rdx] v171 = uload8.i64 v2-50000 ; bin: heap_oob 48 0f b6 96 ffff3cb0 + ; asm: movsbq 50000(%rcx), %rdi + [-,%rdi] v172 = sload8.i64 v1+50000 ; bin: heap_oob 48 0f be b9 0000c350 + ; asm: movsbq -50000(%rsi), %rdx + [-,%rdx] v173 = sload8.i64 v2-50000 ; bin: heap_oob 48 0f be 96 ffff3cb0 + + + ; More arithmetic. 
+ + ; asm: imulq %rsi, %rcx + [-,%rcx] v180 = imul v1, v2 ; bin: 48 0f af ce + ; asm: imulq %r10, %rsi + [-,%rsi] v181 = imul v2, v3 ; bin: 49 0f af f2 + ; asm: imulq %rcx, %r10 + [-,%r10] v182 = imul v3, v1 ; bin: 4c 0f af d1 + + [-,%rax] v190 = iconst.i64 1 + [-,%rdx] v191 = iconst.i64 2 + ; asm: idivq %rcx + [-,%rax,%rdx] v192, v193 = x86_sdivmodx v190, v191, v1 ; bin: int_divz 48 f7 f9 + ; asm: idivq %rsi + [-,%rax,%rdx] v194, v195 = x86_sdivmodx v190, v191, v2 ; bin: int_divz 48 f7 fe + ; asm: idivq %r10 + [-,%rax,%rdx] v196, v197 = x86_sdivmodx v190, v191, v3 ; bin: int_divz 49 f7 fa + ; asm: divq %rcx + [-,%rax,%rdx] v198, v199 = x86_udivmodx v190, v191, v1 ; bin: int_divz 48 f7 f1 + ; asm: divq %rsi + [-,%rax,%rdx] v200, v201 = x86_udivmodx v190, v191, v2 ; bin: int_divz 48 f7 f6 + ; asm: divq %r10 + [-,%rax,%rdx] v202, v203 = x86_udivmodx v190, v191, v3 ; bin: int_divz 49 f7 f2 + + ; double-length multiply instructions, 64 bit + [-,%rax] v1001 = iconst.i64 1 + [-,%r15] v1002 = iconst.i64 2 + ; asm: mulq %r15 + [-,%rax,%rdx] v1003, v1004 = x86_umulx v1001, v1002 ; bin: 49 f7 e7 + ; asm: imulq %r15 + [-,%rax,%rdx] v1005, v1006 = x86_smulx v1001, v1002 ; bin: 49 f7 ef + + ; double-length multiply instructions, 32 bit + [-,%rax] v1011 = iconst.i32 1 + [-,%r15] v1012 = iconst.i32 2 + [-,%rcx] v1017 = iconst.i32 3 + ; asm: mull %r15d + [-,%rax,%rdx] v1013, v1014 = x86_umulx v1011, v1012 ; bin: 41 f7 e7 + ; asm: imull %r15d + [-,%rax,%rdx] v1015, v1016 = x86_smulx v1011, v1012 ; bin: 41 f7 ef + + ; asm: mull %ecx + [-,%rax,%rdx] v1018, v1019 = x86_umulx v1011, v1017 ; bin: f7 e1 + ; asm: imull %ecx + [-,%rax,%rdx] v1020, v1021 = x86_smulx v1011, v1017 ; bin: f7 e9 + + ; Bit-counting instructions. + + ; asm: popcntq %rsi, %rcx + [-,%rcx] v210 = popcnt v2 ; bin: f3 48 0f b8 ce + ; asm: popcntq %r10, %rsi + [-,%rsi] v211 = popcnt v3 ; bin: f3 49 0f b8 f2 + ; asm: popcntq %rcx, %r10 + [-,%r10] v212 = popcnt v1 ; bin: f3 4c 0f b8 d1 + + ; asm: lzcntq %rsi, %rcx + [-,%rcx] v213 = clz v2 ; bin: f3 48 0f bd ce + ; asm: lzcntq %r10, %rsi + [-,%rsi] v214 = clz v3 ; bin: f3 49 0f bd f2 + ; asm: lzcntq %rcx, %r10 + [-,%r10] v215 = clz v1 ; bin: f3 4c 0f bd d1 + + ; asm: tzcntq %rsi, %rcx + [-,%rcx] v216 = ctz v2 ; bin: f3 48 0f bc ce + ; asm: tzcntq %r10, %rsi + [-,%rsi] v217 = ctz v3 ; bin: f3 49 0f bc f2 + ; asm: tzcntq %rcx, %r10 + [-,%r10] v218 = ctz v1 ; bin: f3 4c 0f bc d1 + + ; Integer comparisons. 
+ + ; asm: cmpq %rsi, %rcx + ; asm: sete %bl + [-,%rbx] v300 = icmp eq v1, v2 ; bin: 48 39 f1 0f 94 c3 + ; asm: cmpq %r10, %rsi + ; asm: sete %dl + [-,%rdx] v301 = icmp eq v2, v3 ; bin: 4c 39 d6 0f 94 c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setne %bl + [-,%rbx] v302 = icmp ne v1, v2 ; bin: 48 39 f1 0f 95 c3 + ; asm: cmpq %r10, %rsi + ; asm: setne %dl + [-,%rdx] v303 = icmp ne v2, v3 ; bin: 4c 39 d6 0f 95 c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setl %bl + [-,%rbx] v304 = icmp slt v1, v2 ; bin: 48 39 f1 0f 9c c3 + ; asm: cmpq %r10, %rsi + ; asm: setl %dl + [-,%rdx] v305 = icmp slt v2, v3 ; bin: 4c 39 d6 0f 9c c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setge %bl + [-,%rbx] v306 = icmp sge v1, v2 ; bin: 48 39 f1 0f 9d c3 + ; asm: cmpq %r10, %rsi + ; asm: setge %dl + [-,%rdx] v307 = icmp sge v2, v3 ; bin: 4c 39 d6 0f 9d c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setg %bl + [-,%rbx] v308 = icmp sgt v1, v2 ; bin: 48 39 f1 0f 9f c3 + ; asm: cmpq %r10, %rsi + ; asm: setg %dl + [-,%rdx] v309 = icmp sgt v2, v3 ; bin: 4c 39 d6 0f 9f c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setle %bl + [-,%rbx] v310 = icmp sle v1, v2 ; bin: 48 39 f1 0f 9e c3 + ; asm: cmpq %r10, %rsi + ; asm: setle %dl + [-,%rdx] v311 = icmp sle v2, v3 ; bin: 4c 39 d6 0f 9e c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setb %bl + [-,%rbx] v312 = icmp ult v1, v2 ; bin: 48 39 f1 0f 92 c3 + ; asm: cmpq %r10, %rsi + ; asm: setb %dl + [-,%rdx] v313 = icmp ult v2, v3 ; bin: 4c 39 d6 0f 92 c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setae %bl + [-,%rbx] v314 = icmp uge v1, v2 ; bin: 48 39 f1 0f 93 c3 + ; asm: cmpq %r10, %rsi + ; asm: setae %dl + [-,%rdx] v315 = icmp uge v2, v3 ; bin: 4c 39 d6 0f 93 c2 + + ; asm: cmpq %rsi, %rcx + ; asm: seta %bl + [-,%rbx] v316 = icmp ugt v1, v2 ; bin: 48 39 f1 0f 97 c3 + ; asm: cmpq %r10, %rsi + ; asm: seta %dl + [-,%rdx] v317 = icmp ugt v2, v3 ; bin: 4c 39 d6 0f 97 c2 + + ; asm: cmpq %rsi, %rcx + ; asm: setbe %bl + [-,%rbx] v318 = icmp ule v1, v2 ; bin: 48 39 f1 0f 96 c3 + ; asm: cmpq %r10, %rsi + ; asm: setbe %dl + [-,%rdx] v319 = icmp ule v2, v3 ; bin: 4c 39 d6 0f 96 c2 + + ; asm: cmpq $37, %rcx + ; asm: setl %bl + [-,%rbx] v320 = icmp_imm slt v1, 37 ; bin: 48 83 f9 25 0f 9c c3 + + ; asm: cmpq $100000, %rcx + ; asm: setl %bl + [-,%rbx] v321 = icmp_imm slt v1, 100000 ; bin: 48 81 f9 000186a0 0f 9c c3 + + ; Bool-to-int conversions. + + ; asm: movzbl %bl, %ecx + [-,%rcx] v350 = bint.i64 v300 ; bin: 0f b6 cb + ; asm: movzbl %dl, %esi + [-,%rsi] v351 = bint.i64 v301 ; bin: 0f b6 f2 + + ; Colocated functions. + + ; asm: call bar + call fn1() ; bin: stk_ovf e8 CallPCRel4(%bar-4) 00000000 + + ; asm: lea 0x0(%rip), %rcx + [-,%rcx] v400 = func_addr.i64 fn1 ; bin: 48 8d 0d PCRel4(%bar-4) 00000000 + ; asm: lea 0x0(%rip), %rsi + [-,%rsi] v401 = func_addr.i64 fn1 ; bin: 48 8d 35 PCRel4(%bar-4) 00000000 + ; asm: lea 0x0(%rip), %r10 + [-,%r10] v402 = func_addr.i64 fn1 ; bin: 4c 8d 15 PCRel4(%bar-4) 00000000 + + ; asm: call *%rcx + call_indirect sig0, v400() ; bin: stk_ovf ff d1 + ; asm: call *%rsi + call_indirect sig0, v401() ; bin: stk_ovf ff d6 + ; asm: call *%r10 + call_indirect sig0, v402() ; bin: stk_ovf 41 ff d2 + + ; Non-colocated functions. Note that there is no non-colocated non-PIC call.
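+ ; A direct call's rel32 displacement only reaches +/-2 GiB, which a + ; non-colocated callee may exceed, so its address is materialized as a + ; 64-bit absolute immediate (movabsq, with an Abs8 relocation) and the + ; call is made indirect.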
+ + ; asm: movabsq $0, %rcx + [-,%rcx] v410 = func_addr.i64 fn0 ; bin: 48 b9 Abs8(%foo) 0000000000000000 + ; asm: movabsq $0, %rsi + [-,%rsi] v411 = func_addr.i64 fn0 ; bin: 48 be Abs8(%foo) 0000000000000000 + ; asm: movabsq $0, %r10 + [-,%r10] v412 = func_addr.i64 fn0 ; bin: 49 ba Abs8(%foo) 0000000000000000 + + ; asm: call *%rcx + call_indirect sig0, v410() ; bin: stk_ovf ff d1 + ; asm: call *%rsi + call_indirect sig0, v411() ; bin: stk_ovf ff d6 + ; asm: call *%r10 + call_indirect sig0, v412() ; bin: stk_ovf 41 ff d2 + + ; asm: movabsq $-1, %rcx + [-,%rcx] v450 = symbol_value.i64 gv0 ; bin: 48 b9 Abs8(%some_gv) 0000000000000000 + ; asm: movabsq $-1, %rsi + [-,%rsi] v451 = symbol_value.i64 gv0 ; bin: 48 be Abs8(%some_gv) 0000000000000000 + ; asm: movabsq $-1, %r10 + [-,%r10] v452 = symbol_value.i64 gv0 ; bin: 49 ba Abs8(%some_gv) 0000000000000000 + + ; Spill / Fill. + + ; asm: movq %rcx, 1032(%rsp) + [-,ss1] v500 = spill v1 ; bin: stk_ovf 48 89 8c 24 00000408 + ; asm: movq %rsi, 1032(%rsp) + [-,ss1] v501 = spill v2 ; bin: stk_ovf 48 89 b4 24 00000408 + ; asm: movq %r10, 1032(%rsp) + [-,ss1] v502 = spill v3 ; bin: stk_ovf 4c 89 94 24 00000408 + + ; asm: movq 1032(%rsp), %rcx + [-,%rcx] v510 = fill v500 ; bin: 48 8b 8c 24 00000408 + ; asm: movq 1032(%rsp), %rsi + [-,%rsi] v511 = fill v501 ; bin: 48 8b b4 24 00000408 + ; asm: movq 1032(%rsp), %r10 + [-,%r10] v512 = fill v502 ; bin: 4c 8b 94 24 00000408 + + ; asm: movq %rcx, 1032(%rsp) + regspill v1, %rcx -> ss1 ; bin: stk_ovf 48 89 8c 24 00000408 + ; asm: movq 1032(%rsp), %rcx + regfill v1, ss1 -> %rcx ; bin: 48 8b 8c 24 00000408 + + ; Push and Pop + ; asm: pushq %rcx + x86_push v1 ; bin: stk_ovf 51 + ; asm: pushq %r10 + x86_push v3 ; bin: stk_ovf 41 52 + ; asm: popq %rcx + [-,%rcx] v513 = x86_pop.i64 ; bin: 59 + ; asm: popq %r10 + [-,%r10] v514 = x86_pop.i64 ; bin: 41 5a + + ; Adjust Stack Pointer Up + ; asm: addq $64, %rsp + adjust_sp_up_imm 64 ; bin: 48 83 c4 40 + ; asm: addq $-64, %rsp + adjust_sp_up_imm -64 ; bin: 48 83 c4 c0 + ; asm: addq $1024, %rsp + adjust_sp_up_imm 1024 ; bin: 48 81 c4 00000400 + ; asm: addq $-1024, %rsp + adjust_sp_up_imm -1024 ; bin: 48 81 c4 fffffc00 + ; asm: addq $2147483647, %rsp + adjust_sp_up_imm 2147483647 ; bin: 48 81 c4 7fffffff + ; asm: addq $-2147483648, %rsp + adjust_sp_up_imm -2147483648 ; bin: 48 81 c4 80000000 + + ; Adjust Stack Pointer Down + ; asm: subq %rcx, %rsp + adjust_sp_down v1 ; bin: 48 29 cc + ; asm: subq %r10, %rsp + adjust_sp_down v3 ; bin: 4c 29 d4 + ; asm: subq $64, %rsp + adjust_sp_down_imm 64 ; bin: 48 83 ec 40 + ; asm: subq $-64, %rsp + adjust_sp_down_imm -64 ; bin: 48 83 ec c0 + ; asm: subq $1024, %rsp + adjust_sp_down_imm 1024 ; bin: 48 81 ec 00000400 + ; asm: subq $-1024, %rsp + adjust_sp_down_imm -1024 ; bin: 48 81 ec fffffc00 + ; asm: subq $2147483647, %rsp + adjust_sp_down_imm 2147483647 ; bin: 48 81 ec 7fffffff + ; asm: subq $-2147483648, %rsp + adjust_sp_down_imm -2147483648 ; bin: 48 81 ec 80000000 + + ; Shift immediates + ; asm: shlq $12, %rsi + [-,%rsi] v515 = ishl_imm v2, 12 ; bin: 48 c1 e6 0c + ; asm: shlq $13, %r8 + [-,%r8] v516 = ishl_imm v4, 13 ; bin: 49 c1 e0 0d + ; asm: sarq $32, %rsi + [-,%rsi] v517 = sshr_imm v2, 32 ; bin: 48 c1 fe 20 + ; asm: sarq $33, %r8 + [-,%r8] v518 = sshr_imm v4, 33 ; bin: 49 c1 f8 21 + ; asm: shrq $62, %rsi + [-,%rsi] v519 = ushr_imm v2, 62 ; bin: 48 c1 ee 3e + ; asm: shrq $63, %r8 + [-,%r8] v520 = ushr_imm v4, 63 ; bin: 49 c1 e8 3f + + + ; Rotate immediates + ; asm: rolq $12, %rsi + [-,%rsi] v5101 = rotl_imm v2, 12 ; bin: 48 c1 c6 0c + 
; asm: rolq $13, %r8 + [-,%r8] v5102 = rotl_imm v4, 13 ; bin: 49 c1 c0 0d + ; asm: rorq $32, %rsi + [-,%rsi] v5103 = rotr_imm v2, 32 ; bin: 48 c1 ce 20 + ; asm: rorq $33, %r8 + [-,%r8] v5104 = rotr_imm v4, 33 ; bin: 49 c1 c8 21 + + + ; Load Complex + [-,%rax] v521 = iconst.i64 1 + [-,%rbx] v522 = iconst.i64 1 + [-,%rdi] v523 = iconst.i32 1 + [-,%rsi] v524 = iconst.i32 1 + ; asm: movq (%rax,%rbx,1), %rcx + [-,%rcx] v525 = load_complex.i64 v521+v522 ; bin: heap_oob 48 8b 0c 18 + ; asm: movl (%rax,%rbx,1), %ecx + [-,%rcx] v526 = load_complex.i32 v521+v522 ; bin: heap_oob 8b 0c 18 + ; asm: movq 1(%rax,%rbx,1), %rcx + [-,%rcx] v527 = load_complex.i64 v521+v522+1 ; bin: heap_oob 48 8b 4c 18 01 + ; asm: movl 1(%rax,%rbx,1), %ecx + [-,%rcx] v528 = load_complex.i32 v521+v522+1 ; bin: heap_oob 8b 4c 18 01 + ; asm: mov 0x1000(%rax,%rbx,1),%rcx + [-,%rcx] v529 = load_complex.i64 v521+v522+0x1000 ; bin: heap_oob 48 8b 8c 18 00001000 + ; asm: mov 0x1000(%rax,%rbx,1),%ecx + [-,%rcx] v530 = load_complex.i32 v521+v522+0x1000 ; bin: heap_oob 8b 8c 18 00001000 + ; asm: movzbq (%rax,%rbx,1),%rcx + [-,%rcx] v531 = uload8_complex.i64 v521+v522 ; bin: heap_oob 48 0f b6 0c 18 + ; asm: movzbl (%rax,%rbx,1),%ecx + [-,%rcx] v532 = uload8_complex.i32 v521+v522 ; bin: heap_oob 0f b6 0c 18 + ; asm: movsbq (%rax,%rbx,1),%rcx + [-,%rcx] v533 = sload8_complex.i64 v521+v522 ; bin: heap_oob 48 0f be 0c 18 + ; asm: movsbl (%rax,%rbx,1),%ecx + [-,%rcx] v534 = sload8_complex.i32 v521+v522 ; bin: heap_oob 0f be 0c 18 + ; asm: movzwq (%rax,%rbx,1),%rcx + [-,%rcx] v535 = uload16_complex.i64 v521+v522 ; bin: heap_oob 48 0f b7 0c 18 + ; asm: movzwl (%rax,%rbx,1),%ecx + [-,%rcx] v536 = uload16_complex.i32 v521+v522 ; bin: heap_oob 0f b7 0c 18 + ; asm: movswq (%rax,%rbx,1),%rcx + [-,%rcx] v537 = sload16_complex.i64 v521+v522 ; bin: heap_oob 48 0f bf 0c 18 + ; asm: movswl (%rax,%rbx,1),%ecx + [-,%rcx] v538 = sload16_complex.i32 v521+v522 ; bin: heap_oob 0f bf 0c 18 + ; asm: mov (%rax,%rbx,1),%ecx + [-,%rcx] v539 = uload32_complex v521+v522 ; bin: heap_oob 8b 0c 18 + ; asm: movslq (%rax,%rbx,1),%rcx + [-,%rcx] v540 = sload32_complex v521+v522 ; bin: heap_oob 48 63 0c 18 + [-,%r13] v550 = iconst.i64 1 + [-,%r14] v551 = iconst.i64 1 + ; asm: mov 0x0(%r13,%r14,1),%r12d + [-,%r12] v552 = load_complex.i32 v550+v551 ; bin: heap_oob 47 8b 64 35 00 + + ; Store Complex + [-,%rcx] v600 = iconst.i64 1 + [-,%rcx] v601 = iconst.i32 1 + [-,%r10] v602 = iconst.i64 1 + [-,%r11] v603 = iconst.i32 1 + ; asm: mov %rcx,(%rax,%rbx,1) + store_complex v600, v521+v522 ; bin: heap_oob 48 89 0c 18 + ; asm: mov %rcx,0x1(%rax,%rbx,1) + store_complex v600, v521+v522+1 ; bin: heap_oob 48 89 4c 18 01 + ; asm: mov %rcx,0x1000(%rax,%rbx,1) + store_complex v600, v521+v522+0x1000 ; bin: heap_oob 48 89 8c 18 00001000 + ; asm: mov %ecx,(%rax,%rbx,1) + store_complex v601, v521+v522 ; bin: heap_oob 89 0c 18 + ; asm: mov %ecx,0x1(%rax,%rbx,1) + store_complex v601, v521+v522+1 ; bin: heap_oob 89 4c 18 01 + ; asm: mov %ecx,0x1000(%rax,%rbx,1) + store_complex v601, v521+v522+0x1000 ; bin: heap_oob 89 8c 18 00001000 + ; asm: mov %ecx,(%rax,%rbx,1) + istore32_complex v600, v521+v522 ; bin: heap_oob 89 0c 18 + ; asm: mov %cx,(%rax,%rbx,1) + istore16_complex v600, v521+v522 ; bin: heap_oob 66 89 0c 18 + ; asm: mov %cx,(%rax,%rbx,1) + istore16_complex v601, v521+v522 ; bin: heap_oob 66 89 0c 18 + ; asm: mov %r10w,(%rax,%rbx,1) + istore16_complex v602, v521+v522 ; bin: heap_oob 66 44 89 14 18 + ; asm: mov %r11w,(%rax,%rbx,1) + istore16_complex v603, v521+v522 ; bin: heap_oob
66 44 89 1c 18 + ; asm: mov %cl,(%rax,%rbx,1) + istore8_complex v600, v521+v522 ; bin: heap_oob 88 0c 18 + ; asm: mov %cl,(%rax,%rbx,1) + istore8_complex v601, v521+v522 ; bin: heap_oob 88 0c 18 + + ; asm: testq %rcx, %rcx + ; asm: je block1 + brz v1, block1 ; bin: 48 85 c9 74 1b + fallthrough block3 + +block3: + ; asm: testq %rsi, %rsi + ; asm: je block1 + brz v2, block1 ; bin: 48 85 f6 74 16 + fallthrough block4 + +block4: + ; asm: testq %r10, %r10 + ; asm: je block1 + brz v3, block1 ; bin: 4d 85 d2 74 11 + fallthrough block5 + +block5: + ; asm: testq %rcx, %rcx + ; asm: jne block1 + brnz v1, block1 ; bin: 48 85 c9 75 0c + fallthrough block6 + +block6: + ; asm: testq %rsi, %rsi + ; asm: jne block1 + brnz v2, block1 ; bin: 48 85 f6 75 07 + fallthrough block7 + +block7: + ; asm: testq %r10, %r10 + ; asm: jne block1 + brnz v3, block1 ; bin: 4d 85 d2 75 02 + + ; asm: jmp block2 + jump block2 ; bin: eb 01 + + ; asm: block1: +block1: + return ; bin: c3 + + ; asm: block2: +block2: + ; Add a no-op instruction to prevent fold_redundant_jump from removing this block. + ; asm: notq %rcx + [-,%rcx] v5000 = bnot v1 ; bin: 48 f7 d1 + jump block1 ; bin: eb fa +} + +; CPU flag instructions. +function %cpu_flags_I64() { +block0: + [-,%rcx] v1 = iconst.i64 1 + [-,%r10] v2 = iconst.i64 2 + jump block1 + +block1: + ; asm: cmpq %r10, %rcx + [-,%rflags] v10 = ifcmp v1, v2 ; bin: 4c 39 d1 + ; asm: cmpq %rcx, %r10 + [-,%rflags] v11 = ifcmp v2, v1 ; bin: 49 39 ca + + ; asm: je block1 + brif eq v11, block1 ; bin: 74 f8 + jump block2 + +block2: + ; asm: jne block1 + brif ne v11, block1 ; bin: 75 f6 + jump block3 + +block3: + ; asm: jl block1 + brif slt v11, block1 ; bin: 7c f4 + jump block4 + +block4: + ; asm: jge block1 + brif sge v11, block1 ; bin: 7d f2 + jump block5 + +block5: + ; asm: jg block1 + brif sgt v11, block1 ; bin: 7f f0 + jump block6 + +block6: + ; asm: jle block1 + brif sle v11, block1 ; bin: 7e ee + jump block7 + +block7: + ; asm: jb block1 + brif ult v11, block1 ; bin: 72 ec + jump block8 + +block8: + ; asm: jae block1 + brif uge v11, block1 ; bin: 73 ea + jump block9 + +block9: + ; asm: ja block1 + brif ugt v11, block1 ; bin: 77 e8 + jump block10 + +block10: + ; asm: jbe block1 + brif ule v11, block1 ; bin: 76 e6 + jump block11 + +block11: + + ; asm: sete %bl + [-,%rbx] v20 = trueif eq v11 ; bin: 0f 94 c3 + ; asm: setne %bl + [-,%rbx] v21 = trueif ne v11 ; bin: 0f 95 c3 + ; asm: setl %dl + [-,%rdx] v22 = trueif slt v11 ; bin: 0f 9c c2 + ; asm: setge %dl + [-,%rdx] v23 = trueif sge v11 ; bin: 0f 9d c2 + ; asm: setg %r10b + [-,%r10] v24 = trueif sgt v11 ; bin: 41 0f 9f c2 + ; asm: setle %r10b + [-,%r10] v25 = trueif sle v11 ; bin: 41 0f 9e c2 + ; asm: setb %r14b + [-,%r14] v26 = trueif ult v11 ; bin: 41 0f 92 c6 + ; asm: setae %r14b + [-,%r14] v27 = trueif uge v11 ; bin: 41 0f 93 c6 + ; asm: seta %r11b + [-,%r11] v28 = trueif ugt v11 ; bin: 41 0f 97 c3 + ; asm: setbe %r11b + [-,%r11] v29 = trueif ule v11 ; bin: 41 0f 96 c3 + + ; The trapif instructions are encoded as macros: a conditional jump over a ud2. 
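+ ; The condition is inverted so that the jump skips the two-byte ud2 + ; (0f 0b): for example, trapif eq below emits 75 02 (jne over the trap) + ; followed by the ud2, so the trap is only reached when ZF=1.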
+ ; asm: jne .+4; ud2 + trapif eq v11, user0 ; bin: 75 02 user0 0f 0b + ; asm: je .+4; ud2 + trapif ne v11, user0 ; bin: 74 02 user0 0f 0b + ; asm: jnl .+4; ud2 + trapif slt v11, user0 ; bin: 7d 02 user0 0f 0b + ; asm: jnge .+4; ud2 + trapif sge v11, user0 ; bin: 7c 02 user0 0f 0b + ; asm: jng .+4; ud2 + trapif sgt v11, user0 ; bin: 7e 02 user0 0f 0b + ; asm: jnle .+4; ud2 + trapif sle v11, user0 ; bin: 7f 02 user0 0f 0b + ; asm: jnb .+4; ud2 + trapif ult v11, user0 ; bin: 73 02 user0 0f 0b + ; asm: jnae .+4; ud2 + trapif uge v11, user0 ; bin: 72 02 user0 0f 0b + ; asm: jna .+4; ud2 + trapif ugt v11, user0 ; bin: 76 02 user0 0f 0b + ; asm: jnbe .+4; ud2 + trapif ule v11, user0 ; bin: 77 02 user0 0f 0b + ; asm: jno .+4; ud2 + trapif of v11, user0 ; bin: 71 02 user0 0f 0b + ; asm: jo .+4; ud2 + trapif nof v11, user0 ; bin: 70 02 user0 0f 0b + + ; Debug trap. + debugtrap ; bin: cc + + ; Stack check. + ; asm: cmpq %rsp, %rcx + [-,%rflags] v40 = ifcmp_sp v1 ; bin: 48 39 e1 + ; asm: cmpq %rsp, %r10 + [-,%rflags] v41 = ifcmp_sp v2 ; bin: 49 39 e2 + + ; asm: cmpq $-100, %rcx + [-,%rflags] v522 = ifcmp_imm v1, -100 ; bin: 48 83 f9 9c + ; asm: cmpq $100, %r10 + [-,%rflags] v523 = ifcmp_imm v2, 100 ; bin: 49 83 fa 64 + + ; asm: cmpq $-10000, %rcx + [-,%rflags] v524 = ifcmp_imm v1, -10000 ; bin: 48 81 f9 ffffd8f0 + ; asm: cmpq $10000, %r10 + [-,%rflags] v525 = ifcmp_imm v2, 10000 ; bin: 49 81 fa 00002710 + + + return +} + +; Test for the encoding of outgoing_arg stack slots. +function %outargs() { + ss0 = incoming_arg 16, offset -16 + ss1 = outgoing_arg 8, offset 8 + ss2 = outgoing_arg 8, offset 0 + +block0: + [-,%rcx] v1 = iconst.i64 1 + + ; asm: movq %rcx, 8(%rsp) + [-,ss1] v10 = spill v1 ; bin: stk_ovf 48 89 8c 24 00000008 + ; asm: movq %rcx, (%rsp) + [-,ss2] v11 = spill v1 ; bin: stk_ovf 48 89 8c 24 00000000 + + return +} + +; Tests for i32 instructions in 64-bit mode. +; +; Note that many i32 instructions can be encoded both with and without a REX +; prefix if they only use the low 8 registers. Here, we are testing the REX +; encodings which are chosen by default. Switching to non-REX encodings should +; be done by an instruction shrinking pass. +function %I32() { + sig0 = () + fn0 = %foo() + + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + +block0: + + ; Integer Constants. + + ; asm: movl $0x01020304, %ecx + [-,%rcx] v1 = iconst.i32 0x0102_0304 ; bin: b9 01020304 + ; asm: movl $0x11020304, %esi + [-,%rsi] v2 = iconst.i32 0x1102_0304 ; bin: be 11020304 + ; asm: movl $0x21020304, %r10d + [-,%r10] v3 = iconst.i32 0x2102_0304 ; bin: 41 ba 21020304 + ; asm: movl $0xff001122, %r8d + [-,%r8] v4 = iconst.i32 0xff00_1122 ; bin: 41 b8 ff001122 + ; asm: movl $0x88001122, %r14d + [-,%r14] v5 = iconst.i32 0xffff_ffff_8800_1122 ; bin: 41 be 88001122 + + ; Load/Store instructions. + + ; Register indirect addressing with no displacement.
+ + ; asm: movl (%rcx), %edi + [-,%rdi] v10 = load.i32 v1 ; bin: heap_oob 8b 39 + ; asm: movl (%rsi), %edx + [-,%rdx] v11 = load.i32 v2 ; bin: heap_oob 8b 16 + ; asm: movzwl (%rcx), %edi + [-,%rdi] v12 = uload16.i32 v1 ; bin: heap_oob 0f b7 39 + ; asm: movzwl (%rsi), %edx + [-,%rdx] v13 = uload16.i32 v2 ; bin: heap_oob 0f b7 16 + ; asm: movswl (%rcx), %edi + [-,%rdi] v14 = sload16.i32 v1 ; bin: heap_oob 0f bf 39 + ; asm: movswl (%rsi), %edx + [-,%rdx] v15 = sload16.i32 v2 ; bin: heap_oob 0f bf 16 + ; asm: movzbl (%rcx), %edi + [-,%rdi] v16 = uload8.i32 v1 ; bin: heap_oob 0f b6 39 + ; asm: movzbl (%rsi), %edx + [-,%rdx] v17 = uload8.i32 v2 ; bin: heap_oob 0f b6 16 + ; asm: movsbl (%rcx), %edi + [-,%rdi] v18 = sload8.i32 v1 ; bin: heap_oob 0f be 39 + ; asm: movsbl (%rsi), %edx + [-,%rdx] v19 = sload8.i32 v2 ; bin: heap_oob 0f be 16 + + ; Register-indirect with 8-bit signed displacement. + + ; asm: movl 50(%rcx), %edi + [-,%rdi] v20 = load.i32 v1+50 ; bin: heap_oob 8b 79 32 + ; asm: movl -50(%rsi), %edx + [-,%rdx] v21 = load.i32 v2-50 ; bin: heap_oob 8b 56 ce + ; asm: movzwl 50(%rcx), %edi + [-,%rdi] v22 = uload16.i32 v1+50 ; bin: heap_oob 0f b7 79 32 + ; asm: movzwl -50(%rsi), %edx + [-,%rdx] v23 = uload16.i32 v2-50 ; bin: heap_oob 0f b7 56 ce + ; asm: movswl 50(%rcx), %edi + [-,%rdi] v24 = sload16.i32 v1+50 ; bin: heap_oob 0f bf 79 32 + ; asm: movswl -50(%rsi), %edx + [-,%rdx] v25 = sload16.i32 v2-50 ; bin: heap_oob 0f bf 56 ce + ; asm: movzbl 50(%rcx), %edi + [-,%rdi] v26 = uload8.i32 v1+50 ; bin: heap_oob 0f b6 79 32 + ; asm: movzbl -50(%rsi), %edx + [-,%rdx] v27 = uload8.i32 v2-50 ; bin: heap_oob 0f b6 56 ce + ; asm: movsbl 50(%rcx), %edi + [-,%rdi] v28 = sload8.i32 v1+50 ; bin: heap_oob 0f be 79 32 + ; asm: movsbl -50(%rsi), %edx + [-,%rdx] v29 = sload8.i32 v2-50 ; bin: heap_oob 0f be 56 ce + + ; Register-indirect with 32-bit signed displacement. + + ; asm: movl 50000(%rcx), %edi + [-,%rdi] v30 = load.i32 v1+50000 ; bin: heap_oob 8b b9 0000c350 + ; asm: movl -50000(%rsi), %edx + [-,%rdx] v31 = load.i32 v2-50000 ; bin: heap_oob 8b 96 ffff3cb0 + ; asm: movzwl 50000(%rcx), %edi + [-,%rdi] v32 = uload16.i32 v1+50000 ; bin: heap_oob 0f b7 b9 0000c350 + ; asm: movzwl -50000(%rsi), %edx + [-,%rdx] v33 = uload16.i32 v2-50000 ; bin: heap_oob 0f b7 96 ffff3cb0 + ; asm: movswl 50000(%rcx), %edi + [-,%rdi] v34 = sload16.i32 v1+50000 ; bin: heap_oob 0f bf b9 0000c350 + ; asm: movswl -50000(%rsi), %edx + [-,%rdx] v35 = sload16.i32 v2-50000 ; bin: heap_oob 0f bf 96 ffff3cb0 + ; asm: movzbl 50000(%rcx), %edi + [-,%rdi] v36 = uload8.i32 v1+50000 ; bin: heap_oob 0f b6 b9 0000c350 + ; asm: movzbl -50000(%rsi), %edx + [-,%rdx] v37 = uload8.i32 v2-50000 ; bin: heap_oob 0f b6 96 ffff3cb0 + ; asm: movsbl 50000(%rcx), %edi + [-,%rdi] v38 = sload8.i32 v1+50000 ; bin: heap_oob 0f be b9 0000c350 + ; asm: movsbl -50000(%rsi), %edx + [-,%rdx] v39 = sload8.i32 v2-50000 ; bin: heap_oob 0f be 96 ffff3cb0 + + ; Integer Register Operations. + + ; asm: notl %ecx + [-,%rcx] v4000 = bnot v1 ; bin: f7 d1 + ; asm: notl %esi + [-,%rsi] v4001 = bnot v2 ; bin: f7 d6 + ; asm: notl %r10d + [-,%r10] v4002 = bnot v3 ; bin: 41 f7 d2 + + ; Integer Register-Register Operations. 
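+ ; In the two-operand ops directly below, the ModRM reg field holds the
+ ; source, so an extended source register sets REX.R (44 01 d6) while an
+ ; extended destination sets REX.B (41 01 ca).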
+
+ ; asm: addl %esi, %ecx
+ [-,%rcx] v40 = iadd v1, v2 ; bin: 01 f1
+ ; asm: addl %r10d, %esi
+ [-,%rsi] v41 = iadd v2, v3 ; bin: 44 01 d6
+ ; asm: addl %ecx, %r10d
+ [-,%r10] v42 = iadd v3, v1 ; bin: 41 01 ca
+
+ ; asm: subl %esi, %ecx
+ [-,%rcx] v50 = isub v1, v2 ; bin: 29 f1
+ ; asm: subl %r10d, %esi
+ [-,%rsi] v51 = isub v2, v3 ; bin: 44 29 d6
+ ; asm: subl %ecx, %r10d
+ [-,%r10] v52 = isub v3, v1 ; bin: 41 29 ca
+
+ ; asm: andl %esi, %ecx
+ [-,%rcx] v60 = band v1, v2 ; bin: 21 f1
+ ; asm: andl %r10d, %esi
+ [-,%rsi] v61 = band v2, v3 ; bin: 44 21 d6
+ ; asm: andl %ecx, %r10d
+ [-,%r10] v62 = band v3, v1 ; bin: 41 21 ca
+
+ ; asm: orl %esi, %ecx
+ [-,%rcx] v70 = bor v1, v2 ; bin: 09 f1
+ ; asm: orl %r10d, %esi
+ [-,%rsi] v71 = bor v2, v3 ; bin: 44 09 d6
+ ; asm: orl %ecx, %r10d
+ [-,%r10] v72 = bor v3, v1 ; bin: 41 09 ca
+
+ ; asm: xorl %esi, %ecx
+ [-,%rcx] v80 = bxor v1, v2 ; bin: 31 f1
+ ; asm: xorl %r10d, %esi
+ [-,%rsi] v81 = bxor v2, v3 ; bin: 44 31 d6
+ ; asm: xorl %ecx, %r10d
+ [-,%r10] v82 = bxor v3, v1 ; bin: 41 31 ca
+
+ ; asm: shll %cl, %esi
+ [-,%rsi] v90 = ishl v2, v1 ; bin: d3 e6
+ ; asm: shll %cl, %r10d
+ [-,%r10] v91 = ishl v3, v1 ; bin: 41 d3 e2
+ ; asm: sarl %cl, %esi
+ [-,%rsi] v92 = sshr v2, v1 ; bin: d3 fe
+ ; asm: sarl %cl, %r10d
+ [-,%r10] v93 = sshr v3, v1 ; bin: 41 d3 fa
+ ; asm: shrl %cl, %esi
+ [-,%rsi] v94 = ushr v2, v1 ; bin: d3 ee
+ ; asm: shrl %cl, %r10d
+ [-,%r10] v95 = ushr v3, v1 ; bin: 41 d3 ea
+
+ ; asm: roll %cl, %esi
+ [-,%rsi] v96 = rotl v2, v1 ; bin: d3 c6
+ ; asm: roll %cl, %r10d
+ [-,%r10] v97 = rotl v3, v1 ; bin: 41 d3 c2
+ ; asm: rorl %cl, %esi
+ [-,%rsi] v98 = rotr v2, v1 ; bin: d3 ce
+ ; asm: rorl %cl, %r10d
+ [-,%r10] v99 = rotr v3, v1 ; bin: 41 d3 ca
+
+ ; Integer Register-Immediate Operations.
+ ; These 32-bit ops use a 32-bit immediate directly.
+ ; Some take 8-bit immediates that are sign-extended to 32 bits.
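+ ; Immediates that fit in a sign-extended 8 bits select the short 0x83
+ ; opcode (41 83 c0 64 below); anything larger needs the full imm32 form,
+ ; opcode 0x81 (81 c1 fffe7960).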
+
+ ; asm: addl $-100000, %ecx
+ [-,%rcx] v100 = iadd_imm v1, -100000 ; bin: 81 c1 fffe7960
+ ; asm: addl $100000, %esi
+ [-,%rsi] v101 = iadd_imm v2, 100000 ; bin: 81 c6 000186a0
+ ; asm: addl $0x7fffffff, %r10d
+ [-,%r10] v102 = iadd_imm v3, 0x7fff_ffff ; bin: 41 81 c2 7fffffff
+ ; asm: addl $100, %r8d
+ [-,%r8] v103 = iadd_imm v4, 100 ; bin: 41 83 c0 64
+ ; asm: addl $-100, %r14d
+ [-,%r14] v104 = iadd_imm v5, -100 ; bin: 41 83 c6 9c
+
+ ; asm: andl $-100000, %ecx
+ [-,%rcx] v110 = band_imm v1, -100000 ; bin: 81 e1 fffe7960
+ ; asm: andl $100000, %esi
+ [-,%rsi] v111 = band_imm v2, 100000 ; bin: 81 e6 000186a0
+ ; asm: andl $0x7fffffff, %r10d
+ [-,%r10] v112 = band_imm v3, 0x7fff_ffff ; bin: 41 81 e2 7fffffff
+ ; asm: andl $100, %r8d
+ [-,%r8] v113 = band_imm v4, 100 ; bin: 41 83 e0 64
+ ; asm: andl $-100, %r14d
+ [-,%r14] v114 = band_imm v5, -100 ; bin: 41 83 e6 9c
+
+ ; asm: orl $-100000, %ecx
+ [-,%rcx] v120 = bor_imm v1, -100000 ; bin: 81 c9 fffe7960
+ ; asm: orl $100000, %esi
+ [-,%rsi] v121 = bor_imm v2, 100000 ; bin: 81 ce 000186a0
+ ; asm: orl $0x7fffffff, %r10d
+ [-,%r10] v122 = bor_imm v3, 0x7fff_ffff ; bin: 41 81 ca 7fffffff
+ ; asm: orl $100, %r8d
+ [-,%r8] v123 = bor_imm v4, 100 ; bin: 41 83 c8 64
+ ; asm: orl $-100, %r14d
+ [-,%r14] v124 = bor_imm v5, -100 ; bin: 41 83 ce 9c
+
+ ; asm: xorl $-100000, %ecx
+ [-,%rcx] v130 = bxor_imm v1, -100000 ; bin: 81 f1 fffe7960
+ ; asm: xorl $100000, %esi
+ [-,%rsi] v131 = bxor_imm v2, 100000 ; bin: 81 f6 000186a0
+ ; asm: xorl $0x7fffffff, %r10d
+ [-,%r10] v132 = bxor_imm v3, 0x7fff_ffff ; bin: 41 81 f2 7fffffff
+ ; asm: xorl $100, %r8d
+ [-,%r8] v133 = bxor_imm v4, 100 ; bin: 41 83 f0 64
+ ; asm: xorl $-100, %r14d
+ [-,%r14] v134 = bxor_imm v5, -100 ; bin: 41 83 f6 9c
+
+ ; Register copies.
+
+ ; asm: movl %esi, %ecx
+ [-,%rcx] v140 = copy v2 ; bin: 89 f1
+ ; asm: movl %r10d, %esi
+ [-,%rsi] v141 = copy v3 ; bin: 44 89 d6
+ ; asm: movl %ecx, %r10d
+ [-,%r10] v142 = copy v1 ; bin: 41 89 ca
+
+ ; More arithmetic.
+
+ ; asm: imull %esi, %ecx
+ [-,%rcx] v150 = imul v1, v2 ; bin: 0f af ce
+ ; asm: imull %r10d, %esi
+ [-,%rsi] v151 = imul v2, v3 ; bin: 41 0f af f2
+ ; asm: imull %ecx, %r10d
+ [-,%r10] v152 = imul v3, v1 ; bin: 44 0f af d1
+
+ [-,%rax] v160 = iconst.i32 1
+ [-,%rdx] v161 = iconst.i32 2
+ ; asm: idivl %ecx
+ [-,%rax,%rdx] v162, v163 = x86_sdivmodx v160, v161, v1 ; bin: int_divz f7 f9
+ ; asm: idivl %esi
+ [-,%rax,%rdx] v164, v165 = x86_sdivmodx v160, v161, v2 ; bin: int_divz f7 fe
+ ; asm: idivl %r10d
+ [-,%rax,%rdx] v166, v167 = x86_sdivmodx v160, v161, v3 ; bin: int_divz 41 f7 fa
+ ; asm: divl %ecx
+ [-,%rax,%rdx] v168, v169 = x86_udivmodx v160, v161, v1 ; bin: int_divz f7 f1
+ ; asm: divl %esi
+ [-,%rax,%rdx] v170, v171 = x86_udivmodx v160, v161, v2 ; bin: int_divz f7 f6
+ ; asm: divl %r10d
+ [-,%rax,%rdx] v172, v173 = x86_udivmodx v160, v161, v3 ; bin: int_divz 41 f7 f2
+
+ ; Bit-counting instructions.
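+ ; popcnt, lzcnt and tzcnt all carry an f3 prefix ahead of the 0f escape;
+ ; when an extended register is involved, the REX byte is placed between
+ ; the f3 prefix and the 0f escape (f3 41 0f b8 f2).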
+ + ; asm: popcntl %esi, %ecx + [-,%rcx] v200 = popcnt v2 ; bin: f3 0f b8 ce + ; asm: popcntl %r10d, %esi + [-,%rsi] v201 = popcnt v3 ; bin: f3 41 0f b8 f2 + ; asm: popcntl %ecx, %r10d + [-,%r10] v202 = popcnt v1 ; bin: f3 44 0f b8 d1 + + ; asm: lzcntl %esi, %ecx + [-,%rcx] v203 = clz v2 ; bin: f3 0f bd ce + ; asm: lzcntl %r10d, %esi + [-,%rsi] v204 = clz v3 ; bin: f3 41 0f bd f2 + ; asm: lzcntl %ecx, %r10d + [-,%r10] v205 = clz v1 ; bin: f3 44 0f bd d1 + + ; asm: tzcntl %esi, %ecx + [-,%rcx] v206 = ctz v2 ; bin: f3 0f bc ce + ; asm: tzcntl %r10d, %esi + [-,%rsi] v207 = ctz v3 ; bin: f3 41 0f bc f2 + ; asm: tzcntl %ecx, %r10d + [-,%r10] v208 = ctz v1 ; bin: f3 44 0f bc d1 + + ; Integer comparisons. + + ; asm: cmpl %esi, %ecx + ; asm: sete %bl + [-,%rbx] v300 = icmp eq v1, v2 ; bin: 39 f1 0f 94 c3 + ; asm: cmpl %r10d, %esi + ; asm: sete %dl + [-,%rdx] v301 = icmp eq v2, v3 ; bin: 44 39 d6 0f 94 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setne %bl + [-,%rbx] v302 = icmp ne v1, v2 ; bin: 39 f1 0f 95 c3 + ; asm: cmpl %r10d, %esi + ; asm: setne %dl + [-,%rdx] v303 = icmp ne v2, v3 ; bin: 44 39 d6 0f 95 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setl %bl + [-,%rbx] v304 = icmp slt v1, v2 ; bin: 39 f1 0f 9c c3 + ; asm: cmpl %r10d, %esi + ; asm: setl %dl + [-,%rdx] v305 = icmp slt v2, v3 ; bin: 44 39 d6 0f 9c c2 + + ; asm: cmpl %esi, %ecx + ; asm: setge %bl + [-,%rbx] v306 = icmp sge v1, v2 ; bin: 39 f1 0f 9d c3 + ; asm: cmpl %r10d, %esi + ; asm: setge %dl + [-,%rdx] v307 = icmp sge v2, v3 ; bin: 44 39 d6 0f 9d c2 + + ; asm: cmpl %esi, %ecx + ; asm: setg %bl + [-,%rbx] v308 = icmp sgt v1, v2 ; bin: 39 f1 0f 9f c3 + ; asm: cmpl %r10d, %esi + ; asm: setg %dl + [-,%rdx] v309 = icmp sgt v2, v3 ; bin: 44 39 d6 0f 9f c2 + + ; asm: cmpl %esi, %ecx + ; asm: setle %bl + [-,%rbx] v310 = icmp sle v1, v2 ; bin: 39 f1 0f 9e c3 + ; asm: cmpl %r10d, %esi + ; asm: setle %dl + [-,%rdx] v311 = icmp sle v2, v3 ; bin: 44 39 d6 0f 9e c2 + + ; asm: cmpl %esi, %ecx + ; asm: setb %bl + [-,%rbx] v312 = icmp ult v1, v2 ; bin: 39 f1 0f 92 c3 + ; asm: cmpl %r10d, %esi + ; asm: setb %dl + [-,%rdx] v313 = icmp ult v2, v3 ; bin: 44 39 d6 0f 92 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setae %bl + [-,%rbx] v314 = icmp uge v1, v2 ; bin: 39 f1 0f 93 c3 + ; asm: cmpl %r10d, %esi + ; asm: setae %dl + [-,%rdx] v315 = icmp uge v2, v3 ; bin: 44 39 d6 0f 93 c2 + + ; asm: cmpl %esi, %ecx + ; asm: seta %bl + [-,%rbx] v316 = icmp ugt v1, v2 ; bin: 39 f1 0f 97 c3 + ; asm: cmpl %r10d, %esi + ; asm: seta %dl + [-,%rdx] v317 = icmp ugt v2, v3 ; bin: 44 39 d6 0f 97 c2 + + ; asm: cmpl %esi, %ecx + ; asm: setbe %bl + [-,%rbx] v318 = icmp ule v1, v2 ; bin: 39 f1 0f 96 c3 + ; asm: cmpl %r10d, %esi + ; asm: setbe %dl + [-,%rdx] v319 = icmp ule v2, v3 ; bin: 44 39 d6 0f 96 c2 + + ; asm: cmpl $37, %ecx + ; asm: setl %bl + [-,%rbx] v320 = icmp_imm slt v1, 37 ; bin: 83 f9 25 0f 9c c3 + + ; asm: cmpl $100000, %ecx + ; asm: setl %bl + [-,%rbx] v321 = icmp_imm slt v1, 100000 ; bin: 81 f9 000186a0 0f 9c c3 + + ; Bool-to-int conversions. + + ; asm: movzbl %bl, %ecx + [-,%rcx] v350 = bint.i32 v300 ; bin: 0f b6 cb + ; asm: movzbl %dl, %esi + [-,%rsi] v351 = bint.i32 v301 ; bin: 0f b6 f2 + + ; Spill / Fill. 
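+ ; Stack slot accesses are %rsp-relative, and %rsp-based addressing always
+ ; requires a SIB byte (the 24 below); ss1 sits at offset 1032 = 0x408,
+ ; hence the 00000408 disp32 in each encoding.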
+ + ; asm: movl %ecx, 1032(%rsp) + [-,ss1] v500 = spill v1 ; bin: stk_ovf 89 8c 24 00000408 + ; asm: movl %esi, 1032(%rsp) + [-,ss1] v501 = spill v2 ; bin: stk_ovf 89 b4 24 00000408 + ; asm: movl %r10d, 1032(%rsp) + [-,ss1] v502 = spill v3 ; bin: stk_ovf 44 89 94 24 00000408 + + ; asm: movl 1032(%rsp), %ecx + [-,%rcx] v510 = fill v500 ; bin: 8b 8c 24 00000408 + ; asm: movl 1032(%rsp), %esi + [-,%rsi] v511 = fill v501 ; bin: 8b b4 24 00000408 + ; asm: movl 1032(%rsp), %r10d + [-,%r10] v512 = fill v502 ; bin: 44 8b 94 24 00000408 + + ; asm: movl %ecx, 1032(%rsp) + regspill v1, %rcx -> ss1 ; bin: stk_ovf 89 8c 24 00000408 + ; asm: movl 1032(%rsp), %ecx + regfill v1, ss1 -> %rcx ; bin: 8b 8c 24 00000408 + + ; asm: cmpl %esi, %ecx + [-,%rflags] v520 = ifcmp v1, v2 ; bin: 39 f1 + ; asm: cmpl %r10d, %esi + [-,%rflags] v521 = ifcmp v2, v3 ; bin: 44 39 d6 + + ; asm: cmpl $-100, %ecx + [-,%rflags] v522 = ifcmp_imm v1, -100 ; bin: 83 f9 9c + ; asm: cmpl $100, %r10d + [-,%rflags] v523 = ifcmp_imm v3, 100 ; bin: 41 83 fa 64 + + ; asm: cmpl $-10000, %ecx + [-,%rflags] v524 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0 + ; asm: cmpl $10000, %r10d + [-,%rflags] v525 = ifcmp_imm v3, 10000 ; bin: 41 81 fa 00002710 + + ; asm: shll $2, %esi + [-,%rsi] v526 = ishl_imm v2, 2 ; bin: c1 e6 02 + ; asm: shll $12, %r10d + [-,%r10] v527 = ishl_imm v3, 12 ; bin: 41 c1 e2 0c + ; asm: sarl $5, %esi + [-,%rsi] v529 = sshr_imm v2, 5 ; bin: c1 fe 05 + ; asm: sarl $32, %r10d + [-,%r10] v530 = sshr_imm v3, 32 ; bin: 41 c1 fa 20 + ; asm: shrl $8, %esi + [-,%rsi] v532 = ushr_imm v2, 8 ; bin: c1 ee 08 + ; asm: shrl $31, %r10d + [-,%r10] v533 = ushr_imm v3, 31 ; bin: 41 c1 ea 1f + + ; asm: testl %ecx, %ecx + ; asm: je block1x + brz v1, block1 ; bin: 85 c9 74 18 + fallthrough block3 + +block3: + ; asm: testl %esi, %esi + ; asm: je block1x + brz v2, block1 ; bin: 85 f6 74 14 + fallthrough block4 + +block4: + ; asm: testl %r10d, %r10d + ; asm: je block1x + brz v3, block1 ; bin: 45 85 d2 74 0f + fallthrough block5 + +block5: + ; asm: testl %ecx, %ecx + ; asm: jne block1x + brnz v1, block1 ; bin: 85 c9 75 0b + fallthrough block6 + +block6: + ; asm: testl %esi, %esi + ; asm: jne block1x + brnz v2, block1 ; bin: 85 f6 75 07 + fallthrough block7 + +block7: + ; asm: testl %r10d, %r10d + ; asm: jne block1x + brnz v3, block1 ; bin: 45 85 d2 75 02 + + ; asm: jmp block2x + jump block2 ; bin: eb 01 + + ; asm: block1x: +block1: + return ; bin: c3 + + ; asm: block2x: +block2: + ; Add a no-op instruction to prevent fold_redundant_jump from removing this block. + ; asm: notl %ecx + [-,%rcx] v5000 = bnot v1 ; bin: f7 d1 + jump block1 ; bin: eb fb + +} + +; Tests for i32/i8 conversion instructions. +function %I32_I8() { +block0: + [-,%rcx] v1 = iconst.i32 1 + [-,%rsi] v2 = iconst.i32 2 + [-,%r10] v3 = iconst.i32 3 + + [-,%rcx] v11 = ireduce.i8 v1 ; bin: + [-,%rsi] v12 = ireduce.i8 v2 ; bin: + [-,%r10] v13 = ireduce.i8 v3 ; bin: + + ; asm: movsbl %cl, %esi + [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f be f1 + ; asm: movsbl %sil, %r10d + [-,%r10] v21 = sextend.i32 v12 ; bin: 44 0f be d6 + ; asm: movsbl %r10b, %ecx + [-,%rcx] v22 = sextend.i32 v13 ; bin: 41 0f be ca + + ; asm: movzbl %cl, %esi + [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b6 f1 + ; asm: movzbl %sil, %r10d + [-,%r10] v31 = uextend.i32 v12 ; bin: 44 0f b6 d6 + ; asm: movzbl %r10b, %ecx + [-,%rcx] v32 = uextend.i32 v13 ; bin: 41 0f b6 ca + + trap user0 ; bin: user0 0f 0b +} + +; Tests for i32/i16 conversion instructions. 
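+; As in the i32/i8 tests above, ireduce emits no machine code (its bin:
+; annotation is empty); the narrow value simply lives in the low bits of
+; the same register.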
+function %I32_I16() { +block0: + [-,%rcx] v1 = iconst.i32 1 + [-,%rsi] v2 = iconst.i32 2 + [-,%r10] v3 = iconst.i32 3 + + [-,%rcx] v11 = ireduce.i16 v1 ; bin: + [-,%rsi] v12 = ireduce.i16 v2 ; bin: + [-,%r10] v13 = ireduce.i16 v3 ; bin: + + ; asm: movswl %cx, %esi + [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f bf f1 + ; asm: movswl %si, %r10d + [-,%r10] v21 = sextend.i32 v12 ; bin: 44 0f bf d6 + ; asm: movswl %r10w, %ecx + [-,%rcx] v22 = sextend.i32 v13 ; bin: 41 0f bf ca + + ; asm: movzwl %cx, %esi + [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b7 f1 + ; asm: movzwl %si, %r10d + [-,%r10] v31 = uextend.i32 v12 ; bin: 44 0f b7 d6 + ; asm: movzwl %r10w, %ecx + [-,%rcx] v32 = uextend.i32 v13 ; bin: 41 0f b7 ca + + trap user0 ; bin: user0 0f 0b +} + +; Tests for i64/i8 conversion instructions. +function %I64_I8() { +block0: + [-,%rcx] v1 = iconst.i64 1 + [-,%rsi] v2 = iconst.i64 2 + [-,%r10] v3 = iconst.i64 3 + + [-,%rcx] v11 = ireduce.i8 v1 ; bin: + [-,%rsi] v12 = ireduce.i8 v2 ; bin: + [-,%r10] v13 = ireduce.i8 v3 ; bin: + + ; asm: movsbq %cl, %rsi + [-,%rsi] v20 = sextend.i64 v11 ; bin: 48 0f be f1 + ; asm: movsbq %sil, %r10 + [-,%r10] v21 = sextend.i64 v12 ; bin: 4c 0f be d6 + ; asm: movsbq %r10b, %rcx + [-,%rcx] v22 = sextend.i64 v13 ; bin: 49 0f be ca + + ; asm: movzbl %cl, %esi + [-,%rsi] v30 = uextend.i64 v11 ; bin: 0f b6 f1 + ; asm: movzbl %sil, %r10d + [-,%r10] v31 = uextend.i64 v12 ; bin: 44 0f b6 d6 + ; asm: movzbl %r10b, %ecx + [-,%rcx] v32 = uextend.i64 v13 ; bin: 41 0f b6 ca + + trap user0 ; bin: user0 0f 0b +} + +; Tests for i64/i16 conversion instructions. +function %I64_I16() { +block0: + [-,%rcx] v1 = iconst.i64 1 + [-,%rsi] v2 = iconst.i64 2 + [-,%r10] v3 = iconst.i64 3 + + [-,%rcx] v11 = ireduce.i16 v1 ; bin: + [-,%rsi] v12 = ireduce.i16 v2 ; bin: + [-,%r10] v13 = ireduce.i16 v3 ; bin: + + ; asm: movswq %cx, %rsi + [-,%rsi] v20 = sextend.i64 v11 ; bin: 48 0f bf f1 + ; asm: movswq %si, %r10 + [-,%r10] v21 = sextend.i64 v12 ; bin: 4c 0f bf d6 + ; asm: movswq %r10w, %rcx + [-,%rcx] v22 = sextend.i64 v13 ; bin: 49 0f bf ca + + ; asm: movzwl %cx, %esi + [-,%rsi] v30 = uextend.i64 v11 ; bin: 0f b7 f1 + ; asm: movzwl %si, %r10d + [-,%r10] v31 = uextend.i64 v12 ; bin: 44 0f b7 d6 + ; asm: movzwl %r10w, %ecx + [-,%rcx] v32 = uextend.i64 v13 ; bin: 41 0f b7 ca + + trap user0 ; bin: user0 0f 0b +} + +; Tests for i64/i32 conversion instructions. +function %I64_I32() { +block0: + [-,%rcx] v1 = iconst.i64 1 + [-,%rsi] v2 = iconst.i64 2 + [-,%r10] v3 = iconst.i64 3 + + [-,%rcx] v11 = ireduce.i32 v1 ; bin: + [-,%rsi] v12 = ireduce.i32 v2 ; bin: + [-,%r10] v13 = ireduce.i32 v3 ; bin: + + ; asm: movslq %ecx, %rsi + [-,%rsi] v20 = sextend.i64 v11 ; bin: 48 63 f1 + ; asm: movslq %esi, %r10 + [-,%r10] v21 = sextend.i64 v12 ; bin: 4c 63 d6 + ; asm: movslq %r10d, %rcx + [-,%rcx] v22 = sextend.i64 v13 ; bin: 49 63 ca + + ; asm: movl %ecx, %esi + [-,%rsi] v30 = uextend.i64 v11 ; bin: 89 ce + ; asm: movl %esi, %r10d + [-,%r10] v31 = uextend.i64 v12 ; bin: 41 89 f2 + ; asm: movl %r10d, %ecx + [-,%rcx] v32 = uextend.i64 v13 ; bin: 44 89 d1 + + trap user0 ; bin: user0 0f 0b +} + +; Tests for i64 jump table instructions. +function %I64_JT(i64 [%rdi]) { + jt0 = jump_table [block1, block2, block3] + +block0(v0: i64 [%rdi]): + ; Note: The next two lines will need to change whenever instructions are + ; added or removed from this test. 
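+ ; The 00000039 / 00000032 immediates are rel32 distances from the end of
+ ; each lea to the jump table in read-only data, which is why they shift
+ ; with any change in the code emitted after them.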
+ [-, %rax] v1 = jump_table_base.i64 jt0 ; bin: 48 8d 05 00000039 PCRelRodata4(jt0)
+ [-, %r10] v2 = jump_table_base.i64 jt0 ; bin: 4c 8d 15 00000032 PCRelRodata4(jt0)
+
+ [-, %rbx] v10 = iconst.i64 1
+ [-, %r13] v11 = iconst.i64 2
+
+ [-, %rax] v20 = jump_table_entry.i64 v10, v1, 4, jt0 ; bin: 48 63 04 98
+ [-, %rax] v21 = jump_table_entry.i64 v10, v2, 4, jt0 ; bin: 49 63 04 9a
+ [-, %rax] v22 = jump_table_entry.i64 v11, v1, 4, jt0 ; bin: 4a 63 04 a8
+ [-, %rax] v23 = jump_table_entry.i64 v11, v2, 4, jt0 ; bin: 4b 63 04 aa
+
+ [-, %r10] v30 = jump_table_entry.i64 v10, v1, 4, jt0 ; bin: 4c 63 14 98
+ [-, %r10] v31 = jump_table_entry.i64 v10, v2, 4, jt0 ; bin: 4d 63 14 9a
+ [-, %r10] v32 = jump_table_entry.i64 v11, v1, 4, jt0 ; bin: 4e 63 14 a8
+ [-, %r10] v33 = jump_table_entry.i64 v11, v2, 4, jt0 ; bin: 4f 63 14 aa
+
+ fallthrough block10
+
+block10:
+ indirect_jump_table_br v10, jt0 ; bin: ff e3
+block11:
+ indirect_jump_table_br v11, jt0 ; bin: 41 ff e5
+
+block1:
+ fallthrough block2
+block2:
+ fallthrough block3
+block3:
+ trap user0
+}
+
+function %r12_r13_loads() {
+block0:
+ [-,%r12] v1 = iconst.i64 0x0123_4567_89ab_cdef
+ [-,%r13] v2 = iconst.i64 0xfedc_ba98_7654_3210
+ [-,%rax] v3 = iconst.i64 0x1
+
+ ;; Simple GPR load.
+ ; asm: movq (%r12), %rdx
+ [-,%rdx] v4 = load.i64 notrap v1 ; bin: 49 8b 14 24
+ ; asm: movq (%r13), %rdx
+ [-,%rdx] v5 = load.i64 notrap v2 ; bin: 49 8b 55 00
+
+ ;; Load with disp8.
+ ; asm: movq 0x1(%r12), %rdx
+ [-,%rdx] v6 = load.i64 notrap v1+1 ; bin: 49 8b 54 24 01
+ ; asm: movq 0x1(%r13), %rdx
+ [-,%rdx] v7 = load.i64 notrap v2+1 ; bin: 49 8b 55 01
+
+ ;; Load with disp32.
+ ; asm: movq 0x100(%r12), %rdx
+ [-,%rdx] v8 = load.i64 notrap v1+256 ; bin: 49 8b 94 24 00000100
+ ; asm: movq 0x100(%r13), %rdx
+ [-,%rdx] v9 = load.i64 notrap v2+256 ; bin: 49 8b 95 00000100
+
+ ;; Load for base+index.
+ ; asm: movq (%r12, %rax, 1), %rdx
+ [-,%rdx] v10 = load_complex.i64 notrap v1+v3 ; bin: 49 8b 14 04
+ ; asm: movq (%r13, %rax, 1), %rdx
+ [-,%rdx] v11 = load_complex.i64 notrap v2+v3 ; bin: 49 8b 54 05 00
+
+ ;; Now for FP values.
+ ; asm: movss (%r12), %xmm0
+ [-,%xmm0] v12 = load.f32 notrap v1 ; bin: f3 41 0f 10 04 24
+ ; asm: movss (%r13), %xmm0
+ [-,%xmm0] v13 = load.f32 notrap v2 ; bin: f3 41 0f 10 45 00
+
+ ;; Load with disp8.
+ ; asm: movss 0x1(%r12), %xmm0
+ [-,%xmm0] v14 = load.f32 notrap v1+1 ; bin: f3 41 0f 10 44 24 01
+ ; asm: movss 0x1(%r13), %xmm0
+ [-,%xmm0] v15 = load.f32 notrap v2+1 ; bin: f3 41 0f 10 45 01
+
+ ;; Load with disp32.
+ ; asm: movss 0x100(%r12), %xmm0
+ [-,%xmm0] v16 = load.f32 notrap v1+256 ; bin: f3 41 0f 10 84 24 00000100
+ ; asm: movss 0x100(%r13), %xmm0
+ [-,%xmm0] v17 = load.f32 notrap v2+256 ; bin: f3 41 0f 10 85 00000100
+
+ ;; Load for base+index.
+ ; asm: movss (%r12, %rax, 1), %xmm0
+ [-,%xmm0] v18 = load_complex.f32 notrap v1+v3 ; bin: f3 41 0f 10 04 04
+ ; asm: movss (%r13, %rax, 1), %xmm0
+ [-,%xmm0] v19 = load_complex.f32 notrap v2+v3 ; bin: f3 41 0f 10 44 05 00
+
+ return
+}
+
+function %r12_r13_stores() {
+block0:
+ [-,%r12] v1 = iconst.i64 0x0123_4567_89ab_cdef
+ [-,%r13] v2 = iconst.i64 0xfedc_ba98_7654_3210
+ [-,%rax] v3 = iconst.i64 0x1
+ [-,%xmm0] v4 = f32const 0x1.0
+
+ ;; Simple GPR store.
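+ ;; As with the loads above, %r12 and %r13 exercise the ModRM special
+ ;; cases: %r12 (like %rsp) always needs a SIB byte (the 24 in 49 89 04 24),
+ ;; and %r13 (like %rbp) cannot be encoded without a displacement, so a
+ ;; zero disp8 is used (49 89 45 00).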
+ ; asm: movq %rax, (%r12)
+ store notrap v3, v1; bin: 49 89 04 24
+ ; asm: movq %rax, (%r13)
+ store notrap v3, v2; bin: 49 89 45 00
+
+ ; asm: movq %rax, 0x1(%r12)
+ store notrap v3, v1+1; bin: 49 89 44 24 01
+ ; asm: movq %rax, 0x1(%r13)
+ store notrap v3, v2+1; bin: 49 89 45 01
+
+ ; asm: movq %rax, 0x100(%r12)
+ store notrap v3, v1+256; bin: 49 89 84 24 00000100
+ ; asm: movq %rax, 0x100(%r13)
+ store notrap v3, v2+256; bin: 49 89 85 00000100
+
+ ; asm: movq %rax, (%r12, %rax, 1)
+ store_complex notrap v3, v1+v3; bin: 49 89 04 04
+ ; asm: movq %rax, (%r13, %rax, 1)
+ store_complex notrap v3, v2+v3; bin: 49 89 44 05 00
+
+ ; asm: movb %al, (%r12)
+ istore8 notrap v3, v1; bin: 41 88 04 24
+ ; asm: movb %al, (%r13)
+ istore8 notrap v3, v2; bin: 41 88 45 00
+
+ ; asm: movb %al, 0x1(%r12)
+ istore8 notrap v3, v1+1; bin: 41 88 44 24 01
+ ; asm: movb %al, 0x1(%r13)
+ istore8 notrap v3, v2+1; bin: 41 88 45 01
+
+ ; asm: movb %al, 0x100(%r12)
+ istore8 notrap v3, v1+256; bin: 41 88 84 24 00000100
+ ; asm: movb %al, 0x100(%r13)
+ istore8 notrap v3, v2+256; bin: 41 88 85 00000100
+
+ ; asm: movb %al, (%r12, %rax, 1)
+ istore8_complex notrap v3, v1+v3; bin: 41 88 04 04
+ ; asm: movb %al, (%r13, %rax, 1)
+ istore8_complex notrap v3, v2+v3; bin: 41 88 44 05 00
+
+ ; asm: movss %xmm0, (%r12)
+ store notrap v4, v1; bin: f3 41 0f 11 04 24
+ ; asm: movss %xmm0, (%r13)
+ store notrap v4, v2; bin: f3 41 0f 11 45 00
+
+ ; asm: movss %xmm0, 0x1(%r12)
+ store notrap v4, v1+1; bin: f3 41 0f 11 44 24 01
+ ; asm: movss %xmm0, 0x1(%r13)
+ store notrap v4, v2+1; bin: f3 41 0f 11 45 01
+
+ ; asm: movss %xmm0, 0x100(%r12)
+ store notrap v4, v1+256; bin: f3 41 0f 11 84 24 00000100
+ ; asm: movss %xmm0, 0x100(%r13)
+ store notrap v4, v2+256; bin: f3 41 0f 11 85 00000100
+
+ ; asm: movss %xmm0, (%r12, %rax, 1)
+ store_complex notrap v4, v1+v3; bin: f3 41 0f 11 04 04
+ ; asm: movss %xmm0, (%r13, %rax, 1)
+ store_complex notrap v4, v2+v3; bin: f3 41 0f 11 44 05 00
+
+ return
+}
+
+function %B64() {
+block0:
+ [-, %rax] v1 = bconst.b64 true ; bin: 40 b8 00000001
+ [-, %r10] v0 = bconst.b64 true ; bin: 41 ba 00000001
+ return
+}
+
+function %V128() {
+block0:
+ [-,%r10] v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4
+ [-, %xmm9] v4 = vconst.i32x4 [0 1 2 3] ; bin: 44 0f 10 0d 0000000f PCRelRodata4(33)
+ store v4, v3 ; bin: heap_oob 45 0f 11 0a
+
+ [-, %r11] v5 = iconst.i64 0x1234
+ [-, %xmm2] v6 = load.i32x4 v5 ; bin: heap_oob 41 0f 10 13
+ return
+}
diff --git a/cranelift/filetests/filetests/isa/x86/bitrev-i128-run.clif b/cranelift/filetests/filetests/isa/x86/bitrev-i128-run.clif
new file mode 100644
index 0000000000..4d3fe2ef57
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/bitrev-i128-run.clif
@@ -0,0 +1,46 @@
+test run
+target x86_64
+
+function %reverse_bits_zero() -> b1 {
+block0:
+ v0 = iconst.i64 0
+ v1 = iconcat v0, v0
+ v2 = bitrev.i128 v1
+ v3 = icmp eq v2, v1
+ return v3
+}
+; run
+
+function %reverse_bits_one() -> b1 {
+block0:
+ v0 = iconst.i64 0
+ v1 = iconst.i64 1
+ v2 = iconcat v0, v1
+
+ v3 = bitrev.i128 v2
+
+ v4 = iconst.i64 0x8000_0000_0000_0000
+ v5 = iconst.i64 0
+ v6 = iconcat v4, v5
+
+ v7 = icmp eq v3, v6
+ return v7
+}
+; run
+
+function %reverse_bits() -> b1 {
+block0:
+ v0 = iconst.i64 0x06AD_8667_69EC_41BA
+ v1 = iconst.i64 0x6C83_D81A_6E28_83AB
+ v2 = iconcat v0, v1
+
+ v3 = bitrev.i128 v2
+
+ v4 = iconst.i64 0xD5C11476581BC136
+ v5 = iconst.i64 0x5D823796E661B560
+ v6 = iconcat v4, v5
+
+ v7 = icmp eq v3, v6
+ return v7
+}
+; run
diff --git 
a/cranelift/filetests/filetests/isa/x86/br-i128-run.clif b/cranelift/filetests/filetests/isa/x86/br-i128-run.clif new file mode 100644 index 0000000000..95a1de81cf --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/br-i128-run.clif @@ -0,0 +1,38 @@ +test run +target x86_64 + +function %br_false() -> b1 { +block0: + v10 = iconst.i64 0x42 + v11 = iconst.i64 0x00 + v0 = iconcat v10, v11 + brz v0, block2 + jump block1 + +block1: + v1 = bconst.b1 true + return v1 + +block2: + v2 = bconst.b1 false + return v2 +} +; run + +function %br_true() -> b1 { +block0: + v10 = iconst.i64 0x00 + v11 = iconst.i64 0x00 + v0 = iconcat v10, v11 + brz v0, block2 + jump block1 + +block1: + v1 = bconst.b1 false + return v1 + +block2: + v2 = bconst.b1 true + return v2 +} +; run diff --git a/cranelift/filetests/filetests/isa/x86/br-i128.clif b/cranelift/filetests/filetests/isa/x86/br-i128.clif new file mode 100644 index 0000000000..a1778f4cf5 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/br-i128.clif @@ -0,0 +1,42 @@ +test compile +target x86_64 + +function u0:0(i128) -> i8 fast { +block0(v0: i128): + brz v0, block2 + ; check: v0 = iconcat v3, v4 + ; nextln: v5 = icmp_imm eq v3, 0 + ; nextln: v6 = icmp_imm eq v4, 0 + ; nextln: v7 = band v5, v6 + ; nextln: brnz v7, block2 + jump block1 + +block1: + v1 = iconst.i8 0 + return v1 + +block2: + v2 = iconst.i8 1 + return v2 +} + +function u0:1(i128) -> i8 fast { +block0(v0: i128): + brnz v0, block2 + ; check: v0 = iconcat v3, v4 + ; nextln: brnz v3, block2 + ; nextln: fallthrough block3 + + ; check: block3: + ; nextln: brnz.i64 v4, block2 + jump block1 + ; nextln: fallthrough block1 + +block1: + v1 = iconst.i8 0 + return v1 + +block2: + v2 = iconst.i8 1 + return v2 +} diff --git a/cranelift/filetests/filetests/isa/x86/brz-i8-run.clif b/cranelift/filetests/filetests/isa/x86/brz-i8-run.clif new file mode 100644 index 0000000000..c8520830e6 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/brz-i8-run.clif @@ -0,0 +1,34 @@ +test run +target x86_64 + +function u0:0() -> b1 { +block0: + v0 = iconst.i8 0 + brz v0, block1 + jump block2 + +block1: + v1 = bconst.b1 true + return v1 + +block2: + v2 = bconst.b1 false + return v2 +} +; run + +function u0:1() -> b1 { +block0: + v0 = iconst.i8 0 + brnz v0, block1 + jump block2 + +block1: + v1 = bconst.b1 false + return v1 + +block2: + v2 = bconst.b1 true + return v2 +} +; run diff --git a/cranelift/filetests/filetests/isa/x86/brz-i8.clif b/cranelift/filetests/filetests/isa/x86/brz-i8.clif new file mode 100644 index 0000000000..6c2f95c359 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/brz-i8.clif @@ -0,0 +1,38 @@ +test compile +target x86_64 + +function u0:0() -> b1 { +block0: + v0 = iconst.i8 0 + ; check: v0 = iconst.i8 0 + brz v0, block1 + ; nextln: v3 = uextend.i32 v0 + ; nextln: brz v3, block1 + jump block2 + +block1: + v1 = bconst.b1 true + return v1 + +block2: + v2 = bconst.b1 false + return v2 +} + +function u0:1() -> b1 { +block0: + v0 = iconst.i8 0 + ; check: v0 = iconst.i8 0 + brnz v0, block1 + ; nextln: v3 = uextend.i32 v0 + ; nextln: brnz v3, block1 + jump block2 + +block1: + v1 = bconst.b1 false + return v1 + +block2: + v2 = bconst.b1 true + return v2 +} diff --git a/cranelift/filetests/filetests/isa/x86/compile-vconst.clif b/cranelift/filetests/filetests/isa/x86/compile-vconst.clif new file mode 100644 index 0000000000..f2cb9259e6 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/compile-vconst.clif @@ -0,0 +1,16 @@ +test compile +set enable_simd=true +set enable_probestack=false 
+target x86_64 haswell + +; use baldrdash calling convention here for simplicity (avoids prologue, epilogue) +function %test_vconst_i32() -> i32x4 baldrdash_system_v { +block0: + v0 = vconst.i32x4 0x1234 + return v0 +} + +; check: block0: +; nextln: v0 = vconst.i32x4 0x1234 +; nextln: return v0 +; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/extend-i128-run.clif b/cranelift/filetests/filetests/isa/x86/extend-i128-run.clif new file mode 100644 index 0000000000..3626e5ebf4 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/extend-i128-run.clif @@ -0,0 +1,26 @@ +test run +target x86_64 + +function u0:0() -> b1 { +block0: + v0 = iconst.i64 0xffff_ffff_eeee_0000 + v1 = uextend.i128 v0 + v2, v3 = isplit v1 + v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000 + v5 = icmp_imm eq v3, 0 + v6 = band v4, v5 + return v6 +} +; run + +function u0:1() -> b1 { +block0: + v0 = iconst.i64 0xffff_ffff_eeee_0000 + v1 = sextend.i128 v0 + v2, v3 = isplit v1 + v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000 + v5 = icmp_imm eq v3, 0xffff_ffff_ffff_ffff + v6 = band v4, v5 + return v6 +} +; run diff --git a/cranelift/filetests/filetests/isa/x86/extend-i128.clif b/cranelift/filetests/filetests/isa/x86/extend-i128.clif new file mode 100644 index 0000000000..db2b53276a --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/extend-i128.clif @@ -0,0 +1,37 @@ +test compile +target x86_64 + +function u0:0() -> b1 { +block0: + v0 = iconst.i64 0xffff_ffff_eeee_0000 + ; check: v0 = iconst.i64 0xffff_ffff_eeee_0000 + ; nextln: v2 -> v0 + v1 = uextend.i128 v0 + ; nextln: v7 = iconst.i64 0 + ; nextln: v3 -> v7 + ; nextln: v1 = iconcat v0, v7 + + v2, v3 = isplit v1 + v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000 + v5 = icmp_imm eq v3, 0 + + v6 = band v4, v5 + return v6 +} + +function u0:1() -> b1 { +block0: + v0 = iconst.i64 0xffff_ffff_eeee_0000 + ; check: v0 = iconst.i64 0xffff_ffff_eeee_0000 + ; nextln: v2 -> v0 + v1 = sextend.i128 v0 + ; nextln: v8 = copy v0 + ; nextln: v7 = sshr_imm v8, 63 + ; nextln: v3 -> v7 + + v2, v3 = isplit v1 + v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000 + v5 = icmp_imm eq v3, 0xffff_ffff_ffff_ffff + v6 = band v4, v5 + return v6 +} diff --git a/cranelift/filetests/filetests/isa/x86/extractlane-binemit.clif b/cranelift/filetests/filetests/isa/x86/extractlane-binemit.clif new file mode 100644 index 0000000000..84140a23bd --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/extractlane-binemit.clif @@ -0,0 +1,38 @@ +test binemit +set enable_simd +target x86_64 haswell + +; for extractlane, floats are legalized differently than integers and booleans; integers and +; booleans use x86_pextr which is manually placed in the IR so that it can be binemit-tested + +function %test_extractlane_b8() { +block0: +[-, %rax] v0 = bconst.b8 true +[-, %xmm0] v1 = splat.b8x16 v0 +[-, %rax] v2 = x86_pextr v1, 10 ; bin: 66 0f 3a 14 c0 0a + return +} + +function %test_extractlane_i16() { +block0: +[-, %rax] v0 = iconst.i16 4 +[-, %xmm1] v1 = splat.i16x8 v0 +[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f 3a 15 c8 04 + return +} + +function %test_extractlane_i32() { +block0: +[-, %rax] v0 = iconst.i32 42 +[-, %xmm4] v1 = splat.i32x4 v0 +[-, %rcx] v2 = x86_pextr v1, 2 ; bin: 66 0f 3a 16 e1 02 + return +} + +function %test_extractlane_b64() { +block0: +[-, %rax] v0 = bconst.b64 false +[-, %xmm2] v1 = splat.b64x2 v0 +[-, %rbx] v2 = x86_pextr v1, 1 ; bin: 66 48 0f 3a 16 d3 01 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/extractlane-run.clif b/cranelift/filetests/filetests/isa/x86/extractlane-run.clif 
new file mode 100644 index 0000000000..4e1d735bfe --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/extractlane-run.clif @@ -0,0 +1,68 @@ +test run +set enable_simd + +function %test_extractlane_b8() -> b8 { +block0: + v1 = vconst.b8x16 [false false false false false false false false false false true false false + false false false] + v2 = extractlane v1, 10 + return v2 +} +; run + +function %test_extractlane_i16() -> b1 { +block0: + v0 = vconst.i16x8 0x00080007000600050004000300020001 + v1 = extractlane v0, 1 + v2 = icmp_imm eq v1, 2 + return v2 +} +; run + +function %test_extractlane_f32() -> b1 { +block0: + v0 = f32const 0x42.42 + v1 = vconst.f32x4 [0x00.00 0x00.00 0x00.00 0x42.42] + v2 = extractlane v1, 3 + v3 = fcmp eq v2, v0 + return v3 +} +; run + +function %test_extractlane_i32_with_vector_reuse() -> b1 { +block0: + v0 = iconst.i32 42 + v1 = iconst.i32 99 + + v2 = splat.i32x4 v0 + v3 = insertlane v2, 2, v1 + + v4 = extractlane v3, 3 + v5 = icmp eq v4, v0 + + v6 = extractlane v3, 2 + v7 = icmp eq v6, v1 + + v8 = band v5, v7 + return v8 +} +; run + +function %test_extractlane_f32_with_vector_reuse() -> b1 { +block0: + v0 = f32const 0x42.42 + v1 = f32const 0x99.99 + + v2 = splat.f32x4 v0 + v3 = insertlane v2, 2, v1 + + v4 = extractlane v3, 3 + v5 = fcmp eq v4, v0 + + v6 = extractlane v3, 2 + v7 = fcmp eq v6, v1 + + v8 = band v5, v7 + return v8 +} +; run diff --git a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif new file mode 100644 index 0000000000..4d736287e0 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif @@ -0,0 +1,17 @@ +; Check that floating-point and integer constants equal to zero are optimized correctly. +test binemit +target i686 + +function %foo() -> f32 fast { +block0: + ; asm: xorps %xmm0, %xmm0 + [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 + return v0 +} + +function %bar() -> f64 fast { +block0: + ; asm: xorpd %xmm0, %xmm0 + [-,%xmm0] v1 = f64const 0.0 ; bin: 66 0f 57 c0 + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif new file mode 100644 index 0000000000..25cd686996 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif @@ -0,0 +1,31 @@ +; Check that floating-point constants equal to zero are optimized correctly. 
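+; Zeroing an XMM register is done by xoring it with itself, so a single
+; xorps/xorpd instruction replaces what would otherwise be a load of the
+; constant bits.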
+test binemit +target x86_64 + +function %zero_const_32bit_no_rex() -> f32 fast { +block0: + ; asm: xorps %xmm0, %xmm0 + [-,%xmm0] v0 = f32const 0.0 ; bin: 40 0f 57 c0 + return v0 +} + +function %zero_const_32bit_rex() -> f32 fast { +block0: + ; asm: xorps %xmm8, %xmm8 + [-,%xmm8] v1 = f32const 0.0 ; bin: 45 0f 57 c0 + return v1 +} + +function %zero_const_64bit_no_rex() -> f64 fast { +block0: + ; asm: xorpd %xmm0, %xmm0 + [-,%xmm0] v0 = f64const 0.0 ; bin: 66 40 0f 57 c0 + return v0 +} + +function %zero_const_64bit_rex() -> f64 fast { +block0: + ; asm: xorpd %xmm8, %xmm8 + [-,%xmm8] v1 = f64const 0.0 ; bin: 66 45 0f 57 c0 + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif b/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif new file mode 100644 index 0000000000..493d2e6365 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif @@ -0,0 +1,25 @@ +test compile +target x86_64 + +function u0:0() -> i128 system_v { +block0: + v0 = iconst.i64 0 + v1 = iconst.i64 0 + v2 = iconcat v0, v1 + jump block5 + +block2: + jump block4(v27) + +block4(v23: i128): + return v23 + +block5: + v27 = bxor.i128 v2, v2 + v32 = iconst.i32 0 + brz v32, block2 + jump block6 + +block6: + trap user0 +} diff --git a/cranelift/filetests/filetests/isa/x86/i128.clif b/cranelift/filetests/filetests/isa/x86/i128.clif new file mode 100644 index 0000000000..028fb6e551 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/i128.clif @@ -0,0 +1,46 @@ +test compile +target x86_64 + +function u0:0(i64, i64) -> i128 fast { +block0(v0: i64, v1: i64): +;check: block0(v0: i64 [%rdi], v1: i64 [%rsi], v3: i64 [%rbp]): + + v2 = iconcat.i64 v0, v1 + ; check: regmove v0, %rdi -> %rax + ; check: regmove v1, %rsi -> %rdx + + return v2 + ; check: v4 = x86_pop.i64 + ; check: return v0, v1, v4 +} + +function u0:1(i128) -> i64, i64 fast { +block0(v0: i128): +; check: block0(v3: i64 [%rdi], v4: i64 [%rsi], v5: i64 [%rbp]): + + v1, v2 = isplit v0 + ; check: regmove v3, %rdi -> %rax + ; check: regmove v4, %rsi -> %rdx + + return v1, v2 + ; check: v6 = x86_pop.i64 + ; check: return v3, v4, v6 +} + +function u0:2(i64, i128) fast { +; check: block0(v0: i64 [%rdi], v2: i64 [%rsi], v3: i64 [%rdx], v6: i64 [%rbp]): +block0(v0: i64, v1: i128): + ; check: store v2, v0+8 + ; check: store v3, v0+16 + store v1, v0+8 + return +} + +function u0:3(i64) -> i128 fast { +block0(v0: i64): + ; check: v2 = load.i64 v0+8 + ; check: v3 = load.i64 v0+16 + v1 = load.i128 v0+8 + ; check: return v2, v3, v5 + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/icmp-compile.clif b/cranelift/filetests/filetests/isa/x86/icmp-compile.clif new file mode 100644 index 0000000000..4a4ac0fc59 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/icmp-compile.clif @@ -0,0 +1,35 @@ +test binemit +set enable_simd +target x86_64 skylake + +function %icmp_i8x16() { +block0: +[-, %xmm3] v0 = vconst.i8x16 0x00 ; bin: 66 0f ef db +[-, %xmm4] v1 = vconst.i8x16 0xffffffffffffffffffffffffffffffff ; bin: 66 0f 74 e4 +[-, %xmm3] v2 = icmp eq v0, v1 ; bin: 66 0f 74 dc + return +} + +function %icmp_i16x8() { +block0: +[-, %xmm0] v0 = vconst.i16x8 0x00 +[-, %xmm7] v1 = vconst.i16x8 0xffffffffffffffffffffffffffffffff +[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 75 c7 + return +} + +function %icmp_i32x4() { +block0: +[-, %xmm0] v0 = vconst.i32x4 0x00 +[-, %xmm4] v1 = vconst.i32x4 0xffffffffffffffffffffffffffffffff +[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 76 c4 + return +} + +function 
%icmp_i64x2() { +block0: +[-, %xmm0] v0 = vconst.i64x2 0x00 +[-, %xmm1] v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff +[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 38 29 c1 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/icmp-i128.clif b/cranelift/filetests/filetests/isa/x86/icmp-i128.clif new file mode 100644 index 0000000000..dce0e1db87 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/icmp-i128.clif @@ -0,0 +1,52 @@ +test run +target x86_64 haswell + +function %test_icmp_eq_i128() -> b1 { +block0: + v11 = iconst.i64 0x0 + v12 = iconst.i64 0x0 + v1 = iconcat v11, v12 + v21 = iconst.i64 0x0 + v22 = iconst.i64 0x0 + v2 = iconcat v21, v22 + v10 = icmp.i128 eq v1, v2 + return v10 +} + +; run + +function %test_icmp_imm_eq_i128() -> b1 { +block0: + v11 = iconst.i64 0x0 + v12 = iconst.i64 0x0 + v1 = iconcat v11, v12 + v10 = icmp_imm.i128 eq v1, 0x0 + return v10 +} + +; run + +function %test_icmp_ne_i128() -> b1 { +block0: + v11 = iconst.i64 0x0 + v12 = iconst.i64 0x0 + v1 = iconcat v11, v12 + v21 = iconst.i64 0x0 + v22 = iconst.i64 0x1 + v2 = iconcat v21, v22 + v10 = icmp.i128 ne v1, v2 + return v10 +} + +; run + +function %test_icmp_imm_ne_i128() -> b1 { +block0: + v11 = iconst.i64 0x0 + v12 = iconst.i64 0x0 + v1 = iconcat v11, v12 + v10 = icmp_imm.i128 ne v1, 0x1 + return v10 +} + +; run diff --git a/cranelift/filetests/filetests/isa/x86/icmp-run.clif b/cranelift/filetests/filetests/isa/x86/icmp-run.clif new file mode 100644 index 0000000000..0820cac013 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/icmp-run.clif @@ -0,0 +1,24 @@ +test run +set enable_simd + +function %run_icmp_i8x16() -> b8 { +block0: + v0 = vconst.i8x16 0x00 + v1 = vconst.i8x16 0x00 + v2 = icmp eq v0, v1 + v3 = extractlane v2, 0 + return v3 +} + +; run + +function %run_icmp_i64x2() -> b64 { +block0: + v0 = vconst.i64x2 0xffffffffffffffffffffffffffffffff + v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff + v2 = icmp eq v0, v1 + v3 = extractlane v2, 1 + return v3 +} + +; run diff --git a/cranelift/filetests/filetests/isa/x86/imul-i128.clif b/cranelift/filetests/filetests/isa/x86/imul-i128.clif new file mode 100644 index 0000000000..65d21463fd --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/imul-i128.clif @@ -0,0 +1,20 @@ +test run +target x86_64 haswell + +function %test_imul_i128() -> b1 { +block0: + v11 = iconst.i64 0xf2347ac4503f1e24 + v12 = iconst.i64 0x0098fe985354ab06 + v1 = iconcat v11, v12 + v21 = iconst.i64 0xf606ba453589ef89 + v22 = iconst.i64 0x042e1f3054ca7432 + v2 = iconcat v21, v22 + v31 = iconst.i64 0xbe2044b2742ebd44 + v32 = iconst.i64 0xa363ce3b6849f307 + v3 = iconcat v31, v32 + v4 = imul v1, v2 + v5 = icmp eq v3, v4 + return v5 +} + +; run diff --git a/cranelift/filetests/filetests/isa/x86/insertlane-binemit.clif b/cranelift/filetests/filetests/isa/x86/insertlane-binemit.clif new file mode 100644 index 0000000000..4be35a47b3 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/insertlane-binemit.clif @@ -0,0 +1,42 @@ +test binemit +set enable_simd +target x86_64 haswell + +; for insertlane, floats are legalized differently than integers and booleans; integers and +; booleans use x86_pinsr which is manually placed in the IR so that it can be binemit-tested + +function %test_insertlane_b8() { +block0: +[-, %rax] v0 = bconst.b8 true +[-, %rbx] v1 = bconst.b8 false +[-, %xmm0] v2 = splat.b8x16 v0 +[-, %xmm0] v3 = x86_pinsr v2, 10, v1 ; bin: 66 0f 3a 20 c3 0a + return +} + +function %test_insertlane_i16() { +block0: +[-, %rax] v0 = iconst.i16 4 +[-, %rbx] 
v1 = iconst.i16 5 +[-, %xmm1] v2 = splat.i16x8 v0 +[-, %xmm1] v3 = x86_pinsr v2, 4, v1 ; bin: 66 0f c4 cb 04 + return +} + +function %test_insertlane_i32() { +block0: +[-, %rax] v0 = iconst.i32 42 +[-, %rbx] v1 = iconst.i32 99 +[-, %xmm4] v2 = splat.i32x4 v0 +[-, %xmm4] v3 = x86_pinsr v2, 2, v1 ; bin: 66 0f 3a 22 e3 02 + return +} + +function %test_insertlane_b64() { +block0: +[-, %rax] v0 = bconst.b64 true +[-, %rbx] v1 = bconst.b64 false +[-, %xmm2] v2 = splat.b64x2 v0 +[-, %xmm2] v3 = x86_pinsr v2, 1, v1 ; bin: 66 48 0f 3a 22 d3 01 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/insertlane-run.clif b/cranelift/filetests/filetests/isa/x86/insertlane-run.clif new file mode 100644 index 0000000000..8f1cd7ef46 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/insertlane-run.clif @@ -0,0 +1,48 @@ +test run +set enable_simd + +; TODO once SIMD vector comparison is implemented, remove use of extractlane below + +function %test_insertlane_b8() -> b8 { +block0: + v1 = bconst.b8 true + v2 = vconst.b8x16 [false false false false false false false false false false false false false + false false false] + v3 = insertlane v2, 10, v1 + v4 = extractlane v3, 10 + return v4 +} +; run + +function %test_insertlane_f32() -> b1 { +block0: + v0 = f32const 0x42.42 + v1 = vconst.f32x4 0x00 + v2 = insertlane v1, 1, v0 + v3 = extractlane v2, 1 + v4 = fcmp eq v3, v0 + return v4 +} +; run + +function %test_insertlane_f64_lane1() -> b1 { +block0: + v0 = f64const 0x42.42 + v1 = vconst.f64x2 0x00 + v2 = insertlane v1, 1, v0 + v3 = extractlane v2, 1 + v4 = fcmp eq v3, v0 + return v4 +} +; run + +function %test_insertlane_f64_lane0() -> b1 { +block0: + v0 = f64const 0x42.42 + v1 = vconst.f64x2 0x00 + v2 = insertlane v1, 0, v0 + v3 = extractlane v2, 0 + v4 = fcmp eq v3, v0 + return v4 +} +; run diff --git a/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif b/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif new file mode 100644 index 0000000000..2a283af485 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif @@ -0,0 +1,8 @@ +test compile +target x86_64 + +function u0:0(i16) -> i8 fast { +block0(v0: i16): + v1 = ireduce.i8 v0 + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif b/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif new file mode 100644 index 0000000000..c3ace05158 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif @@ -0,0 +1,20 @@ +test compile +target x86_64 + +function u0:0(i64, i64) -> i128 system_v { +block0(v0: i64, v1: i64): + trap user0 + +block30: + v245 = iconst.i64 0 + v246 = iconcat v245, v245 + ; The next instruction used to be legalized twice, causing a panic the second time. 
+ v250, v251 = isplit.i128 v370 + v252, v253 = isplit v246 + trap user0 + +block45: + v369 = iconst.i64 0 + v370 = load.i128 v369 + trap user0 +} diff --git a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif b/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif new file mode 100644 index 0000000000..018ac95fbc --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif @@ -0,0 +1,14 @@ +test compile +set opt_level=speed_and_size +target x86_64 + +function u0:0(i8) -> i8 fast { +block0(v0: i8): + v1 = iconst.i8 0 + v2 = isub v1, v0 + ; check: v3 = uextend.i32 v0 + ; nextln: v5 = iconst.i32 0 + ; nextln = isub v5, v3 + ; nextln = ireduce.i8 v4 + return v2 +} diff --git a/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif b/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif new file mode 100644 index 0000000000..19f22c3906 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif @@ -0,0 +1,10 @@ +test compile +target x86_64 + +function u0:0(i128) system_v { +block0(v0: i128): + jump block1(v0) + +block1(v1: i128): + return +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif new file mode 100644 index 0000000000..dec3416a89 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif @@ -0,0 +1,10 @@ +test compile + +target x86_64 + +function u0:0() -> i8 fast { +block0: + v14 = bconst.b1 false + v15 = bint.i8 v14 + return v15 +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif b/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif new file mode 100644 index 0000000000..dbd1397e45 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif @@ -0,0 +1,28 @@ +test compile + +target x86_64 + +function u0:51(i64, i64) system_v { + ss0 = explicit_slot 0 + ss1 = explicit_slot 1 + ss2 = explicit_slot 1 + ss3 = explicit_slot 1 + +block0(v0: i64, v1: i64): + v2 = stack_addr.i64 ss1 + v3 = load.i8 v1 + store v3, v2 + v4 = stack_addr.i64 ss2 + v5 = stack_addr.i64 ss3 + jump block1 + +block1: + v6 = load.i8 v2 + store v6, v5 + v7 = load.i8 v5 + v8 = bnot v7 + store v8, v4 + v9 = load.i8 v4 + store v9, v0 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif b/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif new file mode 100644 index 0000000000..5c4004a539 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif @@ -0,0 +1,46 @@ +test legalizer + +target x86_64 + +function %br_icmp(i64) fast { +block0(v0: i64): + v1 = iconst.i64 0 + br_icmp eq v0, v1, block1 + jump block1 + +block1: + return +} + +; sameln: function %br_icmp(i64 [%rdi]) fast { +; nextln: block0(v0: i64): +; nextln: [RexOp1pu_id#b8] v1 = iconst.i64 0 +; nextln: [DynRexOp1icscc#8039] v2 = icmp eq v0, v1 +; nextln: [RexOp1t8jccb#75] brnz v2, block1 +; nextln: [Op1jmpb#eb] jump block1 +; nextln: +; nextln: block1: +; nextln: [Op1ret#c3] return +; nextln: } + + +function %br_icmp_args(i64) fast { +block0(v0: i64): + v1 = iconst.i64 0 + br_icmp eq v0, v1, block1(v0) + jump block1(v0) + +block1(v2: i64): + return +} + +; sameln: function %br_icmp_args(i64 [%rdi]) fast { +; nextln: block0(v0: i64): +; nextln: [RexOp1pu_id#b8] v1 = iconst.i64 0 +; nextln: [DynRexOp1icscc#8039] v3 = icmp eq v0, v1 +; nextln: [RexOp1t8jccb#75] brnz v3, block1(v0) +; nextln: [Op1jmpb#eb] jump block1(v0) +; nextln: +; nextln: block1(v2: i64): +; nextln: [Op1ret#c3] return +; 
nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif b/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif new file mode 100644 index 0000000000..b9ed036755 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif @@ -0,0 +1,31 @@ +test compile +set opt_level=speed_and_size +target x86_64 +; regex: V=v\d+ +; regex: BB=block\d+ + +function u0:0(i64) system_v { + ss0 = explicit_slot 1 + jt0 = jump_table [block1] + +block0(v0: i64): + v1 = stack_addr.i64 ss0 + v2 = load.i8 v1 + br_table v2, block2, jt0 +; check: $(oob=$V) = ifcmp_imm $(idx=$V), 1 +; block2 is replaced by block1 by fold_redundant_jump +; nextln: brif uge $oob, block1 +; nextln: fallthrough $(inb=$BB) +; check: $inb: +; nextln: $(final_idx=$V) = uextend.i64 $idx +; nextln: $(base=$V) = jump_table_base.i64 jt0 +; nextln: $(rel_addr=$V) = jump_table_entry $final_idx, $base, 4, jt0 +; nextln: $(addr=$V) = iadd $base, $rel_addr +; nextln: indirect_jump_table_br $addr, jt0 + +block2: + jump block1 + +block1: + return +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif new file mode 100644 index 0000000000..2c8c8612d6 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif @@ -0,0 +1,36 @@ +test compile +target x86_64 + +; regex: V=v\d+ + +function u0:0(i8, i8) fast { +fn0 = %black_box(i8) +ss0 = explicit_slot 1 ; black box + +block0(v0: i8, v1: i8): + v99 = stack_addr.i64 ss0 + + ; check: istore8 $(V), $(V) + + v2 = band v0, v1 + store v2, v99 + v3 = bor v0, v1 + store v3, v99 + v4 = bxor v0, v1 + store v4, v99 + v5 = bnot v0 + store v5, v99 + v6 = band_not v0, v1 + store v6, v99 + v7 = bor_not v0, v1 + store v7, v99 + v8 = bxor_not v0, v1 + store v8, v99 + v9 = band_imm v0, 42 + store v9, v99 + v10 = bor_imm v0, 42 + store v10, v99 + v11 = bxor_imm v0, 42 + store v11, v99 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-call.clif b/cranelift/filetests/filetests/isa/x86/legalize-call.clif new file mode 100644 index 0000000000..c761a8d5aa --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-call.clif @@ -0,0 +1,14 @@ +; Test legalization of a non-colocated call in 64-bit non-PIC mode. 
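+; A non-colocated callee can end up anywhere in the address space, so the
+; call is rewritten to materialize the absolute address (func_addr) and
+; call through a register (call_indirect) instead of relying on a rel32
+; displacement reaching the callee.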
+test legalizer +set opt_level=speed_and_size +target x86_64 haswell + +function %call() { + fn0 = %foo() +block0: + call fn0() + return +} + +; check: v0 = func_addr.i64 fn0 +; nextln: call_indirect sig0, v0() diff --git a/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif new file mode 100644 index 0000000000..8e63f1e0c6 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif @@ -0,0 +1,25 @@ +test compile +target x86_64 + +; regex: V=v\d+ + +function u0:0(i8) -> i8, i8 fast { +block0(v0: i8): + v1 = clz v0 + ; check: v3 = uextend.i32 v0 + ; nextln: v6 = iconst.i32 -1 + ; nextln: v7 = iconst.i32 31 + ; nextln: v8, v9 = x86_bsr v3 + ; nextln: v10 = selectif.i32 eq v9, v6, v8 + ; nextln: v4 = isub v7, v10 + ; nextln: v5 = iadd_imm v4, -24 + ; nextln: v1 = ireduce.i8 v5 + v2 = ctz v0 + ; nextln: v11 = uextend.i32 v0 + ; nextln: v12 = bor_imm v11, 256 + ; nextln: v14 = iconst.i32 32 + ; nextln: v15, v16 = x86_bsf v12 + ; nextln: v13 = selectif.i32 eq v16, v14, v15 + ; nextln: v2 = ireduce.i8 v13 + return v1, v2 +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-custom.clif b/cranelift/filetests/filetests/isa/x86/legalize-custom.clif new file mode 100644 index 0000000000..3ee674d66a --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-custom.clif @@ -0,0 +1,133 @@ +; Test the custom legalizations. +test legalizer +target i686 +target x86_64 + +; regex: V=v\d+ +; regex: BB=block\d+ + +function %cond_trap(i32) { +block0(v1: i32): + trapz v1, user67 + return + ; check: block0(v1: i32 + ; nextln: $(f=$V) = ifcmp_imm v1, 0 + ; nextln: trapif eq $f, user67 + ; nextln: return +} + +function %cond_trap2(i32) { +block0(v1: i32): + trapnz v1, int_ovf + return + ; check: block0(v1: i32 + ; nextln: $(f=$V) = ifcmp_imm v1, 0 + ; nextln: trapif ne $f, int_ovf + ; nextln: return +} + +function %cond_trap_b1(i32) { +block0(v1: i32): + v2 = icmp_imm eq v1, 6 + trapz v2, user7 + return + ; check: block0(v1: i32 + ; check: brnz v2, $(new=$BB) + ; check: jump $(trap=$BB) + ; check: $trap: + ; nextln: trap user7 + ; check: $new: + ; nextln: return +} + +function %cond_trap2_b1(i32) { +block0(v1: i32): + v2 = icmp_imm eq v1, 6 + trapnz v2, user9 + return + ; check: block0(v1: i32 + ; check: brz v2, $(new=$BB) + ; check: jump $(trap=$BB) + ; check: $trap: + ; nextln: trap user9 + ; check: $new: + ; nextln: return +} + +function %f32const() -> f32 { +block0: + v1 = f32const 0x1.0p1 + ; check: $(tmp=$V) = iconst.i32 + ; check: v1 = bitcast.f32 $tmp + return v1 +} + +function %select_f64(f64, f64, i32) -> f64 { +block0(v0: f64, v1: f64, v2: i32): + v3 = select v2, v0, v1 + ; check: brnz v2, $(new=$BB)(v0) + ; nextln: jump $new(v1) + ; check: $new(v3: f64): + ; nextln: return v3 + return v3 +} + +function %f32_min(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmin v0, v1 + return v2 + ; check: $(vnat=$V) = x86_fmin.f32 v0, v1 + ; nextln: jump $(done=$BB)($vnat) + + ; check: $(uno=$BB): + ; nextln: $(vuno=$V) = fadd.f32 v0, v1 + ; nextln: jump $(done=$BB)($vuno) + + ; check: $(ueq=$BB): + ; check: $(veq=$V) = bor.f32 v0, v1 + ; nextln: jump $(done=$BB)($veq) + + ; check: $done(v2: f32): + ; nextln: return v2 +} + +function %ineg_legalized_i8() { +block0: + v0 = iconst.i8 1 + v1 = ineg v0 + ; check: v2 = iconst.i32 1 + ; nextln: v0 = ireduce.i8 v2 + ; nextln: v3 = iconst.i8 0 + ; nextln: v4 = uextend.i32 v3 + ; nextln: v5 = uextend.i32 v0 + ; nextln: v6 = isub v4, v5 + ; nextln: v1 = 
ireduce.i8 v6 + + return +} + +function %ineg_legalized_i16() { +block0: + v0 = iconst.i16 1 + v1 = ineg v0 + ; check: v2 = iconst.i32 1 + ; nextln: v0 = ireduce.i16 v2 + ; nextln: v3 = iconst.i16 0 + ; nextln: v4 = uextend.i32 v3 + ; nextln: v5 = uextend.i32 v0 + ; nextln: v6 = isub v4, v5 + ; nextln: v1 = ireduce.i16 v6 + + return +} + +function %ineg_legalized_i32() { +block0: + v0 = iconst.i32 1 + v1 = ineg v0 + ; check: v0 = iconst.i32 1 + ; nextln: v2 = iconst.i32 0 + ; nextln: v1 = isub v2, v0 + + return +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif b/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif new file mode 100644 index 0000000000..1be81ec186 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif @@ -0,0 +1,192 @@ +; Test the division legalizations. +test legalizer +; See also legalize-div.clif. +set avoid_div_traps=1 +target x86_64 + +; regex: V=v\d+ +; regex: BB=block\d+ + +function %udiv(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + ; check: block0( + v2 = udiv v0, v1 + ; nextln: $(fz=$V) = ifcmp_imm v1, 0 + ; nextln: trapif eq $fz, int_divz + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 + return v2 + ; nextln: return $d +} + +function %udiv_0(i64) -> i64 { +block0(v0: i64): + ; check: block0( + v1 = iconst.i64 0 + ; nextln: v1 = iconst.i64 0 + v2 = udiv v0, v1 + ; nextln: $(fz=$V) = ifcmp_imm v1, 0 + ; nextln: trapif eq $fz, int_divz + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 + return v2 + ; nextln: return $d +} + +function %udiv_minus_1(i64) -> i64 { +block0(v0: i64): + ; check: block0( + v1 = iconst.i64 -1 + ; nextln: v1 = iconst.i64 -1 + v2 = udiv v0, v1 + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 + return v2 + ; nextln: return $d +} + +function %urem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + ; check: block0( + v2 = urem v0, v1 + ; nextln: $(fz=$V) = ifcmp_imm v1, 0 + ; nextln: trapif eq $fz, int_divz + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 + return v2 + ; nextln: return $r +} + +function %urem_0(i64) -> i64 { +block0(v0: i64): + ; check: block0( + v1 = iconst.i64 0 + ; nextln: v1 = iconst.i64 0 + v2 = urem v0, v1 + ; nextln: $(fz=$V) = ifcmp_imm v1, 0 + ; nextln: trapif eq $fz, int_divz + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 + return v2 + ; nextln: return $r +} + +function %urem_minus_1(i64) -> i64 { +block0(v0: i64): + ; check: block0( + v1 = iconst.i64 -1 + ; nextln: v1 = iconst.i64 -1 + v2 = urem v0, v1 + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 + return v2 + ; nextln: return $r +} + +function %sdiv(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + ; check: block0( + v2 = sdiv v0, v1 + ; nextln: $(fz=$V) = ifcmp_imm v1, 0 + ; nextln: trapif eq $fz, int_divz + ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 + ; nextln: brif eq $fm1, $(m1=$BB) + ; check: $(hi=$V) = sshr_imm + ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 + ; nextln: jump $(done=$BB)($q) + ; check: $m1: + ; nextln: $(imin=$V) = iconst.i64 0x8000_0000_0000_0000 + ; nextln: $(fm=$V) = ifcmp.i64 v0, $imin + ; nextln: trapif eq $fm, int_ovf + ; check: $done(v2: i64): + return v2 + ; nextln: return v2 +} + +function %sdiv_0(i64) -> i64 { +block0(v0: i64): + ; check: block0( + v1 = iconst.i64 0 + ; nextln: v1 = iconst.i64 0 + v2 = sdiv v0, v1 
+ ; nextln: $(fz=$V) = ifcmp_imm v1, 0 + ; nextln: trapif eq $fz, int_divz + ; check: $(hi=$V) = sshr_imm + ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 + return v2 + ; nextln: return v2 +} + +function %sdiv_minus_1(i64) -> i64 { +block0(v0: i64): + ; check: block0( + v1 = iconst.i64 -1 + ; nextln: v1 = iconst.i64 -1 + v2 = sdiv v0, v1 + ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 + ; nextln: brif eq $fm1, $(m1=$BB) + ; check: $(hi=$V) = sshr_imm + ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 + ; nextln: jump $(done=$BB)($q) + ; check: $m1: + ; nextln: $(imin=$V) = iconst.i64 0x8000_0000_0000_0000 + ; nextln: $(fm=$V) = ifcmp.i64 v0, $imin + ; nextln: trapif eq $fm, int_ovf + ; check: $done(v2: i64): + return v2 + ; nextln: return v2 +} + +; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1. +; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern. +function %srem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + ; check: block0( + v2 = srem v0, v1 + ; nextln: $(fz=$V) = ifcmp_imm v1, 0 + ; nextln: trapif eq $fz, int_divz + ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 + ; nextln: brif eq $fm1, $(m1=$BB) + ; check: $(hi=$V) = sshr_imm + ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 + ; nextln: jump $(done=$BB)($r) + ; check: $m1: + ; nextln: $(zero=$V) = iconst.i64 0 + ; nextln: jump $(done=$BB)($zero) + ; check: $done(v2: i64): + return v2 + ; nextln: return v2 +} + +function %srem_0(i64) -> i64 { +block0(v0: i64): + ; check: block0( + v1 = iconst.i64 0 + ; nextln: v1 = iconst.i64 0 + v2 = srem v0, v1 + ; nextln: $(fz=$V) = ifcmp_imm v1, 0 + ; nextln: trapif eq $fz, int_divz + ; check: $(hi=$V) = sshr_imm + ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 + return v2 + ; nextln: return v2 +} + +function %srem_minus_1(i64) -> i64 { +block0(v0: i64): + ; check: block0( + v1 = iconst.i64 -1 + ; nextln: v1 = iconst.i64 -1 + v2 = srem v0, v1 + ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 + ; nextln: brif eq $fm1, $(m1=$BB) + ; check: $(hi=$V) = sshr_imm + ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 + ; nextln: jump $(done=$BB)($r) + ; check: $m1: + ; nextln: $(zero=$V) = iconst.i64 0 + ; nextln: jump $(done=$BB)($zero) + ; check: $done(v2: i64): + return v2 + ; nextln: return v2 +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-div.clif b/cranelift/filetests/filetests/isa/x86/legalize-div.clif new file mode 100644 index 0000000000..b9f115b85b --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-div.clif @@ -0,0 +1,57 @@ +; Test the division legalizations. +test legalizer +; See also legalize-div-traps.clif. +set avoid_div_traps=0 +target x86_64 + +; regex: V=v\d+ +; regex: BB=block\d+ + +function %udiv(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + ; check: block0( + v2 = udiv v0, v1 + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 + return v2 + ; nextln: return $d +} + +function %urem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + ; check: block0( + v2 = urem v0, v1 + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 + return v2 + ; nextln: return $r +} + +function %sdiv(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + ; check: block0( + v2 = sdiv v0, v1 + ; check: $(hi=$V) = sshr_imm + ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 + return v2 + ; nextln: return $d +} + +; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1. 
+; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern. +function %srem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + ; check: block0( + v2 = srem v0, v1 + ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 + ; nextln: brif eq $fm1, $(m1=$BB) + ; check: $(hi=$V) = sshr_imm + ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 + ; nextln: jump $(done=$BB)($r) + ; check: $m1: + ; nextln: $(zero=$V) = iconst.i64 0 + ; nextln: jump $(done=$BB)($zero) + ; check: $done(v2: i64): + return v2 + ; nextln: return v2 +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif b/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif new file mode 100644 index 0000000000..382c6ba80a --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif @@ -0,0 +1,13 @@ +; Test the legalization of f64const. +test legalizer +target x86_64 + +; regex: V=v\d+ + +function %f64const() -> f64 { +block0: + v1 = f64const 0x1.0p1 + ; check: $(tmp=$V) = iconst.i64 + ; check: v1 = bitcast.f64 $tmp + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif b/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif new file mode 100644 index 0000000000..c11e77f2c7 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif @@ -0,0 +1,14 @@ +test compile +target x86_64 + +function u0:0(i16) -> f64 fast { +block0(v0: i16): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +function u0:1(i16) -> f64 fast { +block0(v0: i16): + v1 = fcvt_from_sint.f64 v0 + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif b/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif new file mode 100644 index 0000000000..5fb080f8a6 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif @@ -0,0 +1,122 @@ +test legalizer +target x86_64 + +; Test legalization for various forms of heap addresses. 
+; regex: BB=block\d+ + +function %heap_addrs(i32, i64, i64 vmctx) { + gv4 = vmctx + gv0 = iadd_imm.i64 gv4, 64 + gv1 = iadd_imm.i64 gv4, 72 + gv2 = iadd_imm.i64 gv4, 80 + gv3 = load.i32 notrap aligned gv4+88 + + heap0 = static gv0, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 + heap1 = static gv0, offset_guard 0x1000, bound 0x1_0000, index_type i32 + heap2 = static gv0, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i64 + heap3 = static gv0, offset_guard 0x1000, bound 0x1_0000, index_type i64 + heap4 = dynamic gv1, min 0x1_0000, bound gv3, offset_guard 0x8000_0000, index_type i32 + heap5 = dynamic gv1, bound gv3, offset_guard 0x1000, index_type i32 + heap6 = dynamic gv1, min 0x1_0000, bound gv2, offset_guard 0x8000_0000, index_type i64 + heap7 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i64 + + ; check: heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 + ; check: heap1 = static gv0, min 0, bound 0x0001_0000, offset_guard 4096, index_type i32 + ; check: heap2 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i64 + ; check: heap3 = static gv0, min 0, bound 0x0001_0000, offset_guard 4096, index_type i64 + ; check: heap4 = dynamic gv1, min 0x0001_0000, bound gv3, offset_guard 0x8000_0000, index_type i32 + ; check: heap5 = dynamic gv1, min 0, bound gv3, offset_guard 4096, index_type i32 + ; check: heap6 = dynamic gv1, min 0x0001_0000, bound gv2, offset_guard 0x8000_0000, index_type i64 + ; check: heap7 = dynamic gv1, min 0, bound gv2, offset_guard 4096, index_type i64 + +block0(v0: i32, v1: i64, v3: i64): + ; The fast path: 32-bit index, static heap with a sufficient bound, no bounds check needed!
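+ ; With bound 0x1_0000_0000 and a 0x8000_0000 offset_guard, any i32 index is already + ; below the bound, and small constant offsets fall into the guard region, so the + ; legalizer can emit just a uextend and an iadd, as checked below.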
+ v4 = heap_addr.i64 heap0, v0, 0 + ; check: v12 = uextend.i64 v0 + ; check: v13 = iadd_imm v3, 64 + ; check: v4 = iadd v13, v12 + + v5 = heap_addr.i64 heap1, v0, 0 + ; check: v14 = icmp_imm ugt v0, 0x0001_0000 + ; check: brz v14, $(resume_1=$BB) + ; nextln: jump $(trap_1=$BB) + ; check: $trap_1: + ; nextln: trap heap_oob + ; check: $resume_1: + ; check: v15 = uextend.i64 v0 + ; check: v16 = iadd_imm.i64 v3, 64 + ; check: v5 = iadd v16, v15 + + v6 = heap_addr.i64 heap2, v1, 0 + ; check: v19 = iconst.i64 0x0001_0000_0000 + ; check: v17 = icmp.i64 ugt v1, v19 + ; check: brz v17, $(resume_2=$BB) + ; nextln: jump $(trap_2=$BB) + ; check: $trap_2: + ; nextln: trap heap_oob + ; check: $resume_2: + ; check: v18 = iadd_imm.i64 v3, 64 + ; check: v6 = iadd v18, v1 + + v7 = heap_addr.i64 heap3, v1, 0 + ; check: v20 = icmp_imm.i64 ugt v1, 0x0001_0000 + ; check: brz v20, $(resume_3=$BB) + ; nextln: jump $(trap_3=$BB) + ; check: $trap_3: + ; nextln: trap heap_oob + ; check: $resume_3: + ; check: v21 = iadd_imm.i64 v3, 64 + ; check: v7 = iadd v21, v1 + + v8 = heap_addr.i64 heap4, v0, 0 + ; check: v22 = load.i32 notrap aligned v3+88 + ; check: v23 = iadd_imm v22, 0 + ; check: v24 = icmp.i32 ugt v0, v23 + ; check: brz v24, $(resume_4=$BB) + ; nextln: jump $(trap_4=$BB) + ; check: $trap_4: + ; nextln: trap heap_oob + ; check: $resume_4: + ; check: v25 = uextend.i64 v0 + ; check: v26 = iadd_imm.i64 v3, 72 + ; check: v8 = iadd v26, v25 + + v9 = heap_addr.i64 heap5, v0, 0 + ; check: v27 = load.i32 notrap aligned v3+88 + ; check: v28 = iadd_imm v27, 0 + ; check: v29 = icmp.i32 ugt v0, v28 + ; check: brz v29, $(resume_5=$BB) + ; nextln: jump $(trap_5=$BB) + ; check: $trap_5: + ; nextln: trap heap_oob + ; check: $resume_5: + ; check: v30 = uextend.i64 v0 + ; check: v31 = iadd_imm.i64 v3, 72 + ; check: v9 = iadd v31, v30 + + v10 = heap_addr.i64 heap6, v1, 0 + ; check: v32 = iadd_imm.i64 v3, 80 + ; check: v33 = iadd_imm v32, 0 + ; check: v34 = icmp.i64 ugt v1, v33 + ; check: brz v34, $(resume_6=$BB) + ; nextln: jump $(trap_6=$BB) + ; check: $trap_6: + ; nextln: trap heap_oob + ; check: $resume_6: + ; check: v35 = iadd_imm.i64 v3, 72 + ; check: v10 = iadd v35, v1 + + v11 = heap_addr.i64 heap7, v1, 0 + ; check: v36 = iadd_imm.i64 v3, 80 + ; check: v37 = iadd_imm v36, 0 + ; check: v38 = icmp.i64 ugt v1, v37 + ; check: brz v38, $(resume_7=$BB) + ; nextln: jump $(trap_7=$BB) + ; check: $trap_7: + ; nextln: trap heap_oob + ; check: $resume_7: + ; check: v39 = iadd_imm.i64 v3, 72 + ; check: v11 = iadd v39, v1 + + return +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-i128.clif b/cranelift/filetests/filetests/isa/x86/legalize-i128.clif new file mode 100644 index 0000000000..81a2d1ecdd --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-i128.clif @@ -0,0 +1,20 @@ +; Test the legalization of i128 instructions on x86_64. 
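+; For imul below, writing a = a_hi*2^64 + a_lo and b = b_hi*2^64 + b_lo, the low 128 +; bits of a*b are a_lo*b_lo plus 2^64 * (a_hi*b_lo + a_lo*b_hi + the high half of +; a_lo*b_lo, computed by x86_umulx), which is exactly the sequence checked.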
+test legalizer +target x86_64 haswell + +; regex: V=v\d+ + +function %imul(i128, i128) -> i128 { +block0(v1: i128, v2: i128): + v10 = imul v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(v11=$V) = imul $v1_msb, $v2_lsb + ; nextln: $(v12=$V) = imul $v1_lsb, $v2_msb + ; nextln: $(v13=$V) = iadd $v11, $v12 + ; nextln: $(v99=$V), $(v14=$V) = x86_umulx $v1_lsb, $v2_lsb + ; nextln: $(v10_msb=$V) = iadd $v13, $v14 + ; nextln: $(v10_lsb=$V) = imul $v1_lsb, $v2_lsb + ; nextln: v10 = iconcat $v10_lsb, $v10_msb + return v10 +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-i64.clif b/cranelift/filetests/filetests/isa/x86/legalize-i64.clif new file mode 100644 index 0000000000..94fbc82015 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-i64.clif @@ -0,0 +1,357 @@ +; Test the legalization of i64 instructions on x86_32. +test legalizer +target i686 haswell + +; regex: V=v\d+ + +function %iadd(i64, i64) -> i64 { +block0(v1: i64, v2: i64): + v10 = iadd v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(v10_lsb=$V), $(carry=$V) = iadd_ifcout $v1_lsb, $v2_lsb + ; nextln: $(v10_msb=$V) = iadd_ifcin $v1_msb, $v2_msb, $carry + ; nextln: v10 = iconcat $v10_lsb, $v10_msb + return v10 +} + +function %isub(i64, i64) -> i64 { +block0(v1: i64, v2: i64): + v10 = isub v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(v10_lsb=$V), $(borrow=$V) = isub_ifbout $v1_lsb, $v2_lsb + ; nextln: $(v10_msb=$V) = isub_ifbin $v1_msb, $v2_msb, $borrow + ; nextln: v10 = iconcat $v10_lsb, $v10_msb + return v10 +} + +function %imul(i64, i64) -> i64 { +block0(v1: i64, v2: i64): + v10 = imul v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(v11=$V) = imul $v1_msb, $v2_lsb + ; nextln: $(v12=$V) = imul $v1_lsb, $v2_msb + ; nextln: $(v13=$V) = iadd $v11, $v12 + ; nextln: $(v99=$V), $(v14=$V) = x86_umulx $v1_lsb, $v2_lsb + ; nextln: $(v10_msb=$V) = iadd $v13, $v14 + ; nextln: $(v10_lsb=$V) = imul $v1_lsb, $v2_lsb + ; nextln: v10 = iconcat $v10_lsb, $v10_msb + return v10 +} + +function %icmp_eq(i64, i64) -> b1 { +block0(v1: i64, v2: i64): + v10 = icmp eq v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(v10_lsb=$V) = icmp eq $v1_lsb, $v2_lsb + ; nextln: $(v10_msb=$V) = icmp eq $v1_msb, $v2_msb + ; nextln: v10 = band $v10_lsb, $v10_msb + return v10 +} + +function %icmp_imm_eq(i64) -> b1 { +block0(v1: i64): + v10 = icmp_imm eq v1, 0 + ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) + ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) + ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) + ; nextln: $(v2_lsb=$V) = iconst.i32 0 + ; nextln: $(v2_msb=$V) = iconst.i32 0 + ; nextln: $(v10_lsb=$V) = icmp eq $v1_lsb, $v2_lsb + ; nextln: $(v10_msb=$V) = icmp eq $v1_msb, $v2_msb + ; nextln: v10 = band $v10_lsb, $v10_msb + return v10 +} + +function %icmp_ne(i64, i64) -> b1 { +block0(v1: i64, v2: i64): + v10 = icmp ne v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(v10_lsb=$V) = icmp ne $v1_lsb, $v2_lsb + ; nextln: $(v10_msb=$V) = icmp ne $v1_msb, $v2_msb + ; nextln: v10 = bor $v10_lsb, $v10_msb + return v10 +} + +function %icmp_imm_ne(i64) -> b1 { +block0(v1: i64): + v10 = icmp_imm ne v1, 0 + ; check: 
$(v1_lsb=$V) -> $(v1_lsb_a=$V) + ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) + ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) + ; nextln: $(v2_lsb=$V) = iconst.i32 0 + ; nextln: $(v2_msb=$V) = iconst.i32 0 + ; nextln: $(v10_lsb=$V) = icmp ne $v1_lsb, $v2_lsb + ; nextln: $(v10_msb=$V) = icmp ne $v1_msb, $v2_msb + ; nextln: v10 = bor $v10_lsb, $v10_msb + return v10 +} + +function %icmp_sgt(i64, i64) -> b1 { +block0(v1: i64, v2: i64): + v10 = icmp sgt v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %icmp_imm_sgt(i64) -> b1 { +block0(v1: i64): + v10 = icmp_imm sgt v1, 0 + ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) + ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) + ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) + ; nextln: $(v2_lsb=$V) = iconst.i32 0 + ; nextln: $(v2_msb=$V) = iconst.i32 0 + ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %icmp_sge(i64, i64) -> b1 { +block0(v1: i64, v2: i64): + v10 = icmp sge v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %icmp_imm_sge(i64) -> b1 { +block0(v1: i64): + v10 = icmp_imm sge v1, 0 + ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) + ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) + ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) + ; nextln: $(v2_lsb=$V) = iconst.i32 0 + ; nextln: $(v2_msb=$V) = iconst.i32 0 + ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %icmp_slt(i64, i64) -> b1 { +block0(v1: i64, v2: i64): + v10 = icmp slt v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %icmp_imm_slt(i64) -> b1 { +block0(v1: i64): + v10 = icmp_imm slt v1, 0 + ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) + ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) + ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) + ; nextln: $(v2_lsb=$V) = iconst.i32 0 + ; nextln: $(v2_msb=$V) = iconst.i32 0 + ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %icmp_sle(i64, i64) -> b1 { +block0(v1: i64, v2: i64): + v10 = icmp sle v1, v2 + ; check: v1 = 
iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %icmp_imm_sle(i64) -> b1 { +block0(v1: i64): + v10 = icmp_imm sle v1, 0 + ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) + ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) + ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) + ; nextln: $(v2_lsb=$V) = iconst.i32 0 + ; nextln: $(v2_msb=$V) = iconst.i32 0 + ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %icmp_ugt(i64, i64) -> b1 { +block0(v1: i64, v2: i64): + v10 = icmp ugt v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %icmp_imm_ugt(i64) -> b1 { +block0(v1: i64): + v10 = icmp_imm ugt v1, 0 + ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) + ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) + ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) + ; nextln: $(v2_lsb=$V) = iconst.i32 0 + ; nextln: $(v2_msb=$V) = iconst.i32 0 + ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %icmp_uge(i64, i64) -> b1 { +block0(v1: i64, v2: i64): + v10 = icmp uge v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %icmp_imm_uge(i64) -> b1 { +block0(v1: i64): + v10 = icmp_imm uge v1, 0 + ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) + ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) + ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) + ; nextln: $(v2_lsb=$V) = iconst.i32 0 + ; nextln: $(v2_msb=$V) = iconst.i32 0 + ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %icmp_ult(i64, i64) -> b1 { +block0(v1: i64, v2: i64): + v10 = icmp ult v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %icmp_imm_ult(i64) -> b1 { +block0(v1: i64): + v10 = icmp_imm ult v1, 0 + ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) + ; 
nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) + ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) + ; nextln: $(v2_lsb=$V) = iconst.i32 0 + ; nextln: $(v2_msb=$V) = iconst.i32 0 + ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %icmp_ule(i64, i64) -> b1 { +block0(v1: i64, v2: i64): + v10 = icmp ule v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %icmp_imm_ule(i64) -> b1 { +block0(v1: i64): + v10 = icmp_imm ule v1, 0 + ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) + ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) + ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) + ; nextln: $(v2_lsb=$V) = iconst.i32 0 + ; nextln: $(v2_msb=$V) = iconst.i32 0 + ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb + ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb + ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb + ; nextln: $(c1=$V) = bnot $b2 + ; nextln: $(c2=$V) = band $c1, $b3 + ; nextln: v10 = bor $b1, $c2 + return v10 +} + +function %ineg_legalized_i64() { +block0: + v0 = iconst.i64 1 + v1 = ineg v0 + ; check: v2 = iconst.i32 1 + ; nextln: v3 = iconst.i32 0 + ; nextln: v0 = iconcat v2, v3 + ; nextln: v5 = iconst.i32 0 + ; nextln: v6 = iconst.i32 0 + ; nextln: v4 = iconcat v5, v6 + ; nextln: v7, v8 = isub_ifbout v5, v2 + ; nextln: v9 = isub_ifbin v6, v3, v8 + ; nextln: v1 = iconcat v7, v9 + + return +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif new file mode 100644 index 0000000000..2519d3b484 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif @@ -0,0 +1,19 @@ +test compile +target x86_64 + +; regex: V=v\d+ + +function u0:0(i8, i8) -> i8 fast { +block0(v0: i8, v1: i8): + v2 = icmp_imm sle v0, 0 + ; check: $(e1=$V) = sextend.i32 v0 + ; nextln: v2 = icmp_imm sle $e1, 0 + v3 = bint.i8 v2 + v4 = icmp eq v0, v1 + ; check: $(e2=$V) = uextend.i32 v0 + ; nextln: $(e3=$V) = uextend.i32 v1 + ; nextln: v4 = icmp eq $e2, $e3 + v5 = bint.i8 v4 + v6 = iadd v3, v5 + return v6 +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif new file mode 100644 index 0000000000..39908d1f1d --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif @@ -0,0 +1,18 @@ +test compile + +target x86_64 + +function u0:0(i64) system_v { + ss0 = explicit_slot 0 + +block0(v0: i64): + jump block1 + +block1: +; _0 = const 42u8 + v1 = iconst.i8 42 + store v1, v0 +; +; return + return +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif new file mode 100644 index 0000000000..6902636008 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif @@ -0,0 +1,11 @@ +test compile + +target x86_64 + +function u0:0(i64, i8, i8) system_v { + +block0(v0: i64, v1: i8, v2: i8): + v11 = imul v1, v2 + store v11, v0 + return +} diff --git 
a/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif new file mode 100644 index 0000000000..82d3fa26ce --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif @@ -0,0 +1,15 @@ +test compile + +target x86_64 + +function u0:0(i64, i8) system_v { + ss0 = explicit_slot 1 + +block0(v0: i64, v1: i8): + v3 = stack_addr.i64 ss0 + v5 = load.i8 v3 + v6 = iconst.i8 2 + v7 = imul_imm v5, 42 + store v7, v0 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif b/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif new file mode 100644 index 0000000000..fac17d6ff1 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif @@ -0,0 +1,13 @@ +; Test the custom legalization of ineg.i64 on x86_64. +test legalizer +target x86_64 + +function %ineg_legalized_i64() { +block0: + v0 = iconst.i64 1 + v1 = ineg v0 + ; check: v0 = iconst.i64 1 + ; nextln: v2 = iconst.i64 0 + ; nextln: v1 = isub v2, v0 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif b/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif new file mode 100644 index 0000000000..5a903350b5 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif @@ -0,0 +1,24 @@ +test compile +target x86_64 + +function u0:0(i128) -> i64, i64 fast { +; check: block0(v4: i64 [%rdi], v5: i64 [%rsi], v8: i64 [%rbp]): +block0(v0: i128): + jump block2 + +block1: + ; When this `isplit` is legalized, the bnot below is not yet legalized, + ; so there isn't a corresponding `iconcat` yet. We should try legalization + ; for this `isplit` again once all instructions have been legalized. + v2, v3 = isplit.i128 v1 + ; return v6, v7 + return v2, v3 + +block2: + ; check: v6 = bnot.i64 v4 + ; check: v2 -> v6 + ; check: v7 = bnot.i64 v5 + ; check: v3 -> v7 + v1 = bnot.i128 v0 + jump block1 +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif b/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif new file mode 100644 index 0000000000..8ddb0865f8 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif @@ -0,0 +1,15 @@ +test legalizer + +; Pre-SSE 4.1, we need to use runtime library calls for floating-point rounding operations.
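+; SSE 4.1 added the roundss/roundsd instructions; without them, a rounding operation +; such as floor becomes a call to a libcall like %FloorF32, as the checks below expect.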
+set is_pic +target x86_64 + +function %floor(f32) -> f32 { +block0(v0: f32): + v1 = floor v0 + return v1 +} +; check: function %floor(f32 [%xmm0]) -> f32 [%xmm0] fast { +; check: sig0 = (f32 [%xmm0]) -> f32 [%xmm0] system_v +; check: fn0 = %FloorF32 sig0 +; check: v1 = call fn0(v0) diff --git a/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif new file mode 100644 index 0000000000..2fcb086e72 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif @@ -0,0 +1,31 @@ +test compile + +target x86_64 + +function u0:0(i64, i8, i8) system_v { + ss0 = explicit_slot 0 + ss1 = explicit_slot 1 + ss2 = explicit_slot 1 + ss3 = explicit_slot 1 + ss4 = explicit_slot 1 + +block0(v0: i64, v1: i8, v2: i8): + v3 = stack_addr.i64 ss1 + store v1, v3 + v4 = stack_addr.i64 ss2 + store v2, v4 + v5 = stack_addr.i64 ss3 + v6 = stack_addr.i64 ss4 + jump block1 + +block1: + v7 = load.i8 v3 + store v7, v5 + v8 = load.i8 v4 + store v8, v6 + v9 = load.i8 v5 + v10 = load.i8 v6 + v11 = imul v9, v10 + store v11, v0 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-memory.clif b/cranelift/filetests/filetests/isa/x86/legalize-memory.clif new file mode 100644 index 0000000000..78d1796d00 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-memory.clif @@ -0,0 +1,114 @@ +; Test the legalization of memory objects. +test legalizer +target x86_64 + +; regex: V=v\d+ +; regex: BB=block\d+ + +function %vmctx(i64 vmctx) -> i64 { + gv0 = vmctx + gv1 = iadd_imm.i64 gv0, -16 + +block1(v1: i64): + v2 = global_value.i64 gv1 + ; check: v2 = iadd_imm v1, -16 + return v2 + ; check: return v2 +} + +function %load(i64 vmctx) -> i64 { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0-16 + gv2 = iadd_imm.i64 gv1, 32 + +block1(v1: i64): + v2 = global_value.i64 gv2 + ; check: $(p1=$V) = load.i64 notrap aligned v1-16 + ; check: v2 = iadd_imm $p1, 32 + return v2 + ; check: return v2 +} + +function %symbol() -> i64 { + gv0 = symbol %something + gv1 = symbol u123:456 + +block1: + v0 = global_value.i64 gv0 + ; check: v0 = symbol_value.i64 gv0 + v1 = global_value.i64 gv1 + ; check: v1 = symbol_value.i64 gv1 + v2 = bxor v0, v1 + return v2 +} + +; SpiderMonkey VM-style static 4+2 GB heap. +; This eliminates bounds checks completely for offsets < 2 GB. +function %staticheap_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v { + gv0 = vmctx + gv1 = iadd_imm.i64 gv0, 64 + heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v999: i64): + ; check: block0( + v1 = heap_addr.i64 heap0, v0, 1 + ; The bounds check should be eliminated. + ; Checks here are assuming that no peephole opts fold the load offsets. + ; nextln: $(xoff=$V) = uextend.i64 v0 + ; nextln: $(hbase=$V) = iadd_imm v999, 64 + ; nextln: v1 = iadd $hbase, $xoff + v2 = load.f32 v1+16 + ; nextln: v2 = load.f32 v1+16 + v3 = load.f32 v1+20 + ; nextln: v3 = load.f32 v1+20 + v4 = fadd v2, v3 + return v4 +} + +function %staticheap_static_oob_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v { + gv0 = vmctx + gv1 = iadd_imm.i64 gv0, 64 + heap0 = static gv1, min 0x1000, bound 0x1000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v999: i64): + ; Everything after the obviously OOB access should be eliminated, leaving + ; the `trap heap_oob` instruction as the terminator of the block and moving + ; the remainder of the instructions into an inaccessible block.
+ ; check: block0( + ; nextln: trap heap_oob + ; check: block1: + ; nextln: v1 = iconst.i64 0 + ; nextln: v2 = load.f32 v1+16 + ; nextln: return v2 + ; nextln: } + v1 = heap_addr.i64 heap0, v0, 0x1000_0001 + v2 = load.f32 v1+16 + return v2 +} + + +; SpiderMonkey VM-style static 4+2 GB heap. +; Offsets >= 2 GB do require a bounds check. +function %staticheap_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v { + gv0 = vmctx + gv1 = iadd_imm.i64 gv0, 64 + heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v999: i64): + ; check: block0( + v1 = heap_addr.i64 heap0, v0, 0x8000_0000 + ; Bounds check code + ; check: $(oob=$V) = icmp + ; nextln: brz $oob, $(ok=$BB) + ; nextln: jump $(trap_oob=$BB) + ; check: $trap_oob: + ; nextln: trap heap_oob + ; check: $ok: + ; Checks here are assuming that no peephole opts fold the load offsets. + ; nextln: $(xoff=$V) = uextend.i64 v0 + ; nextln: $(hbase=$V) = iadd_imm.i64 v999, 64 + ; nextln: v1 = iadd $hbase, $xoff + v2 = load.f32 v1+0x7fff_ffff + ; nextln: v2 = load.f32 v1+0x7fff_ffff + return v2 +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif b/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif new file mode 100644 index 0000000000..375a454c20 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif @@ -0,0 +1,43 @@ +test compile +target x86_64 baseline + +; umulhi/smulhi on 64 bit operands + +function %i64_umulhi(i64, i64) -> i64 { +block0(v10: i64, v11: i64): + v12 = umulhi v10, v11 + ; check: %rdi -> %rax + ; check: x86_umulx + ; check: %rdx -> %rax + return v12 +} + +function %i64_smulhi(i64, i64) -> i64 { +block0(v20: i64, v21: i64): + v22 = smulhi v20, v21 + ; check: %rdi -> %rax + ; check: x86_smulx + ; check: %rdx -> %rax + return v22 +} + + +; umulhi/smulhi on 32 bit operands + +function %i32_umulhi(i32, i32) -> i32 { +block0(v30: i32, v31: i32): + v32 = umulhi v30, v31 + ; check: %rdi -> %rax + ; check: x86_umulx + ; check: %rdx -> %rax + return v32 +} + +function %i32_smulhi(i32, i32) -> i32 { +block0(v40: i32, v41: i32): + v42 = smulhi v40, v41 + ; check: %rdi -> %rax + ; check: x86_smulx + ; check: %rdx -> %rax + return v42 +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif new file mode 100644 index 0000000000..c3f89c4807 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif @@ -0,0 +1,9 @@ +test compile +target x86_64 + +function u0:0(i8) -> i8 fast { +block0(v0: i8): + v1 = popcnt v0 + ; check-not: sextend.i32 v0 + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif new file mode 100644 index 0000000000..6f080ca89b --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif @@ -0,0 +1,36 @@ +test compile + +target x86_64 + +function u0:0(i64, i64, i64) system_v { + ss0 = explicit_slot 0 + ss1 = explicit_slot 8 + ss2 = explicit_slot 8 + ss3 = explicit_slot 2 + ss4 = explicit_slot 8 + sig0 = (i64, i16, i64) system_v + fn0 = colocated u0:11 sig0 + +block0(v0: i64, v1: i64, v2: i64): + v3 = stack_addr.i64 ss1 + store v1, v3 + v4 = stack_addr.i64 ss2 + store v2, v4 + v5 = stack_addr.i64 ss3 + v6 = stack_addr.i64 ss4 + jump block1 + +block1: + v7 = load.i64 v3 + v8 = load.i16 v7 + store v8, v5 + v9 = load.i64 v4 + store v9, v6 + v10 = load.i16 v5 + v11 = load.i64 v6 + call fn0(v0, v10, v11) + jump block2 + +block2: 
+ return +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif b/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif new file mode 100644 index 0000000000..78524d2969 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif @@ -0,0 +1,35 @@ +test compile +target x86_64 + +; regex: V=v\d+ +; regex: R=%[a-z0-9]+ + +function %i32_rotr(i32, i32) -> i32 fast { +block0(v0: i32, v1: i32): + ; check: regmove v1, $R -> %rcx + ; check: v2 = rotr v0, v1 + v2 = rotr v0, v1 + return v2 +} + +function %i32_rotr_imm_1(i32) -> i32 fast { +block0(v0: i32): + ; check: $V = rotr_imm v0, 1 + v2 = rotr_imm v0, 1 + return v2 +} + +function %i32_rotl(i32, i32) -> i32 fast { +block0(v0: i32, v1: i32): + ; check: regmove v1, $R -> %rcx + ; check: v2 = rotl v0, v1 + v2 = rotl v0, v1 + return v2 +} + +function %i32_rotl_imm_1(i32) -> i32 fast { +block0(v0: i32): + ; check: $V = rotl_imm v0, 1 + v2 = rotl_imm v0, 1 + return v2 +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif new file mode 100644 index 0000000000..ee6e3e6d11 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif @@ -0,0 +1,24 @@ +test compile +target x86_64 + +; regex: V=v\d+ + +function u0:0(i8, i8) -> i8 fast { +block0(v0: i8, v1: i8): + v2 = ishl v0, v1 + ; check: $(e1=$V) = uextend.i32 v0 + ; check: $(r1=$V) = ishl $e1, v1 + ; check: v2 = ireduce.i8 $r1 + v3 = ushr v0, v1 + ; check: $(e2=$V) = uextend.i32 v0 + ; check: $(r2=$V) = ushr $e2, v1 + ; check: v3 = ireduce.i8 $r2 + v4 = sshr v0, v1 + ; check: $(e3=$V) = sextend.i32 v0 + ; check: $(r3=$V) = sshr $e3, v1 + ; check: v4 = ireduce.i8 $r3 + + v5 = iadd v2, v3 + v6 = iadd v4, v5 + return v6 +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-splat.clif b/cranelift/filetests/filetests/isa/x86/legalize-splat.clif new file mode 100644 index 0000000000..2fa6ace7e9 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-splat.clif @@ -0,0 +1,72 @@ +test compile +set enable_simd=true +set enable_probestack=false +target x86_64 haswell + +; use baldrdash_system_v calling convention here for simplicity (avoids prologue, epilogue) +function %test_splat_i32() -> i32x4 baldrdash_system_v { +block0: + v0 = iconst.i32 42 + v1 = splat.i32x4 v0 + return v1 +} + +; sameln: function %test_splat_i32() -> i32x4 [%xmm0] baldrdash_system_v { +; nextln: ss0 = incoming_arg 0, offset 0 +; nextln: +; nextln: block0: +; nextln: v0 = iconst.i32 42 +; nextln: v2 = scalar_to_vector.i32x4 v0 +; nextln: v1 = x86_pshufd v2, 0 +; nextln: return v1 +; nextln: } + + + +function %test_splat_i64() -> i64x2 baldrdash_system_v { +block0: + v0 = iconst.i64 42 + v1 = splat.i64x2 v0 + return v1 +} + +; check: block0: +; nextln: v0 = iconst.i64 42 +; nextln: v2 = scalar_to_vector.i64x2 v0 +; nextln: v1 = x86_pinsr v2, 1, v0 +; nextln: return v1 + + + +function %test_splat_b16() -> b16x8 baldrdash_system_v { +block0: + v0 = bconst.b16 true + v1 = splat.b16x8 v0 + return v1 +} + +; check: block0: +; nextln: v0 = bconst.b16 true +; nextln: v2 = scalar_to_vector.b16x8 v0 +; nextln: v3 = x86_pinsr v2, 1, v0 +; nextln: v4 = raw_bitcast.i32x4 v3 +; nextln: v5 = x86_pshufd v4, 0 +; nextln: v1 = raw_bitcast.b16x8 v5 +; nextln: return v1 + + + +function %test_splat_i8() -> i8x16 baldrdash_system_v { +block0: + v0 = iconst.i8 42 + v1 = splat.i8x16 v0 + return v1 +} + +; check: block0: +; nextln: v2 = iconst.i32 42 +; nextln: v0 = ireduce.i8 v2 +; nextln: v3 = 
scalar_to_vector.i8x16 v0 +; nextln: v4 = vconst.i8x16 0x00 +; nextln: v1 = x86_pshufb v3, v4 +; nextln: return v1 diff --git a/cranelift/filetests/filetests/isa/x86/legalize-tables.clif b/cranelift/filetests/filetests/isa/x86/legalize-tables.clif new file mode 100644 index 0000000000..5f4632041d --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-tables.clif @@ -0,0 +1,73 @@ +test legalizer +target x86_64 + +; Test legalization for various forms of table addresses. +; regex: BB=block\d+ + +function %table_addrs(i32, i64, i64 vmctx) { + gv4 = vmctx + gv0 = iadd_imm.i64 gv4, 72 + gv1 = iadd_imm.i64 gv4, 80 + gv2 = load.i32 notrap aligned gv4+88 + + table0 = dynamic gv0, min 0x1_0000, bound gv2, element_size 1, index_type i32 + table1 = dynamic gv0, bound gv2, element_size 16, index_type i32 + table2 = dynamic gv0, min 0x1_0000, bound gv1, element_size 1, index_type i64 + table3 = dynamic gv0, bound gv1, element_size 16, index_type i64 + + ; check: table0 = dynamic gv0, min 0x0001_0000, bound gv2, element_size 1, index_type i32 + ; check: table1 = dynamic gv0, min 0, bound gv2, element_size 16, index_type i32 + ; check: table2 = dynamic gv0, min 0x0001_0000, bound gv1, element_size 1, index_type i64 + ; check: table3 = dynamic gv0, min 0, bound gv1, element_size 16, index_type i64 + +block0(v0: i32, v1: i64, v3: i64): + v4 = table_addr.i64 table0, v0, +0 + ; check: v8 = load.i32 notrap aligned v3+88 + ; check: v9 = icmp uge v0, v8 + ; check: brz v9, $(resume_1=$BB) + ; nextln: jump $(trap_1=$BB) + ; check: $trap_1: + ; nextln: trap table_oob + ; check: $resume_1: + ; check: v10 = uextend.i64 v0 + ; check: v11 = iadd_imm.i64 v3, 72 + ; check: v4 = iadd v11, v10 + + v5 = table_addr.i64 table1, v0, +0 + ; check: v12 = load.i32 notrap aligned v3+88 + ; check: v13 = icmp.i32 uge v0, v12 + ; check: brz v13, $(resume_2=$BB) + ; nextln: jump $(trap_2=$BB) + ; check: $trap_2: + ; nextln: trap table_oob + ; check: $resume_2: + ; check: v14 = uextend.i64 v0 + ; check: v15 = iadd_imm.i64 v3, 72 + ; check: v16 = ishl_imm v14, 4 + ; check: v5 = iadd v15, v16 + + v6 = table_addr.i64 table2, v1, +0 + ; check: v17 = iadd_imm.i64 v3, 80 + ; check: v18 = icmp.i64 uge v1, v17 + ; check: brz v18, $(resume_3=$BB) + ; nextln: jump $(trap_3=$BB) + ; check: $trap_3: + ; nextln: trap table_oob + ; check: $resume_3: + ; check: v19 = iadd_imm.i64 v3, 72 + ; check: v6 = iadd v19, v1 + + v7 = table_addr.i64 table3, v1, +0 + ; check: v20 = iadd_imm.i64 v3, 80 + ; check: v21 = icmp.i64 uge v1, v20 + ; check: brz v21, $(resume_4=$BB) + ; nextln: jump $(trap_4=$BB) + ; check: $trap_4: + ; nextln: trap table_oob + ; check: $resume_4: + ; check: v22 = iadd_imm.i64 v3, 72 + ; check: v23 = ishl_imm.i64 v1, 4 + ; check: v7 = iadd v22, v23 + + return +} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif new file mode 100644 index 0000000000..de193c2abb --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif @@ -0,0 +1,15 @@ +test compile +target x86_64 + +; regex: V=v\d+ + +function u0:0(i8, i8) -> i8 fast { +block0(v0: i8, v1: i8): + v2 = urem v0, v1 + ; check: $(a=$V) = uextend.i32 v0 + ; nextln: $(b=$V) = uextend.i32 v1 + ; nextln: $(c=$V) = iconst.i32 0 + ; nextln: $(V), $(r=$V) = x86_udivmodx $a, $c, $b + ; nextln: v2 = ireduce.i8 $r + return v2 +} diff --git a/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif b/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif new file mode 100644 
index 0000000000..070b7459e2 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif @@ -0,0 +1,16 @@ +test compile +target i686 + +function u0:0(i64, i32) system_v { +block0(v0: i64, v1: i32): + v2 = bor v0, v0 + store v2, v1 + return +} + +function u0:1(i32) -> i64 system_v { +block0(v1: i32): + v0 = load.i64 v1 + v2 = bor v0, v0 + return v2 +} diff --git a/cranelift/filetests/filetests/isa/x86/nop.clif b/cranelift/filetests/filetests/isa/x86/nop.clif new file mode 100644 index 0000000000..08d4fdd7a0 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/nop.clif @@ -0,0 +1,10 @@ +test compile + +target x86_64 + +function %test(i32) -> i32 system_v { +block0(v0: i32): + nop + v1 = iconst.i32 42 + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif new file mode 100644 index 0000000000..0f0f06e6f2 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif @@ -0,0 +1,52 @@ +; Check that floating-point and integer constants equal to zero are optimized correctly. +test binemit +set opt_level=speed_and_size +target i686 + +function %foo() -> f32 fast { +block0: + ; asm: xorps %xmm0, %xmm0 + [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 + return v0 +} + +function %bar() -> f64 fast { +block0: + ; asm: xorpd %xmm0, %xmm0 + [-,%xmm0] v1 = f64const 0.0 ; bin: 66 0f 57 c0 + return v1 +} + +function %zero_dword() -> i32 fast { +block0: + ; asm: xor %eax, %eax + [-,%rax] v0 = iconst.i32 0 ; bin: 31 c0 + ; asm: xor %edi, %edi + [-,%rdi] v1 = iconst.i32 0 ; bin: 31 ff + return v0 +} + +function %zero_word() -> i16 fast { +block0: + ; while you may expect this to be encoded like 6631c0, aka + ; xor %ax, %ax, the upper 16 bits of the register used for + ; i16 are left undefined, so it's not wrong to clear them. + ; + ; discarding the 66 prefix is shorter, so this test expects + ; that we do so. + ; + ; asm: xor %eax, %eax + [-,%rax] v0 = iconst.i16 0 ; bin: 31 c0 + ; asm: xor %edi, %edi + [-,%rdi] v1 = iconst.i16 0 ; bin: 31 ff + return v0 +} + +function %zero_byte() -> i8 fast { +block0: + ; asm: xor %al, %al + [-,%rax] v0 = iconst.i8 0 ; bin: 30 c0 + ; asm: xor %bh, %bh + [-,%rdi] v1 = iconst.i8 0 ; bin: 30 ff + return v0 +} diff --git a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif new file mode 100644 index 0000000000..7f5890a1ae --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif @@ -0,0 +1,72 @@ +; Check that floating-point constants equal to zero are optimized correctly.
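+; `xor reg, reg` is the canonical zeroing idiom: `31 c0` (xor %eax, %eax) is two bytes +; versus five for `b8 00 00 00 00` (mov $0, %eax), and since 32-bit writes clear the +; upper 32 bits in 64-bit mode, it zeroes a full i64 register as well.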
+test binemit +set opt_level=speed_and_size +target x86_64 + +function %zero_const_32bit_no_rex() -> f32 fast { +block0: + ; asm: xorps %xmm0, %xmm0 + [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 + return v0 +} + +function %zero_const_32bit_rex() -> f32 fast { +block0: + ; asm: xorps %xmm8, %xmm8 + [-,%xmm8] v1 = f32const 0.0 ; bin: 45 0f 57 c0 + return v1 +} + +function %zero_const_64bit_no_rex() -> f64 fast { +block0: + ; asm: xorpd %xmm0, %xmm0 + [-,%xmm0] v0 = f64const 0.0 ; bin: 66 0f 57 c0 + return v0 +} + +function %zero_const_64bit_rex() -> f64 fast { +block0: + ; asm: xorpd %xmm8, %xmm8 + [-,%xmm8] v1 = f64const 0.0 ; bin: 66 45 0f 57 c0 + return v1 +} + +function %imm_zero_register() -> i64 fast { +block0: + ; asm: xor %eax, %eax + [-,%rax] v0 = iconst.i64 0 ; bin: 31 c0 + ; asm: xor %edi, %edi + [-,%rdi] v1 = iconst.i64 0 ; bin: 31 ff + ; asm: xor %r8, %r8 + [-,%r8] v2 = iconst.i64 0 ; bin: 45 31 c0 + ; asm: xor %r15, %r15 + [-,%r15] v4 = iconst.i64 0 ; bin: 45 31 ff + return v0 +} + +function %zero_word() -> i16 fast { +block0: + ; while you may expect this to be encoded like 6631c0, aka + ; xor %ax, %ax, the upper 16 bits of the register used for + ; i16 are left undefined, so it's not wrong to clear them. + ; + ; discarding the 66 prefix is shorter, so this test expects + ; that we do so. + ; + ; asm: xor %eax, %eax + [-,%rax] v0 = iconst.i16 0 ; bin: 31 c0 + ; asm: xor %edi, %edi + [-,%rdi] v1 = iconst.i16 0 ; bin: 31 ff + return v0 +} + +function %zero_byte() -> i8 fast { +block0: + ; asm: xor %r15b, %r15b + [-,%r15] v0 = iconst.i8 0 ; bin: 45 30 ff + ; asm: xor %al, %al + [-,%rax] v1 = iconst.i8 0 ; bin: 30 c0 + ; asm: xor %bh, %bh + [-,%rdi] v2 = iconst.i8 0 ; bin: 30 ff + return v0 +} diff --git a/cranelift/filetests/filetests/isa/x86/pinned-reg.clif b/cranelift/filetests/filetests/isa/x86/pinned-reg.clif new file mode 100644 index 0000000000..f4bbc2501b --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/pinned-reg.clif @@ -0,0 +1,74 @@ +test compile + +set enable_pinned_reg=true +set use_pinned_reg_as_heap_base=true +set opt_level=speed_and_size + +target x86_64 + +; regex: V=v\d+ + +; r15 is the pinned heap register. It must not be rewritten, so it must not be +; used as a tied output register. +function %tied_input() -> i64 system_v { +block0: + v1 = get_pinned_reg.i64 + v2 = iadd_imm v1, 42 + return v2 +} + +; check: ,%r15] +; sameln: v1 = get_pinned_reg.i64 +; nextln: regmove v1, %r15 -> %rax +; nextln: ,%rax] +; sameln: iadd_imm v1, 42 + +;; It mustn't be used even if this is a tied input used twice. +function %tied_twice() -> i64 system_v { +block0: + v1 = get_pinned_reg.i64 + v2 = iadd v1, v1 + return v2 +} + +; check: ,%r15] +; sameln: v1 = get_pinned_reg.i64 +; nextln: regmove v1, %r15 -> %rax +; nextln: ,%rax] +; sameln: iadd v1, v1 + +function %uses() -> i64 system_v { +block0: + v1 = get_pinned_reg.i64 + v2 = iadd_imm v1, 42 + v3 = get_pinned_reg.i64 + v4 = iadd v2, v3 + return v4 +} + +; check: ,%r15] +; sameln: v1 = get_pinned_reg.i64 +; nextln: regmove v1, %r15 -> %rax +; nextln: ,%rax] +; sameln: iadd_imm v1, 42 +; nextln: ,%r15 +; sameln: v3 = get_pinned_reg.i64 +; nextln: ,%rax] +; sameln: iadd v2, v3 + +; When the pinned register is used as the heap base, the final load instruction +; must use the %r15 register, since x86 supports complex addressing modes.
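+; That is, rather than copying %r15 to a scratch register first, the heap access is +; expected to fold the base into the memory operand, roughly `mov (%r15,%rax,1), %rax`, +; which is what the load_complex check below matches.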
+function u0:1(i64 vmctx) -> i64 system_v { + gv0 = vmctx + heap0 = static gv0, min 0x000a_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 + +block0(v42: i64): + v5 = iconst.i32 42 + v6 = heap_addr.i64 heap0, v5, 0 + v7 = load.i64 v6 + return v7 +} + +; check: ,%r15] +; sameln: $(heap_base=$V) = get_pinned_reg.i64 +; nextln: load_complex.i64 $heap_base+ diff --git a/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif b/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif new file mode 100644 index 0000000000..17be399a4e --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif @@ -0,0 +1,28 @@ +test compile +set use_colocated_libcalls=1 +set probestack_func_adjusts_sp=1 +target x86_64 + +; Like %big in probestack.clif, but with the probestack function adjusting +; the stack pointer itself. + +function %big() system_v { + ss0 = explicit_slot 300000 +block0: + return +} +; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 300000, offset -300016 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: sig0 = (i64 [%rax]) probestack +; nextln: fn0 = colocated %Probestack sig0 +; nextln: +; nextln: block0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 0x0004_93e0 +; nextln: [Op1call_id#e8] call fn0(v1) +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0 +; nextln: [RexOp1popq#58,%rbp] v2 = x86_pop.i64 +; nextln: [Op1ret#c3] return v2 +; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif b/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif new file mode 100644 index 0000000000..433c634cab --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif @@ -0,0 +1,24 @@ +test compile +set use_colocated_libcalls=1 +set enable_probestack=0 +target x86_64 + +; Like %big in probestack.clif, but with probes disabled. + +function %big() system_v { + ss0 = explicit_slot 300000 +block0: + return +} +; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 300000, offset -300016 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: +; nextln: block0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 0x0004_93e0 +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0 +; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 +; nextln: [Op1ret#c3] return v1 +; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif b/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif new file mode 100644 index 0000000000..9af61f0586 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif @@ -0,0 +1,27 @@ +test compile +target x86_64 + +; Like %big in probestack.clif, but without a colocated libcall. 
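+; A colocated libcall can be reached with a direct near `call`, as in probestack.clif; +; a non-colocated one must first materialize the callee address with func_addr and +; then use call_indirect, which is the difference the checks below capture.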
+ +function %big() system_v { + ss0 = explicit_slot 300000 +block0: + return +} +; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 300000, offset -300016 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack +; nextln: fn0 = %Probestack sig0 +; nextln: +; nextln: block0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 0x0004_93e0 +; nextln: [RexOp1fnaddr8#80b8,%r11] v2 = func_addr.i64 fn0 +; nextln: [RexOp1call_r#20ff,%rax] v3 = call_indirect sig0, v2(v1) +; nextln: [RexOp1adjustsp#8029] adjust_sp_down v3 +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0 +; nextln: [RexOp1popq#58,%rbp] v4 = x86_pop.i64 +; nextln: [Op1ret#c3] return v4 +; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack-size.clif b/cranelift/filetests/filetests/isa/x86/probestack-size.clif new file mode 100644 index 0000000000..8eb934b06c --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/probestack-size.clif @@ -0,0 +1,74 @@ +test compile +set use_colocated_libcalls=1 +set probestack_size_log2=13 +target x86_64 + +; Like %big in probestack.clif, but now the probestack size is bigger +; and it no longer needs a probe. + +function %big() system_v { + ss0 = explicit_slot 4097 +block0: + return +} + +; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 4097, offset -4113 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: +; nextln: block0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 4112 +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4112 +; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 +; nextln: [Op1ret#c3] return v1 +; nextln: } + + +; Like %big; still doesn't need a probe. + +function %bigger() system_v { + ss0 = explicit_slot 8192 +block0: + return +} + +; check: function %bigger(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 8192, offset -8208 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: +; nextln: block0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 8192 +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 8192 +; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 +; nextln: [Op1ret#c3] return v1 +; nextln: } + + +; Like %bigger; this needs a probe. 
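+; With probestack_size_log2=13 the threshold is 2^13 = 8192 bytes: %big and %bigger +; adjust the stack by only 4112 and 8192 bytes, but the 8193-byte slot below rounds +; up to an 8208-byte adjustment, which is what finally requires a probe.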
+ +function %biggest() system_v { + ss0 = explicit_slot 8193 +block0: + return +} + +; check: function %biggest(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 8193, offset -8209 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack +; nextln: fn0 = colocated %Probestack sig0 +; nextln: +; nextln: block0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 8208 +; nextln: [Op1call_id#e8,%rax] v2 = call fn0(v1) +; nextln: [RexOp1adjustsp#8029] adjust_sp_down v2 +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 8208 +; nextln: [RexOp1popq#58,%rbp] v3 = x86_pop.i64 +; nextln: [Op1ret#c3] return v3 +; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack.clif b/cranelift/filetests/filetests/isa/x86/probestack.clif new file mode 100644 index 0000000000..d9f29a8681 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/probestack.clif @@ -0,0 +1,49 @@ +test compile +set use_colocated_libcalls=1 +target x86_64 + +; A function with a big stack frame. This should have a stack probe. + +function %big() system_v { + ss0 = explicit_slot 4097 +block0: + return +} +; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 4097, offset -4113 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack +; nextln: fn0 = colocated %Probestack sig0 +; nextln: +; nextln: block0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 4112 +; nextln: [Op1call_id#e8,%rax] v2 = call fn0(v1) +; nextln: [RexOp1adjustsp#8029] adjust_sp_down v2 +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4112 +; nextln: [RexOp1popq#58,%rbp] v3 = x86_pop.i64 +; nextln: [Op1ret#c3] return v3 +; nextln: } + + +; A function with a small enough stack frame. This shouldn't have a stack probe. + +function %small() system_v { + ss0 = explicit_slot 4096 +block0: + return +} + +; check: function %small(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { +; nextln: ss0 = explicit_slot 4096, offset -4112 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: +; nextln: block0(v0: i64 [%rbp]): +; nextln: [RexOp1pushq#50] x86_push v0 +; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp +; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 4096 +; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4096 +; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 +; nextln: [Op1ret#c3] return v1 +; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif b/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif new file mode 100644 index 0000000000..25118ca72b --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif @@ -0,0 +1,256 @@ +test compile +set opt_level=speed_and_size +set is_pic +target x86_64 haswell + +; An empty function. + +function %empty() { +block0: + return +} + +; check: function %empty(i64 fp [%rbp]) -> i64 fp [%rbp] fast { +; nextln: ss0 = incoming_arg 16, offset -16 +; nextln: +; nextln: block0(v0: i64 [%rbp]): +; nextln: x86_push v0 +; nextln: copy_special %rsp -> %rbp +; nextln: v1 = x86_pop.i64 +; nextln: return v1 +; nextln: } + +; A function with a single stack slot. 
+ +function %one_stack_slot() { + ss0 = explicit_slot 168 +block0: + return +} + +; check: function %one_stack_slot(i64 fp [%rbp]) -> i64 fp [%rbp] fast { +; nextln: ss0 = explicit_slot 168, offset -184 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: +; nextln: block0(v0: i64 [%rbp]): +; nextln: x86_push v0 +; nextln: copy_special %rsp -> %rbp +; nextln: adjust_sp_down_imm 176 +; nextln: adjust_sp_up_imm 176 +; nextln: v1 = x86_pop.i64 +; nextln: return v1 +; nextln: } + +; A function performing a call. + +function %call() { + fn0 = %foo() + +block0: + call fn0() + return +} + +; check: function %call(i64 fp [%rbp]) -> i64 fp [%rbp] fast { +; nextln: ss0 = incoming_arg 16, offset -16 +; nextln: sig0 = () fast +; nextln: fn0 = %foo sig0 +; nextln: +; nextln: block0(v0: i64 [%rbp]): +; nextln: x86_push v0 +; nextln: copy_special %rsp -> %rbp +; nextln: call fn0() +; nextln: v1 = x86_pop.i64 +; nextln: return v1 +; nextln: } + +; A function that uses a lot of registers but doesn't quite need to spill. + +function %no_spill(i64, i64) { +block0(v0: i64, v1: i64): + v2 = load.i32 v0+0 + v3 = load.i32 v0+8 + v4 = load.i32 v0+16 + v5 = load.i32 v0+24 + v6 = load.i32 v0+32 + v7 = load.i32 v0+40 + v8 = load.i32 v0+48 + v9 = load.i32 v0+56 + v10 = load.i32 v0+64 + v11 = load.i32 v0+72 + v12 = load.i32 v0+80 + v13 = load.i32 v0+88 + v14 = load.i32 v0+96 + store.i32 v2, v1+0 + store.i32 v3, v1+8 + store.i32 v4, v1+16 + store.i32 v5, v1+24 + store.i32 v6, v1+32 + store.i32 v7, v1+40 + store.i32 v8, v1+48 + store.i32 v9, v1+56 + store.i32 v10, v1+64 + store.i32 v11, v1+72 + store.i32 v12, v1+80 + store.i32 v13, v1+88 + store.i32 v14, v1+96 + return +} + +; check: function %no_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] fast { +; nextln: ss0 = incoming_arg 56, offset -56 +; nextln: +; nextln: block0(v0: i64 [%rdi], v1: i64 [%rsi], v15: i64 [%rbp], v16: i64 [%rbx], v17: i64 [%r12], v18: i64 [%r13], v19: i64 [%r14], v20: i64 [%r15]): +; nextln: x86_push v15 +; nextln: copy_special %rsp -> %rbp +; nextln: x86_push v16 +; nextln: x86_push v17 +; nextln: x86_push v18 +; nextln: x86_push v19 +; nextln: x86_push v20 +; nextln: v2 = load.i32 v0 +; nextln: v3 = load.i32 v0+8 +; nextln: v4 = load.i32 v0+16 +; nextln: v5 = load.i32 v0+24 +; nextln: v6 = load.i32 v0+32 +; nextln: v7 = load.i32 v0+40 +; nextln: v8 = load.i32 v0+48 +; nextln: v9 = load.i32 v0+56 +; nextln: v10 = load.i32 v0+64 +; nextln: v11 = load.i32 v0+72 +; nextln: v12 = load.i32 v0+80 +; nextln: v13 = load.i32 v0+88 +; nextln: v14 = load.i32 v0+96 +; nextln: store v2, v1 +; nextln: store v3, v1+8 +; nextln: store v4, v1+16 +; nextln: store v5, v1+24 +; nextln: store v6, v1+32 +; nextln: store v7, v1+40 +; nextln: store v8, v1+48 +; nextln: store v9, v1+56 +; nextln: store v10, v1+64 +; nextln: store v11, v1+72 +; nextln: store v12, v1+80 +; nextln: store v13, v1+88 +; nextln: store v14, v1+96 +; nextln: v26 = x86_pop.i64 +; nextln: v25 = x86_pop.i64 +; nextln: v24 = x86_pop.i64 +; nextln: v23 = x86_pop.i64 +; nextln: v22 = x86_pop.i64 +; nextln: v21 = x86_pop.i64 +; nextln: return v21, v22, v23, v24, v25, v26 +; nextln: } + +; This function requires too many registers and must spill. 
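+; One more value is live across the loads and stores here than in %no_spill, which is +; more than the register allocator can keep in registers at once, so at least one +; value is assigned to the spill_slot checked below.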
+ +function %yes_spill(i64, i64) { +block0(v0: i64, v1: i64): + v2 = load.i32 v0+0 + v3 = load.i32 v0+8 + v4 = load.i32 v0+16 + v5 = load.i32 v0+24 + v6 = load.i32 v0+32 + v7 = load.i32 v0+40 + v8 = load.i32 v0+48 + v9 = load.i32 v0+56 + v10 = load.i32 v0+64 + v11 = load.i32 v0+72 + v12 = load.i32 v0+80 + v13 = load.i32 v0+88 + v14 = load.i32 v0+96 + v15 = load.i32 v0+104 + store.i32 v2, v1+0 + store.i32 v3, v1+8 + store.i32 v4, v1+16 + store.i32 v5, v1+24 + store.i32 v6, v1+32 + store.i32 v7, v1+40 + store.i32 v8, v1+48 + store.i32 v9, v1+56 + store.i32 v10, v1+64 + store.i32 v11, v1+72 + store.i32 v12, v1+80 + store.i32 v13, v1+88 + store.i32 v14, v1+96 + store.i32 v15, v1+104 + return +} + +; check: function %yes_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] fast { +; check: ss0 = spill_slot + +; check: block0(v16: i64 [%rdi], v17: i64 [%rsi], v48: i64 [%rbp], v49: i64 [%rbx], v50: i64 [%r12], v51: i64 [%r13], v52: i64 [%r14], v53: i64 [%r15]): +; nextln: x86_push v48 +; nextln: copy_special %rsp -> %rbp +; nextln: x86_push v49 +; nextln: x86_push v50 +; nextln: x86_push v51 +; nextln: x86_push v52 +; nextln: x86_push v53 +; nextln: adjust_sp_down_imm + +; check: spill + +; check: fill + +; check: adjust_sp_up_imm +; nextln: v59 = x86_pop.i64 +; nextln: v58 = x86_pop.i64 +; nextln: v57 = x86_pop.i64 +; nextln: v56 = x86_pop.i64 +; nextln: v55 = x86_pop.i64 +; nextln: v54 = x86_pop.i64 +; nextln: return v54, v55, v56, v57, v58, v59 +; nextln: } + +; A function which uses diverted registers. + +function %divert(i32) -> i32 system_v { +block0(v0: i32): + v2 = iconst.i32 0 + v3 = iconst.i32 1 + jump block1(v0, v3, v2) + +block1(v4: i32, v5: i32, v6: i32): + brz v4, block3 + jump block2 + +block2: + v7 = iadd v5, v6 + v8 = iadd_imm v4, -1 + jump block1(v8, v7, v5) + +block3: + return v5 +} + +; check: function %divert +; check: regmove.i32 v5, %rcx -> %rbx +; check: [Op1popq#58,%rbx] v15 = x86_pop.i64 + +; Stack limit checking + +function %stack_limit(i64 stack_limit) { + ss0 = explicit_slot 168 +block0(v0: i64): + return +} + +; check: function %stack_limit(i64 stack_limit [%rdi], i64 fp [%rbp]) -> i64 fp [%rbp] fast { +; nextln: ss0 = explicit_slot 168, offset -184 +; nextln: ss1 = incoming_arg 16, offset -16 +; nextln: +; nextln: block0(v0: i64 [%rdi], v4: i64 [%rbp]): +; nextln: v1 = copy v0 +; nextln: v2 = iadd_imm v1, 16 +; nextln: v3 = ifcmp_sp v2 +; nextln: trapif uge v3, stk_ovf +; nextln: x86_push v4 +; nextln: copy_special %rsp -> %rbp +; nextln: adjust_sp_down_imm 176 +; nextln: adjust_sp_up_imm 176 +; nextln: v5 = x86_pop.i64 +; nextln: return v5 +; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/pshufb.clif b/cranelift/filetests/filetests/isa/x86/pshufb.clif new file mode 100644 index 0000000000..c9d5d798d9 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/pshufb.clif @@ -0,0 +1,13 @@ +test binemit +set enable_simd +target x86_64 has_ssse3=true + +function %test_pshufb() { +block0: +[-, %rax] v0 = iconst.i8 42 +[-, %xmm0] v1 = scalar_to_vector.i8x16 v0 ; bin: 66 40 0f 6e c0 +[-, %rbx] v2 = iconst.i8 43 +[-, %xmm4] v3 = scalar_to_vector.i8x16 v2 ; bin: 66 40 0f 6e e3 +[-, %xmm0] v4 = x86_pshufb v1, v3 ; bin: 66 0f 38 00 c4 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/pshufd.clif b/cranelift/filetests/filetests/isa/x86/pshufd.clif new file mode 100644 index 
0000000000..69dc3f4ea0 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/pshufd.clif @@ -0,0 +1,11 @@ +test binemit +set enable_simd +target x86_64 + +function %test_pshuf() { +block0: +[-, %rax] v0 = iconst.i32 42 +[-, %xmm0] v1 = scalar_to_vector.i32x4 v0 ; bin: 66 40 0f 6e c0 +[-, %xmm0] v2 = x86_pshufd v1, 0 ; bin: 66 0f 70 c0 00 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/raw_bitcast.clif b/cranelift/filetests/filetests/isa/x86/raw_bitcast.clif new file mode 100644 index 0000000000..717f655ec6 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/raw_bitcast.clif @@ -0,0 +1,10 @@ +test binemit +target x86_64 + +function %test_raw_bitcast_i16x8_to_b32x4() { +block0: +[-, %rbx] v0 = bconst.b16 true +[-, %xmm2] v1 = scalar_to_vector.b16x8 v0 +[-, %xmm2] v2 = raw_bitcast.i32x4 v1 ; bin: + return +} diff --git a/cranelift/filetests/filetests/isa/x86/relax_branch.clif b/cranelift/filetests/filetests/isa/x86/relax_branch.clif new file mode 100644 index 0000000000..8b29a057e0 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/relax_branch.clif @@ -0,0 +1,132 @@ +test binemit +set opt_level=speed_and_size +set avoid_div_traps +set baldrdash_prologue_words=3 +set emit_all_ones_funcaddrs +set enable_probestack=false +target x86_64 haswell + +; This checks that a branch that is too far away is getting relaxed. In +; particular, the first block has to be non-empty but its encoding size must be +; zero (i.e. not generate any code). See also issue #666 for more details. + +function u0:2691(i32 [%rdi], i32 [%rsi], i64 vmctx [%r14]) -> i64 uext [%rax] baldrdash_system_v { + ss0 = incoming_arg 24, offset -24 + gv0 = vmctx + gv1 = iadd_imm.i64 gv0, 48 + gv2 = load.i64 notrap aligned readonly gv0 + heap0 = static gv2, min 0xd839_6000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 + + block0(v0: i32 [%rdi], v1: i32 [%rsi], v2: i64 [%r14]): +@0005 [-] fallthrough block3(v0, v1) + + block3(v8: i32 [%rdi], v19: i32 [%rsi]): +@0005 [RexOp1ldDisp8#808b,%rax] v7 = load.i64 v2+48 +@0005 [DynRexOp1rcmp_ib#f083,%rflags] v91 = ifcmp_imm v7, 0 +@0005 [trapif#00] trapif ne v91, interrupt +[DynRexOp1umr#89,%rax] v105 = copy v8 +@000b [DynRexOp1r_ib#83,%rax] v10 = iadd_imm v105, 1 + v80 -> v10 +@0010 [Op1umr#89,%rcx] v92 = uextend.i64 v8 +@0010 [RexOp1ld#808b,%rdx] v93 = load.i64 notrap aligned readonly v2 + v95 -> v93 +@0010 [Op2ldWithIndex#4be,%rcx] v12 = sload8_complex.i32 v93+v92 +[DynRexOp1umr#89,%rbx] v106 = copy v12 +@0017 [DynRexOp1r_ib#40c1,%rbx] v14 = ishl_imm v106, 24 +@001a [DynRexOp1r_ib#70c1,%rbx] v16 = sshr_imm v14, 24 +[DynRexOp1umr#89,%rdi] v107 = copy v16 +@001f [DynRexOp1r_ib#83,%rdi] v18 = iadd_imm v107, 32 +[DynRexOp1umr#89,%r8] v108 = copy v19 +@0026 [DynRexOp1r_ib#83,%r8] v21 = iadd_imm v108, 1 + v82 -> v21 +@002b [Op1umr#89,%rsi] v94 = uextend.i64 v19 +@002b [Op2ldWithIndex#4be,%rdx] v23 = sload8_complex.i32 v93+v94 + v55 -> v23 +[DynRexOp1umr#89,%rsi] v109 = copy v23 +@0032 [DynRexOp1r_ib#40c1,%rsi] v25 = ishl_imm v109, 24 +@0035 [DynRexOp1r_ib#70c1,%rsi] v27 = sshr_imm v25, 24 + v69 -> v27 +[DynRexOp1umr#89,%r9] v110 = copy v27 +@003a [DynRexOp1r_ib#83,%r9] v29 = iadd_imm v110, 32 + v68 -> v29 +@0042 [DynRexOp1r_ib#83,%rcx] v31 = iadd_imm v12, -65 +@0045 [DynRexOp1r_ib#40c1,%rcx] v33 = ishl_imm v31, 24 +@0048 [DynRexOp1r_ib#70c1,%rcx] v35 = sshr_imm v33, 24 +@004c [DynRexOp1r_id#4081,%rcx] v37 = band_imm v35, 255 +[DynRexOp1rcmp_ib#7083,%rflags] v97 = ifcmp_imm v37, 26 +@0050 [Op1brib#70] brif sge v97, block6 +@0050 [-] fallthrough 
block10 + + block10: +[DynRexOp1umr#89,%rcx] v101 = copy v18 +@0054 [Op1jmpb#eb] jump block5(v18, v101) + + block6: +[DynRexOp1umr#89,%rcx] v102 = copy.i32 v16 +@0059 [RexOp1rmov#89] regmove v102, %rcx -> %rdi +@0059 [RexOp1rmov#89] regmove.i32 v16, %rbx -> %rcx +@0059 [-] fallthrough block5(v102, v16) + + block5(v41: i32 [%rdi], v84: i32 [%rcx]): + v83 -> v84 +@005d [DynRexOp1r_id#4081,%rdi] v43 = band_imm v41, 255 +@0062 [DynRexOp1r_ib#40c1,%rdi] v45 = ishl_imm v43, 24 + v52 -> v45 +@0065 [RexOp1rmov#89] regmove v45, %rdi -> %rbx +@0065 [DynRexOp1r_ib#70c1,%rbx] v47 = sshr_imm v45, 24 + v54 -> v47 +@0068 [RexOp1rmov#89] regmove v47, %rbx -> %rdi +@0068 [DynRexOp1icscc_ib#7083,%rbx] v49 = icmp_imm ne v47, 0 +@0068 [RexOp2urm_noflags#4b6,%r10] v50 = bint.i32 v49 +@0076 [DynRexOp1r_ib#83,%rdx] v57 = iadd_imm.i32 v23, -65 +@0079 [DynRexOp1r_ib#40c1,%rdx] v59 = ishl_imm v57, 24 +@007c [DynRexOp1r_ib#70c1,%rdx] v61 = sshr_imm v59, 24 +@0080 [DynRexOp1r_id#4081,%rdx] v63 = band_imm v61, 255 +[DynRexOp1rcmp_ib#7083,%rflags] v98 = ifcmp_imm v63, 26 +@0084 [RexOp1rmov#89] regmove v47, %rdi -> %rbx +@0084 [Op1brib#70] brif sge v98, block8 +@0084 [-] fallthrough block11 + + block11: +[DynRexOp1umr#89,%rdx] v103 = copy.i32 v29 +@0088 [Op1jmpb#eb] jump block7(v29, v10, v21, v103) + + block8: +[DynRexOp1umr#89,%rdx] v104 = copy.i32 v27 +@008d [RexOp1rmov#89] regmove v104, %rdx -> %r9 +@008d [RexOp1rmov#89] regmove.i32 v27, %rsi -> %rdx +@008d [-] fallthrough block7(v104, v10, v21, v27) + + block7(v67: i32 [%r9], v79: i32 [%rax], v81: i32 [%r8], v87: i32 [%rdx]): +@0091 [DynRexOp1r_id#4081,%r9] v71 = band_imm v67, 255 +@0094 [DynRexOp1r_ib#40c1,%r9] v73 = ishl_imm v71, 24 +@0097 [DynRexOp1r_ib#70c1,%r9] v75 = sshr_imm v73, 24 +@0098 [DynRexOp1icscc#39,%rbx] v76 = icmp.i32 eq v47, v75 +@0098 [Op2urm_noflags_abcd#4b6,%rbx] v77 = bint.i32 v76 +@0099 [DynRexOp1rr#21,%r10] v78 = band.i32 v50, v77 +@009a [RexOp1tjccb#74] brz v78, block9 +@009a [-] fallthrough block12 + + block12: +[DynRexOp1umr#89,%rcx] v99 = copy v81 +[DynRexOp1umr#89,%rdx] v100 = copy v79 +@00a4 [RexOp1rmov#89] regmove v100, %rdx -> %rdi +@00a4 [RexOp1rmov#89] regmove v99, %rcx -> %rsi +@00a4 [Op1jmpd#e9] jump block3(v100, v99); bin: e9 ffffff2d + + block9: +@00a7 [-] fallthrough block4 + + block4: +@00ad [DynRexOp1r_id#4081,%rcx] v86 = band_imm.i32 v84, 255 +@00b3 [DynRexOp1r_id#4081,%rdx] v89 = band_imm.i32 v87, 255 +@00b4 [DynRexOp1rr#29,%rcx] v90 = isub v86, v89 +@00b5 [-] fallthrough block2(v90) + + block2(v5: i32 [%rcx]): +@00b6 [-] fallthrough block1(v5) + + block1(v3: i32 [%rcx]): +@00b6 [Op1umr#89,%rax] v96 = uextend.i64 v3 +@00b6 [-] fallthrough_return v96 +} diff --git a/cranelift/filetests/filetests/isa/x86/run-const.clif b/cranelift/filetests/filetests/isa/x86/run-const.clif new file mode 100644 index 0000000000..c39d39adfb --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/run-const.clif @@ -0,0 +1,11 @@ +test run + +function %test_compare_i32() -> b1 { +block0: + v0 = iconst.i32 42 + v1 = iconst.i32 42 + v2 = icmp eq v0, v1 + return v2 +} + +; run diff --git a/cranelift/filetests/filetests/isa/x86/run-i64.clif b/cranelift/filetests/filetests/isa/x86/run-i64.clif new file mode 100644 index 0000000000..ae4a618573 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/run-i64.clif @@ -0,0 +1,17 @@ +; Test i64 instructions on x86_32. 
+test compile +target i686 haswell + +function %iadd(i64, i64) -> i64 { +block0(v1: i64, v2: i64): + v10 = iadd v1, v2 + ; check: iadd_ifcout + return v10 +} + +function %isub(i64, i64) -> i64 { +block0(v1: i64, v2: i64): + v10 = isub v1, v2 + ; check: isub_ifbout + return v10 +} diff --git a/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif b/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif new file mode 100644 index 0000000000..36b69ca8b7 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif @@ -0,0 +1,13 @@ +test compile +target x86_64 + +function u0:0() -> f32 system_v { +block0: + v0 = iconst.i8 255 +; check: v2 = iconst.i32 255 +; nextln: v0 = ireduce.i8 v2 + v1 = fcvt_from_uint.f32 v0 +; nextln: v3 = uextend.i64 v0 +; nextln: v1 = fcvt_from_sint.f32 v3 + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/scalar_to_vector-binemit.clif b/cranelift/filetests/filetests/isa/x86/scalar_to_vector-binemit.clif new file mode 100644 index 0000000000..4a02e6bac6 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/scalar_to_vector-binemit.clif @@ -0,0 +1,32 @@ +test binemit +set opt_level=speed_and_size +set enable_simd +target x86_64 + +function %test_scalar_to_vector_b8() { +block0: +[-, %rax] v0 = bconst.b8 true +[-, %xmm0] v1 = scalar_to_vector.b8x16 v0 ; bin: 66 0f 6e c0 + return +} + +function %test_scalar_to_vector_i16() { +block0: +[-, %rbx] v0 = iconst.i16 42 +[-, %xmm2] v1 = scalar_to_vector.i16x8 v0 ; bin: 66 0f 6e d3 + return +} + +function %test_scalar_to_vector_b32() { +block0: +[-, %rcx] v0 = bconst.b32 false +[-, %xmm3] v1 = scalar_to_vector.b32x4 v0 ; bin: 66 0f 6e d9 + return +} + +function %test_scalar_to_vector_i64() { +block0: +[-, %rdx] v0 = iconst.i64 42 +[-, %xmm7] v1 = scalar_to_vector.i64x2 v0 ; bin: 66 48 0f 6e fa + return +} diff --git a/cranelift/filetests/filetests/isa/x86/scalar_to_vector-compile.clif b/cranelift/filetests/filetests/isa/x86/scalar_to_vector-compile.clif new file mode 100644 index 0000000000..8ab62db59d --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/scalar_to_vector-compile.clif @@ -0,0 +1,19 @@ +test compile +set opt_level=speed_and_size +set enable_probestack=false +set enable_simd +target x86_64 + +; ensure that scalar_to_vector emits no instructions for floats (already exist in an XMM register) +function %test_scalar_to_vector_f32() -> f32x4 baldrdash_system_v { +block0: + v0 = f32const 0x0.42 + v1 = scalar_to_vector.f32x4 v0 + return v1 +} + +; check: block0 +; nextln: v2 = iconst.i32 0x3e84_0000 +; nextln: v0 = bitcast.f32 v2 +; nextln: [null_fpr#00,%xmm0] v1 = scalar_to_vector.f32x4 v0 +; nextln: return v1 diff --git a/cranelift/filetests/filetests/isa/x86/select-i8.clif b/cranelift/filetests/filetests/isa/x86/select-i8.clif new file mode 100644 index 0000000000..feec520860 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/select-i8.clif @@ -0,0 +1,8 @@ +test compile +target x86_64 + +function u0:0(b1, i8, i8) -> i8 { +block0(v0: b1, v1: i8, v2: i8): + v3 = select v0, v1, v2 + return v3 +} diff --git a/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif b/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif new file mode 100644 index 0000000000..f896d8cc25 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif @@ -0,0 +1,18 @@ +test shrink +set opt_level=speed_and_size +target x86_64 + +function %test_multiple_uses(i32 [%rdi]) -> i32 { +block0(v0: i32 [%rdi]): +[DynRexOp1rcmp_ib#7083,%rflags] 
v3 = ifcmp_imm v0, 0
+[Op2seti_abcd#490,%rax] v1 = trueif eq v3
+[RexOp2urm_noflags#4b6,%rax] v2 = bint.i32 v1
+[Op1brib#70] brif eq v3, block1
+[Op1jmpb#eb] jump block2
+
+block2:
+[Op1ret#c3] return v2
+
+block1:
+[Op2trap#40b] trap user0
+}
diff --git a/cranelift/filetests/filetests/isa/x86/shrink.clif b/cranelift/filetests/filetests/isa/x86/shrink.clif
new file mode 100644
index 0000000000..9b0832b2a7
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/shrink.clif
@@ -0,0 +1,40 @@
+test binemit
+set opt_level=speed_and_size
+target x86_64
+
+; Test that instruction shrinking eliminates REX prefixes when possible.
+
+; The binary encodings can be verified with the command:
+;
+; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/shrink.clif | llvm-mc -show-encoding -triple=x86_64
+;
+
+function %test_shrinking(i32) -> i32 {
+block0(v0: i32 [ %rdi ]):
+ ; asm: movl $0x2,%ecx
+[-,%rcx] v1 = iconst.i32 2 ; bin: b9 00000002
+ ; asm: subl %ecx,%edi
+[-,%rdi] v2 = isub v0, v1 ; bin: 29 cf
+ return v2
+}
+
+function %test_not_shrinking(i32) -> i32 {
+block0(v0: i32 [ %r8 ]):
+ ; asm: movl $0x2,%ecx
+[-,%rcx] v1 = iconst.i32 2 ; bin: b9 00000002
+ ; asm: subl %ecx,%r8d
+[-,%r8] v2 = isub v0, v1 ; bin: 41 29 c8
+ return v2
+}
+
+function %test_not_shrinking_i8() {
+block0:
+[-,%rsi] v1 = iconst.i8 1
+ ; asm: movsbl %sil,%esi
+[-,%rsi] v2 = sextend.i32 v1 ; bin: 40 0f be f6
+ ; asm: movzbl %sil,%esi
+[-,%rsi] v3 = uextend.i32 v1 ; bin: 40 0f b6 f6
+ ; asm: movzbl %sil,%esi
+[-,%rsi] v4 = uextend.i64 v1 ; bin: 40 0f b6 f6
+ trap user0
+}
diff --git a/cranelift/filetests/filetests/isa/x86/shuffle-legalize.clif b/cranelift/filetests/filetests/isa/x86/shuffle-legalize.clif
new file mode 100644
index 0000000000..78c6bfef40
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/shuffle-legalize.clif
@@ -0,0 +1,31 @@
+test legalizer
+set enable_simd
+target x86_64 skylake
+
+function %test_shuffle_different_ssa_values() -> i8x16 {
+block0:
+ v0 = vconst.i8x16 0x00
+ v1 = vconst.i8x16 0x01
+ v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0
+ return v2
+}
+
+; check: v1 = vconst.i8x16 0x01
+; nextln: v3 = vconst.i8x16 0x80000000000000000000000000000000
+; nextln: v4 = x86_pshufb v0, v3
+; nextln: v5 = vconst.i8x16 0x01808080808080808080808080808080
+; nextln: v6 = x86_pshufb v1, v5
+; nextln: v2 = bor v4, v6
+
+
+
+function %test_shuffle_same_ssa_value() -> i8x16 {
+block0:
+ v1 = vconst.i8x16 0x01
+ v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1
+ return v2
+}
+
+; check: v1 = vconst.i8x16 0x01
+; nextln: v3 = vconst.i8x16 0x03000000000000000000000000000000
+; nextln: v2 = x86_pshufb v1, v3
diff --git a/cranelift/filetests/filetests/isa/x86/shuffle-run.clif b/cranelift/filetests/filetests/isa/x86/shuffle-run.clif
new file mode 100644
index 0000000000..44e4998907
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/shuffle-run.clif
@@ -0,0 +1,60 @@
+test run
+set enable_simd
+
+function %test_shuffle_different_ssa_values() -> b1 {
+block0:
+ v0 = vconst.i8x16 0x00
+ v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42]
+ v2 = shuffle v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31] ; use the first lane of v0 throughout except use the last lane of v1
+ v3 = extractlane.i8x16 v2, 15
+ v4 = iconst.i8 42
+ v5 = icmp eq v3, v4
+ return v5
+}
+
+; run
+
+function %test_shuffle_same_ssa_value() -> b1 {
+block0:
+ v0 = vconst.i8x16
0x01000000_00000000_00000000_00000000 ; note where lane 15 is when written with hexadecimal syntax + v1 = shuffle v0, v0, 0x0f0f0f0f_0f0f0f0f_0f0f0f0f_0f0f0f0f ; use the last lane of v0 to fill all lanes + v2 = extractlane.i8x16 v1, 4 + v3 = iconst.i8 0x01 + v4 = icmp eq v2, v3 + return v4 +} + +; run + +function %compare_shuffle() -> b1 { +block0: + v1 = vconst.i32x4 [0 1 2 3] + v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 + ; keep each lane in place from the first vector + v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] + v4 = raw_bitcast.i32x4 v3 + v5 = extractlane.i32x4 v4, 3 + v6 = icmp_imm eq v5, 3 + v7 = extractlane.i32x4 v4, 0 + v8 = icmp_imm eq v7, 0 + v9 = band v6, v8 + return v9 +} + +; run + + +function %compare_shuffle() -> b32 { +block0: + v1 = vconst.b32x4 [true false true false] + v2 = raw_bitcast.b8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 + ; pair up the true values to make the entire vector true + v3 = shuffle v2, v2, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11] + v4 = raw_bitcast.b32x4 v3 + v5 = extractlane v4, 3 + v6 = extractlane v4, 0 + v7 = band v5, v6 + return v7 +} + +; run diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif new file mode 100644 index 0000000000..85797d9a4b --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif @@ -0,0 +1,211 @@ +test binemit +set enable_simd +target x86_64 skylake + +function %iadd_i32x4() -> b1 { +block0: +[-, %xmm0] v0 = vconst.i32x4 [1 1 1 1] +[-, %xmm1] v1 = vconst.i32x4 [1 2 3 4] +[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fe c1 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 2 + + v5 = extractlane v2, 3 + v6 = icmp_imm eq v5, 5 + + v7 = band v4, v6 + return v7 +} + +function %iadd_i8x16_with_overflow() -> b1 { +block0: +[-, %xmm0] v0 = vconst.i8x16 [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255] +[-, %xmm7] v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] +[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fc c7 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 1 + + return v4 +} + +function %iadd_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8 [%xmm1], v1: i16x8 [%xmm2]): +[-, %xmm1] v2 = iadd v0, v1 ; bin: 66 0f fd ca + return v2 +} + +function %iadd_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2 [%xmm3], v1: i64x2 [%xmm4]): +[-, %xmm3] v2 = iadd v0, v1 ; bin: 66 0f d4 dc + return v2 +} + +function %isub_i32x4() -> b1 { +block0: +[-, %xmm3] v0 = vconst.i32x4 [1 1 1 1] +[-, %xmm5] v1 = vconst.i32x4 [1 2 3 4] +[-, %xmm3] v2 = isub v0, v1 ; bin: 66 0f fa dd + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 0 + + v5 = extractlane v2, 1 + v6 = icmp_imm eq v5, 0xffffffff + + v7 = band v4, v6 + return v7 +} + +function %isub_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm1]): +[-, %xmm0] v2 = isub v0, v1 ; bin: 66 0f fb c1 + return v2 +} + +function %isub_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8 [%xmm3], v1: i16x8 [%xmm4]): +[-, %xmm3] v2 = isub v0, v1 ; bin: 66 0f f9 dc + return v2 +} + +function %isub_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16 [%xmm3], v1: i8x16 [%xmm4]): +[-, %xmm3] v2 = isub v0, v1 ; bin: 66 0f f8 dc + return v2 +} + +function %imul_i32x4() -> b1 { +block0: +[-, %xmm0] v0 = vconst.i32x4 [-1 0 1 -2147483647] ; e.g. 
-2147483647 == 0x80_00_00_01 +[-, %xmm1] v1 = vconst.i32x4 [2 2 2 2] +[-, %xmm0] v2 = imul v0, v1 ; bin: 66 0f 38 40 c1 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, -2 + + v5 = extractlane v2, 1 + v6 = icmp_imm eq v5, 0 + + v7 = extractlane v2, 3 + v8 = icmp_imm eq v7, 2 ; 0x80_00_00_01 * 2 == 0x1_00_00_00_02 (and the 1 is dropped) + + v9 = band v4, v6 + v10 = band v8, v9 + return v10 +} + + +function %imul_i16x8() -> b1 { +block0: +[-, %xmm1] v0 = vconst.i16x8 [-1 0 1 32767 0 0 0 0] ; e.g. 32767 == 0x7f_ff +[-, %xmm2] v1 = vconst.i16x8 [2 2 2 2 0 0 0 0] +[-, %xmm1] v2 = imul v0, v1 ; bin: 66 0f d5 ca + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 0xfffe ; 0xfffe == -2; -2 will not work here and below because v3 is + ; being uextend-ed, not sextend-ed + + v5 = extractlane v2, 1 + v6 = icmp_imm eq v5, 0 + + v7 = extractlane v2, 3 + v8 = icmp_imm eq v7, 0xfffe ; 0x7f_ff * 2 == 0xff_fe + + v9 = band v4, v6 + v10 = band v8, v9 + + return v4 +} + + +function %sadd_sat_i8x16() -> b1 { +block0: +[-, %xmm2] v0 = vconst.i8x16 [127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] +[-, %xmm3] v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] + +[-, %xmm2] v2 = sadd_sat v0, v1 ; bin: 66 0f ec d3 + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 127 + + return v4 +} + + +function %uadd_sat_i16x8() -> b1 { +block0: +[-, %xmm2] v0 = vconst.i16x8 [-1 0 0 0 0 0 0 0] +[-, %xmm3] v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1] + +[-, %xmm2] v2 = uadd_sat v0, v1 ; bin: 66 0f dd d3 + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 65535 + + return v4 +} + + +function %sub_sat_i8x16() -> b1 { +block0: +[-, %xmm2] v0 = vconst.i8x16 [128 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ; 128 == 0x80 == -128 +[-, %xmm3] v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] + +[-, %xmm2] v2 = ssub_sat v0, v1 ; bin: 66 0f e8 d3 + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 0x80 ; 0x80 == -128 + + ; now re-use 0x80 as an unsigned 128 +[-, %xmm2] v5 = usub_sat v0, v2 ; bin: 66 0f d8 d2 + v6 = extractlane v5, 0 + v7 = icmp_imm eq v6, 0 + + v8 = band v4, v7 + return v8 +} + + +function %sub_sat_i16x8() { +block0: +[-, %xmm3] v0 = vconst.i16x8 [0 0 0 0 0 0 0 0] +[-, %xmm5] v1 = vconst.i16x8 [1 1 1 1 1 1 1 1] +[-, %xmm3] v2 = ssub_sat v0, v1 ; bin: 66 0f e9 dd +[-, %xmm3] v3 = usub_sat v0, v1 ; bin: 66 0f d9 dd + return +} + +function %float_arithmetic_f32x4(f32x4, f32x4) { +block0(v0: f32x4 [%xmm3], v1: f32x4 [%xmm5]): +[-, %xmm3] v2 = fadd v0, v1 ; bin: 40 0f 58 dd +[-, %xmm3] v3 = fsub v0, v1 ; bin: 40 0f 5c dd +[-, %xmm3] v4 = fmul v0, v1 ; bin: 40 0f 59 dd +[-, %xmm3] v5 = fdiv v0, v1 ; bin: 40 0f 5e dd +[-, %xmm3] v6 = fmin v0, v1 ; bin: 40 0f 5d dd +[-, %xmm3] v7 = fmax v0, v1 ; bin: 40 0f 5f dd +[-, %xmm3] v8 = sqrt v0 ; bin: 40 0f 51 db + return +} + +function %float_arithmetic_f64x2(f64x2, f64x2) { +block0(v0: f64x2 [%xmm3], v1: f64x2 [%xmm5]): +[-, %xmm3] v2 = fadd v0, v1 ; bin: 66 40 0f 58 dd +[-, %xmm3] v3 = fsub v0, v1 ; bin: 66 40 0f 5c dd +[-, %xmm3] v4 = fmul v0, v1 ; bin: 66 40 0f 59 dd +[-, %xmm3] v5 = fdiv v0, v1 ; bin: 66 40 0f 5e dd +[-, %xmm3] v6 = fmin v0, v1 ; bin: 66 40 0f 5d dd +[-, %xmm3] v7 = fmax v0, v1 ; bin: 66 40 0f 5f dd +[-, %xmm3] v8 = sqrt v0 ; bin: 66 40 0f 51 db + return +} + +function %average_rounding_i8x16(i8x16, i8x16) { +block0(v0: i8x16 [%xmm6], v1: i8x16 [%xmm2]): +[-, %xmm6] v2 = avg_round v0, v1 ; bin: 66 0f e0 f2 + return +} + +function %average_rounding_i16x8(i16x8, i16x8) { +block0(v0: i16x8 [%xmm6], v1: i16x8 [%xmm2]): +[-, %xmm6] v2 = avg_round v0, v1 ; bin: 66 0f e3 f2 + return +} diff --git 
a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif new file mode 100644 index 0000000000..5211e1d796 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif @@ -0,0 +1,66 @@ +test legalizer +set enable_simd +target x86_64 skylake + +function %ineg_i32x4() -> b1 { +block0: + v0 = vconst.i32x4 [1 1 1 1] + v2 = ineg v0 + ; check: v5 = vconst.i32x4 0x00 + ; nextln: v2 = isub v5, v0 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, -1 + + return v4 +} + +function %ineg_legalized() { +block0: + v0 = vconst.i8x16 0x00 + v1 = ineg v0 + ; check: v6 = vconst.i8x16 0x00 + ; nextln: v1 = isub v6, v0 + + v2 = raw_bitcast.i16x8 v0 + v3 = ineg v2 + ; check: v7 = vconst.i16x8 0x00 + ; nextln: v3 = isub v7, v2 + + v4 = raw_bitcast.i64x2 v0 + v5 = ineg v4 + ; check: v8 = vconst.i64x2 0x00 + ; nextln: v5 = isub v8, v4 + + return +} + +function %fneg_legalized() { +block0: + v0 = vconst.f32x4 [0x1.0 0x2.0 0x3.0 0x4.0] + v1 = fneg v0 + ; check: v4 = vconst.i32x4 0xffffffffffffffffffffffffffffffff + ; nextln: v5 = ishl_imm v4, 31 + ; nextln: v6 = raw_bitcast.f32x4 v5 + ; nextln: v1 = bxor v0, v6 + + v2 = vconst.f64x2 [0x1.0 0x2.0] + v3 = fneg v2 + ; check: v7 = vconst.i64x2 0xffffffffffffffffffffffffffffffff + ; nextln: v8 = ishl_imm v7, 63 + ; nextln: v9 = raw_bitcast.f64x2 v8 + ; nextln: v3 = bxor v2, v9 + + return +} + +function %fabs_legalized() { +block0: + v0 = vconst.f64x2 [0x1.0 -0x2.0] + v1 = fabs v0 + ; check: v2 = vconst.i64x2 0xffffffffffffffffffffffffffffffff + ; nextln: v3 = ushr_imm v2, 1 + ; nextln: v4 = raw_bitcast.f64x2 v3 + ; nextln: v1 = band v0, v4 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif new file mode 100644 index 0000000000..3403815154 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif @@ -0,0 +1,281 @@ +test run +set enable_simd +target x86_64 skylake + +function %iadd_i32x4() -> b1 { +block0: + v0 = vconst.i32x4 [1 1 1 1] + v1 = vconst.i32x4 [1 2 3 4] + v2 = iadd v0, v1 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 2 + + v5 = extractlane v2, 3 + v6 = icmp_imm eq v5, 5 + ; TODO replace extractlanes with vector comparison + + v7 = band v4, v6 + return v7 +} +; run + +function %iadd_i8x16_with_overflow() -> b1 { +block0: + v0 = vconst.i8x16 [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255] + v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] + v2 = iadd v0, v1 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 1 + ; TODO replace extractlane with vector comparison + + return v4 +} +; run + +function %isub_i32x4() -> b1 { +block0: + v0 = vconst.i32x4 [1 1 1 1] + v1 = vconst.i32x4 [1 2 3 4] + v2 = isub v0, v1 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 0 + + v5 = extractlane v2, 1 + v6 = icmp_imm eq v5, 0xffffffff + ; TODO replace extractlanes with vector comparison + + v7 = band v4, v6 + return v7 +} +; run + + +function %ineg_i32x4() -> b1 { +block0: + v0 = vconst.i32x4 [1 1 1 1] + v2 = ineg v0 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, -1 + + return v4 +} +; run + +function %imul_i32x4() -> b1 { +block0: + v0 = vconst.i32x4 [-1 0 1 -2147483647] ; e.g. 
-2147483647 == 0x80_00_00_01
+ v1 = vconst.i32x4 [2 2 2 2]
+ v2 = imul v0, v1
+
+ v3 = extractlane v2, 0
+ v4 = icmp_imm eq v3, -2
+
+ v5 = extractlane v2, 1
+ v6 = icmp_imm eq v5, 0
+
+ v7 = extractlane v2, 3
+ v8 = icmp_imm eq v7, 2 ; 0x80_00_00_01 * 2 == 0x1_00_00_00_02 (and the 1 is dropped)
+
+ v9 = band v4, v6
+ v10 = band v8, v9
+ return v10
+}
+; run

+
+function %imul_i16x8() -> b1 {
+block0:
+ v0 = vconst.i16x8 [-1 0 1 32767 0 0 0 0] ; e.g. 32767 == 0x7f_ff
+ v1 = vconst.i16x8 [2 2 2 2 0 0 0 0]
+ v2 = imul v0, v1
+
+ v3 = extractlane v2, 0
+ v4 = icmp_imm eq v3, 0xfffe ; 0xfffe == -2; -2 will not work here and below because v3 is
+ ; being uextend-ed, not sextend-ed
+
+ v5 = extractlane v2, 1
+ v6 = icmp_imm eq v5, 0
+
+ v7 = extractlane v2, 3
+ v8 = icmp_imm eq v7, 0xfffe ; 0x7f_ff * 2 == 0xff_fe
+
+ v9 = band v4, v6
+ v10 = band v8, v9
+
+ return v10
+}
+; run
+
+function %sadd_sat_i8x16() -> b1 {
+block0:
+ v0 = vconst.i8x16 [127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+ v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
+
+ v2 = sadd_sat v0, v1
+ v3 = extractlane v2, 0
+ v4 = icmp_imm eq v3, 127
+
+ return v4
+}
+; run
+
+function %uadd_sat_i16x8() -> b1 {
+block0:
+ v0 = vconst.i16x8 [-1 0 0 0 0 0 0 0]
+ v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1]
+
+ v2 = uadd_sat v0, v1
+ v3 = extractlane v2, 0
+ v4 = icmp_imm eq v3, 65535
+
+ return v4
+}
+; run
+
+function %sub_sat_i8x16() -> b1 {
+block0:
+ v0 = vconst.i8x16 [128 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ; 128 == 0x80 == -128
+ v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
+
+ v2 = ssub_sat v0, v1
+ v3 = extractlane v2, 0
+ v4 = icmp_imm eq v3, 0x80 ; 0x80 == -128
+
+ ; now re-use 0x80 as an unsigned 128
+ v5 = usub_sat v0, v2
+ v6 = extractlane v5, 0
+ v7 = icmp_imm eq v6, 0
+
+ v8 = band v4, v7
+ return v8
+}
+; run
+
+function %add_sub_f32x4() -> b1 {
+block0:
+ v0 = vconst.f32x4 [0x4.2 0.0 0.0 0.0]
+ v1 = vconst.f32x4 [0x1.0 0x1.0 0x1.0 0x1.0]
+ v2 = vconst.f32x4 [0x5.2 0x1.0 0x1.0 0x1.0]
+
+ v3 = fadd v0, v1
+ v4 = fcmp eq v3, v2
+
+ v6 = fsub v2, v1
+ v7 = fcmp eq v6, v0
+
+ v8 = band v4, v7
+ v9 = vall_true v8
+ return v9
+}
+; run
+
+function %mul_div_f32x4() -> b1 {
+block0:
+ v0 = vconst.f32x4 [0x4.2 -0x2.1 0x2.0 0.0]
+ v1 = vconst.f32x4 [0x3.4 0x6.7 0x8.9 0xa.b]
+ v2 = vconst.f32x4 [0xd.68 -0xd.47 0x11.2 0x0.0]
+
+ v3 = fmul v0, v1
+ v4 = fcmp eq v3, v2
+
+ v6 = fdiv v2, v1
+ v7 = fcmp eq v6, v0
+
+ v8 = band v4, v7
+ v9 = vall_true v8
+ return v9
+}
+; run
+
+function %sqrt_f64x2() -> b1 {
+block0:
+ v0 = vconst.f64x2 [0x9.0 0x1.0]
+ v1 = sqrt v0
+ v2 = vconst.f64x2 [0x3.0 0x1.0]
+ v3 = fcmp eq v2, v1
+ v4 = vall_true v3
+ return v4
+}
+; run
+
+function %fmax_f64x2() -> b1 {
+block0:
+ v0 = vconst.f64x2 [-0.0 -0x1.0]
+ v1 = vconst.f64x2 [+0.0 +0x1.0]
+
+ v2 = fmax v0, v1
+ v3 = fcmp eq v2, v1
+ v4 = vall_true v3
+
+ return v4
+}
+; run
+
+function %fmin_f64x2() -> b1 {
+block0:
+ v0 = vconst.f64x2 [-0x1.0 -0x1.0]
+ v1 = vconst.f64x2 [+0.0 +0x1.0]
+
+ v2 = fmin v0, v1
+ v3 = fcmp eq v2, v0
+ v4 = vall_true v3
+
+ return v4
+}
+; run
+
+function %fneg_f64x2() -> b1 {
+block0:
+ v0 = vconst.f64x2 [0x1.0 -0x1.0]
+ v1 = fneg v0
+
+ v2 = vconst.f64x2 [-0x1.0 0x1.0]
+ v3 = fcmp eq v1, v2
+ v4 = vall_true v3
+
+ return v4
+}
+; run
+
+function %fneg_f32x4() -> b1 {
+block0:
+ v0 = vconst.f32x4 [0x0.0 -0x0.0 -Inf Inf]
+ v1 = fneg v0
+
+ v2 = vconst.f32x4 [-0x0.0 0x0.0 Inf -Inf]
+ v3 = fcmp eq v1, v2
+ v4 = vall_true v3
+
+ return v4
+}
+; run
+
+function %fabs_f32x4() -> b1 {
+block0:
+ v0 = vconst.f32x4 [0x0.0 -0x1.0 0x2.0 -0x3.0]
+ v1
= fabs v0 + + v2 = vconst.f32x4 [0x0.0 0x1.0 0x2.0 0x3.0] + v3 = fcmp eq v1, v2 + v4 = vall_true v3 + + return v4 +} +; run + +function %average_rounding_i16x8() -> b1 { +block0: + v0 = vconst.i16x8 [0 0 0 1 42 19 -1 -1] + v1 = vconst.i16x8 [0 1 2 4 42 18 -1 0] + v2 = vconst.i16x8 [0 1 1 3 42 19 -1 -32768] ; -1 (0xffff) + 0 + 1 == -32768 (0x8000) + + v3 = avg_round v0, v1 + v4 = icmp eq v2, v3 + v5 = vall_true v4 + + return v5 +} +; run diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif new file mode 100644 index 0000000000..599c58fd80 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif @@ -0,0 +1,105 @@ +test binemit +set enable_simd +target x86_64 skylake + +function %ishl_i16x8(i16x8, i64x2) -> i16x8 { +block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]): +[-, %xmm2] v2 = x86_psll v0, v1 ; bin: 66 0f f1 d1 + return v2 +} + +function %ishl_i32x4(i32x4, i64x2) -> i32x4 { +block0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]): +[-, %xmm4] v2 = x86_psll v0, v1 ; bin: 66 0f f2 e0 + return v2 +} + +function %ishl_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2 [%xmm6], v1: i64x2 [%xmm3]): +[-, %xmm6] v2 = x86_psll v0, v1 ; bin: 66 0f f3 f3 + return v2 +} + +function %ushr_i16x8(i16x8, i64x2) -> i16x8 { +block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]): +[-, %xmm2] v2 = x86_psrl v0, v1 ; bin: 66 0f d1 d1 + return v2 +} + +function %ushr_i32x4(i32x4, i64x2) -> i32x4 { +block0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]): +[-, %xmm4] v2 = x86_psrl v0, v1 ; bin: 66 0f d2 e0 + return v2 +} + +function %ushr_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2 [%xmm6], v1: i64x2 [%xmm3]): +[-, %xmm6] v2 = x86_psrl v0, v1 ; bin: 66 0f d3 f3 + return v2 +} + +function %sshr_i16x8(i16x8, i64x2) -> i16x8 { +block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]): +[-, %xmm2] v2 = x86_psra v0, v1 ; bin: 66 0f e1 d1 + return v2 +} + +function %sshr_i32x4(i32x4, i64x2) -> i32x4 { +block0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]): +[-, %xmm4] v2 = x86_psra v0, v1 ; bin: 66 0f e2 e0 + return v2 +} + +function %ishl_imm_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8 [%xmm2]): +[-, %xmm2] v2 = ishl_imm v0, 3 ; bin: 66 0f 71 f2 03 + return v2 +} + +function %ishl_imm_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4 [%xmm4]): +[-, %xmm4] v2 = ishl_imm v0, 10 ; bin: 66 0f 72 f4 0a + return v2 +} + +function %ishl_imm_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2 [%xmm6]): +[-, %xmm6] v2 = ishl_imm v0, 42 ; bin: 66 0f 73 f6 2a + return v2 +} + +function %ushr_imm_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8 [%xmm2]): +[-, %xmm2] v2 = ushr_imm v0, 3 ; bin: 66 0f 71 d2 03 + return v2 +} + +function %ushr_imm_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4 [%xmm4]): +[-, %xmm4] v2 = ushr_imm v0, 10 ; bin: 66 0f 72 d4 0a + return v2 +} + +function %ushr_imm_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2 [%xmm6]): +[-, %xmm6] v2 = ushr_imm v0, 42 ; bin: 66 0f 73 d6 2a + return v2 +} + +function %sshr_imm_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8 [%xmm2]): +[-, %xmm2] v2 = sshr_imm v0, 3 ; bin: 66 0f 71 e2 03 + return v2 +} + +function %sshr_imm_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4 [%xmm4]): +[-, %xmm4] v2 = sshr_imm v0, 10 ; bin: 66 0f 72 e4 0a + return v2 +} + +function %sshr_imm_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2 [%xmm6]): +[-, %xmm6] v2 = sshr_imm v0, 42 ; bin: 66 0f 73 e6 2a + return v2 +} diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif new file mode 100644 
index 0000000000..af7036b27a --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif @@ -0,0 +1,45 @@ +test legalizer +set enable_simd +target x86_64 skylake + +function %ishl_i32x4() -> i32x4 { +block0: + v0 = iconst.i32 1 + v1 = vconst.i32x4 [1 2 4 8] + v2 = ishl v1, v0 + ; check: v3 = bitcast.i64x2 v0 + ; nextln: v2 = x86_psll v1, v3 + return v2 +} + +function %ushr_i64x2() -> i64x2 { +block0: + v0 = iconst.i32 1 + v1 = vconst.i64x2 [1 2] + v2 = ushr v1, v0 + ; check: v3 = bitcast.i64x2 v0 + ; nextln: v2 = x86_psrl v1, v3 + return v2 +} + +function %sshr_i16x8() -> i16x8 { +block0: + v0 = iconst.i32 1 + v1 = vconst.i16x8 [1 2 4 8 16 32 64 128] + v2 = sshr v1, v0 + ; check: v3 = bitcast.i64x2 v0 + ; nextln: v2 = x86_psra v1, v3 + return v2 +} + +function %bitselect_i16x8() -> i16x8 { +block0: + v0 = vconst.i16x8 [0 0 0 0 0 0 0 0] + v1 = vconst.i16x8 [0 0 0 0 0 0 0 0] + v2 = vconst.i16x8 [0 0 0 0 0 0 0 0] + v3 = bitselect v0, v1, v2 + ; check: v4 = band v1, v0 + ; nextln: v5 = band_not v2, v0 + ; nextln: v3 = bor v4, v5 + return v3 +} diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif new file mode 100644 index 0000000000..670c501c9b --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-run.clif @@ -0,0 +1,165 @@ +test run +set enable_simd +target x86_64 skylake + +; TODO: once available, replace all lane extraction with `icmp + all_ones` + +function %ishl_i32x4() -> b1 { +block0: + v0 = iconst.i32 1 + v1 = vconst.i32x4 [1 2 4 8] + v2 = ishl v1, v0 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 2 + + v5 = extractlane v2, 3 + v6 = icmp_imm eq v5, 16 + + v7 = band v4, v6 + return v7 +} +; run + +function %ishl_too_large_i16x8() -> b1 { +block0: + v0 = iconst.i32 17 ; note that this will shift off the end of each lane + v1 = vconst.i16x8 [1 2 4 8 16 32 64 128] + v2 = ishl v1, v0 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 0 + + v5 = extractlane v2, 3 + v6 = icmp_imm eq v5, 0 + + v7 = band v4, v6 + return v7 +} +; run + +function %ushr_i64x2() -> b1 { +block0: + v0 = iconst.i32 1 + v1 = vconst.i64x2 [1 2] + v2 = ushr v1, v0 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 0 + + v5 = extractlane v2, 1 + v6 = icmp_imm eq v5, 1 + + v7 = band v4, v6 + return v7 +} +; run + +function %ushr_too_large_i32x4() -> b1 { +block0: + v0 = iconst.i32 33 ; note that this will shift off the end of each lane + v1 = vconst.i32x4 [1 2 4 8] + v2 = ushr v1, v0 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 0 + + v5 = extractlane v2, 3 + v6 = icmp_imm eq v5, 0 + + v7 = band v4, v6 + return v7 +} +; run + +function %sshr_i16x8() -> b1 { +block0: + v0 = iconst.i32 1 + v1 = vconst.i16x8 [-1 2 4 8 -16 32 64 128] + v2 = sshr v1, v0 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 0xffff ; because of the shifted-in sign-bit, this remains 0xffff == -1 + + v5 = extractlane v2, 4 + v6 = icmp_imm eq v5, 0xfff8 ; -16 has been shifted to -8 == 0xfff8 + + v7 = band v4, v6 + return v7 +} +; run + +function %sshr_too_large_i32x4() -> b1 { +block0: + v0 = iconst.i32 33 ; note that this will shift off the end of each lane + v1 = vconst.i32x4 [1 2 4 -8] + v2 = sshr v1, v0 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 0 + + v5 = extractlane v2, 3 + v6 = icmp_imm eq v5, 0xffff_ffff ; shifting in the sign-bit repeatedly fills the result with 1s + + v7 = band v4, v6 + return v7 +} +; run + +function %bitselect_i8x16() -> b1 { +block0: + v0 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 255] 
; the selector vector
+ v1 = vconst.i8x16 [127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42] ; for each 1-bit in v0 the bit of v1 is selected
+ v2 = vconst.i8x16 [42 0 0 0 0 0 0 0 0 0 0 0 0 0 0 127] ; for each 0-bit in v0 the bit of v2 is selected
+ v3 = bitselect v0, v1, v2
+
+ v4 = extractlane v3, 0
+ v5 = icmp_imm eq v4, 42
+
+ v6 = extractlane v3, 1
+ v7 = icmp_imm eq v6, 0
+
+ v8 = extractlane v3, 15
+ v9 = icmp_imm eq v8, 42
+
+ v10 = band v5, v7
+ v11 = band v10, v9
+ return v11
+}
+; run
+
+function %sshr_imm_i32x4() -> b1 {
+block0:
+ v1 = vconst.i32x4 [1 2 4 -8]
+ v2 = sshr_imm v1, 1
+
+ v3 = vconst.i32x4 [0 1 2 -4]
+ v4 = icmp eq v2, v3
+ v5 = vall_true v4
+ return v5
+}
+; run
+
+function %ushr_imm_i16x8() -> b1 {
+block0:
+ v1 = vconst.i16x8 [1 2 4 -8 0 0 0 0]
+ v2 = ushr_imm v1, 1
+
+ v3 = vconst.i16x8 [0 1 2 32764 0 0 0 0] ; -4 with MSB unset == 32764
+ v4 = icmp eq v2, v3
+ v5 = vall_true v4
+ return v5
+}
+; run
+
+function %ishl_imm_i64x2() -> b1 {
+block0:
+ v1 = vconst.i64x2 [1 0]
+ v2 = ishl_imm v1, 1
+
+ v3 = vconst.i64x2 [2 0]
+ v4 = icmp eq v2, v3
+ v5 = vall_true v4
+ return v5
+}
+; run
diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif
new file mode 100644
index 0000000000..053b50a9f3
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif
@@ -0,0 +1,80 @@
+test binemit
+set enable_simd
+target x86_64 skylake
+
+function %icmp_sgt_i8x16(i8x16, i8x16) -> b8x16 {
+block0(v0: i8x16 [%xmm2], v1: i8x16 [%xmm1]):
+[-, %xmm2] v2 = icmp sgt v0, v1 ; bin: 66 0f 64 d1
+ return v2
+}
+
+function %icmp_sgt_i16x8(i16x8, i16x8) -> b16x8 {
+block0(v0: i16x8 [%xmm4], v1: i16x8 [%xmm3]):
+[-, %xmm4] v2 = icmp sgt v0, v1 ; bin: 66 0f 65 e3
+ return v2
+}
+
+function %icmp_sgt_i32x4(i32x4, i32x4) -> b32x4 {
+block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm5]):
+[-, %xmm6] v2 = icmp sgt v0, v1 ; bin: 66 0f 66 f5
+ return v2
+}
+
+function %icmp_sgt_i64x2(i64x2, i64x2) -> b64x2 {
+block0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm7]):
+[-, %xmm0] v2 = icmp sgt v0, v1 ; bin: 66 0f 38 37 c7
+ return v2
+}
+
+function %min_max_i8x16(i8x16, i8x16) {
+block0(v0: i8x16 [%xmm3], v1: i8x16 [%xmm1]):
+[-, %xmm3] v2 = x86_pmaxs v0, v1 ; bin: 66 0f 38 3c d9
+[-, %xmm3] v3 = x86_pmaxu v0, v1 ; bin: 66 0f de d9
+[-, %xmm3] v4 = x86_pmins v0, v1 ; bin: 66 0f 38 38 d9
+[-, %xmm3] v5 = x86_pminu v0, v1 ; bin: 66 0f da d9
+ return
+}
+
+function %min_max_i16x8(i16x8, i16x8) {
+block0(v0: i16x8 [%xmm2], v1: i16x8 [%xmm5]):
+[-, %xmm2] v2 = x86_pmaxs v0, v1 ; bin: 66 0f ee d5
+[-, %xmm2] v3 = x86_pmaxu v0, v1 ; bin: 66 0f 38 3e d5
+[-, %xmm2] v4 = x86_pmins v0, v1 ; bin: 66 0f ea d5
+[-, %xmm2] v5 = x86_pminu v0, v1 ; bin: 66 0f 38 3a d5
+ return
+}
+
+function %min_max_i32x4(i32x4, i32x4) {
+block0(v0: i32x4 [%xmm2], v1: i32x4 [%xmm4]):
+[-, %xmm2] v2 = x86_pmaxs v0, v1 ; bin: 66 0f 38 3d d4
+[-, %xmm2] v3 = x86_pmaxu v0, v1 ; bin: 66 0f 38 3f d4
+[-, %xmm2] v4 = x86_pmins v0, v1 ; bin: 66 0f 38 39 d4
+[-, %xmm2] v5 = x86_pminu v0, v1 ; bin: 66 0f 38 3b d4
+ return
+}
+
+function %fcmp_f32x4(f32x4, f32x4) {
+block0(v0: f32x4 [%xmm2], v1: f32x4 [%xmm4]):
+[-, %xmm2] v2 = fcmp eq v0, v1 ; bin: 40 0f c2 d4 00
+[-, %xmm2] v3 = fcmp lt v0, v1 ; bin: 40 0f c2 d4 01
+[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 40 0f c2 d4 02
+[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 40 0f c2 d4 03
+[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 40 0f c2 d4 04
+[-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 40 0f c2 d4 05
+[-, %xmm2] v8 = fcmp ugt v0,
v1 ; bin: 40 0f c2 d4 06 +[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 40 0f c2 d4 07 + return +} + +function %fcmp_f64x2(f64x2, f64x2) { +block0(v0: f64x2 [%xmm2], v1: f64x2 [%xmm0]): +[-, %xmm2] v2 = fcmp eq v0, v1 ; bin: 66 40 0f c2 d0 00 +[-, %xmm2] v3 = fcmp lt v0, v1 ; bin: 66 40 0f c2 d0 01 +[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 66 40 0f c2 d0 02 +[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 66 40 0f c2 d0 03 +[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 66 40 0f c2 d0 04 +[-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 66 40 0f c2 d0 05 +[-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 66 40 0f c2 d0 06 +[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 66 40 0f c2 d0 07 + return +} diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif new file mode 100644 index 0000000000..61888ccb6d --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif @@ -0,0 +1,38 @@ +test legalizer +set enable_simd +target x86_64 skylake + +function %icmp_ne_32x4(i32x4, i32x4) -> b32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp ne v0, v1 + ; check: v3 = icmp eq v0, v1 + ; nextln: v4 = vconst.b32x4 0xffffffffffffffffffffffffffffffff + ; nextln: v2 = bxor v4, v3 + return v2 +} + +function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = icmp ugt v0, v1 + ; check: v3 = x86_pmaxu v0, v1 + ; nextln: v4 = icmp eq v3, v1 + ; nextln: v5 = vconst.b32x4 0xffffffffffffffffffffffffffffffff + ; nextln: v2 = bxor v5, v4 + return v2 +} + +function %icmp_sge_i16x8(i16x8, i16x8) -> b16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = icmp sge v0, v1 + ; check: v3 = x86_pmins v0, v1 + ; nextln: v2 = icmp eq v3, v1 + return v2 +} + +function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = icmp uge v0, v1 + ; check: v3 = x86_pminu v0, v1 + ; nextln: v2 = icmp eq v3, v1 + return v2 +} diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif new file mode 100644 index 0000000000..7cbd285860 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif @@ -0,0 +1,247 @@ +test run +set enable_simd +target x86_64 skylake + +function %icmp_eq_i8x16() -> b8 { +block0: + v0 = vconst.i8x16 0x00 + v1 = vconst.i8x16 0x00 + v2 = icmp eq v0, v1 + v3 = extractlane v2, 0 + return v3 +} +; run + +function %icmp_eq_i64x2() -> b64 { +block0: + v0 = vconst.i64x2 0xffffffffffffffffffffffffffffffff + v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff + v2 = icmp eq v0, v1 + v3 = extractlane v2, 1 + return v3 +} +; run + +function %icmp_ne_i32x4() -> b1 { +block0: + v0 = vconst.i32x4 [0 1 2 3] + v1 = vconst.i32x4 [7 7 7 7] + v2 = icmp ne v0, v1 + v3 = vall_true v2 + return v3 +} +; run + +function %icmp_ne_i16x8() -> b1 { +block0: + v0 = vconst.i16x8 [0 1 2 3 4 5 6 7] + v1 = vconst.i16x8 [0 1 2 3 4 5 6 7] + v2 = icmp ne v0, v1 + v3 = vall_true v2 + v4 = bint.i32 v3 + v5 = icmp_imm eq v4, 0 + return v5 +} +; run + +function %icmp_sgt_i8x16() -> b1 { +block0: + v0 = vconst.i8x16 [0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0] + v1 = vconst.i8x16 [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0xff] + v2 = icmp sgt v0, v1 + v3 = raw_bitcast.i8x16 v2 + v4 = vconst.i8x16 [0 0 0xff 0 0 0 0 0 0 0 0 0 0 0 0 0xff] + v7 = icmp eq v3, v4 + v8 = vall_true v7 + return v8 +} +; run + +function %icmp_sgt_i64x2() -> b1 { +block0: + v0 = vconst.i64x2 [0 -42] + v1 = vconst.i64x2 [-1 -43] + v2 = icmp sgt v0, v1 + v8 = vall_true v2 + return v8 +} +; run + 
+function %maxs_i8x16() -> b1 {
+block0:
+ v0 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] ; 1 will be greater than -1 == 0xff with
+ ; signed max
+ v1 = vconst.i8x16 [0xff 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
+ v2 = x86_pmaxs v0, v1
+ v8 = vall_true v2
+ return v8
+}
+; run
+
+function %maxu_i16x8() -> b1 {
+block0:
+ v0 = vconst.i16x8 [0 1 1 1 1 1 1 1]
+ v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1] ; -1 == 0xffff will be greater with unsigned max
+ v2 = x86_pmaxu v0, v1
+ v8 = vall_true v2
+ return v8
+}
+; run
+
+function %mins_i32x4() -> b1 {
+block0:
+ v0 = vconst.i32x4 [0 1 1 1]
+ v1 = vconst.i32x4 [-1 1 1 1] ; -1 == 0xffffffff will be less with signed min
+ v2 = x86_pmins v0, v1
+ v8 = vall_true v2
+ return v8
+}
+; run
+
+function %minu_i8x16() -> b1 {
+block0:
+ v0 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] ; 1 < 2 with unsigned min
+ v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
+ v2 = x86_pminu v0, v1
+ v8 = vall_true v2
+ return v8
+}
+; run
+
+function %icmp_ugt_i8x16() -> b1 {
+block0:
+ v0 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]
+ v1 = vconst.i8x16 [0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
+ v2 = icmp ugt v0, v1
+ v8 = vall_true v2
+ return v8
+}
+; run
+
+function %icmp_sge_i16x8() -> b1 {
+block0:
+ v0 = vconst.i16x8 [-1 1 2 3 4 5 6 7]
+ v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1]
+ v2 = icmp sge v0, v1
+ v8 = vall_true v2
+ return v8
+}
+; run
+
+function %icmp_uge_i32x4() -> b1 {
+block0:
+ v0 = vconst.i32x4 [1 2 3 4]
+ v1 = vconst.i32x4 [1 1 1 1]
+ v2 = icmp uge v0, v1
+ v8 = vall_true v2
+ return v8
+}
+; run
+
+function %icmp_slt_i32x4() -> b1 {
+block0:
+ v0 = vconst.i32x4 [-1 1 1 1]
+ v1 = vconst.i32x4 [1 2 3 4]
+ v2 = icmp slt v0, v1
+ v8 = vall_true v2
+ return v8
+}
+; run
+
+function %icmp_ult_i32x4() -> b1 {
+block0:
+ v0 = vconst.i32x4 [1 1 1 1]
+ v1 = vconst.i32x4 [-1 2 3 4] ; -1 = 0xffff...
will be greater than 1 when unsigned + v2 = icmp ult v0, v1 + v8 = vall_true v2 + return v8 +} +; run + + +function %icmp_ult_i16x8() -> b1 { +block0: + v0 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1] + v1 = vconst.i16x8 [-1 -1 -1 -1 -1 -1 -1 -1] + v2 = icmp ult v0, v1 + v3 = vconst.i16x8 0x00 + v4 = raw_bitcast.i16x8 v2 + v5 = icmp eq v3, v4 + v8 = vall_true v5 + return v8 +} +; run + +function %icmp_sle_i16x8() -> b1 { +block0: + v0 = vconst.i16x8 [-1 -1 0 0 0 0 0 0] + v1 = vconst.i16x8 [-1 0 0 0 0 0 0 0] + v2 = icmp sle v0, v1 + v8 = vall_true v2 + return v8 +} +; run + +function %icmp_ule_i16x8() -> b1 { +block0: + v0 = vconst.i16x8 [-1 0 0 0 0 0 0 0] + v1 = vconst.i16x8 [-1 -1 0 0 0 0 0 0] + v2 = icmp ule v0, v1 + v8 = vall_true v2 + return v8 +} +; run + +function %fcmp_eq_f32x4() -> b1 { +block0: + v0 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0] + v1 = vconst.f32x4 [0.0 -0x4.2 0x0.33333 -0.0] + v2 = fcmp eq v0, v1 + v8 = vall_true v2 + return v8 +} +; run + +function %fcmp_lt_f32x4() -> b1 { +block0: + v0 = vconst.f32x4 [0.0 -0x4.2 0x0.0 -0.0] + v1 = vconst.f32x4 [0x0.001 0x4.2 0x0.33333 0x1.0] + v2 = fcmp lt v0, v1 + v8 = vall_true v2 + return v8 +} +; run + +function %fcmp_ge_f64x2() -> b1 { +block0: + v0 = vconst.f64x2 [0x0.0 0x4.2] + v1 = vconst.f64x2 [0.0 0x4.1] + v2 = fcmp ge v0, v1 + v8 = vall_true v2 + return v8 +} +; run + +function %fcmp_uno_f64x2() -> b1 { +block0: + v0 = vconst.f64x2 [0.0 NaN] + v1 = vconst.f64x2 [NaN 0x4.1] + v2 = fcmp uno v0, v1 + v8 = vall_true v2 + return v8 +} +; run + +function %fcmp_gt_nans_f32x4() -> b1 { +block0: + v0 = vconst.f32x4 [NaN 0x42.0 -NaN NaN] + v1 = vconst.f32x4 [NaN NaN 0x42.0 Inf] + v2 = fcmp gt v0, v1 + ; now check that the result v2 is all zeroes + v3 = vconst.i32x4 0x00 + v4 = raw_bitcast.i32x4 v2 + v5 = icmp eq v3, v4 + v8 = vall_true v5 + return v8 +} +; run diff --git a/cranelift/filetests/filetests/isa/x86/simd-construction-run.clif b/cranelift/filetests/filetests/isa/x86/simd-construction-run.clif new file mode 100644 index 0000000000..ef2aeea26d --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-construction-run.clif @@ -0,0 +1,14 @@ +test run +set enable_simd +target x86_64 skylake + +function %splat_i64x2() -> b1 { +block0: + v0 = iconst.i64 -1 + v1 = splat.i64x2 v0 + v2 = vconst.i64x2 [-1 -1] + v3 = icmp eq v1, v2 + v8 = vall_true v3 + return v8 +} +; run diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif new file mode 100644 index 0000000000..2f7c4f5b22 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif @@ -0,0 +1,33 @@ +test binemit +set enable_simd +target x86_64 skylake + +function %bor_b16x8(b16x8, b16x8) -> b16x8 { +block0(v0: b16x8 [%xmm2], v1: b16x8 [%xmm1]): +[-, %xmm2] v2 = bor v0, v1 ; bin: 66 0f eb d1 + return v2 +} + +function %band_b64x2(b64x2, b64x2) -> b64x2 { +block0(v0: b64x2 [%xmm6], v1: b64x2 [%xmm3]): +[-, %xmm6] v2 = band v0, v1 ; bin: 66 0f db f3 + return v2 +} + +function %bxor_b32x4(b32x4, b32x4) -> b32x4 { +block0(v0: b32x4 [%xmm4], v1: b32x4 [%xmm0]): +[-, %xmm4] v2 = bxor v0, v1 ; bin: 66 0f ef e0 + return v2 +} + +function %band_not_b64x2(b64x2, b64x2) -> b64x2 { +block0(v0: b64x2 [%xmm6], v1: b64x2 [%xmm3]): +[-, %xmm3] v2 = band_not v0, v1 ; bin: 66 0f df de + return v2 +} + +function %x86_ptest_f64x2(f64x2, f64x2) { +block0(v0: f64x2 [%xmm0], v1: f64x2 [%xmm2]): +[-, %rflags] v2 = x86_ptest v0, v1 ; bin: 66 0f 38 17 c2 + return +} diff --git 
a/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif new file mode 100644 index 0000000000..a1248c8bba --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif @@ -0,0 +1,29 @@ +test legalizer +set enable_simd +target x86_64 skylake + +function %bnot_b32x4(b32x4) -> b32x4 { +block0(v0: b32x4): + v1 = bnot v0 + ; check: v2 = vconst.b32x4 0xffffffffffffffffffffffffffffffff + ; nextln: v1 = bxor v2, v0 + return v1 +} + +function %vany_true_b32x4(b32x4) -> b1 { +block0(v0: b32x4): + v1 = vany_true v0 + ; check: v2 = x86_ptest v0, v0 + ; nextln: v1 = trueif ne v2 + return v1 +} + +function %vall_true_i64x2(i64x2) -> b1 { +block0(v0: i64x2): + v1 = vall_true v0 + ; check: v2 = vconst.i64x2 0x00 + ; nextln: v3 = icmp eq v0, v2 + ; nextln: v4 = x86_ptest v3, v3 + ; nextln: v1 = trueif eq v4 + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif new file mode 100644 index 0000000000..619d300bfe --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif @@ -0,0 +1,11 @@ +test rodata +set enable_simd +target x86_64 skylake + +function %bnot_b32x4(b32x4) -> b32x4 { +block0(v0: b32x4): + v1 = bnot v0 + return v1 +} + +; sameln: [FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF] diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-run.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-run.clif new file mode 100644 index 0000000000..35fc44bc6a --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/simd-logical-run.clif @@ -0,0 +1,59 @@ +test run +set enable_simd +target x86_64 skylake + +function %bnot() -> b32 { +block0: + v0 = vconst.b32x4 [true true true false] + v1 = bnot v0 + v2 = extractlane v1, 3 + return v2 +} +; run + +function %band_not() -> b1 { +block0: + v0 = vconst.i16x8 [1 0 0 0 0 0 0 0] + v1 = vconst.i16x8 [0 0 0 0 0 0 0 0] + v2 = band_not v0, v1 + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 1 + return v4 +} +; run + +function %vany_true_i16x8() -> b1 { +block0: + v0 = vconst.i16x8 [1 0 0 0 0 0 0 0] + v1 = vany_true v0 + return v1 +} +; run + +function %vany_true_b32x4() -> b1 { +block0: + v0 = vconst.b32x4 [false false false false] + v1 = vany_true v0 + v2 = bint.i32 v1 + v3 = icmp_imm eq v2, 0 + return v3 +} +; run + +function %vall_true_i16x8() -> b1 { +block0: + v0 = vconst.i16x8 [1 0 0 0 0 0 0 0] + v1 = vall_true v0 + v2 = bint.i32 v1 + v3 = icmp_imm eq v2, 0 + return v3 +} +; run + +function %vall_true_b32x4() -> b1 { +block0: + v0 = vconst.b32x4 [true true true true] + v1 = vall_true v0 + return v1 +} +; run diff --git a/cranelift/filetests/filetests/isa/x86/stack-addr64.clif b/cranelift/filetests/filetests/isa/x86/stack-addr64.clif new file mode 100644 index 0000000000..bcb441cd6b --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/stack-addr64.clif @@ -0,0 +1,45 @@ +; binary emission of stack address instructions on x86-64. 
+test binemit +set opt_level=none +target x86_64 haswell + +; The binary encodings can be verified with the command: +; +; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/stack-addr64.clif | llvm-mc -show-encoding -triple=x86_64 +; + +function %stack_addr() { + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + ss4 = explicit_slot 8, offset 0 + ss5 = explicit_slot 8, offset 1024 + +block0: +[-,%rcx] v0 = stack_addr.i64 ss0 ; bin: 48 8d 8c 24 00000808 +[-,%rcx] v1 = stack_addr.i64 ss1 ; bin: 48 8d 8c 24 00000408 +[-,%rcx] v2 = stack_addr.i64 ss2 ; bin: 48 8d 8c 24 00000008 +[-,%rcx] v3 = stack_addr.i64 ss3 ; bin: 48 8d 8c 24 00000000 +[-,%rcx] v4 = stack_addr.i64 ss4 ; bin: 48 8d 8c 24 00000808 +[-,%rcx] v5 = stack_addr.i64 ss5 ; bin: 48 8d 8c 24 00000c08 + +[-,%rcx] v20 = stack_addr.i64 ss4+1 ; bin: 48 8d 8c 24 00000809 +[-,%rcx] v21 = stack_addr.i64 ss4+2 ; bin: 48 8d 8c 24 0000080a +[-,%rcx] v22 = stack_addr.i64 ss4+2048 ; bin: 48 8d 8c 24 00001008 +[-,%rcx] v23 = stack_addr.i64 ss4-4096 ; bin: 48 8d 8c 24 fffff808 + +[-,%r8] v50 = stack_addr.i64 ss0 ; bin: 4c 8d 84 24 00000808 +[-,%r8] v51 = stack_addr.i64 ss1 ; bin: 4c 8d 84 24 00000408 +[-,%r8] v52 = stack_addr.i64 ss2 ; bin: 4c 8d 84 24 00000008 +[-,%r8] v53 = stack_addr.i64 ss3 ; bin: 4c 8d 84 24 00000000 +[-,%r8] v54 = stack_addr.i64 ss4 ; bin: 4c 8d 84 24 00000808 +[-,%r8] v55 = stack_addr.i64 ss5 ; bin: 4c 8d 84 24 00000c08 + +[-,%r8] v70 = stack_addr.i64 ss4+1 ; bin: 4c 8d 84 24 00000809 +[-,%r8] v71 = stack_addr.i64 ss4+2 ; bin: 4c 8d 84 24 0000080a +[-,%r8] v72 = stack_addr.i64 ss4+2048 ; bin: 4c 8d 84 24 00001008 +[-,%r8] v73 = stack_addr.i64 ss4-4096 ; bin: 4c 8d 84 24 fffff808 + + return +} diff --git a/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif b/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif new file mode 100644 index 0000000000..a74a1dfc32 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif @@ -0,0 +1,21 @@ +; legalization of stack load and store instructions on x86-64. 
+test legalizer +set opt_level=none +target x86_64 haswell + +function %stack_load_and_store() { + ss0 = explicit_slot 8, offset 0 + +block0: + v0 = stack_load.i64 ss0 + +; check: v1 = stack_addr.i64 ss0 +; check: v0 = load.i64 notrap aligned v1 + + stack_store.i64 v0, ss0 + +; check: v2 = stack_addr.i64 ss0 +; check: store notrap aligned v0, v2 + + return +} diff --git a/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif b/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif new file mode 100644 index 0000000000..2c5bb1553b --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif @@ -0,0 +1,19 @@ +test compile +target x86_64 + +function u0:0(i8) -> i8 { + ss0 = explicit_slot 1 + +block0(v0: i8): + stack_store v0, ss0 + ; check: v2 = stack_addr.i64 ss0 + ; nextln: v3 = uextend.i32 v0 + ; nextln: istore8 notrap aligned v3, v2 + + v1 = stack_load.i8 ss0 + ; check: v4 = stack_addr.i64 ss0 + ; nextln: v5 = uload8.i32 notrap aligned v4 + ; nextln: v1 = ireduce.i8 v5 + + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/tls_elf.clif b/cranelift/filetests/filetests/isa/x86/tls_elf.clif new file mode 100644 index 0000000000..3788dd7d27 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/tls_elf.clif @@ -0,0 +1,18 @@ +test regalloc +set tls_model=elf_gd +target x86_64 + +function u0:0(i32) -> i32, i64 { +gv0 = symbol colocated tls u1:0 + +block0(v0: i32): + ; check: block0(v2: i32 [%rdi]): + ; nextln: [RexOp1spillSib32#89,ss0] v0 = spill v2 + v1 = global_value.i64 gv0 + ; nextln: [elf_tls_get_addr#00,%rax] v1 = x86_elf_tls_get_addr gv0 + ; nextln: [RexOp1fillSib32#8b,%r15] v3 = fill v0 + return v0, v1 + ; nextln: [RexOp1rmov#8089] regmove v1, %rax -> %rdx + ; nextln: [RexOp1rmov#89] regmove v3, %r15 -> %rax + ; nextln: [Op1ret#c3] return v3, v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/tls_enc.clif b/cranelift/filetests/filetests/isa/x86/tls_enc.clif new file mode 100644 index 0000000000..acdda733a5 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/tls_enc.clif @@ -0,0 +1,11 @@ +test binemit +target x86_64 + +function u0:0() -> i64, i64 { +gv0 = symbol colocated tls u1:0 + +block0: + [-, %rax] v0 = x86_elf_tls_get_addr gv0 ; bin: 66 48 8d 3d ElfX86_64TlsGd(u1:0-4) 00000000 66 66 48 e8 CallPLTRel4(%ElfTlsGetAddr-4) 00000000 + [-, %rax] v1 = x86_macho_tls_get_addr gv0; bin: 48 8b 3d MachOX86_64Tlv(u1:0-4) 00000000 ff 17 + return v0, v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/tls_macho.clif b/cranelift/filetests/filetests/isa/x86/tls_macho.clif new file mode 100644 index 0000000000..d2c637d2e8 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/tls_macho.clif @@ -0,0 +1,18 @@ +test regalloc +set tls_model=macho +target x86_64 + +function u0:0(i32) -> i32, i64 { +gv0 = symbol colocated tls u1:0 + +block0(v0: i32): + ; check: block0(v2: i32 [%rdi]): + ; nextln: [RexOp1spillSib32#89,ss0] v0 = spill v2 + v1 = global_value.i64 gv0 + ; nextln: [macho_tls_get_addr#00,%rax] v1 = x86_macho_tls_get_addr gv0 + ; nextln: [RexOp1fillSib32#8b,%r15] v3 = fill v0 + return v0, v1 + ; nextln: [RexOp1rmov#8089] regmove v1, %rax -> %rdx + ; nextln: [RexOp1rmov#89] regmove v3, %r15 -> %rax + ; nextln: [Op1ret#c3] return v3, v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif b/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif new file mode 100644 index 0000000000..7d778aa778 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif @@ -0,0 +1,14 @@ +test 
compile +target x86_64 + +function u0:0(i8) -> i16 fast { +block0(v0: i8): + v1 = uextend.i16 v0 + return v1 +} + +function u0:1(i8) -> i16 fast { +block0(v0: i8): + v1 = sextend.i16 v0 + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/vconst-binemit.clif b/cranelift/filetests/filetests/isa/x86/vconst-binemit.clif new file mode 100644 index 0000000000..15522e3d38 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/vconst-binemit.clif @@ -0,0 +1,11 @@ +test binemit +set opt_level=speed_and_size +set enable_simd +target x86_64 + +function %test_vconst_b8() { +block0: +[-, %xmm2] v0 = vconst.b8x16 0x01 ; bin: 0f 10 15 00000008 PCRelRodata4(15) +[-, %xmm3] v1 = vconst.b8x16 0x02 ; bin: 0f 10 1d 00000011 PCRelRodata4(31) + return +} diff --git a/cranelift/filetests/filetests/isa/x86/vconst-opt-run.clif b/cranelift/filetests/filetests/isa/x86/vconst-opt-run.clif new file mode 100644 index 0000000000..9294614c96 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/vconst-opt-run.clif @@ -0,0 +1,23 @@ +test run +set enable_simd +target x86_64 + +; TODO move to vconst-run.clif + +function %test_vconst_zeroes() -> b1 { +block0: + v0 = vconst.i8x16 0x00 + v1 = extractlane v0, 4 + v2 = icmp_imm eq v1, 0 + return v2 +} +; run + +function %test_vconst_ones() -> b1 { +block0: + v0 = vconst.i8x16 0xffffffffffffffffffffffffffffffff + v1 = extractlane v0, 2 + v2 = icmp_imm eq v1, 0xff + return v2 +} +; run diff --git a/cranelift/filetests/filetests/isa/x86/vconst-opt.clif b/cranelift/filetests/filetests/isa/x86/vconst-opt.clif new file mode 100644 index 0000000000..bc444b7784 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/vconst-opt.clif @@ -0,0 +1,12 @@ +test binemit +set enable_simd +target x86_64 + +; TODO move to vconst-compile.clif or vconst-binemit.clif + +function %test_vconst_optimizations() { +block0: +[-, %xmm4] v0 = vconst.b8x16 0x00 ; bin: 66 0f ef e4 +[-, %xmm7] v1 = vconst.b8x16 0xffffffffffffffffffffffffffffffff ; bin: 66 0f 74 ff + return +} diff --git a/cranelift/filetests/filetests/isa/x86/vconst-rodata.clif b/cranelift/filetests/filetests/isa/x86/vconst-rodata.clif new file mode 100644 index 0000000000..0df8493d5d --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/vconst-rodata.clif @@ -0,0 +1,19 @@ +test rodata +set enable_simd=true +target x86_64 haswell + +function %test_vconst_i32() -> i32x4 { +block0: + v0 = vconst.i32x4 0x1234 + return v0 +} + +; sameln: [34, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + +function %test_vconst_b16() -> b16x8 { +block0: + v0 = vconst.b16x8 [true false true false true false true true] + return v0 +} + +; sameln: [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0] diff --git a/cranelift/filetests/filetests/isa/x86/vconst-run.clif b/cranelift/filetests/filetests/isa/x86/vconst-run.clif new file mode 100644 index 0000000000..60d94fbccd --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/vconst-run.clif @@ -0,0 +1,21 @@ +test run +set enable_simd + +function %test_vconst_syntax() -> b1 { +block0: + v0 = vconst.i32x4 0x00000004_00000003_00000002_00000001 ; build constant using hexadecimal syntax + v1 = vconst.i32x4 [1 2 3 4] ; build constant using literal list syntax + + ; verify lane 1 matches + v2 = extractlane v0, 1 + v3 = extractlane v1, 1 + v4 = icmp eq v3, v2 + + ; verify lane 1 has the correct value + v5 = icmp_imm eq v3, 2 + + v6 = band v4, v5 + return v6 +} + +; run diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif 
b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif new file mode 100644 index 0000000000..55a6c59bed --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif @@ -0,0 +1,45 @@ +test compile +set opt_level=speed_and_size +set is_pic +target x86_64 haswell + +; check if for one arg we use the right register +function %one_arg(i64) windows_fastcall { +block0(v0: i64): + return +} +; check: function %one_arg(i64 [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { +; nextln: ss0 = incoming_arg 16, offset -48 + +; check if we still use registers for 4 arguments +function %four_args(i64, i64, i64, i64) windows_fastcall { +block0(v0: i64, v1: i64, v2: i64, v3: i64): + return +} +; check: function %four_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { + +; check if float arguments are passed through XMM registers +function %four_float_args(f64, f64, f64, f64) windows_fastcall { +block0(v0: f64, v1: f64, v2: f64, v3: f64): + return +} +; check: function %four_float_args(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { + +; check if we use stack space for > 4 arguments +function %five_args(i64, i64, i64, i64, i64) windows_fastcall { +block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64): + return +} +; check: function %five_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 [32], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { + +function %mixed_int_float(i64, f64, i64, f32) windows_fastcall { +block0(v0: i64, v1: f64, v2: i64, v3: f32): + return +} +; check: function %mixed_int_float(i64 [%rcx], f64 [%xmm1], i64 [%r8], f32 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { + +function %ret_val_float(f32, f64, i64, i64) -> f64 windows_fastcall { +block0(v0: f32, v1: f64, v2: i64, v3: i64): + return v1 +} +; check: function %ret_val_float(f32 [%xmm0], f64 [%xmm1], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> f64 [%xmm0], i64 fp [%rbp] windows_fastcall { diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif new file mode 100644 index 0000000000..b146f0ac76 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif @@ -0,0 +1,221 @@ +test unwind +set opt_level=speed_and_size +set is_pic +target x86_64 haswell + +; check that there is no unwind information for a system_v function +function %not_fastcall() system_v { +block0: + return +} +; sameln: No unwind information. 
+ +; check the unwind information with a function with no args +function %no_args() windows_fastcall { +block0: + return +} +; sameln: UnwindInfo { +; nextln: version: 1, +; nextln: flags: 0, +; nextln: prologue_size: 8, +; nextln: unwind_code_count_raw: 3, +; nextln: frame_register: 5, +; nextln: frame_register_offset: 0, +; nextln: unwind_codes: [ +; nextln: UnwindCode { +; nextln: offset: 8, +; nextln: op: SmallStackAlloc, +; nextln: info: 3, +; nextln: value: None, +; nextln: }, +; nextln: UnwindCode { +; nextln: offset: 4, +; nextln: op: SetFramePointer, +; nextln: info: 0, +; nextln: value: None, +; nextln: }, +; nextln: UnwindCode { +; nextln: offset: 1, +; nextln: op: PushNonvolatileRegister, +; nextln: info: 5, +; nextln: value: None, +; nextln: }, +; nextln: ], +; nextln: } + +; check a function with medium-sized stack alloc +function %medium_stack() windows_fastcall { + ss0 = explicit_slot 100000 +block0: + return +} +; sameln: UnwindInfo { +; nextln: version: 1, +; nextln: flags: 0, +; nextln: prologue_size: 17, +; nextln: unwind_code_count_raw: 4, +; nextln: frame_register: 5, +; nextln: frame_register_offset: 0, +; nextln: unwind_codes: [ +; nextln: UnwindCode { +; nextln: offset: 17, +; nextln: op: LargeStackAlloc, +; nextln: info: 0, +; nextln: value: U16( +; nextln: 12504, +; nextln: ), +; nextln: }, +; nextln: UnwindCode { +; nextln: offset: 4, +; nextln: op: SetFramePointer, +; nextln: info: 0, +; nextln: value: None, +; nextln: }, +; nextln: UnwindCode { +; nextln: offset: 1, +; nextln: op: PushNonvolatileRegister, +; nextln: info: 5, +; nextln: value: None, +; nextln: }, +; nextln: ], +; nextln: } + +; check a function with large-sized stack alloc +function %large_stack() windows_fastcall { + ss0 = explicit_slot 524288 +block0: + return +} +; sameln: UnwindInfo { +; nextln: version: 1, +; nextln: flags: 0, +; nextln: prologue_size: 17, +; nextln: unwind_code_count_raw: 5, +; nextln: frame_register: 5, +; nextln: frame_register_offset: 0, +; nextln: unwind_codes: [ +; nextln: UnwindCode { +; nextln: offset: 17, +; nextln: op: LargeStackAlloc, +; nextln: info: 1, +; nextln: value: U32( +; nextln: 524320, +; nextln: ), +; nextln: }, +; nextln: UnwindCode { +; nextln: offset: 4, +; nextln: op: SetFramePointer, +; nextln: info: 0, +; nextln: value: None, +; nextln: }, +; nextln: UnwindCode { +; nextln: offset: 1, +; nextln: op: PushNonvolatileRegister, +; nextln: info: 5, +; nextln: value: None, +; nextln: }, +; nextln: ], +; nextln: } + +; check a function that has CSRs +function %lots_of_registers(i64, i64) windows_fastcall { +block0(v0: i64, v1: i64): + v2 = load.i32 v0+0 + v3 = load.i32 v0+8 + v4 = load.i32 v0+16 + v5 = load.i32 v0+24 + v6 = load.i32 v0+32 + v7 = load.i32 v0+40 + v8 = load.i32 v0+48 + v9 = load.i32 v0+56 + v10 = load.i32 v0+64 + v11 = load.i32 v0+72 + v12 = load.i32 v0+80 + v13 = load.i32 v0+88 + v14 = load.i32 v0+96 + store.i32 v2, v1+0 + store.i32 v3, v1+8 + store.i32 v4, v1+16 + store.i32 v5, v1+24 + store.i32 v6, v1+32 + store.i32 v7, v1+40 + store.i32 v8, v1+48 + store.i32 v9, v1+56 + store.i32 v10, v1+64 + store.i32 v11, v1+72 + store.i32 v12, v1+80 + store.i32 v13, v1+88 + store.i32 v14, v1+96 + return +} +; sameln: UnwindInfo { +; nextln: version: 1, +; nextln: flags: 0, +; nextln: prologue_size: 19, +; nextln: unwind_code_count_raw: 10, +; nextln: frame_register: 5, +; nextln: frame_register_offset: 0, +; nextln: unwind_codes: [ +; nextln: UnwindCode { +; nextln: offset: 19, +; nextln: op: SmallStackAlloc, +; nextln: info: 3, +; nextln: value: 
None, +; nextln: }, +; nextln: UnwindCode { +; nextln: offset: 15, +; nextln: op: PushNonvolatileRegister, +; nextln: info: 15, +; nextln: value: None, +; nextln: }, +; nextln: UnwindCode { +; nextln: offset: 13, +; nextln: op: PushNonvolatileRegister, +; nextln: info: 14, +; nextln: value: None, +; nextln: }, +; nextln: UnwindCode { +; nextln: offset: 11, +; nextln: op: PushNonvolatileRegister, +; nextln: info: 13, +; nextln: value: None, +; nextln: }, +; nextln: UnwindCode { +; nextln: offset: 9, +; nextln: op: PushNonvolatileRegister, +; nextln: info: 12, +; nextln: value: None, +; nextln: }, +; nextln: UnwindCode { +; nextln: offset: 7, +; nextln: op: PushNonvolatileRegister, +; nextln: info: 7, +; nextln: value: None, +; nextln: }, +; nextln: UnwindCode { +; nextln: offset: 6, +; nextln: op: PushNonvolatileRegister, +; nextln: info: 6, +; nextln: value: None, +; nextln: }, +; nextln: UnwindCode { +; nextln: offset: 5, +; nextln: op: PushNonvolatileRegister, +; nextln: info: 3, +; nextln: value: None, +; nextln: }, +; nextln: UnwindCode { +; nextln: offset: 4, +; nextln: op: SetFramePointer, +; nextln: info: 0, +; nextln: value: None, +; nextln: }, +; nextln: UnwindCode { +; nextln: offset: 1, +; nextln: op: PushNonvolatileRegister, +; nextln: info: 5, +; nextln: value: None, +; nextln: }, +; nextln: ], +; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/windows_systemv_x64_fde.clif b/cranelift/filetests/filetests/isa/x86/windows_systemv_x64_fde.clif new file mode 100644 index 0000000000..31b75b6c16 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/windows_systemv_x64_fde.clif @@ -0,0 +1,54 @@ +test fde +set opt_level=speed_and_size +set is_pic +target x86_64 haswell + +; check that there is no libunwind information for a windows_fastcall function +function %not_fastcall() windows_fastcall { +block0: + return +} +; sameln: No unwind information. 
+ +; check the libunwind information with a function with no args +function %no_args() system_v { +block0: + return +} +; sameln: 0x00000000: CIE +; nextln: length: 0x00000014 +; nextln: version: 0x01 +; nextln: code_align: 1 +; nextln: data_align: -8 +; nextln: ra_register: 0x10 +; nextln: DW_CFA_def_cfa (r7, 8) +; nextln: DW_CFA_offset (r16, 1) +; nextln: DW_CFA_nop +; nextln: DW_CFA_nop +; nextln: DW_CFA_nop +; nextln: DW_CFA_nop +; nextln: DW_CFA_nop +; nextln: DW_CFA_nop +; nextln: Instructions: Init State: +; nextln: +; nextln: +; nextln: 0x00000018: FDE +; nextln: length: 0x00000024 +; nextln: CIE_pointer: 0x00000000 +; nextln: start_addr: 0x0000000000000000 +; nextln: range_size: 0x0000000000000006 (end_addr = 0x0000000000000006) +; nextln: Instructions: +; nextln: DW_CFA_advance_loc (1) +; nextln: DW_CFA_def_cfa_offset (16) +; nextln: DW_CFA_offset (r6, 2) +; nextln: DW_CFA_advance_loc (3) +; nextln: DW_CFA_def_cfa_register (r6) +; nextln: DW_CFA_advance_loc (1) +; nextln: DW_CFA_def_cfa (r7, 8) +; nextln: DW_CFA_nop +; nextln: DW_CFA_nop +; nextln: DW_CFA_nop +; nextln: DW_CFA_nop +; nextln: +; nextln: Entry: 24 +; nextln: Relocs: [(Abs8, 32)] diff --git a/cranelift/filetests/filetests/legalizer/bitrev-i128.clif b/cranelift/filetests/filetests/legalizer/bitrev-i128.clif new file mode 100644 index 0000000000..fad0f2aace --- /dev/null +++ b/cranelift/filetests/filetests/legalizer/bitrev-i128.clif @@ -0,0 +1,89 @@ +test legalizer +target x86_64 + +function %reverse_bits(i128) -> i128 { +block0(v0: i128): + v1 = bitrev.i128 v0 + return v1 +} + +; check: block0(v2: i64, v3: i64): +; check: v0 = iconcat v2, v3 +; check: v33 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa +; check: v6 = band v2, v33 +; check: v7 = ushr_imm v6, 1 +; check: v34 = iconst.i64 0x5555_5555_5555_5555 +; check: v8 = band v2, v34 +; check: v9 = ishl_imm v8, 1 +; check: v10 = bor v7, v9 +; check: v35 = iconst.i64 0xcccc_cccc_cccc_cccc +; check: v11 = band v10, v35 +; check: v12 = ushr_imm v11, 2 +; check: v36 = iconst.i64 0x3333_3333_3333_3333 +; check: v13 = band v10, v36 +; check: v14 = ishl_imm v13, 2 +; check: v15 = bor v12, v14 +; check: v37 = iconst.i64 0xf0f0_f0f0_f0f0_f0f0 +; check: v16 = band v15, v37 +; check: v17 = ushr_imm v16, 4 +; check: v38 = iconst.i64 0x0f0f_0f0f_0f0f_0f0f +; check: v18 = band v15, v38 +; check: v19 = ishl_imm v18, 4 +; check: v20 = bor v17, v19 +; check: v39 = iconst.i64 0xff00_ff00_ff00_ff00 +; check: v21 = band v20, v39 +; check: v22 = ushr_imm v21, 8 +; check: v40 = iconst.i64 0x00ff_00ff_00ff_00ff +; check: v23 = band v20, v40 +; check: v24 = ishl_imm v23, 8 +; check: v25 = bor v22, v24 +; check: v41 = iconst.i64 0xffff_0000_ffff_0000 +; check: v26 = band v25, v41 +; check: v27 = ushr_imm v26, 16 +; check: v42 = iconst.i64 0xffff_0000_ffff +; check: v28 = band v25, v42 +; check: v29 = ishl_imm v28, 16 +; check: v30 = bor v27, v29 +; check: v31 = ushr_imm v30, 32 +; check: v32 = ishl_imm v30, 32 +; check: v4 = bor v31, v32 +; check: v70 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa +; check: v43 = band v3, v70 +; check: v44 = ushr_imm v43, 1 +; check: v71 = iconst.i64 0x5555_5555_5555_5555 +; check: v45 = band v3, v71 +; check: v46 = ishl_imm v45, 1 +; check: v47 = bor v44, v46 +; check: v72 = iconst.i64 0xcccc_cccc_cccc_cccc +; check: v48 = band v47, v72 +; check: v49 = ushr_imm v48, 2 +; check: v73 = iconst.i64 0x3333_3333_3333_3333 +; check: v50 = band v47, v73 +; check: v51 = ishl_imm v50, 2 +; check: v52 = bor v49, v51 +; check: v74 = iconst.i64 0xf0f0_f0f0_f0f0_f0f0 +; check: v53 = band v52, 
v74 +; check: v54 = ushr_imm v53, 4 +; check: v75 = iconst.i64 0x0f0f_0f0f_0f0f_0f0f +; check: v55 = band v52, v75 +; check: v56 = ishl_imm v55, 4 +; check: v57 = bor v54, v56 +; check: v76 = iconst.i64 0xff00_ff00_ff00_ff00 +; check: v58 = band v57, v76 +; check: v59 = ushr_imm v58, 8 +; check: v77 = iconst.i64 0x00ff_00ff_00ff_00ff +; check: v60 = band v57, v77 +; check: v61 = ishl_imm v60, 8 +; check: v62 = bor v59, v61 +; check: v78 = iconst.i64 0xffff_0000_ffff_0000 +; check: v63 = band v62, v78 +; check: v64 = ushr_imm v63, 16 +; check: v79 = iconst.i64 0xffff_0000_ffff +; check: v65 = band v62, v79 +; check: v66 = ishl_imm v65, 16 +; check: v67 = bor v64, v66 +; check: v68 = ushr_imm v67, 32 +; check: v69 = ishl_imm v67, 32 +; check: v5 = bor v68, v69 +; check: v1 = iconcat v5, v4 +; check: return v5, v4 diff --git a/cranelift/filetests/filetests/legalizer/bitrev.clif b/cranelift/filetests/filetests/legalizer/bitrev.clif new file mode 100644 index 0000000000..5651d7a7f3 --- /dev/null +++ b/cranelift/filetests/filetests/legalizer/bitrev.clif @@ -0,0 +1,206 @@ +test legalizer +target x86_64 + +function %reverse_bits_8(i8) -> i8 { +block0(v0: i8): + v1 = bitrev.i8 v0 + return v1 +} +; check: v16 = uextend.i32 v0 +; check: v17 = band_imm v16, 170 +; check: v2 = ireduce.i8 v17 +; check: v18 = uextend.i32 v2 +; check: v19 = ushr_imm v18, 1 +; check: v3 = ireduce.i8 v19 +; check: v20 = uextend.i32 v0 +; check: v21 = band_imm v20, 85 +; check: v4 = ireduce.i8 v21 +; check: v22 = uextend.i32 v4 +; check: v23 = ishl_imm v22, 1 +; check: v5 = ireduce.i8 v23 +; check: v24 = uextend.i32 v3 +; check: v25 = uextend.i32 v5 +; check: v26 = bor v24, v25 +; check: v6 = ireduce.i8 v26 +; check: v27 = uextend.i32 v6 +; check: v28 = band_imm v27, 204 +; check: v7 = ireduce.i8 v28 +; check: v29 = uextend.i32 v7 +; check: v30 = ushr_imm v29, 2 +; check: v8 = ireduce.i8 v30 +; check: v31 = uextend.i32 v6 +; check: v32 = band_imm v31, 51 +; check: v9 = ireduce.i8 v32 +; check: v33 = uextend.i32 v9 +; check: v34 = ishl_imm v33, 2 +; check: v10 = ireduce.i8 v34 +; check: v35 = uextend.i32 v8 +; check: v36 = uextend.i32 v10 +; check: v37 = bor v35, v36 +; check: v11 = ireduce.i8 v37 +; check: v38 = uextend.i32 v11 +; check: v39 = band_imm v38, 240 +; check: v12 = ireduce.i8 v39 +; check: v40 = uextend.i32 v12 +; check: v41 = ushr_imm v40, 4 +; check: v13 = ireduce.i8 v41 +; check: v42 = uextend.i32 v11 +; check: v43 = band_imm v42, 15 +; check: v14 = ireduce.i8 v43 +; check: v44 = uextend.i32 v14 +; check: v45 = ishl_imm v44, 4 +; check: v15 = ireduce.i8 v45 +; check: v46 = uextend.i32 v13 +; check: v47 = uextend.i32 v15 +; check: v48 = bor v46, v47 +; check: v1 = ireduce.i8 v48 +; check: return v1 + +function %reverse_bits_16(i16) -> i16 { +block0(v0: i16): + v1 = bitrev.i16 v0 + return v1 +} +; check: v21 = uextend.i32 v0 +; check: v22 = band_imm v21, 0xaaaa +; check: v2 = ireduce.i16 v22 +; check: v23 = uextend.i32 v2 +; check: v24 = ushr_imm v23, 1 +; check: v3 = ireduce.i16 v24 +; check: v25 = uextend.i32 v0 +; check: v26 = band_imm v25, 0x5555 +; check: v4 = ireduce.i16 v26 +; check: v27 = uextend.i32 v4 +; check: v28 = ishl_imm v27, 1 +; check: v5 = ireduce.i16 v28 +; check: v29 = uextend.i32 v3 +; check: v30 = uextend.i32 v5 +; check: v31 = bor v29, v30 +; check: v6 = ireduce.i16 v31 +; check: v32 = uextend.i32 v6 +; check: v33 = band_imm v32, 0xcccc +; check: v7 = ireduce.i16 v33 +; check: v34 = uextend.i32 v7 +; check: v35 = ushr_imm v34, 2 +; check: v8 = ireduce.i16 v35 +; check: v36 = uextend.i32 v6 
+; check: v37 = band_imm v36, 0x3333 +; check: v9 = ireduce.i16 v37 +; check: v38 = uextend.i32 v9 +; check: v39 = ishl_imm v38, 2 +; check: v10 = ireduce.i16 v39 +; check: v40 = uextend.i32 v8 +; check: v41 = uextend.i32 v10 +; check: v42 = bor v40, v41 +; check: v11 = ireduce.i16 v42 +; check: v43 = uextend.i32 v11 +; check: v44 = band_imm v43, 0xf0f0 +; check: v12 = ireduce.i16 v44 +; check: v45 = uextend.i32 v12 +; check: v46 = ushr_imm v45, 4 +; check: v13 = ireduce.i16 v46 +; check: v47 = uextend.i32 v11 +; check: v48 = band_imm v47, 3855 +; check: v14 = ireduce.i16 v48 +; check: v49 = uextend.i32 v14 +; check: v50 = ishl_imm v49, 4 +; check: v15 = ireduce.i16 v50 +; check: v51 = uextend.i32 v13 +; check: v52 = uextend.i32 v15 +; check: v53 = bor v51, v52 +; check: v16 = ireduce.i16 v53 +; check: v54 = uextend.i32 v16 +; check: v55 = band_imm v54, 0xff00 +; check: v17 = ireduce.i16 v55 +; check: v56 = uextend.i32 v17 +; check: v57 = ushr_imm v56, 8 +; check: v18 = ireduce.i16 v57 +; check: v58 = uextend.i32 v16 +; check: v59 = band_imm v58, 255 +; check: v19 = ireduce.i16 v59 +; check: v60 = uextend.i32 v19 +; check: v61 = ishl_imm v60, 8 +; check: v20 = ireduce.i16 v61 +; check: v62 = uextend.i32 v18 +; check: v63 = uextend.i32 v20 +; check: v64 = bor v62, v63 +; check: v1 = ireduce.i16 v64 +; check: return v1 + +function %reverse_bits_32(i32) -> i32 { +block0(v0: i32): + v1 = bitrev.i32 v0 + return v1 +} +; check: v24 = iconst.i32 0xaaaa_aaaa +; check: v2 = band v0, v24 +; check: v3 = ushr_imm v2, 1 +; check: v4 = band_imm v0, 0x5555_5555 +; check: v5 = ishl_imm v4, 1 +; check: v6 = bor v3, v5 +; check: v25 = iconst.i32 0xcccc_cccc +; check: v7 = band v6, v25 +; check: v8 = ushr_imm v7, 2 +; check: v9 = band_imm v6, 0x3333_3333 +; check: v10 = ishl_imm v9, 2 +; check: v11 = bor v8, v10 +; check: v26 = iconst.i32 0xf0f0_f0f0 +; check: v12 = band v11, v26 +; check: v13 = ushr_imm v12, 4 +; check: v14 = band_imm v11, 0x0f0f_0f0f +; check: v15 = ishl_imm v14, 4 +; check: v16 = bor v13, v15 +; check: v27 = iconst.i32 0xff00_ff00 +; check: v17 = band v16, v27 +; check: v18 = ushr_imm v17, 8 +; check: v19 = band_imm v16, 0x00ff_00ff +; check: v20 = ishl_imm v19, 8 +; check: v21 = bor v18, v20 +; check: v22 = ushr_imm v21, 16 +; check: v23 = ishl_imm v21, 16 +; check: v1 = bor v22, v23 + + +function %reverse_bits_64(i64) -> i64 { +block0(v0: i64): + v1 = bitrev.i64 v0 + return v1 +} +; check: v29 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa +; check: v2 = band v0, v29 +; check: v3 = ushr_imm v2, 1 +; check: v30 = iconst.i64 0x5555_5555_5555_5555 +; check: v4 = band v0, v30 +; check: v5 = ishl_imm v4, 1 +; check: v6 = bor v3, v5 +; check: v31 = iconst.i64 0xcccc_cccc_cccc_cccc +; check: v7 = band v6, v31 +; check: v8 = ushr_imm v7, 2 +; check: v32 = iconst.i64 0x3333_3333_3333_3333 +; check: v9 = band v6, v32 +; check: v10 = ishl_imm v9, 2 +; check: v11 = bor v8, v10 +; check: v33 = iconst.i64 0xf0f0_f0f0_f0f0_f0f0 +; check: v12 = band v11, v33 +; check: v13 = ushr_imm v12, 4 +; check: v34 = iconst.i64 0x0f0f_0f0f_0f0f_0f0f +; check: v14 = band v11, v34 +; check: v15 = ishl_imm v14, 4 +; check: v16 = bor v13, v15 +; check: v35 = iconst.i64 0xff00_ff00_ff00_ff00 +; check: v17 = band v16, v35 +; check: v18 = ushr_imm v17, 8 +; check: v36 = iconst.i64 0x00ff_00ff_00ff_00ff +; check: v19 = band v16, v36 +; check: v20 = ishl_imm v19, 8 +; check: v21 = bor v18, v20 +; check: v37 = iconst.i64 0xffff_0000_ffff_0000 +; check: v22 = band v21, v37 +; check: v23 = ushr_imm v22, 16 +; check: v38 = iconst.i64 
0xffff_0000_ffff +; check: v24 = band v21, v38 +; check: v25 = ishl_imm v24, 16 +; check: v26 = bor v23, v25 +; check: v27 = ushr_imm v26, 32 +; check: v28 = ishl_imm v26, 32 +; check: v1 = bor v27, v28 diff --git a/cranelift/filetests/filetests/legalizer/br_table_cond.clif b/cranelift/filetests/filetests/legalizer/br_table_cond.clif new file mode 100644 index 0000000000..9677e2c9f3 --- /dev/null +++ b/cranelift/filetests/filetests/legalizer/br_table_cond.clif @@ -0,0 +1,64 @@ +test legalizer +set enable_probestack=false +set enable_jump_tables=false +target x86_64 + +; Test that when enable_jump_tables is false, all jump tables are eliminated. +; regex: V=v\d+ +; regex: BB=block\d+ + +function u0:0(i64 vmctx) baldrdash_system_v { + gv0 = vmctx + gv1 = iadd_imm.i64 gv0, 48 + jt0 = jump_table [block2, block2, block7] + jt1 = jump_table [block8, block8] + +block0(v0: i64): + jump block5 + +block5: + v1 = global_value.i64 gv1 + v2 = load.i64 v1 + trapnz v2, interrupt + v3 = iconst.i32 0 + br_table v3, block3, jt0 +; check: block5: +; check: $(val0=$V) = iconst.i32 0 +; nextln: $(cmp0=$V) = icmp_imm eq $val0, 0 +; nextln: brnz $cmp0, block2 +; nextln: jump $(fail0=$BB) +; check: $fail0: +; nextln: $(cmp1=$V) = icmp_imm.i32 eq $val0, 1 +; nextln: brnz $cmp1, block2 +; nextln: jump $(fail1=$BB) +; check: $fail1: +; nextln: $(cmp2=$V) = icmp_imm.i32 eq $val0, 2 +; nextln: brnz $cmp2, block7 +; nextln: jump block3 + +block7: + v4 = iconst.i32 0 + br_table v4, block3, jt1 +; check: block7: +; check: $(val1=$V) = iconst.i32 0 +; nextln: $(cmp3=$V) = icmp_imm eq $val1, 0 +; nextln: brnz $cmp3, block8 +; nextln: jump $(fail3=$BB) +; check: $fail3: +; nextln: $(cmp4=$V) = icmp_imm.i32 eq $val1, 1 +; nextln: brnz $cmp4, block8 +; nextln: jump block3 + +block8: + jump block5 + +block3: + jump block2 + +block2: + jump block1 + +block1: + fallthrough_return +} +; not: jump_table diff --git a/cranelift/filetests/filetests/legalizer/bxor_imm.clif b/cranelift/filetests/filetests/legalizer/bxor_imm.clif new file mode 100644 index 0000000000..bf959a7364 --- /dev/null +++ b/cranelift/filetests/filetests/legalizer/bxor_imm.clif @@ -0,0 +1,11 @@ +test legalizer +target x86_64 + +function %foo(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bxor_imm.i64 v0, 0x100000000 + return v2 +} + +; check: v3 = iconst.i64 0x0001_0000_0000 +; check: v2 = bxor v0, v3 diff --git a/cranelift/filetests/filetests/legalizer/empty_br_table.clif b/cranelift/filetests/filetests/legalizer/empty_br_table.clif new file mode 100644 index 0000000000..606a07f605 --- /dev/null +++ b/cranelift/filetests/filetests/legalizer/empty_br_table.clif @@ -0,0 +1,17 @@ +test legalizer +set enable_probestack=false +set enable_jump_tables=false +target x86_64 + +function u0:0(i64) { + jt0 = jump_table [] + +block0(v0: i64): + br_table v0, block1, jt0 +; check: block0(v0: i64): +; nextln: jump block1 + +block1: + return +} +; not: jump_table diff --git a/cranelift/filetests/filetests/legalizer/iconst-i64.clif b/cranelift/filetests/filetests/legalizer/iconst-i64.clif new file mode 100644 index 0000000000..6aa7361b45 --- /dev/null +++ b/cranelift/filetests/filetests/legalizer/iconst-i64.clif @@ -0,0 +1,12 @@ +test legalizer +target i686 + +function %foo() -> i64 { +block0: + v1 = iconst.i64 0x6400000042 + return v1 +} + +; check: v2 = iconst.i32 66 +; check: v3 = iconst.i32 100 +; check: v1 = iconcat v2, v3 diff --git a/cranelift/filetests/filetests/legalizer/isplit-bb.clif b/cranelift/filetests/filetests/legalizer/isplit-bb.clif new file mode 100644
index 0000000000..7e55eb1eb9 --- /dev/null +++ b/cranelift/filetests/filetests/legalizer/isplit-bb.clif @@ -0,0 +1,24 @@ +test legalizer +target x86_64 + +function u0:0(i128, i128, i64) -> i128 system_v { +block0(v0: i128, v1: i128, v2: i64): + jump block1 + +block1: + v17 = iadd v0, v1 + v20 = iadd v1, v17 + jump block79 + +block79: + v425 = iconst.i64 0 + v426 = icmp_imm eq v425, 1 + brnz v426, block80 + jump block85(v20, v17) + +block80: + trap user0 + +block85(v462: i128, v874: i128): + trap user0 +} diff --git a/cranelift/filetests/filetests/legalizer/popcnt-i128.clif b/cranelift/filetests/filetests/legalizer/popcnt-i128.clif new file mode 100644 index 0000000000..f4919f4781 --- /dev/null +++ b/cranelift/filetests/filetests/legalizer/popcnt-i128.clif @@ -0,0 +1,31 @@ +test legalizer +target i686 + +function %foo() -> i128 { +block0: + v1 = iconst.i64 0x6400000042 + v2 = iconst.i64 0x7F10100042 + v3 = iconcat v1, v2 + v4 = popcnt.i128 v3 + return v4 +} + +; check: v5 = iconst.i32 66 +; check: v6 = iconst.i32 100 +; check: v1 = iconcat v5, v6 +; check: v7 = iconst.i32 0x1010_0042 +; check: v8 = iconst.i32 127 +; check: v2 = iconcat v7, v8 +; check: v3 = iconcat v1, v2 +; check: v9 = popcnt v1 +; check: v10 = popcnt v2 +; check: v12, v13 = isplit v9 +; check: v14, v15 = isplit v10 +; check: v16, v17 = iadd_ifcout v12, v14 +; check: v18 = iadd_ifcin v13, v15, v17 +; check: v11 = iconcat v16, v18 +; check: v20 = iconst.i32 0 +; check: v21 = iconst.i32 0 +; check: v19 = iconcat v20, v21 +; check: v4 = iconcat v11, v19 +; check: return v16, v18, v20, v21 diff --git a/cranelift/filetests/filetests/licm/basic.clif b/cranelift/filetests/filetests/licm/basic.clif new file mode 100644 index 0000000000..b089d0b182 --- /dev/null +++ b/cranelift/filetests/filetests/licm/basic.clif @@ -0,0 +1,41 @@ +test licm +target riscv32 + +function %simple_loop(i32) -> i32 { + +block0(v0: i32): + jump block1(v0) + +block1(v1: i32): + v2 = iconst.i32 1 + v3 = iconst.i32 2 + v4 = iadd v2, v3 + brz v1, block3(v1) + jump block2 + +block2: + v5 = isub v1, v2 + jump block1(v5) + +block3(v6: i32): + return v6 + +} +; sameln: function %simple_loop +; nextln: block0(v0: i32): +; nextln: v2 = iconst.i32 1 +; nextln: v3 = iconst.i32 2 +; nextln: v4 = iadd v2, v3 +; nextln: jump block1(v0) +; nextln: +; nextln: block1(v1: i32): +; nextln: brz v1, block3(v1) +; nextln: jump block2 +; nextln: +; nextln: block2: +; nextln: v5 = isub.i32 v1, v2 +; nextln: jump block1(v5) +; nextln: +; nextln: block3(v6: i32): +; nextln: return v6 +; nextln: } diff --git a/cranelift/filetests/filetests/licm/complex.clif b/cranelift/filetests/filetests/licm/complex.clif new file mode 100644 index 0000000000..ab9c905e39 --- /dev/null +++ b/cranelift/filetests/filetests/licm/complex.clif @@ -0,0 +1,96 @@ +test licm +target riscv32 + +function %complex(i32) -> i32 system_v { +block0(v0: i32): +[UJ#1b] jump block1(v0) + + block1(v1: i32): + v2 = iconst.i32 1 + v3 = iconst.i32 4 + v4 = iadd v2, v1 +[SBzero#18] brz v1, block2(v2) +[UJ#1b] jump block4(v4) + + block2(v5: i32): + v6 = iconst.i32 2 + v7 = iadd v5, v4 + v8 = iadd v6, v1 +[UJ#1b] jump block3(v8) + + block3(v9: i32): + v10 = iadd v9, v5 + v11 = iadd.i32 v1, v4 +[SBzero#18] brz.i32 v1, block2(v9) +[UJ#1b] jump block6(v10) + + block4(v12: i32): + v13 = iconst.i32 3 + v14 = iadd v12, v13 + v15 = iadd.i32 v4, v13 +[UJ#1b] jump block5(v13) + + block5(v16: i32): + v17 = iadd.i32 v14, v4 +[SBzero#18] brz.i32 v1, block4(v16) +[UJ#1b] jump block6(v16) + + block6(v18: i32): + v19 = iadd v18, v2 + v20 = 
iadd.i32 v2, v3 +[SBzero#18] brz.i32 v1, block1(v20) +[UJ#1b] jump block7 + + block7: +[Iret#19] return v19 +} + +; sameln: function %complex +; nextln: block0(v0: i32): +; nextln: v2 = iconst.i32 1 +; nextln: v3 = iconst.i32 4 +; nextln: v6 = iconst.i32 2 +; nextln: v13 = iconst.i32 3 +; nextln: v20 = iadd v2, v3 +; nextln: jump block1(v0) +; nextln: +; nextln: block1(v1: i32): +; nextln: v4 = iadd.i32 v2, v1 +; nextln: brz v1, block8(v2) +; nextln: jump block9(v4) +; nextln: +; nextln: block8(v21: i32): +; nextln: v8 = iadd.i32 v6, v1 +; nextln: v11 = iadd.i32 v1, v4 +; nextln: jump block2(v21) +; nextln: +; nextln: block2(v5: i32): +; nextln: v7 = iadd v5, v4 +; nextln: jump block3(v8) +; nextln: +; nextln: block3(v9: i32): +; nextln: v10 = iadd v9, v5 +; nextln: brz.i32 v1, block2(v9) +; nextln: jump block6(v10) +; nextln: +; nextln: block9(v22: i32): +; nextln: v15 = iadd.i32 v4, v13 +; nextln: jump block4(v22) +; nextln: +; nextln: block4(v12: i32): +; nextln: v14 = iadd v12, v13 +; nextln: jump block5(v13) +; nextln: +; nextln: block5(v16: i32): +; nextln: v17 = iadd.i32 v14, v4 +; nextln: brz.i32 v1, block4(v16) +; nextln: jump block6(v16) +; nextln: +; nextln: block6(v18: i32): +; nextln: v19 = iadd v18, v2 +; nextln: brz.i32 v1, block1(v20) +; nextln: jump block7 +; nextln: +; nextln: block7: +; nextln: return v19 +; nextln: } diff --git a/cranelift/filetests/filetests/licm/critical-edge.clif b/cranelift/filetests/filetests/licm/critical-edge.clif new file mode 100644 index 0000000000..1940a4ed36 --- /dev/null +++ b/cranelift/filetests/filetests/licm/critical-edge.clif @@ -0,0 +1,54 @@ +test licm +target riscv32 + +; The loop in this function is entered from a critical edge. + +function %critical_edge(i32, i32) -> i32 { + + block0(v0: i32, v7: i32): +[SBzero#38] brnz v7, block2(v0) +[UJ#1b] jump block1 + + block1: +[Iret#19] return v0 + + block2(v1: i32): + v2 = iconst.i32 1 + v3 = iconst.i32 2 + v4 = iadd v2, v3 +[SBzero#18] brz v1, block4(v1) +[UJ#1b] jump block3 + + block3: + v5 = isub v1, v2 +[UJ#1b] jump block2(v5) + + block4(v6: i32): +[Iret#19] return v6 + +} +; sameln: function %critical_edge +; nextln: block0(v0: i32, v7: i32): +; nextln: brnz v7, block5(v0) +; nextln: jump block1 +; nextln: +; nextln: block1: +; nextln: return v0 +; nextln: +; nextln: block5(v8: i32): +; nextln: v2 = iconst.i32 1 +; nextln: v3 = iconst.i32 2 +; nextln: v4 = iadd v2, v3 +; nextln: jump block2(v8) +; nextln: +; nextln: block2(v1: i32): +; nextln: brz v1, block4(v1) +; nextln: jump block3 +; nextln: +; nextln: block3: +; nextln: v5 = isub.i32 v1, v2 +; nextln: jump block2(v5) +; nextln: +; nextln: block4(v6: i32): +; nextln: return v6 +; nextln: } diff --git a/cranelift/filetests/filetests/licm/encoding.clif b/cranelift/filetests/filetests/licm/encoding.clif new file mode 100644 index 0000000000..2b0114d2d0 --- /dev/null +++ b/cranelift/filetests/filetests/licm/encoding.clif @@ -0,0 +1,42 @@ +test licm +target riscv32 + +; Ensure that instructions emitted by LICM get encodings. 
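+; (The invariant iconst/iadd instructions below should move into block0 while keeping their encoding annotations, e.g. [Iz#04] and [R#0c]; the check lines assume the annotations survive the hoist.)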
+ +function %simple_loop(i32) -> i32 { + block0(v0: i32): +[UJ#1b] jump block1(v0) + + block1(v1: i32): +[Iz#04,%x0] v2 = iconst.i32 1 +[Iz#04,%x1] v3 = iconst.i32 2 +[R#0c,%x2] v4 = iadd v2, v3 +[SBzero#18] brz v1, block3(v1) +[UJ#1b] jump block2 + + block2: +[R#200c,%x5] v5 = isub v1, v2 +[UJ#1b] jump block1(v5) + + block3(v6: i32): +[Iret#19] return v6 +} + +; check: function +; nextln: block0(v0: i32): +; nextln: [Iz#04,%x0] v2 = iconst.i32 1 +; nextln: [Iz#04,%x1] v3 = iconst.i32 2 +; nextln: [R#0c,%x2] v4 = iadd v2, v3 +; nextln: [UJ#1b] jump block1(v0) +; nextln: +; nextln: block1(v1: i32): +; nextln: [SBzero#18] brz v1, block3(v1) +; nextln: [UJ#1b] jump block2 +; nextln: +; nextln: block2: +; nextln: [R#200c,%x5] v5 = isub.i32 v1, v2 +; nextln: [UJ#1b] jump block1(v5) +; nextln: +; nextln: block3(v6: i32): +; nextln: [Iret#19] return v6 +; nextln: } diff --git a/cranelift/filetests/filetests/licm/jump-table-entry.clif b/cranelift/filetests/filetests/licm/jump-table-entry.clif new file mode 100644 index 0000000000..6f754185a5 --- /dev/null +++ b/cranelift/filetests/filetests/licm/jump-table-entry.clif @@ -0,0 +1,33 @@ +test licm +target x86_64 + +function %dont_hoist_jump_table_entry_during_licm() { + jt0 = jump_table [block1, block1] + +block0: + fallthrough block1 + +block1: ; the loop! + v2 = iconst.i32 42 + v3 = ifcmp_imm v2, 0 + brif uge v3, block1 + fallthrough block2 + +block2: + v1 = iconst.i64 -14 + v8 = ifcmp_imm v1, 2 + brif uge v8, block1 + jump block3 + +block3: + v5 = jump_table_base.i64 jt0 + v6 = jump_table_entry.i64 v1, v5, 4, jt0 + v7 = iadd v5, v6 + indirect_jump_table_br v7, jt0 +; check: block2: +; nextln: v8 = ifcmp_imm.i64 v1, 2 +; nextln: brif uge v8, block1 +; nextln: jump block3 +; check: block3: +; nextln: jump_table_entry.i64 +} diff --git a/cranelift/filetests/filetests/licm/load_readonly_notrap.clif b/cranelift/filetests/filetests/licm/load_readonly_notrap.clif new file mode 100644 index 0000000000..f663646b9e --- /dev/null +++ b/cranelift/filetests/filetests/licm/load_readonly_notrap.clif @@ -0,0 +1,54 @@ +test licm + +target x86_64 + +;; Nontrapping readonly load from address that is not loop-dependent +;; should be hoisted out of loop. 
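+;; (Both flags matter here: `notrap` means the load cannot trap, and `readonly` means no store in the loop can change the loaded value. The reject_load_notrap and reject_load_readonly tests below each drop one of the two flags and expect the load to stay in the loop.)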
+ +function %hoist_load(i32, i64 vmctx) -> i32 { + gv0 = vmctx + gv1 = load.i64 notrap aligned readonly gv0 + heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 + +block0(v0: i32, v1: i64): + jump block1(v0, v1) + +block1(v2: i32, v3: i64): + v4 = iconst.i32 1 + v5 = heap_addr.i64 heap0, v4, 1 + v6 = load.i32 notrap aligned readonly v5 + v7 = iadd v2, v6 + brz v2, block3(v2) + jump block2 + +block2: + v8 = isub v2, v4 + jump block1(v8, v3) + +block3(v9: i32): + return v9 +} + +; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast { +; nextln: gv0 = vmctx +; nextln: gv1 = load.i64 notrap aligned readonly gv0 +; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 +; nextln: +; nextln: block0(v0: i32, v1: i64): +; nextln: v4 = iconst.i32 1 +; nextln: v5 = heap_addr.i64 heap0, v4, 1 +; nextln: v6 = load.i32 notrap aligned readonly v5 +; nextln: jump block1(v0, v1) +; nextln: +; nextln: block1(v2: i32, v3: i64): +; nextln: v7 = iadd v2, v6 +; nextln: brz v2, block3(v2) +; nextln: jump block2 +; nextln: +; nextln: block2: +; nextln: v8 = isub.i32 v2, v4 +; nextln: jump block1(v8, v3) +; nextln: +; nextln: block3(v9: i32): +; nextln: return v9 +; nextln: } diff --git a/cranelift/filetests/filetests/licm/multiple-blocks.clif b/cranelift/filetests/filetests/licm/multiple-blocks.clif new file mode 100644 index 0000000000..04cfb9d621 --- /dev/null +++ b/cranelift/filetests/filetests/licm/multiple-blocks.clif @@ -0,0 +1,59 @@ +test licm +target riscv32 + +function %multiple_blocks(i32) -> i32 { + +block0(v0: i32): + jump block1(v0) + +block1(v10: i32): + v11 = iconst.i32 1 + v12 = iconst.i32 2 + v13 = iadd v11, v12 + brz v10, block4(v10) + jump block2 + +block2: + v15 = isub v10, v11 + brz v15, block5(v15) + jump block3 + +block3: + v14 = isub v10, v11 + jump block1(v14) + +block4(v20: i32): + return v20 + +block5(v30: i32): + v31 = iadd v11, v13 + jump block1(v30) + +} +; sameln:function %multiple_blocks(i32) -> i32 { +; nextln: block0(v0: i32): +; nextln: v11 = iconst.i32 1 +; nextln: v12 = iconst.i32 2 +; nextln: v13 = iadd v11, v12 +; nextln: v31 = iadd v11, v13 +; nextln: jump block1(v0) +; nextln: +; nextln: block1(v10: i32): +; nextln: brz v10, block4(v10) +; nextln: jump block2 +; nextln: +; nextln: block2: +; nextln: v15 = isub.i32 v10, v11 +; nextln: brz v15, block5(v15) +; nextln: jump block3 +; nextln: +; nextln: block3: +; nextln: v14 = isub.i32 v10, v11 +; nextln: jump block1(v14) +; nextln: +; nextln: block4(v20: i32): +; nextln: return v20 +; nextln: +; nextln: block5(v30: i32): +; nextln: jump block1(v30) +; nextln: } diff --git a/cranelift/filetests/filetests/licm/nested_loops.clif b/cranelift/filetests/filetests/licm/nested_loops.clif new file mode 100644 index 0000000000..7f9cb928db --- /dev/null +++ b/cranelift/filetests/filetests/licm/nested_loops.clif @@ -0,0 +1,62 @@ +test licm +target riscv32 + +function %nested_loops(i32) -> i32 { + +block0(v0: i32): + jump block1(v0) + +block1(v1: i32): + v2 = iconst.i32 1 + v3 = iconst.i32 2 + v4 = iadd v2, v3 + v5 = isub v1, v2 + jump block2(v5, v5) + +block2(v10: i32, v11: i32): + brz v11, block4(v10) + jump block3 + +block3: + v12 = iconst.i32 1 + v15 = iadd v12, v5 + v13 = isub v11, v12 + jump block2(v10,v13) + +block4(v20: i32): + brz v20, block5(v20) + jump block1(v20) + +block5(v30: i32): + return v30 + +} + +; sameln:function %nested_loops(i32) -> i32 { +; nextln: block0(v0: i32): +; nextln: v2 = iconst.i32 1 +; nextln: v3 = 
iconst.i32 2 +; nextln: v4 = iadd v2, v3 +; nextln: v12 = iconst.i32 1 +; nextln: jump block1(v0) +; nextln: +; nextln: block1(v1: i32): +; nextln: v5 = isub v1, v2 +; nextln: v15 = iadd.i32 v12, v5 +; nextln: jump block2(v5, v5) +; nextln: +; nextln: block2(v10: i32, v11: i32): +; nextln: brz v11, block4(v10) +; nextln: jump block3 +; nextln: +; nextln: block3: +; nextln: v13 = isub.i32 v11, v12 +; nextln: jump block2(v10, v13) +; nextln: +; nextln: block4(v20: i32): +; nextln: brz v20, block5(v20) +; nextln: jump block1(v20) +; nextln: +; nextln: block5(v30: i32): +; nextln: return v30 +; nextln: } diff --git a/cranelift/filetests/filetests/licm/reject.clif b/cranelift/filetests/filetests/licm/reject.clif new file mode 100644 index 0000000000..eab03760b6 --- /dev/null +++ b/cranelift/filetests/filetests/licm/reject.clif @@ -0,0 +1,94 @@ +test licm +target riscv32 + +function %other_side_effects(i32) -> i32 { + +block0(v0: i32): + jump block1(v0) + +block1(v1: i32): + regmove.i32 v0, %x10 -> %x20 +; check: block1(v1: i32): +; check: regmove.i32 v0, %x10 -> %x20 + v2 = iconst.i32 1 + brz v1, block3(v1) + jump block2 + +block2: + v5 = isub v1, v2 + jump block1(v5) + +block3(v6: i32): + return v6 + +} + +function %cpu_flags(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + jump block1(v0, v1) + +block1(v2: i32, v3: i32): + v4 = ifcmp.i32 v0, v1 + v5 = selectif.i32 eq v4, v2, v3 +; check: block1(v2: i32, v3: i32): +; check: ifcmp.i32 v0, v1 +; check: v5 = selectif.i32 eq v4, v2, v3 + v8 = iconst.i32 1 + brz v1, block3(v1) + jump block2 + +block2: + v9 = isub v1, v8 + v10 = iadd v1, v8 + jump block1(v9, v10) + +block3(v6: i32): + return v6 +} + +function %spill(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = spill.i32 v0 + jump block1(v0, v1) + +block1(v3: i32, v4: i32): + v5 = spill.i32 v1 + v6 = fill.i32 v2 + v7 = fill.i32 v5 +; check: block1(v3: i32, v4: i32): +; check: v5 = spill.i32 v1 +; check: v6 = fill.i32 v2 +; check: v7 = fill v5 + brz v1, block3(v1) + jump block2 + +block2: + v9 = isub v1, v4 + jump block1(v9, v3) + +block3(v10: i32): + return v10 +} + +function %non_invariant_aliases(i32) -> i32 { + +block0(v0: i32): + jump block1(v0) + +block1(v1: i32): + v8 -> v1 + v9 -> v1 + v2 = iadd v8, v9 +; check: block1(v1: i32): +; check: v2 = iadd v8, v9 + brz v1, block3(v1) + jump block2 + +block2: + v5 = isub v1, v2 + jump block1(v5) + +block3(v6: i32): + return v6 + +} diff --git a/cranelift/filetests/filetests/licm/reject_load_notrap.clif b/cranelift/filetests/filetests/licm/reject_load_notrap.clif new file mode 100644 index 0000000000..58f046357d --- /dev/null +++ b/cranelift/filetests/filetests/licm/reject_load_notrap.clif @@ -0,0 +1,55 @@ +test licm + +target x86_64 + +;; Nontrapping possibly-not-readonly load from address that is not +;; loop-dependent should *not* be hoisted out of loop, though the +;; address computation can be. 
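+;; (Without `readonly`, a store inside the loop could overwrite the loaded value, so LICM must leave the load in the loop body; the expected output below checks that v6 stays in block1.)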
+ +function %hoist_load(i32, i64 vmctx) -> i32 { + gv0 = vmctx + gv1 = load.i64 notrap aligned readonly gv0 + heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 + +block0(v0: i32, v1: i64): + v4 = iconst.i32 1 + v5 = heap_addr.i64 heap0, v4, 1 + jump block1(v0, v1) + +block1(v2: i32, v3: i64): + v6 = load.i32 notrap aligned v5 + v7 = iadd v2, v6 + brz v2, block3(v2) + jump block2 + +block2: + v8 = isub v2, v4 + jump block1(v8, v3) + +block3(v9: i32): + return v9 +} + +; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast { +; nextln: gv0 = vmctx +; nextln: gv1 = load.i64 notrap aligned readonly gv0 +; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 +; nextln: +; nextln: block0(v0: i32, v1: i64): +; nextln: v4 = iconst.i32 1 +; nextln: v5 = heap_addr.i64 heap0, v4, 1 +; nextln: jump block1(v0, v1) +; nextln: +; nextln: block1(v2: i32, v3: i64): +; nextln: v6 = load.i32 notrap aligned v5 +; nextln: v7 = iadd v2, v6 +; nextln: brz v2, block3(v2) +; nextln: jump block2 +; nextln: +; nextln: block2: +; nextln: v8 = isub.i32 v2, v4 +; nextln: jump block1(v8, v3) +; nextln: +; nextln: block3(v9: i32): +; nextln: return v9 +; nextln: } diff --git a/cranelift/filetests/filetests/licm/reject_load_readonly.clif b/cranelift/filetests/filetests/licm/reject_load_readonly.clif new file mode 100644 index 0000000000..f794bad6b0 --- /dev/null +++ b/cranelift/filetests/filetests/licm/reject_load_readonly.clif @@ -0,0 +1,55 @@ +test licm + +target x86_64 + +;; Maybe-trapping readonly load from address that is not +;; loop-dependent should *not* be hoisted out of loop, though the +;; address computation can be hoisted. + +function %hoist_load(i32, i64 vmctx) -> i32 { + gv0 = vmctx + gv1 = load.i64 notrap aligned readonly gv0 + heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 + +block0(v0: i32, v1: i64): + jump block1(v0, v1) + +block1(v2: i32, v3: i64): + v4 = iconst.i32 1 + v5 = heap_addr.i64 heap0, v4, 1 + v6 = load.i32 aligned readonly v5 + v7 = iadd v2, v6 + brz v2, block3(v2) + jump block2 + +block2: + v8 = isub v2, v4 + jump block1(v8, v3) + +block3(v9: i32): + return v9 +} + +; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast { +; nextln: gv0 = vmctx +; nextln: gv1 = load.i64 notrap aligned readonly gv0 +; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 +; nextln: +; nextln: block0(v0: i32, v1: i64): +; nextln: v4 = iconst.i32 1 +; nextln: v5 = heap_addr.i64 heap0, v4, 1 +; nextln: jump block1(v0, v1) +; nextln: +; nextln: block1(v2: i32, v3: i64): +; nextln: v6 = load.i32 aligned readonly v5 +; nextln: v7 = iadd v2, v6 +; nextln: brz v2, block3(v2) +; nextln: jump block2 +; nextln: +; nextln: block2: +; nextln: v8 = isub.i32 v2, v4 +; nextln: jump block1(v8, v3) +; nextln: +; nextln: block3(v9: i32): +; nextln: return v9 +; nextln: } diff --git a/cranelift/filetests/filetests/parser/alias.clif b/cranelift/filetests/filetests/parser/alias.clif new file mode 100644 index 0000000000..6197ae35d1 --- /dev/null +++ b/cranelift/filetests/filetests/parser/alias.clif @@ -0,0 +1,32 @@ +test cat +test verifier + +function %basic(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 -> v0 + v3 -> v1 + v4 = iadd.i32 v2, v3 + return v4 +} + +function %transitive() -> i32 { +block0: + v0 = iconst.i32 0 + v1 -> v0 + v2 -> v1 + v3 -> v2 + v4 -> v3 + return v4 +} + +function %duplicate(i32, i32) -> 
i32 { +block0(v0: i32, v1: i32): + v2 -> v0 + v2 -> v0 + v2 -> v0 + v3 -> v1 + v3 -> v1 + v3 -> v1 + v4 = iadd.i32 v2, v3 + return v4 +} diff --git a/cranelift/filetests/filetests/parser/branch.clif b/cranelift/filetests/filetests/parser/branch.clif new file mode 100644 index 0000000000..c9a71312d9 --- /dev/null +++ b/cranelift/filetests/filetests/parser/branch.clif @@ -0,0 +1,116 @@ +; Parsing branches and jumps. +test cat + +; Jumps with no arguments. The '()' empty argument list is optional. +function %minimal() { +block0: + jump block1 + +block1: + jump block0() +} +; sameln: function %minimal() fast { +; nextln: block0: +; nextln: jump block1 +; nextln: +; nextln: block1: +; nextln: jump block0 +; nextln: } + +; Jumps with 1 arg. +function %onearg(i32) { +block0(v90: i32): + jump block1(v90) + +block1(v91: i32): + jump block0(v91) +} +; sameln: function %onearg(i32) fast { +; nextln: block0(v90: i32): +; nextln: jump block1(v90) +; nextln: +; nextln: block1(v91: i32): +; nextln: jump block0(v91) +; nextln: } + +; Jumps with 2 args. +function %twoargs(i32, f32) { +block0(v90: i32, v91: f32): + jump block1(v90, v91) + +block1(v92: i32, v93: f32): + jump block0(v92, v93) +} +; sameln: function %twoargs(i32, f32) fast { +; nextln: block0(v90: i32, v91: f32): +; nextln: jump block1(v90, v91) +; nextln: +; nextln: block1(v92: i32, v93: f32): +; nextln: jump block0(v92, v93) +; nextln: } + +; Branches with no arguments. The '()' empty argument list is optional. +function %minimal(i32) { +block0(v90: i32): + brz v90, block1 + +block1: + brnz v90, block1() +} +; sameln: function %minimal(i32) fast { +; nextln: block0(v90: i32): +; nextln: brz v90, block1 +; nextln: +; nextln: block1: +; nextln: brnz.i32 v90, block1 +; nextln: } + +function %twoargs(i32, f32) { +block0(v90: i32, v91: f32): + brz v90, block1(v90, v91) + +block1(v92: i32, v93: f32): + brnz v90, block0(v92, v93) +} +; sameln: function %twoargs(i32, f32) fast { +; nextln: block0(v90: i32, v91: f32): +; nextln: brz v90, block1(v90, v91) +; nextln: +; nextln: block1(v92: i32, v93: f32): +; nextln: brnz.i32 v90, block0(v92, v93) +; nextln: } + +function %jumptable(i32) { + jt200 = jump_table [] + jt2 = jump_table [block10, block40, block20, block30] + +block10(v3: i32): + br_table v3, block50, jt2 + +block20: + trap user2 +block30: + trap user3 +block40: + trap user4 +block50: + trap user1 +} +; sameln: function %jumptable(i32) fast { +; check: jt2 = jump_table [block10, block40, block20, block30] +; check: jt200 = jump_table [] +; check: block10(v3: i32): +; nextln: br_table v3, block50, jt2 +; nextln: +; nextln: block20: +; nextln: trap user2 +; nextln: +; nextln: block30: +; nextln: trap user3 +; nextln: +; nextln: block40: +; nextln: trap user4 +; nextln: +; nextln: block50: +; nextln: trap user1 +; nextln: } diff --git a/cranelift/filetests/filetests/parser/call.clif b/cranelift/filetests/filetests/parser/call.clif new file mode 100644 index 0000000000..35e43822d1 --- /dev/null +++ b/cranelift/filetests/filetests/parser/call.clif @@ -0,0 +1,94 @@ +; Parser tests for call and return syntax. 
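+; ('test cat' round-trips each function through the parser and printer, so the check lines below describe the re-printed form of the input rather than the result of any transformation.)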
+test cat + +function %mini() { +block1: + return +} +; sameln: function %mini() fast { +; nextln: block1: +; nextln: return +; nextln: } + +function %r1() -> i32, f32 baldrdash_system_v { +block1: + v1 = iconst.i32 3 + v2 = f32const 0.0 + return v1, v2 +} +; sameln: function %r1() -> i32, f32 baldrdash_system_v { +; nextln: block1: +; nextln: v1 = iconst.i32 3 +; nextln: v2 = f32const 0.0 +; nextln: return v1, v2 +; nextln: } + +function %signatures() { + sig10 = () + sig11 = (i32, f64) -> i32, b1 baldrdash_system_v + fn5 = %foo sig11 + fn8 = %bar(i32) -> b1 +} +; sameln: function %signatures() fast { +; check: sig10 = () fast +; check: sig11 = (i32, f64) -> i32, b1 baldrdash_system_v +; check: sig12 = (i32) -> b1 fast +; not: fn0 +; check: fn5 = %foo sig11 +; check: fn8 = %bar sig12 +; check: } + +function %direct() { + fn0 = %none() + fn1 = %one() -> i32 + fn2 = %two() -> i32, f32 + +block0: + call fn0() + v1 = call fn1() + v2, v3 = call fn2() + return +} +; check: call fn0() +; check: v1 = call fn1() +; check: v2, v3 = call fn2() +; check: return + +function %indirect(i64) { + sig0 = (i64) + sig1 = () -> i32 + sig2 = () -> i32, f32 + +block0(v0: i64): + v1 = call_indirect sig1, v0() + call_indirect sig0, v1(v0) + v3, v4 = call_indirect sig2, v1() + return +} +; check: v1 = call_indirect sig1, v0() +; check: call_indirect sig0, v1(v0) +; check: v3, v4 = call_indirect sig2, v1() +; check: return + +function %long_call() { + sig0 = () + fn0 = %none sig0 + +block0: + v0 = func_addr.i32 fn0 + call_indirect sig0, v0() + return +} +; check: v0 = func_addr.i32 fn0 +; check: call_indirect sig0, v0() +; check: return + +; Special purpose function arguments +function %special1(i32 sret, i32 fp, i32 csr, i32 link) -> i32 link, i32 fp, i32 csr, i32 sret { +block0(v1: i32, v2: i32, v3: i32, v4: i32): + return v4, v2, v3, v1 +} +; check: function %special1(i32 sret, i32 fp, i32 csr, i32 link) -> i32 link, i32 fp, i32 csr, i32 sret fast { +; check: block0(v1: i32, v2: i32, v3: i32, v4: i32): +; check: return v4, v2, v3, v1 +; check: } diff --git a/cranelift/filetests/filetests/parser/flags.clif b/cranelift/filetests/filetests/parser/flags.clif new file mode 100644 index 0000000000..c8d6e78912 --- /dev/null +++ b/cranelift/filetests/filetests/parser/flags.clif @@ -0,0 +1,64 @@ +test cat +test verifier + +function %iflags(i32) { +block200(v0: i32): + v1 = ifcmp_imm v0, 17 + brif eq v1, block201 + jump block400 + +block400: + brif ugt v1, block202 + jump block401 + +block401: + v2 = iconst.i32 34 + v3 = ifcmp v0, v2 + v4 = trueif eq v3 + brnz v4, block202 + jump block402 + +block402: + return + +block201: + return + +block202: + trap oob +} +; check: v1 = ifcmp_imm v0, 17 +; check: brif eq v1, block201 +; check: brif ugt v1, block202 +; check: v3 = ifcmp.i32 v0, v2 +; check: v4 = trueif eq v3 + +function %fflags(f32) { +block200(v0: f32): + v1 = f32const 0x34.0p0 + v2 = ffcmp v0, v1 + brff eq v2, block201 + jump block400 + +block400: + brff ord v2, block202 + jump block401 + +block401: + v3 = trueff gt v2 + brnz v3, block202 + jump block402 + +block402: + return + +block201: + return + +block202: + trap oob +} +; check: v2 = ffcmp v0, v1 +; check: brff eq v2, block201 +; check: brff ord v2, block202 +; check: v3 = trueff gt v2 diff --git a/cranelift/filetests/filetests/parser/instruction_encoding.clif b/cranelift/filetests/filetests/parser/instruction_encoding.clif new file mode 100644 index 0000000000..5386808482 --- /dev/null +++ b/cranelift/filetests/filetests/parser/instruction_encoding.clif @@ -0,0 
+1,24 @@ +test cat + +target riscv32 + +; regex: WS=[ \t]* + +function %foo(i32, i32) { +block1(v0: i32 [%x8], v1: i32): + [-,-] v2 = iadd v0, v1 + [-] trap heap_oob + [R#1234, %x5, %x11] v6, v7 = iadd_ifcout v2, v0 + [Rshamt#beef, %x25] v8 = ishl_imm v6, 2 +@55 v9 = iadd v8, v7 +@a5 [Iret#5] return v0, v8 +} +; sameln: function %foo(i32, i32) fast { +; nextln: block1(v0: i32 [%x8], v1: i32): +; nextln: [-,-]$WS v2 = iadd v0, v1 +; nextln: [-]$WS trap heap_oob +; nextln: [R#1234,%x5,%x11]$WS v6, v7 = iadd_ifcout v2, v0 +; nextln: [Rshamt#beef,%x25]$WS v8 = ishl_imm v6, 2 +; nextln: @0055 [-,-]$WS v9 = iadd v8, v7 +; nextln: @00a5 [Iret#05]$WS return v0, v8 +; nextln: } diff --git a/cranelift/filetests/filetests/parser/keywords.clif b/cranelift/filetests/filetests/parser/keywords.clif new file mode 100644 index 0000000000..e079892be7 --- /dev/null +++ b/cranelift/filetests/filetests/parser/keywords.clif @@ -0,0 +1,5 @@ +test cat + +; 'function' is not a keyword, and can be used as the name of a function too. +function %function() {} +; check: function %function() fast diff --git a/cranelift/filetests/filetests/parser/memory.clif b/cranelift/filetests/filetests/parser/memory.clif new file mode 100644 index 0000000000..ecf872d64f --- /dev/null +++ b/cranelift/filetests/filetests/parser/memory.clif @@ -0,0 +1,82 @@ +test cat +test verifier + +function %vmglobal(i64 vmctx) -> i32 { + gv3 = vmctx + ; check: gv3 = vmctx +block0(v0: i64): + v1 = global_value.i32 gv3 + ; check: v1 = global_value.i32 gv3 + return v1 +} + +function %load_and_add_imm(i64 vmctx) -> i32 { + gv2 = vmctx + gv3 = load.i32 notrap aligned gv2-72 + gv4 = iadd_imm.i32 gv3, -32 + ; check: gv2 = vmctx + ; check: gv3 = load.i32 notrap aligned gv2-72 + ; check: gv4 = iadd_imm.i32 gv3, -32 +block0(v0: i64): + v1 = global_value.i32 gv4 + ; check: v1 = global_value.i32 gv4 + return v1 +} + +; Refer to a global value before it's been declared. +function %backref(i64 vmctx) -> i32 { + gv0 = iadd_imm.i32 gv1, -32 + ; check: gv0 = iadd_imm.i32 gv1, -32 + gv1 = load.i32 notrap aligned gv2 + ; check: gv1 = load.i32 notrap aligned gv2 + gv2 = vmctx + ; check: gv2 = vmctx +block0(v0: i64): + v1 = global_value.i32 gv1 + return v1 +} + +function %symbol() -> i32 { + gv0 = symbol %something + ; check: gv0 = symbol %something + gv1 = symbol u8:9 + ; check: gv1 = symbol u8:9 +block0: + v0 = global_value.i32 gv0 + ; check: v0 = global_value.i32 gv0 + v1 = global_value.i32 gv1 + ; check: v1 = global_value.i32 gv1 + v2 = bxor v0, v1 + return v2 +} + +; Declare static heaps. +function %sheap(i32, i64 vmctx) -> i64 { + heap1 = static gv5, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000 + heap2 = static gv5, offset_guard 0x1000, bound 0x1_0000 + gv4 = vmctx + gv5 = iadd_imm.i64 gv4, 64 + + ; check: heap1 = static gv5, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + ; check: heap2 = static gv5, min 0, bound 0x0001_0000, offset_guard 4096 +block0(v1: i32, v2: i64): + v3 = heap_addr.i64 heap1, v1, 0 + ; check: v3 = heap_addr.i64 heap1, v1, 0 + return v3 +} + +; Declare dynamic heaps. 
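+; (Unlike the static heaps above, a dynamic heap takes its bound from a global value, gv6 here, instead of a constant; note in the check lines that the printer fills in a default `min 0` when no minimum is written.)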
+function %dheap(i32, i64 vmctx) -> i64 { + heap1 = dynamic gv5, min 0x1_0000, bound gv6, offset_guard 0x8000_0000 + heap2 = dynamic gv5, bound gv6, offset_guard 0x1000 + gv4 = vmctx + gv5 = iadd_imm.i64 gv4, 64 + gv6 = iadd_imm.i64 gv4, 72 + + ; check: heap1 = dynamic gv5, min 0x0001_0000, bound gv6, offset_guard 0x8000_0000 + ; check: heap2 = dynamic gv5, min 0, bound gv6, offset_guard 4096 +block0(v1: i32, v2: i64): + v3 = heap_addr.i64 heap2, v1, 0 + ; check: v3 = heap_addr.i64 heap2, v1, 0 + return v3 +} diff --git a/cranelift/filetests/filetests/parser/rewrite.clif b/cranelift/filetests/filetests/parser/rewrite.clif new file mode 100644 index 0000000000..a0520d25f5 --- /dev/null +++ b/cranelift/filetests/filetests/parser/rewrite.clif @@ -0,0 +1,31 @@ +; It is possible to refer to instructions and blocks that have not yet been +; defined in the lexical order. +test cat + +; Defining numbers. +function %defs() { +block100(v20: i32): + v1000 = iconst.i32x8 5 + v9200 = f64const 0x4.0p0 + trap user4 +} +; sameln: function %defs() fast { +; nextln: block100(v20: i32): +; nextln: v1000 = iconst.i32x8 5 +; nextln: v9200 = f64const 0x1.0000000000000p2 +; nextln: trap user4 +; nextln: } + +; Using values. +function %use_value() { +block100(v20: i32): + v1000 = iadd_imm v20, 5 + v200 = iadd v20, v1000 + jump block100(v1000) +} +; sameln: function %use_value() fast { +; nextln: block100(v20: i32): +; nextln: v1000 = iadd_imm v20, 5 +; nextln: v200 = iadd v20, v1000 +; nextln: jump block100(v1000) +; nextln: } diff --git a/cranelift/filetests/filetests/parser/ternary.clif b/cranelift/filetests/filetests/parser/ternary.clif new file mode 100644 index 0000000000..b148850198 --- /dev/null +++ b/cranelift/filetests/filetests/parser/ternary.clif @@ -0,0 +1,24 @@ +test cat +test verifier + +function %add_i96(i32, i32, i32, i32, i32, i32) -> i32, i32, i32 { +block1(v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32): + v10, v11 = iadd_ifcout v1, v4 + ;check: v10, v11 = iadd_ifcout v1, v4 + v20, v21 = iadd_ifcarry v2, v5, v11 + ; check: v20, v21 = iadd_ifcarry v2, v5, v11 + v30 = iadd_ifcin v3, v6, v21 + ; check: v30 = iadd_ifcin v3, v6, v21 + return v10, v20, v30 +} + +function %sub_i96(i32, i32, i32, i32, i32, i32) -> i32, i32, i32 { +block1(v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32): + v10, v11 = isub_ifbout v1, v4 + ;check: v10, v11 = isub_ifbout v1, v4 + v20, v21 = isub_ifborrow v2, v5, v11 + ; check: v20, v21 = isub_ifborrow v2, v5, v11 + v30 = isub_ifbin v3, v6, v21 + ; check: v30 = isub_ifbin v3, v6, v21 + return v10, v20, v30 +} diff --git a/cranelift/filetests/filetests/parser/tiny.clif b/cranelift/filetests/filetests/parser/tiny.clif new file mode 100644 index 0000000000..98f477f808 --- /dev/null +++ b/cranelift/filetests/filetests/parser/tiny.clif @@ -0,0 +1,241 @@ +test cat + +; The smallest possible function. +function %minimal() { +block0: + trap user0 +} +; sameln: function %minimal() fast { +; nextln: block0: +; nextln: trap user0 +; nextln: } + +; Create and use values. +; Polymorphic instructions with type suffix. +function %ivalues() { +block0: + v0 = iconst.i32 2 + v1 = iconst.i8 6 + v2 = ishl v0, v1 +} +; sameln: function %ivalues() fast { +; nextln: block0: +; nextln: v0 = iconst.i32 2 +; nextln: v1 = iconst.i8 6 +; nextln: v2 = ishl v0, v1 +; nextln: } + +; Create and use values. +; Polymorphic instructions with type suffix. 
+function %bvalues() { +block0: + v0 = bconst.b32 true + v1 = bconst.b8 false + v2 = bextend.b32 v1 + v3 = bxor v0, v2 +} +; sameln: function %bvalues() fast { +; nextln: block0: +; nextln: v0 = bconst.b32 true +; nextln: v1 = bconst.b8 false +; nextln: v2 = bextend.b32 v1 +; nextln: v3 = bxor v0, v2 +; nextln: } + +; Polymorphic instruction controlled by second operand. +function %select() { +block0(v90: i32, v91: i32, v92: b1): + v0 = select v92, v90, v91 +} +; sameln: function %select() fast { +; nextln: block0(v90: i32, v91: i32, v92: b1): +; nextln: v0 = select v92, v90, v91 +; nextln: } + +; Polymorphic instruction controlled by third operand. +function %selectif() system_v { +block0(v95: i32, v96: i32, v97: b1): + v98 = selectif.i32 eq v97, v95, v96 +} +; sameln: function %selectif() system_v { +; nextln: block0(v95: i32, v96: i32, v97: b1): +; nextln: v98 = selectif.i32 eq v97, v95, v96 +; nextln: } + +; Lane indexes. +function %lanes() { +block0: + v0 = iconst.i32x4 2 + v1 = extractlane v0, 3 + v2 = insertlane v0, 1, v1 +} +; sameln: function %lanes() fast { +; nextln: block0: +; nextln: v0 = iconst.i32x4 2 +; nextln: v1 = extractlane v0, 3 +; nextln: v2 = insertlane v0, 1, v1 +; nextln: } + +; Integer condition codes. +function %icmp(i32, i32) { +block0(v90: i32, v91: i32): + v0 = icmp eq v90, v91 + v1 = icmp ult v90, v91 + v2 = icmp_imm sge v90, -12 + v3 = irsub_imm v91, 45 + br_icmp eq v90, v91, block0(v91, v90) +} +; sameln: function %icmp(i32, i32) fast { +; nextln: block0(v90: i32, v91: i32): +; nextln: v0 = icmp eq v90, v91 +; nextln: v1 = icmp ult v90, v91 +; nextln: v2 = icmp_imm sge v90, -12 +; nextln: v3 = irsub_imm v91, 45 +; nextln: br_icmp eq v90, v91, block0(v91, v90) +; nextln: } + +; Floating condition codes. +function %fcmp(f32, f32) { +block0(v90: f32, v91: f32): + v0 = fcmp eq v90, v91 + v1 = fcmp uno v90, v91 + v2 = fcmp lt v90, v91 +} +; sameln: function %fcmp(f32, f32) fast { +; nextln: block0(v90: f32, v91: f32): +; nextln: v0 = fcmp eq v90, v91 +; nextln: v1 = fcmp uno v90, v91 +; nextln: v2 = fcmp lt v90, v91 +; nextln: } + +; The bitcast instruction has two type variables: the controlling type variable +; controls the output type, and the input type is a free variable. +function %bitcast(i32, f32) { +block0(v90: i32, v91: f32): + v0 = bitcast.i8x4 v90 + v1 = bitcast.i32 v91 +} +; sameln: function %bitcast(i32, f32) fast { +; nextln: block0(v90: i32, v91: f32): +; nextln: v0 = bitcast.i8x4 v90 +; nextln: v1 = bitcast.i32 v91 +; nextln: } + +; Stack slot references. +function %stack() { + ss10 = spill_slot 8 + ss2 = explicit_slot 4 + ss3 = incoming_arg 4, offset 8 + ss4 = outgoing_arg 4 + ss5 = emergency_slot 4 + +block0: + v1 = stack_load.i32 ss10 + v2 = stack_load.i32 ss10+4 + stack_store v1, ss10+2 + stack_store v2, ss2 +} +; sameln: function %stack() fast { +; check: ss2 = explicit_slot 4 +; check: ss3 = incoming_arg 4, offset 8 +; check: ss4 = outgoing_arg 4 +; check: ss5 = emergency_slot 4 +; check: ss10 = spill_slot 8 + +; check: block0: +; nextln: v1 = stack_load.i32 ss10 +; nextln: v2 = stack_load.i32 ss10+4 +; nextln: stack_store v1, ss10+2 +; nextln: stack_store v2, ss2 + +; Memory access instructions.
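+; The notrap and aligned flags may be written in either order; the printer normalizes them to notrap aligned, and offsets are normalized too (0x1_0000 prints as 0x0001_0000, 0x1 as 1), as the checks below expect.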
+function %memory(i32) { +block0(v1: i32): + v2 = load.i64 v1 + v3 = load.i64 aligned v1 + v4 = load.i64 notrap v1 + v5 = load.i64 notrap aligned v1 + v6 = load.i64 aligned notrap v1 + v7 = load.i64 v1-12 + v8 = load.i64 notrap v1+0x1_0000 + v9 = load_complex.i64 v1+v2 + v10 = load_complex.i64 v1+v2+0x1 + store v2, v1 + store aligned v3, v1+12 + store notrap aligned v3, v1-12 + store_complex v3, v1+v2 + store_complex v3, v1+v2+0x1 +} +; sameln: function %memory(i32) fast { +; nextln: block0(v1: i32): +; nextln: v2 = load.i64 v1 +; nextln: v3 = load.i64 aligned v1 +; nextln: v4 = load.i64 notrap v1 +; nextln: v5 = load.i64 notrap aligned v1 +; nextln: v6 = load.i64 notrap aligned v1 +; nextln: v7 = load.i64 v1-12 +; nextln: v8 = load.i64 notrap v1+0x0001_0000 +; nextln: v9 = load_complex.i64 v1+v2 +; nextln: v10 = load_complex.i64 v1+v2+1 +; nextln: store v2, v1 +; nextln: store aligned v3, v1+12 +; nextln: store notrap aligned v3, v1-12 +; nextln: store_complex v3, v1+v2 +; nextln: store_complex v3, v1+v2+1 + +; Register diversions. +; This test file has no ISA, so we can only use register unit numbers. +function %diversion(i32) { + ss0 = spill_slot 4 + +block0(v1: i32): + regmove v1, %10 -> %20 + regmove v1, %20 -> %10 + regspill v1, %10 -> ss0 + regfill v1, ss0 -> %10 + return +} +; sameln: function %diversion(i32) fast { +; nextln: ss0 = spill_slot 4 +; check: block0(v1: i32): +; nextln: regmove v1, %10 -> %20 +; nextln: regmove v1, %20 -> %10 +; nextln: regspill v1, %10 -> ss0 +; nextln: regfill v1, ss0 -> %10 +; nextln: return +; nextln: } + +; Register copies. +function %copy_special() { +block0: + copy_special %10 -> %20 + copy_special %20 -> %10 + return +} +; sameln: function %copy_special() fast { +; nextln: block0: +; nextln: copy_special %10 -> %20 +; nextln: copy_special %20 -> %10 +; nextln: return +; nextln: } + +function %cond_traps(i32) { +block0(v0: i32): + trapz v0, stk_ovf + v1 = ifcmp_imm v0, 5 + trapif ugt v1, oob + v2 = bitcast.f32 v1 + v3 = ffcmp v2, v2 + trapff uno v3, int_ovf + return +} +; sameln: function %cond_traps(i32) +; nextln: block0(v0: i32): +; nextln: trapz v0, stk_ovf +; nextln: v1 = ifcmp_imm v0, 5 +; nextln: trapif ugt v1, oob +; nextln: v2 = bitcast.f32 v1 +; nextln: v3 = ffcmp v2, v2 +; nextln: trapff uno v3, int_ovf +; nextln: return +; nextln: } diff --git a/cranelift/filetests/filetests/postopt/basic.clif b/cranelift/filetests/filetests/postopt/basic.clif new file mode 100644 index 0000000000..4fb9e9664c --- /dev/null +++ b/cranelift/filetests/filetests/postopt/basic.clif @@ -0,0 +1,124 @@ +test postopt +target i686 + +; Test that compare+branch sequences are folded effectively on x86. + +function %br_icmp(i32, i32) -> i32 { +block0(v0: i32, v1: i32): +[DynRexOp1icscc#39,%rdx] v2 = icmp slt v0, v1 +[Op1t8jccd_long#85] brnz v2, block1 +[Op1jmpb#eb] jump block2 + +block2: +[Op1ret#c3] return v1 + +block1: +[Op1pu_id#b8,%rax] v8 = iconst.i32 3 +[Op1ret#c3] return v8 +} +; sameln: function %br_icmp +; nextln: block0(v0: i32, v1: i32): +; nextln: v9 = ifcmp v0, v1 +; nextln: v2 = trueif slt v9 +; nextln: brif slt v9, block1 +; nextln: jump block2 +; nextln: +; nextln: block2: +; nextln: return v1 +; nextln: +; nextln: block1: +; nextln: v8 = iconst.i32 3 +; nextln: return v8 +; nextln: } + +; Use brz instead of brnz, so the condition is inverted.
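+; brz branches when its condition is false, so folding brz of an icmp slt must invert the condition code: the expected output below branches with brif sge.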
+ +function %br_icmp_inverse(i32, i32) -> i32 { +block0(v0: i32, v1: i32): +[DynRexOp1icscc#39,%rdx] v2 = icmp slt v0, v1 +[Op1t8jccd_long#84] brz v2, block1 +[Op1jmpb#eb] jump block2 + +block2: +[Op1ret#c3] return v1 + +block1: +[Op1pu_id#b8,%rax] v8 = iconst.i32 3 +[Op1ret#c3] return v8 +} +; sameln: function %br_icmp_inverse +; nextln: block0(v0: i32, v1: i32): +; nextln: v9 = ifcmp v0, v1 +; nextln: v2 = trueif slt v9 +; nextln: brif sge v9, block1 +; nextln: jump block2 +; nextln: +; nextln: block2: +; nextln: return v1 +; nextln: +; nextln: block1: +; nextln: v8 = iconst.i32 3 +; nextln: return v8 +; nextln: } + +; Use icmp_imm instead of icmp. + +function %br_icmp_imm(i32, i32) -> i32 { +block0(v0: i32, v1: i32): +[DynRexOp1icscc_ib#7083] v2 = icmp_imm slt v0, 2 +[Op1t8jccd_long#84] brz v2, block1 +[Op1jmpb#eb] jump block2 + +block2: +[Op1ret#c3] return v1 + +block1: +[Op1pu_id#b8,%rax] v8 = iconst.i32 3 +[Op1ret#c3] return v8 +} +; sameln: function %br_icmp_imm +; nextln: block0(v0: i32, v1: i32): +; nextln: v9 = ifcmp_imm v0, 2 +; nextln: v2 = trueif slt v9 +; nextln: brif sge v9, block1 +; nextln: jump block2 +; nextln: +; nextln: block2: +; nextln: return v1 +; nextln: +; nextln: block1: +; nextln: v8 = iconst.i32 3 +; nextln: return v8 +; nextln: } + +; Use fcmp instead of icmp. + +function %br_fcmp(f32, f32) -> f32 { +block0(v0: f32, v1: f32): +[Op2fcscc#42e,%rdx] v2 = fcmp gt v0, v1 +[Op1t8jccd_long#84] brz v2, block1 +[Op1jmpb#eb] jump block2 + +block2: +[Op1ret#c3] return v1 + +block1: +[Op1pu_id#b8,%rax] v18 = iconst.i32 0x40a8_0000 +[Mp2frurm#56e,%xmm0] v8 = bitcast.f32 v18 +[Op1ret#c3] return v8 +} +; sameln: function %br_fcmp +; nextln: block0(v0: f32, v1: f32): +; nextln: v19 = ffcmp v0, v1 +; nextln: v2 = trueff gt v19 +; nextln: brff ule v19, block1 +; nextln: jump block2 +; nextln: +; nextln: block2: +; nextln: return v1 +; nextln: +; nextln: block1: +; nextln: v18 = iconst.i32 0x40a8_0000 +; nextln: v8 = bitcast.f32 v18 +; nextln: return v8 +; nextln: } diff --git a/cranelift/filetests/filetests/postopt/complex_memory_ops.clif b/cranelift/filetests/filetests/postopt/complex_memory_ops.clif new file mode 100644 index 0000000000..43206f431c --- /dev/null +++ b/cranelift/filetests/filetests/postopt/complex_memory_ops.clif @@ -0,0 +1,94 @@ +test postopt +target x86_64 + +function %dual_loads(i64, i64) -> i64 { +block0(v0: i64, v1: i64): +[DynRexOp1rr#8001] v3 = iadd v0, v1 + v4 = load.i64 v3 + v5 = uload8.i64 v3 + v6 = sload8.i64 v3 + v7 = uload16.i64 v3 + v8 = sload16.i64 v3 + v9 = uload32.i64 v3 + v10 = sload32.i64 v3 +[Op1ret#c3] return v10 +} + +; sameln: function %dual_loads +; nextln: block0(v0: i64, v1: i64): +; nextln: v3 = iadd v0, v1 +; nextln: v4 = load_complex.i64 v0+v1 +; nextln: v5 = uload8_complex.i64 v0+v1 +; nextln: v6 = sload8_complex.i64 v0+v1 +; nextln: v7 = uload16_complex.i64 v0+v1 +; nextln: v8 = sload16_complex.i64 v0+v1 +; nextln: v9 = uload32_complex v0+v1 +; nextln: v10 = sload32_complex v0+v1 +; nextln: return v10 +; nextln: } + +function %dual_loads2(i64, i64) -> i64 { +block0(v0: i64, v1: i64): +[DynRexOp1rr#8001] v3 = iadd v0, v1 + v4 = load.i64 v3+1 + v5 = uload8.i64 v3+1 + v6 = sload8.i64 v3+1 + v7 = uload16.i64 v3+1 + v8 = sload16.i64 v3+1 + v9 = uload32.i64 v3+1 + v10 = sload32.i64 v3+1 +[Op1ret#c3] return v10 +} + +; sameln: function %dual_loads2 +; nextln: block0(v0: i64, v1: i64): +; nextln: v3 = iadd v0, v1 +; nextln: v4 = load_complex.i64 v0+v1+1 +; nextln: v5 = uload8_complex.i64 v0+v1+1 +; nextln: v6 = sload8_complex.i64 
v0+v1+1 +; nextln: v7 = uload16_complex.i64 v0+v1+1 +; nextln: v8 = sload16_complex.i64 v0+v1+1 +; nextln: v9 = uload32_complex v0+v1+1 +; nextln: v10 = sload32_complex v0+v1+1 +; nextln: return v10 +; nextln: } + +function %dual_stores(i64, i64, i64) { +block0(v0: i64, v1: i64, v2: i64): +[DynRexOp1rr#8001] v3 = iadd v0, v1 +[RexOp1st#8089] store.i64 v2, v3 +[RexOp1st#88] istore8.i64 v2, v3 +[RexMp1st#189] istore16.i64 v2, v3 +[RexOp1st#89] istore32.i64 v2, v3 +[Op1ret#c3] return +} + +; sameln: function %dual_stores +; nextln: block0(v0: i64, v1: i64, v2: i64): +; nextln: v3 = iadd v0, v1 +; nextln: store_complex v2, v0+v1 +; nextln: istore8_complex v2, v0+v1 +; nextln: istore16_complex v2, v0+v1 +; nextln: istore32_complex v2, v0+v1 +; nextln: return +; nextln: } + +function %dual_stores2(i64, i64, i64) { +block0(v0: i64, v1: i64, v2: i64): +[DynRexOp1rr#8001] v3 = iadd v0, v1 +[RexOp1stDisp8#8089] store.i64 v2, v3+1 +[RexOp1stDisp8#88] istore8.i64 v2, v3+1 +[RexMp1stDisp8#189] istore16.i64 v2, v3+1 +[RexOp1stDisp8#89] istore32.i64 v2, v3+1 +[Op1ret#c3] return +} + +; sameln: function %dual_stores2 +; nextln: block0(v0: i64, v1: i64, v2: i64): +; nextln: v3 = iadd v0, v1 +; nextln: store_complex v2, v0+v1+1 +; nextln: istore8_complex v2, v0+v1+1 +; nextln: istore16_complex v2, v0+v1+1 +; nextln: istore32_complex v2, v0+v1+1 +; nextln: return +; nextln: } diff --git a/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif b/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif new file mode 100644 index 0000000000..1b58caed72 --- /dev/null +++ b/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif @@ -0,0 +1,32 @@ +test postopt +target x86_64 + +; Fold the immediate of an iadd_imm into an address offset. + +function u0:0(i64 vmctx) -> i64 { +block0(v0: i64): + v1 = iadd_imm.i64 v0, 16 +[RexOp1ldDisp8#808b] v2 = load.i64 notrap aligned v1 +[Op1ret#c3] return v2 +} + +; sameln: function u0:0(i64 vmctx) -> i64 fast { +; nextln: block0(v0: i64): +; nextln: v1 = iadd_imm v0, 16 +; nextln: [RexOp1ldDisp8#808b] v2 = load.i64 notrap aligned v0+16 +; nextln: [Op1ret#c3] return v2 +; nextln: } + +function u0:1(i64, i64 vmctx) { +block0(v3: i64, v0: i64): + v1 = iadd_imm.i64 v0, 16 +[RexOp1stDisp8#8089] store.i64 notrap aligned v3, v1 +[Op1ret#c3] return +} + +; sameln: function u0:1(i64, i64 vmctx) fast { +; nextln: block0(v3: i64, v0: i64): +; nextln: v1 = iadd_imm v0, 16 +; nextln: [RexOp1stDisp8#8089] store notrap aligned v3, v0+16 +; nextln: [Op1ret#c3] return +; nextln: } diff --git a/cranelift/filetests/filetests/preopt/branch.clif b/cranelift/filetests/filetests/preopt/branch.clif new file mode 100644 index 0000000000..50274c4890 --- /dev/null +++ b/cranelift/filetests/filetests/preopt/branch.clif @@ -0,0 +1,79 @@ +test preopt +target x86_64 + +function %brz_fold() -> i32 { +block0: + v0 = bconst.b1 false + brz v0, block2 + jump block1 +block1: + v1 = iconst.i32 42 + return v1 +block2: + v2 = iconst.i32 24 + return v2 +} +; sameln: function %brz_fold +; nextln: block0: +; nextln: v0 = bconst.b1 false +; nextln: jump block2 +; nextln: +; nextln: block1: +; nextln: v1 = iconst.i32 42 +; nextln: return v1 +; nextln: +; nextln: block2: +; nextln: v2 = iconst.i32 24 +; nextln: return v2 +; nextln: } + +function %brnz_fold() -> i32 { +block0: + v0 = bconst.b1 true + brnz v0, block2 + jump block1 +block1: + v1 = iconst.i32 42 + return v1 +block2: + v2 = iconst.i32 24 + return v2 +} +; sameln: function %brnz_fold +; nextln: block0: +; nextln: v0 = bconst.b1 
true +; nextln: jump block2 +; nextln: +; nextln: block1: +; nextln: v1 = iconst.i32 42 +; nextln: return v1 +; nextln: +; nextln: block2: +; nextln: v2 = iconst.i32 24 +; nextln: return v2 +; nextln: } + +function %brz_fold_param(b1) -> i32 { +block0(v0: b1): + brz v0, block2 + jump block1 +block1: + v1 = iconst.i32 42 + return v1 +block2: + v2 = iconst.i32 24 + return v2 +} +; sameln: function %brz_fold_param(b1) -> i32 fast { +; nextln: block0(v0: b1): +; nextln: brz v0, block2 +; nextln: jump block1 +; nextln: +; nextln: block1: +; nextln: v1 = iconst.i32 42 +; nextln: return v1 +; nextln: +; nextln: block2: +; nextln: v2 = iconst.i32 24 +; nextln: return v2 +; nextln: } diff --git a/cranelift/filetests/filetests/preopt/constant_fold.clif b/cranelift/filetests/filetests/preopt/constant_fold.clif new file mode 100644 index 0000000000..e2cc3e4562 --- /dev/null +++ b/cranelift/filetests/filetests/preopt/constant_fold.clif @@ -0,0 +1,19 @@ +test preopt +target x86_64 + +function %constant_fold(f64) -> f64 { +block0(v0: f64): + v1 = f64const 0x1.0000000000000p0 + v2 = f64const 0x1.0000000000000p1 + v3 = fadd v1, v2 + v4 = fadd v3, v0 + return v4 +} +; sameln: function %constant_fold(f64) -> f64 fast { +; nextln: block0(v0: f64): +; nextln: v1 = f64const 0x1.0000000000000p0 +; nextln: v2 = f64const 0x1.0000000000000p1 +; nextln: v3 = f64const 0x1.8000000000000p1 +; nextln: v4 = fadd v3, v0 +; nextln: return v4 +; nextln: } diff --git a/cranelift/filetests/filetests/preopt/numerical.clif b/cranelift/filetests/filetests/preopt/numerical.clif new file mode 100644 index 0000000000..044a3df6a0 --- /dev/null +++ b/cranelift/filetests/filetests/preopt/numerical.clif @@ -0,0 +1,36 @@ +test preopt +target x86_64 + +function %iadd_fold() -> i32 { +block0: + v0 = iconst.i32 37 + v1 = iconst.i32 5 + v2 = iadd v0, v1 + v3 = iconst.i32 8 + v4 = iadd v2, v3 + return v4 +} +; sameln: function %iadd_fold +; nextln: block0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = iconst.i32 5 +; nextln: v2 = iconst.i32 42 +; nextln: v3 = iconst.i32 8 +; nextln: v4 = iconst.i32 50 +; nextln: return v4 +; nextln: } + +function %isub_fold() -> i32 { +block0: + v0 = iconst.i32 42 + v1 = iconst.i32 1 + v2 = isub v0, v1 + return v2 +} +; sameln: function %isub_fold +; nextln: block0: +; nextln: v0 = iconst.i32 42 +; nextln: v1 = iconst.i32 1 +; nextln: v2 = iconst.i32 41 +; nextln: return v2 +; nextln: } \ No newline at end of file diff --git a/cranelift/filetests/filetests/regalloc/aliases.clif b/cranelift/filetests/filetests/regalloc/aliases.clif new file mode 100644 index 0000000000..6114298873 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/aliases.clif @@ -0,0 +1,35 @@ +test regalloc +target x86_64 haswell + +function %value_aliases(i32, f32, i64 vmctx) baldrdash_system_v { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: f32, v2: i64): + v3 = iconst.i32 0 + jump block3(v3) + +block3(v4: i32): + v5 = heap_addr.i64 heap0, v4, 1 + v6 = load.f32 v5 + v7 -> v1 + v8 = fdiv v6, v7 + v9 = heap_addr.i64 heap0, v4, 1 + store v8, v9 + v10 = iconst.i32 4 + v11 = iadd v4, v10 + v12 -> v0 + v13 = icmp ult v11, v12 + v14 = bint.i32 v13 + brnz v14, block3(v11) + jump block4 + +block4: + jump block2 + +block2: + jump block1 + +block1: + return +} diff --git a/cranelift/filetests/filetests/regalloc/basic.clif b/cranelift/filetests/filetests/regalloc/basic.clif new file mode 100644 index 0000000000..48111253ae --- /dev/null +++ 
b/cranelift/filetests/filetests/regalloc/basic.clif @@ -0,0 +1,80 @@ +test regalloc + +; We can add more ISAs once they have defined encodings. +target riscv32 + +; regex: RX=%x\d+ + +function %add(i32, i32) { +block0(v1: i32, v2: i32): + v3 = iadd v1, v2 +; check: [R#0c,%x5] +; sameln: iadd + return +} + +; Function with a dead argument. +function %dead_arg(i32, i32) -> i32 { +block0(v1: i32, v2: i32): +; not: regmove +; check: return v1 + return v1 +} + +; Return a value from a different register. +function %move1(i32, i32) -> i32 { +block0(v1: i32, v2: i32): +; not: regmove +; check: regmove v2, %x11 -> %x10 +; nextln: return v2 + return v2 +} + +; Swap two registers. +function %swap(i32, i32) -> i32, i32 { +block0(v1: i32, v2: i32): +; not: regmove +; check: regmove v2, %x11 -> $(tmp=$RX) +; nextln: regmove v1, %x10 -> %x11 +; nextln: regmove v2, $tmp -> %x10 +; nextln: return v2, v1 + return v2, v1 +} + +; Return a block argument. +function %retblock(i32, i32) -> i32 { +block0(v1: i32, v2: i32): + brnz v1, block1(v1) + jump block1(v2) + +block1(v10: i32): + return v10 +} + +; Pass a block argument as a function argument. +function %callblock(i32, i32) -> i32 { + fn0 = %foo(i32) -> i32 + +block0(v1: i32, v2: i32): + brnz v1, block1(v1) + jump block1(v2) + +block1(v10: i32): + v11 = call fn0(v10) + return v11 +} + +; Pass a block argument as a jump argument. +function %jumpblock(i32, i32) -> i32 { + fn0 = %foo(i32) -> i32 + +block0(v1: i32, v2: i32): + brnz v1, block1(v1, v2) + jump block1(v2, v1) + +block1(v10: i32, v11: i32): + jump block2(v10, v11) + +block2(v20: i32, v21: i32): + return v21 +} diff --git a/cranelift/filetests/filetests/regalloc/coalesce.clif b/cranelift/filetests/filetests/regalloc/coalesce.clif new file mode 100644 index 0000000000..48395da1b3 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/coalesce.clif @@ -0,0 +1,157 @@ +test regalloc +target riscv32 + +; Test the coalescer. +; regex: V=v\d+ +; regex: WS=\s+ +; regex: LOC=%\w+ +; regex: BB=block\d+ + +; This function is already CSSA, so no copies should be inserted. +function %cssa(i32) -> i32 { +block0(v0: i32): + ; not: copy + ; v0 is used by the branch and passed as an arg - that's no conflict. + brnz v0, block1(v0) + jump block2 + +block2: + ; v0 is live across the branch above. That's no conflict. + v1 = iadd_imm v0, 7 + jump block1(v1) + +block1(v10: i32): + v11 = iadd_imm v10, 7 + return v11 +} + +function %trivial(i32) -> i32 { +block0(v0: i32): + ; check: brnz v0, $(splitEdge=$BB) + brnz v0, block1(v0) + jump block2 + +block2: + ; not: copy + v1 = iadd_imm v0, 7 + jump block1(v1) + + ; check: $splitEdge: + ; nextln: $(cp1=$V) = copy.i32 v0 + ; nextln: jump block1($cp1) + +block1(v10: i32): + ; Using v0 in the destination block causes a conflict. + v11 = iadd v10, v0 + return v11 +} + +; A value is used as an SSA argument twice in the same branch. +function %dualuse(i32) -> i32 { +block0(v0: i32): + ; check: brnz v0, $(splitEdge=$BB) + brnz v0, block1(v0, v0) + jump block2 + +block2: + v1 = iadd_imm v0, 7 + v2 = iadd_imm v1, 56 + jump block1(v1, v2) + + ; check: $splitEdge: + ; check: $(cp1=$V) = copy.i32 v0 + ; nextln: jump block1($cp1, v0) + +block1(v10: i32, v11: i32): + v12 = iadd v10, v11 + return v12 +} + +; Interference away from the branch. +; The interference can be broken with a copy at either branch.
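+; Here the copy breaking the interference is inserted in block2 before the jump, not on the split edge, as the checks below verify.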
+function %interference(i32) -> i32 { +block0(v0: i32): + ; not: copy + ; check: brnz v0, $(splitEdge=$BB) + ; not: copy + brnz v0, block1(v0) + jump block2 + +block2: + v1 = iadd_imm v0, 7 + ; v1 and v0 interfere here: + v2 = iadd_imm v0, 8 + ; check: $(cp0=$V) = copy v1 + ; check: jump block1($cp0) + jump block1(v1) + + ; check: $splitEdge: + ; not: copy + ; nextln: jump block1(v0) + +block1(v10: i32): + ; not: copy + v11 = iadd_imm v10, 7 + return v11 +} + +; A loop where one induction variable is used as a backedge argument. +function %fibonacci(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 1 + v2 = iconst.i32 2 + jump block1(v1, v2) + + ; check: $(splitEdge=$BB): + ; check: $(nv11b=$V) = copy.i32 v11 + ; not: copy + ; check: jump block1($nv11b, v12) + +block1(v10: i32, v11: i32): + ; v11 needs to be isolated because it interferes with v10. + ; check: block1(v10: i32 [$LOC], $(nv11a=$V): i32 [$LOC]) + ; check: v11 = copy $nv11a + v12 = iadd v10, v11 + v13 = icmp ult v12, v0 + ; check: brnz v13, $splitEdge + brnz v13, block1(v11, v12) + jump block2 + +block2: + return v12 +} + +; Function arguments passed on the stack aren't allowed to be part of a virtual +; register, at least for now. This is because the other values in the virtual +; register would need to be spilled to the incoming_arg stack slot which we treat +; as belonging to the caller. +function %stackarg(i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i32 { +; check: ss0 = incoming_arg 4 +; not: incoming_arg +block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32): + ; check: fill v8 + ; not: v8 + jump block1(v8) + +block1(v10: i32): + v11 = iadd_imm v10, 1 + return v11 +} + +function %gvn_unremovable_phi(i32) system_v { +block0(v0: i32): + v2 = iconst.i32 0 + jump block2(v2, v0) + +block2(v3: i32, v4: i32): + brnz v3, block2(v3, v4) + jump block3 + +block3: + v5 = iconst.i32 1 + brnz v3, block2(v2, v5) + jump block4 + +block4: + return +} diff --git a/cranelift/filetests/filetests/regalloc/coalescing-207.clif b/cranelift/filetests/filetests/regalloc/coalescing-207.clif new file mode 100644 index 0000000000..39ddf0fa88 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/coalescing-207.clif @@ -0,0 +1,1527 @@ +test regalloc +target x86_64 haswell + +; Reported as https://github.com/bytecodealliance/cranelift/issues/207 +; +; The coalescer creates a virtual register with two interfering values. 
+function %pr207(i64 vmctx, i32, i32) -> i32 system_v { + gv1 = vmctx + gv0 = iadd_imm.i64 gv1, -8 + heap0 = static gv0, min 0, bound 0x5000, offset_guard 0x0040_0000 + sig0 = (i64 vmctx, i32, i32) -> i32 system_v + sig1 = (i64 vmctx, i32, i32, i32) -> i32 system_v + sig2 = (i64 vmctx, i32, i32, i32) -> i32 system_v + fn0 = u0:2 sig0 + fn1 = u0:0 sig1 + fn2 = u0:1 sig2 + +block0(v0: i64, v1: i32, v2: i32): + v3 = iconst.i32 0 + v4 = iconst.i32 0 + v5 = iconst.i32 0 + v6 = iconst.i32 0x4ffe + v7 = icmp uge v5, v6 + brz v7, block1 + jump block100 + +block100: + trap heap_oob + +block1: + v8 = uextend.i64 v5 + v9 = iadd_imm.i64 v0, -8 + v10 = load.i64 v9 + v11 = iadd v10, v8 + v12 = load.i32 v11+4 + v13 = iconst.i32 1056 + v14 = isub v12, v13 + v15 = iconst.i32 0x4ffe + v16 = icmp.i32 uge v4, v15 + brz v16, block2 + jump block101 + +block101: + trap heap_oob + +block2: + v17 = uextend.i64 v4 + v18 = iadd_imm.i64 v0, -8 + v19 = load.i64 v18 + v20 = iadd v19, v17 + store.i32 v14, v20+4 + v21 = iconst.i32 0x4ffe + v22 = icmp.i32 uge v2, v21 + brz v22, block3 + jump block102 + +block102: + trap heap_oob + +block3: + v23 = uextend.i64 v2 + v24 = iadd_imm.i64 v0, -8 + v25 = load.i64 v24 + v26 = iadd v25, v23 + v27 = sload8.i32 v26 + v28 = iconst.i32 255 + v29 = band v27, v28 + v30 = iconst.i32 0 + v31 = icmp eq v29, v30 + v32 = bint.i32 v31 + brnz v32, block90(v14, v1) + jump block103 + +block103: + v33 = call fn0(v0, v1, v27) + v34 = iconst.i32 0 + v35 = iconst.i32 0 + v36 = icmp eq v33, v35 + v37 = bint.i32 v36 + brnz v37, block90(v14, v34) + jump block104 + +block104: + v38 = iconst.i32 0x4ffe + v39 = icmp.i32 uge v2, v38 + brz v39, block4 + jump block105 + +block105: + trap heap_oob + +block4: + v40 = uextend.i64 v2 + v41 = iadd_imm.i64 v0, -8 + v42 = load.i64 v41 + v43 = iadd v42, v40 + v44 = uload8.i32 v43+1 + v45 = iconst.i32 0 + v46 = icmp eq v44, v45 + v47 = bint.i32 v46 + brnz v47, block56(v33, v14) + jump block106 + +block106: + v48 = iconst.i32 0x4ffe + v49 = icmp.i32 uge v33, v48 + brz v49, block5 + jump block107 + +block107: + trap heap_oob + +block5: + v50 = uextend.i64 v33 + v51 = iadd_imm.i64 v0, -8 + v52 = load.i64 v51 + v53 = iadd v52, v50 + v54 = uload8.i32 v53+1 + v55 = iconst.i32 0 + v56 = icmp eq v54, v55 + v57 = bint.i32 v56 + brnz v57, block90(v14, v34) + jump block108 + +block108: + v58 = iconst.i32 0x4ffe + v59 = icmp.i32 uge v2, v58 + brz v59, block6 + jump block109 + +block109: + trap heap_oob + +block6: + v60 = uextend.i64 v2 + v61 = iadd_imm.i64 v0, -8 + v62 = load.i64 v61 + v63 = iadd v62, v60 + v64 = uload8.i32 v63+2 + v65 = iconst.i32 0 + v66 = icmp eq v64, v65 + v67 = bint.i32 v66 + brnz v67, block42 + jump block110 + +block110: + v68 = iconst.i32 0x4ffe + v69 = icmp.i32 uge v33, v68 + brz v69, block7 + jump block111 + +block111: + trap heap_oob + +block7: + v70 = uextend.i64 v33 + v71 = iadd_imm.i64 v0, -8 + v72 = load.i64 v71 + v73 = iadd v72, v70 + v74 = uload8.i32 v73+2 + v75 = iconst.i32 0 + v76 = icmp eq v74, v75 + v77 = bint.i32 v76 + brnz v77, block90(v14, v34) + jump block112 + +block112: + v78 = iconst.i32 0x4ffe + v79 = icmp.i32 uge v2, v78 + brz v79, block8 + jump block113 + +block113: + trap heap_oob + +block8: + v80 = uextend.i64 v2 + v81 = iadd_imm.i64 v0, -8 + v82 = load.i64 v81 + v83 = iadd v82, v80 + v84 = uload8.i32 v83+3 + v85 = iconst.i32 0 + v86 = icmp eq v84, v85 + v87 = bint.i32 v86 + brnz v87, block46 + jump block114 + +block114: + v88 = iconst.i32 0x4ffe + v89 = icmp.i32 uge v33, v88 + brz v89, block9 + jump block115 + +block115: + trap 
heap_oob + +block9: + v90 = uextend.i64 v33 + v91 = iadd_imm.i64 v0, -8 + v92 = load.i64 v91 + v93 = iadd v92, v90 + v94 = uload8.i32 v93+3 + v95 = iconst.i32 0 + v96 = icmp eq v94, v95 + v97 = bint.i32 v96 + brnz v97, block90(v14, v34) + jump block116 + +block116: + v98 = iconst.i32 0x4ffe + v99 = icmp.i32 uge v2, v98 + brz v99, block10 + jump block117 + +block117: + trap heap_oob + +block10: + v100 = uextend.i64 v2 + v101 = iadd_imm.i64 v0, -8 + v102 = load.i64 v101 + v103 = iadd v102, v100 + v104 = uload8.i32 v103+4 + v105 = iconst.i32 0 + v106 = icmp eq v104, v105 + v107 = bint.i32 v106 + brnz v107, block54 + jump block118 + +block118: + v108 = iconst.i32 1 + v109 = iadd.i32 v2, v108 + v110 = iconst.i32 1048 + v111 = iadd.i32 v14, v110 + v112 = iconst.i64 0 + v113 = iconst.i32 0x4ffe + v114 = icmp uge v111, v113 + brz v114, block11 + jump block119 + +block119: + trap heap_oob + +block11: + v115 = uextend.i64 v111 + v116 = iadd_imm.i64 v0, -8 + v117 = load.i64 v116 + v118 = iadd v117, v115 + store.i64 v112, v118 + v119 = iconst.i32 1040 + v120 = iadd.i32 v14, v119 + v121 = iconst.i64 0 + v122 = iconst.i32 0x4ffe + v123 = icmp uge v120, v122 + brz v123, block12 + jump block120 + +block120: + trap heap_oob + +block12: + v124 = uextend.i64 v120 + v125 = iadd_imm.i64 v0, -8 + v126 = load.i64 v125 + v127 = iadd v126, v124 + store.i64 v121, v127 + v128 = iconst.i64 0 + v129 = iconst.i32 0x4ffe + v130 = icmp.i32 uge v14, v129 + brz v130, block13 + jump block121 + +block121: + trap heap_oob + +block13: + v131 = uextend.i64 v14 + v132 = iadd_imm.i64 v0, -8 + v133 = load.i64 v132 + v134 = iadd v133, v131 + store.i64 v128, v134+1032 + v135 = iconst.i64 0 + v136 = iconst.i32 0x4ffe + v137 = icmp.i32 uge v14, v136 + brz v137, block14 + jump block122 + +block122: + trap heap_oob + +block14: + v138 = uextend.i64 v14 + v139 = iadd_imm.i64 v0, -8 + v140 = load.i64 v139 + v141 = iadd v140, v138 + store.i64 v135, v141+1024 + v142 = iconst.i32 -1 + jump block15(v142, v27) + +block15(v143: i32, v144: i32): + v145 = iadd.i32 v33, v143 + v146 = iconst.i32 1 + v147 = iadd v145, v146 + v148 = iconst.i32 0x4ffe + v149 = icmp uge v147, v148 + brz v149, block16 + jump block123 + +block123: + trap heap_oob + +block16: + v150 = uextend.i64 v147 + v151 = iadd_imm.i64 v0, -8 + v152 = load.i64 v151 + v153 = iadd v152, v150 + v154 = uload8.i32 v153 + v155 = iconst.i32 0 + v156 = icmp eq v154, v155 + v157 = bint.i32 v156 + brnz v157, block89(v14) + jump block124 + +block124: + v158 = iconst.i32 255 + v159 = band.i32 v144, v158 + v160 = iconst.i32 2 + v161 = ishl v159, v160 + v162 = iadd.i32 v14, v161 + v163 = iconst.i32 2 + v164 = iadd.i32 v143, v163 + v165 = iconst.i32 0x4ffe + v166 = icmp uge v162, v165 + brz v166, block17 + jump block125 + +block125: + trap heap_oob + +block17: + v167 = uextend.i64 v162 + v168 = iadd_imm.i64 v0, -8 + v169 = load.i64 v168 + v170 = iadd v169, v167 + store.i32 v164, v170 + v171 = iconst.i32 1024 + v172 = iadd.i32 v14, v171 + v173 = iconst.i32 3 + v174 = ushr.i32 v159, v173 + v175 = iconst.i32 28 + v176 = band v174, v175 + v177 = iadd v172, v176 + v178 = iconst.i32 0x4ffe + v179 = icmp uge v177, v178 + brz v179, block18 + jump block126 + +block126: + trap heap_oob + +block18: + v180 = uextend.i64 v177 + v181 = iadd_imm.i64 v0, -8 + v182 = load.i64 v181 + v183 = iadd v182, v180 + v184 = load.i32 v183 + v185 = iconst.i32 1 + v186 = iconst.i32 31 + v187 = band.i32 v144, v186 + v188 = ishl v185, v187 + v189 = bor v184, v188 + v190 = iconst.i32 0x4ffe + v191 = icmp.i32 uge v177, v190 + brz 
v191, block19 + jump block127 + +block127: + trap heap_oob + +block19: + v192 = uextend.i64 v177 + v193 = iadd_imm.i64 v0, -8 + v194 = load.i64 v193 + v195 = iadd v194, v192 + store.i32 v189, v195 + v196 = iadd.i32 v109, v143 + v197 = iconst.i32 1 + v198 = iadd.i32 v143, v197 + v199 = iconst.i32 1 + v200 = iadd v196, v199 + v201 = iconst.i32 0x4ffe + v202 = icmp uge v200, v201 + brz v202, block20 + jump block128 + +block128: + trap heap_oob + +block20: + v203 = uextend.i64 v200 + v204 = iadd_imm.i64 v0, -8 + v205 = load.i64 v204 + v206 = iadd v205, v203 + v207 = uload8.i32 v206 + brnz v207, block15(v198, v207) + jump block21 + +block21: + v208 = iconst.i32 -1 + v209 = iconst.i32 1 + v210 = iconst.i32 -1 + v211 = iconst.i32 1 + v212 = iconst.i32 1 + v213 = iadd.i32 v198, v212 + v214 = iconst.i32 2 + v215 = icmp ult v213, v214 + v216 = bint.i32 v215 + brnz v216, block38(v2, v211, v209, v210, v208, v198, v213, v33, v14) + jump block129 + +block129: + v217 = iconst.i32 -1 + v218 = iconst.i32 0 + v219 = iconst.i32 1 + v220 = iconst.i32 1 + v221 = iconst.i32 1 + v222 = copy.i32 v44 + jump block22(v217, v221, v44, v220, v218, v219, v213, v222, v198, v33, v14) + +block22(v223: i32, v224: i32, v225: i32, v226: i32, v227: i32, v228: i32, v229: i32, v230: i32, v231: i32, v232: i32, v233: i32): + v234 = copy v228 + v235 = iadd v223, v224 + v236 = iadd.i32 v2, v235 + v237 = iconst.i32 0x4ffe + v238 = icmp uge v236, v237 + brz v238, block23 + jump block130 + +block130: + trap heap_oob + +block23: + v239 = uextend.i64 v236 + v240 = iadd_imm.i64 v0, -8 + v241 = load.i64 v240 + v242 = iadd v241, v239 + v243 = uload8.i32 v242 + v244 = iconst.i32 255 + v245 = band.i32 v225, v244 + v246 = icmp ne v243, v245 + v247 = bint.i32 v246 + brnz v247, block24 + jump block131 + +block131: + v248 = icmp.i32 ne v224, v226 + v249 = bint.i32 v248 + brnz v249, block25 + jump block132 + +block132: + v250 = iadd.i32 v227, v226 + v251 = iconst.i32 1 + jump block27(v251, v250, v223, v226) + +block24: + v252 = icmp.i32 ule v243, v245 + v253 = bint.i32 v252 + brnz v253, block26 + jump block133 + +block133: + v254 = isub.i32 v234, v223 + v255 = iconst.i32 1 + jump block27(v255, v234, v223, v254) + +block25: + v256 = iconst.i32 1 + v257 = iadd.i32 v224, v256 + v258 = copy.i32 v227 + jump block27(v257, v258, v223, v226) + +block26: + v259 = iconst.i32 1 + v260 = iconst.i32 1 + v261 = iadd.i32 v227, v260 + v262 = iconst.i32 1 + v263 = copy.i32 v227 + jump block27(v259, v261, v263, v262) + +block27(v264: i32, v265: i32, v266: i32, v267: i32): + v268 = iadd v264, v265 + v269 = icmp uge v268, v229 + v270 = bint.i32 v269 + brnz v270, block29 + jump block134 + +block134: + v271 = iadd.i32 v2, v268 + v272 = iconst.i32 0x4ffe + v273 = icmp uge v271, v272 + brz v273, block28 + jump block135 + +block135: + trap heap_oob + +block28: + v274 = uextend.i64 v271 + v275 = iadd_imm.i64 v0, -8 + v276 = load.i64 v275 + v277 = iadd v276, v274 + v278 = uload8.i32 v277 + v279 = copy.i32 v265 + jump block22(v266, v264, v278, v267, v279, v268, v229, v230, v231, v232, v233) + +block29: + jump block30 + +block30: + v280 = iconst.i32 -1 + v281 = iconst.i32 0 + v282 = iconst.i32 1 + v283 = iconst.i32 1 + v284 = iconst.i32 1 + jump block31(v280, v284, v230, v283, v281, v282, v229, v267, v266, v231, v232, v233) + +block31(v285: i32, v286: i32, v287: i32, v288: i32, v289: i32, v290: i32, v291: i32, v292: i32, v293: i32, v294: i32, v295: i32, v296: i32): + v297 = copy v290 + v298 = iadd v285, v286 + v299 = iadd.i32 v2, v298 + v300 = iconst.i32 0x4ffe + v301 = icmp 
uge v299, v300 + brz v301, block32 + jump block136 + +block136: + trap heap_oob + +block32: + v302 = uextend.i64 v299 + v303 = iadd_imm.i64 v0, -8 + v304 = load.i64 v303 + v305 = iadd v304, v302 + v306 = uload8.i32 v305 + v307 = iconst.i32 255 + v308 = band.i32 v287, v307 + v309 = icmp ne v306, v308 + v310 = bint.i32 v309 + brnz v310, block33 + jump block137 + +block137: + v311 = icmp.i32 ne v286, v288 + v312 = bint.i32 v311 + brnz v312, block34 + jump block138 + +block138: + v313 = iadd.i32 v289, v288 + v314 = iconst.i32 1 + jump block36(v314, v313, v285, v288) + +block33: + v315 = icmp.i32 uge v306, v308 + v316 = bint.i32 v315 + brnz v316, block35 + jump block139 + +block139: + v317 = isub.i32 v297, v285 + v318 = iconst.i32 1 + jump block36(v318, v297, v285, v317) + +block34: + v319 = iconst.i32 1 + v320 = iadd.i32 v286, v319 + v321 = copy.i32 v289 + jump block36(v320, v321, v285, v288) + +block35: + v322 = iconst.i32 1 + v323 = iconst.i32 1 + v324 = iadd.i32 v289, v323 + v325 = iconst.i32 1 + v326 = copy.i32 v289 + jump block36(v322, v324, v326, v325) + +block36(v327: i32, v328: i32, v329: i32, v330: i32): + v331 = iadd v327, v328 + v332 = icmp uge v331, v291 + v333 = bint.i32 v332 + brnz v333, block38(v2, v330, v292, v329, v293, v294, v291, v295, v296) + jump block140 + +block140: + v334 = iadd.i32 v2, v331 + v335 = iconst.i32 0x4ffe + v336 = icmp uge v334, v335 + brz v336, block37 + jump block141 + +block141: + trap heap_oob + +block37: + v337 = uextend.i64 v334 + v338 = iadd_imm.i64 v0, -8 + v339 = load.i64 v338 + v340 = iadd v339, v337 + v341 = uload8.i32 v340 + v342 = copy.i32 v328 + jump block31(v329, v327, v341, v330, v342, v331, v291, v292, v293, v294, v295, v296) + +block38(v343: i32, v344: i32, v345: i32, v346: i32, v347: i32, v348: i32, v349: i32, v350: i32, v351: i32): + v352 = iconst.i32 1 + v353 = iadd v346, v352 + v354 = iconst.i32 1 + v355 = iadd v347, v354 + v356 = icmp ugt v353, v355 + v357 = bint.i32 v356 + brnz v357, block39(v344) + jump block142 + +block142: + v358 = copy v345 + jump block39(v358) + +block39(v359: i32): + v360 = iadd.i32 v343, v359 + brnz.i32 v357, block40(v346) + jump block143 + +block143: + v361 = copy.i32 v347 + jump block40(v361) + +block40(v362: i32): + v363 = iconst.i32 1 + v364 = iadd v362, v363 + v365 = call fn1(v0, v343, v360, v364) + v366 = iconst.i32 0 + v367 = icmp eq v365, v366 + v368 = bint.i32 v367 + brnz v368, block63 + jump block144 + +block144: + v369 = iconst.i32 1 + v370 = iadd v362, v369 + v371 = isub.i32 v348, v370 + v372 = iconst.i32 1 + v373 = iadd v371, v372 + v374 = icmp ugt v362, v373 + v375 = bint.i32 v374 + v376 = copy v362 + brnz v375, block41(v376) + jump block145 + +block145: + v377 = copy v373 + jump block41(v377) + +block41(v378: i32): + v379 = iconst.i32 1 + v380 = iadd v378, v379 + v381 = iconst.i32 0 + jump block64(v380, v381) + +block42: + v382 = iconst.i32 8 + v383 = ishl.i32 v29, v382 + v384 = bor v383, v44 + v385 = iconst.i32 0x4ffe + v386 = icmp.i32 uge v33, v385 + brz v386, block43 + jump block146 + +block146: + trap heap_oob + +block43: + v387 = uextend.i64 v33 + v388 = iadd_imm.i64 v0, -8 + v389 = load.i64 v388 + v390 = iadd v389, v387 + v391 = uload8.i32 v390 + jump block44(v391, v54, v33) + +block44(v392: i32, v393: i32, v394: i32): + v395 = iconst.i32 8 + v396 = ishl v392, v395 + v397 = iconst.i32 0xff00 + v398 = band v396, v397 + v399 = iconst.i32 255 + v400 = band v393, v399 + v401 = bor v398, v400 + v402 = icmp eq v401, v384 + v403 = bint.i32 v402 + brnz v403, block56(v394, v14) + jump block147 + 
+block147: + v404 = iconst.i32 2 + v405 = iadd v394, v404 + v406 = iconst.i32 1 + v407 = iadd v394, v406 + v408 = iconst.i32 0x4ffe + v409 = icmp uge v405, v408 + brz v409, block45 + jump block148 + +block148: + trap heap_oob + +block45: + v410 = uextend.i64 v405 + v411 = iadd_imm.i64 v0, -8 + v412 = load.i64 v411 + v413 = iadd v412, v410 + v414 = uload8.i32 v413 + brnz v414, block44(v401, v414, v407) + jump block90(v14, v34) + +block46: + v415 = iconst.i32 8 + v416 = ishl.i32 v74, v415 + v417 = iconst.i32 16 + v418 = ishl.i32 v54, v417 + v419 = bor v416, v418 + v420 = iconst.i32 0x4ffe + v421 = icmp.i32 uge v33, v420 + brz v421, block47 + jump block149 + +block149: + trap heap_oob + +block47: + v422 = uextend.i64 v33 + v423 = iadd_imm.i64 v0, -8 + v424 = load.i64 v423 + v425 = iadd v424, v422 + v426 = uload8.i32 v425 + v427 = iconst.i32 24 + v428 = ishl v426, v427 + v429 = bor.i32 v419, v428 + v430 = iconst.i32 16 + v431 = ishl.i32 v44, v430 + v432 = iconst.i32 24 + v433 = ishl.i32 v29, v432 + v434 = bor v431, v433 + v435 = iconst.i32 8 + v436 = ishl.i32 v64, v435 + v437 = bor v434, v436 + v438 = icmp eq v429, v437 + v439 = bint.i32 v438 + brnz v439, block56(v33, v14) + jump block48(v33, v429) + +block48(v440: i32, v441: i32): + v442 = iconst.i32 1 + v443 = iadd v440, v442 + v444 = iconst.i32 3 + v445 = iadd v440, v444 + v446 = iconst.i32 0x4ffe + v447 = icmp uge v445, v446 + brz v447, block49 + jump block150 + +block150: + trap heap_oob + +block49: + v448 = uextend.i64 v445 + v449 = iadd_imm.i64 v0, -8 + v450 = load.i64 v449 + v451 = iadd v450, v448 + v452 = uload8.i32 v451 + v453 = iconst.i32 0 + v454 = icmp eq v452, v453 + v455 = bint.i32 v454 + brnz v455, block51(v14) + jump block151 + +block151: + v456 = bor.i32 v441, v452 + v457 = iconst.i32 8 + v458 = ishl v456, v457 + v459 = icmp ne v458, v437 + v460 = bint.i32 v459 + v461 = copy.i32 v443 + brnz v460, block48(v461, v458) + jump block50 + +block50: + jump block51(v14) + +block51(v462: i32): + v463 = iconst.i32 0 + v464 = iconst.i32 1056 + v465 = iadd v462, v464 + v466 = iconst.i32 0x4ffe + v467 = icmp uge v463, v466 + brz v467, block52 + jump block152 + +block152: + trap heap_oob + +block52: + v468 = uextend.i64 v463 + v469 = iadd_imm.i64 v0, -8 + v470 = load.i64 v469 + v471 = iadd v470, v468 + store.i32 v465, v471+4 + v472 = iconst.i32 0 + brnz.i32 v452, block53(v443) + jump block153 + +block153: + v473 = copy v472 + jump block53(v473) + +block53(v474: i32): + return v474 + +block54: + v475 = iconst.i32 8 + v476 = ishl.i32 v74, v475 + v477 = iconst.i32 16 + v478 = ishl.i32 v54, v477 + v479 = bor v476, v478 + v480 = bor v479, v94 + v481 = iconst.i32 0x4ffe + v482 = icmp.i32 uge v33, v481 + brz v482, block55 + jump block154 + +block154: + trap heap_oob + +block55: + v483 = uextend.i64 v33 + v484 = iadd_imm.i64 v0, -8 + v485 = load.i64 v484 + v486 = iadd v485, v483 + v487 = uload8.i32 v486 + v488 = iconst.i32 24 + v489 = ishl v487, v488 + v490 = bor.i32 v480, v489 + v491 = iconst.i32 16 + v492 = ishl.i32 v44, v491 + v493 = iconst.i32 24 + v494 = ishl.i32 v29, v493 + v495 = bor v492, v494 + v496 = iconst.i32 8 + v497 = ishl.i32 v64, v496 + v498 = bor v495, v497 + v499 = bor v498, v84 + v500 = icmp ne v490, v499 + v501 = bint.i32 v500 + brnz v501, block57 + jump block56(v33, v14) + +block56(v502: i32, v503: i32): + v504 = copy v502 + jump block90(v503, v504) + +block57: + jump block58(v33, v490) + +block58(v505: i32, v506: i32): + v507 = iconst.i32 4 + v508 = iadd v505, v507 + v509 = iconst.i32 1 + v510 = iadd v505, v509 + v511 = 
iconst.i32 0x4ffe + v512 = icmp uge v508, v511 + brz v512, block59 + jump block155 + +block155: + trap heap_oob + +block59: + v513 = uextend.i64 v508 + v514 = iadd_imm.i64 v0, -8 + v515 = load.i64 v514 + v516 = iadd v515, v513 + v517 = uload8.i32 v516 + v518 = iconst.i32 0 + v519 = icmp eq v517, v518 + v520 = bint.i32 v519 + brnz v520, block61(v14) + jump block156 + +block156: + v521 = iconst.i32 8 + v522 = ishl.i32 v506, v521 + v523 = bor v522, v517 + v524 = icmp ne v523, v499 + v525 = bint.i32 v524 + brnz v525, block58(v510, v523) + jump block60 + +block60: + jump block61(v14) + +block61(v526: i32): + v527 = iconst.i32 0 + brnz.i32 v517, block62(v510) + jump block157 + +block157: + v528 = copy v527 + jump block62(v528) + +block62(v529: i32): + v530 = copy v529 + jump block90(v526, v530) + +block63: + v531 = isub.i32 v348, v359 + v532 = iconst.i32 1 + v533 = iadd v531, v532 + jump block64(v359, v533) + +block64(v534: i32, v535: i32): + v536 = iconst.i32 1 + v537 = iadd.i32 v343, v536 + v538 = iconst.i32 0 + v539 = isub v538, v362 + v540 = iconst.i32 63 + v541 = bor.i32 v349, v540 + v542 = isub.i32 v348, v534 + v543 = iconst.i32 1 + v544 = iadd v542, v543 + v545 = iconst.i32 0 + v546 = copy.i32 v350 + jump block65(v350, v546, v349, v541, v348, v351, v544, v534, v545, v535, v343, v364, v537, v539, v362) + +block65(v547: i32, v548: i32, v549: i32, v550: i32, v551: i32, v552: i32, v553: i32, v554: i32, v555: i32, v556: i32, v557: i32, v558: i32, v559: i32, v560: i32, v561: i32): + v562 = copy v556 + v563 = isub v547, v548 + v564 = icmp uge v563, v549 + v565 = bint.i32 v564 + brnz v565, block67(v547) + jump block158 + +block158: + v566 = iconst.i32 0 + v567 = call fn2(v0, v547, v566, v550) + brnz v567, block66 + jump block159 + +block159: + v568 = iadd v547, v550 + jump block67(v568) + +block66: + v569 = isub.i32 v567, v548 + v570 = icmp ult v569, v549 + v571 = bint.i32 v570 + brnz v571, block89(v552) + jump block160 + +block160: + v572 = copy.i32 v567 + jump block67(v572) + +block67(v573: i32): + v574 = iconst.i32 1 + v575 = iadd.i32 v548, v551 + v576 = iconst.i32 0x4ffe + v577 = icmp uge v575, v576 + brz v577, block68 + jump block161 + +block161: + trap heap_oob + +block68: + v578 = uextend.i64 v575 + v579 = iadd_imm.i64 v0, -8 + v580 = load.i64 v579 + v581 = iadd v580, v578 + v582 = uload8.i32 v581 + v583 = iconst.i32 31 + v584 = band v582, v583 + v585 = ishl.i32 v574, v584 + v586 = iconst.i32 1024 + v587 = iadd.i32 v552, v586 + v588 = iconst.i32 3 + v589 = ushr v582, v588 + v590 = iconst.i32 28 + v591 = band v589, v590 + v592 = iadd v587, v591 + v593 = iconst.i32 0x4ffe + v594 = icmp uge v592, v593 + brz v594, block69 + jump block162 + +block162: + trap heap_oob + +block69: + v595 = uextend.i64 v592 + v596 = iadd_imm.i64 v0, -8 + v597 = load.i64 v596 + v598 = iadd v597, v595 + v599 = load.i32 v598 + v600 = band.i32 v585, v599 + v601 = iconst.i32 0 + v602 = icmp eq v600, v601 + v603 = bint.i32 v602 + brnz v603, block74 + jump block163 + +block163: + v604 = iconst.i32 2 + v605 = ishl.i32 v582, v604 + v606 = iadd.i32 v552, v605 + v607 = iconst.i32 0x4ffe + v608 = icmp uge v606, v607 + brz v608, block70 + jump block164 + +block164: + trap heap_oob + +block70: + v609 = uextend.i64 v606 + v610 = iadd_imm.i64 v0, -8 + v611 = load.i64 v610 + v612 = iadd v611, v609 + v613 = load.i32 v612 + v614 = isub.i32 v551, v613 + v615 = iconst.i32 -1 + v616 = icmp eq v614, v615 + v617 = bint.i32 v616 + brnz v617, block75 + jump block165 + +block165: + v618 = iconst.i32 1 + v619 = iadd v614, v618 + v620 = icmp 
ult v619, v554 + v621 = bint.i32 v620 + v622 = copy.i32 v553 + brnz v621, block71(v622) + jump block166 + +block166: + v623 = copy v619 + jump block71(v623) + +block71(v624: i32): + v625 = copy v624 + brnz.i32 v555, block72(v625) + jump block72(v619) + +block72(v626: i32): + brnz.i32 v562, block73(v626) + jump block73(v619) + +block73(v627: i32): + v628 = copy.i32 v554 + v629 = copy.i32 v562 + jump block87(v548, v627, v573, v549, v550, v551, v552, v553, v628, v629, v557, v558, v559, v560, v561) + +block74: + v630 = copy.i32 v549 + v631 = copy.i32 v554 + v632 = copy.i32 v562 + jump block87(v548, v630, v573, v549, v550, v551, v552, v553, v631, v632, v557, v558, v559, v560, v561) + +block75: + v633 = icmp.i32 ugt v558, v555 + v634 = bint.i32 v633 + v635 = copy.i32 v558 + brnz v634, block76(v635) + jump block167 + +block167: + v636 = copy.i32 v555 + jump block76(v636) + +block76(v637: i32): + v638 = iadd.i32 v557, v637 + v639 = iconst.i32 0x4ffe + v640 = icmp uge v638, v639 + brz v640, block77 + jump block168 + +block168: + trap heap_oob + +block77: + v641 = uextend.i64 v638 + v642 = iadd_imm.i64 v0, -8 + v643 = load.i64 v642 + v644 = iadd v643, v641 + v645 = uload8.i32 v644 + v646 = iconst.i32 0 + v647 = icmp eq v645, v646 + v648 = bint.i32 v647 + brnz v648, block82(v548, v549, v551, v552) + jump block169 + +block169: + v649 = iadd.i32 v548, v637 + v650 = iadd.i32 v559, v637 + v651 = iadd.i32 v560, v637 + jump block78(v645, v649, v651, v650) + +block78(v652: i32, v653: i32, v654: i32, v655: i32): + v656 = iconst.i32 255 + v657 = band v652, v656 + v658 = iconst.i32 0x4ffe + v659 = icmp uge v653, v658 + brz v659, block79 + jump block170 + +block170: + trap heap_oob + +block79: + v660 = uextend.i64 v653 + v661 = iadd_imm.i64 v0, -8 + v662 = load.i64 v661 + v663 = iadd v662, v660 + v664 = uload8.i32 v663 + v665 = icmp.i32 ne v657, v664 + v666 = bint.i32 v665 + v667 = copy.i32 v554 + v668 = copy.i32 v562 + brnz v666, block87(v548, v654, v573, v549, v550, v551, v552, v553, v667, v668, v557, v558, v559, v560, v561) + jump block171 + +block171: + v669 = iconst.i32 1 + v670 = iadd.i32 v653, v669 + v671 = iconst.i32 1 + v672 = iadd.i32 v654, v671 + v673 = iconst.i32 0x4ffe + v674 = icmp.i32 uge v655, v673 + brz v674, block80 + jump block172 + +block172: + trap heap_oob + +block80: + v675 = uextend.i64 v655 + v676 = iadd_imm.i64 v0, -8 + v677 = load.i64 v676 + v678 = iadd v677, v675 + v679 = uload8.i32 v678 + v680 = iconst.i32 1 + v681 = iadd.i32 v655, v680 + brnz v679, block78(v679, v670, v672, v681) + jump block81 + +block81: + jump block82(v548, v549, v551, v552) + +block82(v682: i32, v683: i32, v684: i32, v685: i32): + v686 = icmp.i32 ule v558, v555 + v687 = bint.i32 v686 + brnz v687, block90(v685, v682) + jump block173 + +block173: + v688 = copy.i32 v561 + jump block83(v688) + +block83(v689: i32): + v690 = iadd.i32 v557, v689 + v691 = iconst.i32 0x4ffe + v692 = icmp uge v690, v691 + brz v692, block84 + jump block174 + +block174: + trap heap_oob + +block84: + v693 = uextend.i64 v690 + v694 = iadd_imm.i64 v0, -8 + v695 = load.i64 v694 + v696 = iadd v695, v693 + v697 = uload8.i32 v696 + v698 = iadd.i32 v682, v689 + v699 = iconst.i32 0x4ffe + v700 = icmp uge v698, v699 + brz v700, block85 + jump block175 + +block175: + trap heap_oob + +block85: + v701 = uextend.i64 v698 + v702 = iadd_imm.i64 v0, -8 + v703 = load.i64 v702 + v704 = iadd v703, v701 + v705 = uload8.i32 v704 + v706 = icmp.i32 ne v697, v705 + v707 = bint.i32 v706 + brnz v707, block86 + jump block176 + +block176: + v708 = icmp.i32 ule v689, 
v555 + v709 = bint.i32 v708 + v710 = iconst.i32 -1 + v711 = iadd.i32 v689, v710 + v712 = iconst.i32 0 + v713 = icmp eq v709, v712 + v714 = bint.i32 v713 + brnz v714, block83(v711) + jump block90(v685, v682) + +block86: + v715 = copy.i32 v554 + v716 = copy.i32 v562 + jump block88(v682, v554, v573, v683, v550, v684, v685, v553, v715, v562, v716, v557, v558, v559, v560, v561) + +block87(v717: i32, v718: i32, v719: i32, v720: i32, v721: i32, v722: i32, v723: i32, v724: i32, v725: i32, v726: i32, v727: i32, v728: i32, v729: i32, v730: i32, v731: i32): + v732 = copy v718 + v733 = iconst.i32 0 + jump block88(v717, v732, v719, v720, v721, v722, v723, v724, v725, v733, v726, v727, v728, v729, v730, v731) + +block88(v734: i32, v735: i32, v736: i32, v737: i32, v738: i32, v739: i32, v740: i32, v741: i32, v742: i32, v743: i32, v744: i32, v745: i32, v746: i32, v747: i32, v748: i32, v749: i32): + v750 = iadd v734, v735 + v751 = copy v742 + v752 = copy v743 + v753 = copy v744 + jump block65(v736, v750, v737, v738, v739, v740, v741, v751, v752, v753, v745, v746, v747, v748, v749) + +block89(v754: i32): + v755 = iconst.i32 0 + jump block90(v754, v755) + +block90(v756: i32, v757: i32): + v758 = iconst.i32 0 + v759 = iconst.i32 1056 + v760 = iadd v756, v759 + v761 = iconst.i32 0x4ffe + v762 = icmp uge v758, v761 + brz v762, block91 + jump block177 + +block177: + trap heap_oob + +block91: + v763 = uextend.i64 v758 + v764 = iadd_imm.i64 v0, -8 + v765 = load.i64 v764 + v766 = iadd v765, v763 + store.i32 v760, v766+4 + jump block92(v757) + +block92(v767: i32): + return v767 +} + +; Same problem from musl.wasm. +function %musl(f64 [%xmm0], i64 vmctx [%rdi]) -> f64 [%xmm0] system_v { + gv0 = vmctx + heap0 = static gv0, min 0, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + sig0 = (f64 [%xmm0], i32 [%rdi], i64 vmctx [%rsi]) -> f64 [%xmm0] system_v + fn0 = u0:517 sig0 + +block0(v0: f64, v1: i64): + v3 = iconst.i64 0 + v4 = iconst.i32 0 + v131 = iconst.i64 0 + v5 = bitcast.f64 v131 + v6 = iconst.i32 0 + v7 = iconst.i32 0 + v8 = iconst.i32 0 + v132 = uextend.i64 v8 + v133 = iadd_imm v1, 0 + v134 = load.i64 v133 + v9 = iadd v134, v132 + v10 = load.i32 v9+4 + v11 = iconst.i32 16 + v12 = isub v10, v11 + v135 = uextend.i64 v7 + v136 = iadd_imm v1, 0 + v137 = load.i64 v136 + v13 = iadd v137, v135 + store v12, v13+4 + v14 = bitcast.i64 v0 + v15 = iconst.i64 63 + v16 = ushr v14, v15 + v17 = ireduce.i32 v16 + v18 = iconst.i64 32 + v19 = ushr v14, v18 + v20 = ireduce.i32 v19 + v21 = iconst.i32 0x7fff_ffff + v22 = band v20, v21 + v23 = iconst.i32 0x4086_232b + v24 = icmp ult v22, v23 + v25 = bint.i32 v24 + brnz v25, block10 + jump block178 + +block178: + v26 = iconst.i64 0x7fff_ffff_ffff_ffff + v27 = band v14, v26 + v28 = iconst.i64 0x7ff0_0000_0000_0000 + v29 = icmp ule v27, v28 + v30 = bint.i32 v29 + brnz v30, block9 + jump block2(v12, v0) + +block10: + v31 = iconst.i32 0x3fd6_2e43 + v32 = icmp.i32 ult v22, v31 + v33 = bint.i32 v32 + brnz v33, block8 + jump block179 + +block179: + v34 = iconst.i32 0x3ff0_a2b2 + v35 = icmp.i32 uge v22, v34 + v36 = bint.i32 v35 + brnz v36, block6 + jump block180 + +block180: + v37 = iconst.i32 1 + v38 = bxor.i32 v17, v37 + v39 = isub v38, v17 + jump block5(v0, v39) + +block9: + v138 = iconst.i64 0x4086_2e42_fefa_39ef + v40 = bitcast.f64 v138 + v41 = fcmp ge v40, v0 + v42 = bint.i32 v41 + v139 = fcmp.f64 uno v0, v0 + v140 = fcmp.f64 one v0, v0 + v43 = bor v139, v140 + v44 = bint.i32 v43 + v45 = bor v42, v44 + brnz v45, block7 + jump block181 + +block181: + v141 = iconst.i64 
0x7fe0_0000_0000_0000 + v46 = bitcast.f64 v141 + v47 = fmul.f64 v0, v46 + jump block2(v12, v47) + +block8: + v48 = iconst.i32 0x3e30_0000 + v49 = icmp.i32 ule v22, v48 + v50 = bint.i32 v49 + brnz v50, block3 + jump block182 + +block182: + v51 = iconst.i32 0 + v142 = iconst.i64 0 + v52 = bitcast.f64 v142 + v178 = copy.f64 v0 + jump block4(v0, v178, v52, v51) + +block7: + v143 = iconst.i64 0xc086_232b_dd7a_bcd2 + v53 = bitcast.f64 v143 + v54 = fcmp.f64 ge v0, v53 + v55 = bint.i32 v54 + v56 = bor v55, v44 + brnz v56, block6 + jump block183 + +block183: + v144 = iconst.i64 0xb6a0_0000_0000_0000 + v57 = bitcast.f64 v144 + v58 = fdiv v57, v0 + v59 = fdemote.f32 v58 + v145 = uextend.i64 v12 + v146 = iadd_imm.i64 v1, 0 + v147 = load.i64 v146 + v60 = iadd v147, v145 + store v59, v60+12 + v148 = iconst.i64 0 + v61 = bitcast.f64 v148 + v149 = iconst.i64 0xc087_4910_d52d_3051 + v62 = bitcast.f64 v149 + v63 = fcmp gt v62, v0 + v64 = bint.i32 v63 + brnz v64, block2(v12, v61) + jump block6 + +block6: + v150 = iconst.i64 0x3ff7_1547_652b_82fe + v66 = bitcast.f64 v150 + v67 = fmul.f64 v0, v66 + v69 = iconst.i32 3 + v70 = ishl.i32 v17, v69 + v71 = iconst.i32 5040 + v72 = iadd v70, v71 + v151 = uextend.i64 v72 + v152 = iadd_imm.i64 v1, 0 + v153 = load.i64 v152 + v73 = iadd v153, v151 + v74 = load.f64 v73 + v75 = fadd v67, v74 + v76 = x86_cvtt2si.i32 v75 + v158 = iconst.i32 0x8000_0000 + v154 = icmp ne v76, v158 + brnz v154, block11 + jump block184 + +block184: + v155 = fcmp uno v75, v75 + brz v155, block12 + jump block185 + +block185: + trap bad_toint + +block12: + v159 = iconst.i64 0xc1e0_0000_0020_0000 + v156 = bitcast.f64 v159 + v157 = fcmp ge v156, v75 + brz v157, block13 + jump block186 + +block186: + trap int_ovf + +block13: + jump block11 + +block11: + jump block5(v0, v76) + +block5(v77: f64, v78: i32): + v79 = fcvt_from_sint.f64 v78 + v160 = iconst.i64 0xbfe6_2e42_fee0_0000 + v80 = bitcast.f64 v160 + v81 = fmul v79, v80 + v82 = fadd v77, v81 + v161 = iconst.i64 0x3dea_39ef_3579_3c76 + v83 = bitcast.f64 v161 + v84 = fmul v79, v83 + v85 = fsub v82, v84 + jump block4(v82, v85, v84, v78) + +block4(v86: f64, v87: f64, v108: f64, v113: i32): + v88 = fmul v87, v87 + v162 = iconst.i64 0x3e66_3769_72be_a4d0 + v89 = bitcast.f64 v162 + v90 = fmul v88, v89 + v163 = iconst.i64 0xbeeb_bd41_c5d2_6bf1 + v91 = bitcast.f64 v163 + v92 = fadd v90, v91 + v93 = fmul v88, v92 + v164 = iconst.i64 0x3f11_566a_af25_de2c + v94 = bitcast.f64 v164 + v95 = fadd v93, v94 + v96 = fmul v88, v95 + v165 = iconst.i64 0xbf66_c16c_16be_bd93 + v97 = bitcast.f64 v165 + v98 = fadd v96, v97 + v99 = fmul v88, v98 + v166 = iconst.i64 0x3fc5_5555_5555_553e + v100 = bitcast.f64 v166 + v101 = fadd v99, v100 + v102 = fmul v88, v101 + v103 = fsub v87, v102 + v104 = fmul v87, v103 + v167 = iconst.i64 0x4000_0000_0000_0000 + v105 = bitcast.f64 v167 + v106 = fsub v105, v103 + v107 = fdiv v104, v106 + v109 = fsub v107, v108 + v110 = fadd v86, v109 + v168 = iconst.i64 0x3ff0_0000_0000_0000 + v111 = bitcast.f64 v168 + v112 = fadd v110, v111 + v169 = iconst.i32 0 + v114 = icmp eq v113, v169 + v115 = bint.i32 v114 + brnz v115, block2(v12, v112) + jump block187 + +block187: + v116 = call fn0(v112, v113, v1) + jump block2(v12, v116) + +block3: + v170 = iconst.i64 0x7fe0_0000_0000_0000 + v117 = bitcast.f64 v170 + v118 = fadd.f64 v0, v117 + v171 = uextend.i64 v12 + v172 = iadd_imm.i64 v1, 0 + v173 = load.i64 v172 + v119 = iadd v173, v171 + store v118, v119 + v174 = iconst.i64 0x3ff0_0000_0000_0000 + v120 = bitcast.f64 v174 + v121 = fadd.f64 v0, v120 + jump 
block2(v12, v121) + +block2(v123: i32, v130: f64): + v122 = iconst.i32 0 + v127 = iconst.i32 16 + v128 = iadd v123, v127 + v175 = uextend.i64 v122 + v176 = iadd_imm.i64 v1, 0 + v177 = load.i64 v176 + v129 = iadd v177, v175 + store v128, v129+4 + jump block1(v130) + +block1(v2: f64): + return v2 +} diff --git a/cranelift/filetests/filetests/regalloc/coalescing-216.clif b/cranelift/filetests/filetests/regalloc/coalescing-216.clif new file mode 100644 index 0000000000..020ced084b --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/coalescing-216.clif @@ -0,0 +1,87 @@ +test regalloc +target x86_64 haswell + +; Reported as https://github.com/bytecodealliance/cranelift/issues/216 from the Binaryen fuzzer. +; +; The (old) coalescer creates a virtual register with two identical values. +function %pr216(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] system_v { +block0(v0: i32, v1: i64): + v3 = iconst.i64 0 + v5 = iconst.i32 0 + brz v5, block3(v3) + jump block4(v3, v3) + +block4(v11: i64, v29: i64): + v6 = iconst.i32 0 + brz v6, block14 + jump block15 + +block15: + v9 = iconst.i32 -17 + v12 = iconst.i32 0xffff_ffff_ffff_8000 + jump block9(v12) + +block9(v10: i32): + brnz v10, block8(v9, v11, v11) + jump block16 + +block16: + brz.i32 v9, block13 + jump block17 + +block17: + v13 = iconst.i32 0 + brnz v13, block6(v11, v11) + jump block18 + +block18: + v14 = iconst.i32 0 + brz v14, block12 + jump block11 + +block12: + jump block4(v11, v11) + +block11: + jump block10(v11) + +block13: + v15 = iconst.i64 1 + jump block10(v15) + +block10(v21: i64): + v16 = iconst.i32 0 + brnz v16, block6(v21, v11) + jump block19 + +block19: + v17 = iconst.i32 0xffff_ffff_ffff_9f35 + jump block8(v17, v21, v11) + +block8(v8: i32, v23: i64, v28: i64): + jump block7(v8, v23, v28) + +block14: + v18 = iconst.i32 0 + jump block7(v18, v11, v29) + +block7(v7: i32, v22: i64, v27: i64): + jump block6(v22, v27) + +block6(v20: i64, v25: i64): + v19 = iconst.i32 0xffc7 + brnz v19, block4(v20, v25) + jump block5 + +block5: + jump block3(v25) + +block3(v24: i64): + jump block2(v24) + +block2(v4: i64): + jump block1(v4) + +block1(v2: i64): + return v2 +} diff --git a/cranelift/filetests/filetests/regalloc/coloring-227.clif b/cranelift/filetests/filetests/regalloc/coloring-227.clif new file mode 100644 index 0000000000..52b36d4c70 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/coloring-227.clif @@ -0,0 +1,115 @@ +test regalloc +target x86_64 haswell + +function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) system_v { + gv0 = vmctx + heap0 = static gv0, min 0, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + + block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i64): +[RexOp1pu_id#b8] v5 = iconst.i32 0 +[RexOp1pu_id#b8] v6 = iconst.i32 0 +[RexOp1tjccb#74] brz v6, block10 +[Op1jmpb#eb] jump block3(v5, v5, v5, v5, v5, v5, v0, v1, v2, v3) + + block3(v15: i32, v17: i32, v25: i32, v31: i32, v40: i32, v47: i32, v54: i32, v61: i32, v68: i32, v75: i32): +[Op1jmpb#eb] jump block6 + + block6: +[RexOp1pu_id#b8] v8 = iconst.i32 0 +[RexOp1tjccb#75] brnz v8, block5 +[Op1jmpb#eb] jump block20 + + block20: +[RexOp1pu_id#b8] v9 = iconst.i32 0 +[RexOp1pu_id#b8] v11 = iconst.i32 0 +[DynRexOp1icscc#39] v12 = icmp.i32 eq v15, v11 +[RexOp2urm_noflags#4b6] v13 = bint.i32 v12 +[DynRexOp1rr#21] v14 = band v9, v13 +[RexOp1tjccb#75] brnz v14, block6 +[Op1jmpb#eb] jump block7 + + block7: +[RexOp1tjccb#74] brz.i32 v17, block8 +[Op1jmpb#eb] jump block17 + + block17: +[RexOp1pu_id#b8] v18 = iconst.i32 0 +[RexOp1tjccb#74] brz v18, 
block9 +[Op1jmpb#eb] jump block16 + + block16: +[RexOp1pu_id#b8] v21 = iconst.i32 0 +[RexOp1umr#89] v79 = uextend.i64 v5 +[DynRexOp1r_ib#8083] v80 = iadd_imm.i64 v4, 0 +[RexOp1ld#808b] v81 = load.i64 v80 +[DynRexOp1rr#8001] v22 = iadd v81, v79 +[RexMp1st#189] istore16 v21, v22 +[Op1jmpb#eb] jump block9 + + block9: +[Op1jmpb#eb] jump block8 + + block8: +[RexOp1pu_id#b8] v27 = iconst.i32 3 +[RexOp1pu_id#b8] v28 = iconst.i32 4 +[DynRexOp1rr#09] v35 = bor.i32 v31, v13 +[RexOp1tjccb#75] brnz v35, block15(v27) +[Op1jmpb#eb] jump block15(v28) + + block15(v36: i32): +[Op1jmpb#eb] jump block3(v25, v36, v25, v31, v40, v47, v54, v61, v68, v75) + + block5: +[Op1jmpb#eb] jump block4 + + block4: +[Op1jmpb#eb] jump block2(v40, v47, v54, v61, v68, v75) + + block10: +[RexOp1pu_id#b8] v43 = iconst.i32 0 +[Op1jmpb#eb] jump block2(v43, v5, v0, v1, v2, v3) + + block2(v7: i32, v45: i32, v52: i32, v59: i32, v66: i32, v73: i32): +[RexOp1pu_id#b8] v44 = iconst.i32 0 +[RexOp1tjccb#74] brz v44, block12 +[Op1jmpb#eb] jump block18 + + block18: +[RexOp1pu_id#b8] v50 = iconst.i32 11 +[RexOp1tjccb#74] brz v50, block14 +[Op1jmpb#eb] jump block19 + + block19: +[RexOp1umr#89] v82 = uextend.i64 v52 +[DynRexOp1r_ib#8083] v83 = iadd_imm.i64 v4, 0 +[RexOp1ld#808b] v84 = load.i64 v83 +[DynRexOp1rr#8001] v57 = iadd v84, v82 +[RexOp1ld#8b] v58 = load.i32 v57 +[RexOp1umr#89] v85 = uextend.i64 v58 +[DynRexOp1r_ib#8083] v86 = iadd_imm.i64 v4, 0 +[RexOp1ld#808b] v87 = load.i64 v86 +[DynRexOp1rr#8001] v64 = iadd v87, v85 +[RexOp1st#88] istore8 v59, v64 +[RexOp1pu_id#b8] v65 = iconst.i32 0 +[Op1jmpb#eb] jump block13(v65) + + block14: +[Op1jmpb#eb] jump block13(v66) + + block13(v51: i32): +[RexOp1umr#89] v88 = uextend.i64 v45 +[DynRexOp1r_ib#8083] v89 = iadd_imm.i64 v4, 0 +[RexOp1ld#808b] v90 = load.i64 v89 +[DynRexOp1rr#8001] v71 = iadd v90, v88 +[RexOp1st#89] store v51, v71 +[Op1jmpb#eb] jump block12 + + block12: +[Op1jmpb#eb] jump block11 + + block11: +[Op1jmpb#eb] jump block1 + + block1: +[Op1ret#c3] return +} diff --git a/cranelift/filetests/filetests/regalloc/constraints.clif b/cranelift/filetests/filetests/regalloc/constraints.clif new file mode 100644 index 0000000000..60cd731ed8 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/constraints.clif @@ -0,0 +1,82 @@ +test regalloc +target i686 + +; regex: V=v\d+ +; regex: REG=%r([abcd]x|[sd]i) + +; Tied operands, both are killed at instruction. +function %tied_easy() -> i32 { +block0: + v0 = iconst.i32 12 + v1 = iconst.i32 13 + ; not: copy + ; check: isub + v2 = isub v0, v1 + return v2 +} + +; Tied operand is live after instruction. +function %tied_alive() -> i32 { +block0: + v0 = iconst.i32 12 + v1 = iconst.i32 13 + ; check: $(v0c=$V) = copy v0 + ; check: v2 = isub $v0c, v1 + v2 = isub v0, v1 + ; check: v3 = iadd v2, v0 + v3 = iadd v2, v0 + return v3 +} + +; Fixed register constraint. +function %fixed_op() -> i32 { +block0: + ; check: ,%rax] + ; sameln: v0 = iconst.i32 12 + v0 = iconst.i32 12 + v1 = iconst.i32 13 + ; The dynamic shift amount must be in %rcx + ; check: regmove v0, %rax -> %rcx + v2 = ishl v1, v0 + return v2 +} + +; Fixed register constraint twice. +function %fixed_op_twice() -> i32 { +block0: + ; check: ,%rax] + ; sameln: v0 = iconst.i32 12 + v0 = iconst.i32 12 + v1 = iconst.i32 13 + ; The dynamic shift amount must be in %rcx + ; check: regmove v0, %rax -> %rcx + v2 = ishl v1, v0 + ; check: regmove v0, %rcx -> $REG + ; check: regmove v2, $REG -> %rcx + v3 = ishl v0, v2 + + return v3 +} + +; Tied use of a diverted register. 
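+; A diversion is a block-local move of a value away from its globally assigned
+; register; the checks below verify that a tied result follows the diversion
+; rather than the global assignment.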
+function %fixed_op_twice() -> i32 {
+block0:
+    ; check: ,%rax]
+    ; sameln: v0 = iconst.i32 12
+    v0 = iconst.i32 12
+    v1 = iconst.i32 13
+    ; The dynamic shift amount must be in %rcx
+    ; check: regmove v0, %rax -> %rcx
+    ; check: v2 = ishl v1, v0
+    v2 = ishl v1, v0
+
+    ; Now v0 is globally allocated to %rax, but diverted to %rcx.
+    ; Check that the tied def gets the diverted register.
+    v3 = isub v0, v2
+    ; not: regmove
+    ; check: ,%rcx]
+    ; sameln: isub
+    ; Move it into place for the return value.
+    ; check: regmove v3, %rcx -> %rax
+    return v3
+}
diff --git a/cranelift/filetests/filetests/regalloc/fallthrough-return.clif b/cranelift/filetests/filetests/regalloc/fallthrough-return.clif
new file mode 100644
index 0000000000..58ec61f0d8
--- /dev/null
+++ b/cranelift/filetests/filetests/regalloc/fallthrough-return.clif
@@ -0,0 +1,23 @@
+test regalloc
+target x86_64
+
+; Test that fallthrough returns are visited by reload and coloring.
+
+function %foo() -> f64 {
+    fn0 = %bar()
+
+block0:
+    v0 = f64const 0.0
+    call fn0()
+    fallthrough_return v0
+}
+; check: fill v0
+
+function %foo() -> f64 {
+    fn0 = %bar() -> f64, f64
+
+block0:
+    v0, v1 = call fn0()
+    fallthrough_return v1
+}
+; check: regmove v1, %xmm1 -> %xmm0
diff --git a/cranelift/filetests/filetests/regalloc/ghost-param.clif b/cranelift/filetests/filetests/regalloc/ghost-param.clif
new file mode 100644
index 0000000000..d51f4a7f72
--- /dev/null
+++ b/cranelift/filetests/filetests/regalloc/ghost-param.clif
@@ -0,0 +1,45 @@
+test regalloc
+target x86_64 haswell
+
+; This test case would create a block parameter that was a ghost value.
+; The coalescer would insert a copy of the ghost value, leading to verifier errors.
+;
+; We don't allow block parameters to be ghost values any longer.
+;
+; Test case by the Binaryen fuzzer!
+
+function %pr215(i64 vmctx [%rdi]) system_v {
+block0(v0: i64):
+    v10 = iconst.i64 0
+    v1 = bitcast.f64 v10
+    jump block5(v1)
+
+block5(v9: f64):
+    v11 = iconst.i64 0xffff_ffff_ff9a_421a
+    v4 = bitcast.f64 v11
+    v6 = iconst.i32 0
+    v7 = iconst.i32 1
+    brnz v7, block4(v6)
+    jump block8
+
+block8:
+    v8 = iconst.i32 0
+    jump block7(v8)
+
+block7(v5: i32):
+    brnz v5, block3(v4)
+    jump block5(v4)
+
+block4(v3: i32):
+    brnz v3, block2
+    jump block3(v9)
+
+block3(v2: f64):
+    jump block2
+
+block2:
+    jump block1
+
+block1:
+    return
+}
diff --git a/cranelift/filetests/filetests/regalloc/global-constraints.clif b/cranelift/filetests/filetests/regalloc/global-constraints.clif
new file mode 100644
index 0000000000..8149b9bae6
--- /dev/null
+++ b/cranelift/filetests/filetests/regalloc/global-constraints.clif
@@ -0,0 +1,30 @@
+test regalloc
+target i686
+
+; This test covers the trouble that arises when values with global live ranges
+; are defined by instructions with constrained register classes.
+;
+; The icmp_imm instructions write their b1 result to the ABCD register class on
+; 32-bit x86. So if we define 5 live values, they can't all fit.
+function %global_constraints(i32) {
+block0(v0: i32):
+    v1 = icmp_imm eq v0, 1
+    v2 = icmp_imm ugt v0, 2
+    v3 = icmp_imm sle v0, 3
+    v4 = icmp_imm ne v0, 4
+    v5 = icmp_imm sge v0, 5
+    brnz v5, block1
+    jump block2
+
+block2:
+    return
+
+block1:
+    ; Make sure v1-v5 are live in.
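+    ; The band/bor chain below uses all five comparison results at once, which
+    ; exceeds the four registers in the ABCD class.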
+ v10 = band v1, v2 + v11 = bor v3, v4 + v12 = bor v10, v11 + v13 = bor v12, v5 + trapnz v13, user0 + return +} diff --git a/cranelift/filetests/filetests/regalloc/global-fixed.clif b/cranelift/filetests/filetests/regalloc/global-fixed.clif new file mode 100644 index 0000000000..851f012492 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/global-fixed.clif @@ -0,0 +1,16 @@ +test regalloc +target x86_64 haswell + +function %foo() system_v { +block4: + v3 = iconst.i32 0 + jump block3 + +block3: + v9 = udiv v3, v3 + jump block1 + +block1: + v19 = iadd.i32 v9, v9 + jump block3 +} diff --git a/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif b/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif new file mode 100644 index 0000000000..04e9cc54fb --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif @@ -0,0 +1,44 @@ +test regalloc +target x86_64 + +function u0:587() fast { +block0: + v97 = iconst.i32 0 + v169 = iconst.i32 0 + v1729 = iconst.i32 0 + jump block100(v97, v97, v97, v97, v97) + +block100(v1758: i32, v1784: i32, v1845: i32, v1856: i32, v1870: i32): + v1762 = iconst.i32 0 + v1769 = iconst.i32 0 + v1774 = iconst.i32 0 + v1864 = iconst.i32 0 + v1897 = iconst.i32 0 + jump block102(v1774, v1784, v1845, v1856, v1870, v1758, v1762, v169, v1729, v97, v169, v169, v169, v169) + +block102(v1785: i32, v1789: i32, v1843: i32, v1854: i32, v1868: i32, v1882: i32, v1890: i32, v1901: i32, v1921: i32, v1933: i32, v2058: i32, v2124: i32, v2236: i32, v2366: i32): + v1929 = iconst.i32 0 + v1943 = iconst.i32 0 + v1949 = iconst.i32 0 + jump block123(v1897, v1769) + +block123(v1950: i32, v1979: i32): + v1955 = iconst.i32 0 + brz v1955, block125 + jump block122(v1929, v1843, v1864, v2058, v1882, v1897, v1943, v1868, v2124, v1901) + +block125: + v1961 = iadd_imm.i32 v1949, 0 + v1952 = iconst.i32 0 + v1962 = iconst.i64 0 + v1963 = load.i32 v1962 + brz v1963, block123(v1952, v1961) + jump block127 + +block127: + v1966 = iconst.i32 0 + jump block122(v1963, v1966, v1966, v1966, v1966, v1966, v1966, v1966, v1966, v1966) + +block122(v1967: i32, v1971: i32, v1972: i32, v1978: i32, v2032: i32, v2041: i32, v2053: i32, v2076: i32, v2085: i32, v2096: i32): + trap user0 +} diff --git a/cranelift/filetests/filetests/regalloc/infinite-interference.clif b/cranelift/filetests/filetests/regalloc/infinite-interference.clif new file mode 100644 index 0000000000..b7a7736405 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/infinite-interference.clif @@ -0,0 +1,37 @@ +test regalloc +target riscv32 + +; Here, the coalescer initially builds vreg0 = [v1, v2, v3] +; +; There's interference between v1 and v2 at the brz instruction. Isolating v2 is not going to +; resolve that conflict since v1 will just interfere with the inserted copy too. + +;function %c1(i32) -> i32 { +;block0(v0: i32): +; v1 = iadd_imm v0, 1 +; v2 = iconst.i32 1 +; brz v1, block1(v2) +; jump block2 +; +;block1(v3: i32): +; return v3 +; +;block2: +; jump block1(v1) +;} + +; Same thing with v1 and v2 swapped to reverse the order of definitions. 
+ +function %c2(i32) -> i32 { +block0(v0: i32): + v1 = iadd_imm v0, 1 + v2 = iconst.i32 1 + brz v2, block1(v1) + jump block2 + +block1(v3: i32): + return v3 + +block2: + jump block1(v2) +} diff --git a/cranelift/filetests/filetests/regalloc/iterate.clif b/cranelift/filetests/filetests/regalloc/iterate.clif new file mode 100644 index 0000000000..2c7d691765 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/iterate.clif @@ -0,0 +1,164 @@ +test regalloc +target x86_64 haswell + +function u0:9(i64 [%rdi], f32 [%xmm0], f64 [%xmm1], i32 [%rsi], i32 [%rdx], i64 vmctx [%r14]) -> i64 [%rax] baldrdash_system_v { +block0(v0: i64, v1: f32, v2: f64, v3: i32, v4: i32, v5: i64): + v32 = iconst.i32 0 + v6 = bitcast.f32 v32 + v7 = iconst.i64 0 + v33 = iconst.i64 0 + v8 = bitcast.f64 v33 + v34 = iconst.i32 0xbe99_999a + v9 = bitcast.f32 v34 + v10 = iconst.i32 40 + v11 = iconst.i32 -7 + v35 = iconst.i32 0x40b0_0000 + v12 = bitcast.f32 v35 + v13 = iconst.i64 6 + v36 = iconst.i64 0x4020_0000_0000_0000 + v14 = bitcast.f64 v36 + v44 = iconst.i64 0 + v37 = icmp slt v0, v44 + brnz v37, block2 + jump block11 + +block11: + v38 = fcvt_from_sint.f64 v0 + jump block3(v38) + +block2: + v45 = iconst.i32 1 + v39 = ushr.i64 v0, v45 + v40 = band_imm.i64 v0, 1 + v41 = bor v39, v40 + v42 = fcvt_from_sint.f64 v41 + v43 = fadd v42, v42 + jump block3(v43) + +block3(v15: f64): + v16 = fpromote.f64 v9 + v46 = uextend.i64 v10 + v17 = fcvt_from_sint.f64 v46 + v18 = fcvt_from_sint.f64 v11 + v19 = fpromote.f64 v12 + v54 = iconst.i64 0 + v47 = icmp.i64 slt v13, v54 + brnz v47, block4 + jump block12 + +block12: + v48 = fcvt_from_sint.f64 v13 + jump block5(v48) + +block4: + v55 = iconst.i32 1 + v49 = ushr.i64 v13, v55 + v50 = band_imm.i64 v13, 1 + v51 = bor v49, v50 + v52 = fcvt_from_sint.f64 v51 + v53 = fadd v52, v52 + jump block5(v53) + +block5(v20: f64): + v63 = iconst.i64 0 + v56 = icmp.i64 slt v7, v63 + brnz v56, block6 + jump block13 + +block13: + v57 = fcvt_from_sint.f64 v7 + jump block7(v57) + +block6: + v64 = iconst.i32 1 + v58 = ushr.i64 v7, v64 + v59 = band_imm.i64 v7, 1 + v60 = bor v58, v59 + v61 = fcvt_from_sint.f64 v60 + v62 = fadd v61, v61 + jump block7(v62) + +block7(v21: f64): + v22 = fadd v21, v14 + v23 = fadd.f64 v20, v22 + v24 = fadd.f64 v19, v23 + v25 = fadd.f64 v18, v24 + v26 = fadd.f64 v17, v25 + v27 = fadd.f64 v2, v26 + v28 = fadd.f64 v16, v27 + v29 = fadd.f64 v15, v28 + v30 = x86_cvtt2si.i64 v29 + v69 = iconst.i64 0x8000_0000_0000_0000 + v65 = icmp ne v30, v69 + brnz v65, block8 + jump block15 + +block15: + v66 = fcmp uno v29, v29 + brz v66, block9 + jump block16 + +block16: + trap bad_toint + +block9: + v70 = iconst.i64 0xc3e0_0000_0000_0000 + v67 = bitcast.f64 v70 + v68 = fcmp gt v67, v29 + brz v68, block10 + jump block17 + +block17: + trap int_ovf + +block10: + jump block8 + +block8: + jump block1(v30) + +block1(v31: i64): + return v31 +} + +function u0:26(i64 vmctx [%r14]) -> i64 [%rax] baldrdash_system_v { + gv1 = vmctx + gv0 = iadd_imm.i64 gv1, 48 + sig0 = (i32 [%rdi], i64 [%rsi], i64 vmctx [%r14], i64 sigid [%rbx]) -> i64 [%rax] baldrdash_system_v + +block0(v0: i64): + v1 = iconst.i32 32 + v2 = iconst.i64 64 + v3 = iconst.i32 9 + v4 = iconst.i64 1063 + v5 = iadd_imm v0, 48 + v6 = load.i32 v5 + v7 = icmp uge v3, v6 + ; If we're unlucky, there are no ABCD registers available for v7 at this branch. 
+ brz v7, block2 + jump block4 + +block4: + trap oob + +block2: + v8 = load.i64 v5+8 + v9 = uextend.i64 v3 + v16 = iconst.i64 16 + v10 = imul v9, v16 + v11 = iadd v8, v10 + v12 = load.i64 v11 + brnz v12, block3 + jump block5 + +block5: + trap icall_null + +block3: + v13 = load.i64 v11+8 + v14 = call_indirect.i64 sig0, v12(v1, v2, v13, v4) + jump block1(v14) + +block1(v15: i64): + return v15 +} diff --git a/cranelift/filetests/filetests/regalloc/multi-constraints.clif b/cranelift/filetests/filetests/regalloc/multi-constraints.clif new file mode 100644 index 0000000000..b01be532f8 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/multi-constraints.clif @@ -0,0 +1,51 @@ +test regalloc +target x86_64 haswell + +; Test combinations of constraints. +; +; The x86 ushr instruction requires its second operand to be passed in %rcx and its output is +; tied to the first input operand. +; +; If we pass the same value to both operands, both constraints must be satisfied. + +; Found by the Binaryen fuzzer in PR221. +; +; Conditions triggering the problem: +; +; - The same value used for a tied operand and a fixed operand. +; - The common value is already in %rcx. +; - The tied output value is live outside the block. +; +; Under these conditions, Solver::add_tied_input() would create a variable for the tied input +; without considering the fixed constraint. +function %pr221(i64 [%rdi], i64 [%rsi], i64 [%rdx], i64 [%rcx]) -> i64 [%rax] { +block0(v0: i64, v1: i64, v2: i64, v3: i64): + v4 = ushr v3, v3 + jump block1 + +block1: + return v4 +} + +; Found by the Binaryen fuzzer in PR218. +; +; This is a similar situation involving combined constraints on the ushr instruction: +; +; - The %rcx register is already in use by a globally live value. +; - The ushr x, x result is also a globally live value. +; +; Since the ushr x, x result is forced to be placed in %rcx, we must set the replace_global_defines +; flag so it can be reassigned to a different global register. +function %pr218(i64 [%rdi], i64 [%rsi], i64 [%rdx], i64 [%rcx]) -> i64 [%rax] { +block0(v0: i64, v1: i64, v2: i64, v3: i64): + ; check: regmove v3, %rcx -> + v4 = ushr v0, v0 + ; check: v4 = copy + jump block1 + +block1: + ; v3 is globally live in %rcx. + ; v4 is also globally live. Needs to be assigned something else for the trip across the CFG edge. + v5 = iadd v3, v4 + return v5 +} diff --git a/cranelift/filetests/filetests/regalloc/multiple-returns.clif b/cranelift/filetests/filetests/regalloc/multiple-returns.clif new file mode 100644 index 0000000000..3481747a60 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/multiple-returns.clif @@ -0,0 +1,23 @@ +test regalloc +target x86_64 + +; Return the same value twice. This needs a copy so that each value can be +; allocated its own register. +function %multiple_returns() -> i64, i64 { +block0: + v2 = iconst.i64 0 + return v2, v2 +} +; check: v2 = iconst.i64 0 +; check: v3 = copy v2 +; check: return v2, v3 + +; Same thing, now with a fallthrough_return. 
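+; (fallthrough_return has the same register constraints as a normal return, so
+; the same copy is expected.)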
+function %multiple_returns() -> i64, i64 { +block0: + v2 = iconst.i64 0 + fallthrough_return v2, v2 +} +; check: v2 = iconst.i64 0 +; check: v3 = copy v2 +; check: fallthrough_return v2, v3 diff --git a/cranelift/filetests/filetests/regalloc/output-interference.clif b/cranelift/filetests/filetests/regalloc/output-interference.clif new file mode 100644 index 0000000000..513c81f4e5 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/output-interference.clif @@ -0,0 +1,14 @@ +test regalloc +target x86_64 haswell + +function %test(i64) -> i64 system_v { +block0(v0: i64): + v2 = iconst.i64 12 + ; This division clobbers two of its fixed input registers on x86. + ; These are FixedTied constraints that the spiller needs to resolve. + v5 = udiv v0, v2 + v6 = iconst.i64 13 + v9 = udiv v0, v6 + v10 = iadd v5, v9 + return v10 +} diff --git a/cranelift/filetests/filetests/regalloc/reload-208.clif b/cranelift/filetests/filetests/regalloc/reload-208.clif new file mode 100644 index 0000000000..6a723f02f5 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/reload-208.clif @@ -0,0 +1,112 @@ +test regalloc +target x86_64 haswell + +; regex: V=v\d+ +; regex: BB=block\d+ + +; Filed as https://github.com/bytecodealliance/cranelift/issues/208 +; +; The verifier complains about a branch argument that is not in the same virtual register as the +; corresponding block argument. +; +; The problem was the reload pass rewriting block arguments on "brnz v9, block3(v9)" + +function %pr208(i64 vmctx [%rdi]) system_v { + gv1 = vmctx + gv0 = iadd_imm.i64 gv1, -8 + heap0 = static gv0, min 0, bound 0x5000, offset_guard 0x0040_0000 + sig0 = (i64 vmctx [%rdi]) -> i32 [%rax] system_v + sig1 = (i64 vmctx [%rdi], i32 [%rsi]) system_v + fn0 = u0:1 sig0 + fn1 = u0:3 sig1 + +block0(v0: i64): + v1 = iconst.i32 0 + v2 = call fn0(v0) + v20 = iconst.i32 0x4ffe + v16 = icmp uge v2, v20 + brz v16, block5 + jump block9 + +block9: + trap heap_oob + +block5: + v17 = uextend.i64 v2 + v18 = iadd_imm.i64 v0, -8 + v19 = load.i64 v18 + v3 = iadd v19, v17 + v4 = load.i32 v3 + v21 = iconst.i32 0 + v5 = icmp eq v4, v21 + v6 = bint.i32 v5 + brnz v6, block2 + jump block3(v4) + + ; check: block5: + ; check: jump block3(v4) + ; check: $(splitEdge=$BB): + ; nextln: jump block3(v9) + +block3(v7: i32): + call fn1(v0, v7) + v26 = iconst.i32 0x4ffe + v22 = icmp uge v7, v26 + brz v22, block6 + jump block10 + +block10: + trap heap_oob + +block6: + v23 = uextend.i64 v7 + v24 = iadd_imm.i64 v0, -8 + v25 = load.i64 v24 + v8 = iadd v25, v23 + v9 = load.i32 v8+56 + ; check: v9 = spill + ; check: brnz $V, $splitEdge + brnz v9, block3(v9) + jump block4 + +block4: + jump block2 + +block2: + v10 = iconst.i32 0 + v31 = iconst.i32 0x4ffe + v27 = icmp uge v10, v31 + brz v27, block7 + jump block11 + +block11: + trap heap_oob + +block7: + v28 = uextend.i64 v10 + v29 = iadd_imm.i64 v0, -8 + v30 = load.i64 v29 + v11 = iadd v30, v28 + v12 = load.i32 v11+12 + call fn1(v0, v12) + v13 = iconst.i32 0 + v36 = iconst.i32 0x4ffe + v32 = icmp uge v13, v36 + brz v32, block8 + jump block12 + +block12: + trap heap_oob + +block8: + v33 = uextend.i64 v13 + v34 = iadd_imm.i64 v0, -8 + v35 = load.i64 v34 + v14 = iadd v35, v33 + v15 = load.i32 v14+12 + call fn1(v0, v15) + jump block1 + +block1: + return +} diff --git a/cranelift/filetests/filetests/regalloc/reload-779.clif b/cranelift/filetests/filetests/regalloc/reload-779.clif new file mode 100644 index 0000000000..ed6374c9fb --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/reload-779.clif @@ -0,0 +1,23 @@ +test 
compile
+target x86_64
+
+; Filed as https://github.com/bytecodealliance/cranelift/issues/779
+;
+; The copy_nop optimisation to reload (see Issue 773) was creating
+; copy_nop instructions for types for which there was no encoding.
+
+function u0:0(i64, i64, i64) system_v {
+    sig0 = () system_v
+    sig1 = (i16) system_v
+    fn1 = u0:94 sig0
+    fn2 = u0:95 sig1
+
+block0(v0: i64, v1: i64, v2: i64):
+    v3 = iconst.i16 0
+    jump block1(v3)
+
+block1(v4: i16):
+    call fn1()
+    call fn2(v4)
+    jump block1(v4)
+}
diff --git a/cranelift/filetests/filetests/regalloc/reload.clif b/cranelift/filetests/filetests/regalloc/reload.clif
new file mode 100644
index 0000000000..88b20c1501
--- /dev/null
+++ b/cranelift/filetests/filetests/regalloc/reload.clif
@@ -0,0 +1,46 @@
+test regalloc
+target riscv32 enable_e
+
+; regex: V=v\d+
+
+; Check that we can handle a function return value that got spilled.
+function %spill_return() -> i32 {
+    fn0 = %foo() -> i32 system_v
+
+block0:
+    v0 = call fn0()
+    ; check: $(reg=$V) = call fn0
+    ; check: v0 = spill $reg
+    v2 = call fn0()
+    ; check: v2 = call fn0
+    return v0
+    ; check: $(reload=$V) = fill v0
+    ; check: return $reload
+}
+
+; Check that copies where the arg has been spilled are replaced with fills.
+;
+; RV32E has 6 registers for function arguments so the 7th, v6, will be placed
+; on the stack.
+function %spilled_copy_arg(i32, i32, i32, i32, i32, i32, i32) -> i32 {
+
+block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32):
+    ; not: copy
+    ; check: v10 = fill v6
+    v10 = copy v6
+    return v10
+}
+
+; Check that copies where the result has been spilled are replaced with spills.
+;
+; v1 is live across a call so it will be spilled.
+function %spilled_copy_result(i32) -> i32 {
+    fn0 = %foo(i32)
+
+block0(v0: i32):
+    ; not: copy
+    ; check: v1 = spill v0
+    v1 = copy v0
+    call fn0(v1)
+    return v1
+}
diff --git a/cranelift/filetests/filetests/regalloc/schedule-moves.clif b/cranelift/filetests/filetests/regalloc/schedule-moves.clif
new file mode 100644
index 0000000000..f46d8958f7
--- /dev/null
+++ b/cranelift/filetests/filetests/regalloc/schedule-moves.clif
@@ -0,0 +1,39 @@
+test regalloc
+target i686 haswell
+
+function %pr165() system_v {
+block0:
+    v0 = iconst.i32 0x0102_0304
+    v1 = iconst.i32 0x1102_0304
+    v2 = iconst.i32 0x2102_0304
+    v20 = ishl v1, v0
+    v21 = ishl v2, v0
+    v22 = sshr v1, v0
+    v23 = sshr v2, v0
+    v24 = ushr v1, v0
+    v25 = ushr v2, v0
+    istore8 v0, v1+0x2710
+    istore8 v1, v0+0x2710
+    return
+}
+
+; Same as above, but use so many registers that spilling is required.
+; Note: This is also a candidate for using xchg instructions.
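+; (xchg can swap two registers without a scratch register, which could avoid
+; some of the spilling below.)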
+function %emergency_spill() system_v { +block0: + v0 = iconst.i32 0x0102_0304 + v1 = iconst.i32 0x1102_0304 + v2 = iconst.i32 0x2102_0304 + v3 = iconst.i32 0x3102_0304 + v4 = iconst.i32 0x4102_0304 + v20 = ishl v1, v0 + v21 = ishl v2, v3 + v22 = sshr v1, v0 + v23 = sshr v2, v0 + v24 = ushr v1, v0 + v25 = ushr v2, v0 + istore8 v0, v1+0x2710 + istore8 v1, v0+0x2710 + istore8 v3, v4+0x2710 + return +} diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif new file mode 100644 index 0000000000..9737d4e163 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif @@ -0,0 +1,100 @@ +test compile +set opt_level=speed +set enable_pinned_reg=true +target x86_64 haswell + +function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v { +block0(v0: i32, v1: i32, v2: i32, v3: i64): + v236 = iconst.i32 0x4de9_bd37 + v424 = iconst.i32 0 + jump block37(v424) + +block37(v65: i32): + v433 = iconst.i32 0 + jump block40(v433) + +block40(v70: i32): + v75 = iconst.i32 0 + v259 = iconst.i32 0 + v78 -> v259 + v449 = iconst.i32 0 + v450, v451 = x86_sdivmodx v75, v449, v259 + v79 -> v450 + v269 = iconst.i32 0 + v270 = ushr_imm v269, 31 + v271 = iadd v269, v270 + v98 -> v271 + v100 = iconst.i32 -31 + v272 = iconst.i32 0x4de9_bd37 + v490, v273 = x86_smulx v100, v272 + v493 = iconst.i32 0 + jump block61(v493) + +block61(v103: i32): + v104 = iconst.i32 -23 + v105 = iconst.i32 -23 + v106 = popcnt v105 + v500 = sshr_imm v104, 31 + v501 = iconst.i32 0 + jump block64(v501) + +block64(v107: i32): + v108 = iconst.i32 0 + v109 = iconst.i32 0 + v278 = iconst.i32 0 + v507, v279 = x86_smulx v109, v278 + v280 = isub v279, v109 + v281 = sshr_imm v280, 11 + v282 = iconst.i32 0 + v283 = iadd v281, v282 + v111 -> v283 + v112 = rotr v108, v283 + jump block65 + +block65: + v509 = iconst.i32 0 + v510, v511 = x86_sdivmodx v107, v509, v112 + v113 -> v510 + v114 = iconst.i32 0 + v517 = iconst.i32 0 + v518, v519 = x86_sdivmodx v103, v517, v114 + v115 -> v518 + v534 = iconst.i32 0 + v122 -> v534 + v541 = iconst.i32 0 + v542, v543 = x86_sdivmodx v271, v541, v122 + v123 -> v542 + v289 = iconst.i32 0 + v125 -> v289 + v550 = iconst.i32 0 + v551, v552 = x86_sdivmodx v79, v550, v289 + v126 -> v551 + v130 = iconst.i32 0 + v558 = iconst.i32 0 + v559, v560 = x86_sdivmodx v70, v558, v130 + v131 -> v559 + v305 = iconst.i32 0 + v140 -> v305 + v577 = iconst.i32 0 + v578, v579 = x86_sdivmodx v65, v577, v305 + v141 -> v578 + v166 = iconst.i32 0 + v167 = iconst.i32 -31 + v318 = iconst.i32 0x4de9_bd37 + v650, v319 = x86_smulx v167, v318 + v320 = isub v319, v167 + v321 = sshr_imm v320, 4 + v322 = iconst.i32 0 + v323 = iadd v321, v322 + v169 -> v323 + v652 = iconst.i32 0 + v653, v654 = x86_sdivmodx v166, v652, v323 + v170 -> v653 + v171 = iconst.i32 -23 + v172 = iconst.i32 -23 + v173 = popcnt v172 + v174 = popcnt v173 + v660 = sshr_imm v171, 31 + v661, v662 = x86_sdivmodx v171, v660, v174 + trap user0 +} diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif new file mode 100644 index 0000000000..8a9a040eb1 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif @@ -0,0 +1,137 @@ +test compile +set opt_level=speed +set enable_pinned_reg=true +target x86_64 haswell + +function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v { +block0(v0: i32, v1: i32, v2: i32, v3: i64): + v5 = iconst.i32 
-8 + v114 = iconst.i32 0 + v16 = iconst.i32 -8 + v17 = popcnt v16 + v192 = ifcmp_imm v17, -1 + trapif ne v192, user0 + jump block12 + +block12: + v122 = iconst.i32 0 + v123 = ushr_imm v122, 31 + v124 = iadd v122, v123 + v20 -> v124 + v25 = iconst.i32 -19 + v204 = iconst.i32 0 + v31 -> v204 + v210 = ifcmp_imm v31, -1 + trapif ne v210, user0 + jump block18 + +block18: + v215 = iconst.i32 0 + jump block19(v215) + +block19(v32: i32): + v35 = iconst.i32 0 + v218 = ifcmp_imm v35, -1 + trapif ne v218, user0 + jump block21 + +block21: + v223 = iconst.i32 0 + jump block22(v223) + +block22(v36: i32): + v136 = iconst.i32 0 + v40 -> v136 + v227 = ifcmp_imm v136, -1 + trapif ne v227, user0 + jump block24 + +block24: + v232 = iconst.i32 0 + jump block25(v232) + +block25(v41: i32): + v142 = iconst.i32 0 + v45 -> v142 + v236 = ifcmp_imm v142, -1 + trapif ne v236, user0 + jump block27 + +block27: + v241 = iconst.i32 0 + jump block28(v241) + +block28(v46: i32): + v49 = iconst.i32 0 + v244 = ifcmp_imm v49, -1 + trapif ne v244, user0 + jump block30 + +block30: + v254 = iconst.i32 0 + v53 -> v254 + v54 = iconst.i32 -23 + v55 = popcnt v54 + v143 = iconst.i32 0x4de9_bd37 + v260, v144 = x86_smulx v55, v143 + v145 = iconst.i32 0 + v146 = sshr_imm v145, 4 + v147 = iconst.i32 0 + v148 = iadd v146, v147 + v57 -> v148 + v58 = ishl v53, v148 + jump block35 + +block35: + v262 = iconst.i32 0 + v263, v264 = x86_sdivmodx v46, v262, v58 + v59 -> v263 + v270 = iconst.i32 0 + v271, v272 = x86_sdivmodx v41, v270, v59 + v60 -> v271 + v61 = f32const 0.0 + v280 = iconst.i32 0 + v281 = ffcmp v61, v61 + trapff ord v281, user0 + jump block41(v280) + +block41(v62: i32): + v157 = iconst.i32 0 + v158 = sshr_imm v157, 4 + v159 = iconst.i32 0 + v160 = iadd v158, v159 + v75 -> v160 + v308 = ifcmp_imm v160, -1 + trapif ne v308, user0 + jump block52 + +block52: + v87 = iconst.i32 -23 + v88 = iconst.i32 -23 + v89 = popcnt v88 + v161 = iconst.i32 0x4de9_bd37 + v324, v162 = x86_smulx v89, v161 + v163 = isub v162, v89 + v164 = sshr_imm v163, 4 + v165 = iconst.i32 0 + v166 = iadd v164, v165 + v91 -> v166 + v326 = iconst.i32 0 + v327, v328 = x86_sdivmodx v87, v326, v166 + v92 -> v327 + v351 = iconst.i32 0 + v99 -> v351 + v358 = iconst.i32 0 + v359, v360 = x86_sdivmodx v36, v358, v99 + v100 -> v359 + v102 = iconst.i32 0 + v103 = rotr.i32 v32, v102 + v366 = iconst.i32 0 + v367, v368 = x86_sdivmodx v25, v366, v103 + v104 -> v367 + v383 = iconst.i32 0 + v107 -> v383 + v390 = iconst.i32 0 + v391, v392 = x86_sdivmodx v124, v390, v107 + trap user0 +} diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif new file mode 100644 index 0000000000..475bfa0f47 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif @@ -0,0 +1,173 @@ +test compile +set opt_level=speed +set enable_pinned_reg=true +target x86_64 haswell + +;; Test for the issue #1123; https://github.com/bytecodealliance/cranelift/issues/1123 + +function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v { +block0(v0: i32, v1: i32, v2: i32, v3: i64): + v351 = iconst.i32 0x4de9_bd37 + v31 = iconst.i32 -23 + v35 = iconst.i32 0 + v36 = iconst.i32 -31 + v357 = iconst.i32 0x4de9_bd37 + v530, v358 = x86_smulx v36, v357 + v359 = isub v358, v36 + v360 = sshr_imm v359, 4 + v361 = iconst.i32 0 + v362 = iadd v360, v361 + v38 -> v362 + v532 = sshr_imm v35, 31 + v533, v534 = x86_sdivmodx v35, v532, v362 + v39 -> v533 + v53 = iconst.i32 0 + v547 = ifcmp_imm v53, -1 + trapif ne 
v547, user0 + jump block30 + +block30: + v75 = iconst.i32 0 + v581 = ifcmp_imm v75, -1 + trapif ne v581, user0 + jump block42 + +block42: + v136 = iconst.i32 0 + v691 = ifcmp_imm v136, -1 + trapif ne v691, user0 + jump block81 + +block81: + v158 = iconst.i32 0 + v725 = ifcmp_imm v158, -1 + trapif ne v725, user0 + jump block93 + +block93: + v760 = iconst.i32 0 + jump block106(v760) + +block106(v175: i32): + v179 = iconst.i32 0 + v180 = icmp_imm eq v179, 0 + v183 = iconst.i32 0 + v766 = ifcmp_imm v183, -1 + trapif ne v766, user0 + jump block108 + +block108: + v771 = iconst.i32 0 + jump block109(v771) + +block109(v184: i32): + v785 = iconst.i32 0 + v193 -> v785 + v791 = ifcmp_imm v193, -1 + trapif ne v791, user0 + jump block117 + +block117: + v796 = iconst.i32 0 + jump block118(v796) + +block118(v194: i32): + v203 = iconst.i32 -63 + v809 = iconst.i32 0 + v207 -> v809 + v815 = ifcmp_imm v207, -1 + trapif ne v815, user0 + jump block126 + +block126: + v209 = iconst.i32 0 + v823 = ifcmp_imm v209, -1 + trapif ne v823, user0 + jump block129 + +block129: + v213 = iconst.i32 -23 + v214 = iconst.i32 -19 + v215 = icmp_imm eq v214, 0 + v216 = bint.i32 v215 + v217 = popcnt v216 + v435 = iconst.i32 0x7df7_df7d + v831, v436 = x86_smulx v217, v435 + v437 = isub v436, v217 + v438 = sshr_imm v437, 5 + v439 = ushr_imm v438, 31 + v440 = iadd v438, v439 + v219 -> v440 + v220 = rotr v213, v440 + v229 = iconst.i32 0 + v841 = iconst.i32 0 + v842, v843 = x86_sdivmodx v194, v841, v229 + v230 -> v842 + v849 = iconst.i32 0 + v850, v851 = x86_sdivmodx v184, v849, v230 + v231 -> v850 + v232 = iconst.i32 0 + v857 = iconst.i32 0 + v858, v859 = x86_sdivmodx v175, v857, v232 + v233 -> v858 + v915 = iconst.i32 0 + jump block163(v915) + +block163(v253: i32): + v255 = iconst.i32 0 + v256 = iconst.i32 -23 + v257 = iconst.i32 -19 + v258 = icmp_imm eq v257, 0 + v259 = bint.i32 v258 + v260 = popcnt v259 + v447 = iconst.i32 0x7df7_df7d + v921, v448 = x86_smulx v260, v447 + v449 = isub v448, v260 + v450 = sshr_imm v449, 5 + v451 = ushr_imm v450, 31 + v452 = iadd v450, v451 + v262 -> v452 + v263 = rotr v256, v452 + v264 = popcnt v263 + v265 = popcnt v264 + v266 = popcnt v265 + v267 = rotr v255, v266 + v268 = popcnt v267 + v923 = iconst.i32 0 + v924, v925 = x86_sdivmodx v253, v923, v268 + v269 -> v924 + v276 = iconst.i32 0 + v277 = iconst.i32 -63 + v278 = popcnt v277 + v947 = iconst.i32 0 + v948, v949 = x86_sdivmodx v276, v947, v278 + v279 -> v948 + v309 = iconst.i32 0 + v310 = iconst.i32 0 + v311 = iconst.i32 0 + v312 = icmp_imm eq v311, 0 + v313 = bint.i32 v312 + v314 = rotr v310, v313 + v315 = iconst.i32 -31 + v464 = iconst.i32 0 + v1020, v465 = x86_smulx v315, v464 + v466 = isub v465, v315 + v467 = sshr_imm v466, 4 + v468 = iconst.i32 0 + v469 = iadd v467, v468 + v317 -> v469 + v1022 = iconst.i32 0 + v1023, v1024 = x86_sdivmodx v314, v1022, v469 + v318 -> v1023 + v320 = iconst.i32 0 + v321 = iconst.i32 -19 + v322 = popcnt v321 + v1030 = iconst.i32 0 + v1031, v1032 = x86_sdivmodx v320, v1030, v322 + v323 -> v1031 + v1047 = iconst.i32 0 + v325 -> v1047 + v1054 = sshr_imm v309, 31 + v1055, v1056 = x86_sdivmodx v309, v1054, v325 + trap user0 +} diff --git a/cranelift/filetests/filetests/regalloc/spill-noregs.clif b/cranelift/filetests/filetests/regalloc/spill-noregs.clif new file mode 100644 index 0000000000..5acdd45b17 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/spill-noregs.clif @@ -0,0 +1,175 @@ +test regalloc +target x86_64 + +; Test case found by the Binaryen fuzzer. 
+;
+; The spiller panics with the message
+; 'Ran out of GPR registers when inserting copy before v68 = icmp.i32 eq v66, v67'
+; at cranelift-codegen/src/regalloc/spilling.rs:425:28.
+;
+; The process_reg_uses() function is trying to insert a copy before the icmp instruction in block4
+; and runs out of registers to spill. Note that block7 has a lot of dead parameter values.
+;
+; The spiller was not releasing register pressure for dead block parameters.
+
+function %pr223(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] system_v {
+block0(v0: i32, v1: i64):
+    v2 = iconst.i32 0
+    v3 = iconst.i64 0
+    v4 = iconst.i32 0xffff_ffff_bb3f_4a2c
+    brz v4, block5
+    jump block1
+
+block1:
+    v5 = iconst.i32 0
+    v6 = copy.i64 v3
+    v7 = copy.i64 v3
+    v8 = copy.i64 v3
+    v9 = copy.i64 v3
+    v10 = copy.i64 v3
+    v11 = copy.i64 v3
+    v12 = copy.i64 v3
+    v13 = copy.i64 v3
+    v14 = copy.i64 v3
+    v15 = copy.i64 v3
+    v16 = copy.i64 v3
+    brnz v5, block4(v2, v3, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16)
+    jump block2
+
+block2:
+    v17 = iconst.i32 0
+    v18 = copy.i64 v3
+    v19 = copy.i64 v3
+    v20 = copy.i64 v3
+    v21 = copy.i64 v3
+    v22 = copy.i64 v3
+    v23 = copy.i64 v3
+    v24 = copy.i64 v3
+    v25 = copy.i64 v3
+    v26 = copy.i64 v3
+    v27 = copy.i64 v3
+    v28 = copy.i64 v3
+    brnz v17, block4(v2, v3, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28)
+    jump block3
+
+block3:
+    jump block1
+
+block4(v29: i32, v30: i64, v31: i64, v32: i64, v33: i64, v34: i64, v35: i64, v36: i64, v37: i64, v38: i64, v39: i64, v40: i64, v41: i64):
+    jump block7(v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41)
+
+block5:
+    jump block6
+
+block6:
+    v42 = copy.i64 v3
+    v43 = copy.i64 v3
+    v44 = copy.i64 v3
+    v45 = copy.i64 v3
+    v46 = copy.i64 v3
+    v47 = copy.i64 v3
+    v48 = copy.i64 v3
+    v49 = copy.i64 v3
+    v50 = copy.i64 v3
+    v51 = copy.i64 v3
+    v52 = copy.i64 v3
+    jump block7(v2, v3, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52)
+
+block7(v53: i32, v54: i64, v55: i64, v56: i64, v57: i64, v58: i64, v59: i64, v60: i64, v61: i64, v62: i64, v63: i64, v64: i64, v65: i64):
+    v66 = iconst.i32 0
+    v67 = iconst.i32 0
+    v68 = icmp eq v66, v67
+    v69 = bint.i32 v68
+    jump block8
+
+block8:
+    jump block9
+
+block9:
+    v70 = iconst.i32 0xffff_ffff_ffff_912f
+    brz v70, block10
+    jump block35
+
+block10:
+    v71 = iconst.i32 0
+    brz v71, block11
+    jump block27
+
+block11:
+    jump block12
+
+block12:
+    jump block13
+
+block13:
+    jump block14
+
+block14:
+    jump block15
+
+block15:
+    jump block16
+
+block16:
+    jump block17
+
+block17:
+    jump block18
+
+block18:
+    jump block19
+
+block19:
+    jump block20
+
+block20:
+    jump block21
+
+block21:
+    jump block22
+
+block22:
+    jump block23
+
+block23:
+    jump block24
+
+block24:
+    jump block25
+
+block25:
+    jump block26
+
+block26:
+    jump block27
+
+block27:
+    jump block28
+
+block28:
+    jump block29
+
+block29:
+    jump block30
+
+block30:
+    jump block31
+
+block31:
+    jump block32
+
+block32:
+    jump block33
+
+block33:
+    jump block34
+
+block34:
+    jump block35
+
+block35:
+    jump block36
+
+block36:
+    trap user0
+}
diff --git a/cranelift/filetests/filetests/regalloc/spill.clif b/cranelift/filetests/filetests/regalloc/spill.clif
new file mode 100644
index 0000000000..23706cd2cf
--- /dev/null
+++ b/cranelift/filetests/filetests/regalloc/spill.clif
@@ -0,0 +1,223 @@
+test regalloc
+
+; Test the spiller on an ISA with few registers.
+; RV32E has 16 registers, where:
+; - %x0 is hardwired to zero.
+; - %x1 is the return address.
+; - %x2 is the stack pointer.
+; - %x3 is the global pointer.
+; - %x4 is the thread pointer. +; - %x10-%x15 are function arguments. +; +; regex: V=v\d+ +; regex: WS=\s+ + +target riscv32 enable_e + +; In straight-line code, the first value defined is spilled. +; That is in order: +; 1. The argument v1. +; 2. The link register. +; 3. The first computed value, v2 +function %pyramid(i32) -> i32 { +; check: ss0 = spill_slot 4 +; check: ss1 = spill_slot 4 +; check: ss2 = spill_slot 4 +; not: spill_slot +block0(v1: i32): +; check: block0($(rv1=$V): i32 [%x10], $(rlink=$V): i32 [%x1]) + ; check: ,ss0]$WS v1 = spill $rv1 + ; nextln: ,ss1]$WS $(link=$V) = spill $rlink + ; not: spill + v2 = iadd_imm v1, 12 + ; check: $(r1v2=$V) = iadd_imm + ; nextln: ,ss2]$WS v2 = spill $r1v2 + ; not: spill + v3 = iadd_imm v2, 12 + v4 = iadd_imm v3, 12 + v5 = iadd_imm v4, 12 + v6 = iadd_imm v5, 12 + v7 = iadd_imm v6, 12 + v8 = iadd_imm v7, 12 + v9 = iadd_imm v8, 12 + v10 = iadd_imm v9, 12 + v11 = iadd_imm v10, 12 + v12 = iadd_imm v11, 12 + v13 = iadd_imm v12, 12 + v14 = iadd_imm v13, 12 + v33 = iadd v13, v14 + ; check: iadd v13 + v32 = iadd v33, v12 + v31 = iadd v32, v11 + v30 = iadd v31, v10 + v29 = iadd v30, v9 + v28 = iadd v29, v8 + v27 = iadd v28, v7 + v26 = iadd v27, v6 + v25 = iadd v26, v5 + v24 = iadd v25, v4 + v23 = iadd v24, v3 + v22 = iadd v23, v2 + ; check: $(r2v2=$V) = fill v2 + ; check: v22 = iadd v23, $r2v2 + v21 = iadd v22, v1 + ; check: $(r2v1=$V) = fill v1 + ; check: v21 = iadd v22, $r2v1 + ; check: $(rlink2=$V) = fill $link + return v21 + ; check: return v21, $rlink2 +} + +; All values live across a call must be spilled +function %across_call(i32) { + fn0 = %foo(i32) +block0(v1: i32): + ; check: v1 = spill + call fn0(v1) + ; check: call fn0 + call fn0(v1) + ; check: fill v1 + ; check: call fn0 + return +} + +; The same value used for two function arguments. +function %doubleuse(i32) { + fn0 = %xx(i32, i32) +block0(v0: i32): + ; check: $(c=$V) = copy v0 + call fn0(v0, v0) + ; check: call fn0(v0, $c) + return +} + +; The same value used as indirect callee and argument. +function %doubleuse_icall1(i32) { + sig0 = (i32) system_v +block0(v0: i32): + ; not:copy + call_indirect sig0, v0(v0) + return +} + +; The same value used as indirect callee and two arguments. +function %doubleuse_icall2(i32) { + sig0 = (i32, i32) system_v +block0(v0: i32): + ; check: $(c=$V) = copy v0 + call_indirect sig0, v0(v0, v0) + ; check: call_indirect sig0, v0(v0, $c) + return +} + +; Two arguments on the stack. +function %stackargs(i32, i32, i32, i32, i32, i32, i32, i32) -> i32 { +; check: ss0 = incoming_arg 4 +; check: ss1 = incoming_arg 4, offset 4 +; not: incoming_arg +block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32): + ; unordered: fill v6 + ; unordered: fill v7 + v10 = iadd v6, v7 + return v10 +} + +; More block arguments than registers. +function %blockargs(i32) -> i32 { +block0(v1: i32): + ; check: v1 = spill + v2 = iconst.i32 1 + jump block1(v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2) + +block1(v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32, v20: i32, v21: i32): + v22 = iadd v10, v11 + v23 = iadd v22, v12 + v24 = iadd v23, v13 + v25 = iadd v24, v14 + v26 = iadd v25, v15 + v27 = iadd v26, v16 + v28 = iadd v27, v17 + v29 = iadd v28, v18 + v30 = iadd v29, v19 + v31 = iadd v30, v20 + v32 = iadd v31, v21 + v33 = iadd v32, v1 + return v33 +} + +; Spilling a block argument to make room for a branch operand. 
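+; (v1 is still live in block1 and block2, so it has to survive in a spill slot
+; while the twelve branch arguments occupy the registers.)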
+function %brargs(i32) -> i32 { +block0(v1: i32): + ; check: v1 = spill + v2 = iconst.i32 1 + brnz v1, block1(v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2) + jump block2 + +block2: + return v1 + +block1(v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32, v20: i32, v21: i32): + v22 = iadd v10, v11 + v23 = iadd v22, v12 + v24 = iadd v23, v13 + v25 = iadd v24, v14 + v26 = iadd v25, v15 + v27 = iadd v26, v16 + v28 = iadd v27, v17 + v29 = iadd v28, v18 + v30 = iadd v29, v19 + v31 = iadd v30, v20 + v32 = iadd v31, v21 + v33 = iadd v32, v1 + return v33 +} + +; In straight-line code, the first value defined is spilled. +; That is in order: +; 1. The argument v1. +; 2. The link register. +; 3. The first computed value, v2 +function %use_spilled_value(i32) -> i32 { +; check: ss0 = spill_slot 4 +; check: ss1 = spill_slot 4 +; check: ss2 = spill_slot 4 +block0(v1: i32): +; check: block0($(rv1=$V): i32 [%x10], $(rlink=$V): i32 [%x1]) + ; check: ,ss0]$WS v1 = spill $rv1 + ; nextln: ,ss1]$WS $(link=$V) = spill $rlink + ; not: spill + v2 = iadd_imm v1, 12 + ; check: $(r1v2=$V) = iadd_imm + ; nextln: ,ss2]$WS v2 = spill $r1v2 + v3 = iadd_imm v2, 12 + v4 = iadd_imm v3, 12 + v5 = iadd_imm v4, 12 + v6 = iadd_imm v5, 12 + v7 = iadd_imm v6, 12 + v8 = iadd_imm v7, 12 + v9 = iadd_imm v8, 12 + v10 = iadd_imm v9, 12 + v11 = iadd_imm v10, 12 + v12 = iadd_imm v11, 12 + v13 = iadd_imm v12, 12 + v14 = iadd_imm v13, 12 + + ; Here we have maximum register pressure, and v2 has been spilled. + ; What happens if we use it? + v33 = iadd v2, v14 + v32 = iadd v33, v12 + v31 = iadd v32, v11 + v30 = iadd v31, v10 + v29 = iadd v30, v9 + v28 = iadd v29, v8 + v27 = iadd v28, v7 + v26 = iadd v27, v6 + v25 = iadd v26, v5 + v24 = iadd v25, v4 + v23 = iadd v24, v3 + v22 = iadd v23, v2 + v21 = iadd v22, v1 + v20 = iadd v21, v13 + v19 = iadd v20, v2 + return v21 +} diff --git a/cranelift/filetests/filetests/regalloc/unreachable_code.clif b/cranelift/filetests/filetests/regalloc/unreachable_code.clif new file mode 100644 index 0000000000..4c288a91dd --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/unreachable_code.clif @@ -0,0 +1,47 @@ +; Use "test compile" here otherwise the dead blocks won't be eliminated. +test compile + +set enable_probestack=0 +target x86_64 haswell + +; This function contains unreachable blocks which trip up the register +; allocator if they don't get cleared out. +function %unreachable_blocks(i64 vmctx) -> i32 baldrdash_system_v { +block0(v0: i64): + v1 = iconst.i32 0 + v2 = iconst.i32 0 + jump block2 + +block2: + jump block4 + +block4: + jump block2 + +; Everything below this point is unreachable. + +block3(v3: i32): + v5 = iadd.i32 v2, v3 + jump block6 + +block6: + jump block6 + +block7(v6: i32): + v7 = iadd.i32 v5, v6 + jump block8 + +block8: + jump block10 + +block10: + jump block8 + +block9(v8: i32): + v10 = iadd.i32 v7, v8 + jump block1(v10) + +block1(v11: i32): + return v11 +} + diff --git a/cranelift/filetests/filetests/regalloc/x86-regres.clif b/cranelift/filetests/filetests/regalloc/x86-regres.clif new file mode 100644 index 0000000000..e239d0ad37 --- /dev/null +++ b/cranelift/filetests/filetests/regalloc/x86-regres.clif @@ -0,0 +1,49 @@ +test regalloc +target i686 + +; regex: V=v\d+ +; regex: BB=block\d+ + +; The value v9 appears both as the branch control and one of the block arguments +; in the brnz instruction in block2. It also happens that v7 and v9 are assigned +; to the same register, so v9 doesn't need to be moved before the brnz. 
+; +; This ended up confusong the constraint solver which had not made a record of +; the fixed register assignment for v9 since it was already in the correct +; register. +function %pr147(i32) -> i32 system_v { +block0(v0: i32): + v1 = iconst.i32 0 + v2 = iconst.i32 1 + v3 = iconst.i32 0 + jump block2(v3, v2, v0) + + ; check: $(splitEdge=$BB): + ; check: jump block2($V, $V, v9) + +block2(v4: i32, v5: i32, v7: i32): + ; check: block2 + v6 = iadd v4, v5 + v8 = iconst.i32 -1 + ; v7 is killed here and v9 gets the same register. + v9 = iadd v7, v8 + ; check: v9 = iadd v7, v8 + ; Here v9 the brnz control appears to interfere with v9 the block argument, + ; so divert_fixed_input_conflicts() calls add_var(v9), which is ok. The + ; add_var sanity checks got confused when no fixed assignment could be + ; found for v9. + ; + ; We should be able to handle this situation without making copies of v9. + brnz v9, block2(v5, v6, v9) + ; check: brnz v9, $splitEdge + jump block3 + +block3: + return v5 +} + +function %select_i64(i64, i64, i32) -> i64 { +block0(v0: i64, v1: i64, v2: i32): + v3 = select v2, v0, v1 + return v3 +} diff --git a/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif b/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif new file mode 100644 index 0000000000..995e7c5f64 --- /dev/null +++ b/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif @@ -0,0 +1,56 @@ +test compile +target x86_64 + +; This checks that code shrink is allowed while relaxing code, when code shrink +; has not run. + +function u0:0(i64, i64) -> i64 system_v { + ss1 = explicit_slot 8 + sig0 = (i64) -> i64 system_v + fn0 = u0:8 sig0 + +block0(v0: i64, v1: i64): + v3 = stack_addr.i64 ss1 + v5 = call fn0(v1) + v6 = iconst.i64 0 + v8 = iconst.i64 0 + jump block3(v6, v1, v8) + +block3(v39: i64, v40: i64, v42: i64): + v9 = load.i64 v3 + v11 = icmp_imm ugt v9, 1 + v12 = bint.i8 v11 + v13 = uextend.i32 v12 + v14 = icmp_imm eq v13, 0 + brnz v14, block4 + jump block5 + +block4: + v18 = icmp_imm.i64 eq v40, 0 + v19 = bint.i8 v18 + v20 = uextend.i32 v19 + brz v20, block6 + jump block7 + +block7: + trap user0 + +block5: + v22 = iconst.i32 1 + v23 = ishl.i64 v39, v22 + v25 = iconst.i64 1 + v26 = band.i64 v42, v25 + v27 = bor v23, v26 + v28 = iconst.i32 1 + v29 = ushr.i64 v42, v28 + v30 = load.i64 v3 + v31 = iconst.i32 1 + v32 = ushr v30, v31 + store v32, v3 + jump block3(v27, v40, v29) + +block6: + v38 = iconst.i64 0 + return v38 +} + diff --git a/cranelift/filetests/filetests/safepoint/basic.clif b/cranelift/filetests/filetests/safepoint/basic.clif new file mode 100644 index 0000000000..7e0088b23b --- /dev/null +++ b/cranelift/filetests/filetests/safepoint/basic.clif @@ -0,0 +1,71 @@ +test safepoint +set enable_safepoints=true +target x86_64 + +function %test(i32, r64, r64) -> r64 { + block0(v0: i32, v1:r64, v2:r64): + jump block1(v0) + block1(v3: i32): + v4 = irsub_imm v3, 1 + jump block2(v4) + block2(v5: i32): + resumable_trap interrupt + brz v5, block1(v5) + jump block3 + block3: + v6 = null.r64 + v7 = is_null v6 + brnz v7, block2(v0) + jump block4 + block4: + brnz v0, block5 + jump block6 + block5: + return v1 + block6: + return v2 +} + +; sameln: function %test(i32 [%rdi], r64 [%rsi], r64 [%rdx]) -> r64 [%rax] fast { +; nextln: block0(v0: i32 [%rdi], v1: r64 [%rsi], v2: r64 [%rdx]): +; nextln: v10 = copy v0 +; nextln: jump block1(v10) +; nextln: +; nextln: block7: +; nextln: regmove.i32 v5, %rcx -> %rax +; nextln: jump block1(v5) +; nextln: +; nextln: block1(v3: i32 [%rax]): +; 
nextln: v8 = iconst.i32 1 +; nextln: v4 = isub v8, v3 +; nextln: jump block2(v4) +; nextln: +; nextln: block8: +; nextln: v9 = copy.i32 v0 +; nextln: regmove v9, %rax -> %rcx +; nextln: jump block2(v9) +; nextln: +; nextln: block2(v5: i32 [%rcx]): +; nextln: safepoint v1, v2 +; nextln: resumable_trap interrupt +; nextln: brz v5, block7 +; nextln: jump block3 +; nextln: +; nextln: block3: +; nextln: v6 = null.r64 +; nextln: v7 = is_null v6 +; nextln: brnz v7, block8 +; nextln: jump block4 +; nextln: +; nextln: block4: +; nextln: brnz.i32 v0, block5 +; nextln: jump block6 +; nextln: +; nextln: block5: +; nextln: regmove.r64 v1, %rsi -> %rax +; nextln: return v1 +; nextln: +; nextln: block6: +; nextln: regmove.r64 v2, %rdx -> %rax +; nextln: return v2 +; nextln: } diff --git a/cranelift/filetests/filetests/safepoint/call.clif b/cranelift/filetests/filetests/safepoint/call.clif new file mode 100644 index 0000000000..53c9246323 --- /dev/null +++ b/cranelift/filetests/filetests/safepoint/call.clif @@ -0,0 +1,58 @@ +test safepoint +set enable_safepoints=true +target x86_64 + +function %direct() -> r64 { + fn0 = %none() + fn1 = %one() -> r64 + fn2 = %two() -> i32, r64 + +block0: + call fn0() + v1 = call fn1() + v2, v3 = call fn2() + brz v2, block2 + jump block1 +block1: + return v1 +block2: + v4 = call fn1() + return v3 +} + +; sameln: function %direct() -> r64 [%rax] fast { +; nextln: ss0 = spill_slot 8 +; nextln: ss1 = spill_slot 8 +; nextln: sig0 = () fast +; nextln: sig1 = () -> r64 [%rax] fast +; nextln: sig2 = () -> i32 [%rax], r64 [%rdx] fast +; nextln: fn0 = %none sig0 +; nextln: fn1 = %one sig1 +; nextln: fn2 = %two sig2 +; nextln: +; nextln: block0: +; nextln: v5 = func_addr.i64 fn0 +; nextln: call_indirect sig0, v5() +; nextln: v6 = func_addr.i64 fn1 +; nextln: v9 = call_indirect sig1, v6() +; nextln: v1 = spill v9 +; nextln: v7 = func_addr.i64 fn2 +; nextln: safepoint v1 +; nextln: v2, v10 = call_indirect sig2, v7() +; nextln: v3 = spill v10 +; nextln: brz v2, block2 +; nextln: jump block1 +; nextln: +; nextln: block1: +; nextln: v11 = fill.r64 v1 +; nextln: regmove v11, %r15 -> %rax +; nextln: return v11 +; nextln: +; nextln: block2: +; nextln: v8 = func_addr.i64 fn1 +; nextln: safepoint v3 +; nextln: v4 = call_indirect sig1, v8() +; nextln: v12 = fill.r64 v3 +; nextln: regmove v12, %r15 -> %rax +; nextln: return v12 +; nextln: } diff --git a/cranelift/filetests/filetests/simple_gvn/basic.clif b/cranelift/filetests/filetests/simple_gvn/basic.clif new file mode 100644 index 0000000000..107c3897d1 --- /dev/null +++ b/cranelift/filetests/filetests/simple_gvn/basic.clif @@ -0,0 +1,44 @@ +test simple-gvn + +function %simple_redundancy(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = iadd v0, v1 + v3 = iadd v0, v1 + v4 = imul v2, v3 +; check: v4 = imul v2, v2 + return v4 +} + +function %cascading_redundancy(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = iadd v0, v1 + v3 = iadd v0, v1 + v4 = imul v2, v3 + v5 = imul v2, v2 + v6 = iadd v4, v5 +; check: v6 = iadd v4, v4 + return v6 +} + +function %redundancies_on_some_paths(i32, i32, i32) -> i32 { +block0(v0: i32, v1: i32, v2: i32): + v3 = iadd v0, v1 + brz v3, block1 + jump block3 + +block3: + v4 = iadd v0, v1 + jump block2(v4) +; check: jump block2(v3) + +block1: + v5 = iadd v0, v1 + jump block2(v5) +; check: jump block2(v3) + +block2(v6: i32): + v7 = iadd v0, v1 + v8 = iadd v6, v7 +; check: v8 = iadd v6, v3 + return v8 +} diff --git a/cranelift/filetests/filetests/simple_gvn/readonly.clif 
b/cranelift/filetests/filetests/simple_gvn/readonly.clif new file mode 100644 index 0000000000..802396f4f8 --- /dev/null +++ b/cranelift/filetests/filetests/simple_gvn/readonly.clif @@ -0,0 +1,25 @@ +test simple-gvn + +target x86_64 + +function %eliminate_redundant_global_loads(i32, i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned readonly gv0 + heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = heap_addr.i64 heap0, v0, 1 + + v4 = iconst.i32 0 + store.i32 notrap aligned v4, v2 + store.i32 notrap aligned v4, v3 + + return +} +; check: v2 = heap_addr.i64 heap0, v0, 1 +; check: v3 -> v2 +; check: v4 = iconst.i32 0 +; check: store notrap aligned v4, v2 +; check: store notrap aligned v4, v2 +; check: return diff --git a/cranelift/filetests/filetests/simple_gvn/reject.clif b/cranelift/filetests/filetests/simple_gvn/reject.clif new file mode 100644 index 0000000000..c4613af4dc --- /dev/null +++ b/cranelift/filetests/filetests/simple_gvn/reject.clif @@ -0,0 +1,56 @@ +test simple-gvn + +function %other_side_effects(i32) -> i32 { +block0(v0: i32): + regmove v0, %10 -> %20 + regmove v0, %10 -> %20 + regmove v0, %20 -> %10 +; check: regmove v0, %10 -> %20 +; check: regmove v0, %10 -> %20 + return v0 +} + +function %differing_typevars() -> i64 { +block0: + v0 = iconst.i32 7 + v1 = iconst.i64 7 + v2 = iconst.i64 8 +; check: v0 = iconst.i32 7 +; check: v1 = iconst.i64 7 +; check: v2 = iconst.i64 8 + v3 = uextend.i64 v0 + v4 = iadd v2, v1 + v5 = iadd v4, v3 + return v5 +} + +function %cpu_flags() -> b1 { +block0: + v0 = iconst.i32 7 + v1 = iconst.i32 8 + v2 = ifcmp v0, v1 + v3 = trueif eq v2 + v4 = ifcmp v0, v1 + v5 = trueif eq v4 + v6 = bor v3, v5 +; check: v2 = ifcmp v0, v1 +; check: v3 = trueif eq v2 +; check: v4 = ifcmp v0, v1 +; check: v5 = trueif eq v4 + return v6 +} + +function %spill() -> i32 { +block0: + v0 = iconst.i32 7 + v1 = spill v0 + v2 = fill v1 + v3 = spill v0 + v4 = fill v1 + v5 = bor v2, v4 +; check: v1 = spill v0 +; check: v2 = fill v1 +; check: v3 = spill v0 +; check: v4 = fill v1 + return v5 +} diff --git a/cranelift/filetests/filetests/simple_gvn/scopes.clif b/cranelift/filetests/filetests/simple_gvn/scopes.clif new file mode 100644 index 0000000000..63a425ad3f --- /dev/null +++ b/cranelift/filetests/filetests/simple_gvn/scopes.clif @@ -0,0 +1,82 @@ +test simple-gvn + +function %two_diamonds(i32, i32, i32, i32, i32) { +block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32): + v5 = iconst.i32 16 + ; check: v5 = iconst.i32 16 + brz v0, block1 + jump block5 + +block5: + v6 = iconst.i32 17 + ; check: v6 = iconst.i32 17 + v7 = iconst.i32 16 + ; not: v7 = iconst.i32 16 + jump block2 + +block1: + v8 = iconst.i32 18 + ; check: v8 = iconst.i32 18 + v9 = iconst.i32 17 + ; check: v9 = iconst.i32 17 + v10 = iconst.i32 16 + ; not: v10 = iconst.i32 16 + jump block2 + +block2: + v11 = iconst.i32 19 + ; check: v11 = iconst.i32 19 + v12 = iconst.i32 18 + ; check: v12 = iconst.i32 18 + v13 = iconst.i32 17 + ; check: v13 = iconst.i32 17 + v14 = iconst.i32 16 + ; not: v14 = iconst.i32 16 + brz v1, block3 + jump block6 + +block6: + v15 = iconst.i32 20 + ; check: v15 = iconst.i32 20 + v16 = iconst.i32 19 + ; not: v16 = iconst.i32 19 + v17 = iconst.i32 18 + ; not: v17 = iconst.i32 18 + v18 = iconst.i32 17 + ; not: v18 = iconst.i32 17 + v19 = iconst.i32 16 + ; not: v19 = iconst.i32 16 + jump block4 + +block3: + v20 = iconst.i32 21 + ; check: v20 = iconst.i32 21 + v21 = iconst.i32 20 + ; 
check: v21 = iconst.i32 20 + v22 = iconst.i32 19 + ; not: v22 = iconst.i32 19 + v23 = iconst.i32 18 + ; not: v23 = iconst.i32 18 + v24 = iconst.i32 17 + ; not: v24 = iconst.i32 17 + v25 = iconst.i32 16 + ; not: v25 = iconst.i32 16 + jump block4 + +block4: + v26 = iconst.i32 22 + ; check: v26 = iconst.i32 22 + v27 = iconst.i32 21 + ; check: v27 = iconst.i32 21 + v28 = iconst.i32 20 + ; check: v28 = iconst.i32 20 + v29 = iconst.i32 19 + ; not: v29 = iconst.i32 19 + v30 = iconst.i32 18 + ; not: v30 = iconst.i32 18 + v31 = iconst.i32 17 + ; not: v31 = iconst.i32 17 + v32 = iconst.i32 16 + ; not: v32 = iconst.i32 16 + return +} diff --git a/cranelift/filetests/filetests/simple_preopt/branch.clif b/cranelift/filetests/filetests/simple_preopt/branch.clif new file mode 100644 index 0000000000..21cc7afda3 --- /dev/null +++ b/cranelift/filetests/filetests/simple_preopt/branch.clif @@ -0,0 +1,81 @@ +test simple_preopt +target x86_64 + +function %icmp_to_brz_fold(i32) -> i32 { +block0(v0: i32): + v1 = icmp_imm eq v0, 0 + brnz v1, block1 + jump block2 +block1: + v3 = iconst.i32 1 + return v3 +block2: + v4 = iconst.i32 2 + return v4 +} +; sameln: function %icmp_to_brz_fold +; nextln: block0(v0: i32): +; nextln: v1 = icmp_imm eq v0, 0 +; nextln: brnz v0, block2 +; nextln: jump block1 +; nextln: +; nextln: block1: +; nextln: v3 = iconst.i32 1 +; nextln: return v3 +; nextln: +; nextln: block2: +; nextln: v4 = iconst.i32 2 +; nextln: return v4 +; nextln: } + +function %icmp_to_brz_inverted_fold(i32) -> i32 { +block0(v0: i32): + v1 = icmp_imm ne v0, 0 + brz v1, block1 + jump block2 +block1: + v3 = iconst.i32 1 + return v3 +block2: + v4 = iconst.i32 2 + return v4 +} +; sameln: function %icmp_to_brz_inve +; nextln: block0(v0: i32): +; nextln: v1 = icmp_imm ne v0, 0 +; nextln: brnz v0, block2 +; nextln: jump block1 +; nextln: +; nextln: block1: +; nextln: v3 = iconst.i32 1 +; nextln: return v3 +; nextln: +; nextln: block2: +; nextln: v4 = iconst.i32 2 +; nextln: return v4 +; nextln: } + +function %br_icmp_inversion(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + br_icmp ugt v0, v1, block1 + jump block2 +block1: + v2 = iconst.i32 1 + return v2 +block2: + v3 = iconst.i32 2 + return v3 +} +; sameln: function %br_icmp_inversio +; nextln: block0(v0: i32, v1: i32): +; nextln: br_icmp ule v0, v1, block2 +; nextln: jump block1 +; nextln: +; nextln: block1: +; nextln: v2 = iconst.i32 1 +; nextln: return v2 +; nextln: +; nextln: block2: +; nextln: v3 = iconst.i32 2 +; nextln: return v3 +; nextln: } diff --git a/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif b/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif new file mode 100644 index 0000000000..101e4eb201 --- /dev/null +++ b/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif @@ -0,0 +1,59 @@ +test simple_preopt +target x86_64 baseline + +; Cases where the denominator is created by an iconst + +function %indir_udiv32(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 7 + v2 = udiv v0, v1 + ; check: iconst.i32 7 + ; check: iconst.i32 0x2492_4925 + ; check: umulhi v0, v3 + ; check: isub v0, v4 + ; check: ushr_imm v5, 1 + ; check: iadd v6, v4 + ; check: v8 = ushr_imm v7, 2 + ; check: v2 -> v8 + return v2 +} + +function %indir_sdiv32(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 -17 + v2 = sdiv v0, v1 + ; check: iconst.i32 -17 + ; check: iconst.i32 0xffff_ffff_8787_8787 + ; check: smulhi v0, v3 + ; check: sshr_imm v4, 3 + ; check: ushr_imm v5, 31 + ; check: v7 = iadd v5, v6 + ; check: v2 -> v7 + return v2 
+} + +function %indir_udiv64(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 1337 + v2 = udiv v0, v1 + ; check: iconst.i64 1337 + ; check: iconst.i64 0xc411_9d95_2866_a139 + ; check: umulhi v0, v3 + ; check: v5 = ushr_imm v4, 10 + ; check: v2 -> v5 + return v2 +} + +function %indir_sdiv64(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 -90210 + v2 = sdiv v0, v1 + ; check: iconst.i64 0xffff_ffff_fffe_9f9e + ; check: iconst.i64 0xd181_4ee8_939c_b8bb + ; check: smulhi v0, v3 + ; check: sshr_imm v4, 14 + ; check: ushr_imm v5, 63 + ; check: v7 = iadd v5, v6 + ; check: v2 -> v7 + return v2 +} diff --git a/cranelift/filetests/filetests/simple_preopt/div_by_const_non_power_of_2.clif b/cranelift/filetests/filetests/simple_preopt/div_by_const_non_power_of_2.clif new file mode 100644 index 0000000000..b1225a28d5 --- /dev/null +++ b/cranelift/filetests/filetests/simple_preopt/div_by_const_non_power_of_2.clif @@ -0,0 +1,266 @@ +test simple_preopt +target i686 baseline + +; -------- U32 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_udiv32_p7(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 7 + ; check: iconst.i32 0x2492_4925 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: v7 = ushr_imm v6, 2 + ; check: v1 -> v7 + return v1 +} + +; simple case (mul, shift) +function %t_udiv32_p125(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 125 + ; check: iconst.i32 0x1062_4dd3 + ; check: umulhi v0, v2 + ; check: v4 = ushr_imm v3, 3 + ; check: v1 -> v4 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_udiv32_p641(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 641 + ; check: iconst.i32 0x0066_3d81 + ; check: v3 = umulhi v0, v2 + ; check: v1 -> v3 + return v1 +} + + +; -------- S32 -------- + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_sdiv32_n6(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -6 + ; check: iconst.i32 0xffff_ffff_d555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv32_n5(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -5 + ; check: iconst.i32 0xffff_ffff_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 31 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_sdiv32_n3(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -3 + ; check: iconst.i32 0x5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 31 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 + return v1 +} + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_sdiv32_p6(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 6 + ; check: iconst.i32 0x2aaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 + return v1 +} + +; case d > 0 && M < 0 (mull, add, shift, add-sign-bit) +function %t_sdiv32_p7(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 7 + ; check: iconst.i32 0xffff_ffff_9249_2493 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 2 + ; check: ushr_imm v5, 31 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv32_p625(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 625 + ; check: iconst.i32 0x68db_8bad + ; check: smulhi v0, v2 + ; 
check: sshr_imm v3, 8 + ; check: ushr_imm v4, 31 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 + return v1 +} + + +; -------- U64 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_udiv64_p7(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 7 + ; check: iconst.i64 0x2492_4924_9249_2493 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: v7 = ushr_imm v6, 2 + ; check: v1 -> v7 + return v1 +} + +; simple case (mul, shift) +function %t_udiv64_p9(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 9 + ; check: iconst.i64 0xe38e_38e3_8e38_e38f + ; check: umulhi v0, v2 + ; check: v4 = ushr_imm v3, 3 + ; check: v1 -> v4 + return v1 +} + +; complex case (mul, sub, shift, add, shift) +function %t_udiv64_p125(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 125 + ; check: iconst.i64 0x0624_dd2f_1a9f_be77 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: v7 = ushr_imm v6, 6 + ; check: v1 -> v7 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_udiv64_p274177(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 274177 + ; check: iconst.i64 0x3d30_f19c_d101 + ; check: v3 = umulhi v0, v2 + ; check: v1 -> v3 + return v1 +} + + +; -------- S64 -------- + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv64_n625(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -625 + ; check: iconst.i64 0xcb92_3a29_c779_a6b5 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_sdiv64_n6(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -6 + ; check: iconst.i64 0xd555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_sdiv64_n5(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -5 + ; check: iconst.i64 0x9999_9999_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 63 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_sdiv64_n3(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -3 + ; check: iconst.i64 0x5555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 63 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_sdiv64_p6(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 6 + ; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 + return v1 +} + +; case d > 0 && M < 0 (mul, add, shift, add-sign-bit) +function %t_sdiv64_p15(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 15 + ; check: iconst.i64 0x8888_8888_8888_8889 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 3 + ; check: ushr_imm v5, 63 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_sdiv64_p625(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 625 + ; check: iconst.i64 0x346d_c5d6_3886_594b + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 + return v1 +} diff --git 
a/cranelift/filetests/filetests/simple_preopt/div_by_const_power_of_2.clif b/cranelift/filetests/filetests/simple_preopt/div_by_const_power_of_2.clif new file mode 100644 index 0000000000..83e9f95c8a --- /dev/null +++ b/cranelift/filetests/filetests/simple_preopt/div_by_const_power_of_2.clif @@ -0,0 +1,292 @@ +test simple_preopt +target i686 baseline + +; -------- U32 -------- + +; ignored +function %t_udiv32_p0(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 0 + ; check: udiv_imm v0, 0 + return v1 +} + +; converted to a nop +function %t_udiv32_p1(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 1 + ; check: nop + return v1 +} + +; shift +function %t_udiv32_p2(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 2 + ; check: ushr_imm v0, 1 + return v1 +} + +; shift +function %t_udiv32_p2p31(i32) -> i32 { +block0(v0: i32): + v1 = udiv_imm v0, 0x8000_0000 + ; check: ushr_imm v0, 31 + return v1 +} + + +; -------- U64 -------- + +; ignored +function %t_udiv64_p0(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 0 + ; check: udiv_imm v0, 0 + return v1 +} + +; converted to a nop +function %t_udiv64_p1(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 1 + ; check: nop + return v1 +} + +; shift +function %t_udiv64_p2(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 2 + ; check: ushr_imm v0, 1 + return v1 +} + +; shift +function %t_udiv64_p2p63(i64) -> i64 { +block0(v0: i64): + v1 = udiv_imm v0, 0x8000_0000_0000_0000 + ; check: ushr_imm v0, 63 + return v1 +} + + +; -------- S32 -------- + +; ignored +function %t_sdiv32_p0(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 0 + ; check: sdiv_imm v0, 0 + return v1 +} + +; converted to a nop +function %t_sdiv32_p1(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 1 + ; check: nop + return v1 +} + +; ignored +function %t_sdiv32_n1(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -1 + ; check: sdiv_imm v0, -1 + return v1 +} + +; shift +function %t_sdiv32_p2(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 2 + ; check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: sshr_imm v3, 1 + ; check: v1 -> v4 + return v1 +} + +; shift +function %t_sdiv32_n2(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -2 + ; check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: sshr_imm v3, 1 + ; check: irsub_imm v4, 0 + return v1 +} + +; shift +function %t_sdiv32_p4(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 4 + ; check: v2 = sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; check: v5 = sshr_imm v4, 2 + ; check: v1 -> v5 + + return v1 +} + +; shift +function %t_sdiv32_n4(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 2 + ; check: irsub_imm v5, 0 + return v1 +} + +; shift +function %t_sdiv32_p2p30(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, 0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: v5 = sshr_imm v4, 30 + ; check: v1 -> v5 + return v1 +} + +; shift +function %t_sdiv32_n2p30(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 30 + ; check: irsub_imm v5, 0 + return v1 +} + +; there's no positive version of this, since -(-0x8000_0000) isn't +; representable. 
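+; As an informal sanity check of the expansion below (commentary only, not +; checked output): for v0 = -1 the adjustment is (-1 >>s 30) >>u 1 = 0x7fff_ffff, +; so the iadd yields 0x7fff_fffe, the sshr_imm by 31 yields 0, and the final +; irsub_imm leaves 0, matching -1 / -0x8000_0000 = 0. For v0 = -0x8000_0000 the +; same sequence yields -1 before the irsub_imm, hence 1, matching +; -0x8000_0000 / -0x8000_0000 = 1.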
+function %t_sdiv32_n2p31(i32) -> i32 { +block0(v0: i32): + v1 = sdiv_imm v0, -0x8000_0000 + ; check: sshr_imm v0, 30 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 31 + ; check: irsub_imm v5, 0 + return v1 +} + + +; -------- S64 -------- + +; ignored +function %t_sdiv64_p0(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 0 + ; check: sdiv_imm v0, 0 + return v1 +} + +; converted to a nop +function %t_sdiv64_p1(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 1 + ; check: nop + return v1 +} + +; ignored +function %t_sdiv64_n1(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -1 + ; check: sdiv_imm v0, -1 + return v1 +} + +; shift +function %t_sdiv64_p2(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: v4 = sshr_imm v3, 1 + ; check: v1 -> v4 + return v1 +} + +; shift +function %t_sdiv64_n2(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: sshr_imm v3, 1 + ; check: irsub_imm v4, 0 + return v1 +} + +; shift +function %t_sdiv64_p4(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 + ; check: v5 = sshr_imm v4, 2 + ; check: v1 -> v5 + return v1 +} + +; shift +function %t_sdiv64_n4(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 2 + ; check: irsub_imm v5, 0 + return v1 +} + +; shift +function %t_sdiv64_p2p62(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, 0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: v5 = sshr_imm v4, 62 + ; check: v1 -> v5 + return v1 +} + +; shift +function %t_sdiv64_n2p62(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 62 + ; check: irsub_imm v5, 0 + return v1 +} + +; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't +; representable. 
+function %t_sdiv64_n2p63(i64) -> i64 { +block0(v0: i64): + v1 = sdiv_imm v0, -0x8000_0000_0000_0000 + ; check: sshr_imm v0, 62 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: sshr_imm v4, 63 + ; check: irsub_imm v5, 0 + return v1 +} diff --git a/cranelift/filetests/filetests/simple_preopt/fold-extended-move-wraparound.clif b/cranelift/filetests/filetests/simple_preopt/fold-extended-move-wraparound.clif new file mode 100644 index 0000000000..44342481b8 --- /dev/null +++ b/cranelift/filetests/filetests/simple_preopt/fold-extended-move-wraparound.clif @@ -0,0 +1,14 @@ +test simple_preopt +target x86_64 + +function %wraparound(i64 vmctx) -> f32 system_v { + gv0 = vmctx + gv1 = iadd_imm.i64 gv0, 48 + +block35(v0: i64): + v88 = iconst.i64 0 + v89 = iconst.i64 0x8000_0000_0000_0000 + v90 = ishl_imm v88, 0x8000_0000_0000_0000 + v91 = sshr v90, v89; check: sshr_imm v90, 0x8000_0000_0000_0000 + trap user0 +} diff --git a/cranelift/filetests/filetests/simple_preopt/rem_by_const_non_power_of_2.clif b/cranelift/filetests/filetests/simple_preopt/rem_by_const_non_power_of_2.clif new file mode 100644 index 0000000000..00d0d9f16e --- /dev/null +++ b/cranelift/filetests/filetests/simple_preopt/rem_by_const_non_power_of_2.clif @@ -0,0 +1,285 @@ +test simple_preopt +target i686 baseline + +; -------- U32 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_urem32_p7(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 7 + ; check: iconst.i32 0x2492_4925 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: ushr_imm v6, 2 + ; check: imul_imm v7, 7 + ; check: isub v0, v8 + return v1 +} + +; simple case (mul, shift) +function %t_urem32_p125(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 125 + ; check: iconst.i32 0x1062_4dd3 + ; check: umulhi v0, v2 + ; check: ushr_imm v3, 3 + ; check: imul_imm v4, 125 + ; check: isub v0, v5 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_urem32_p641(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 641 + ; check: iconst.i32 0x0066_3d81 + ; check: umulhi v0, v2 + ; check: imul_imm v3, 641 + ; check: isub v0, v4 + return v1 +} + + +; -------- S32 -------- + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_srem32_n6(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -6 + ; check: iconst.i32 0xffff_ffff_d555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: iadd v3, v4 + ; check: imul_imm v5, -6 + ; check: isub v0, v6 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_srem32_n5(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -5 + ; check: iconst.i32 0xffff_ffff_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 31 + ; check: iadd v4, v5 + ; check: imul_imm v6, -5 + ; check: isub v0, v7 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_srem32_n3(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -3 + ; check: iconst.i32 0x5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 31 + ; check: iadd v5, v6 + ; check: imul_imm v7, -3 + ; check: isub v0, v8 + return v1 +} + +; simple case w/ shift by zero (mul, add-sign-bit) +function %t_srem32_p6(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 6 + ; check: iconst.i32 0x2aaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 31 + ; check: iadd v3, v4 + ; check: imul_imm v5, 6 + ; check: isub v0, v6 + return v1 +} + +; case d > 0 && M < 0 (mull, 
add, shift, add-sign-bit) +function %t_srem32_p7(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 7 + ; check: iconst.i32 0xffff_ffff_9249_2493 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 2 + ; check: ushr_imm v5, 31 + ; check: iadd v5, v6 + ; check: imul_imm v7, 7 + ; check: isub v0, v8 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_srem32_p625(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 625 + ; check: iconst.i32 0x68db_8bad + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 8 + ; check: ushr_imm v4, 31 + ; check: iadd v4, v5 + ; check: imul_imm v6, 625 + ; check: isub v0, v7 + return v1 +} + + +; -------- U64 -------- + +; complex case (mul, sub, shift, add, shift) +function %t_urem64_p7(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 7 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: ushr_imm v6, 2 + ; check: imul_imm v7, 7 + ; check: isub v0, v8 + return v1 +} + +; simple case (mul, shift) +function %t_urem64_p9(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 9 + ; check: iconst.i64 0xe38e_38e3_8e38_e38f + ; check: umulhi v0, v2 + ; check: ushr_imm v3, 3 + ; check: imul_imm v4, 9 + ; check: isub v0, v5 + return v1 +} + +; complex case (mul, sub, shift, add, shift) +function %t_urem64_p125(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 125 + ; check: iconst.i64 0x0624_dd2f_1a9f_be77 + ; check: umulhi v0, v2 + ; check: isub v0, v3 + ; check: ushr_imm v4, 1 + ; check: iadd v5, v3 + ; check: ushr_imm v6, 6 + ; check: imul_imm v7, 125 + ; check: isub v0, v8 + return v1 +} + +; simple case w/ shift by zero (mul) +function %t_urem64_p274177(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 274177 + ; check: iconst.i64 0x3d30_f19c_d101 + ; check: umulhi v0, v2 + ; check: imul_imm v3, 0x0004_2f01 + ; check: isub v0, v4 + return v1 +} + + +; -------- S64 -------- + +; simple case (mul, shift, add-sign-bit) +function %t_srem64_n625(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -625 + ; check: iconst.i64 0xcb92_3a29_c779_a6b5 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: iadd v4, v5 + ; check: imul_imm v6, -625 + ; check: isub v0, v7 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_srem64_n6(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -6 + ; check: iconst.i64 0xd555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: iadd v3, v4 + ; check: imul_imm v5, -6 + ; check: isub v0, v6 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_srem64_n5(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -5 + ; check: iconst.i64 0x9999_9999_9999_9999 + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 1 + ; check: ushr_imm v4, 63 + ; check: iadd v4, v5 + ; check: imul_imm v6, -5 + ; check: isub v0, v7 + return v1 +} + +; case d < 0 && M > 0 (mul, sub, shift, add-sign-bit) +function %t_srem64_n3(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -3 + ; check: iconst.i64 0x5555_5555_5555_5555 + ; check: smulhi v0, v2 + ; check: isub v3, v0 + ; check: sshr_imm v4, 1 + ; check: ushr_imm v5, 63 + ; check: iadd v5, v6 + ; check: imul_imm v7, -3 + ; check: isub v0, v8 + return v1 +} + +; simple case w/ zero shift (mul, add-sign-bit) +function %t_srem64_p6(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 6 + ; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab + ; check: smulhi v0, v2 + ; check: ushr_imm v3, 63 + ; check: iadd v3, v4 + ; check: imul_imm v5, 6 
+ ; check: isub v0, v6 + return v1 +} + +; case d > 0 && M < 0 (mul, add, shift, add-sign-bit) +function %t_srem64_p15(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 15 + ; check: iconst.i64 0x8888_8888_8888_8889 + ; check: smulhi v0, v2 + ; check: iadd v3, v0 + ; check: sshr_imm v4, 3 + ; check: ushr_imm v5, 63 + ; check: iadd v5, v6 + ; check: imul_imm v7, 15 + ; check: isub v0, v8 + return v1 +} + +; simple case (mul, shift, add-sign-bit) +function %t_srem64_p625(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 625 + ; check: iconst.i64 0x346d_c5d6_3886_594b + ; check: smulhi v0, v2 + ; check: sshr_imm v3, 7 + ; check: ushr_imm v4, 63 + ; check: iadd v4, v5 + ; check: imul_imm v6, 625 + ; check: isub v0, v7 + return v1 +} diff --git a/cranelift/filetests/filetests/simple_preopt/rem_by_const_power_of_2.clif b/cranelift/filetests/filetests/simple_preopt/rem_by_const_power_of_2.clif new file mode 100644 index 0000000000..1fe085e37c --- /dev/null +++ b/cranelift/filetests/filetests/simple_preopt/rem_by_const_power_of_2.clif @@ -0,0 +1,291 @@ +test simple_preopt +target i686 baseline + +; -------- U32 -------- + +; ignored +function %t_urem32_p0(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 0 + ; check: urem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_urem32_p1(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 1 + ; check: iconst.i32 0 + return v1 +} + +; shift +function %t_urem32_p2(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 2 + ; check: band_imm v0, 1 + return v1 +} + +; shift +function %t_urem32_p2p31(i32) -> i32 { +block0(v0: i32): + v1 = urem_imm v0, 0x8000_0000 + ; check: band_imm v0, 0x7fff_ffff + return v1 +} + + +; -------- U64 -------- + +; ignored +function %t_urem64_p0(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 0 + ; check: urem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_urem64_p1(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 1 + ; check: iconst.i64 0 + return v1 +} + +; shift +function %t_urem64_p2(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 2 + ; check: band_imm v0, 1 + return v1 +} + +; shift +function %t_urem64_p2p63(i64) -> i64 { +block0(v0: i64): + v1 = urem_imm v0, 0x8000_0000_0000_0000 + ; check: band_imm v0, 0x7fff_ffff_ffff_ffff + return v1 +} + + +; -------- S32 -------- + +; ignored +function %t_srem32_n1(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -1 + ; check: srem_imm v0, -1 + return v1 +} + +; ignored +function %t_srem32_p0(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 0 + ; check: srem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_srem32_p1(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 1 + ; check: iconst.i32 0 + return v1 +} + +; shift +function %t_srem32_p2(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 2 + ; check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem32_n2(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -2 + ; check: ushr_imm v0, 31 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem32_p4(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem32_n4(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 30 + ; check: iadd v0, v3 + ; 
check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem32_p2p30(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, 0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xffff_ffff_c000_0000 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem32_n2p30(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -0x4000_0000 + ; check: sshr_imm v0, 29 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xffff_ffff_c000_0000 + ; check: isub v0, v5 + return v1 +} + +; there's no positive version of this, since -(-0x8000_0000) isn't +; representable. +function %t_srem32_n2p31(i32) -> i32 { +block0(v0: i32): + v1 = srem_imm v0, -0x8000_0000 + ; check: sshr_imm v0, 30 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xffff_ffff_8000_0000 + ; check: isub v0, v5 + return v1 +} + + +; -------- S64 -------- + +; ignored +function %t_srem64_n1(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -1 + ; check: srem_imm v0, -1 + return v1 +} + +; ignored +function %t_srem64_p0(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 0 + ; check: srem_imm v0, 0 + return v1 +} + +; converted to constant zero +function %t_srem64_p1(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 1 + ; check: iconst.i64 0 + return v1 +} + +; shift +function %t_srem64_p2(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem64_n2(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -2 + ; check: ushr_imm v0, 63 + ; check: iadd v0, v2 + ; check: band_imm v3, -2 + ; check: isub v0, v4 + return v1 +} + +; shift +function %t_srem64_p4(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 + ; check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem64_n4(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -4 + ; check: sshr_imm v0, 1 + ; check: ushr_imm v2, 62 + ; check: iadd v0, v3 + ; check: band_imm v4, -4 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem64_p2p62(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, 0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xc000_0000_0000_0000 + ; check: isub v0, v5 + return v1 +} + +; shift +function %t_srem64_n2p62(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -0x4000_0000_0000_0000 + ; check: sshr_imm v0, 61 + ; check: ushr_imm v2, 2 + ; check: iadd v0, v3 + ; check: band_imm v4, 0xc000_0000_0000_0000 + ; check: isub v0, v5 + return v1 +} + +; there's no positive version of this, since -(-0x8000_0000_0000_0000) isn't +; representable. +function %t_srem64_n2p63(i64) -> i64 { +block0(v0: i64): + v1 = srem_imm v0, -0x8000_0000_0000_0000 + ; check: sshr_imm v0, 62 + ; check: ushr_imm v2, 1 + ; check: iadd v0, v3 + ; check: band_imm v4, 0x8000_0000_0000_0000 + ; check: isub v0, v5 + return v1 +} diff --git a/cranelift/filetests/filetests/simple_preopt/simplify32.clif b/cranelift/filetests/filetests/simple_preopt/simplify32.clif new file mode 100644 index 0000000000..2582fd69aa --- /dev/null +++ b/cranelift/filetests/filetests/simple_preopt/simplify32.clif @@ -0,0 +1,61 @@ +test simple_preopt +target i686 + +;; 32-bit platforms.
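+;; +;; The functions below check that `iadd`/`isub`/`icmp` with one constant +;; operand are strengthened to their `_imm` forms (`isub` becomes `iadd_imm` +;; of the negated constant), but only for types that i686 supports natively; +;; the trailing i64 case must be left unsimplified.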
+ +function %iadd_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = iadd v0, v1 + return v2 +} +; sameln: function %iadd_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = iadd_imm v0, 2 +; nextln: return v2 +; nextln: } + +function %isub_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = isub v0, v1 + return v2 +} +; sameln: function %isub_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = iadd_imm v0, -2 +; nextln: return v2 +; nextln: } + +function %icmp_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = icmp slt v0, v1 + v3 = bint.i32 v2 + return v3 +} +; sameln: function %icmp_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = icmp_imm slt v0, 2 +; nextln: v3 = bint.i32 v2 +; nextln: return v3 +; nextln: } + +;; Don't simplify operations that would become illegal due to lack of native +;; support. +function %iadd_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 2 + v2 = iadd v0, v1 + return v2 +} +; sameln: function %iadd_imm +; nextln: block0(v0: i64): +; nextln: v1 = iconst.i64 2 +; nextln: v2 = iadd v0, v1 +; nextln: return v2 +; nextln: } + diff --git a/cranelift/filetests/filetests/simple_preopt/simplify64.clif b/cranelift/filetests/filetests/simple_preopt/simplify64.clif new file mode 100644 index 0000000000..4ceabdc335 --- /dev/null +++ b/cranelift/filetests/filetests/simple_preopt/simplify64.clif @@ -0,0 +1,295 @@ +test simple_preopt +target x86_64 + +;; 64-bit platforms. + +function %iadd_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = iadd v0, v1 + return v2 +} +; sameln: function %iadd_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = iadd_imm v0, 2 +; nextln: return v2 +; nextln: } + +function %isub_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = isub v0, v1 + return v2 +} +; sameln: function %isub_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = iadd_imm v0, -2 +; nextln: return v2 +; nextln: } + +function %icmp_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = icmp slt v0, v1 + v3 = bint.i32 v2 + return v3 +} +; sameln: function %icmp_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = icmp_imm slt v0, 2 +; nextln: v3 = bint.i32 v2 +; nextln: return v3 +; nextln: } + +function %brz_bint(i32) { +block0(v0: i32): + v3 = icmp_imm slt v0, 0 + v1 = bint.i32 v3 + v2 = select v1, v1, v1 + trapz v1, user0 + brz v1, block1 + jump block2 + +block1: + return + +block2: + return +} +; sameln: function %brz_bint +; nextln: (v0: i32): +; nextln: v3 = icmp_imm slt v0, 0 +; nextln: v1 = bint.i32 v3 +; nextln: v2 = select v3, v1, v1 +; nextln: trapz v3, user0 +; nextln: brnz v3, block2 +; nextln: jump block1 + +function %irsub_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = isub v1, v0 + return v2 +} +; sameln: function %irsub_imm +; nextln: block0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = irsub_imm v0, 2 +; nextln: return v2 +; nextln: } + +;; Zero- and sign-extensions.
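+;; +;; Shifting left and then right by (width - n) keeps only the low n bits: a +;; logical right shift zero-extends them and an arithmetic right shift +;; sign-extends them, so each pair below is rewritten to an explicit `ireduce` +;; followed by `uextend`/`sextend`. For i16 with n = 8, for instance, +;; (0x1234 << 8) >>u 8 = 0x0034 = uextend.i16(ireduce.i8(0x1234)).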
+ +;; 8 -> 16 +function %uextend_8_16() -> i16 { +block0: + v0 = iconst.i16 37 + v1 = ishl_imm v0, 8 + v2 = ushr_imm v1, 8 + return v2 +} +; sameln: function %uextend_8_16 +; nextln: block0: +; nextln: v0 = iconst.i16 37 +; nextln: v1 = ishl_imm v0, 8 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = uextend.i16 v3 +; nextln: return v2 +; nextln: } + +function %sextend_8_16() -> i16 { +block0: + v0 = iconst.i16 37 + v1 = ishl_imm v0, 8 + v2 = sshr_imm v1, 8 + return v2 +} +; sameln: function %sextend_8_16 +; nextln: block0: +; nextln: v0 = iconst.i16 37 +; nextln: v1 = ishl_imm v0, 8 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = sextend.i16 v3 +; nextln: return v2 +; nextln: } + +;; 8 -> 32 +function %uextend_8_32() -> i32 { +block0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 24 + v2 = ushr_imm v1, 24 + return v2 +} +; sameln: function %uextend_8_32 +; nextln: block0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 24 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = uextend.i32 v3 +; nextln: return v2 +; nextln: } + +function %sextend_8_32() -> i32 { +block0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 24 + v2 = sshr_imm v1, 24 + return v2 +} +; sameln: function %sextend_8_32 +; nextln: block0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 24 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = sextend.i32 v3 +; nextln: return v2 +; nextln: } + +;; 16 -> 32 +function %uextend_16_32() -> i32 { +block0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 16 + v2 = ushr_imm v1, 16 + return v2 +} +; sameln: function %uextend_16_32 +; nextln: block0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 16 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = uextend.i32 v3 +; nextln: return v2 +; nextln: } + +function %sextend_16_32() -> i32 { +block0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 16 + v2 = sshr_imm v1, 16 + return v2 +} +; sameln: function %sextend_16_32 +; nextln: block0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 16 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = sextend.i32 v3 +; nextln: return v2 +; nextln: } + +;; 8 -> 64 +function %uextend_8_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 56 + v2 = ushr_imm v1, 56 + return v2 +} +; sameln: function %uextend_8_64 +; nextln: block0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 56 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = uextend.i64 v3 +; nextln: return v2 +; nextln: } + +function %sextend_8_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 56 + v2 = sshr_imm v1, 56 + return v2 +} +; sameln: function %sextend_8_64 +; nextln: block0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 56 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = sextend.i64 v3 +; nextln: return v2 +; nextln: } + +;; 16 -> 64 +function %uextend_16_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 48 + v2 = ushr_imm v1, 48 + return v2 +} +; sameln: function %uextend_16_64 +; nextln: block0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 48 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = uextend.i64 v3 +; nextln: return v2 +; nextln: } + +function %sextend_16_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 48 + v2 = sshr_imm v1, 48 + return v2 +} +; sameln: function %sextend_16_64 +; nextln: block0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 48 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = sextend.i64 v3 +; nextln: return v2 +; nextln: } + +;; 32 -> 64 +function %uextend_32_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 32 + 
v2 = ushr_imm v1, 32 + return v2 +} +; sameln: function %uextend_32_64 +; nextln: block0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 32 +; nextln: v3 = ireduce.i32 v0 +; nextln: v2 = uextend.i64 v3 +; nextln: return v2 +; nextln: } + +function %sextend_32_64() -> i64 { +block0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 32 + v2 = sshr_imm v1, 32 + return v2 +} +; sameln: function %sextend_32_64 +; nextln: block0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 32 +; nextln: v3 = ireduce.i32 v0 +; nextln: v2 = sextend.i64 v3 +; nextln: return v2 +; nextln: } + +function %add_imm_fold(i32) -> i32 { +block0(v0: i32): + v1 = iadd_imm v0, 42 + v2 = iadd_imm v1, -42 + return v2 +} +; sameln: function %add_imm_fold(i32) +; nextln: block0(v0: i32): +; nextln: v2 -> v0 +; nextln: v1 = iadd_imm v0, 42 +; nextln: nop +; nextln: return v2 diff --git a/cranelift/filetests/filetests/verifier/bad_layout.clif b/cranelift/filetests/filetests/verifier/bad_layout.clif new file mode 100644 index 0000000000..0cc2d2ed6f --- /dev/null +++ b/cranelift/filetests/filetests/verifier/bad_layout.clif @@ -0,0 +1,21 @@ +test verifier + +function %test_1(i32) { + block0(v0: i32): + return ; error: terminator + return +} +function %test_2(i32) { + block0(v0: i32): + jump block2 ; error: a terminator instruction was encountered before the end of block0 + brz v0, block3 + block2: + jump block3 + block3: + return +} + +function %test_3(i32) { ; Ok + block0(v0: i32): + return +} diff --git a/cranelift/filetests/filetests/verifier/bitcast.clif b/cranelift/filetests/filetests/verifier/bitcast.clif new file mode 100644 index 0000000000..98ac9c6b35 --- /dev/null +++ b/cranelift/filetests/filetests/verifier/bitcast.clif @@ -0,0 +1,23 @@ +test verifier + +; bitcast between two types of equal size is ok +function %valid_bitcast1(i32) -> f32 { ; Ok +block0(v0: i32): + v1 = bitcast.f32 v0 + return v1 +} + +; bitcast to a type larger than the operand is ok +function %valid_bitcast2(i32) -> i64 { ; Ok +block0(v0: i32): + v1 = bitcast.i64 v0 + return v1 +} + +; bitcast to a smaller type is not ok +function %bad_bitcast(i64) -> i32 { +block0(v0: i64): + v1 = bitcast.i32 v0 ; error: The bitcast argument v0 doesn't fit in a type of 32 bits + return v1 +} + diff --git a/cranelift/filetests/filetests/verifier/defs_dominates_uses.clif b/cranelift/filetests/filetests/verifier/defs_dominates_uses.clif new file mode 100644 index 0000000000..c7b3b752a8 --- /dev/null +++ b/cranelift/filetests/filetests/verifier/defs_dominates_uses.clif @@ -0,0 +1,16 @@ +test verifier + +; Test verification that defs properly dominate uses. + +function %non_dominating(i32) -> i32 system_v { +block0(v0: i32): + v1 = iadd.i32 v2, v0 ; error: uses value v2 from non-dominating + v2 = iadd.i32 v1, v0 + return v2 +} + +function %inst_uses_its_own_values(i32) -> i32 system_v { +block0(v0: i32): + v1 = iadd.i32 v1, v0 ; error: uses value v1 from itself + return v1 +} diff --git a/cranelift/filetests/filetests/verifier/flags.clif b/cranelift/filetests/filetests/verifier/flags.clif new file mode 100644 index 0000000000..dc370c58cb --- /dev/null +++ b/cranelift/filetests/filetests/verifier/flags.clif @@ -0,0 +1,76 @@ +test verifier +target i686 + +; Simple, correct use of CPU flags. +function %simple(i32) -> i32 { + block0(v0: i32): + [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 + [Op2seti_abcd#490] v2 = trueif ugt v1 + [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2 + [Op1ret#c3] return v3 +} + +; Overlapping flag values of different types.
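+; (Integer and floating-point comparison results both live in the single +; physical flags register, so the verifier rejects any point where two flags +; values are live at once; here v3 is created while v2 is still live.)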
+function %overlap(i32, f32) -> i32 { + block0(v0: i32, v1: f32): + [DynRexOp1rcmp#39] v2 = ifcmp v0, v0 + [Op2fcmp#42e] v3 = ffcmp v1, v1 + [Op2setf_abcd#490] v4 = trueff gt v3 ; error: conflicting live CPU flags: v2 and v3 + [Op2seti_abcd#490] v5 = trueif ugt v2 + [Op1rr#21] v6 = band v4, v5 + [Op2urm_noflags_abcd#4b6] v7 = bint.i32 v6 + [Op1ret#c3] return v7 +} + +; CPU flags clobbered by arithmetic. +function %clobbered(i32) -> i32 { + block0(v0: i32): + [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 + [DynRexOp1rr#01] v2 = iadd v0, v0 ; error: encoding clobbers live CPU flags in v1 + [Op2seti_abcd#490] v3 = trueif ugt v1 + [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3 + [Op1ret#c3] return v4 +} + +; CPU flags not clobbered by load. +function %live_across_load(i32) -> i32 { + block0(v0: i32): + [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 + [Op1ld#8b] v2 = load.i32 v0 + [Op2seti_abcd#490] v3 = trueif ugt v1 + [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3 + [Op1ret#c3] return v4 +} + +; Correct use of CPU flags across block. +function %live_across_block(i32) -> i32 { + block0(v0: i32): + [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 + [Op1jmpb#eb] jump block1 + block1: + [Op2seti_abcd#490] v2 = trueif ugt v1 + [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2 + [Op1ret#c3] return v3 +} + +function %live_across_block_backwards(i32) -> i32 { + block0(v0: i32): + [Op1jmpb#eb] jump block2 + block1: + [Op2seti_abcd#490] v2 = trueif ugt v1 + [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2 + [Op1ret#c3] return v3 + block2: + [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 + [Op1jmpb#eb] jump block1 +} + +; Flags live into loop. +function %live_into_loop(i32) -> i32 { + block0(v0: i32): + [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 + [Op1jmpb#eb] jump block1 + block1: + [Op2seti_abcd#490] v2 = trueif ugt v1 + [Op1jmpb#eb] jump block1 +} diff --git a/cranelift/filetests/filetests/verifier/globals.clif b/cranelift/filetests/filetests/verifier/globals.clif new file mode 100644 index 0000000000..1a44cf8001 --- /dev/null +++ b/cranelift/filetests/filetests/verifier/globals.clif @@ -0,0 +1,19 @@ +test verifier +target x86_64 + +function %load_base_type(i64 vmctx) { + gv0 = vmctx + gv1 = load.i32 notrap aligned gv0 + gv2 = load.i32 notrap aligned gv1 ; error: base gv1 has type i32, which is not the pointer type i64 + +block0(v0: i64): + return +} + +function %global_value_wrong_type(i64 vmctx) { + gv0 = vmctx + +block0(v0: i64): + v1 = global_value.i32 gv0 ; error: global_value instruction with type i32 references global value with type i64 + return +} diff --git a/cranelift/filetests/filetests/verifier/heap.clif b/cranelift/filetests/filetests/verifier/heap.clif new file mode 100644 index 0000000000..ffd6bb7ac4 --- /dev/null +++ b/cranelift/filetests/filetests/verifier/heap.clif @@ -0,0 +1,45 @@ +test verifier +target x86_64 + +function %heap_base_type(i64 vmctx) { + gv0 = vmctx + gv1 = load.i32 notrap aligned gv0 + heap0 = static gv1, offset_guard 0x1000, bound 0x1_0000, index_type i32 ; error: heap base has type i32, which is not the pointer type i64 + +block0(v0: i64): + return +} + +function %invalid_base(i64 vmctx) { + gv0 = vmctx + heap0 = dynamic gv1, bound gv0, offset_guard 0x1000, index_type i64 ; error: invalid base global value gv1 + +block0(v0: i64): + return +} + +function %invalid_bound(i64 vmctx) { + gv0 = vmctx + heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i64 ; error: invalid bound global value gv1 + +block0(v0: i64): + return +} + +function %heap_bound_type(i64 vmctx) { + gv0 = vmctx + gv1 = load.i16 notrap aligned gv0 + 
heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32 ; error: heap index type i32 differs from the type of its bound, i16 + +block0(v0: i64): + return +} + +function %heap_addr_index_type(i64 vmctx, i64) { + gv0 = vmctx + heap0 = static gv0, offset_guard 0x1000, bound 0x1_0000, index_type i32 + +block0(v0: i64, v1: i64): + v2 = heap_addr.i64 heap0, v1, 0; error: index type i64 differs from heap index type i32 + return +} diff --git a/cranelift/filetests/filetests/verifier/jump_table.clif b/cranelift/filetests/filetests/verifier/jump_table.clif new file mode 100644 index 0000000000..67cd935320 --- /dev/null +++ b/cranelift/filetests/filetests/verifier/jump_table.clif @@ -0,0 +1,19 @@ +test verifier + +function %br_invalid_default(i64) { + jt0 = jump_table [block1, block1] + +block0(v0: i64): + br_table.i64 v0, block2, jt0 ; error: invalid block reference block2 +block1: + return +} + +function %br(i64) { + jt0 = jump_table [block1, block2] ; error: invalid block reference block2 + +block0(v0: i64): + br_table.i64 v0, block1, jt0 +block1: + return +} diff --git a/cranelift/filetests/filetests/verifier/memory.clif b/cranelift/filetests/filetests/verifier/memory.clif new file mode 100644 index 0000000000..496b71c815 --- /dev/null +++ b/cranelift/filetests/filetests/verifier/memory.clif @@ -0,0 +1,16 @@ +test verifier + +function %cycle() { + gv0 = load.i32 notrap aligned gv1 ; error: global value cycle: [gv0, gv1] + gv1 = load.i32 notrap aligned gv0-32 + +block1: + return +} + +function %self_cycle() { + gv0 = load.i32 notrap aligned gv0 ; error: global value cycle: [gv0] + +block1: + return +} diff --git a/cranelift/filetests/filetests/verifier/scalar-to-vector.clif b/cranelift/filetests/filetests/verifier/scalar-to-vector.clif new file mode 100644 index 0000000000..1d04db9957 --- /dev/null +++ b/cranelift/filetests/filetests/verifier/scalar-to-vector.clif @@ -0,0 +1,10 @@ +test verifier +set enable_simd=true +target x86_64 + +function %scalar_to_vector() { +block0: + v0 = iconst.i32 42 + v1 = scalar_to_vector.f32x4 v0 ; error: arg 0 (v0) has type i32, expected f32 + return +} diff --git a/cranelift/filetests/filetests/verifier/simd-lane-index.clif b/cranelift/filetests/filetests/verifier/simd-lane-index.clif new file mode 100644 index 0000000000..2f7ca8d095 --- /dev/null +++ b/cranelift/filetests/filetests/verifier/simd-lane-index.clif @@ -0,0 +1,41 @@ +test verifier +set enable_simd +target x86_64 + +function %insertlane_i32x4() { +block0: + v0 = vconst.i32x4 [0 0 0 0] + v1 = iconst.i32 42 + v2 = insertlane v0, 4, v1 ; error: The lane 4 does not index into the type i32x4 + return +} + +function %insertlane_b16x8() { +block0: + v0 = vconst.b16x8 [false false false false false false false false] + v1 = bconst.b16 true + v2 = insertlane v0, 8, v1 ; error: The lane 8 does not index into the type b16x8 + return +} + +function %insertlane_f64x2() { +block0: + v0 = vconst.f64x2 0x00 + v1 = f64const 0x0.1 + v2 = insertlane v0, 2, v1 ; error: The lane 2 does not index into the type f64x2 + return +} + +function %extractlane_i32x4() { +block0: + v0 = vconst.i32x4 [0 0 0 0] + v1 = extractlane v0, 4 ; error: The lane 4 does not index into the type i32x4 + return +} + +function %extractlane_b8x16() { +block0: + v0 = vconst.b8x16 0x00 + v1 = extractlane v0, 16 ; error: The lane 16 does not index into the type b8x16 + return +} diff --git a/cranelift/filetests/filetests/verifier/table.clif b/cranelift/filetests/filetests/verifier/table.clif new file mode 100644 index 0000000000..204ae5c93a 
--- /dev/null +++ b/cranelift/filetests/filetests/verifier/table.clif @@ -0,0 +1,46 @@ +test verifier +target x86_64 + +function %table_base_type(i64 vmctx) { + gv0 = vmctx + gv1 = load.i32 notrap aligned gv0 + table0 = dynamic gv1, element_size 1, bound gv1, index_type i32 ; error: table base has type i32, which is not the pointer type i64 + +block0(v0: i64): + return +} + +function %invalid_base(i64 vmctx) { + gv0 = vmctx + table0 = dynamic gv1, bound gv0, element_size 1, index_type i64 ; error: invalid base global value gv1 + +block0(v0: i64): + return +} + +function %invalid_bound(i64 vmctx) { + gv0 = vmctx + table0 = dynamic gv0, bound gv1, element_size 1, index_type i64 ; error: invalid bound global value gv1 + +block0(v0: i64): + return +} + +function %table_bound_type(i64 vmctx) { + gv0 = vmctx + gv1 = load.i16 notrap aligned gv0 + table0 = dynamic gv0, bound gv1, element_size 1, index_type i32 ; error: table index type i32 differs from the type of its bound, i16 + +block0(v0: i64): + return +} + +function %table_addr_index_type(i64 vmctx, i64) { + gv0 = vmctx + gv1 = load.i32 notrap aligned gv0 + table0 = dynamic gv0, element_size 1, bound gv1, index_type i32 + +block0(v0: i64, v1: i64): + v2 = table_addr.i64 table0, v1, +0; error: index type i64 differs from table index type i32 + return +} diff --git a/cranelift/filetests/filetests/verifier/type_check.clif b/cranelift/filetests/filetests/verifier/type_check.clif new file mode 100644 index 0000000000..c708ca76ad --- /dev/null +++ b/cranelift/filetests/filetests/verifier/type_check.clif @@ -0,0 +1,115 @@ +test verifier + +function %entry_block_signature_mismatch(i32) { + block0: ; error: entry block parameters (0) must match function signature (1) + return +} + +function %entry_block_arg_type(i32) { + block0(v0: f32): ; error: entry block parameter 0 expected to have type i32, got f32 + return +} + +function %incorrect_arg_type(i32, b1) -> i32 { + block0(v0: i32, v1: b1): + v2 = iadd v0, v1 ; error: arg 1 (v1) has type b1, expected i32 + return v2 +} + +function %incorrect_return_type() -> f32 { + block0: + v0 = iconst.i32 1 + return v0 ; error: arg 0 (v0) has type i32, must match function signature of f32 +} + +function %too_many_return_values() { + block0: + v0 = iconst.i32 1 + return v0 ; error: arguments of return must match function signature +} + +function %too_few_return_values() -> f32, i64 { + block0: + return ; error: arguments of return must match function signature +} + +function %type_mismatch_controlling_variable() { + block0: + v0 = iconst.i32 5 + v1 = iconst.i64 6 + v2 = iadd v0, v1 ; error: arg 1 (v1) has type i64, expected i32 + return +} + +function %fn_call_too_few_args() { + fn2 = %great_fn(i32, f32) + block0: + call fn2() ; error: mismatched argument count for `call fn2()`: got 0, expected 2 + return +} + +function %fn_call_too_many_args() { + fn5 = %best_fn() + block0: + v0 = iconst.i64 56 + v1 = f32const 0.0 + call fn5(v0, v1) ; error: mismatched argument count for `call fn5(v0, v1)`: got 2, expected 0 + return +} + +function %fn_call_incorrect_arg_type(i64) { + sig9 = (f32) + block0(v0: i64): + v1 = iconst.i32 56 + call_indirect sig9, v0(v1) ; error: arg 0 (v1) has type i32, expected f32 + return +} + +; TODO: Should we instead just verify that jump tables contain no blocks that take arguments? This +; error doesn't occur if no instruction uses the jump table. 
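+; For now the check fires at the br_table site: its target blocks must take no +; arguments, so jt1 pointing at block1(v5: i32) is rejected below.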
+function %jump_table_args() { + jt1 = jump_table [block1] + block0: + v0 = iconst.i32 0 + br_table v0, block2, jt1 ; error: takes no arguments, but had target block1 with 1 arguments + + block1(v5: i32): + return + block2: + return +} + +function %jump_args() { + block0: + v0 = iconst.i16 10 + v3 = iconst.i64 20 + jump block1(v0, v3) ; error: arg 0 (v0) has type i16, expected i64 + ; error: arg 1 (v3) has type i64, expected i16 + block1(v10: i64, v11: i16): + return +} + +function %jump_args2() { + block0: + v0 = iconst.i16 10 + v3 = iconst.i64 20 + brz v0, block1(v0, v3) ; error: arg 0 (v0) has type i16, expected i64 + ; error: arg 1 (v3) has type i64, expected i16 + jump block1(v3, v0) + block1(v10: i64, v11: i16): + return +} + +function %bad_extend() { +block0: + v0 = iconst.i32 10 + v1 = uextend.i16 v0 ; error: input i32 must be smaller than output i16 + return +} + +function %bad_reduce() { +block0: + v0 = iconst.i32 10 + v1 = ireduce.i64 v0 ; error: input i32 must be larger than output i64 + return +} diff --git a/cranelift/filetests/filetests/verifier/undeclared_vmctx.clif b/cranelift/filetests/filetests/verifier/undeclared_vmctx.clif new file mode 100644 index 0000000000..a48e7a0ef6 --- /dev/null +++ b/cranelift/filetests/filetests/verifier/undeclared_vmctx.clif @@ -0,0 +1,17 @@ +test verifier + +; Using a vmctx global value without declaring it first leads to an error. +function %vmglobal_err(i64) -> i64 { + gv4 = vmctx ; error: undeclared vmctx reference +block0(v0: i64): + v1 = global_value.i64 gv4 + return v1 +} + +; If it is declared, all is fine. +function %vmglobal_ok(i64 vmctx) -> i64 { + gv4 = vmctx +block0(v0: i64): + v1 = global_value.i64 gv4 + return v1 +} diff --git a/cranelift/filetests/filetests/verifier/unreachable_code.clif b/cranelift/filetests/filetests/verifier/unreachable_code.clif new file mode 100644 index 0000000000..0a12aac8d0 --- /dev/null +++ b/cranelift/filetests/filetests/verifier/unreachable_code.clif @@ -0,0 +1,45 @@ +test verifier + +function %test() -> i32 { ; Ok +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 0 + jump block2 + +block2: + jump block4 + +block4: + jump block2 + +block3(v2: i32): + v4 = iadd.i32 v1, v2 + jump block9(v4) + +block9(v7: i32): + v9 = iadd.i32 v2, v7 + return v9 + +} + +; Using a function argument in an unreachable block is ok. +function %arg(i32) -> i32 { +block0(v0: i32): + v1 = iadd_imm v0, 1 + return v1 + +block1: + v10 = iadd_imm v0, 10 + return v10 +} + +; Using a block argument from an unreachable block is not ok. +function %arg2(i32) -> i32 { +block0(v0: i32): + v1 = iadd v0, v10 ; error: uses value arg from non-dominating + return v1 + +block1(v10: i32): + v11 = iadd v0, v10 + return v11 +} diff --git a/cranelift/filetests/filetests/wasm/control.clif b/cranelift/filetests/filetests/wasm/control.clif new file mode 100644 index 0000000000..d00c5b3166 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/control.clif @@ -0,0 +1,65 @@ +; Test basic code generation for control flow WebAssembly instructions. 
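+; (`test compile` runs each function through the full compilation pipeline for +; every target listed below; these functions carry no filecheck directives, so +; the test passes as long as each one compiles.)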
+test compile + +target i686 haswell + +target x86_64 haswell + +function %br_if(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 1 + brz v0, block1(v1) + jump block2 + +block1(v2: i32): + return v2 + +block2: + jump block1(v0) +} + +function %br_if_not(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 1 + brnz v0, block1(v0) + jump block2 + +block1(v2: i32): + return v2 + +block2: + jump block1(v0) +} + +function %br_if_fallthrough(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 1 + brz v0, block1(v1) + ; This jump gets converted to a fallthrough. + jump block1(v0) + +block1(v2: i32): + return v2 +} + +function %undefined() { +block0: + trap user0 +} + +function %br_table(i32) { +jt0 = jump_table [block3, block1, block2] + +block0(v0: i32): + br_table v0, block4, jt0 + +block4: + trap oob + +block1: + return +block2: + return +block3: + return +} diff --git a/cranelift/filetests/filetests/wasm/conversions.clif b/cranelift/filetests/filetests/wasm/conversions.clif new file mode 100644 index 0000000000..33602166b4 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/conversions.clif @@ -0,0 +1,202 @@ +; Test code generation for WebAssembly type conversion operators. +test compile + +target x86_64 haswell + +function %i32_wrap_i64(i64) -> i32 { +block0(v0: i64): + v1 = ireduce.i32 v0 + return v1 +} + +function %i64_extend_s_i32(i32) -> i64 { +block0(v0: i32): + v1 = sextend.i64 v0 + return v1 +} + +function %i64_extend_u_i32(i32) -> i64 { +block0(v0: i32): + v1 = uextend.i64 v0 + return v1 +} + +function %i32_trunc_s_f32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_sint.i32 v0 + return v1 +} + +function %i32_trunc_u_f32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_uint.i32 v0 + return v1 +} + +function %i32_trunc_s_f64(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_sint.i32 v0 + return v1 +} + +function %i32_trunc_u_f64(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_uint.i32 v0 + return v1 +} + +function %i64_trunc_s_f32(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_sint.i64 v0 + return v1 +} + +function %i64_trunc_u_f32(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_uint.i64 v0 + return v1 +} + +function %i64_trunc_s_f64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_sint.i64 v0 + return v1 +} + +function %i64_trunc_u_f64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_uint.i64 v0 + return v1 +} + +function %i32_trunc_s_sat_f32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i32 v0 + return v1 +} + +function %i32_trunc_u_sat_f32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i32 v0 + return v1 +} + +function %i32_trunc_s_sat_f64(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i32 v0 + return v1 +} + +function %i32_trunc_u_sat_f64(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i32 v0 + return v1 +} + +function %i64_trunc_s_sat_f32(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i64 v0 + return v1 +} + +function %i64_trunc_u_sat_f32(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i64 v0 + return v1 +} + +function %i64_trunc_s_sat_f64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i64 v0 + return v1 +} + +function %i64_trunc_u_sat_f64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i64 v0 + return v1 +} + +function %f32_trunc_f64(f64) -> f32 { +block0(v0: f64): + v1 = fdemote.f32 v0 + return v1 +} + +function %f64_promote_f32(f32) -> f64 { +block0(v0: f32): + v1 = fpromote.f64 v0 + return v1 +} + +function %f32_convert_s_i32(i32) -> f32 { +block0(v0: i32): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +function 
%f32_convert_u_i32(i32) -> f32 { +block0(v0: i32): + v1 = fcvt_from_uint.f32 v0 + return v1 +} + +function %f64_convert_s_i32(i32) -> f64 { +block0(v0: i32): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +function %f64_convert_u_i32(i32) -> f64 { +block0(v0: i32): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +function %f32_convert_s_i64(i64) -> f32 { +block0(v0: i64): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +function %f32_convert_u_i64(i64) -> f32 { +block0(v0: i64): + v1 = fcvt_from_uint.f32 v0 + return v1 +} + +function %f64_convert_s_i64(i64) -> f64 { +block0(v0: i64): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +function %f64_convert_u_i64(i64) -> f64 { +block0(v0: i64): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +function %i32_reinterpret_f32(f32) -> i32 { +block0(v0: f32): + v1 = bitcast.i32 v0 + return v1 +} + +function %f32_reinterpret_i32(i32) -> f32 { +block0(v0: i32): + v1 = bitcast.f32 v0 + return v1 +} + +function %i64_reinterpret_f64(f64) -> i64 { +block0(v0: f64): + v1 = bitcast.i64 v0 + return v1 +} + +function %f64_reinterpret_i64(i64) -> f64 { +block0(v0: i64): + v1 = bitcast.f64 v0 + return v1 +} diff --git a/cranelift/filetests/filetests/wasm/f32-arith.clif b/cranelift/filetests/filetests/wasm/f32-arith.clif new file mode 100644 index 0000000000..b7a83f5434 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/f32-arith.clif @@ -0,0 +1,103 @@ +; Test basic code generation for f32 arithmetic WebAssembly instructions. +test compile + +target i686 haswell +target i686 baseline +target x86_64 haswell +target x86_64 baseline + +; Constants. + +function %f32_const() -> f32 { +block0: + v1 = f32const 0x3.0 + return v1 +} + +; Unary operations + +function %f32_abs(f32) -> f32 { +block0(v0: f32): + v1 = fabs v0 + return v1 +} + +function %f32_neg(f32) -> f32 { +block0(v0: f32): + v1 = fneg v0 + return v1 +} + +function %f32_sqrt(f32) -> f32 { +block0(v0: f32): + v1 = sqrt v0 + return v1 +} + +function %f32_ceil(f32) -> f32 { +block0(v0: f32): + v1 = ceil v0 + return v1 +} + +function %f32_floor(f32) -> f32 { +block0(v0: f32): + v1 = floor v0 + return v1 +} + +function %f32_trunc(f32) -> f32 { +block0(v0: f32): + v1 = trunc v0 + return v1 +} + +function %f32_nearest (f32) -> f32 { +block0(v0: f32): + v1 = nearest v0 + return v1 +} + +; Binary Operations + +function %f32_add(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fadd v0, v1 + return v2 +} + +function %f32_sub(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fsub v0, v1 + return v2 +} + +function %f32_mul(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmul v0, v1 + return v2 +} + +function %f32_div(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fdiv v0, v1 + return v2 +} + +function %f32_min(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmin v0, v1 + return v2 +} + +function %f32_max(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmax v0, v1 + return v2 +} + +function %f32_copysign(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fcopysign v0, v1 + return v2 +} diff --git a/cranelift/filetests/filetests/wasm/f32-compares.clif b/cranelift/filetests/filetests/wasm/f32-compares.clif new file mode 100644 index 0000000000..e569a94821 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/f32-compares.clif @@ -0,0 +1,48 @@ +; Test code generation for WebAssembly f32 comparison operators. 
+test compile + +target i686 haswell + +target x86_64 haswell + +function %f32_eq(f32, f32) -> i32 { +block0(v0: f32, v1: f32): + v2 = fcmp eq v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f32_ne(f32, f32) -> i32 { +block0(v0: f32, v1: f32): + v2 = fcmp ne v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f32_lt(f32, f32) -> i32 { +block0(v0: f32, v1: f32): + v2 = fcmp lt v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f32_gt(f32, f32) -> i32 { +block0(v0: f32, v1: f32): + v2 = fcmp gt v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f32_le(f32, f32) -> i32 { +block0(v0: f32, v1: f32): + v2 = fcmp le v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f32_ge(f32, f32) -> i32 { +block0(v0: f32, v1: f32): + v2 = fcmp ge v0, v1 + v3 = bint.i32 v2 + return v3 +} diff --git a/cranelift/filetests/filetests/wasm/f32-memory64.clif b/cranelift/filetests/filetests/wasm/f32-memory64.clif new file mode 100644 index 0000000000..33e3100537 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/f32-memory64.clif @@ -0,0 +1,26 @@ +; Test basic code generation for f32 memory WebAssembly instructions. +test compile + +; We only test on 64-bit since the heap_addr instructions and vmctx parameters +; explicitly mention the pointer width. +target x86_64 haswell + +function %f32_load(i32, i64 vmctx) -> f32 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = load.f32 v2 + return v3 +} + +function %f32_store(f32, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: f32, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + store v0, v3 + return +} diff --git a/cranelift/filetests/filetests/wasm/f64-arith.clif b/cranelift/filetests/filetests/wasm/f64-arith.clif new file mode 100644 index 0000000000..cecd954f90 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/f64-arith.clif @@ -0,0 +1,101 @@ +; Test basic code generation for f64 arithmetic WebAssembly instructions. +test compile + +target x86_64 haswell +target x86_64 baseline + +; Constants. 
+ +function %f64_const() -> f64 { +block0: + v1 = f64const 0x3.0 + return v1 +} + +; Unary operations + +function %f64_abs(f64) -> f64 { +block0(v0: f64): + v1 = fabs v0 + return v1 +} + +function %f64_neg(f64) -> f64 { +block0(v0: f64): + v1 = fneg v0 + return v1 +} + +function %f64_sqrt(f64) -> f64 { +block0(v0: f64): + v1 = sqrt v0 + return v1 +} + +function %f64_ceil(f64) -> f64 { +block0(v0: f64): + v1 = ceil v0 + return v1 +} + +function %f64_floor(f64) -> f64 { +block0(v0: f64): + v1 = floor v0 + return v1 +} + +function %f64_trunc(f64) -> f64 { +block0(v0: f64): + v1 = trunc v0 + return v1 +} + +function %f64_nearest (f64) -> f64 { +block0(v0: f64): + v1 = nearest v0 + return v1 +} + +; Binary Operations + +function %f64_add(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fadd v0, v1 + return v2 +} + +function %f64_sub(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fsub v0, v1 + return v2 +} + +function %f64_mul(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmul v0, v1 + return v2 +} + +function %f64_div(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fdiv v0, v1 + return v2 +} + +function %f64_min(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmin v0, v1 + return v2 +} + +function %f64_max(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmax v0, v1 + return v2 +} + +function %f64_copysign(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fcopysign v0, v1 + return v2 +} diff --git a/cranelift/filetests/filetests/wasm/f64-compares.clif b/cranelift/filetests/filetests/wasm/f64-compares.clif new file mode 100644 index 0000000000..b75a7634bf --- /dev/null +++ b/cranelift/filetests/filetests/wasm/f64-compares.clif @@ -0,0 +1,48 @@ +; Test code generation for WebAssembly f64 comparison operators. +test compile + +target i686 haswell + +target x86_64 haswell + +function %f64_eq(f64, f64) -> i32 { +block0(v0: f64, v1: f64): + v2 = fcmp eq v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f64_ne(f64, f64) -> i32 { +block0(v0: f64, v1: f64): + v2 = fcmp ne v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f64_lt(f64, f64) -> i32 { +block0(v0: f64, v1: f64): + v2 = fcmp lt v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f64_gt(f64, f64) -> i32 { +block0(v0: f64, v1: f64): + v2 = fcmp gt v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f64_le(f64, f64) -> i32 { +block0(v0: f64, v1: f64): + v2 = fcmp le v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %f64_ge(f64, f64) -> i32 { +block0(v0: f64, v1: f64): + v2 = fcmp ge v0, v1 + v3 = bint.i32 v2 + return v3 +} diff --git a/cranelift/filetests/filetests/wasm/f64-memory64.clif b/cranelift/filetests/filetests/wasm/f64-memory64.clif new file mode 100644 index 0000000000..c0a58de4a1 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/f64-memory64.clif @@ -0,0 +1,26 @@ +; Test basic code generation for f64 memory WebAssembly instructions. +test compile + +; We only test on 64-bit since the heap_addr instructions and vmctx parameters +; explicitly mention the pointer width. 
+target x86_64 haswell + +function %f64_load(i32, i64 vmctx) -> f64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = load.f64 v2 + return v3 +} + +function %f64_store(f64, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: f64, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + store v0, v3 + return +} diff --git a/cranelift/filetests/filetests/wasm/i32-arith.clif b/cranelift/filetests/filetests/wasm/i32-arith.clif new file mode 100644 index 0000000000..cb9597741b --- /dev/null +++ b/cranelift/filetests/filetests/wasm/i32-arith.clif @@ -0,0 +1,127 @@ +; Test basic code generation for i32 arithmetic WebAssembly instructions. +test compile + +target i686 haswell +target i686 baseline +target x86_64 haswell +target x86_64 baseline + +; Constants. + +function %i32_const() -> i32 { +block0: + v0 = iconst.i32 0x8765_4321 + return v0 +} + +; Unary operations. + +function %i32_clz(i32) -> i32 { +block0(v0: i32): + v1 = clz v0 + return v1 +} + +function %i32_ctz(i32) -> i32 { +block0(v0: i32): + v1 = ctz v0 + return v1 +} + +function %i32_popcnt(i32) -> i32 { +block0(v0: i32): + v1 = popcnt v0 + return v1 +} + +; Binary operations. + +function %i32_add(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = iadd v0, v1 + return v2 +} + +function %i32_sub(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = isub v0, v1 + return v2 +} + +function %i32_mul(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = imul v0, v1 + return v2 +} + +function %i32_div_s(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = sdiv v0, v1 + return v2 +} + +function %i32_div_u(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = udiv v0, v1 + return v2 +} + +function %i32_rem_s(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = srem v0, v1 + return v2 +} + +function %i32_rem_u(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = urem v0, v1 + return v2 +} + +function %i32_and(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = band v0, v1 + return v2 +} + +function %i32_or(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bor v0, v1 + return v2 +} + +function %i32_xor(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bxor v0, v1 + return v2 +} + +function %i32_shl(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = ishl v0, v1 + return v2 +} + +function %i32_shr_s(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = sshr v0, v1 + return v2 +} + +function %i32_shr_u(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = ushr v0, v1 + return v2 +} + +function %i32_rotl(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = rotl v0, v1 + return v2 +} + +function %i32_rotr(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = rotr v0, v1 + return v2 +} diff --git a/cranelift/filetests/filetests/wasm/i32-compares.clif b/cranelift/filetests/filetests/wasm/i32-compares.clif new file mode 100644 index 0000000000..f5be0a25c1 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/i32-compares.clif @@ -0,0 +1,83 @@ +; Test code generation for WebAssembly i32 comparison operators. 
+test compile + +target i686 haswell + +target x86_64 haswell + +function %i32_eqz(i32) -> i32 { +block0(v0: i32): + v1 = icmp_imm eq v0, 0 + v2 = bint.i32 v1 + return v2 +} + +function %i32_eq(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp eq v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %i32_ne(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp ne v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %i32_lt_s(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp slt v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %i32_lt_u(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp ult v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %i32_gt_s(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp sgt v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %i32_gt_u(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp ugt v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %i32_le_s(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp sle v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %i32_le_u(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp ule v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %i32_ge_s(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp sge v0, v1 + v3 = bint.i32 v2 + return v3 +} + +function %i32_ge_u(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp uge v0, v1 + v3 = bint.i32 v2 + return v3 +} diff --git a/cranelift/filetests/filetests/wasm/i32-memory64.clif b/cranelift/filetests/filetests/wasm/i32-memory64.clif new file mode 100644 index 0000000000..b1418c5ed1 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/i32-memory64.clif @@ -0,0 +1,87 @@ +; Test basic code generation for i32 memory WebAssembly instructions. +test compile + +; We only test on 64-bit since the heap_addr instructions and vmctx parameters +; explicitly mention the pointer width. 
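+;
+; Each function takes the Wasm address as an i32 plus an `i64 vmctx`
+; parameter; `heap_addr` turns that 32-bit address into a native pointer
+; into the heap described by `heap0` before the actual load or store.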
+target x86_64 haswell + +function %i32_load(i32, i64 vmctx) -> i32 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = load.i32 v2 + return v3 +} + +function %i32_store(i32, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + store v0, v3 + return +} + +function %i32_load8_s(i32, i64 vmctx) -> i32 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = sload8.i32 v2 + return v3 +} + +function %i32_load8_u(i32, i64 vmctx) -> i32 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = uload8.i32 v2 + return v3 +} + +function %i32_store8(i32, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + istore8 v0, v3 + return +} + +function %i32_load16_s(i32, i64 vmctx) -> i32 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = sload16.i32 v2 + return v3 +} + +function %i32_load16_u(i32, i64 vmctx) -> i32 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = uload16.i32 v2 + return v3 +} + +function %i32_store16(i32, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + istore16 v0, v3 + return +} + diff --git a/cranelift/filetests/filetests/wasm/i64-arith.clif b/cranelift/filetests/filetests/wasm/i64-arith.clif new file mode 100644 index 0000000000..b457f9942d --- /dev/null +++ b/cranelift/filetests/filetests/wasm/i64-arith.clif @@ -0,0 +1,125 @@ +; Test basic code generation for i64 arithmetic WebAssembly instructions. +test compile + +target x86_64 haswell +target x86_64 baseline + +; Constants. + +function %i64_const() -> i64 { +block0: + v0 = iconst.i64 0x8765_4321 + return v0 +} + +; Unary operations. + +function %i64_clz(i64) -> i64 { +block0(v0: i64): + v1 = clz v0 + return v1 +} + +function %i64_ctz(i64) -> i64 { +block0(v0: i64): + v1 = ctz v0 + return v1 +} + +function %i64_popcnt(i64) -> i64 { +block0(v0: i64): + v1 = popcnt v0 + return v1 +} + +; Binary operations. 
+
+function %i64_add(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = iadd v0, v1
+    return v2
+}
+
+function %i64_sub(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = isub v0, v1
+    return v2
+}
+
+function %i64_mul(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = imul v0, v1
+    return v2
+}
+
+function %i64_div_s(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = sdiv v0, v1
+    return v2
+}
+
+function %i64_div_u(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = udiv v0, v1
+    return v2
+}
+
+function %i64_rem_s(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = srem v0, v1
+    return v2
+}
+
+function %i64_rem_u(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = urem v0, v1
+    return v2
+}
+
+function %i64_and(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = band v0, v1
+    return v2
+}
+
+function %i64_or(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = bor v0, v1
+    return v2
+}
+
+function %i64_xor(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = bxor v0, v1
+    return v2
+}
+
+function %i64_shl(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = ishl v0, v1
+    return v2
+}
+
+function %i64_shr_s(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = sshr v0, v1
+    return v2
+}
+
+function %i64_shr_u(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = ushr v0, v1
+    return v2
+}
+
+function %i64_rotl(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = rotl v0, v1
+    return v2
+}
+
+function %i64_rotr(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = rotr v0, v1
+    return v2
+}
diff --git a/cranelift/filetests/filetests/wasm/i64-compares.clif b/cranelift/filetests/filetests/wasm/i64-compares.clif
new file mode 100644
index 0000000000..2863efb6c3
--- /dev/null
+++ b/cranelift/filetests/filetests/wasm/i64-compares.clif
@@ -0,0 +1,81 @@
+; Test code generation for WebAssembly i64 comparison operators.
+test compile
+
+target x86_64 haswell
+
+function %i64_eqz(i64) -> i32 {
+block0(v0: i64):
+    v1 = icmp_imm eq v0, 0
+    v2 = bint.i32 v1
+    return v2
+}
+
+function %i64_eq(i64, i64) -> i32 {
+block0(v0: i64, v1: i64):
+    v2 = icmp eq v0, v1
+    v3 = bint.i32 v2
+    return v3
+}
+
+function %i64_ne(i64, i64) -> i32 {
+block0(v0: i64, v1: i64):
+    v2 = icmp ne v0, v1
+    v3 = bint.i32 v2
+    return v3
+}
+
+function %i64_lt_s(i64, i64) -> i32 {
+block0(v0: i64, v1: i64):
+    v2 = icmp slt v0, v1
+    v3 = bint.i32 v2
+    return v3
+}
+
+function %i64_lt_u(i64, i64) -> i32 {
+block0(v0: i64, v1: i64):
+    v2 = icmp ult v0, v1
+    v3 = bint.i32 v2
+    return v3
+}
+
+function %i64_gt_s(i64, i64) -> i32 {
+block0(v0: i64, v1: i64):
+    v2 = icmp sgt v0, v1
+    v3 = bint.i32 v2
+    return v3
+}
+
+function %i64_gt_u(i64, i64) -> i32 {
+block0(v0: i64, v1: i64):
+    v2 = icmp ugt v0, v1
+    v3 = bint.i32 v2
+    return v3
+}
+
+function %i64_le_s(i64, i64) -> i32 {
+block0(v0: i64, v1: i64):
+    v2 = icmp sle v0, v1
+    v3 = bint.i32 v2
+    return v3
+}
+
+function %i64_le_u(i64, i64) -> i32 {
+block0(v0: i64, v1: i64):
+    v2 = icmp ule v0, v1
+    v3 = bint.i32 v2
+    return v3
+}
+
+function %i64_ge_s(i64, i64) -> i32 {
+block0(v0: i64, v1: i64):
+    v2 = icmp sge v0, v1
+    v3 = bint.i32 v2
+    return v3
+}
+
+function %i64_ge_u(i64, i64) -> i32 {
+block0(v0: i64, v1: i64):
+    v2 = icmp uge v0, v1
+    v3 = bint.i32 v2
+    return v3
+}
diff --git a/cranelift/filetests/filetests/wasm/i64-memory64.clif b/cranelift/filetests/filetests/wasm/i64-memory64.clif
new file mode 100644
index 0000000000..f2b34fc8b0
--- /dev/null
+++ b/cranelift/filetests/filetests/wasm/i64-memory64.clif
@@ -0,0 +1,116 @@
+; Test basic code generation for i64 memory WebAssembly instructions.
+test compile
+
+; We only test on 64-bit since the heap_addr instructions and vmctx parameters
+; explicitly mention the pointer width.
+target x86_64 haswell + +function %i64_load(i32, i64 vmctx) -> i64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = load.i64 v2 + return v3 +} + +function %i64_store(i64, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i64, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + store v0, v3 + return +} + +function %i64_load8_s(i32, i64 vmctx) -> i64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = sload8.i64 v2 + return v3 +} + +function %i64_load8_u(i32, i64 vmctx) -> i64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = uload8.i64 v2 + return v3 +} + +function %i64_store8(i64, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i64, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + istore8 v0, v3 + return +} + +function %i64_load16_s(i32, i64 vmctx) -> i64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = sload16.i64 v2 + return v3 +} + +function %i64_load16_u(i32, i64 vmctx) -> i64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = uload16.i64 v2 + return v3 +} + +function %i64_store16(i64, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i64, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + istore16 v0, v3 + return +} + +function %i64_load32_s(i32, i64 vmctx) -> i64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = sload32.i64 v2 + return v3 +} + +function %i64_load32_u(i32, i64 vmctx) -> i64 { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i32, v1: i64): + v2 = heap_addr.i64 heap0, v0, 1 + v3 = uload32.i64 v2 + return v3 +} + +function %i64_store32(i64, i32, i64 vmctx) { + gv0 = vmctx + heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 + +block0(v0: i64, v1: i32, v2: i64): + v3 = heap_addr.i64 heap0, v1, 1 + istore32 v0, v3 + return +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-b1.clif b/cranelift/filetests/filetests/wasm/multi-val-b1.clif new file mode 100644 index 0000000000..7a4d4d02b0 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-b1.clif @@ -0,0 +1,68 @@ +test compile +target x86_64 haswell + +;; `b1` return values need to be legalized into bytes so that they can be stored +;; in memory. 
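+;;
+;; Concretely, each returned b1 is widened via `bint.i8` and `uextend.i32` and
+;; written with `istore8`; the caller reads it back through `uload8`,
+;; `ireduce.i8`, `raw_bitcast.b8` and `breduce.b1`, as the `check:` lines
+;; below spell out.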
+ +function %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1 { +;; check: function %return_4_b1s(b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] fast { + +block0(v0: b1, v1: b1, v2: b1, v3: b1): +; check: block0(v0: b1 [%rsi], v1: b1 [%rdx], v2: b1 [%rcx], v3: b1 [%r8], v4: i64 [%rdi], v13: i64 [%rbp]): + + return v0, v1, v2, v3 + ; check: v5 = bint.i8 v0 + ; nextln: v9 = uextend.i32 v5 + ; nextln: istore8 notrap aligned v9, v4 + ; nextln: v6 = bint.i8 v1 + ; nextln: v10 = uextend.i32 v6 + ; nextln: istore8 notrap aligned v10, v4+1 + ; nextln: v7 = bint.i8 v2 + ; nextln: v11 = uextend.i32 v7 + ; nextln: istore8 notrap aligned v11, v4+2 + ; nextln: v8 = bint.i8 v3 + ; nextln: v12 = uextend.i32 v8 + ; nextln: istore8 notrap aligned v12, v4+3 +} + +function %call_4_b1s() { +; check: function %call_4_b1s(i64 fp [%rbp], i64 csr [%rbx]) -> i64 fp [%rbp], i64 csr [%rbx] fast { +; nextln: ss0 = sret_slot 4, offset -28 + + fn0 = colocated %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1 + ; check: sig0 = (b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi]) -> i64 sret [%rax] fast + +block0: +; check: block0(v26: i64 [%rbp], v27: i64 [%rbx]): + + v0 = bconst.b1 true + v1 = bconst.b1 false + v2 = bconst.b1 true + v3 = bconst.b1 false + + ; check: v8 = stack_addr.i64 ss0 + v4, v5, v6, v7 = call fn0(v0, v1, v2, v3) + ; check: v9 = call fn0(v0, v1, v2, v3, v8) + ; nextln: v22 = uload8.i32 notrap aligned v9 + ; nextln: v10 = ireduce.i8 v22 + ; nextln: v11 = raw_bitcast.b8 v10 + ; nextln: v12 = breduce.b1 v11 + ; nextln: v4 -> v12 + ; nextln: v23 = uload8.i32 notrap aligned v9+1 + ; nextln: v13 = ireduce.i8 v23 + ; nextln: v14 = raw_bitcast.b8 v13 + ; nextln: v15 = breduce.b1 v14 + ; nextln: v5 -> v15 + ; nextln: v24 = uload8.i32 notrap aligned v9+2 + ; nextln: v16 = ireduce.i8 v24 + ; nextln: v17 = raw_bitcast.b8 v16 + ; nextln: v18 = breduce.b1 v17 + ; nextln: v6 -> v18 + ; nextln: v25 = uload8.i32 notrap aligned v9+3 + ; nextln: v19 = ireduce.i8 v25 + ; nextln: v20 = raw_bitcast.b8 v19 + ; nextln: v21 = breduce.b1 v20 + ; nextln: v7 -> v21 + + return +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif b/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif new file mode 100644 index 0000000000..6f5afd4700 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif @@ -0,0 +1,26 @@ +test legalizer +target x86_64 haswell + +;; Indirect calls with many returns. 
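+;;
+;; With more results than return registers, the legalizer allocates an
+;; `sret_slot`, passes its address to the callee as a hidden struct-return
+;; pointer, and reloads each result from consecutive offsets after the call,
+;; as the `check:` lines show.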
+ +function %call_indirect_many_rets(i64) { + ; check: ss0 = sret_slot 32 + + sig0 = () -> i64, i64, i64, i64 + ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast + +block0(v0: i64): + v1, v2, v3, v4 = call_indirect sig0, v0() + ; check: v5 = stack_addr.i64 ss0 + ; nextln: v6 = call_indirect sig0, v0(v5) + ; nextln: v7 = load.i64 notrap aligned v6 + ; nextln: v1 -> v7 + ; nextln: v8 = load.i64 notrap aligned v6+8 + ; nextln: v2 -> v8 + ; nextln: v9 = load.i64 notrap aligned v6+16 + ; nextln: v3 -> v9 + ; nextln: v10 = load.i64 notrap aligned v6+24 + ; nextln: v4 -> v10 + + return +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-f32.clif b/cranelift/filetests/filetests/wasm/multi-val-f32.clif new file mode 100644 index 0000000000..b69b71e047 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-f32.clif @@ -0,0 +1,44 @@ +test compile +target x86_64 haswell + +;; Returning many f32s + +function %return_2_f32s() -> f32, f32 { +block0: + v0 = f32const 0x0.0 + v1 = f32const 0x1.0 + return v0, v1 +} + +function %return_3_f32s() -> f32, f32, f32 { +block0: + v0 = f32const 0x0.0 + v1 = f32const 0x1.0 + v2 = f32const 0x2.0 + return v0, v1, v2 +} + +function %return_4_f32s() -> f32, f32, f32, f32 { +block0: + v0 = f32const 0x0.0 + v1 = f32const 0x1.0 + v2 = f32const 0x2.0 + v3 = f32const 0x3.0 + return v0, v1, v2, v3 +} + +;; Calling functions that return many f32s + +function %call() -> f32 { + fn0 = %a() -> f32, f32 + fn1 = %b(f32, f32) -> f32, f32, f32 + fn2 = %c(f32, f32, f32) -> f32, f32, f32, f32 +block0: + v0, v1 = call fn0() + v2, v3, v4 = call fn1(v0, v1) + v5, v6, v7, v8 = call fn2(v2, v3, v4) + v9 = fadd v5, v6 + v10 = fadd v7, v8 + v11 = fadd v9, v10 + return v11 +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-f64.clif b/cranelift/filetests/filetests/wasm/multi-val-f64.clif new file mode 100644 index 0000000000..afb6585efc --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-f64.clif @@ -0,0 +1,44 @@ +test compile +target x86_64 haswell + +;; Returning many f64s + +function %return_2_f64s() -> f64, f64 { +block0: + v0 = f64const 0x0.0 + v1 = f64const 0x1.0 + return v0, v1 +} + +function %return_3_f64s() -> f64, f64, f64 { +block0: + v0 = f64const 0x0.0 + v1 = f64const 0x1.0 + v2 = f64const 0x2.0 + return v0, v1, v2 +} + +function %return_4_f64s() -> f64, f64, f64, f64 { +block0: + v0 = f64const 0x0.0 + v1 = f64const 0x1.0 + v2 = f64const 0x2.0 + v3 = f64const 0x3.0 + return v0, v1, v2, v3 +} + +;; Calling functions that return many f64s + +function %call() -> f64 { + fn0 = %a() -> f64, f64 + fn1 = %b(f64, f64) -> f64, f64, f64 + fn2 = %c(f64, f64, f64) -> f64, f64, f64, f64 +block0: + v0, v1 = call fn0() + v2, v3, v4 = call fn1(v0, v1) + v5, v6, v7, v8 = call fn2(v2, v3, v4) + v9 = fadd v5, v6 + v10 = fadd v7, v8 + v11 = fadd v9, v10 + return v11 +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-i32.clif b/cranelift/filetests/filetests/wasm/multi-val-i32.clif new file mode 100644 index 0000000000..035cc2e332 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-i32.clif @@ -0,0 +1,44 @@ +test compile +target x86_64 haswell + +;; Returning many i32s + +function %return_2_i32s() -> i32, i32 { +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + return v0, v1 +} + +function %return_3_i32s() -> i32, i32, i32 { +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + v2 = iconst.i32 2 + return v0, v1, v2 +} + +function %return_4_i32s() -> i32, i32, i32, i32 { +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + v2 = iconst.i32 
2 + v3 = iconst.i32 3 + return v0, v1, v2, v3 +} + +;; Calling functions that return many i32s + +function %call() -> i32 { + fn0 = %a() -> i32, i32 + fn1 = %b(i32, i32) -> i32, i32, i32 + fn2 = %c(i32, i32, i32) -> i32, i32, i32, i32 +block0: + v0, v1 = call fn0() + v2, v3, v4 = call fn1(v0, v1) + v5, v6, v7, v8 = call fn2(v2, v3, v4) + v9 = iadd v5, v6 + v10 = iadd v7, v8 + v11 = iadd v9, v10 + return v11 +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-i64.clif b/cranelift/filetests/filetests/wasm/multi-val-i64.clif new file mode 100644 index 0000000000..bacaf8240f --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-i64.clif @@ -0,0 +1,44 @@ +test compile +target x86_64 haswell + +;; Returning many i64s + +function %return_2_i64s() -> i64, i64 { +block0: + v0 = iconst.i64 0 + v1 = iconst.i64 1 + return v0, v1 +} + +function %return_3_i64s() -> i64, i64, i64 { +block0: + v0 = iconst.i64 0 + v1 = iconst.i64 1 + v2 = iconst.i64 2 + return v0, v1, v2 +} + +function %return_4_i64s() -> i64, i64, i64, i64 { +block0: + v0 = iconst.i64 0 + v1 = iconst.i64 1 + v2 = iconst.i64 2 + v3 = iconst.i64 3 + return v0, v1, v2, v3 +} + +;; Calling functions that return many i64s + +function %call() -> i64 { + fn0 = %a() -> i64, i64 + fn1 = %b(i64, i64) -> i64, i64, i64 + fn2 = %c(i64, i64, i64) -> i64, i64, i64, i64 +block0: + v0, v1 = call fn0() + v2, v3, v4 = call fn1(v0, v1) + v5, v6, v7, v8 = call fn2(v2, v3, v4) + v9 = iadd v5, v6 + v10 = iadd v7, v8 + v11 = iadd v9, v10 + return v11 +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-mixed.clif b/cranelift/filetests/filetests/wasm/multi-val-mixed.clif new file mode 100644 index 0000000000..e7289332c7 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-mixed.clif @@ -0,0 +1,2098 @@ +test compile +target x86_64 haswell + +;; Returning many mixed values. +;; +;; This test was generated programmatically with this python script: +;; +;; ``` +;; from itertools import permutations +;; +;; def make_val(i, r): +;; val = None +;; op = None +;; if r == "f32": +;; val = "0x0.0" +;; op = "f32const" +;; elif r == "f64": +;; val = "0x0.0" +;; op = "f64const" +;; elif r == "i32": +;; val = "0" +;; op = "iconst.i32" +;; elif r == "i64": +;; val = "0" +;; op = "iconst.i64" +;; elif r == "b1": +;; val = "true" +;; op = "bconst.b1" +;; else: +;; raise Exception("bad r = " + str(r)) +;; return " v" + str(i) + " = " + op + " " + val +;; +;; def make_returner(results): +;; results = list(results) +;; head = "function %return_" + "_".join(results) + "() -> " + ", ".join(results) + " {\n" +;; block = "block0:\n" +;; vals = [make_val(i, r) for i, r in enumerate(results)] +;; ret = " return " + ", ".join(("v" + str(i) for i in range(0, len(results)))) +;; return head + block + "\n".join(vals) + "\n" + ret + "\n}\n" +;; +;; def make_caller(results): +;; results = list(results) +;; head = "function %call_" + "_".join(results) + "() {\n" +;; fn_decl = " fn0 = %foo() -> " + ",".join(results) + "\n" +;; block = "block0:\n" +;; ret_vars = ["v" + str(i) for i, r in enumerate(results)] +;; call = " " + ",".join(ret_vars) + " = call fn0()\n" +;; ret = " return\n" +;; tail = "}\n" +;; return head + fn_decl + block + call + ret + tail +;; +;; for results in permutations(["i32", "i64", "f32", "f64", "b1"]): +;; print make_returner(results) +;; print make_caller(results) +;; ``` +;; +;; If you're modifying this test, it is likely easier to modify the script and +;; regenerate the test. 
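+;;
+;; (The script above uses Python 2 `print` statements; under Python 3 the two
+;; `print`s would need parentheses.)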
+ +function %return_i32_i64_f32_f64_b1() -> i32, i64, f32, f64, b1 { +block0: + v0 = iconst.i32 0 + v1 = iconst.i64 0 + v2 = f32const 0x0.0 + v3 = f64const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i32_i64_f32_f64_b1() { + fn0 = %foo() -> i32,i64,f32,f64,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_i64_f32_b1_f64() -> i32, i64, f32, b1, f64 { +block0: + v0 = iconst.i32 0 + v1 = iconst.i64 0 + v2 = f32const 0x0.0 + v3 = bconst.b1 true + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_i64_f32_b1_f64() { + fn0 = %foo() -> i32,i64,f32,b1,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_i64_f64_f32_b1() -> i32, i64, f64, f32, b1 { +block0: + v0 = iconst.i32 0 + v1 = iconst.i64 0 + v2 = f64const 0x0.0 + v3 = f32const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i32_i64_f64_f32_b1() { + fn0 = %foo() -> i32,i64,f64,f32,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_i64_f64_b1_f32() -> i32, i64, f64, b1, f32 { +block0: + v0 = iconst.i32 0 + v1 = iconst.i64 0 + v2 = f64const 0x0.0 + v3 = bconst.b1 true + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_i64_f64_b1_f32() { + fn0 = %foo() -> i32,i64,f64,b1,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_i64_b1_f32_f64() -> i32, i64, b1, f32, f64 { +block0: + v0 = iconst.i32 0 + v1 = iconst.i64 0 + v2 = bconst.b1 true + v3 = f32const 0x0.0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_i64_b1_f32_f64() { + fn0 = %foo() -> i32,i64,b1,f32,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_i64_b1_f64_f32() -> i32, i64, b1, f64, f32 { +block0: + v0 = iconst.i32 0 + v1 = iconst.i64 0 + v2 = bconst.b1 true + v3 = f64const 0x0.0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_i64_b1_f64_f32() { + fn0 = %foo() -> i32,i64,b1,f64,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f32_i64_f64_b1() -> i32, f32, i64, f64, b1 { +block0: + v0 = iconst.i32 0 + v1 = f32const 0x0.0 + v2 = iconst.i64 0 + v3 = f64const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i32_f32_i64_f64_b1() { + fn0 = %foo() -> i32,f32,i64,f64,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f32_i64_b1_f64() -> i32, f32, i64, b1, f64 { +block0: + v0 = iconst.i32 0 + v1 = f32const 0x0.0 + v2 = iconst.i64 0 + v3 = bconst.b1 true + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f32_i64_b1_f64() { + fn0 = %foo() -> i32,f32,i64,b1,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f32_f64_i64_b1() -> i32, f32, f64, i64, b1 { +block0: + v0 = iconst.i32 0 + v1 = f32const 0x0.0 + v2 = f64const 0x0.0 + v3 = iconst.i64 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i32_f32_f64_i64_b1() { + fn0 = %foo() -> i32,f32,f64,i64,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f32_f64_b1_i64() -> i32, f32, f64, b1, i64 { +block0: + v0 = iconst.i32 0 + v1 = f32const 0x0.0 + v2 = f64const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f32_f64_b1_i64() { + fn0 = %foo() -> i32,f32,f64,b1,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f32_b1_i64_f64() -> i32, f32, b1, i64, f64 { +block0: + v0 = iconst.i32 0 + v1 = f32const 
0x0.0 + v2 = bconst.b1 true + v3 = iconst.i64 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f32_b1_i64_f64() { + fn0 = %foo() -> i32,f32,b1,i64,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f32_b1_f64_i64() -> i32, f32, b1, f64, i64 { +block0: + v0 = iconst.i32 0 + v1 = f32const 0x0.0 + v2 = bconst.b1 true + v3 = f64const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f32_b1_f64_i64() { + fn0 = %foo() -> i32,f32,b1,f64,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f64_i64_f32_b1() -> i32, f64, i64, f32, b1 { +block0: + v0 = iconst.i32 0 + v1 = f64const 0x0.0 + v2 = iconst.i64 0 + v3 = f32const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i32_f64_i64_f32_b1() { + fn0 = %foo() -> i32,f64,i64,f32,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f64_i64_b1_f32() -> i32, f64, i64, b1, f32 { +block0: + v0 = iconst.i32 0 + v1 = f64const 0x0.0 + v2 = iconst.i64 0 + v3 = bconst.b1 true + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f64_i64_b1_f32() { + fn0 = %foo() -> i32,f64,i64,b1,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f64_f32_i64_b1() -> i32, f64, f32, i64, b1 { +block0: + v0 = iconst.i32 0 + v1 = f64const 0x0.0 + v2 = f32const 0x0.0 + v3 = iconst.i64 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i32_f64_f32_i64_b1() { + fn0 = %foo() -> i32,f64,f32,i64,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f64_f32_b1_i64() -> i32, f64, f32, b1, i64 { +block0: + v0 = iconst.i32 0 + v1 = f64const 0x0.0 + v2 = f32const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f64_f32_b1_i64() { + fn0 = %foo() -> i32,f64,f32,b1,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f64_b1_i64_f32() -> i32, f64, b1, i64, f32 { +block0: + v0 = iconst.i32 0 + v1 = f64const 0x0.0 + v2 = bconst.b1 true + v3 = iconst.i64 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f64_b1_i64_f32() { + fn0 = %foo() -> i32,f64,b1,i64,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_f64_b1_f32_i64() -> i32, f64, b1, f32, i64 { +block0: + v0 = iconst.i32 0 + v1 = f64const 0x0.0 + v2 = bconst.b1 true + v3 = f32const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_f64_b1_f32_i64() { + fn0 = %foo() -> i32,f64,b1,f32,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_b1_i64_f32_f64() -> i32, b1, i64, f32, f64 { +block0: + v0 = iconst.i32 0 + v1 = bconst.b1 true + v2 = iconst.i64 0 + v3 = f32const 0x0.0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_b1_i64_f32_f64() { + fn0 = %foo() -> i32,b1,i64,f32,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_b1_i64_f64_f32() -> i32, b1, i64, f64, f32 { +block0: + v0 = iconst.i32 0 + v1 = bconst.b1 true + v2 = iconst.i64 0 + v3 = f64const 0x0.0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_b1_i64_f64_f32() { + fn0 = %foo() -> i32,b1,i64,f64,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_b1_f32_i64_f64() -> i32, b1, f32, i64, f64 { +block0: + v0 = iconst.i32 0 + v1 = bconst.b1 true + v2 = f32const 0x0.0 + v3 = iconst.i64 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function 
%call_i32_b1_f32_i64_f64() { + fn0 = %foo() -> i32,b1,f32,i64,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_b1_f32_f64_i64() -> i32, b1, f32, f64, i64 { +block0: + v0 = iconst.i32 0 + v1 = bconst.b1 true + v2 = f32const 0x0.0 + v3 = f64const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_b1_f32_f64_i64() { + fn0 = %foo() -> i32,b1,f32,f64,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_b1_f64_i64_f32() -> i32, b1, f64, i64, f32 { +block0: + v0 = iconst.i32 0 + v1 = bconst.b1 true + v2 = f64const 0x0.0 + v3 = iconst.i64 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_b1_f64_i64_f32() { + fn0 = %foo() -> i32,b1,f64,i64,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i32_b1_f64_f32_i64() -> i32, b1, f64, f32, i64 { +block0: + v0 = iconst.i32 0 + v1 = bconst.b1 true + v2 = f64const 0x0.0 + v3 = f32const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_i32_b1_f64_f32_i64() { + fn0 = %foo() -> i32,b1,f64,f32,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_i32_f32_f64_b1() -> i64, i32, f32, f64, b1 { +block0: + v0 = iconst.i64 0 + v1 = iconst.i32 0 + v2 = f32const 0x0.0 + v3 = f64const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i64_i32_f32_f64_b1() { + fn0 = %foo() -> i64,i32,f32,f64,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_i32_f32_b1_f64() -> i64, i32, f32, b1, f64 { +block0: + v0 = iconst.i64 0 + v1 = iconst.i32 0 + v2 = f32const 0x0.0 + v3 = bconst.b1 true + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_i32_f32_b1_f64() { + fn0 = %foo() -> i64,i32,f32,b1,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_i32_f64_f32_b1() -> i64, i32, f64, f32, b1 { +block0: + v0 = iconst.i64 0 + v1 = iconst.i32 0 + v2 = f64const 0x0.0 + v3 = f32const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i64_i32_f64_f32_b1() { + fn0 = %foo() -> i64,i32,f64,f32,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_i32_f64_b1_f32() -> i64, i32, f64, b1, f32 { +block0: + v0 = iconst.i64 0 + v1 = iconst.i32 0 + v2 = f64const 0x0.0 + v3 = bconst.b1 true + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_i32_f64_b1_f32() { + fn0 = %foo() -> i64,i32,f64,b1,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_i32_b1_f32_f64() -> i64, i32, b1, f32, f64 { +block0: + v0 = iconst.i64 0 + v1 = iconst.i32 0 + v2 = bconst.b1 true + v3 = f32const 0x0.0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_i32_b1_f32_f64() { + fn0 = %foo() -> i64,i32,b1,f32,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_i32_b1_f64_f32() -> i64, i32, b1, f64, f32 { +block0: + v0 = iconst.i64 0 + v1 = iconst.i32 0 + v2 = bconst.b1 true + v3 = f64const 0x0.0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_i32_b1_f64_f32() { + fn0 = %foo() -> i64,i32,b1,f64,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f32_i32_f64_b1() -> i64, f32, i32, f64, b1 { +block0: + v0 = iconst.i64 0 + v1 = f32const 0x0.0 + v2 = iconst.i32 0 + v3 = f64const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i64_f32_i32_f64_b1() { + fn0 = %foo() -> i64,f32,i32,f64,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return 
+} + +function %return_i64_f32_i32_b1_f64() -> i64, f32, i32, b1, f64 { +block0: + v0 = iconst.i64 0 + v1 = f32const 0x0.0 + v2 = iconst.i32 0 + v3 = bconst.b1 true + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f32_i32_b1_f64() { + fn0 = %foo() -> i64,f32,i32,b1,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f32_f64_i32_b1() -> i64, f32, f64, i32, b1 { +block0: + v0 = iconst.i64 0 + v1 = f32const 0x0.0 + v2 = f64const 0x0.0 + v3 = iconst.i32 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i64_f32_f64_i32_b1() { + fn0 = %foo() -> i64,f32,f64,i32,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f32_f64_b1_i32() -> i64, f32, f64, b1, i32 { +block0: + v0 = iconst.i64 0 + v1 = f32const 0x0.0 + v2 = f64const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f32_f64_b1_i32() { + fn0 = %foo() -> i64,f32,f64,b1,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f32_b1_i32_f64() -> i64, f32, b1, i32, f64 { +block0: + v0 = iconst.i64 0 + v1 = f32const 0x0.0 + v2 = bconst.b1 true + v3 = iconst.i32 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f32_b1_i32_f64() { + fn0 = %foo() -> i64,f32,b1,i32,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f32_b1_f64_i32() -> i64, f32, b1, f64, i32 { +block0: + v0 = iconst.i64 0 + v1 = f32const 0x0.0 + v2 = bconst.b1 true + v3 = f64const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f32_b1_f64_i32() { + fn0 = %foo() -> i64,f32,b1,f64,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f64_i32_f32_b1() -> i64, f64, i32, f32, b1 { +block0: + v0 = iconst.i64 0 + v1 = f64const 0x0.0 + v2 = iconst.i32 0 + v3 = f32const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i64_f64_i32_f32_b1() { + fn0 = %foo() -> i64,f64,i32,f32,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f64_i32_b1_f32() -> i64, f64, i32, b1, f32 { +block0: + v0 = iconst.i64 0 + v1 = f64const 0x0.0 + v2 = iconst.i32 0 + v3 = bconst.b1 true + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f64_i32_b1_f32() { + fn0 = %foo() -> i64,f64,i32,b1,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f64_f32_i32_b1() -> i64, f64, f32, i32, b1 { +block0: + v0 = iconst.i64 0 + v1 = f64const 0x0.0 + v2 = f32const 0x0.0 + v3 = iconst.i32 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_i64_f64_f32_i32_b1() { + fn0 = %foo() -> i64,f64,f32,i32,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f64_f32_b1_i32() -> i64, f64, f32, b1, i32 { +block0: + v0 = iconst.i64 0 + v1 = f64const 0x0.0 + v2 = f32const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f64_f32_b1_i32() { + fn0 = %foo() -> i64,f64,f32,b1,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f64_b1_i32_f32() -> i64, f64, b1, i32, f32 { +block0: + v0 = iconst.i64 0 + v1 = f64const 0x0.0 + v2 = bconst.b1 true + v3 = iconst.i32 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f64_b1_i32_f32() { + fn0 = %foo() -> i64,f64,b1,i32,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_f64_b1_f32_i32() -> i64, f64, b1, f32, i32 { +block0: + v0 = iconst.i64 0 + v1 = 
f64const 0x0.0 + v2 = bconst.b1 true + v3 = f32const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_f64_b1_f32_i32() { + fn0 = %foo() -> i64,f64,b1,f32,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_b1_i32_f32_f64() -> i64, b1, i32, f32, f64 { +block0: + v0 = iconst.i64 0 + v1 = bconst.b1 true + v2 = iconst.i32 0 + v3 = f32const 0x0.0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_b1_i32_f32_f64() { + fn0 = %foo() -> i64,b1,i32,f32,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_b1_i32_f64_f32() -> i64, b1, i32, f64, f32 { +block0: + v0 = iconst.i64 0 + v1 = bconst.b1 true + v2 = iconst.i32 0 + v3 = f64const 0x0.0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_b1_i32_f64_f32() { + fn0 = %foo() -> i64,b1,i32,f64,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_b1_f32_i32_f64() -> i64, b1, f32, i32, f64 { +block0: + v0 = iconst.i64 0 + v1 = bconst.b1 true + v2 = f32const 0x0.0 + v3 = iconst.i32 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_b1_f32_i32_f64() { + fn0 = %foo() -> i64,b1,f32,i32,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_b1_f32_f64_i32() -> i64, b1, f32, f64, i32 { +block0: + v0 = iconst.i64 0 + v1 = bconst.b1 true + v2 = f32const 0x0.0 + v3 = f64const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_b1_f32_f64_i32() { + fn0 = %foo() -> i64,b1,f32,f64,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_b1_f64_i32_f32() -> i64, b1, f64, i32, f32 { +block0: + v0 = iconst.i64 0 + v1 = bconst.b1 true + v2 = f64const 0x0.0 + v3 = iconst.i32 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_b1_f64_i32_f32() { + fn0 = %foo() -> i64,b1,f64,i32,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_i64_b1_f64_f32_i32() -> i64, b1, f64, f32, i32 { +block0: + v0 = iconst.i64 0 + v1 = bconst.b1 true + v2 = f64const 0x0.0 + v3 = f32const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_i64_b1_f64_f32_i32() { + fn0 = %foo() -> i64,b1,f64,f32,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i32_i64_f64_b1() -> f32, i32, i64, f64, b1 { +block0: + v0 = f32const 0x0.0 + v1 = iconst.i32 0 + v2 = iconst.i64 0 + v3 = f64const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f32_i32_i64_f64_b1() { + fn0 = %foo() -> f32,i32,i64,f64,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i32_i64_b1_f64() -> f32, i32, i64, b1, f64 { +block0: + v0 = f32const 0x0.0 + v1 = iconst.i32 0 + v2 = iconst.i64 0 + v3 = bconst.b1 true + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_i32_i64_b1_f64() { + fn0 = %foo() -> f32,i32,i64,b1,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i32_f64_i64_b1() -> f32, i32, f64, i64, b1 { +block0: + v0 = f32const 0x0.0 + v1 = iconst.i32 0 + v2 = f64const 0x0.0 + v3 = iconst.i64 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f32_i32_f64_i64_b1() { + fn0 = %foo() -> f32,i32,f64,i64,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i32_f64_b1_i64() -> f32, i32, f64, b1, i64 { +block0: + v0 = f32const 0x0.0 + v1 = iconst.i32 0 + v2 = f64const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + 
+function %call_f32_i32_f64_b1_i64() { + fn0 = %foo() -> f32,i32,f64,b1,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i32_b1_i64_f64() -> f32, i32, b1, i64, f64 { +block0: + v0 = f32const 0x0.0 + v1 = iconst.i32 0 + v2 = bconst.b1 true + v3 = iconst.i64 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_i32_b1_i64_f64() { + fn0 = %foo() -> f32,i32,b1,i64,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i32_b1_f64_i64() -> f32, i32, b1, f64, i64 { +block0: + v0 = f32const 0x0.0 + v1 = iconst.i32 0 + v2 = bconst.b1 true + v3 = f64const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_i32_b1_f64_i64() { + fn0 = %foo() -> f32,i32,b1,f64,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i64_i32_f64_b1() -> f32, i64, i32, f64, b1 { +block0: + v0 = f32const 0x0.0 + v1 = iconst.i64 0 + v2 = iconst.i32 0 + v3 = f64const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f32_i64_i32_f64_b1() { + fn0 = %foo() -> f32,i64,i32,f64,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i64_i32_b1_f64() -> f32, i64, i32, b1, f64 { +block0: + v0 = f32const 0x0.0 + v1 = iconst.i64 0 + v2 = iconst.i32 0 + v3 = bconst.b1 true + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_i64_i32_b1_f64() { + fn0 = %foo() -> f32,i64,i32,b1,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i64_f64_i32_b1() -> f32, i64, f64, i32, b1 { +block0: + v0 = f32const 0x0.0 + v1 = iconst.i64 0 + v2 = f64const 0x0.0 + v3 = iconst.i32 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f32_i64_f64_i32_b1() { + fn0 = %foo() -> f32,i64,f64,i32,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i64_f64_b1_i32() -> f32, i64, f64, b1, i32 { +block0: + v0 = f32const 0x0.0 + v1 = iconst.i64 0 + v2 = f64const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_i64_f64_b1_i32() { + fn0 = %foo() -> f32,i64,f64,b1,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i64_b1_i32_f64() -> f32, i64, b1, i32, f64 { +block0: + v0 = f32const 0x0.0 + v1 = iconst.i64 0 + v2 = bconst.b1 true + v3 = iconst.i32 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_i64_b1_i32_f64() { + fn0 = %foo() -> f32,i64,b1,i32,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_i64_b1_f64_i32() -> f32, i64, b1, f64, i32 { +block0: + v0 = f32const 0x0.0 + v1 = iconst.i64 0 + v2 = bconst.b1 true + v3 = f64const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_i64_b1_f64_i32() { + fn0 = %foo() -> f32,i64,b1,f64,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_f64_i32_i64_b1() -> f32, f64, i32, i64, b1 { +block0: + v0 = f32const 0x0.0 + v1 = f64const 0x0.0 + v2 = iconst.i32 0 + v3 = iconst.i64 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f32_f64_i32_i64_b1() { + fn0 = %foo() -> f32,f64,i32,i64,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_f64_i32_b1_i64() -> f32, f64, i32, b1, i64 { +block0: + v0 = f32const 0x0.0 + v1 = f64const 0x0.0 + v2 = iconst.i32 0 + v3 = bconst.b1 true + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_f64_i32_b1_i64() { + fn0 = %foo() -> f32,f64,i32,b1,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() 
+ return +} + +function %return_f32_f64_i64_i32_b1() -> f32, f64, i64, i32, b1 { +block0: + v0 = f32const 0x0.0 + v1 = f64const 0x0.0 + v2 = iconst.i64 0 + v3 = iconst.i32 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f32_f64_i64_i32_b1() { + fn0 = %foo() -> f32,f64,i64,i32,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_f64_i64_b1_i32() -> f32, f64, i64, b1, i32 { +block0: + v0 = f32const 0x0.0 + v1 = f64const 0x0.0 + v2 = iconst.i64 0 + v3 = bconst.b1 true + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_f64_i64_b1_i32() { + fn0 = %foo() -> f32,f64,i64,b1,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_f64_b1_i32_i64() -> f32, f64, b1, i32, i64 { +block0: + v0 = f32const 0x0.0 + v1 = f64const 0x0.0 + v2 = bconst.b1 true + v3 = iconst.i32 0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_f64_b1_i32_i64() { + fn0 = %foo() -> f32,f64,b1,i32,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_f64_b1_i64_i32() -> f32, f64, b1, i64, i32 { +block0: + v0 = f32const 0x0.0 + v1 = f64const 0x0.0 + v2 = bconst.b1 true + v3 = iconst.i64 0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_f64_b1_i64_i32() { + fn0 = %foo() -> f32,f64,b1,i64,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_b1_i32_i64_f64() -> f32, b1, i32, i64, f64 { +block0: + v0 = f32const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i32 0 + v3 = iconst.i64 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_b1_i32_i64_f64() { + fn0 = %foo() -> f32,b1,i32,i64,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_b1_i32_f64_i64() -> f32, b1, i32, f64, i64 { +block0: + v0 = f32const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i32 0 + v3 = f64const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_b1_i32_f64_i64() { + fn0 = %foo() -> f32,b1,i32,f64,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_b1_i64_i32_f64() -> f32, b1, i64, i32, f64 { +block0: + v0 = f32const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i64 0 + v3 = iconst.i32 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_b1_i64_i32_f64() { + fn0 = %foo() -> f32,b1,i64,i32,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_b1_i64_f64_i32() -> f32, b1, i64, f64, i32 { +block0: + v0 = f32const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i64 0 + v3 = f64const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_b1_i64_f64_i32() { + fn0 = %foo() -> f32,b1,i64,f64,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_b1_f64_i32_i64() -> f32, b1, f64, i32, i64 { +block0: + v0 = f32const 0x0.0 + v1 = bconst.b1 true + v2 = f64const 0x0.0 + v3 = iconst.i32 0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_b1_f64_i32_i64() { + fn0 = %foo() -> f32,b1,f64,i32,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f32_b1_f64_i64_i32() -> f32, b1, f64, i64, i32 { +block0: + v0 = f32const 0x0.0 + v1 = bconst.b1 true + v2 = f64const 0x0.0 + v3 = iconst.i64 0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f32_b1_f64_i64_i32() { + fn0 = %foo() -> f32,b1,f64,i64,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i32_i64_f32_b1() -> f64, i32, i64, f32, b1 { +block0: + v0 = f64const 0x0.0 + 
v1 = iconst.i32 0 + v2 = iconst.i64 0 + v3 = f32const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f64_i32_i64_f32_b1() { + fn0 = %foo() -> f64,i32,i64,f32,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i32_i64_b1_f32() -> f64, i32, i64, b1, f32 { +block0: + v0 = f64const 0x0.0 + v1 = iconst.i32 0 + v2 = iconst.i64 0 + v3 = bconst.b1 true + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_i32_i64_b1_f32() { + fn0 = %foo() -> f64,i32,i64,b1,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i32_f32_i64_b1() -> f64, i32, f32, i64, b1 { +block0: + v0 = f64const 0x0.0 + v1 = iconst.i32 0 + v2 = f32const 0x0.0 + v3 = iconst.i64 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f64_i32_f32_i64_b1() { + fn0 = %foo() -> f64,i32,f32,i64,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i32_f32_b1_i64() -> f64, i32, f32, b1, i64 { +block0: + v0 = f64const 0x0.0 + v1 = iconst.i32 0 + v2 = f32const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_i32_f32_b1_i64() { + fn0 = %foo() -> f64,i32,f32,b1,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i32_b1_i64_f32() -> f64, i32, b1, i64, f32 { +block0: + v0 = f64const 0x0.0 + v1 = iconst.i32 0 + v2 = bconst.b1 true + v3 = iconst.i64 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_i32_b1_i64_f32() { + fn0 = %foo() -> f64,i32,b1,i64,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i32_b1_f32_i64() -> f64, i32, b1, f32, i64 { +block0: + v0 = f64const 0x0.0 + v1 = iconst.i32 0 + v2 = bconst.b1 true + v3 = f32const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_i32_b1_f32_i64() { + fn0 = %foo() -> f64,i32,b1,f32,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i64_i32_f32_b1() -> f64, i64, i32, f32, b1 { +block0: + v0 = f64const 0x0.0 + v1 = iconst.i64 0 + v2 = iconst.i32 0 + v3 = f32const 0x0.0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f64_i64_i32_f32_b1() { + fn0 = %foo() -> f64,i64,i32,f32,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i64_i32_b1_f32() -> f64, i64, i32, b1, f32 { +block0: + v0 = f64const 0x0.0 + v1 = iconst.i64 0 + v2 = iconst.i32 0 + v3 = bconst.b1 true + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_i64_i32_b1_f32() { + fn0 = %foo() -> f64,i64,i32,b1,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i64_f32_i32_b1() -> f64, i64, f32, i32, b1 { +block0: + v0 = f64const 0x0.0 + v1 = iconst.i64 0 + v2 = f32const 0x0.0 + v3 = iconst.i32 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f64_i64_f32_i32_b1() { + fn0 = %foo() -> f64,i64,f32,i32,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i64_f32_b1_i32() -> f64, i64, f32, b1, i32 { +block0: + v0 = f64const 0x0.0 + v1 = iconst.i64 0 + v2 = f32const 0x0.0 + v3 = bconst.b1 true + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_i64_f32_b1_i32() { + fn0 = %foo() -> f64,i64,f32,b1,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i64_b1_i32_f32() -> f64, i64, b1, i32, f32 { +block0: + v0 = f64const 0x0.0 + v1 = iconst.i64 0 + v2 = bconst.b1 true + v3 = iconst.i32 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + 
+function %call_f64_i64_b1_i32_f32() { + fn0 = %foo() -> f64,i64,b1,i32,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_i64_b1_f32_i32() -> f64, i64, b1, f32, i32 { +block0: + v0 = f64const 0x0.0 + v1 = iconst.i64 0 + v2 = bconst.b1 true + v3 = f32const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_i64_b1_f32_i32() { + fn0 = %foo() -> f64,i64,b1,f32,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_f32_i32_i64_b1() -> f64, f32, i32, i64, b1 { +block0: + v0 = f64const 0x0.0 + v1 = f32const 0x0.0 + v2 = iconst.i32 0 + v3 = iconst.i64 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f64_f32_i32_i64_b1() { + fn0 = %foo() -> f64,f32,i32,i64,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_f32_i32_b1_i64() -> f64, f32, i32, b1, i64 { +block0: + v0 = f64const 0x0.0 + v1 = f32const 0x0.0 + v2 = iconst.i32 0 + v3 = bconst.b1 true + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_f32_i32_b1_i64() { + fn0 = %foo() -> f64,f32,i32,b1,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_f32_i64_i32_b1() -> f64, f32, i64, i32, b1 { +block0: + v0 = f64const 0x0.0 + v1 = f32const 0x0.0 + v2 = iconst.i64 0 + v3 = iconst.i32 0 + v4 = bconst.b1 true + return v0, v1, v2, v3, v4 +} + +function %call_f64_f32_i64_i32_b1() { + fn0 = %foo() -> f64,f32,i64,i32,b1 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_f32_i64_b1_i32() -> f64, f32, i64, b1, i32 { +block0: + v0 = f64const 0x0.0 + v1 = f32const 0x0.0 + v2 = iconst.i64 0 + v3 = bconst.b1 true + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_f32_i64_b1_i32() { + fn0 = %foo() -> f64,f32,i64,b1,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_f32_b1_i32_i64() -> f64, f32, b1, i32, i64 { +block0: + v0 = f64const 0x0.0 + v1 = f32const 0x0.0 + v2 = bconst.b1 true + v3 = iconst.i32 0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_f32_b1_i32_i64() { + fn0 = %foo() -> f64,f32,b1,i32,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_f32_b1_i64_i32() -> f64, f32, b1, i64, i32 { +block0: + v0 = f64const 0x0.0 + v1 = f32const 0x0.0 + v2 = bconst.b1 true + v3 = iconst.i64 0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_f32_b1_i64_i32() { + fn0 = %foo() -> f64,f32,b1,i64,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_b1_i32_i64_f32() -> f64, b1, i32, i64, f32 { +block0: + v0 = f64const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i32 0 + v3 = iconst.i64 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_b1_i32_i64_f32() { + fn0 = %foo() -> f64,b1,i32,i64,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_b1_i32_f32_i64() -> f64, b1, i32, f32, i64 { +block0: + v0 = f64const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i32 0 + v3 = f32const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_b1_i32_f32_i64() { + fn0 = %foo() -> f64,b1,i32,f32,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_b1_i64_i32_f32() -> f64, b1, i64, i32, f32 { +block0: + v0 = f64const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i64 0 + v3 = iconst.i32 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_b1_i64_i32_f32() { + fn0 = %foo() -> f64,b1,i64,i32,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() 
+ return +} + +function %return_f64_b1_i64_f32_i32() -> f64, b1, i64, f32, i32 { +block0: + v0 = f64const 0x0.0 + v1 = bconst.b1 true + v2 = iconst.i64 0 + v3 = f32const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_b1_i64_f32_i32() { + fn0 = %foo() -> f64,b1,i64,f32,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_b1_f32_i32_i64() -> f64, b1, f32, i32, i64 { +block0: + v0 = f64const 0x0.0 + v1 = bconst.b1 true + v2 = f32const 0x0.0 + v3 = iconst.i32 0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_b1_f32_i32_i64() { + fn0 = %foo() -> f64,b1,f32,i32,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_f64_b1_f32_i64_i32() -> f64, b1, f32, i64, i32 { +block0: + v0 = f64const 0x0.0 + v1 = bconst.b1 true + v2 = f32const 0x0.0 + v3 = iconst.i64 0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_f64_b1_f32_i64_i32() { + fn0 = %foo() -> f64,b1,f32,i64,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i32_i64_f32_f64() -> b1, i32, i64, f32, f64 { +block0: + v0 = bconst.b1 true + v1 = iconst.i32 0 + v2 = iconst.i64 0 + v3 = f32const 0x0.0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i32_i64_f32_f64() { + fn0 = %foo() -> b1,i32,i64,f32,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i32_i64_f64_f32() -> b1, i32, i64, f64, f32 { +block0: + v0 = bconst.b1 true + v1 = iconst.i32 0 + v2 = iconst.i64 0 + v3 = f64const 0x0.0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i32_i64_f64_f32() { + fn0 = %foo() -> b1,i32,i64,f64,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i32_f32_i64_f64() -> b1, i32, f32, i64, f64 { +block0: + v0 = bconst.b1 true + v1 = iconst.i32 0 + v2 = f32const 0x0.0 + v3 = iconst.i64 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i32_f32_i64_f64() { + fn0 = %foo() -> b1,i32,f32,i64,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i32_f32_f64_i64() -> b1, i32, f32, f64, i64 { +block0: + v0 = bconst.b1 true + v1 = iconst.i32 0 + v2 = f32const 0x0.0 + v3 = f64const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i32_f32_f64_i64() { + fn0 = %foo() -> b1,i32,f32,f64,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i32_f64_i64_f32() -> b1, i32, f64, i64, f32 { +block0: + v0 = bconst.b1 true + v1 = iconst.i32 0 + v2 = f64const 0x0.0 + v3 = iconst.i64 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i32_f64_i64_f32() { + fn0 = %foo() -> b1,i32,f64,i64,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i32_f64_f32_i64() -> b1, i32, f64, f32, i64 { +block0: + v0 = bconst.b1 true + v1 = iconst.i32 0 + v2 = f64const 0x0.0 + v3 = f32const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i32_f64_f32_i64() { + fn0 = %foo() -> b1,i32,f64,f32,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i64_i32_f32_f64() -> b1, i64, i32, f32, f64 { +block0: + v0 = bconst.b1 true + v1 = iconst.i64 0 + v2 = iconst.i32 0 + v3 = f32const 0x0.0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i64_i32_f32_f64() { + fn0 = %foo() -> b1,i64,i32,f32,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i64_i32_f64_f32() -> b1, i64, i32, f64, f32 { +block0: + v0 = bconst.b1 true + 
v1 = iconst.i64 0 + v2 = iconst.i32 0 + v3 = f64const 0x0.0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i64_i32_f64_f32() { + fn0 = %foo() -> b1,i64,i32,f64,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i64_f32_i32_f64() -> b1, i64, f32, i32, f64 { +block0: + v0 = bconst.b1 true + v1 = iconst.i64 0 + v2 = f32const 0x0.0 + v3 = iconst.i32 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i64_f32_i32_f64() { + fn0 = %foo() -> b1,i64,f32,i32,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i64_f32_f64_i32() -> b1, i64, f32, f64, i32 { +block0: + v0 = bconst.b1 true + v1 = iconst.i64 0 + v2 = f32const 0x0.0 + v3 = f64const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i64_f32_f64_i32() { + fn0 = %foo() -> b1,i64,f32,f64,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i64_f64_i32_f32() -> b1, i64, f64, i32, f32 { +block0: + v0 = bconst.b1 true + v1 = iconst.i64 0 + v2 = f64const 0x0.0 + v3 = iconst.i32 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i64_f64_i32_f32() { + fn0 = %foo() -> b1,i64,f64,i32,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_i64_f64_f32_i32() -> b1, i64, f64, f32, i32 { +block0: + v0 = bconst.b1 true + v1 = iconst.i64 0 + v2 = f64const 0x0.0 + v3 = f32const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_i64_f64_f32_i32() { + fn0 = %foo() -> b1,i64,f64,f32,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f32_i32_i64_f64() -> b1, f32, i32, i64, f64 { +block0: + v0 = bconst.b1 true + v1 = f32const 0x0.0 + v2 = iconst.i32 0 + v3 = iconst.i64 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f32_i32_i64_f64() { + fn0 = %foo() -> b1,f32,i32,i64,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f32_i32_f64_i64() -> b1, f32, i32, f64, i64 { +block0: + v0 = bconst.b1 true + v1 = f32const 0x0.0 + v2 = iconst.i32 0 + v3 = f64const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f32_i32_f64_i64() { + fn0 = %foo() -> b1,f32,i32,f64,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f32_i64_i32_f64() -> b1, f32, i64, i32, f64 { +block0: + v0 = bconst.b1 true + v1 = f32const 0x0.0 + v2 = iconst.i64 0 + v3 = iconst.i32 0 + v4 = f64const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f32_i64_i32_f64() { + fn0 = %foo() -> b1,f32,i64,i32,f64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f32_i64_f64_i32() -> b1, f32, i64, f64, i32 { +block0: + v0 = bconst.b1 true + v1 = f32const 0x0.0 + v2 = iconst.i64 0 + v3 = f64const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f32_i64_f64_i32() { + fn0 = %foo() -> b1,f32,i64,f64,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f32_f64_i32_i64() -> b1, f32, f64, i32, i64 { +block0: + v0 = bconst.b1 true + v1 = f32const 0x0.0 + v2 = f64const 0x0.0 + v3 = iconst.i32 0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f32_f64_i32_i64() { + fn0 = %foo() -> b1,f32,f64,i32,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f32_f64_i64_i32() -> b1, f32, f64, i64, i32 { +block0: + v0 = bconst.b1 true + v1 = f32const 0x0.0 + v2 = f64const 0x0.0 + v3 = iconst.i64 0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + 
+function %call_b1_f32_f64_i64_i32() { + fn0 = %foo() -> b1,f32,f64,i64,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f64_i32_i64_f32() -> b1, f64, i32, i64, f32 { +block0: + v0 = bconst.b1 true + v1 = f64const 0x0.0 + v2 = iconst.i32 0 + v3 = iconst.i64 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f64_i32_i64_f32() { + fn0 = %foo() -> b1,f64,i32,i64,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f64_i32_f32_i64() -> b1, f64, i32, f32, i64 { +block0: + v0 = bconst.b1 true + v1 = f64const 0x0.0 + v2 = iconst.i32 0 + v3 = f32const 0x0.0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f64_i32_f32_i64() { + fn0 = %foo() -> b1,f64,i32,f32,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f64_i64_i32_f32() -> b1, f64, i64, i32, f32 { +block0: + v0 = bconst.b1 true + v1 = f64const 0x0.0 + v2 = iconst.i64 0 + v3 = iconst.i32 0 + v4 = f32const 0x0.0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f64_i64_i32_f32() { + fn0 = %foo() -> b1,f64,i64,i32,f32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f64_i64_f32_i32() -> b1, f64, i64, f32, i32 { +block0: + v0 = bconst.b1 true + v1 = f64const 0x0.0 + v2 = iconst.i64 0 + v3 = f32const 0x0.0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f64_i64_f32_i32() { + fn0 = %foo() -> b1,f64,i64,f32,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f64_f32_i32_i64() -> b1, f64, f32, i32, i64 { +block0: + v0 = bconst.b1 true + v1 = f64const 0x0.0 + v2 = f32const 0x0.0 + v3 = iconst.i32 0 + v4 = iconst.i64 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f64_f32_i32_i64() { + fn0 = %foo() -> b1,f64,f32,i32,i64 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} + +function %return_b1_f64_f32_i64_i32() -> b1, f64, f32, i64, i32 { +block0: + v0 = bconst.b1 true + v1 = f64const 0x0.0 + v2 = f32const 0x0.0 + v3 = iconst.i64 0 + v4 = iconst.i32 0 + return v0, v1, v2, v3, v4 +} + +function %call_b1_f64_f32_i64_i32() { + fn0 = %foo() -> b1,f64,f32,i64,i32 +block0: + v0,v1,v2,v3,v4 = call fn0() + return +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif b/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif new file mode 100644 index 0000000000..d712bf21ce --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif @@ -0,0 +1,61 @@ +test legalizer +target x86_64 haswell + +;; Test that we don't reuse `sret` stack slots for multiple calls. We could do +;; this one day, but it would require some care to ensure that we don't have +;; subsequent calls overwrite the results of previous calls. 
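+;;
+;; Each of the two calls below is therefore expected to get its own `sret`
+;; slot (`ss0` and `ss1` in the check lines).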
+ +function %foo() -> i32, f32 { + ; check: ss0 = sret_slot 20 + ; nextln: ss1 = sret_slot 20 + + fn0 = %f() -> i32, i32, i32, i32, i32 + fn1 = %g() -> f32, f32, f32, f32, f32 + ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast + ; nextln: sig1 = (i64 sret [%rdi]) -> i64 sret [%rax] fast + ; nextln: fn0 = %f sig0 + ; nextln: fn1 = %g sig1 + +block0: + v0, v1, v2, v3, v4 = call fn0() + ; check: v18 = stack_addr.i64 ss0 + ; nextln: v25 = func_addr.i64 fn0 + ; nextln: v19 = call_indirect sig0, v25(v18) + ; nextln: v20 = load.i32 notrap aligned v19 + ; nextln: v0 -> v20 + ; nextln: v21 = load.i32 notrap aligned v19+4 + ; nextln: v1 -> v21 + ; nextln: v22 = load.i32 notrap aligned v19+8 + ; nextln: v2 -> v22 + ; nextln: v23 = load.i32 notrap aligned v19+12 + ; nextln: v3 -> v23 + ; nextln: v24 = load.i32 notrap aligned v19+16 + ; nextln: v4 -> v24 + + v5, v6, v7, v8, v9 = call fn1() + ; check: v26 = stack_addr.i64 ss1 + ; nextln: v33 = func_addr.i64 fn1 + ; nextln: v27 = call_indirect sig1, v33(v26) + ; nextln: v28 = load.f32 notrap aligned v27 + ; nextln: v5 -> v28 + ; nextln: v29 = load.f32 notrap aligned v27+4 + ; nextln: v6 -> v29 + ; nextln: v30 = load.f32 notrap aligned v27+8 + ; nextln: v7 -> v30 + ; nextln: v31 = load.f32 notrap aligned v27+12 + ; nextln: v8 -> v31 + ; nextln: v32 = load.f32 notrap aligned v27+16 + ; nextln: v9 -> v32 + + v10 = iadd v0, v1 + v11 = iadd v2, v3 + v12 = iadd v10, v11 + v13 = iadd v12, v4 + + v14 = fadd v5, v6 + v15 = fadd v7, v8 + v16 = fadd v14, v15 + v17 = fadd v16, v9 + + return v13, v17 +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif b/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif new file mode 100644 index 0000000000..5004ebbe54 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif @@ -0,0 +1,51 @@ +test legalizer +target x86_64 haswell + +;; Need to insert padding after the `i8`s so that the `i32` and `i64` are +;; aligned. 
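+;;
+;; Concretely, the layout checked below is: i8 at offset 0, i32 at offset 4,
+;; i8 at offset 8, and i64 at offset 16, giving a 24-byte `sret` slot.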
+ +function %returner() -> i8, i32, i8, i64 { +; check: function %returner(i64 sret [%rdi]) -> i64 sret [%rax] fast { + +block0: +; check: block0(v4: i64): + + v0 = iconst.i8 0 + v1 = iconst.i32 1 + v2 = iconst.i8 2 + v3 = iconst.i64 3 + return v0, v1, v2, v3 + ; check: v6 = uextend.i32 v0 + ; nextln: istore8 notrap aligned v6, v4 + ; nextln: store notrap aligned v1, v4+4 + ; nextln: v7 = uextend.i32 v2 + ; nextln: istore8 notrap aligned v7, v4+8 + ; nextln: store notrap aligned v3, v4+16 + ; nextln: return v4 +} + +function %caller() { + ; check: ss0 = sret_slot 24 + + fn0 = %returner() -> i8, i32, i8, i64 + ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast + ; nextln: fn0 = %returner sig0 + +block0: + v0, v1, v2, v3 = call fn0() + ; check: v4 = stack_addr.i64 ss0 + ; nextln: v10 = func_addr.i64 fn0 + ; nextln: v5 = call_indirect sig0, v10(v4) + ; nextln: v11 = uload8.i32 notrap aligned v5 + ; nextln: v6 = ireduce.i8 v11 + ; nextln: v0 -> v6 + ; nextln: v7 = load.i32 notrap aligned v5+4 + ; nextln: v1 -> v7 + ; nextln: v12 = uload8.i32 notrap aligned v5+8 + ; nextln: v8 = ireduce.i8 v12 + ; nextln: v2 -> v8 + ; nextln: v9 = load.i64 notrap aligned v5+16 + ; nextln: v3 -> v9 + + return +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-take-many-and-return-many.clif b/cranelift/filetests/filetests/wasm/multi-val-take-many-and-return-many.clif new file mode 100644 index 0000000000..17f2f306d4 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-take-many-and-return-many.clif @@ -0,0 +1,18 @@ +test compile +target x86_64 haswell + +function %returner(i32, i64, f32, f64) -> i32, i64, f32, f64 { +block0(v0: i32, v1: i64, v2: f32, v3: f64): + return v0, v1, v2, v3 +} + +function %caller() { + fn0 = %returner(i32, i64, f32, f64) -> i32, i64, f32, f64 +block0: + v0 = iconst.i32 0 + v1 = iconst.i64 1 + v2 = f32const 0x2.0 + v3 = f64const 0x3.0 + v4, v5, v6, v7 = call fn0(v0, v1, v2, v3) + return +} diff --git a/cranelift/filetests/filetests/wasm/multi-val-tons-of-results.clif b/cranelift/filetests/filetests/wasm/multi-val-tons-of-results.clif new file mode 100644 index 0000000000..f394bdd904 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/multi-val-tons-of-results.clif @@ -0,0 +1,34 @@ +test compile +target x86_64 haswell + +function %return_20_i32s() -> i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 { +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + v2 = iconst.i32 2 + v3 = iconst.i32 3 + v4 = iconst.i32 4 + v5 = iconst.i32 5 + v6 = iconst.i32 6 + v7 = iconst.i32 7 + v8 = iconst.i32 8 + v9 = iconst.i32 9 + v10 = iconst.i32 10 + v11 = iconst.i32 11 + v12 = iconst.i32 12 + v13 = iconst.i32 13 + v14 = iconst.i32 14 + v15 = iconst.i32 15 + v16 = iconst.i32 16 + v17 = iconst.i32 17 + v18 = iconst.i32 18 + v19 = iconst.i32 19 + return v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19 +} + +function %call_20_i32s() { + fn0 = %return_20_i32s() -> i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 +block0: + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19 = call fn0() + return +} diff --git a/cranelift/filetests/filetests/wasm/r32.clif b/cranelift/filetests/filetests/wasm/r32.clif new file mode 100644 index 0000000000..7e1622246a --- /dev/null +++ b/cranelift/filetests/filetests/wasm/r32.clif @@ -0,0 +1,75 @@ +; Test basic code generation for 32-bit reftypes +; This test is the 32-bit 
version of r64.clif. If you change this test you +; should most likely update that test as well. +test compile +set enable_safepoints=true + +target i686 haswell + +function %select_ref(i32, r32, r32) -> r32 { +block0(v0: i32, v1: r32, v2: r32): + brz v0, block1(v2) + jump block1(v1) + +block1(v3: r32): + return v3 +} + +function %table_set(i32, r32, i32 vmctx) { + gv0 = vmctx + gv1 = load.i32 notrap aligned gv0 + gv2 = load.i32 notrap aligned gv0 +4 + table0 = dynamic gv1, element_size 1, bound gv2, index_type i32 + +block0(v0: i32, v1: r32, v2: i32): + v3 = table_addr.i32 table0, v0, +0; + store.r32 notrap aligned v1, v3 + return +} + +function %table_get(i32, i32 vmctx) -> r32 { + gv0 = vmctx + gv1 = load.i32 notrap aligned gv0 + gv2 = load.i32 notrap aligned gv0 +4 + table0 = dynamic gv1, element_size 1, bound gv2, index_type i32 + +block0(v0: i32, v1: i32): + v2 = table_addr.i32 table0, v0, +0; + v3 = load.r32 notrap aligned v2 + return v3 +} + +function %test_refs(r32, r32, r32, i32 vmctx) { + fn0 = %select_ref(i32, r32, r32) -> r32 + fn1 = %table_set(i32, r32, i32 vmctx) + fn2 = %table_get(i32, i32 vmctx) -> r32 + +block0(v0: r32, v1: r32, v2: r32, v3: i32): + v4 = iconst.i32 0 + v5 = iconst.i32 1 + v8 = iconst.i32 2 + + ; Shuffle around the first two refs + v6 = call fn0(v4, v0, v1) + v7 = call fn0(v5, v0, v1) + + ; Store in the table + call fn1(v4, v6, v3) + call fn1(v5, v7, v3) + call fn1(v8, v2, v3) + + ; Load from the table + v9 = call fn2(v4, v3) + v10 = call fn2(v5, v3) + v11 = call fn2(v8, v3) + + ; Compare the results + v12 = is_null v9 + trapnz v12, user0 + v13 = is_null v10 + trapnz v13, user0 + v14 = is_invalid v11 + trapnz v14, user0 + + return +} diff --git a/cranelift/filetests/filetests/wasm/r64.clif b/cranelift/filetests/filetests/wasm/r64.clif new file mode 100644 index 0000000000..9fab27fbb5 --- /dev/null +++ b/cranelift/filetests/filetests/wasm/r64.clif @@ -0,0 +1,75 @@ +; Test basic code generation for 64-bit reftypes +; This test is the 64-bit version of r32.clif. If you change this test you +; should most likely update that test as well. 
+test compile
+set enable_safepoints=true
+
+target x86_64 haswell
+
+function %select_ref(i32, r64, r64) -> r64 {
+block0(v0: i32, v1: r64, v2: r64):
+    brz v0, block1(v2)
+    jump block1(v1)
+
+block1(v3: r64):
+    return v3
+}
+
+function %table_set(i32, r64, i64 vmctx) {
+    gv0 = vmctx
+    gv1 = load.i64 notrap aligned gv0
+    gv2 = load.i32 notrap aligned gv0 +8
+    table0 = dynamic gv1, element_size 1, bound gv2, index_type i32
+
+block0(v0: i32, v1: r64, v2: i64):
+    v3 = table_addr.i64 table0, v0, +0
+    store.r64 notrap aligned v1, v3
+    return
+}
+
+function %table_get(i32, i64 vmctx) -> r64 {
+    gv0 = vmctx
+    gv1 = load.i64 notrap aligned gv0
+    gv2 = load.i32 notrap aligned gv0 +8
+    table0 = dynamic gv1, element_size 1, bound gv2, index_type i32
+
+block0(v0: i32, v1: i64):
+    v2 = table_addr.i64 table0, v0, +0
+    v3 = load.r64 notrap aligned v2
+    return v3
+}
+
+function %test_refs(r64, r64, r64, i64 vmctx) {
+    fn0 = %select_ref(i32, r64, r64) -> r64
+    fn1 = %table_set(i32, r64, i64 vmctx)
+    fn2 = %table_get(i32, i64 vmctx) -> r64
+
+block0(v0: r64, v1: r64, v2: r64, v3: i64):
+    v4 = iconst.i32 0
+    v5 = iconst.i32 1
+    v8 = iconst.i32 2
+
+    ; Shuffle around the first two refs
+    v6 = call fn0(v4, v0, v1)
+    v7 = call fn0(v5, v0, v1)
+
+    ; Store in the table
+    call fn1(v4, v6, v3)
+    call fn1(v5, v7, v3)
+    call fn1(v8, v2, v3)
+
+    ; Load from the table
+    v9 = call fn2(v4, v3)
+    v10 = call fn2(v5, v3)
+    v11 = call fn2(v8, v3)
+
+    ; Compare the results
+    v12 = is_null v9
+    trapnz v12, user0
+    v13 = is_null v10
+    trapnz v13, user0
+    v14 = is_invalid v11
+    trapnz v14, user0
+
+    return
+}
diff --git a/cranelift/filetests/filetests/wasm/select.clif b/cranelift/filetests/filetests/wasm/select.clif
new file mode 100644
index 0000000000..b2508ef6e5
--- /dev/null
+++ b/cranelift/filetests/filetests/wasm/select.clif
@@ -0,0 +1,30 @@
+; Test basic code generation for the select WebAssembly instruction.
+test compile
+
+target i686 haswell
+
+target x86_64 haswell
+
+function %select_i32(i32, i32, i32) -> i32 {
+block0(v0: i32, v1: i32, v2: i32):
+    v3 = select v2, v0, v1
+    return v3
+}
+
+function %select_i64(i64, i64, i32) -> i64 {
+block0(v0: i64, v1: i64, v2: i32):
+    v3 = select v2, v0, v1
+    return v3
+}
+
+function %select_f32(f32, f32, i32) -> f32 {
+block0(v0: f32, v1: f32, v2: i32):
+    v3 = select v2, v0, v1
+    return v3
+}
+
+function %select_f64(f64, f64, i32) -> f64 {
+block0(v0: f64, v1: f64, v2: i32):
+    v3 = select v2, v0, v1
+    return v3
+}
diff --git a/cranelift/filetests/src/concurrent.rs b/cranelift/filetests/src/concurrent.rs
new file mode 100644
index 0000000000..30e2c94cfe
--- /dev/null
+++ b/cranelift/filetests/src/concurrent.rs
@@ -0,0 +1,161 @@
+//! Run tests concurrently.
+//!
+//! This module provides the `ConcurrentRunner` struct which uses a pool of threads to run tests
+//! concurrently.
+
+use crate::runone;
+use crate::TestResult;
+use cranelift_codegen::dbg::LOG_FILENAME_PREFIX;
+use cranelift_codegen::timing;
+use file_per_thread_logger;
+use log::error;
+use num_cpus;
+use std::panic::catch_unwind;
+use std::path::{Path, PathBuf};
+use std::sync::mpsc::{channel, Receiver, Sender};
+use std::sync::{Arc, Mutex};
+use std::thread;
+use std::time::Duration;
+
+/// A request sent to the worker threads; carries the job ID and the test's path.
+struct Request(usize, PathBuf);
+
+/// Reply from a worker thread.
+pub enum Reply {
+    Starting { jobid: usize, thread_num: usize },
+    Done { jobid: usize, result: TestResult },
+    Tick,
+}
+
+/// Manage threads that run test jobs concurrently.
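+///
+/// The intended lifecycle, as used by the test runner: `new()` spins up the
+/// workers, `put()` queues jobs, `try_get()`/`get()` drain replies, and
+/// `shutdown()` followed by `join()` winds everything down.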
+pub struct ConcurrentRunner {
+    /// Channel for sending requests to the worker threads.
+    /// The workers are sharing the receiver with an `Arc<Mutex<Receiver>>`.
+    /// This is `None` when shutting down.
+    request_tx: Option<Sender<Request>>,
+
+    /// Channel for receiving replies from the workers.
+    /// Workers have their own `Sender`.
+    reply_rx: Receiver<Reply>,
+
+    handles: Vec<thread::JoinHandle<timing::PassTimes>>,
+}
+
+impl ConcurrentRunner {
+    /// Create a new `ConcurrentRunner` with threads spun up.
+    pub fn new() -> Self {
+        let (request_tx, request_rx) = channel();
+        let request_mutex = Arc::new(Mutex::new(request_rx));
+        let (reply_tx, reply_rx) = channel();
+
+        heartbeat_thread(reply_tx.clone());
+
+        let handles = (0..num_cpus::get())
+            .map(|num| worker_thread(num, request_mutex.clone(), reply_tx.clone()))
+            .collect();
+
+        Self {
+            request_tx: Some(request_tx),
+            reply_rx,
+            handles,
+        }
+    }
+
+    /// Shut down the worker threads in an orderly manner. They will finish any queued jobs first.
+    pub fn shutdown(&mut self) {
+        self.request_tx = None;
+    }
+
+    /// Join all the worker threads.
+    /// Transfer pass timings from the worker threads to the current thread.
+    pub fn join(&mut self) {
+        assert!(self.request_tx.is_none(), "must shutdown before join");
+        for h in self.handles.drain(..) {
+            match h.join() {
+                Ok(t) => timing::add_to_current(&t),
+                Err(e) => println!("worker panicked: {:?}", e),
+            }
+        }
+    }
+
+    /// Add a new job to the queues.
+    pub fn put(&mut self, jobid: usize, path: &Path) {
+        self.request_tx
+            .as_ref()
+            .expect("cannot push after shutdown")
+            .send(Request(jobid, path.to_owned()))
+            .expect("all the worker threads are gone");
+    }
+
+    /// Get a job reply without blocking.
+    pub fn try_get(&mut self) -> Option<Reply> {
+        self.reply_rx.try_recv().ok()
+    }
+
+    /// Get a job reply, blocking until one is available.
+    pub fn get(&mut self) -> Option<Reply> {
+        self.reply_rx.recv().ok()
+    }
+}
+
+/// Spawn a heartbeat thread which sends ticks down the reply channel every second.
+/// This lets us implement timeouts without the not-yet-stable `recv_timeout`.
+fn heartbeat_thread(replies: Sender<Reply>) -> thread::JoinHandle<()> {
+    thread::Builder::new()
+        .name("heartbeat".to_string())
+        .spawn(move || {
+            file_per_thread_logger::initialize(LOG_FILENAME_PREFIX);
+            while replies.send(Reply::Tick).is_ok() {
+                thread::sleep(Duration::from_secs(1));
+            }
+        })
+        .unwrap()
+}
+
+/// Spawn a worker thread running tests.
+fn worker_thread(
+    thread_num: usize,
+    requests: Arc<Mutex<Receiver<Request>>>,
+    replies: Sender<Reply>,
+) -> thread::JoinHandle<timing::PassTimes> {
+    thread::Builder::new()
+        .name(format!("worker #{}", thread_num))
+        .spawn(move || {
+            file_per_thread_logger::initialize(LOG_FILENAME_PREFIX);
+            loop {
+                // Lock the mutex only long enough to extract a request.
+                let Request(jobid, path) = match requests.lock().unwrap().recv() {
+                    Err(..) => break, // TX end shut down; exit the thread.
+                    Ok(req) => req,
+                };
+
+                // Tell them we're starting this job.
+                // The receiver should always be present for this as long as we have jobs.
+                replies.send(Reply::Starting { jobid, thread_num }).unwrap();
+
+                let result = catch_unwind(|| runone::run(path.as_path(), None, None))
+                    .unwrap_or_else(|e| {
+                        // The test panicked, leaving us a `Box<dyn Any>`.
+                        // Panics are usually strings.
+                        if let Some(msg) = e.downcast_ref::<String>() {
+                            Err(format!("panicked in worker #{}: {}", thread_num, msg))
+                        } else if let Some(msg) = e.downcast_ref::<&'static str>() {
+                            Err(format!("panicked in worker #{}: {}", thread_num, msg))
+                        } else {
+                            Err(format!("panicked in worker #{}", thread_num))
+                        }
+                    });
+
+                if let Err(ref msg) = result {
+                    error!("FAIL: {}", msg);
+                }
+
+                replies.send(Reply::Done { jobid, result }).unwrap();
+            }
+
+            // Timing is accumulated independently per thread.
+            // Timings from this worker thread will be aggregated by `ConcurrentRunner::join()`.
+            timing::take_current()
+        })
+        .unwrap()
+}
diff --git a/cranelift/filetests/src/function_runner.rs b/cranelift/filetests/src/function_runner.rs
new file mode 100644
index 0000000000..302acf0cb4
--- /dev/null
+++ b/cranelift/filetests/src/function_runner.rs
@@ -0,0 +1,117 @@
+use core::mem;
+use cranelift_codegen::binemit::{NullRelocSink, NullStackmapSink, NullTrapSink};
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::isa::TargetIsa;
+use cranelift_codegen::{settings, Context};
+use cranelift_native::builder as host_isa_builder;
+use memmap::MmapMut;
+
+/// Run a function on the host machine.
+pub struct FunctionRunner {
+    function: Function,
+    isa: Box<dyn TargetIsa>,
+}
+
+impl FunctionRunner {
+    /// Build a function runner from a function and the ISA to run on (must be the host machine's ISA).
+    pub fn new(function: Function, isa: Box<dyn TargetIsa>) -> Self {
+        Self { function, isa }
+    }
+
+    /// Build a function runner using the host machine's ISA and the passed flags.
+    pub fn with_host_isa(function: Function, flags: settings::Flags) -> Self {
+        let builder = host_isa_builder().expect("Unable to build a TargetIsa for the current host");
+        let isa = builder.finish(flags);
+        Self::new(function, isa)
+    }
+
+    /// Build a function runner using the host machine's ISA and the default flags for this ISA.
+    pub fn with_default_host_isa(function: Function) -> Self {
+        let flags = settings::Flags::new(settings::builder());
+        Self::with_host_isa(function, flags)
+    }
+
+    /// Compile and execute a single function, expecting a boolean to be returned; a 'true' value is
+    /// interpreted as a successful test execution and mapped to Ok, whereas a 'false' value is
+    /// interpreted as a failed test and mapped to Err.
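+    ///
+    /// For example (mirroring the unit test below), a suitable function is:
+    ///
+    /// ```clif
+    /// function %test() -> b8 {
+    /// block0:
+    ///     v0 = bconst.b8 true
+    ///     return v0
+    /// }
+    /// ```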
+    pub fn run(&self) -> Result<(), String> {
+        let func = self.function.clone();
+        if !(func.signature.params.is_empty()
+            && func.signature.returns.len() == 1
+            && func.signature.returns.first().unwrap().value_type.is_bool())
+        {
+            return Err(String::from(
+                "Functions must have a signature like: () -> boolean",
+            ));
+        }
+
+        if func.signature.call_conv != self.isa.default_call_conv() {
+            return Err(String::from(
+                "Functions only run on the host's default calling convention; remove the specified calling convention in the function signature to use the host's default.",
+            ));
+        }
+
+        // set up the context
+        let mut context = Context::new();
+        context.func = func;
+
+        // compile and encode the result to machine code
+        let relocs = &mut NullRelocSink {};
+        let traps = &mut NullTrapSink {};
+        let stackmaps = &mut NullStackmapSink {};
+        let code_info = context
+            .compile(self.isa.as_ref())
+            .map_err(|e| e.to_string())?;
+        let mut code_page =
+            MmapMut::map_anon(code_info.total_size as usize).map_err(|e| e.to_string())?;
+
+        unsafe {
+            context.emit_to_memory(
+                self.isa.as_ref(),
+                code_page.as_mut_ptr(),
+                relocs,
+                traps,
+                stackmaps,
+            );
+        };
+
+        let code_page = code_page.make_exec().map_err(|e| e.to_string())?;
+        let callable_fn: fn() -> bool = unsafe { mem::transmute(code_page.as_ptr()) };
+
+        // execute
+        if callable_fn() {
+            Ok(())
+        } else {
+            Err(format!("Failed: {}", context.func.name.to_string()))
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use cranelift_reader::{parse_test, ParseOptions};
+
+    #[test]
+    fn nop() {
+        let code = String::from(
+            "
+            test run
+            function %test() -> b8 {
+            block0:
+                nop
+                v1 = bconst.b8 true
+                return v1
+            }",
+        );
+
+        // extract function
+        let test_file = parse_test(code.as_str(), ParseOptions::default()).unwrap();
+        assert_eq!(1, test_file.functions.len());
+        let function = test_file.functions[0].0.clone();
+
+        // execute function
+        let runner = FunctionRunner::with_default_host_isa(function);
+        runner.run().unwrap() // will panic if execution fails
+    }
+}
diff --git a/cranelift/filetests/src/lib.rs b/cranelift/filetests/src/lib.rs
new file mode 100644
index 0000000000..0d3b12e458
--- /dev/null
+++ b/cranelift/filetests/src/lib.rs
@@ -0,0 +1,144 @@
+//! File tests.
+//!
+//! This crate contains the main test driver as well as implementations of the
+//! available filetest commands.
+
+#![deny(
+    missing_docs,
+    trivial_numeric_casts,
+    unused_extern_crates,
+    unstable_features
+)]
+#![warn(unused_import_braces)]
+#![cfg_attr(feature = "cargo-clippy", allow(clippy::type_complexity))]
+#![cfg_attr(
+    feature = "cargo-clippy",
+    warn(
+        clippy::mut_mut,
+        clippy::nonminimal_bool,
+        clippy::option_map_unwrap_or,
+        clippy::option_map_unwrap_or_else,
+        clippy::unicode_not_nfc,
+        clippy::use_self
+    )
+)]
+
+pub use crate::function_runner::FunctionRunner;
+use crate::runner::TestRunner;
+use cranelift_codegen::timing;
+use cranelift_reader::TestCommand;
+use std::path::Path;
+use std::time;
+
+mod concurrent;
+mod function_runner;
+mod match_directive;
+mod runner;
+mod runone;
+mod subtest;
+
+mod test_binemit;
+mod test_cat;
+mod test_compile;
+mod test_dce;
+mod test_domtree;
+mod test_fde;
+mod test_legalizer;
+mod test_licm;
+mod test_postopt;
+mod test_preopt;
+mod test_print_cfg;
+mod test_regalloc;
+mod test_rodata;
+mod test_run;
+mod test_safepoint;
+mod test_shrink;
+mod test_simple_gvn;
+mod test_simple_preopt;
+mod test_unwind;
+mod test_verifier;
+
+/// The result of running the test in a file.
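+/// `Ok` carries the time the test took to run; `Err` carries an error message.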
+type TestResult = Result<time::Duration, String>;
+
+/// Main entry point for `clif-util test`.
+///
+/// Take a list of filenames which can be either `.clif` files or directories.
+///
+/// Files are interpreted as test cases and executed immediately.
+///
+/// Directories are scanned recursively for test cases ending in `.clif`. These test cases are
+/// executed on background threads.
+///
+pub fn run(verbose: bool, report_times: bool, files: &[String]) -> TestResult {
+    let mut runner = TestRunner::new(verbose, report_times);
+
+    for path in files.iter().map(Path::new) {
+        if path.is_file() {
+            runner.push_test(path);
+        } else {
+            runner.push_dir(path);
+        }
+    }
+
+    runner.start_threads();
+    runner.run()
+}
+
+/// Used for the 'pass' subcommand.
+/// Commands are interpreted as tests and executed.
+///
+/// Directories are scanned recursively for test cases ending in `.clif`.
+///
+pub fn run_passes(
+    verbose: bool,
+    report_times: bool,
+    passes: &[String],
+    target: &str,
+    file: &str,
+) -> TestResult {
+    let mut runner = TestRunner::new(verbose, /* report_times */ false);
+
+    let path = Path::new(file);
+    if path == Path::new("-") || path.is_file() {
+        runner.push_test(path);
+    } else {
+        runner.push_dir(path);
+    }
+
+    let result = runner.run_passes(passes, target);
+    if report_times {
+        println!("{}", timing::take_current());
+    }
+    result
+}
+
+/// Create a new subcommand trait object to match `parsed.command`.
+///
+/// This function knows how to create all of the possible `test <foo>` commands that can appear in
+/// a `.clif` test file.
+fn new_subtest(parsed: &TestCommand) -> subtest::SubtestResult<Box<dyn subtest::SubTest>> {
+    match parsed.command {
+        "binemit" => test_binemit::subtest(parsed),
+        "cat" => test_cat::subtest(parsed),
+        "compile" => test_compile::subtest(parsed),
+        "rodata" => test_rodata::subtest(parsed),
+        "dce" => test_dce::subtest(parsed),
+        "domtree" => test_domtree::subtest(parsed),
+        "legalizer" => test_legalizer::subtest(parsed),
+        "licm" => test_licm::subtest(parsed),
+        "postopt" => test_postopt::subtest(parsed),
+        "simple_preopt" => test_simple_preopt::subtest(parsed),
+        "print-cfg" => test_print_cfg::subtest(parsed),
+        "regalloc" => test_regalloc::subtest(parsed),
+        "run" => test_run::subtest(parsed),
+        "shrink" => test_shrink::subtest(parsed),
+        "simple-gvn" => test_simple_gvn::subtest(parsed),
+        "verifier" => test_verifier::subtest(parsed),
+        "preopt" => test_preopt::subtest(parsed),
+        "safepoint" => test_safepoint::subtest(parsed),
+        "unwind" => test_unwind::subtest(parsed),
+        "fde" => test_fde::subtest(parsed),
+        _ => Err(format!("unknown test command '{}'", parsed.command)),
+    }
+}
diff --git a/cranelift/filetests/src/match_directive.rs b/cranelift/filetests/src/match_directive.rs
new file mode 100644
index 0000000000..bb379f25c9
--- /dev/null
+++ b/cranelift/filetests/src/match_directive.rs
@@ -0,0 +1,27 @@
+/// Look for a directive in a comment string.
+/// The directive is of the form "foo:" and should follow the leading `;` in the comment:
+///
+/// ; dominates: block3 block4
+///
+/// Return the comment text following the directive.
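+///
+/// For example, for the comment shown above, `match_directive(comment, "dominates:")`
+/// returns `Some("block3 block4")`.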
+pub fn match_directive<'a>(comment: &'a str, directive: &str) -> Option<&'a str> {
+    assert!(
+        directive.ends_with(':'),
+        "Directive must include trailing colon"
+    );
+    let text = comment.trim_start_matches(';').trim_start();
+    if text.starts_with(directive) {
+        Some(text[directive.len()..].trim())
+    } else {
+        None
+    }
+}
+
+#[test]
+fn test_match_directive() {
+    assert_eq!(match_directive("; foo: bar ", "foo:"), Some("bar"));
+    assert_eq!(match_directive(" foo:bar", "foo:"), Some("bar"));
+    assert_eq!(match_directive("foo:bar", "foo:"), Some("bar"));
+    assert_eq!(match_directive(";x foo: bar", "foo:"), None);
+    assert_eq!(match_directive(";;; foo: bar", "foo:"), Some("bar"));
+}
diff --git a/cranelift/filetests/src/runner.rs b/cranelift/filetests/src/runner.rs
new file mode 100644
index 0000000000..d11ffab79e
--- /dev/null
+++ b/cranelift/filetests/src/runner.rs
@@ -0,0 +1,388 @@
+//! Test runner.
+//!
+//! This module implements the `TestRunner` struct which manages executing tests as well as
+//! scanning directories for tests.
+
+use crate::concurrent::{ConcurrentRunner, Reply};
+use crate::runone;
+use crate::TestResult;
+use cranelift_codegen::timing;
+use std::error::Error;
+use std::ffi::OsStr;
+use std::fmt::{self, Display};
+use std::path::{Path, PathBuf};
+use std::time;
+
+/// Timeout in seconds when we're not making progress.
+const TIMEOUT_PANIC: usize = 10;
+
+/// Timeout for reporting slow tests without panicking.
+const TIMEOUT_SLOW: usize = 3;
+
+struct QueueEntry {
+    path: PathBuf,
+    state: State,
+}
+
+#[derive(PartialEq, Eq, Debug)]
+enum State {
+    New,
+    Queued,
+    Running,
+    Done(TestResult),
+}
+
+#[derive(PartialEq, Eq, Debug, Clone, Copy)]
+pub enum IsPass {
+    Pass,
+    NotPass,
+}
+
+impl QueueEntry {
+    pub fn path(&self) -> &Path {
+        self.path.as_path()
+    }
+}
+
+impl Display for QueueEntry {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let p = self.path.to_string_lossy();
+        match self.state {
+            State::Done(Ok(dur)) => write!(f, "{}.{:03} {}", dur.as_secs(), dur.subsec_millis(), p),
+            State::Done(Err(ref e)) => write!(f, "FAIL {}: {}", p, e),
+            _ => write!(f, "{}", p),
+        }
+    }
+}
+
+pub struct TestRunner {
+    verbose: bool,
+
+    // Should we print the timings out?
+    report_times: bool,
+
+    // Directories that have not yet been scanned.
+    dir_stack: Vec<PathBuf>,
+
+    // Filenames of tests to run.
+    tests: Vec<QueueEntry>,
+
+    // Pointer into `tests` where the `New` entries begin.
+    new_tests: usize,
+
+    // Number of contiguous reported tests at the front of `tests`.
+    reported_tests: usize,
+
+    // Number of errors seen so far.
+    errors: usize,
+
+    // Number of ticks received since we saw any progress.
+    ticks_since_progress: usize,
+
+    threads: Option<ConcurrentRunner>,
+}
+
+impl TestRunner {
+    /// Create a new blank TestRunner.
+    pub fn new(verbose: bool, report_times: bool) -> Self {
+        Self {
+            verbose,
+            report_times,
+            dir_stack: Vec::new(),
+            tests: Vec::new(),
+            new_tests: 0,
+            reported_tests: 0,
+            errors: 0,
+            ticks_since_progress: 0,
+            threads: None,
+        }
+    }
+
+    /// Add a directory path to be scanned later.
+    ///
+    /// If `dir` turns out to be a regular file, it is silently ignored.
+    /// Otherwise, any problems reading the directory are reported.
+    pub fn push_dir<P: Into<PathBuf>>(&mut self, dir: P) {
+        self.dir_stack.push(dir.into());
+    }
+
+    /// Add a test to be executed later.
+    ///
+    /// Any problems reading `file` as a test case file will be reported as a test failure.
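+    /// The file is not opened here; it is only queued, so any read error
+    /// surfaces when the test is actually run.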
+    pub fn push_test<P: Into<PathBuf>>(&mut self, file: P) {
+        self.tests.push(QueueEntry {
+            path: file.into(),
+            state: State::New,
+        });
+    }
+
+    /// Begin running tests concurrently.
+    pub fn start_threads(&mut self) {
+        assert!(self.threads.is_none());
+        self.threads = Some(ConcurrentRunner::new());
+    }
+
+    /// Scan any directories pushed so far.
+    /// Push any potential test cases found.
+    pub fn scan_dirs(&mut self, pass_status: IsPass) {
+        // This recursive search tries to minimize statting in a directory hierarchy containing
+        // mostly test cases.
+        //
+        // - Directory entries with a "clif" extension are presumed to be test case files.
+        // - Directory entries with no extension are presumed to be subdirectories.
+        // - Anything else is ignored.
+        //
+        while let Some(dir) = self.dir_stack.pop() {
+            match dir.read_dir() {
+                Err(err) => {
+                    // Fail silently if `dir` was actually a regular file.
+                    // This lets us skip spurious extensionless files without statting everything
+                    // needlessly.
+                    if !dir.is_file() {
+                        self.path_error(&dir, &err);
+                    }
+                }
+                Ok(entries) => {
+                    // Read all directory entries. Avoid statting.
+                    for entry_result in entries {
+                        match entry_result {
+                            Err(err) => {
+                                // Not sure why this would happen. `read_dir` succeeds, but there's
+                                // a problem with an entry. I/O error during a getdirentries
+                                // syscall seems to be the reason. The implementation in
+                                // libstd/sys/unix/fs.rs seems to suggest that breaking now would
+                                // be a good idea, or the iterator could keep returning the same
+                                // error forever.
+                                self.path_error(&dir, &err);
+                                break;
+                            }
+                            Ok(entry) => {
+                                let path = entry.path();
+                                // Recognize directories and tests by extension.
+                                // Yes, this means we ignore directories with '.' in their name.
+                                match path.extension().and_then(OsStr::to_str) {
+                                    Some("clif") => self.push_test(path),
+                                    Some(_) => {}
+                                    None => self.push_dir(path),
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            if pass_status == IsPass::Pass {
+                continue;
+            } else {
+                // Get the new jobs running before moving on to the next directory.
+                self.schedule_jobs();
+            }
+        }
+    }
+
+    /// Report an error related to a path.
+    fn path_error<E: Error>(&mut self, path: &PathBuf, err: &E) {
+        self.errors += 1;
+        println!("{}: {}", path.to_string_lossy(), err);
+    }
+
+    /// Report on the next in-order job, if it's done.
+    fn report_job(&self) -> bool {
+        let jobid = self.reported_tests;
+        if let Some(&QueueEntry {
+            state: State::Done(ref result),
+            ..
+        }) = self.tests.get(jobid)
+        {
+            if self.verbose || result.is_err() {
+                println!("{}", self.tests[jobid]);
+            }
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Schedule any new jobs to run.
+    fn schedule_jobs(&mut self) {
+        for jobid in self.new_tests..self.tests.len() {
+            assert_eq!(self.tests[jobid].state, State::New);
+            if let Some(ref mut conc) = self.threads {
+                // Queue test for concurrent execution.
+                self.tests[jobid].state = State::Queued;
+                conc.put(jobid, self.tests[jobid].path());
+            } else {
+                // Run test synchronously.
+                self.tests[jobid].state = State::Running;
+                let result = runone::run(self.tests[jobid].path(), None, None);
+                self.finish_job(jobid, result);
+            }
+            self.new_tests = jobid + 1;
+        }
+
+        // Check for any asynchronous replies without blocking.
+        while let Some(reply) = self.threads.as_mut().and_then(ConcurrentRunner::try_get) {
+            self.handle_reply(reply);
+        }
+    }
+
+    /// Schedule any new job to run for the pass command.
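+    /// Unlike `schedule_jobs`, this runs the single queued test synchronously
+    /// on the current thread instead of handing it to the worker pool.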
+    fn schedule_pass_job(&mut self, passes: &[String], target: &str) {
+        self.tests[0].state = State::Running;
+        let result: Result<time::Duration, String>;
+
+        let specified_target = match target {
+            "" => None,
+            targ => Some(targ),
+        };
+
+        result = runone::run(self.tests[0].path(), Some(passes), specified_target);
+        self.finish_job(0, result);
+    }
+
+    /// Report the end of a job.
+    fn finish_job(&mut self, jobid: usize, result: TestResult) {
+        assert_eq!(self.tests[jobid].state, State::Running);
+        if result.is_err() {
+            self.errors += 1;
+        }
+        self.tests[jobid].state = State::Done(result);
+
+        // Report jobs in order.
+        while self.report_job() {
+            self.reported_tests += 1;
+        }
+    }
+
+    /// Handle a reply from the async threads.
+    fn handle_reply(&mut self, reply: Reply) {
+        match reply {
+            Reply::Starting { jobid, .. } => {
+                assert_eq!(self.tests[jobid].state, State::Queued);
+                self.tests[jobid].state = State::Running;
+            }
+            Reply::Done { jobid, result } => {
+                self.ticks_since_progress = 0;
+                self.finish_job(jobid, result)
+            }
+            Reply::Tick => {
+                self.ticks_since_progress += 1;
+                if self.ticks_since_progress == TIMEOUT_SLOW {
+                    println!(
+                        "STALLED for {} seconds with {}/{} tests finished",
+                        self.ticks_since_progress,
+                        self.reported_tests,
+                        self.tests.len()
+                    );
+                    for jobid in self.reported_tests..self.tests.len() {
+                        if self.tests[jobid].state == State::Running {
+                            println!("slow: {}", self.tests[jobid]);
+                        }
+                    }
+                }
+                if self.ticks_since_progress >= TIMEOUT_PANIC {
+                    panic!(
+                        "worker threads stalled for {} seconds.",
+                        self.ticks_since_progress
+                    );
+                }
+            }
+        }
+    }
+
+    /// Drain the async jobs and shut down the threads.
+    fn drain_threads(&mut self) {
+        if let Some(mut conc) = self.threads.take() {
+            conc.shutdown();
+            while self.reported_tests < self.tests.len() {
+                match conc.get() {
+                    Some(reply) => self.handle_reply(reply),
+                    None => break,
+                }
+            }
+            conc.join();
+            if self.report_times {
+                println!("{}", timing::take_current());
+            }
+        }
+    }
+
+    /// Print out a report of slow tests.
+    fn report_slow_tests(&self) {
+        // Collect runtimes of succeeded tests.
+        let mut times = self
+            .tests
+            .iter()
+            .filter_map(|entry| match *entry {
+                QueueEntry {
+                    state: State::Done(Ok(dur)),
+                    ..
+                } => Some(dur),
+                _ => None,
+            })
+            .collect::<Vec<_>>();
+
+        // Get me some real data, kid.
+        let len = times.len();
+        if len < 4 {
+            return;
+        }
+
+        // Compute quartiles.
+        times.sort();
+        let qlen = len / 4;
+        let q1 = times[qlen];
+        let q3 = times[len - 1 - qlen];
+        // Inter-quartile range.
+        let iqr = q3 - q1;
+
+        // Cut-off for what we consider a 'slow' test: 3 IQR above the upper quartile (Q3).
+        //
+        // Q3 + 1.5 IQR are the data points that would be plotted as outliers outside a box plot,
+        // but we have a wider distribution of test times, so double it to 3 IQR.
+        let cut = q3 + iqr * 3;
+        if cut > *times.last().unwrap() {
+            return;
+        }
+
+        for t in self.tests.iter().filter(|entry| match **entry {
+            QueueEntry {
+                state: State::Done(Ok(dur)),
+                ..
+            } => dur > cut,
+            _ => false,
+        }) {
+            println!("slow: {}", t)
+        }
+    }
+
+    /// Scan pushed directories for tests and run them.
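+    /// On success, returns the total elapsed time; on failure, an error
+    /// summarizing the number of failing tests.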
+    pub fn run(&mut self) -> TestResult {
+        let started = time::Instant::now();
+        self.scan_dirs(IsPass::NotPass);
+        self.schedule_jobs();
+        self.report_slow_tests();
+        self.drain_threads();
+
+        println!("{} tests", self.tests.len());
+        match self.errors {
+            0 => Ok(started.elapsed()),
+            1 => Err("1 failure".to_string()),
+            n => Err(format!("{} failures", n)),
+        }
+    }
+
+    /// Scan pushed directories for tests and run specified passes from the command line on them.
+    pub fn run_passes(&mut self, passes: &[String], target: &str) -> TestResult {
+        let started = time::Instant::now();
+        self.scan_dirs(IsPass::Pass);
+        self.schedule_pass_job(passes, target);
+        self.report_slow_tests();
+
+        println!("{} tests", self.tests.len());
+        match self.errors {
+            0 => Ok(started.elapsed()),
+            1 => Err("1 failure".to_string()),
+            n => Err(format!("{} failures", n)),
+        }
+    }
+}
diff --git a/cranelift/filetests/src/runone.rs b/cranelift/filetests/src/runone.rs
new file mode 100644
index 0000000000..8d1c36dff8
--- /dev/null
+++ b/cranelift/filetests/src/runone.rs
@@ -0,0 +1,158 @@
+//! Run the tests in a single test file.
+
+use crate::subtest::{Context, SubTest, SubtestResult};
+use crate::{new_subtest, TestResult};
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::isa::TargetIsa;
+use cranelift_codegen::print_errors::pretty_verifier_error;
+use cranelift_codegen::settings::Flags;
+use cranelift_codegen::timing;
+use cranelift_codegen::verify_function;
+use cranelift_reader::{parse_test, IsaSpec, ParseOptions};
+use log::info;
+use std::borrow::Cow;
+use std::fs;
+use std::io::{self, Read};
+use std::path::Path;
+use std::time;
+
+/// Read an entire file into a string.
+fn read_to_string<P: AsRef<Path>>(path: P) -> io::Result<String> {
+    let mut file = fs::File::open(path)?;
+    let mut buffer = String::new();
+    file.read_to_string(&mut buffer)?;
+    Ok(buffer)
+}
+
+/// Load `path` and run the test in it.
+///
+/// If running this test causes a panic, it will propagate as normal.
+pub fn run(path: &Path, passes: Option<&[String]>, target: Option<&str>) -> TestResult {
+    let _tt = timing::process_file();
+    info!("---\nFile: {}", path.to_string_lossy());
+    let started = time::Instant::now();
+    let buffer = read_to_string(path).map_err(|e| e.to_string())?;
+    let options = ParseOptions {
+        target,
+        passes,
+        ..ParseOptions::default()
+    };
+
+    let testfile = match parse_test(&buffer, options) {
+        Ok(testfile) => testfile,
+        Err(e) => {
+            if e.is_warning {
+                println!(
+                    "skipping test {:?} (line {}): {}",
+                    path, e.location.line_number, e.message
+                );
+                return Ok(started.elapsed());
+            }
+            return Err(e.to_string());
+        }
+    };
+
+    if testfile.functions.is_empty() {
+        return Err("no functions found".to_string());
+    }
+
+    // Parse the test commands.
+    let mut tests = testfile
+        .commands
+        .iter()
+        .map(new_subtest)
+        .collect::<SubtestResult<Vec<_>>>()?;
+
+    // Flags to use for those tests that don't need an ISA.
+    // This is the cumulative effect of all the `set` commands in the file.
+    let flags = match testfile.isa_spec {
+        IsaSpec::None(ref f) => f,
+        IsaSpec::Some(ref v) => v.last().expect("Empty ISA list").flags(),
+    };
+
+    // Sort the tests so the mutators are at the end, and those that don't need the verifier are at
+    // the front.
+    tests.sort_by_key(|st| (st.is_mutating(), st.needs_verifier()));
+
+    // Expand the tests into (test, flags, isa) tuples.
+    let mut tuples = test_tuples(&tests, &testfile.isa_spec, flags)?;
+
+    // Isolate the last test in the hope that this is the only mutating test.
+    // If so, we can completely avoid cloning functions.
+    let last_tuple = match tuples.pop() {
+        None => return Err("no test commands found".to_string()),
+        Some(t) => t,
+    };
+
+    for (func, details) in testfile.functions {
+        let mut context = Context {
+            preamble_comments: &testfile.preamble_comments,
+            details,
+            verified: false,
+            flags,
+            isa: None,
+        };
+
+        for tuple in &tuples {
+            run_one_test(*tuple, Cow::Borrowed(&func), &mut context)?;
+        }
+        // Run the last test with an owned function which means it won't need to clone it before
+        // mutating.
+        run_one_test(last_tuple, Cow::Owned(func), &mut context)?;
+    }
+
+    Ok(started.elapsed())
+}
+
+// Given a slice of tests, generate a vector of (test, flags, isa) tuples.
+fn test_tuples<'a>(
+    tests: &'a [Box<dyn SubTest>],
+    isa_spec: &'a IsaSpec,
+    no_isa_flags: &'a Flags,
+) -> SubtestResult<Vec<(&'a dyn SubTest, &'a Flags, Option<&'a dyn TargetIsa>)>> {
+    let mut out = Vec::new();
+    for test in tests {
+        if test.needs_isa() {
+            match *isa_spec {
+                IsaSpec::None(_) => {
+                    // TODO: Generate a list of default ISAs.
+                    return Err(format!("test {} requires an ISA", test.name()));
+                }
+                IsaSpec::Some(ref isas) => {
+                    for isa in isas {
+                        out.push((&**test, isa.flags(), Some(&**isa)));
+                    }
+                }
+            }
+        } else {
+            // This test doesn't require an ISA, and we only want to run one instance of it.
+            // Still, give it an ISA ref if we happen to have a unique one.
+            // For example, `test cat` can use this to print encodings and register names.
+            out.push((&**test, no_isa_flags, isa_spec.unique_isa()));
+        }
+    }
+    Ok(out)
+}
+
+fn run_one_test<'a>(
+    tuple: (&'a dyn SubTest, &'a Flags, Option<&'a dyn TargetIsa>),
+    func: Cow<Function>,
+    context: &mut Context<'a>,
+) -> SubtestResult<()> {
+    let (test, flags, isa) = tuple;
+    let name = format!("{}({})", test.name(), func.name);
+    info!("Test: {} {}", name, isa.map_or("-", TargetIsa::name));
+
+    context.flags = flags;
+    context.isa = isa;
+
+    // Should we run the verifier before this test?
+    if !context.verified && test.needs_verifier() {
+        verify_function(&func, context.flags_or_isa())
+            .map_err(|errors| pretty_verifier_error(&func, isa, None, errors))?;
+        context.verified = true;
+    }
+
+    test.run(func, context)
+        .map_err(|e| format!("{}:\n{}", name, e))
+}
diff --git a/cranelift/filetests/src/subtest.rs b/cranelift/filetests/src/subtest.rs
new file mode 100644
index 0000000000..0264169e4c
--- /dev/null
+++ b/cranelift/filetests/src/subtest.rs
@@ -0,0 +1,101 @@
+//! `SubTest` trait.
+
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::isa::TargetIsa;
+use cranelift_codegen::settings::{Flags, FlagsOrIsa};
+use cranelift_reader::{Comment, Details};
+use filecheck::{Checker, CheckerBuilder, NO_VARIABLES};
+use std::borrow::Cow;
+
+pub type SubtestResult<T> = Result<T, String>;
+
+/// Context for running a test on a single function.
+pub struct Context<'a> {
+    /// Comments from the preamble of the test file. These apply to all functions.
+    pub preamble_comments: &'a [Comment<'a>],
+
+    /// Additional details about the function from the parser.
+    pub details: Details<'a>,
+
+    /// Was the function verified before running this test?
+    pub verified: bool,
+
+    /// ISA-independent flags for this test.
+    pub flags: &'a Flags,
+
+    /// Target ISA to test against. Only guaranteed to be present for sub-tests whose `needs_isa`
+    /// method returned `true`. For other sub-tests, this is set if the test file has a unique ISA.
+    pub isa: Option<&'a dyn TargetIsa>,
+}
+
+impl<'a> Context<'a> {
+    /// Get a `FlagsOrIsa` object for passing to the verifier.
+    pub fn flags_or_isa(&self) -> FlagsOrIsa<'a> {
+        FlagsOrIsa {
+            flags: self.flags,
+            isa: self.isa,
+        }
+    }
+}
+
+/// Common interface for implementations of test commands.
+///
+/// Each `.clif` test file may contain multiple test commands, each represented by a `SubTest`
+/// trait object.
+pub trait SubTest {
+    /// Name identifying this subtest. Typically the same as the test command.
+    fn name(&self) -> &'static str;
+
+    /// Should the verifier be run on the function before running the test?
+    fn needs_verifier(&self) -> bool {
+        true
+    }
+
+    /// Does this test mutate the function when it runs?
+    /// This is used as a hint to avoid cloning the function needlessly.
+    fn is_mutating(&self) -> bool {
+        false
+    }
+
+    /// Does this test need a `TargetIsa` trait object?
+    fn needs_isa(&self) -> bool {
+        false
+    }
+
+    /// Run this test on `func`.
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()>;
+}
+
+/// Run filecheck on `text`, using directives extracted from `context`.
+pub fn run_filecheck(text: &str, context: &Context) -> SubtestResult<()> {
+    let checker = build_filechecker(context)?;
+    if checker
+        .check(text, NO_VARIABLES)
+        .map_err(|e| format!("filecheck: {}", e))?
+    {
+        Ok(())
+    } else {
+        // Filecheck mismatch. Emit an explanation as output.
+        let (_, explain) = checker
+            .explain(text, NO_VARIABLES)
+            .map_err(|e| format!("explain: {}", e))?;
+        Err(format!("filecheck failed:\n{}{}", checker, explain))
+    }
+}
+
+/// Build a filechecker using the directives in the file preamble and the function's comments.
+pub fn build_filechecker(context: &Context) -> SubtestResult<Checker> {
+    let mut builder = CheckerBuilder::new();
+    // Preamble comments apply to all functions.
+    for comment in context.preamble_comments {
+        builder
+            .directive(comment.text)
+            .map_err(|e| format!("filecheck: {}", e))?;
+    }
+    for comment in &context.details.comments {
+        builder
+            .directive(comment.text)
+            .map_err(|e| format!("filecheck: {}", e))?;
+    }
+    Ok(builder.finish())
+}
diff --git a/cranelift/filetests/src/test_binemit.rs b/cranelift/filetests/src/test_binemit.rs
new file mode 100644
index 0000000000..d2cfdd97ca
--- /dev/null
+++ b/cranelift/filetests/src/test_binemit.rs
@@ -0,0 +1,346 @@
+//! Test command for testing the binary machine code emission.
+//!
+//! The `binemit` test command generates binary machine code for every instruction in the input
+//! functions and compares the results to the expected output.
+
+use crate::match_directive::match_directive;
+use crate::subtest::{Context, SubTest, SubtestResult};
+use cranelift_codegen::binemit::{self, CodeInfo, CodeSink, RegDiversions};
+use cranelift_codegen::dbg::DisplayList;
+use cranelift_codegen::dominator_tree::DominatorTree;
+use cranelift_codegen::flowgraph::ControlFlowGraph;
+use cranelift_codegen::ir;
+use cranelift_codegen::ir::entities::AnyEntity;
+use cranelift_codegen::isa;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_codegen::settings::OptLevel;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::fmt::Write;
+
+struct TestBinEmit;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "binemit");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestBinEmit))
+    }
+}
+
+/// Code sink that generates text.
+struct TextSink {
+    code_size: binemit::CodeOffset,
+    offset: binemit::CodeOffset,
+    text: String,
+}
+
+impl TextSink {
+    /// Create a new empty TextSink.
diff --git a/cranelift/filetests/src/test_binemit.rs b/cranelift/filetests/src/test_binemit.rs
new file mode 100644
index 0000000000..d2cfdd97ca
--- /dev/null
+++ b/cranelift/filetests/src/test_binemit.rs
@@ -0,0 +1,346 @@
+//! Test command for testing the binary machine code emission.
+//!
+//! The `binemit` test command generates binary machine code for every instruction in the input
+//! functions and compares the results to the expected output.
+
+use crate::match_directive::match_directive;
+use crate::subtest::{Context, SubTest, SubtestResult};
+use cranelift_codegen::binemit::{self, CodeInfo, CodeSink, RegDiversions};
+use cranelift_codegen::dbg::DisplayList;
+use cranelift_codegen::dominator_tree::DominatorTree;
+use cranelift_codegen::flowgraph::ControlFlowGraph;
+use cranelift_codegen::ir;
+use cranelift_codegen::ir::entities::AnyEntity;
+use cranelift_codegen::isa;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_codegen::settings::OptLevel;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::fmt::Write;
+
+struct TestBinEmit;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "binemit");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestBinEmit))
+    }
+}
+
+/// Code sink that generates text.
+struct TextSink {
+    code_size: binemit::CodeOffset,
+    offset: binemit::CodeOffset,
+    text: String,
+}
+
+impl TextSink {
+    /// Create a new empty `TextSink`.
+    pub fn new() -> Self {
+        Self {
+            code_size: 0,
+            offset: 0,
+            text: String::new(),
+        }
+    }
+}
+
+impl binemit::CodeSink for TextSink {
+    fn offset(&self) -> binemit::CodeOffset {
+        self.offset
+    }
+
+    fn put1(&mut self, x: u8) {
+        write!(self.text, "{:02x} ", x).unwrap();
+        self.offset += 1;
+    }
+
+    fn put2(&mut self, x: u16) {
+        write!(self.text, "{:04x} ", x).unwrap();
+        self.offset += 2;
+    }
+
+    fn put4(&mut self, x: u32) {
+        write!(self.text, "{:08x} ", x).unwrap();
+        self.offset += 4;
+    }
+
+    fn put8(&mut self, x: u64) {
+        write!(self.text, "{:016x} ", x).unwrap();
+        self.offset += 8;
+    }
+
+    fn reloc_block(&mut self, reloc: binemit::Reloc, block_offset: binemit::CodeOffset) {
+        write!(self.text, "{}({}) ", reloc, block_offset).unwrap();
+    }
+
+    fn reloc_external(
+        &mut self,
+        reloc: binemit::Reloc,
+        name: &ir::ExternalName,
+        addend: binemit::Addend,
+    ) {
+        write!(self.text, "{}({}", reloc, name).unwrap();
+        if addend != 0 {
+            write!(self.text, "{:+}", addend).unwrap();
+        }
+        write!(self.text, ") ").unwrap();
+    }
+
+    fn reloc_constant(&mut self, reloc: binemit::Reloc, constant: ir::ConstantOffset) {
+        write!(self.text, "{}({}) ", reloc, constant).unwrap();
+    }
+
+    fn reloc_jt(&mut self, reloc: binemit::Reloc, jt: ir::JumpTable) {
+        write!(self.text, "{}({}) ", reloc, jt).unwrap();
+    }
+
+    fn trap(&mut self, code: ir::TrapCode, _srcloc: ir::SourceLoc) {
+        write!(self.text, "{} ", code).unwrap();
+    }
+
+    fn begin_jumptables(&mut self) {
+        self.code_size = self.offset
+    }
+    fn begin_rodata(&mut self) {}
+    fn end_codegen(&mut self) {}
+    fn add_stackmap(
+        &mut self,
+        _: &[ir::entities::Value],
+        _: &ir::Function,
+        _: &dyn isa::TargetIsa,
+    ) {
+    }
+}
+
+impl SubTest for TestBinEmit {
+    fn name(&self) -> &'static str {
+        "binemit"
+    }
+
+    fn is_mutating(&self) -> bool {
+        true
+    }
+
+    fn needs_isa(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<ir::Function>, context: &Context) -> SubtestResult<()> {
+        let isa = context.isa.expect("binemit needs an ISA");
+        let encinfo = isa.encoding_info();
+        // TODO: Run a verifier pass over the code first to detect any bad encodings or missing/bad
+        // value locations. The current error reporting is just crashing...
+        let mut func = func.into_owned();
+
+        // Fix the stack frame layout so we can test spill/fill encodings.
+        let min_offset = func
+            .stack_slots
+            .values()
+            .map(|slot| slot.offset.unwrap())
+            .min();
+        func.stack_slots.layout_info = min_offset.map(|off| ir::StackLayoutInfo {
+            frame_size: (-off) as u32,
+            inbound_args_size: 0,
+        });
+
+        let opt_level = isa.flags().opt_level();
+
+        // Give an encoding to any instruction that doesn't already have one.
+        let mut divert = RegDiversions::new();
+        for block in func.layout.blocks() {
+            divert.clear();
+            for inst in func.layout.block_insts(block) {
+                if !func.encodings[inst].is_legal() {
+                    // Find an encoding that satisfies both immediate field and register
+                    // constraints.
+                    if let Some(enc) = {
+                        let mut legal_encodings = isa
+                            .legal_encodings(&func, &func.dfg[inst], func.dfg.ctrl_typevar(inst))
+                            .filter(|e| {
+                                let recipe_constraints = &encinfo.constraints[e.recipe()];
+                                recipe_constraints.satisfied(inst, &divert, &func)
+                            });
+
+                        if opt_level == OptLevel::SpeedAndSize {
+                            // Get the smallest legal encoding
+                            legal_encodings
+                                .min_by_key(|&e| encinfo.byte_size(e, inst, &divert, &func))
+                        } else {
+                            // If not optimizing, just use the first encoding.
+ legal_encodings.next() + } + } { + func.encodings[inst] = enc; + } + } + divert.apply(&func.dfg[inst]); + } + } + + // Relax branches and compute block offsets based on the encodings. + let mut cfg = ControlFlowGraph::with_function(&func); + let mut domtree = DominatorTree::with_function(&func, &cfg); + let CodeInfo { total_size, .. } = + binemit::relax_branches(&mut func, &mut cfg, &mut domtree, isa) + .map_err(|e| pretty_error(&func, context.isa, e))?; + + // Collect all of the 'bin:' directives on instructions. + let mut bins = HashMap::new(); + for comment in &context.details.comments { + if let Some(want) = match_directive(comment.text, "bin:") { + match comment.entity { + AnyEntity::Inst(inst) => { + if let Some(prev) = bins.insert(inst, want) { + return Err(format!( + "multiple 'bin:' directives on {}: '{}' and '{}'", + func.dfg.display_inst(inst, isa), + prev, + want + )); + } + } + _ => { + return Err(format!( + "'bin:' directive on non-inst {}: {}", + comment.entity, comment.text + )); + } + } + } + } + if bins.is_empty() { + return Err("No 'bin:' directives found".to_string()); + } + + // Now emit all instructions. + let mut sink = TextSink::new(); + for block in func.layout.blocks() { + divert.clear(); + // Correct header offsets should have been computed by `relax_branches()`. + assert_eq!( + sink.offset, func.offsets[block], + "Inconsistent {} header offset", + block + ); + for (offset, inst, enc_bytes) in func.inst_offsets(block, &encinfo) { + assert_eq!(sink.offset, offset); + sink.text.clear(); + let enc = func.encodings[inst]; + + // Send legal encodings into the emitter. + if enc.is_legal() { + // Generate a better error message if output locations are not specified. + validate_location_annotations(&func, inst, isa, false)?; + + let before = sink.offset; + isa.emit_inst(&func, inst, &mut divert, &mut sink); + let emitted = sink.offset - before; + // Verify the encoding recipe sizes against the ISAs emit_inst implementation. + assert_eq!( + emitted, + enc_bytes, + "Inconsistent size for [{}] {}", + encinfo.display(enc), + func.dfg.display_inst(inst, isa) + ); + } + + // Check against bin: directives. + if let Some(want) = bins.remove(&inst) { + if !enc.is_legal() { + // A possible cause of an unencoded instruction is a missing location for + // one of the input/output operands. + validate_location_annotations(&func, inst, isa, true)?; + validate_location_annotations(&func, inst, isa, false)?; + + // Do any encodings exist? 
+                        let encodings = isa
+                            .legal_encodings(&func, &func.dfg[inst], func.dfg.ctrl_typevar(inst))
+                            .map(|e| encinfo.display(e))
+                            .collect::<Vec<_>>();
+
+                        if encodings.is_empty() {
+                            return Err(format!(
+                                "No encodings found for: {}",
+                                func.dfg.display_inst(inst, isa)
+                            ));
+                        }
+                        return Err(format!(
+                            "No matching encodings for {} in {}",
+                            func.dfg.display_inst(inst, isa),
+                            DisplayList(&encodings),
+                        ));
+                    }
+                    let have = sink.text.trim();
+                    if have != want {
+                        return Err(format!(
+                            "Bad machine code for {}: {}\nWant: {}\nGot: {}",
+                            inst,
+                            func.dfg.display_inst(inst, isa),
+                            want,
+                            have
+                        ));
+                    }
+                }
+            }
+        }
+
+        sink.begin_jumptables();
+
+        for (jt, jt_data) in func.jump_tables.iter() {
+            let jt_offset = func.jt_offsets[jt];
+            for block in jt_data.iter() {
+                let rel_offset: i32 = func.offsets[*block] as i32 - jt_offset as i32;
+                sink.put4(rel_offset as u32)
+            }
+        }
+
+        sink.begin_rodata();
+
+        // output constants
+        for (_, constant_data) in func.dfg.constants.iter() {
+            for byte in constant_data.iter() {
+                sink.put1(*byte)
+            }
+        }
+
+        sink.end_codegen();
+
+        if sink.offset != total_size {
+            return Err(format!(
+                "Expected code size {}, got {}",
+                total_size, sink.offset
+            ));
+        }
+
+        Ok(())
+    }
+}
+
+/// Validate registers/stack slots are correctly annotated.
+fn validate_location_annotations(
+    func: &ir::Function,
+    inst: ir::Inst,
+    isa: &dyn isa::TargetIsa,
+    validate_inputs: bool,
+) -> SubtestResult<()> {
+    let values = if validate_inputs {
+        func.dfg.inst_args(inst)
+    } else {
+        func.dfg.inst_results(inst)
+    };
+
+    if let Some(&v) = values.iter().find(|&&v| !func.locations[v].is_assigned()) {
+        Err(format!(
+            "Need register/stack slot annotation for {} in {}",
+            v,
+            func.dfg.display_inst(inst, isa)
+        ))
+    } else {
+        Ok(())
+    }
+}
diff --git a/cranelift/filetests/src/test_cat.rs b/cranelift/filetests/src/test_cat.rs
new file mode 100644
index 0000000000..756b5e5700
--- /dev/null
+++ b/cranelift/filetests/src/test_cat.rs
@@ -0,0 +1,37 @@
+//! The `cat` subtest.
+
+use crate::subtest::{self, Context, SubTest, SubtestResult};
+use cranelift_codegen::ir::Function;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+
+/// Object implementing the `test cat` sub-test.
+///
+/// This command is used for testing the parser and function printer. It simply parses a function
+/// and prints it out again.
+///
+/// The result is verified by filecheck.
+struct TestCat;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "cat");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestCat))
+    }
+}
+
+impl SubTest for TestCat {
+    fn name(&self) -> &'static str {
+        "cat"
+    }
+
+    fn needs_verifier(&self) -> bool {
+        false
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        subtest::run_filecheck(&func.display(context.isa).to_string(), context)
+    }
+}
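`run_filecheck` drives the `filecheck` crate directly, so the same API can be exercised outside a subtest. A small sketch of that flow (the directive and input strings are made up):

```rust
use filecheck::{CheckerBuilder, NO_VARIABLES};

fn main() -> Result<(), String> {
    let mut builder = CheckerBuilder::new();
    // In a .clif test this text would come from a `; check: ...` comment.
    builder
        .directive("check: function %foo")
        .map_err(|e| format!("filecheck: {}", e))?;
    let checker = builder.finish();

    let matched = checker
        .check("function %foo() {\n}", NO_VARIABLES)
        .map_err(|e| format!("filecheck: {}", e))?;
    assert!(matched);
    Ok(())
}
```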
diff --git a/cranelift/filetests/src/test_compile.rs b/cranelift/filetests/src/test_compile.rs
new file mode 100644
index 0000000000..0ac8c48c5e
--- /dev/null
+++ b/cranelift/filetests/src/test_compile.rs
@@ -0,0 +1,122 @@
+//! Test command for testing the code generator pipeline.
+//!
+//! The `compile` test command runs each function through the full code generator pipeline.
+
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen;
+use cranelift_codegen::binemit::{self, CodeInfo};
+use cranelift_codegen::ir;
+use cranelift_codegen::isa;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_reader::TestCommand;
+use log::info;
+use std::borrow::Cow;
+
+struct TestCompile;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "compile");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestCompile))
+    }
+}
+
+impl SubTest for TestCompile {
+    fn name(&self) -> &'static str {
+        "compile"
+    }
+
+    fn is_mutating(&self) -> bool {
+        true
+    }
+
+    fn needs_isa(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<ir::Function>, context: &Context) -> SubtestResult<()> {
+        let isa = context.isa.expect("compile needs an ISA");
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+
+        let CodeInfo { total_size, .. } = comp_ctx
+            .compile(isa)
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, e))?;
+
+        info!(
+            "Generated {} bytes of code:\n{}",
+            total_size,
+            comp_ctx.func.display(isa)
+        );
+
+        // Verify that the returned code size matches the emitted bytes.
+        let mut sink = SizeSink { offset: 0 };
+        binemit::emit_function(
+            &comp_ctx.func,
+            |func, inst, div, sink, isa| isa.emit_inst(func, inst, div, sink),
+            &mut sink,
+            isa,
+        );
+
+        if sink.offset != total_size {
+            return Err(format!(
+                "Expected code size {}, got {}",
+                total_size, sink.offset
+            ));
+        }
+
+        // Run final code through filecheck.
+        let text = comp_ctx.func.display(Some(isa)).to_string();
+        run_filecheck(&text, context)
+    }
+}
+
+/// Code sink that simply counts bytes.
+struct SizeSink {
+    offset: binemit::CodeOffset,
+}
+
+impl binemit::CodeSink for SizeSink {
+    fn offset(&self) -> binemit::CodeOffset {
+        self.offset
+    }
+
+    fn put1(&mut self, _: u8) {
+        self.offset += 1;
+    }
+
+    fn put2(&mut self, _: u16) {
+        self.offset += 2;
+    }
+
+    fn put4(&mut self, _: u32) {
+        self.offset += 4;
+    }
+
+    fn put8(&mut self, _: u64) {
+        self.offset += 8;
+    }
+
+    fn reloc_block(&mut self, _reloc: binemit::Reloc, _block_offset: binemit::CodeOffset) {}
+    fn reloc_external(
+        &mut self,
+        _reloc: binemit::Reloc,
+        _name: &ir::ExternalName,
+        _addend: binemit::Addend,
+    ) {
+    }
+    fn reloc_constant(&mut self, _: binemit::Reloc, _: ir::ConstantOffset) {}
+    fn reloc_jt(&mut self, _reloc: binemit::Reloc, _jt: ir::JumpTable) {}
+    fn trap(&mut self, _code: ir::TrapCode, _srcloc: ir::SourceLoc) {}
+    fn begin_jumptables(&mut self) {}
+    fn begin_rodata(&mut self) {}
+    fn end_codegen(&mut self) {}
+    fn add_stackmap(
+        &mut self,
+        _: &[ir::entities::Value],
+        _: &ir::Function,
+        _: &dyn isa::TargetIsa,
+    ) {
+    }
+}
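For orientation, the `compile` subtest drives the same entry points an embedder would use: construct a `TargetIsa`, wrap the function in a `cranelift_codegen::Context`, and call `compile`. A rough sketch under cranelift-codegen 0.59, with the target triple hard-coded and error handling shortened:

```rust
use cranelift_codegen::ir::Function;
use cranelift_codegen::isa;
use cranelift_codegen::settings::{self, Configurable};
use std::str::FromStr;
use target_lexicon::Triple;

/// Compile `func` for x86-64 and return the generated code size in bytes.
fn compile_one(func: Function) -> Result<u32, String> {
    let mut flag_builder = settings::builder();
    flag_builder
        .set("opt_level", "speed_and_size")
        .map_err(|e| e.to_string())?;
    let isa = isa::lookup(Triple::from_str("x86_64").map_err(|e| format!("{:?}", e))?)
        .map_err(|e| e.to_string())?
        .finish(settings::Flags::new(flag_builder));

    let mut ctx = cranelift_codegen::Context::for_function(func);
    let code_info = ctx.compile(&*isa).map_err(|e| e.to_string())?;
    Ok(code_info.total_size)
}
```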
diff --git a/cranelift/filetests/src/test_dce.rs b/cranelift/filetests/src/test_dce.rs
new file mode 100644
index 0000000000..b7b72d2a77
--- /dev/null
+++ b/cranelift/filetests/src/test_dce.rs
@@ -0,0 +1,47 @@
+//! Test command for testing the DCE pass.
+//!
+//! The `dce` test command runs each function through the DCE pass after ensuring
+//! that all instructions are legal for the target.
+//!
+//! The resulting function is sent to `filecheck`.
+
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen;
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+
+struct TestDCE;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "dce");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestDCE))
+    }
+}
+
+impl SubTest for TestDCE {
+    fn name(&self) -> &'static str {
+        "dce"
+    }
+
+    fn is_mutating(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+
+        comp_ctx.flowgraph();
+        comp_ctx.compute_loop_analysis();
+        comp_ctx
+            .dce(context.flags_or_isa())
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, Into::into(e)))?;
+
+        let text = comp_ctx.func.display(context.isa).to_string();
+        run_filecheck(&text, context)
+    }
+}
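As a reminder of what the pass under test does, dead code elimination drops instructions whose results are unused and that have no side effects. A toy, self-contained illustration of the backward mark-and-keep idea on an invented mini-IR (this is not cranelift's implementation):

```rust
use std::collections::HashSet;

#[derive(Debug)]
struct Inst {
    def: u32,        // value defined by this instruction
    uses: Vec<u32>,  // values it reads
    has_side_effect: bool,
}

/// Keep only live instructions: side-effecting ones, or ones feeding a live use.
fn dce(insts: &mut Vec<Inst>) {
    let mut live: HashSet<u32> = HashSet::new();
    let mut keep = vec![false; insts.len()];
    // Walk backwards so uses are seen before the defs that feed them.
    for i in (0..insts.len()).rev() {
        if insts[i].has_side_effect || live.contains(&insts[i].def) {
            keep[i] = true;
            live.extend(insts[i].uses.iter().copied());
        }
    }
    let mut it = keep.into_iter();
    insts.retain(|_| it.next().unwrap());
}

fn main() {
    let mut insts = vec![
        Inst { def: 0, uses: vec![], has_side_effect: false },  // dead
        Inst { def: 1, uses: vec![], has_side_effect: false },  // live via v1's use below
        Inst { def: 2, uses: vec![1], has_side_effect: true },  // a store, say
    ];
    dce(&mut insts);
    assert_eq!(insts.len(), 2);
}
```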
diff --git a/cranelift/filetests/src/test_domtree.rs b/cranelift/filetests/src/test_domtree.rs
new file mode 100644
index 0000000000..f5f81ed03a
--- /dev/null
+++ b/cranelift/filetests/src/test_domtree.rs
@@ -0,0 +1,146 @@
+//! Test command for verifying dominator trees.
+//!
+//! The `test domtree` test command looks for annotations on instructions like this:
+//!
+//! ```clif
+//! jump block3 ; dominates: block3
+//! ```
+//!
+//! This annotation means that the jump instruction is expected to be the immediate dominator of
+//! `block3`.
+//!
+//! We verify that the dominator tree annotations are complete and correct.
+//!
+
+use crate::match_directive::match_directive;
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen::dominator_tree::{DominatorTree, DominatorTreePreorder};
+use cranelift_codegen::flowgraph::ControlFlowGraph;
+use cranelift_codegen::ir::entities::AnyEntity;
+use cranelift_codegen::ir::Function;
+use cranelift_reader::TestCommand;
+use std::borrow::{Borrow, Cow};
+use std::collections::HashMap;
+use std::fmt::{self, Write};
+
+struct TestDomtree;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "domtree");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestDomtree))
+    }
+}
+
+impl SubTest for TestDomtree {
+    fn name(&self) -> &'static str {
+        "domtree"
+    }
+
+    // Extract our own dominator tree from the function and compare it to the annotations.
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        let func = func.borrow();
+        let cfg = ControlFlowGraph::with_function(func);
+        let domtree = DominatorTree::with_function(func, &cfg);
+
+        // Build an expected domtree from the source annotations.
+        let mut expected = HashMap::new();
+        for comment in &context.details.comments {
+            if let Some(tail) = match_directive(comment.text, "dominates:") {
+                let inst = match comment.entity {
+                    AnyEntity::Inst(inst) => inst,
+                    _ => {
+                        return Err(format!(
+                            "annotation on non-inst {}: {}",
+                            comment.entity, comment.text
+                        ));
+                    }
+                };
+                for src_block in tail.split_whitespace() {
+                    let block = match context.details.map.lookup_str(src_block) {
+                        Some(AnyEntity::Block(block)) => block,
+                        _ => return Err(format!("expected defined block, got {}", src_block)),
+                    };
+
+                    // Annotations say that `inst` is the idom of `block`.
+                    if expected.insert(block, inst).is_some() {
+                        return Err(format!("multiple dominators for {}", src_block));
+                    }
+
+                    // Compare to computed domtree.
+                    match domtree.idom(block) {
+                        Some(got_inst) if got_inst != inst => {
+                            return Err(format!(
+                                "mismatching idoms for {}:\n\
+                                 want: {}, got: {}",
+                                src_block, inst, got_inst
+                            ));
+                        }
+                        None => {
+                            return Err(format!(
+                                "mismatching idoms for {}:\n\
+                                 want: {}, got: unreachable",
+                                src_block, inst
+                            ));
+                        }
+                        _ => {}
+                    }
+                }
+            }
+        }
+
+        // Now we know that everything in `expected` is consistent with `domtree`.
+        // All other blocks should be either unreachable or the entry block.
+        for block in func
+            .layout
+            .blocks()
+            .skip(1)
+            .filter(|block| !expected.contains_key(block))
+        {
+            if let Some(got_inst) = domtree.idom(block) {
+                return Err(format!(
+                    "mismatching idoms for renumbered {}:\n\
+                     want: unreachable, got: {}",
+                    block, got_inst
+                ));
+            }
+        }
+
+        let text = filecheck_text(func, &domtree).expect("formatting error");
+        run_filecheck(&text, context)
+    }
+}
+
+// Generate some output for filecheck testing
+fn filecheck_text(func: &Function, domtree: &DominatorTree) -> Result<String, fmt::Error> {
+    let mut s = String::new();
+
+    write!(s, "cfg_postorder:")?;
+    for &block in domtree.cfg_postorder() {
+        write!(s, " {}", block)?;
+    }
+    writeln!(s)?;
+
+    // Compute and print out a pre-order of the dominator tree.
+    writeln!(s, "domtree_preorder {{")?;
+    let mut dtpo = DominatorTreePreorder::new();
+    dtpo.compute(domtree, &func.layout);
+    let mut stack = Vec::new();
+    stack.extend(func.layout.entry_block());
+    while let Some(block) = stack.pop() {
+        write!(s, " {}:", block)?;
+        let i = stack.len();
+        for ch in dtpo.children(block) {
+            write!(s, " {}", ch)?;
+            stack.push(ch);
+        }
+        writeln!(s)?;
+        // Reverse the children we just pushed so we'll pop them in order.
+        stack[i..].reverse();
+    }
+    writeln!(s, "}}")?;
+
+    Ok(s)
+}
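`match_directive` is used throughout these subtests to pull the tail out of comments like `; dominates: block3`. Its source is not part of this patch; a plausible minimal version, for illustration only:

```rust
/// If `comment` contains `directive` (e.g. "dominates:"), return the trimmed text
/// following it. Leading semicolons and whitespace before the directive are ignored.
fn match_directive<'a>(comment: &'a str, directive: &str) -> Option<&'a str> {
    let trimmed = comment.trim_start_matches(';').trim_start();
    if trimmed.starts_with(directive) {
        Some(trimmed[directive.len()..].trim())
    } else {
        None
    }
}

fn main() {
    assert_eq!(match_directive("; dominates: block3", "dominates:"), Some("block3"));
    assert_eq!(match_directive("; just a comment", "dominates:"), None);
}
```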
diff --git a/cranelift/filetests/src/test_fde.rs b/cranelift/filetests/src/test_fde.rs
new file mode 100644
index 0000000000..3e3747fdde
--- /dev/null
+++ b/cranelift/filetests/src/test_fde.rs
@@ -0,0 +1,415 @@
+//! Test command for verifying the unwind information emitted for each function.
+//!
+//! The `unwind` test command runs each function through the full code generator pipeline.
+#![cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
+
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen;
+use cranelift_codegen::binemit::{FrameUnwindKind, FrameUnwindOffset, FrameUnwindSink, Reloc};
+use cranelift_codegen::ir;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+use std::fmt::Write;
+
+struct TestUnwind;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "fde");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestUnwind))
+    }
+}
+
+impl SubTest for TestUnwind {
+    fn name(&self) -> &'static str {
+        "fde"
+    }
+
+    fn is_mutating(&self) -> bool {
+        false
+    }
+
+    fn needs_isa(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<ir::Function>, context: &Context) -> SubtestResult<()> {
+        let isa = context.isa.expect("unwind needs an ISA");
+
+        if func.signature.call_conv != cranelift_codegen::isa::CallConv::SystemV {
+            return run_filecheck(&"No unwind information.", context);
+        }
+
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+        comp_ctx.func.collect_frame_layout_info();
+
+        comp_ctx.compile(isa).expect("failed to compile function");
+
+        struct SimpleUnwindSink(pub Vec<u8>, pub usize, pub Vec<(Reloc, usize)>);
+        impl FrameUnwindSink for SimpleUnwindSink {
+            fn len(&self) -> FrameUnwindOffset {
+                self.0.len()
+            }
+            fn bytes(&mut self, b: &[u8]) {
+                self.0.extend_from_slice(b);
+            }
+            fn reloc(&mut self, r: Reloc, off: FrameUnwindOffset) {
+                self.2.push((r, off));
+            }
+            fn set_entry_offset(&mut self, off: FrameUnwindOffset) {
+                self.1 = off;
+            }
+        }
+
+        let mut sink = SimpleUnwindSink(Vec::new(), 0, Vec::new());
+        comp_ctx.emit_unwind_info(isa, FrameUnwindKind::Libunwind, &mut sink);
+
+        let mut text = String::new();
+        if sink.0.is_empty() {
+            writeln!(text, "No unwind information.").unwrap();
+        } else {
+            print_unwind_info(&mut text, &sink.0, isa.pointer_bytes());
+            writeln!(text, "Entry: {}", sink.1).unwrap();
+            writeln!(text, "Relocs: {:?}", sink.2).unwrap();
+        }
+
+        run_filecheck(&text, context)
+    }
+}
+
+fn register_name<'a>(register: gimli::Register) -> std::borrow::Cow<'a, str> {
+    Cow::Owned(format!("r{}", register.0))
+}
+
+fn print_unwind_info(text: &mut String, mem: &[u8], address_size: u8) {
+    let mut eh_frame = gimli::EhFrame::new(mem, gimli::LittleEndian);
+    eh_frame.set_address_size(address_size);
+    let bases = gimli::BaseAddresses::default();
+    dwarfdump::dump_eh_frame(text, &eh_frame, &bases, &register_name).unwrap();
+}
+
+mod dwarfdump {
+    // Copied from https://github.com/gimli-rs/gimli/blob/1e49ffc9af4ec64a1b7316924d73c933dd7157c5/examples/dwarfdump.rs
+    use gimli::UnwindSection;
+    use std::borrow::Cow;
+    use std::collections::HashMap;
+    use std::fmt::{self, Debug, Write};
+    use std::result;
+
+    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+    pub(super) enum Error {
+        GimliError(gimli::Error),
+        IoError,
+    }
+
+    impl fmt::Display for Error {
+        #[inline]
+        fn fmt(&self, f: &mut fmt::Formatter) -> ::std::result::Result<(), fmt::Error> {
+            Debug::fmt(self, f)
+        }
+    }
+
+    impl From<gimli::Error> for Error {
+        fn from(err: gimli::Error) -> Self {
+            Self::GimliError(err)
+        }
+    }
+
+    impl From<fmt::Error> for Error {
+        fn from(_: fmt::Error) -> Self {
+            Self::IoError
+        }
+    }
+
+    pub(super) type Result<T> = result::Result<T, Error>;
+
+    pub(super) trait Reader: gimli::Reader<Offset = usize> + Send + Sync {}
+
+    impl<'input, Endian> Reader for gimli::EndianSlice<'input, Endian> where
+        Endian: gimli::Endianity + Send + Sync
+    {
+    }
+
+    pub(super) fn dump_eh_frame<R: Reader, W: Write>(
+        w: &mut W,
+        eh_frame: &gimli::EhFrame<R>,
+        bases: &gimli::BaseAddresses,
+        register_name: &dyn Fn(gimli::Register) -> Cow<'static, str>,
+    ) -> Result<()> {
+        let mut cies = HashMap::new();
+
+        let mut entries = eh_frame.entries(bases);
+        loop {
+            match entries.next()? {
+                None => return Ok(()),
+                Some(gimli::CieOrFde::Cie(cie)) => {
+                    writeln!(w, "{:#010x}: CIE", cie.offset())?;
+                    writeln!(w, "        length: {:#010x}", cie.entry_len())?;
+                    // TODO: CIE_id
+                    writeln!(w, "       version: {:#04x}", cie.version())?;
+                    // TODO: augmentation
+                    writeln!(w, "    code_align: {}", cie.code_alignment_factor())?;
+                    writeln!(w, "    data_align: {}", cie.data_alignment_factor())?;
+                    writeln!(w, "   ra_register: {:#x}", cie.return_address_register().0)?;
+                    if let Some(encoding) = cie.lsda_encoding() {
+                        writeln!(w, " lsda_encoding: {:#02x}", encoding.0)?;
+                    }
+                    if let Some((encoding, personality)) = cie.personality_with_encoding() {
+                        write!(w, "   personality: {:#02x} ", encoding.0)?;
+                        dump_pointer(w, personality)?;
+                        writeln!(w)?;
+                    }
+                    if let Some(encoding) = cie.fde_address_encoding() {
+                        writeln!(w, "  fde_encoding: {:#02x}", encoding.0)?;
+                    }
+                    dump_cfi_instructions(
+                        w,
+                        cie.instructions(eh_frame, bases),
+                        true,
+                        register_name,
+                    )?;
+                    writeln!(w)?;
+                }
+                Some(gimli::CieOrFde::Fde(partial)) => {
+                    let mut offset = None;
+                    let fde = partial.parse(|_, bases, o| {
+                        offset = Some(o);
+                        cies.entry(o)
+                            .or_insert_with(|| eh_frame.cie_from_offset(bases, o))
+                            .clone()
+                    })?;
+
+                    writeln!(w)?;
+                    writeln!(w, "{:#010x}: FDE", fde.offset())?;
+                    writeln!(w, "        length: {:#010x}", fde.entry_len())?;
+                    writeln!(w, "   CIE_pointer: {:#010x}", offset.unwrap().0)?;
+                    // TODO: symbolicate the start address like the canonical dwarfdump does.
+                    writeln!(w, "    start_addr: {:#018x}", fde.initial_address())?;
+                    writeln!(
+                        w,
+                        "    range_size: {:#018x} (end_addr = {:#018x})",
+                        fde.len(),
+                        fde.initial_address() + fde.len()
+                    )?;
+                    if let Some(lsda) = fde.lsda() {
+                        write!(w, "          lsda: ")?;
+                        dump_pointer(w, lsda)?;
+                        writeln!(w)?;
+                    }
+                    dump_cfi_instructions(
+                        w,
+                        fde.instructions(eh_frame, bases),
+                        false,
+                        register_name,
+                    )?;
+                    writeln!(w)?;
+                }
+            }
+        }
+    }
+
+    fn dump_pointer<W: Write>(w: &mut W, p: gimli::Pointer) -> Result<()> {
+        match p {
+            gimli::Pointer::Direct(p) => {
+                write!(w, "{:#018x}", p)?;
+            }
+            gimli::Pointer::Indirect(p) => {
+                write!(w, "({:#018x})", p)?;
+            }
+        }
+        Ok(())
+    }
+
+    #[allow(clippy::unneeded_field_pattern)]
+    fn dump_cfi_instructions<R: Reader, W: Write>(
+        w: &mut W,
+        mut insns: gimli::CallFrameInstructionIter<R>,
+        is_initial: bool,
+        register_name: &dyn Fn(gimli::Register) -> Cow<'static, str>,
+    ) -> Result<()> {
+        use gimli::CallFrameInstruction::*;
+
+        // TODO: we need to actually evaluate these instructions as we iterate them
+        // so we can print the initialized state for CIEs, and each unwind row's
+        // registers for FDEs.
+        //
+        // TODO: We should print DWARF expressions for the CFI instructions that
+        // embed DWARF expressions within themselves.
+ + if !is_initial { + writeln!(w, " Instructions:")?; + } + + loop { + match insns.next() { + Err(e) => { + writeln!(w, "Failed to decode CFI instruction: {}", e)?; + return Ok(()); + } + Ok(None) => { + if is_initial { + writeln!(w, " Instructions: Init State:")?; + } + return Ok(()); + } + Ok(Some(op)) => match op { + SetLoc { address } => { + writeln!(w, " DW_CFA_set_loc ({:#x})", address)?; + } + AdvanceLoc { delta } => { + writeln!(w, " DW_CFA_advance_loc ({})", delta)?; + } + DefCfa { register, offset } => { + writeln!( + w, + " DW_CFA_def_cfa ({}, {})", + register_name(register), + offset + )?; + } + DefCfaSf { + register, + factored_offset, + } => { + writeln!( + w, + " DW_CFA_def_cfa_sf ({}, {})", + register_name(register), + factored_offset + )?; + } + DefCfaRegister { register } => { + writeln!( + w, + " DW_CFA_def_cfa_register ({})", + register_name(register) + )?; + } + DefCfaOffset { offset } => { + writeln!(w, " DW_CFA_def_cfa_offset ({})", offset)?; + } + DefCfaOffsetSf { factored_offset } => { + writeln!( + w, + " DW_CFA_def_cfa_offset_sf ({})", + factored_offset + )?; + } + DefCfaExpression { expression: _ } => { + writeln!(w, " DW_CFA_def_cfa_expression (...)")?; + } + Undefined { register } => { + writeln!( + w, + " DW_CFA_undefined ({})", + register_name(register) + )?; + } + SameValue { register } => { + writeln!( + w, + " DW_CFA_same_value ({})", + register_name(register) + )?; + } + Offset { + register, + factored_offset, + } => { + writeln!( + w, + " DW_CFA_offset ({}, {})", + register_name(register), + factored_offset + )?; + } + OffsetExtendedSf { + register, + factored_offset, + } => { + writeln!( + w, + " DW_CFA_offset_extended_sf ({}, {})", + register_name(register), + factored_offset + )?; + } + ValOffset { + register, + factored_offset, + } => { + writeln!( + w, + " DW_CFA_val_offset ({}, {})", + register_name(register), + factored_offset + )?; + } + ValOffsetSf { + register, + factored_offset, + } => { + writeln!( + w, + " DW_CFA_val_offset_sf ({}, {})", + register_name(register), + factored_offset + )?; + } + Register { + dest_register, + src_register, + } => { + writeln!( + w, + " DW_CFA_register ({}, {})", + register_name(dest_register), + register_name(src_register) + )?; + } + Expression { + register, + expression: _, + } => { + writeln!( + w, + " DW_CFA_expression ({}, ...)", + register_name(register) + )?; + } + ValExpression { + register, + expression: _, + } => { + writeln!( + w, + " DW_CFA_val_expression ({}, ...)", + register_name(register) + )?; + } + Restore { register } => { + writeln!( + w, + " DW_CFA_restore ({})", + register_name(register) + )?; + } + RememberState => { + writeln!(w, " DW_CFA_remember_state")?; + } + RestoreState => { + writeln!(w, " DW_CFA_restore_state")?; + } + ArgsSize { size } => { + writeln!(w, " DW_CFA_GNU_args_size ({})", size)?; + } + Nop => { + writeln!(w, " DW_CFA_nop")?; + } + }, + } + } + } +} diff --git a/cranelift/filetests/src/test_legalizer.rs b/cranelift/filetests/src/test_legalizer.rs new file mode 100644 index 0000000000..ec182f4092 --- /dev/null +++ b/cranelift/filetests/src/test_legalizer.rs @@ -0,0 +1,49 @@ +//! Test command for checking the IR legalizer. +//! +//! The `test legalizer` test command runs each function through `legalize_function()` and sends +//! the result to filecheck. 
+
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen;
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+
+struct TestLegalizer;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "legalizer");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestLegalizer))
+    }
+}
+
+impl SubTest for TestLegalizer {
+    fn name(&self) -> &'static str {
+        "legalizer"
+    }
+
+    fn is_mutating(&self) -> bool {
+        true
+    }
+
+    fn needs_isa(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+        let isa = context.isa.expect("legalizer needs an ISA");
+
+        comp_ctx.compute_cfg();
+        comp_ctx
+            .legalize(isa)
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, e))?;
+
+        let text = comp_ctx.func.display(Some(isa)).to_string();
+        run_filecheck(&text, context)
+    }
+}
diff --git a/cranelift/filetests/src/test_licm.rs b/cranelift/filetests/src/test_licm.rs
new file mode 100644
index 0000000000..31385f8081
--- /dev/null
+++ b/cranelift/filetests/src/test_licm.rs
@@ -0,0 +1,48 @@
+//! Test command for testing the LICM pass.
+//!
+//! The `licm` test command runs each function through the LICM pass after ensuring
+//! that all instructions are legal for the target.
+//!
+//! The resulting function is sent to `filecheck`.
+
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen;
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+
+struct TestLICM;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "licm");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestLICM))
+    }
+}
+
+impl SubTest for TestLICM {
+    fn name(&self) -> &'static str {
+        "licm"
+    }
+
+    fn is_mutating(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        let isa = context.isa.expect("LICM needs an ISA");
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+
+        comp_ctx.flowgraph();
+        comp_ctx.compute_loop_analysis();
+        comp_ctx
+            .licm(isa)
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, Into::into(e)))?;
+
+        let text = comp_ctx.func.display(context.isa).to_string();
+        run_filecheck(&text, context)
+    }
+}
diff --git a/cranelift/filetests/src/test_postopt.rs b/cranelift/filetests/src/test_postopt.rs
new file mode 100644
index 0000000000..5d5486e912
--- /dev/null
+++ b/cranelift/filetests/src/test_postopt.rs
@@ -0,0 +1,44 @@
+//! Test command for testing the postopt pass.
+//!
+//! The resulting function is sent to `filecheck`.
+
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen;
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+
+struct TestPostopt;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "postopt");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestPostopt))
+    }
+}
+
+impl SubTest for TestPostopt {
+    fn name(&self) -> &'static str {
+        "postopt"
+    }
+
+    fn is_mutating(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+        let isa = context.isa.expect("postopt needs an ISA");
+
+        comp_ctx.flowgraph();
+        comp_ctx
+            .postopt(isa)
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, Into::into(e)))?;
+
+        let text = comp_ctx.func.display(isa).to_string();
+        run_filecheck(&text, context)
+    }
+}
diff --git a/cranelift/filetests/src/test_preopt.rs b/cranelift/filetests/src/test_preopt.rs
new file mode 100644
index 0000000000..7e0e23475a
--- /dev/null
+++ b/cranelift/filetests/src/test_preopt.rs
@@ -0,0 +1,50 @@
+//! Test command for testing the constant folding pass.
+//!
+//! The `preopt` test command runs each function through the constant folding pass after
+//! ensuring that all instructions are legal for the target.
+//!
+//! The resulting function is sent to `filecheck`.
+
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen;
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_preopt::optimize;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+
+struct TestPreopt;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "preopt");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestPreopt))
+    }
+}
+
+impl SubTest for TestPreopt {
+    fn name(&self) -> &'static str {
+        "preopt"
+    }
+
+    fn is_mutating(&self) -> bool {
+        true
+    }
+
+    fn needs_isa(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        let isa = context.isa.expect("preopt needs an ISA");
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+
+        optimize(&mut comp_ctx, isa)
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, Into::into(e)))?;
+
+        let text = comp_ctx.func.display(context.isa).to_string();
+        run_filecheck(&text, context)
+    }
+}
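Constant folding, which the `preopt` subtest exercises through `cranelift_preopt::optimize`, rewrites operations on known constants into constants. A toy stand-alone illustration of the transform on a made-up expression tree (again, not cranelift's data structures):

```rust
#[derive(Debug, PartialEq)]
enum Expr {
    Const(i64),
    Add(Box<Expr>, Box<Expr>),
}

/// Fold `Add(Const, Const)` into `Const`, recursively.
fn fold(e: Expr) -> Expr {
    match e {
        Expr::Add(l, r) => match (fold(*l), fold(*r)) {
            (Expr::Const(a), Expr::Const(b)) => Expr::Const(a.wrapping_add(b)),
            (l, r) => Expr::Add(Box::new(l), Box::new(r)),
        },
        other => other,
    }
}

fn main() {
    let e = Expr::Add(
        Box::new(Expr::Const(2)),
        Box::new(Expr::Add(Box::new(Expr::Const(3)), Box::new(Expr::Const(4)))),
    );
    assert_eq!(fold(e), Expr::Const(9));
}
```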
diff --git a/cranelift/filetests/src/test_print_cfg.rs b/cranelift/filetests/src/test_print_cfg.rs
new file mode 100644
index 0000000000..4483c23939
--- /dev/null
+++ b/cranelift/filetests/src/test_print_cfg.rs
@@ -0,0 +1,37 @@
+//! The `print-cfg` sub-command.
+//!
+//! Read a series of Cranelift IR files and print their control flow graphs
+//! in graphviz format.
+
+use std::borrow::Cow;
+
+use crate::subtest::{self, Context, SubTest, SubtestResult};
+use cranelift_codegen::cfg_printer::CFGPrinter;
+use cranelift_codegen::ir::Function;
+use cranelift_reader::TestCommand;
+
+/// Object implementing the `test print-cfg` sub-test.
+struct TestPrintCfg;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "print-cfg");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestPrintCfg))
+    }
+}
+
+impl SubTest for TestPrintCfg {
+    fn name(&self) -> &'static str {
+        "print-cfg"
+    }
+
+    fn needs_verifier(&self) -> bool {
+        false
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        subtest::run_filecheck(&CFGPrinter::new(&func).to_string(), context)
+    }
+}
diff --git a/cranelift/filetests/src/test_regalloc.rs b/cranelift/filetests/src/test_regalloc.rs
new file mode 100644
index 0000000000..5a316c022f
--- /dev/null
+++ b/cranelift/filetests/src/test_regalloc.rs
@@ -0,0 +1,56 @@
+//! Test command for testing the register allocator.
+//!
+//! The `regalloc` test command runs each function through the register allocator after ensuring
+//! that all instructions are legal for the target.
+//!
+//! The resulting function is sent to `filecheck`.
+
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen;
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+
+struct TestRegalloc;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "regalloc");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestRegalloc))
+    }
+}
+
+impl SubTest for TestRegalloc {
+    fn name(&self) -> &'static str {
+        "regalloc"
+    }
+
+    fn is_mutating(&self) -> bool {
+        true
+    }
+
+    fn needs_isa(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        let isa = context.isa.expect("register allocator needs an ISA");
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+
+        comp_ctx.compute_cfg();
+        // TODO: Should we have an option to skip legalization?
+        comp_ctx
+            .legalize(isa)
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, e))?;
+        comp_ctx.compute_domtree();
+        comp_ctx
+            .regalloc(isa)
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, e))?;
+
+        let text = comp_ctx.func.display(Some(isa)).to_string();
+        run_filecheck(&text, context)
+    }
+}
diff --git a/cranelift/filetests/src/test_rodata.rs b/cranelift/filetests/src/test_rodata.rs
new file mode 100644
index 0000000000..4254d9939b
--- /dev/null
+++ b/cranelift/filetests/src/test_rodata.rs
@@ -0,0 +1,123 @@
+//! Test command for verifying the rodata emitted after each function.
+//!
+//! The `rodata` test command runs each function through the full code generator pipeline.
+
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen;
+use cranelift_codegen::binemit::{self, CodeInfo};
+use cranelift_codegen::ir;
+use cranelift_codegen::ir::{Function, Value};
+use cranelift_codegen::isa::TargetIsa;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_reader::TestCommand;
+use log::info;
+use std::borrow::Cow;
+
+struct TestRodata;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "rodata");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestRodata))
+    }
+}
+
+impl SubTest for TestRodata {
+    fn name(&self) -> &'static str {
+        "rodata"
+    }
+
+    fn is_mutating(&self) -> bool {
+        true
+    }
+
+    fn needs_isa(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        let isa = context.isa.expect("rodata needs an ISA");
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+
+        let CodeInfo { total_size, .. } = comp_ctx
+            .compile(isa)
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, e))?;
+
+        info!(
+            "Generated {} bytes of code:\n{}",
+            total_size,
+            comp_ctx.func.display(isa)
+        );
+
+        // Emit the function; the sink below captures only the bytes emitted after
+        // `begin_rodata` is called.
+        let mut sink = RodataSink {
+            rodata: Vec::new(),
+            in_rodata: false,
+        };
+        binemit::emit_function(
+            &comp_ctx.func,
+            |func, inst, div, sink, isa| isa.emit_inst(func, inst, div, sink),
+            &mut sink,
+            isa,
+        );
+
+        // Run final code through filecheck.
+        let text = format!("{:X?}", sink.rodata);
+        info!("Found rodata: {}", text);
+        run_filecheck(&text, context)
+    }
+}
+
+/// Code sink that only captures emitted rodata
+struct RodataSink {
+    in_rodata: bool,
+    rodata: Vec<u8>,
+}
+
+impl binemit::CodeSink for RodataSink {
+    fn offset(&self) -> binemit::CodeOffset {
+        0
+    }
+
+    fn put1(&mut self, byte: u8) {
+        if self.in_rodata {
+            self.rodata.push(byte);
+        }
+    }
+
+    fn put2(&mut self, bytes: u16) {
+        if self.in_rodata {
+            self.rodata.extend_from_slice(&bytes.to_be_bytes());
+        }
+    }
+
+    fn put4(&mut self, bytes: u32) {
+        if self.in_rodata {
+            self.rodata.extend_from_slice(&bytes.to_be_bytes());
+        }
+    }
+
+    fn put8(&mut self, bytes: u64) {
+        if self.in_rodata {
+            self.rodata.extend_from_slice(&bytes.to_be_bytes());
+        }
+    }
+
+    fn reloc_block(&mut self, _reloc: binemit::Reloc, _block_offset: binemit::CodeOffset) {}
+    fn reloc_external(&mut self, _: binemit::Reloc, _: &ir::ExternalName, _: binemit::Addend) {}
+    fn reloc_constant(&mut self, _: binemit::Reloc, _: ir::ConstantOffset) {}
+    fn reloc_jt(&mut self, _reloc: binemit::Reloc, _jt: ir::JumpTable) {}
+    fn trap(&mut self, _code: ir::TrapCode, _srcloc: ir::SourceLoc) {}
+    fn begin_jumptables(&mut self) {
+        assert!(!self.in_rodata);
+    }
+    fn begin_rodata(&mut self) {
+        self.in_rodata = true;
+    }
+    fn end_codegen(&mut self) {
+        assert!(self.in_rodata);
+    }
+    fn add_stackmap(&mut self, _: &[Value], _: &Function, _: &dyn TargetIsa) {}
+}
diff --git a/cranelift/filetests/src/test_run.rs b/cranelift/filetests/src/test_run.rs
new file mode 100644
index 0000000000..6e34bfebfa
--- /dev/null
+++ b/cranelift/filetests/src/test_run.rs
@@ -0,0 +1,46 @@
+//! Test command for running CLIF files and verifying their results.
+//!
+//! The `run` test command compiles each function on the host machine and executes it.
+
+use crate::function_runner::FunctionRunner;
+use crate::subtest::{Context, SubTest, SubtestResult};
+use cranelift_codegen;
+use cranelift_codegen::ir;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+
+struct TestRun;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "run");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestRun))
+    }
+}
+
+impl SubTest for TestRun {
+    fn name(&self) -> &'static str {
+        "run"
+    }
+
+    fn is_mutating(&self) -> bool {
+        false
+    }
+
+    fn needs_isa(&self) -> bool {
+        false
+    }
+
+    fn run(&self, func: Cow<ir::Function>, context: &Context) -> SubtestResult<()> {
+        for comment in context.details.comments.iter() {
+            if comment.text.contains("run") {
+                let runner =
+                    FunctionRunner::with_host_isa(func.clone().into_owned(), context.flags.clone());
+                runner.run()?
+            }
+        }
+        Ok(())
+    }
+}
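The `run` subtest above fires once for every comment containing `run` and executes the function on the host. A compact sketch of just the execution step, assuming a parsed `Function` and its `Flags` are already available (error handling is left to the caller):

```rust
use crate::function_runner::FunctionRunner;
use cranelift_codegen::ir;
use cranelift_codegen::settings::Flags;

/// Compile `func` for the host ISA and execute it, as `test run` does.
fn execute_on_host(func: ir::Function, flags: Flags) -> Result<(), String> {
    let runner = FunctionRunner::with_host_isa(func, flags);
    // A failed run comes back as an Err with a message.
    runner.run()
}
```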
diff --git a/cranelift/filetests/src/test_safepoint.rs b/cranelift/filetests/src/test_safepoint.rs
new file mode 100644
index 0000000000..b213fb274d
--- /dev/null
+++ b/cranelift/filetests/src/test_safepoint.rs
@@ -0,0 +1,39 @@
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+
+struct TestSafepoint;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "safepoint");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestSafepoint))
+    }
+}
+
+impl SubTest for TestSafepoint {
+    fn name(&self) -> &'static str {
+        "safepoint"
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+
+        let isa = context.isa.expect("safepoint test needs an ISA");
+        comp_ctx.compute_cfg();
+        comp_ctx
+            .legalize(isa)
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, e))?;
+        comp_ctx.compute_domtree();
+        comp_ctx
+            .regalloc(isa)
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, e))?;
+
+        let text = comp_ctx.func.display(context.isa).to_string();
+        run_filecheck(&text, context)
+    }
+}
diff --git a/cranelift/filetests/src/test_shrink.rs b/cranelift/filetests/src/test_shrink.rs
new file mode 100644
index 0000000000..d32da3c384
--- /dev/null
+++ b/cranelift/filetests/src/test_shrink.rs
@@ -0,0 +1,46 @@
+//! Test command for testing the Shrink pass.
+//!
+//! The `shrink` test command runs each function through the Shrink pass after ensuring
+//! that all instructions are legal for the target.
+//!
+//! The resulting function is sent to `filecheck`.
+
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen;
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+
+struct TestShrink;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "shrink");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestShrink))
+    }
+}
+
+impl SubTest for TestShrink {
+    fn name(&self) -> &'static str {
+        "shrink"
+    }
+
+    fn is_mutating(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        let isa = context.isa.expect("shrink needs an ISA");
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+
+        comp_ctx
+            .shrink_instructions(isa)
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, Into::into(e)))?;
+
+        let text = comp_ctx.func.display(isa).to_string();
+        run_filecheck(&text, context)
+    }
+}
diff --git a/cranelift/filetests/src/test_simple_gvn.rs b/cranelift/filetests/src/test_simple_gvn.rs
new file mode 100644
index 0000000000..831a632951
--- /dev/null
+++ b/cranelift/filetests/src/test_simple_gvn.rs
@@ -0,0 +1,46 @@
+//! Test command for testing the simple GVN pass.
+//!
+//! The `simple-gvn` test command runs each function through the simple GVN pass after ensuring
+//! that all instructions are legal for the target.
+//!
+//! The resulting function is sent to `filecheck`.
+
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen;
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+
+struct TestSimpleGVN;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "simple-gvn");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestSimpleGVN))
+    }
+}
+
+impl SubTest for TestSimpleGVN {
+    fn name(&self) -> &'static str {
+        "simple-gvn"
+    }
+
+    fn is_mutating(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+
+        comp_ctx.flowgraph();
+        comp_ctx
+            .simple_gvn(context.flags_or_isa())
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, Into::into(e)))?;
+
+        let text = comp_ctx.func.display(context.isa).to_string();
+        run_filecheck(&text, context)
+    }
+}
diff --git a/cranelift/filetests/src/test_simple_preopt.rs b/cranelift/filetests/src/test_simple_preopt.rs
new file mode 100644
index 0000000000..286a86ba23
--- /dev/null
+++ b/cranelift/filetests/src/test_simple_preopt.rs
@@ -0,0 +1,43 @@
+//! Test command for testing the preopt pass.
+//!
+//! The resulting function is sent to `filecheck`.
+
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use cranelift_codegen;
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+
+struct TestSimplePreopt;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "simple_preopt");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestSimplePreopt))
+    }
+}
+
+impl SubTest for TestSimplePreopt {
+    fn name(&self) -> &'static str {
+        "simple_preopt"
+    }
+
+    fn is_mutating(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+        let isa = context.isa.expect("preopt needs an ISA");
+
+        comp_ctx.compute_cfg();
+        comp_ctx
+            .preopt(isa)
+            .map_err(|e| pretty_error(&comp_ctx.func, context.isa, Into::into(e)))?;
+        let text = &comp_ctx.func.display(isa).to_string();
+        run_filecheck(&text, context)
+    }
+}
diff --git a/cranelift/filetests/src/test_unwind.rs b/cranelift/filetests/src/test_unwind.rs
new file mode 100644
index 0000000000..3db1cbf829
--- /dev/null
+++ b/cranelift/filetests/src/test_unwind.rs
@@ -0,0 +1,214 @@
+//! Test command for verifying the unwind information emitted for each function.
+//!
+//! The `unwind` test command runs each function through the full code generator pipeline.
+#![cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
+
+use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult};
+use byteorder::{ByteOrder, LittleEndian};
+use cranelift_codegen;
+use cranelift_codegen::binemit::{FrameUnwindKind, FrameUnwindOffset, FrameUnwindSink, Reloc};
+use cranelift_codegen::ir;
+use cranelift_reader::TestCommand;
+use std::borrow::Cow;
+use std::fmt::Write;
+
+struct TestUnwind;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "unwind");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestUnwind))
+    }
+}
+
+impl SubTest for TestUnwind {
+    fn name(&self) -> &'static str {
+        "unwind"
+    }
+
+    fn is_mutating(&self) -> bool {
+        false
+    }
+
+    fn needs_isa(&self) -> bool {
+        true
+    }
+
+    fn run(&self, func: Cow<ir::Function>, context: &Context) -> SubtestResult<()> {
+        let isa = context.isa.expect("unwind needs an ISA");
+        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
+
+        comp_ctx.compile(isa).expect("failed to compile function");
+
+        struct Sink(Vec<u8>);
+        impl FrameUnwindSink for Sink {
+            fn len(&self) -> FrameUnwindOffset {
+                self.0.len()
+            }
+            fn bytes(&mut self, b: &[u8]) {
+                self.0.extend_from_slice(b);
+            }
+            fn reloc(&mut self, _: Reloc, _: FrameUnwindOffset) {
+                unimplemented!();
+            }
+            fn set_entry_offset(&mut self, _: FrameUnwindOffset) {
+                unimplemented!();
+            }
+        }
+
+        let mut sink = Sink(Vec::new());
+        comp_ctx.emit_unwind_info(isa, FrameUnwindKind::Fastcall, &mut sink);
+
+        let mut text = String::new();
+        if sink.0.is_empty() {
+            writeln!(text, "No unwind information.").unwrap();
+        } else {
+            print_unwind_info(&mut text, &sink.0);
+        }
+
+        run_filecheck(&text, context)
+    }
+}
+fn print_unwind_info(text: &mut String, mem: &[u8]) {
+    let info = UnwindInfo::from_slice(mem);
+
+    // Assert correct alignment and padding of the unwind information.
+    assert!(mem.len() % 4 == 0);
+    assert_eq!(
+        mem.len(),
+        4 + ((info.unwind_code_count_raw as usize) * 2)
+            + if (info.unwind_code_count_raw & 1) == 1 {
+                2
+            } else {
+                0
+            }
+    );
+
+    writeln!(text, "{:#?}", info).unwrap();
+}
+
+#[derive(Debug)]
+struct UnwindInfo {
+    pub version: u8,
+    pub flags: u8,
+    pub prologue_size: u8,
+    pub unwind_code_count_raw: u8,
+    pub frame_register: u8,
+    pub frame_register_offset: u8,
+    pub unwind_codes: Vec<UnwindCode>,
+}
+
+impl UnwindInfo {
+    fn from_slice(mem: &[u8]) -> Self {
+        let version_and_flags = mem[0];
+        let prologue_size = mem[1];
+        let unwind_code_count_raw = mem[2];
+        let frame_register_and_offset = mem[3];
+        let mut unwind_codes = Vec::new();
+
+        let mut i = 0;
+        while i < unwind_code_count_raw {
+            let code = UnwindCode::from_slice(&mem[(4 + (i * 2) as usize)..]);
+
+            i += match &code.value {
+                UnwindValue::None => 1,
+                UnwindValue::U16(_) => 2,
+                UnwindValue::U32(_) => 3,
+            };
+
+            unwind_codes.push(code);
+        }
+
+        Self {
+            version: version_and_flags & 0x3,
+            flags: (version_and_flags & 0xF8) >> 3,
+            prologue_size,
+            unwind_code_count_raw,
+            frame_register: frame_register_and_offset & 0xF,
+            frame_register_offset: (frame_register_and_offset & 0xF0) >> 4,
+            unwind_codes,
+        }
+    }
+}
+
+#[derive(Debug)]
+struct UnwindCode {
+    pub offset: u8,
+    pub op: UnwindOperation,
+    pub info: u8,
+    pub value: UnwindValue,
+}
+
+impl UnwindCode {
+    fn from_slice(mem: &[u8]) -> Self {
+        let offset = mem[0];
+        let op_and_info = mem[1];
+        let op = UnwindOperation::from(op_and_info & 0xF);
+        let info = (op_and_info & 0xF0) >> 4;
+
+        let value = match op {
+            UnwindOperation::LargeStackAlloc => match info {
+                0 => UnwindValue::U16(LittleEndian::read_u16(&mem[2..])),
+                1 => UnwindValue::U32(LittleEndian::read_u32(&mem[2..])),
+                _ => panic!("unexpected stack alloc info value"),
+            },
+            UnwindOperation::SaveNonvolatileRegister => {
+                UnwindValue::U16(LittleEndian::read_u16(&mem[2..]))
+            }
+            UnwindOperation::SaveNonvolatileRegisterFar => {
+                UnwindValue::U32(LittleEndian::read_u32(&mem[2..]))
+            }
+            UnwindOperation::SaveXmm128 => UnwindValue::U16(LittleEndian::read_u16(&mem[2..])),
+            UnwindOperation::SaveXmm128Far => UnwindValue::U32(LittleEndian::read_u32(&mem[2..])),
+            _ => UnwindValue::None,
+        };
+
+        Self {
+            offset,
+            op,
+            info,
+            value,
+        }
+    }
+}
+
+#[derive(Debug)]
+enum UnwindOperation {
+    PushNonvolatileRegister,
+    LargeStackAlloc,
+    SmallStackAlloc,
+    SetFramePointer,
+    SaveNonvolatileRegister,
+    SaveNonvolatileRegisterFar,
+    SaveXmm128,
+    SaveXmm128Far,
+    PushMachineFrame,
+}
+
+impl From<u8> for UnwindOperation {
+    fn from(value: u8) -> Self {
+        // The numerical value is specified as part of the Windows x64 ABI.
+        match value {
+            0 => Self::PushNonvolatileRegister,
+            1 => Self::LargeStackAlloc,
+            2 => Self::SmallStackAlloc,
+            3 => Self::SetFramePointer,
+            4 => Self::SaveNonvolatileRegister,
+            5 => Self::SaveNonvolatileRegisterFar,
+            6 => Self::SaveXmm128,
+            7 => Self::SaveXmm128Far,
+            8 => Self::PushMachineFrame,
+            _ => panic!("unsupported unwind operation"),
+        }
+    }
+}
+
+#[derive(Debug)]
+enum UnwindValue {
+    None,
+    U16(u16),
+    U32(u32),
+}
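To make the layout above concrete, here is a small worked example decoding a hypothetical 8-byte Windows x64 unwind blob with the parser defined above (the byte values are invented, and the example assumes the types above are in scope). The header `01 04 02 00` gives version 1, flags 0, a 4-byte prologue, two unwind codes, and no frame register; the two codes decode to a small stack allocation and a nonvolatile register push:

```rust
// Assumes UnwindInfo/UnwindCode/UnwindOperation/UnwindValue from above are in scope.
fn main() {
    let mem: [u8; 8] = [
        0x01, 0x04, 0x02, 0x00, // version/flags, prologue size, code count, frame reg/offset
        0x04, 0x02, // prologue offset 4, SmallStackAlloc (op 2), info 0
        0x01, 0x50, // prologue offset 1, PushNonvolatileRegister (op 0), info 5 (the register)
    ];
    let info = UnwindInfo::from_slice(&mem);
    assert_eq!(info.version, 1);
    assert_eq!(info.prologue_size, 4);
    assert_eq!(info.unwind_code_count_raw, 2);
    assert_eq!(info.unwind_codes.len(), 2);
    println!("{:#?}", info);
}
```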
diff --git a/cranelift/filetests/src/test_verifier.rs b/cranelift/filetests/src/test_verifier.rs
new file mode 100644
index 0000000000..322361e33f
--- /dev/null
+++ b/cranelift/filetests/src/test_verifier.rs
@@ -0,0 +1,94 @@
+//! Test command for checking the IR verifier.
+//!
+//! The `test verifier` test command looks for annotations on instructions like this:
+//!
+//! ```clif
+//! jump block3 ; error: jump to non-existent block
+//! ```
+//!
+//! This annotation means that the verifier is expected to give an error for the jump instruction
+//! containing the substring "jump to non-existent block".
+
+use crate::match_directive::match_directive;
+use crate::subtest::{Context, SubTest, SubtestResult};
+use cranelift_codegen::ir::Function;
+use cranelift_codegen::verify_function;
+use cranelift_reader::TestCommand;
+use std::borrow::{Borrow, Cow};
+use std::fmt::Write;
+
+struct TestVerifier;
+
+pub fn subtest(parsed: &TestCommand) -> SubtestResult<Box<dyn SubTest>> {
+    assert_eq!(parsed.command, "verifier");
+    if !parsed.options.is_empty() {
+        Err(format!("No options allowed on {}", parsed))
+    } else {
+        Ok(Box::new(TestVerifier))
+    }
+}
+
+impl SubTest for TestVerifier {
+    fn name(&self) -> &'static str {
+        "verifier"
+    }
+
+    fn needs_verifier(&self) -> bool {
+        // Running the verifier before this test would defeat its purpose.
+        false
+    }
+
+    fn run(&self, func: Cow<Function>, context: &Context) -> SubtestResult<()> {
+        let func = func.borrow();
+
+        // Scan source annotations for "error:" directives.
+        let mut expected = Vec::new();
+        for comment in &context.details.comments {
+            if let Some(tail) = match_directive(comment.text, "error:") {
+                expected.push((comment.entity, tail));
+            }
+        }
+
+        match verify_function(func, context.flags_or_isa()) {
+            Ok(()) if expected.is_empty() => Ok(()),
+            Ok(()) => Err(format!("passed, but expected errors: {:?}", expected)),
+
+            Err(ref errors) if expected.is_empty() => {
+                Err(format!("expected no error, but got:\n{}", errors))
+            }
+
+            Err(errors) => {
+                let mut errors = errors.0;
+                let mut msg = String::new();
+
+                // For each expected error, find a suitable match.
+                for expect in expected {
+                    let pos = errors
+                        .iter()
+                        .position(|err| err.location == expect.0 && err.message.contains(expect.1));
+
+                    match pos {
+                        None => {
+                            writeln!(msg, "  expected error {}: {}", expect.0, expect.1).unwrap();
+                        }
+                        Some(pos) => {
+                            errors.swap_remove(pos);
+                        }
+                    }
+                }
+
+                // Report remaining errors.
+                for err in errors {
+                    writeln!(msg, "unexpected error {}", err).unwrap();
+                }
+
+                if msg.is_empty() {
+                    Ok(())
+                } else {
+                    Err(msg)
+                }
+            }
+        }
+    }
+}
diff --git a/cranelift/frontend/Cargo.toml b/cranelift/frontend/Cargo.toml
new file mode 100644
index 0000000000..0b6a0dc2b5
--- /dev/null
+++ b/cranelift/frontend/Cargo.toml
@@ -0,0 +1,27 @@
+[package]
+authors = ["The Cranelift Project Developers"]
+name = "cranelift-frontend"
+version = "0.59.0"
+description = "Cranelift IR builder helper"
+license = "Apache-2.0 WITH LLVM-exception"
+documentation = "https://cranelift.readthedocs.io/"
+categories = ["no-std"]
+repository = "https://github.com/bytecodealliance/cranelift"
+readme = "README.md"
+edition = "2018"
+
+[dependencies]
+cranelift-codegen = { path = "../codegen", version = "0.59.0", default-features = false }
+target-lexicon = "0.10"
+log = { version = "0.4.6", default-features = false }
+hashbrown = { version = "0.6", optional = true }
+smallvec = { version = "1.0.0" }
+
+[features]
+default = ["std"]
+std = ["cranelift-codegen/std"]
+core = ["hashbrown", "cranelift-codegen/core"]
+
+[badges]
+maintenance = { status = "experimental" }
+travis-ci = { repository = "bytecodealliance/cranelift" }
diff --git a/cranelift/frontend/LICENSE b/cranelift/frontend/LICENSE
new file mode 100644
index 0000000000..f9d81955f4
--- /dev/null
+++ b/cranelift/frontend/LICENSE
@@ -0,0 +1,220 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. 
Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+
+
+--- LLVM Exceptions to the Apache 2.0 License ----
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
+
diff --git a/cranelift/frontend/README.md b/cranelift/frontend/README.md
new file mode 100644
index 0000000000..e43ad48f45
--- /dev/null
+++ b/cranelift/frontend/README.md
@@ -0,0 +1,5 @@
+This crate provides a straightforward way to create a
+[Cranelift](https://crates.io/crates/cranelift) IR function and fill it with
+instructions translated from another language. It contains an SSA construction
+module that provides convenient methods for translating non-SSA variables into
+SSA Cranelift IR values via `use_var` and `def_var` calls.
diff --git a/cranelift/frontend/src/frontend.rs b/cranelift/frontend/src/frontend.rs
new file mode 100644
index 0000000000..a5803cbcf6
--- /dev/null
+++ b/cranelift/frontend/src/frontend.rs
@@ -0,0 +1,1275 @@
+//! A frontend for building Cranelift IR from other languages.
+use crate::ssa::{SSABlock, SSABuilder, SideEffects};
+use crate::variable::Variable;
+use cranelift_codegen::cursor::{Cursor, FuncCursor};
+use cranelift_codegen::entity::{EntitySet, SecondaryMap};
+use cranelift_codegen::ir;
+use cranelift_codegen::ir::function::DisplayFunction;
+use cranelift_codegen::ir::{
+    types, AbiParam, Block, DataFlowGraph, ExtFuncData, ExternalName, FuncRef, Function,
+    GlobalValue, GlobalValueData, Heap, HeapData, Inst, InstBuilder, InstBuilderBase,
+    InstructionData, JumpTable, JumpTableData, LibCall, MemFlags, SigRef, Signature, StackSlot,
+    StackSlotData, Type, Value, ValueLabel, ValueLabelAssignments, ValueLabelStart,
+};
+use cranelift_codegen::isa::{TargetFrontendConfig, TargetIsa};
+use cranelift_codegen::packed_option::PackedOption;
+
+/// Structure used for translating a series of functions into Cranelift IR.
+///
+/// In order to reduce memory reallocations when compiling multiple functions,
+/// `FunctionBuilderContext` holds various data structures which are cleared between
+/// functions, rather than dropped, preserving the underlying allocations.
+pub struct FunctionBuilderContext {
+    ssa: SSABuilder,
+    blocks: SecondaryMap<Block, BlockData>,
+    types: SecondaryMap<Variable, Type>,
+}
+
+/// Temporary object used to build a single Cranelift IR `Function`.
+pub struct FunctionBuilder<'a> {
+    /// The function currently being built.
+    /// This field is public so the function can be re-borrowed.
+    pub func: &'a mut Function,
+
+    /// Source location to assign to all new instructions.
+    srcloc: ir::SourceLoc,
+
+    func_ctx: &'a mut FunctionBuilderContext,
+    position: Position,
+}
+
+#[derive(Clone, Default)]
+struct BlockData {
+    /// A `Block` is "pristine" iff no instructions have been added since the last
+    /// call to `switch_to_block()`.
+    pristine: bool,
+
+    /// A `Block` is "filled" iff a terminator instruction has been inserted since
+    /// the last call to `switch_to_block()`.
+    ///
+    /// A filled block cannot be pristine.
+    filled: bool,
+
+    /// Count of parameters not supplied implicitly by the SSABuilder.
+    user_param_count: usize,
+}
+
+#[derive(Default)]
+struct Position {
+    block: PackedOption<Block>,
+    basic_block: PackedOption<SSABlock>,
+}
+
+impl Position {
+    fn at(block: Block, basic_block: SSABlock) -> Self {
+        Self {
+            block: PackedOption::from(block),
+            basic_block: PackedOption::from(basic_block),
+        }
+    }
+
+    fn is_default(&self) -> bool {
+        self.block.is_none() && self.basic_block.is_none()
+    }
+}
+
+impl FunctionBuilderContext {
+    /// Creates a FunctionBuilderContext structure. The structure is automatically cleared after
+    /// each [`FunctionBuilder`](struct.FunctionBuilder.html) completes translating a function.
+    pub fn new() -> Self {
+        Self {
+            ssa: SSABuilder::new(),
+            blocks: SecondaryMap::new(),
+            types: SecondaryMap::new(),
+        }
+    }
+
+    fn clear(&mut self) {
+        self.ssa.clear();
+        self.blocks.clear();
+        self.types.clear();
+    }
+
+    fn is_empty(&self) -> bool {
+        self.ssa.is_empty() && self.blocks.is_empty() && self.types.is_empty()
+    }
+}
+
+/// Implementation of the [`InstBuilder`](cranelift_codegen::ir::InstBuilder) trait that has
+/// one convenience method per Cranelift IR instruction.
+pub struct FuncInstBuilder<'short, 'long: 'short> {
+    builder: &'short mut FunctionBuilder<'long>,
+    block: Block,
+}
+
+impl<'short, 'long> FuncInstBuilder<'short, 'long> {
+    fn new(builder: &'short mut FunctionBuilder<'long>, block: Block) -> Self {
+        Self { builder, block }
+    }
+}
+
+impl<'short, 'long> InstBuilderBase<'short> for FuncInstBuilder<'short, 'long> {
+    fn data_flow_graph(&self) -> &DataFlowGraph {
+        &self.builder.func.dfg
+    }
+
+    fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph {
+        &mut self.builder.func.dfg
+    }
+
+    // This implementation is richer than `InsertBuilder` because we use the data of the
+    // instruction being inserted to add related info to the DFG and the SSA building system,
+    // and perform debug sanity checks.
+    fn build(self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'short mut DataFlowGraph) {
+        // We only insert the Block in the layout when an instruction is added to it
+        self.builder.ensure_inserted_block();
+
+        let inst = self.builder.func.dfg.make_inst(data.clone());
+        self.builder.func.dfg.make_inst_results(inst, ctrl_typevar);
+        self.builder.func.layout.append_inst(inst, self.block);
+        if !self.builder.srcloc.is_default() {
+            self.builder.func.srclocs[inst] = self.builder.srcloc;
+        }
+
+        if data.opcode().is_branch() {
+            match data.branch_destination() {
+                Some(dest_block) => {
+                    // If the user has supplied jump arguments we must adapt the arguments of
+                    // the destination block
+                    self.builder.declare_successor(dest_block, inst);
+                }
+                None => {
+                    // branch_destination() doesn't detect jump_tables
+                    // If jump table we declare all entries successor
+                    if let InstructionData::BranchTable {
+                        table, destination, ..
+                    } = data
+                    {
+                        // Unlike all other jumps/branches, jump tables are
+                        // capable of having the same successor appear
+                        // multiple times, so we must deduplicate.
+                        let mut unique = EntitySet::<Block>::new();
+                        for dest_block in self
+                            .builder
+                            .func
+                            .jump_tables
+                            .get(table)
+                            .expect("you are referencing an undeclared jump table")
+                            .iter()
+                            .filter(|&dest_block| unique.insert(*dest_block))
+                        {
+                            self.builder.func_ctx.ssa.declare_block_predecessor(
+                                *dest_block,
+                                self.builder.position.basic_block.unwrap(),
+                                inst,
+                            );
+                        }
+                        self.builder.func_ctx.ssa.declare_block_predecessor(
+                            destination,
+                            self.builder.position.basic_block.unwrap(),
+                            inst,
+                        );
+                    }
+                }
+            }
+        }
+        if data.opcode().is_terminator() {
+            self.builder.fill_current_block()
+        } else if data.opcode().is_branch() {
+            self.builder.move_to_next_basic_block()
+        }
+        (inst, &mut self.builder.func.dfg)
+    }
+}
+
+/// This module allows you to create a function in Cranelift IR in a straightforward way, hiding
+/// all the complexity of its internal representation.
+///
+/// The module is parameterized by one type which is the representation of variables in your
+/// source language. It offers a way to conveniently append instructions to your program flow.
+/// You are responsible for splitting your instruction flow into extended blocks (declared with
+/// `create_block`) whose properties are:
+///
+/// - branch and jump instructions can only point at the top of extended blocks;
+/// - the last instruction of each block is a terminator instruction which has no natural successor,
+///   and those instructions can only appear at the end of extended blocks.
+///
+/// The parameters of Cranelift IR instructions are Cranelift IR values, which can only be created
+/// as results of other Cranelift IR instructions. To be able to create variables redefined multiple
+/// times in your program, use the `def_var` and `use_var` commands, which maintain the
+/// correspondence between your variables and Cranelift IR SSA values.
+///
+/// The first block for which you call `switch_to_block` will be assumed to be the beginning of
+/// the function.
+///
+/// At creation, a `FunctionBuilder` instance borrows an already allocated `Function` which it
+/// modifies with the information stored in the mutable borrowed
+/// [`FunctionBuilderContext`](struct.FunctionBuilderContext.html). The function passed in
+/// argument should be newly created with
+/// [`Function::with_name_signature()`](Function::with_name_signature), whereas the
+/// `FunctionBuilderContext` can be kept as is between two function translations.
+///
+/// # Errors
+///
+/// The functions below will panic in debug mode whenever you try to modify the Cranelift IR
+/// function in a way that violates the coherence of the code. For instance: switching to a new
+/// `Block` when you haven't filled the current one with a terminator instruction, or inserting a
+/// return instruction with arguments that don't match the function's signature.
+impl<'a> FunctionBuilder<'a> {
+    /// Creates a new FunctionBuilder structure that will operate on a `Function` using a
+    /// `FunctionBuilderContext`.
+    pub fn new(func: &'a mut Function, func_ctx: &'a mut FunctionBuilderContext) -> Self {
+        debug_assert!(func_ctx.is_empty());
+        Self {
+            func,
+            srcloc: Default::default(),
+            func_ctx,
+            position: Position::default(),
+        }
+    }
+
+    /// Set the source location that should be assigned to all new instructions.
+    pub fn set_srcloc(&mut self, srcloc: ir::SourceLoc) {
+        self.srcloc = srcloc;
+    }
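Since `set_srcloc` applies to every instruction built until it is changed again, a typical pattern is to update it once per source-level statement. A minimal sketch (the `Span` type and the `translate_stmt` name are invented for illustration; only `set_srcloc` and `SourceLoc::new` come from this patch and cranelift-codegen):

```rust
use cranelift_codegen::ir::SourceLoc;
use cranelift_frontend::FunctionBuilder;

// Hypothetical source span tracked by the frontend being translated.
struct Span {
    offset: u32,
}

// Sketch: tag every instruction built for this statement with its source
// offset, so trap and debug locations point back at the original program.
fn translate_stmt(builder: &mut FunctionBuilder<'_>, span: Span) {
    builder.set_srcloc(SourceLoc::new(span.offset));
    // ... build the instructions for the statement here ...
}
```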
+    /// Creates a new `Block` and returns its reference.
+    pub fn create_block(&mut self) -> Block {
+        let block = self.func.dfg.make_block();
+        self.func_ctx.ssa.declare_block_header_block(block);
+        self.func_ctx.blocks[block] = BlockData {
+            filled: false,
+            pristine: true,
+            user_param_count: 0,
+        };
+        block
+    }
+
+    /// After the call to this function, new instructions will be inserted into the designated
+    /// block, in the order they are declared. You must declare the types of the Block arguments
+    /// you will use here.
+    ///
+    /// When inserting the terminator instruction (which doesn't have a fallthrough to its
+    /// immediate successor), the block will be declared filled and it will not be possible to
+    /// append instructions to it.
+    pub fn switch_to_block(&mut self, block: Block) {
+        // First we check that the previous block has been filled.
+        debug_assert!(
+            self.position.is_default()
+                || self.is_unreachable()
+                || self.is_pristine()
+                || self.is_filled(),
+            "you have to fill your block before switching"
+        );
+        // We cannot switch to a filled block
+        debug_assert!(
+            !self.func_ctx.blocks[block].filled,
+            "you cannot switch to a block which is already filled"
+        );
+
+        let basic_block = self.func_ctx.ssa.header_block(block);
+        // Then we change the cursor position.
+        self.position = Position::at(block, basic_block);
+    }
+
+    /// Declares that all the predecessors of this block are known.
+    ///
+    /// Function to call with `block` as soon as the last branch instruction to `block` has been
+    /// created. Forgetting to call this method on every block will cause inconsistencies in the
+    /// produced functions.
+    pub fn seal_block(&mut self, block: Block) {
+        let side_effects = self.func_ctx.ssa.seal_block_header_block(block, self.func);
+        self.handle_ssa_side_effects(side_effects);
+    }
+
+    /// Effectively calls seal_block on all blocks in the function.
+    ///
+    /// It's more efficient to seal `Block`s as soon as possible, during
+    /// translation, but for frontends where this is impractical to do, this
+    /// function can be used at the end of translating all blocks to ensure
+    /// that everything is sealed.
+    pub fn seal_all_blocks(&mut self) {
+        let side_effects = self.func_ctx.ssa.seal_all_block_header_blocks(self.func);
+        self.handle_ssa_side_effects(side_effects);
+    }
+
+    /// In order to use a variable in a `use_var`, you need to declare its type with this method.
+    pub fn declare_var(&mut self, var: Variable, ty: Type) {
+        self.func_ctx.types[var] = ty;
+    }
+
+    /// Returns the Cranelift IR value corresponding to a previously defined user variable at
+    /// the current program position.
+    pub fn use_var(&mut self, var: Variable) -> Value {
+        let (val, side_effects) = {
+            let ty = *self.func_ctx.types.get(var).unwrap_or_else(|| {
+                panic!(
+                    "variable {:?} is used but its type has not been declared",
+                    var
+                )
+            });
+            self.func_ctx
+                .ssa
+                .use_var(self.func, var, ty, self.position.basic_block.unwrap())
+        };
+        self.handle_ssa_side_effects(side_effects);
+        val
+    }
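The `declare_var`/`use_var`/`def_var` trio above is the whole mutable-variable API. As a minimal sketch (assuming `builder` is already positioned in a block; the function name is invented for illustration), translating `x = x + 1` looks like:

```rust
use cranelift_codegen::entity::EntityRef;
use cranelift_codegen::ir::{types::I32, InstBuilder};
use cranelift_frontend::{FunctionBuilder, Variable};

fn bump_counter(builder: &mut FunctionBuilder<'_>) {
    let x = Variable::new(0);
    builder.declare_var(x, I32); // declare the type once per variable
    let zero = builder.ins().iconst(I32, 0);
    builder.def_var(x, zero); // initial definition
    // x = x + 1: read the current SSA value, then redefine the variable.
    let cur = builder.use_var(x);
    let one = builder.ins().iconst(I32, 1);
    let sum = builder.ins().iadd(cur, one);
    builder.def_var(x, sum);
}
```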
+    /// Register a new definition of a user variable. The type of the value must be
+    /// the same as the type registered for the variable.
+    pub fn def_var(&mut self, var: Variable, val: Value) {
+        debug_assert_eq!(
+            *self.func_ctx.types.get(var).unwrap_or_else(|| panic!(
+                "variable {:?} is used but its type has not been declared",
+                var
+            )),
+            self.func.dfg.value_type(val),
+            "declared type of variable {:?} doesn't match type of value {}",
+            var,
+            val
+        );
+
+        self.func_ctx
+            .ssa
+            .def_var(var, val, self.position.basic_block.unwrap());
+    }
+
+    /// Set label for Value
+    ///
+    /// This will not do anything unless `func.dfg.collect_debug_info` is called first.
+    pub fn set_val_label(&mut self, val: Value, label: ValueLabel) {
+        if let Some(values_labels) = self.func.dfg.values_labels.as_mut() {
+            use crate::hash_map::Entry;
+
+            let start = ValueLabelStart {
+                from: self.srcloc,
+                label,
+            };
+
+            match values_labels.entry(val) {
+                Entry::Occupied(mut e) => match e.get_mut() {
+                    ValueLabelAssignments::Starts(starts) => starts.push(start),
+                    _ => panic!("Unexpected ValueLabelAssignments at this stage"),
+                },
+                Entry::Vacant(e) => {
+                    e.insert(ValueLabelAssignments::Starts(vec![start]));
+                }
+            }
+        }
+    }
+
+    /// Creates a jump table in the function, to be used by `br_table` instructions.
+    pub fn create_jump_table(&mut self, data: JumpTableData) -> JumpTable {
+        self.func.create_jump_table(data)
+    }
+
+    /// Creates a stack slot in the function, to be used by `stack_load`, `stack_store` and
+    /// `stack_addr` instructions.
+    pub fn create_stack_slot(&mut self, data: StackSlotData) -> StackSlot {
+        self.func.create_stack_slot(data)
+    }
+
+    /// Adds a signature which can later be used to declare an external function import.
+    pub fn import_signature(&mut self, signature: Signature) -> SigRef {
+        self.func.import_signature(signature)
+    }
+
+    /// Declare an external function import.
+    pub fn import_function(&mut self, data: ExtFuncData) -> FuncRef {
+        self.func.import_function(data)
+    }
+
+    /// Declares a global value accessible to the function.
+    pub fn create_global_value(&mut self, data: GlobalValueData) -> GlobalValue {
+        self.func.create_global_value(data)
+    }
+
+    /// Declares a heap accessible to the function.
+    pub fn create_heap(&mut self, data: HeapData) -> Heap {
+        self.func.create_heap(data)
+    }
+
+    /// Returns an object with the [`InstBuilder`](cranelift_codegen::ir::InstBuilder)
+    /// trait that allows you to conveniently append an instruction to the current `Block`
+    /// being built.
+    pub fn ins<'short>(&'short mut self) -> FuncInstBuilder<'short, 'a> {
+        let block = self
+            .position
+            .block
+            .expect("Please call switch_to_block before inserting instructions");
+        FuncInstBuilder::new(self, block)
+    }
+
+    /// Make sure that the current block is inserted in the layout.
+    pub fn ensure_inserted_block(&mut self) {
+        let block = self.position.block.unwrap();
+        if self.func_ctx.blocks[block].pristine {
+            if !self.func.layout.is_block_inserted(block) {
+                self.func.layout.append_block(block);
+            }
+            self.func_ctx.blocks[block].pristine = false;
+        } else {
+            debug_assert!(
+                !self.func_ctx.blocks[block].filled,
+                "you cannot add an instruction to a block already filled"
+            );
+        }
+    }
+
+    /// Returns a `FuncCursor` pointed at the current position ready for inserting instructions.
+    ///
+    /// This can be used to insert SSA code that doesn't need to access locals and that doesn't
+    /// need to know about `FunctionBuilder` at all.
+    pub fn cursor(&mut self) -> FuncCursor {
+        self.ensure_inserted_block();
+        FuncCursor::new(self.func)
+            .with_srcloc(self.srcloc)
+            .at_bottom(self.position.block.unwrap())
+    }
+
+    /// Append parameters to the given `Block` corresponding to the function
+    /// parameters. This can be used to set up the block parameters for the
+    /// entry block.
+    pub fn append_block_params_for_function_params(&mut self, block: Block) {
+        debug_assert!(
+            !self.func_ctx.ssa.has_any_predecessors(block),
+            "block parameters for function parameters should only be added to the entry block"
+        );
+
+        // These parameters count as "user" parameters here because they aren't
+        // inserted by the SSABuilder.
+        let user_param_count = &mut self.func_ctx.blocks[block].user_param_count;
+        for argtyp in &self.func.signature.params {
+            *user_param_count += 1;
+            self.func.dfg.append_block_param(block, argtyp.value_type);
+        }
+    }
+
+    /// Append parameters to the given `Block` corresponding to the function
+    /// return values. This can be used to set up the block parameters for a
+    /// function exit block.
+    pub fn append_block_params_for_function_returns(&mut self, block: Block) {
+        // These parameters count as "user" parameters here because they aren't
+        // inserted by the SSABuilder.
+        let user_param_count = &mut self.func_ctx.blocks[block].user_param_count;
+        for argtyp in &self.func.signature.returns {
+            *user_param_count += 1;
+            self.func.dfg.append_block_param(block, argtyp.value_type);
+        }
+    }
+
+    /// Declare that translation of the current function is complete. This
+    /// resets the state of the `FunctionBuilder` in preparation to be used
+    /// for another function.
+    pub fn finalize(&mut self) {
+        // Check that all the `Block`s are filled and sealed.
+        debug_assert!(
+            self.func_ctx.blocks.iter().all(
+                |(block, block_data)| block_data.pristine || self.func_ctx.ssa.is_sealed(block)
+            ),
+            "all blocks should be sealed before dropping a FunctionBuilder"
+        );
+        debug_assert!(
+            self.func_ctx
+                .blocks
+                .values()
+                .all(|block_data| block_data.pristine || block_data.filled),
+            "all blocks should be filled before dropping a FunctionBuilder"
+        );
+
+        // In debug mode, check that all blocks are valid basic blocks.
+        #[cfg(debug_assertions)]
+        {
+            // Iterate manually to provide more helpful error messages.
+            for block in self.func_ctx.blocks.keys() {
+                if let Err((inst, _msg)) = self.func.is_block_basic(block) {
+                    let inst_str = self.func.dfg.display_inst(inst, None);
+                    panic!("{} failed basic block invariants on {}", block, inst_str);
+                }
+            }
+        }
+
+        // Clear the state (but preserve the allocated buffers) in preparation
+        // for translating another function.
+        self.func_ctx.clear();
+
+        // Reset srcloc and position to initial states.
+        self.srcloc = Default::default();
+        self.position = Position::default();
+    }
+}
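Because `finalize` clears the `FunctionBuilderContext` without dropping its buffers, one context can be reused across every function in a module. A rough sketch of the intended compile loop (the function body and the `build_all` name are invented for illustration; all API calls come from this patch):

```rust
use cranelift_codegen::ir::{ExternalName, Function, InstBuilder, Signature};
use cranelift_codegen::isa::CallConv;
use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext};

fn build_all(count: u32) -> Vec<Function> {
    // One context amortizes allocations across all the functions.
    let mut ctx = FunctionBuilderContext::new();
    let mut out = Vec::new();
    for i in 0..count {
        // Trivial empty signature for illustration.
        let sig = Signature::new(CallConv::SystemV);
        let mut func = Function::with_name_signature(ExternalName::user(0, i), sig);
        {
            let mut builder = FunctionBuilder::new(&mut func, &mut ctx);
            let block = builder.create_block();
            builder.switch_to_block(block);
            builder.seal_block(block);
            builder.ins().return_(&[]);
            // Checks that every block is filled and sealed, then clears the
            // context so it is ready for the next iteration.
            builder.finalize();
        }
        out.push(func);
    }
    out
}
```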
+
+/// All the functions documented in the previous block are write-only and help you build a valid
+/// Cranelift IR function via multiple debug asserts. However, you might need to improve the
+/// performance of your translation or perform more complex transformations to your Cranelift IR
+/// function. The functions below help you inspect the function you're creating and modify it
+/// in ways that can be unsafe if used incorrectly.
+impl<'a> FunctionBuilder<'a> {
+    /// Retrieves all the parameters for a `Block` currently inferred from the jump instructions
+    /// inserted that target it and the SSA construction.
+    pub fn block_params(&self, block: Block) -> &[Value] {
+        self.func.dfg.block_params(block)
+    }
+
+    /// Retrieves the signature with reference `sigref` previously added with `import_signature`.
+    pub fn signature(&self, sigref: SigRef) -> Option<&Signature> {
+        self.func.dfg.signatures.get(sigref)
+    }
+
+    /// Creates a parameter for a specific `Block` by appending it to the list of already existing
+    /// parameters.
+    ///
+    /// **Note:** this function has to be called at the creation of the `Block` before adding
+    /// instructions to it, otherwise this could interfere with SSA construction.
+    pub fn append_block_param(&mut self, block: Block, ty: Type) -> Value {
+        debug_assert!(
+            self.func_ctx.blocks[block].pristine,
+            "You can't add block parameters after adding any instruction"
+        );
+        debug_assert_eq!(
+            self.func_ctx.blocks[block].user_param_count,
+            self.func.dfg.num_block_params(block)
+        );
+        self.func_ctx.blocks[block].user_param_count += 1;
+        self.func.dfg.append_block_param(block, ty)
+    }
+
+    /// Returns the result values of an instruction.
+    pub fn inst_results(&self, inst: Inst) -> &[Value] {
+        self.func.dfg.inst_results(inst)
+    }
+
+    /// Changes the destination of a jump instruction after creation.
+    ///
+    /// **Note:** You are responsible for maintaining the coherence with the arguments of
+    /// other jump instructions.
+    pub fn change_jump_destination(&mut self, inst: Inst, new_dest: Block) {
+        let old_dest = self.func.dfg[inst]
+            .branch_destination_mut()
+            .expect("you want to change the jump destination of a non-jump instruction");
+        let pred = self.func_ctx.ssa.remove_block_predecessor(*old_dest, inst);
+        *old_dest = new_dest;
+        self.func_ctx
+            .ssa
+            .declare_block_predecessor(new_dest, pred, inst);
+    }
+
+    /// Returns `true` if and only if the current `Block` is sealed and has no predecessors
+    /// declared.
+    ///
+    /// The entry block of a function is never unreachable.
+    pub fn is_unreachable(&self) -> bool {
+        let is_entry = match self.func.layout.entry_block() {
+            None => false,
+            Some(entry) => self.position.block.unwrap() == entry,
+        };
+        !is_entry
+            && self.func_ctx.ssa.is_sealed(self.position.block.unwrap())
+            && !self
+                .func_ctx
+                .ssa
+                .has_any_predecessors(self.position.block.unwrap())
+    }
+
+    /// Returns `true` if and only if no instructions have been added since the last call to
+    /// `switch_to_block`.
+    pub fn is_pristine(&self) -> bool {
+        self.func_ctx.blocks[self.position.block.unwrap()].pristine
+    }
+
+    /// Returns `true` if and only if a terminator instruction has been inserted since the
+    /// last call to `switch_to_block`.
+    pub fn is_filled(&self) -> bool {
+        self.func_ctx.blocks[self.position.block.unwrap()].filled
+    }
+
+    /// Returns a displayable object for the function as it is.
+    ///
+    /// Useful for debug purposes. Use it with `None` for standard printing.
+    // Clippy thinks the lifetime that follows is needless, but rustc needs it
+    #[cfg_attr(feature = "cargo-clippy", allow(clippy::needless_lifetimes))]
+    pub fn display<'b, I: Into<Option<&'b dyn TargetIsa>>>(&'b self, isa: I) -> DisplayFunction {
+        self.func.display(isa)
+    }
+}
+
+/// Helper functions
+impl<'a> FunctionBuilder<'a> {
+    /// Calls libc.memcpy
+    ///
+    /// Copies the `size` bytes from `src` to `dest`, assumes that `src + size`
+    /// won't overlap onto `dest`. If `dest` and `src` overlap, the behavior is
+    /// undefined. Applications in which `dest` and `src` might overlap should
+    /// use `call_memmove` instead.
+    pub fn call_memcpy(
+        &mut self,
+        config: TargetFrontendConfig,
+        dest: Value,
+        src: Value,
+        size: Value,
+    ) {
+        let pointer_type = config.pointer_type();
+        let signature = {
+            let mut s = Signature::new(config.default_call_conv);
+            s.params.push(AbiParam::new(pointer_type));
+            s.params.push(AbiParam::new(pointer_type));
+            s.params.push(AbiParam::new(pointer_type));
+            self.import_signature(s)
+        };
+
+        let libc_memcpy = self.import_function(ExtFuncData {
+            name: ExternalName::LibCall(LibCall::Memcpy),
+            signature,
+            colocated: false,
+        });
+
+        self.ins().call(libc_memcpy, &[dest, src, size]);
+    }
+
+    /// Optimised memcpy or memmove for small copies.
+    ///
+    /// # Codegen safety
+    ///
+    /// The following properties must hold to prevent UB:
+    ///
+    /// * `src_align` and `dest_align` must not exceed the actual alignment of `src` and
+    ///   `dest`, respectively.
+    /// * If `non_overlapping` is true, the source and destination regions must in fact not
+    ///   overlap.
+    pub fn emit_small_memory_copy(
+        &mut self,
+        config: TargetFrontendConfig,
+        dest: Value,
+        src: Value,
+        size: u64,
+        dest_align: u8,
+        src_align: u8,
+        non_overlapping: bool,
+    ) {
+        // Currently the result of guess work, not actual profiling.
+        const THRESHOLD: u64 = 4;
+
+        if size == 0 {
+            return;
+        }
+
+        let access_size = greatest_divisible_power_of_two(size);
+        assert!(
+            access_size.is_power_of_two(),
+            "`size` is not a power of two"
+        );
+        assert!(
+            access_size >= u64::from(::core::cmp::min(src_align, dest_align)),
+            "`size` is smaller than `dest` and `src`'s alignment value."
+        );
+
+        let (access_size, int_type) = if access_size <= 8 {
+            (access_size, Type::int((access_size * 8) as u16).unwrap())
+        } else {
+            (8, types::I64)
+        };
+
+        let load_and_store_amount = size / access_size;
+
+        if load_and_store_amount > THRESHOLD {
+            let size_value = self.ins().iconst(config.pointer_type(), size as i64);
+            if non_overlapping {
+                self.call_memcpy(config, dest, src, size_value);
+            } else {
+                self.call_memmove(config, dest, src, size_value);
+            }
+            return;
+        }
+
+        let mut flags = MemFlags::new();
+        flags.set_aligned();
+
+        // Load all of the memory first. This is necessary in case `dest` overlaps.
+        // It can also improve performance a bit.
+        let registers: smallvec::SmallVec<[_; THRESHOLD as usize]> = (0..load_and_store_amount)
+            .map(|i| {
+                let offset = (access_size * i) as i32;
+                (self.ins().load(int_type, flags, src, offset), offset)
+            })
+            .collect();
+
+        for (value, offset) in registers {
+            self.ins().store(flags, value, dest, offset);
+        }
+    }
+
+    /// Calls libc.memset
+    ///
+    /// Writes `size` bytes of i8 value `ch` to memory starting at `buffer`.
+    pub fn call_memset(
+        &mut self,
+        config: TargetFrontendConfig,
+        buffer: Value,
+        ch: Value,
+        size: Value,
+    ) {
+        let pointer_type = config.pointer_type();
+        let signature = {
+            let mut s = Signature::new(config.default_call_conv);
+            s.params.push(AbiParam::new(pointer_type));
+            s.params.push(AbiParam::new(types::I32));
+            s.params.push(AbiParam::new(pointer_type));
+            self.import_signature(s)
+        };
+
+        let libc_memset = self.import_function(ExtFuncData {
+            name: ExternalName::LibCall(LibCall::Memset),
+            signature,
+            colocated: false,
+        });
+
+        let ch = self.ins().uextend(types::I32, ch);
+        self.ins().call(libc_memset, &[buffer, ch, size]);
+    }
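To make the threshold logic in `emit_small_memory_copy` concrete: the access width is the largest power of two dividing `size` (capped at 8 bytes), and the copy is inlined only when that yields at most `THRESHOLD` load/store pairs. A standalone sketch of the same arithmetic (illustrative only; `copy_plan` is not part of this patch):

```rust
// Mirror of emit_small_memory_copy's decision rule, for illustration.
// Returns (access width in bytes, load/store pairs, defer-to-libc?).
// Assumes size > 0, just as the real helper early-returns on 0.
fn copy_plan(size: u64) -> (u64, u64, bool) {
    const THRESHOLD: u64 = 4;
    // `size & size.wrapping_neg()` isolates the lowest set bit, i.e. the
    // greatest power of two dividing `size`; accesses are capped at 8 bytes.
    let access_size = (size & size.wrapping_neg()).min(8);
    let pairs = size / access_size;
    (access_size, pairs, pairs > THRESHOLD)
}

fn main() {
    assert_eq!(copy_plan(24), (8, 3, false)); // inline: three i64 load/store pairs
    assert_eq!(copy_plan(25), (1, 25, true)); // odd size: call libc instead
    assert_eq!(copy_plan(8192), (8, 1024, true)); // large size: call libc instead
}
```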
+    /// Writes `size` bytes of value `ch` to memory starting at `buffer`, either with inline
+    /// stores or, above a small size threshold, via a call to libc.memset.
+    pub fn emit_small_memset(
+        &mut self,
+        config: TargetFrontendConfig,
+        buffer: Value,
+        ch: u8,
+        size: u64,
+        buffer_align: u8,
+    ) {
+        // Currently the result of guess work, not actual profiling.
+        const THRESHOLD: u64 = 4;
+
+        if size == 0 {
+            return;
+        }
+
+        let access_size = greatest_divisible_power_of_two(size);
+        assert!(
+            access_size.is_power_of_two(),
+            "`size` is not a power of two"
+        );
+        assert!(
+            access_size >= u64::from(buffer_align),
+            "`size` is smaller than `buffer`'s alignment value."
+        );
+
+        let (access_size, int_type) = if access_size <= 8 {
+            (access_size, Type::int((access_size * 8) as u16).unwrap())
+        } else {
+            (8, types::I64)
+        };
+
+        let load_and_store_amount = size / access_size;
+
+        if load_and_store_amount > THRESHOLD {
+            let ch = self.ins().iconst(types::I8, i64::from(ch));
+            let size = self.ins().iconst(config.pointer_type(), size as i64);
+            self.call_memset(config, buffer, ch, size);
+        } else {
+            let mut flags = MemFlags::new();
+            flags.set_aligned();
+
+            let ch = u64::from(ch);
+            // Broadcast the byte into every byte lane of the access-sized value,
+            // e.g. 0x01 becomes 0x0101_0101_0101_0101 for an i64 store.
+            let raw_value = if int_type == types::I64 {
+                ch * 0x0101_0101_0101_0101
+            } else if int_type == types::I32 {
+                ch * 0x0101_0101
+            } else if int_type == types::I16 {
+                ch * 0x0101
+            } else {
+                assert_eq!(int_type, types::I8);
+                ch
+            };
+
+            let value = self.ins().iconst(int_type, raw_value as i64);
+            for i in 0..load_and_store_amount {
+                let offset = (access_size * i) as i32;
+                self.ins().store(flags, value, buffer, offset);
+            }
+        }
+    }
+
+    /// Calls libc.memmove
+    ///
+    /// Copies `size` bytes from memory starting at `source` to memory starting
+    /// at `dest`. `source` is always read before writing to `dest`.
+    pub fn call_memmove(
+        &mut self,
+        config: TargetFrontendConfig,
+        dest: Value,
+        source: Value,
+        size: Value,
+    ) {
+        let pointer_type = config.pointer_type();
+        let signature = {
+            let mut s = Signature::new(config.default_call_conv);
+            s.params.push(AbiParam::new(pointer_type));
+            s.params.push(AbiParam::new(pointer_type));
+            s.params.push(AbiParam::new(pointer_type));
+            self.import_signature(s)
+        };
+
+        let libc_memmove = self.import_function(ExtFuncData {
+            name: ExternalName::LibCall(LibCall::Memmove),
+            signature,
+            colocated: false,
+        });
+
+        self.ins().call(libc_memmove, &[dest, source, size]);
+    }
+}
+
+/// Returns the greatest power of two that divides `size`: in two's complement,
+/// `x & -x` isolates the lowest set bit.
+fn greatest_divisible_power_of_two(size: u64) -> u64 {
+    (size as i64 & -(size as i64)) as u64
+}
+
+// Helper functions
+impl<'a> FunctionBuilder<'a> {
+    fn move_to_next_basic_block(&mut self) {
+        self.position.basic_block = PackedOption::from(
+            self.func_ctx
+                .ssa
+                .declare_block_body_block(self.position.basic_block.unwrap()),
+        );
+    }
+
+    /// A `Block` is 'filled' when a terminator instruction is present.
+ fn fill_current_block(&mut self) { + self.func_ctx.blocks[self.position.block.unwrap()].filled = true; + } + + fn declare_successor(&mut self, dest_block: Block, jump_inst: Inst) { + self.func_ctx.ssa.declare_block_predecessor( + dest_block, + self.position.basic_block.unwrap(), + jump_inst, + ); + } + + fn handle_ssa_side_effects(&mut self, side_effects: SideEffects) { + for split_block in side_effects.split_blocks_created { + self.func_ctx.blocks[split_block].filled = true + } + for modified_block in side_effects.instructions_added_to_blocks { + self.func_ctx.blocks[modified_block].pristine = false + } + } +} + +#[cfg(test)] +mod tests { + use super::greatest_divisible_power_of_two; + use crate::frontend::{FunctionBuilder, FunctionBuilderContext}; + use crate::Variable; + use alloc::string::ToString; + use cranelift_codegen::entity::EntityRef; + use cranelift_codegen::ir::types::*; + use cranelift_codegen::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature}; + use cranelift_codegen::isa::CallConv; + use cranelift_codegen::settings; + use cranelift_codegen::verifier::verify_function; + + fn sample_function(lazy_seal: bool) { + let mut sig = Signature::new(CallConv::SystemV); + sig.returns.push(AbiParam::new(I32)); + sig.params.push(AbiParam::new(I32)); + + let mut fn_ctx = FunctionBuilderContext::new(); + let mut func = Function::with_name_signature(ExternalName::testcase("sample"), sig); + { + let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx); + + let block0 = builder.create_block(); + let block1 = builder.create_block(); + let block2 = builder.create_block(); + let block3 = builder.create_block(); + let x = Variable::new(0); + let y = Variable::new(1); + let z = Variable::new(2); + builder.declare_var(x, I32); + builder.declare_var(y, I32); + builder.declare_var(z, I32); + builder.append_block_params_for_function_params(block0); + + builder.switch_to_block(block0); + if !lazy_seal { + builder.seal_block(block0); + } + { + let tmp = builder.block_params(block0)[0]; // the first function parameter + builder.def_var(x, tmp); + } + { + let tmp = builder.ins().iconst(I32, 2); + builder.def_var(y, tmp); + } + { + let arg1 = builder.use_var(x); + let arg2 = builder.use_var(y); + let tmp = builder.ins().iadd(arg1, arg2); + builder.def_var(z, tmp); + } + builder.ins().jump(block1, &[]); + + builder.switch_to_block(block1); + { + let arg1 = builder.use_var(y); + let arg2 = builder.use_var(z); + let tmp = builder.ins().iadd(arg1, arg2); + builder.def_var(z, tmp); + } + { + let arg = builder.use_var(y); + builder.ins().brnz(arg, block3, &[]); + } + builder.ins().jump(block2, &[]); + + builder.switch_to_block(block2); + if !lazy_seal { + builder.seal_block(block2); + } + { + let arg1 = builder.use_var(z); + let arg2 = builder.use_var(x); + let tmp = builder.ins().isub(arg1, arg2); + builder.def_var(z, tmp); + } + { + let arg = builder.use_var(y); + builder.ins().return_(&[arg]); + } + + builder.switch_to_block(block3); + if !lazy_seal { + builder.seal_block(block3); + } + + { + let arg1 = builder.use_var(y); + let arg2 = builder.use_var(x); + let tmp = builder.ins().isub(arg1, arg2); + builder.def_var(y, tmp); + } + builder.ins().jump(block1, &[]); + if !lazy_seal { + builder.seal_block(block1); + } + + if lazy_seal { + builder.seal_all_blocks(); + } + + builder.finalize(); + } + + let flags = settings::Flags::new(settings::builder()); + // println!("{}", func.display(None)); + if let Err(errors) = verify_function(&func, &flags) { + panic!("{}\n{}", func.display(None), 
errors) + } + } + + #[test] + fn sample() { + sample_function(false) + } + + #[test] + fn sample_with_lazy_seal() { + sample_function(true) + } + + #[test] + fn memcpy() { + use core::str::FromStr; + use cranelift_codegen::{isa, settings}; + + let shared_builder = settings::builder(); + let shared_flags = settings::Flags::new(shared_builder); + + let triple = ::target_lexicon::Triple::from_str("arm").expect("Couldn't create arm triple"); + + let target = isa::lookup(triple) + .ok() + .map(|b| b.finish(shared_flags)) + .expect("This test requires arm support."); + + let mut sig = Signature::new(target.default_call_conv()); + sig.returns.push(AbiParam::new(I32)); + + let mut fn_ctx = FunctionBuilderContext::new(); + let mut func = Function::with_name_signature(ExternalName::testcase("sample"), sig); + { + let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx); + + let block0 = builder.create_block(); + let x = Variable::new(0); + let y = Variable::new(1); + let z = Variable::new(2); + builder.declare_var(x, target.pointer_type()); + builder.declare_var(y, target.pointer_type()); + builder.declare_var(z, I32); + builder.append_block_params_for_function_params(block0); + builder.switch_to_block(block0); + + let src = builder.use_var(x); + let dest = builder.use_var(y); + let size = builder.use_var(y); + builder.call_memcpy(target.frontend_config(), dest, src, size); + builder.ins().return_(&[size]); + + builder.seal_all_blocks(); + builder.finalize(); + } + + assert_eq!( + func.display(None).to_string(), + "function %sample() -> i32 system_v { + sig0 = (i32, i32, i32) system_v + fn0 = %Memcpy sig0 + +block0: + v3 = iconst.i32 0 + v1 -> v3 + v2 = iconst.i32 0 + v0 -> v2 + call fn0(v1, v0, v1) + return v1 +} +" + ); + } + + #[test] + fn small_memcpy() { + use core::str::FromStr; + use cranelift_codegen::{isa, settings}; + + let shared_builder = settings::builder(); + let shared_flags = settings::Flags::new(shared_builder); + + let triple = ::target_lexicon::Triple::from_str("arm").expect("Couldn't create arm triple"); + + let target = isa::lookup(triple) + .ok() + .map(|b| b.finish(shared_flags)) + .expect("This test requires arm support."); + + let mut sig = Signature::new(target.default_call_conv()); + sig.returns.push(AbiParam::new(I32)); + + let mut fn_ctx = FunctionBuilderContext::new(); + let mut func = Function::with_name_signature(ExternalName::testcase("sample"), sig); + { + let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx); + + let block0 = builder.create_block(); + let x = Variable::new(0); + let y = Variable::new(16); + builder.declare_var(x, target.pointer_type()); + builder.declare_var(y, target.pointer_type()); + builder.append_block_params_for_function_params(block0); + builder.switch_to_block(block0); + + let src = builder.use_var(x); + let dest = builder.use_var(y); + let size = 8; + builder.emit_small_memory_copy(target.frontend_config(), dest, src, size, 8, 8, true); + builder.ins().return_(&[dest]); + + builder.seal_all_blocks(); + builder.finalize(); + } + + assert_eq!( + func.display(None).to_string(), + "function %sample() -> i32 system_v { +block0: + v4 = iconst.i32 0 + v1 -> v4 + v3 = iconst.i32 0 + v0 -> v3 + v2 = load.i64 aligned v0 + store aligned v2, v1 + return v1 +} +" + ); + } + + #[test] + fn not_so_small_memcpy() { + use core::str::FromStr; + use cranelift_codegen::{isa, settings}; + + let shared_builder = settings::builder(); + let shared_flags = settings::Flags::new(shared_builder); + + let triple = 
::target_lexicon::Triple::from_str("arm").expect("Couldn't create arm triple");
+
+        let target = isa::lookup(triple)
+            .ok()
+            .map(|b| b.finish(shared_flags))
+            .expect("This test requires arm support.");
+
+        let mut sig = Signature::new(target.default_call_conv());
+        sig.returns.push(AbiParam::new(I32));
+
+        let mut fn_ctx = FunctionBuilderContext::new();
+        let mut func = Function::with_name_signature(ExternalName::testcase("sample"), sig);
+        {
+            let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx);
+
+            let block0 = builder.create_block();
+            let x = Variable::new(0);
+            let y = Variable::new(16);
+            builder.declare_var(x, target.pointer_type());
+            builder.declare_var(y, target.pointer_type());
+            builder.append_block_params_for_function_params(block0);
+            builder.switch_to_block(block0);
+
+            let src = builder.use_var(x);
+            let dest = builder.use_var(y);
+            let size = 8192;
+            builder.emit_small_memory_copy(target.frontend_config(), dest, src, size, 8, 8, true);
+            builder.ins().return_(&[dest]);
+
+            builder.seal_all_blocks();
+            builder.finalize();
+        }
+
+        assert_eq!(
+            func.display(None).to_string(),
+            "function %sample() -> i32 system_v {
+    sig0 = (i32, i32, i32) system_v
+    fn0 = %Memcpy sig0
+
+block0:
+    v4 = iconst.i32 0
+    v1 -> v4
+    v3 = iconst.i32 0
+    v0 -> v3
+    v2 = iconst.i32 8192
+    call fn0(v1, v0, v2)
+    return v1
+}
+"
+        );
+    }
+
+    #[test]
+    fn small_memset() {
+        use core::str::FromStr;
+        use cranelift_codegen::{isa, settings};
+
+        let shared_builder = settings::builder();
+        let shared_flags = settings::Flags::new(shared_builder);
+
+        let triple = ::target_lexicon::Triple::from_str("arm").expect("Couldn't create arm triple");
+
+        let target = isa::lookup(triple)
+            .ok()
+            .map(|b| b.finish(shared_flags))
+            .expect("This test requires arm support.");
+
+        let mut sig = Signature::new(target.default_call_conv());
+        sig.returns.push(AbiParam::new(I32));
+
+        let mut fn_ctx = FunctionBuilderContext::new();
+        let mut func = Function::with_name_signature(ExternalName::testcase("sample"), sig);
+        {
+            let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx);
+
+            let block0 = builder.create_block();
+            let y = Variable::new(16);
+            builder.declare_var(y, target.pointer_type());
+            builder.append_block_params_for_function_params(block0);
+            builder.switch_to_block(block0);
+
+            let dest = builder.use_var(y);
+            let size = 8;
+            builder.emit_small_memset(target.frontend_config(), dest, 1, size, 8);
+            builder.ins().return_(&[dest]);
+
+            builder.seal_all_blocks();
+            builder.finalize();
+        }
+
+        // With the byte broadcast into all eight lanes, memset(1) over an
+        // 8-byte buffer becomes a single aligned i64 store of 0x0101...01.
+        assert_eq!(
+            func.display(None).to_string(),
+            "function %sample() -> i32 system_v {
+block0:
+    v2 = iconst.i32 0
+    v0 -> v2
+    v1 = iconst.i64 0x0101_0101_0101_0101
+    store aligned v1, v0
+    return v0
+}
+"
+        );
+    }
+
+    #[test]
+    fn not_so_small_memset() {
+        use core::str::FromStr;
+        use cranelift_codegen::{isa, settings};
+
+        let shared_builder = settings::builder();
+        let shared_flags = settings::Flags::new(shared_builder);
+
+        let triple = ::target_lexicon::Triple::from_str("arm").expect("Couldn't create arm triple");
+
+        let target = isa::lookup(triple)
+            .ok()
+            .map(|b| b.finish(shared_flags))
+            .expect("This test requires arm support.");
+
+        let mut sig = Signature::new(target.default_call_conv());
+        sig.returns.push(AbiParam::new(I32));
+
+        let mut fn_ctx = FunctionBuilderContext::new();
+        let mut func = Function::with_name_signature(ExternalName::testcase("sample"), sig);
+        {
+            let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx);
+
+            let block0 = builder.create_block();
+            let y = Variable::new(16);
+            builder.declare_var(y, target.pointer_type());
+            builder.append_block_params_for_function_params(block0);
+            builder.switch_to_block(block0);
+
+            let dest = builder.use_var(y);
+            let size = 8192;
+            builder.emit_small_memset(target.frontend_config(), dest, 1, size, 8);
+            builder.ins().return_(&[dest]);
+
+            builder.seal_all_blocks();
+            builder.finalize();
+        }
+
+        assert_eq!(
+            func.display(None).to_string(),
+            "function %sample() -> i32 system_v {
+    sig0 = (i32, i32, i32) system_v
+    fn0 = %Memset sig0
+
+block0:
+    v4 = iconst.i32 0
+    v0 -> v4
+    v1 = iconst.i8 1
+    v2 = iconst.i32 8192
+    v3 = uextend.i32 v1
+    call fn0(v0, v3, v2)
+    return v0
+}
+"
+        );
+    }
+
+    #[test]
+    fn test_greatest_divisible_power_of_two() {
+        assert_eq!(64, greatest_divisible_power_of_two(64));
+        assert_eq!(16, greatest_divisible_power_of_two(48));
+        assert_eq!(8, greatest_divisible_power_of_two(24));
+        assert_eq!(1, greatest_divisible_power_of_two(25));
+    }
+}
diff --git a/cranelift/frontend/src/lib.rs b/cranelift/frontend/src/lib.rs
new file mode 100644
index 0000000000..d28cb53cdf
--- /dev/null
+++ b/cranelift/frontend/src/lib.rs
@@ -0,0 +1,207 @@
+//! Cranelift IR builder library.
+//!
+//! Provides a straightforward way to create a Cranelift IR function and fill it with instructions
+//! corresponding to your source program written in another language.
+//!
+//! To get started, create a [`FunctionBuilderContext`](struct.FunctionBuilderContext.html) and
+//! pass it as an argument to a [`FunctionBuilder`](struct.FunctionBuilder.html).
+//!
+//! # Mutable variables and Cranelift IR values
+//!
+//! The most interesting feature of this API is that it provides a single way to deal with all your
+//! variable problems. Indeed, the [`FunctionBuilder`](struct.FunctionBuilder.html) struct works
+//! with a `Variable` type that serves as an index for your source language's variables. Then,
+//! through calling the functions
+//! [`declare_var`](struct.FunctionBuilder.html#method.declare_var),
+//! [`def_var`](struct.FunctionBuilder.html#method.def_var) and
+//! [`use_var`](struct.FunctionBuilder.html#method.use_var), the
+//! [`FunctionBuilder`](struct.FunctionBuilder.html) will create for you all the Cranelift IR
+//! values corresponding to your variables.
+//!
+//! This API has been designed to help you translate your mutable variables into
+//! [`SSA`](https://en.wikipedia.org/wiki/Static_single_assignment_form) form.
+//! [`use_var`](struct.FunctionBuilder.html#method.use_var) will return the Cranelift IR value
+//! that corresponds to your mutable variable at a precise point in the program. However, if you
+//! know beforehand that one of your variables is defined only once, for instance if it is the
+//! result of an intermediate expression in an expression-based language, then you can represent
+//! it directly with the Cranelift IR value returned by the instruction builder. Using the
+//! [`use_var`](struct.FunctionBuilder.html#method.use_var) API for such an immutable variable
+//! would also work, but with a slight additional overhead (the SSA algorithm does not know
+//! beforehand whether a variable is immutable or not).
+//!
+//! The moral is that you should use these three functions to handle all your mutable variables,
+//! even those that are not present in the source code but are artifacts of the translation. It is
+//! up to you to keep a mapping between the mutable variables of your language and the `Variable`
+//! index that is used by Cranelift.
+//! Caution: as the `Variable` is used by Cranelift to index an
+//! array containing information about your mutable variables, when you create a new `Variable`
+//! with [`Variable::new(var_index)`] you should make sure that `var_index` is provided by a
+//! counter incremented by 1 each time you encounter a new mutable variable.
+//!
+//! # Example
+//!
+//! Here is a pseudo-program we want to transform into Cranelift IR:
+//!
+//! ```clif
+//! function(x) {
+//! x, y, z : i32
+//! block0:
+//!    y = 2;
+//!    z = x + y;
+//!    jump block1
+//! block1:
+//!    z = z + y;
+//!    brnz y, block3;
+//!    jump block2
+//! block2:
+//!    z = z - x;
+//!    return y
+//! block3:
+//!    y = y - x
+//!    jump block1
+//! }
+//! ```
+//!
+//! Here is how you build the corresponding Cranelift IR function using `FunctionBuilderContext`:
+//!
+//! ```rust
+//! extern crate cranelift_codegen;
+//! extern crate cranelift_frontend;
+//!
+//! use cranelift_codegen::entity::EntityRef;
+//! use cranelift_codegen::ir::types::*;
+//! use cranelift_codegen::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
+//! use cranelift_codegen::isa::CallConv;
+//! use cranelift_codegen::settings;
+//! use cranelift_codegen::verifier::verify_function;
+//! use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext, Variable};
+//!
+//! let mut sig = Signature::new(CallConv::SystemV);
+//! sig.returns.push(AbiParam::new(I32));
+//! sig.params.push(AbiParam::new(I32));
+//! let mut fn_builder_ctx = FunctionBuilderContext::new();
+//! let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
+//! {
+//!     let mut builder = FunctionBuilder::new(&mut func, &mut fn_builder_ctx);
+//!
+//!     let block0 = builder.create_block();
+//!     let block1 = builder.create_block();
+//!     let block2 = builder.create_block();
+//!     let block3 = builder.create_block();
+//!     let x = Variable::new(0);
+//!     let y = Variable::new(1);
+//!     let z = Variable::new(2);
+//!     builder.declare_var(x, I32);
+//!     builder.declare_var(y, I32);
+//!     builder.declare_var(z, I32);
+//!     builder.append_block_params_for_function_params(block0);
+//!
+//!     builder.switch_to_block(block0);
+//!     builder.seal_block(block0);
+//!     {
+//!         let tmp = builder.block_params(block0)[0]; // the first function parameter
+//!         builder.def_var(x, tmp);
+//!     }
+//!     {
+//!         let tmp = builder.ins().iconst(I32, 2);
+//!         builder.def_var(y, tmp);
+//!     }
+//!     {
+//!         let arg1 = builder.use_var(x);
+//!         let arg2 = builder.use_var(y);
+//!         let tmp = builder.ins().iadd(arg1, arg2);
+//!         builder.def_var(z, tmp);
+//!     }
+//!     builder.ins().jump(block1, &[]);
+//!
+//!     builder.switch_to_block(block1);
+//!     {
+//!         let arg1 = builder.use_var(y);
+//!         let arg2 = builder.use_var(z);
+//!         let tmp = builder.ins().iadd(arg1, arg2);
+//!         builder.def_var(z, tmp);
+//!     }
+//!     {
+//!         let arg = builder.use_var(y);
+//!         builder.ins().brnz(arg, block3, &[]);
+//!     }
+//!     builder.ins().jump(block2, &[]);
+//!
+//!     builder.switch_to_block(block2);
+//!     builder.seal_block(block2);
+//!     {
+//!         let arg1 = builder.use_var(z);
+//!         let arg2 = builder.use_var(x);
+//!         let tmp = builder.ins().isub(arg1, arg2);
+//!         builder.def_var(z, tmp);
+//!     }
+//!     {
+//!         let arg = builder.use_var(y);
+//!         builder.ins().return_(&[arg]);
+//!     }
+//!
+//!     builder.switch_to_block(block3);
+//!     builder.seal_block(block3);
+//!
+//!     {
+//!         let arg1 = builder.use_var(y);
+//!         let arg2 = builder.use_var(x);
+//!         let tmp = builder.ins().isub(arg1, arg2);
+//!         builder.def_var(y, tmp);
+//!     }
+//!     builder.ins().jump(block1, &[]);
+//!     builder.seal_block(block1);
+//!
+//!     builder.finalize();
+//! }
+//!
+//! let flags = settings::Flags::new(settings::builder());
+//! let res = verify_function(&func, &flags);
+//! println!("{}", func.display(None));
+//! if let Err(errors) = res {
+//!     panic!("{}", errors);
+//! }
+//! ```

+#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)]
+#![warn(unused_import_braces)]
+#![cfg_attr(feature = "std", deny(unstable_features))]
+#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))]
+#![cfg_attr(
+    feature = "cargo-clippy",
+    warn(
+        clippy::float_arithmetic,
+        clippy::mut_mut,
+        clippy::nonminimal_bool,
+        clippy::option_map_unwrap_or,
+        clippy::option_map_unwrap_or_else,
+        clippy::print_stdout,
+        clippy::unicode_not_nfc,
+        clippy::use_self
+    )
+)]
+#![no_std]
+
+#[allow(unused_imports)] // #[macro_use] is required for no_std
+#[macro_use]
+extern crate alloc;
+
+#[cfg(feature = "std")]
+#[macro_use]
+extern crate std;
+
+#[cfg(not(feature = "std"))]
+use hashbrown::{hash_map, HashMap};
+#[cfg(feature = "std")]
+use std::collections::{hash_map, HashMap};
+
+pub use crate::frontend::{FunctionBuilder, FunctionBuilderContext};
+pub use crate::switch::Switch;
+pub use crate::variable::Variable;
+
+mod frontend;
+mod ssa;
+mod switch;
+mod variable;
+
+/// Version number of this crate.
+pub const VERSION: &str = env!("CARGO_PKG_VERSION");
diff --git a/cranelift/frontend/src/ssa.rs b/cranelift/frontend/src/ssa.rs
new file mode 100644
index 0000000000..7d36d9b1c7
--- /dev/null
+++ b/cranelift/frontend/src/ssa.rs
@@ -0,0 +1,1384 @@
+//! An SSA-building API that handles incomplete CFGs.
+//!
+//! The algorithm is based upon Braun M., Buchwald S., Hack S., Leißa R., Mallon C.,
+//! Zwinkau A. (2013) Simple and Efficient Construction of Static Single Assignment Form.
+//! In: Jhala R., De Bosschere K. (eds) Compiler Construction. CC 2013.
+//! Lecture Notes in Computer Science, vol 7791. Springer, Berlin, Heidelberg
+//!
+//! https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf
+
+use crate::Variable;
+use alloc::vec::Vec;
+use core::mem;
+use core::u32;
+use cranelift_codegen::cursor::{Cursor, FuncCursor};
+use cranelift_codegen::entity::{EntityRef, PrimaryMap, SecondaryMap};
+use cranelift_codegen::ir::immediates::{Ieee32, Ieee64};
+use cranelift_codegen::ir::instructions::BranchInfo;
+use cranelift_codegen::ir::types::{F32, F64};
+use cranelift_codegen::ir::{Block, Function, Inst, InstBuilder, InstructionData, Type, Value};
+use cranelift_codegen::packed_option::PackedOption;
+use cranelift_codegen::packed_option::ReservedValue;
+use smallvec::SmallVec;
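Since everything below leans on the filled/sealed discipline, here is a minimal sketch of why sealing is deferred for loop headers. It uses only the public `FunctionBuilder` wrapper from this patch; the countdown shape and the `build_countdown` name are invented, and `builder` is assumed to be positioned in the entry block of a function with no parameters or returns:

```rust
use cranelift_codegen::entity::EntityRef;
use cranelift_codegen::ir::{types::I32, InstBuilder};
use cranelift_frontend::{FunctionBuilder, Variable};

fn build_countdown(builder: &mut FunctionBuilder<'_>) {
    let header = builder.create_block();
    let exit = builder.create_block();
    let i = Variable::new(0);
    builder.declare_var(i, I32);
    let ten = builder.ins().iconst(I32, 10);
    builder.def_var(i, ten);
    builder.ins().jump(header, &[]);

    builder.switch_to_block(header);
    // `header` is NOT sealed yet: its back-edge predecessor does not exist,
    // so this use_var conservatively introduces a block parameter for `i`.
    let cur = builder.use_var(i);
    let one = builder.ins().iconst(I32, 1);
    let next = builder.ins().isub(cur, one);
    builder.def_var(i, next);
    builder.ins().brnz(next, header, &[]); // back edge: loop while i != 0
    builder.ins().jump(exit, &[]);
    builder.seal_block(header); // all predecessors are now declared

    builder.switch_to_block(exit);
    builder.seal_block(exit);
    builder.ins().return_(&[]);
}
```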
+pub struct SSABuilder {
+    // TODO: Consider a sparse representation rather than SecondaryMap-of-SecondaryMap.
+    /// Records for every variable and for every relevant block, the last definition of
+    /// the variable in the block.
+    variables: SecondaryMap<Variable, SecondaryMap<SSABlock, PackedOption<Value>>>,
+
+    /// Records the position of the basic blocks and the list of values used but not defined in the
+    /// block.
+    ssa_blocks: PrimaryMap<SSABlock, SSABlockData>,
+
+    /// Records the basic blocks at the beginning of the `Block`s.
+    block_headers: SecondaryMap<Block, PackedOption<SSABlock>>,
+
+    /// Call stack for use in the `use_var`/`predecessors_lookup` state machine.
+    calls: Vec<Call>,
+    /// Result stack for use in the `use_var`/`predecessors_lookup` state machine.
+    results: Vec<Value>,
+
+    /// Side effects accumulated in the `use_var`/`predecessors_lookup` state machine.
+    side_effects: SideEffects,
+}
+
+/// Side effects of a `use_var` or a `seal_block_header_block` method call.
+pub struct SideEffects {
+    /// When we want to append jump arguments to a `br_table` instruction, the critical edge is
+    /// split and the newly created `Block`s are signaled here.
+    pub split_blocks_created: Vec<Block>,
+    /// When a variable is used but has never been defined before (this happens in the case of
+    /// unreachable code), a placeholder `iconst` or `fconst` value is added to the right `Block`.
+    /// This field signals if that is the case and reports the `Block`s to which the
+    /// initializations have been added.
+    pub instructions_added_to_blocks: Vec<Block>,
+}
+
+impl SideEffects {
+    fn new() -> Self {
+        Self {
+            split_blocks_created: Vec::new(),
+            instructions_added_to_blocks: Vec::new(),
+        }
+    }
+
+    fn is_empty(&self) -> bool {
+        self.split_blocks_created.is_empty() && self.instructions_added_to_blocks.is_empty()
+    }
+}
+
+/// Describes the current position of a basic block in the control flow graph.
+enum SSABlockData {
+    /// A block at the top of a `Block`.
+    BlockHeader(BlockHeaderSSABlockData),
+    /// A block inside a `Block` with a unique other block as its predecessor.
+    /// The block is implicitly sealed at creation.
+    BlockBody { ssa_pred: SSABlock },
+}
+
+impl SSABlockData {
+    fn add_predecessor(&mut self, ssa_pred: SSABlock, inst: Inst) {
+        match *self {
+            Self::BlockBody { .. } => panic!("you can't add a predecessor to a body block"),
+            Self::BlockHeader(ref mut data) => {
+                debug_assert!(!data.sealed, "sealed blocks cannot accept new predecessors");
+                data.predecessors.push(PredBlock::new(ssa_pred, inst));
+            }
+        }
+    }
+    fn remove_predecessor(&mut self, inst: Inst) -> SSABlock {
+        match *self {
+            Self::BlockBody { .. } => panic!("should not happen"),
+            Self::BlockHeader(ref mut data) => {
+                // This is a linear-complexity operation, but the number of predecessors is low
+                // in all non-pathological cases.
+                let pred: usize = data
+                    .predecessors
+                    .iter()
+                    .position(|&PredBlock { branch, .. }| branch == inst)
+                    .expect("the predecessor you are trying to remove is not declared");
+                data.predecessors.swap_remove(pred).ssa_block
+            }
+        }
+    }
+}
+
+struct PredBlock {
+    ssa_block: SSABlock,
+    branch: Inst,
+}
+
+impl PredBlock {
+    fn new(ssa_block: SSABlock, branch: Inst) -> Self {
+        Self { ssa_block, branch }
+    }
+}
+
+type PredBlockSmallVec = SmallVec<[PredBlock; 4]>;
+
+struct BlockHeaderSSABlockData {
+    // The predecessors of the Block header block, with the block and branch instruction.
+    predecessors: PredBlockSmallVec,
+    // A block header block is sealed if all of its predecessors have been declared.
+    sealed: bool,
+    // The block which this block is part of.
+    block: Block,
+    // List of current Block arguments for which an earlier def has not been found yet.
+    undef_variables: Vec<(Variable, Value)>,
+}
+
+/// An opaque reference to a basic block.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub struct SSABlock(u32);
+impl EntityRef for SSABlock {
+    fn new(index: usize) -> Self {
+        debug_assert!(index < (u32::MAX as usize));
+        Self(index as u32)
+    }
+
+    fn index(self) -> usize {
+        self.0 as usize
+    }
+}
+
+impl ReservedValue for SSABlock {
+    fn reserved_value() -> Self {
+        Self(u32::MAX)
+    }
+}
+
+impl SSABuilder {
+    /// Allocate a new blank SSA builder struct. Use the API functions to interact with the struct.
+    pub fn new() -> Self {
+        Self {
+            variables: SecondaryMap::with_default(SecondaryMap::new()),
+            ssa_blocks: PrimaryMap::new(),
+            block_headers: SecondaryMap::new(),
+            calls: Vec::new(),
+            results: Vec::new(),
+            side_effects: SideEffects::new(),
+        }
+    }
+
+    /// Clears an `SSABuilder` of all its data, leaving it in a pristine state without
+    /// deallocating memory.
+    pub fn clear(&mut self) {
+        self.variables.clear();
+        self.ssa_blocks.clear();
+        self.block_headers.clear();
+        debug_assert!(self.calls.is_empty());
+        debug_assert!(self.results.is_empty());
+        debug_assert!(self.side_effects.is_empty());
+    }
+
+    /// Tests whether an `SSABuilder` is in a cleared state.
+    pub fn is_empty(&self) -> bool {
+        self.variables.is_empty()
+            && self.ssa_blocks.is_empty()
+            && self.block_headers.is_empty()
+            && self.calls.is_empty()
+            && self.results.is_empty()
+            && self.side_effects.is_empty()
+    }
+}
+
+/// Small enum used for clarity in some functions.
+#[derive(Debug)]
+enum ZeroOneOrMore<T> {
+    Zero,
+    One(T),
+    More,
+}
+
+/// Cases used internally by `use_var_nonlocal()` for avoiding the borrow checker.
+#[derive(Debug)]
+enum UseVarCases {
+    Unsealed(Value),
+    SealedOnePredecessor(SSABlock),
+    SealedMultiplePredecessors(Value, Block),
+}
+
+/// States for the `use_var`/`predecessors_lookup` state machine.
+enum Call {
+    UseVar(SSABlock),
+    FinishSealedOnePredecessor(SSABlock),
+    FinishPredecessorsLookup(Value, Block),
+}
+
+/// Emit instructions to produce a zero value in the given type.
+fn emit_zero(ty: Type, mut cur: FuncCursor) -> Value {
+    if ty.is_int() {
+        cur.ins().iconst(ty, 0)
+    } else if ty.is_bool() {
+        cur.ins().bconst(ty, false)
+    } else if ty == F32 {
+        cur.ins().f32const(Ieee32::with_bits(0))
+    } else if ty == F64 {
+        cur.ins().f64const(Ieee64::with_bits(0))
+    } else if ty.is_ref() {
+        cur.ins().null(ty)
+    } else if ty.is_vector() {
+        let scalar_ty = ty.lane_type();
+        if scalar_ty.is_int() {
+            cur.ins().iconst(ty, 0)
+        } else if scalar_ty.is_bool() {
+            cur.ins().bconst(ty, false)
+        } else if scalar_ty == F32 {
+            let scalar = cur.ins().f32const(Ieee32::with_bits(0));
+            cur.ins().splat(ty, scalar)
+        } else if scalar_ty == F64 {
+            let scalar = cur.ins().f64const(Ieee64::with_bits(0));
+            cur.ins().splat(ty, scalar)
+        } else {
+            panic!("unimplemented scalar type: {:?}", ty)
+        }
+    } else {
+        panic!("unimplemented type: {:?}", ty)
+    }
+}
+
+/// The following methods are the API of the SSA builder. Here is how it should be used
+/// when translating to Cranelift IR:
+///
+/// - for each sequence of contiguous instructions (with no branches), create a corresponding
+///   basic block with `declare_block_body_block` or `declare_block_header_block` depending on the
+///   position of the basic block;
+///
+/// - while traversing a basic block and translating instructions, use `def_var` and `use_var`
+///   to record definitions and uses of variables; these methods will give you the corresponding
+///   SSA values;
+///
+/// - when all the instructions in a basic block have been translated, the block is said to be
+///   _filled_ and only then can you add it as a predecessor to other blocks with
+///   `declare_block_predecessor`;
+///
+/// - when you have constructed all the predecessors of a basic block at the beginning of a
+///   `Block`, call `seal_block_header_block` on it with the `Function` that you are building.
+///
+/// This API will give you the correct SSA values to use as arguments of your instructions,
+/// as well as modify the jump instructions and `Block` header parameters to account for the SSA
+/// Phi functions.
+///
+impl SSABuilder {
+    /// Declares a new definition of a variable in a given basic block.
+    /// The SSA value is passed as an argument because it should be created with
+    /// `ir::DataFlowGraph::append_result`.
+    pub fn def_var(&mut self, var: Variable, val: Value, ssa_block: SSABlock) {
+        self.variables[var][ssa_block] = PackedOption::from(val);
+    }
+
+    /// Declares a use of a variable in a given basic block. Returns the SSA value corresponding
+    /// to the current SSA definition of this variable and a list of newly created Blocks that
+    /// are the results of critical edge splitting for `br_table` with arguments.
+    ///
+    /// If the variable has never been defined in this block or recursively in its predecessors,
+    /// this method will silently create an initializer with `iconst` or `fconst`. You are
+    /// responsible for making sure that you initialize your variables.
+    pub fn use_var(
+        &mut self,
+        func: &mut Function,
+        var: Variable,
+        ty: Type,
+        ssa_block: SSABlock,
+    ) -> (Value, SideEffects) {
+        // First, try Local Value Numbering (Algorithm 1 in the paper).
+        // If the variable already has a known Value in this block, use that.
+        if let Some(var_defs) = self.variables.get(var) {
+            if let Some(val) = var_defs[ssa_block].expand() {
+                return (val, SideEffects::new());
+            }
+        }
+
+        // Otherwise, use Global Value Numbering (Algorithm 2 in the paper).
+        // This resolves the Value with respect to its predecessors.
+        debug_assert!(self.calls.is_empty());
+        debug_assert!(self.results.is_empty());
+        debug_assert!(self.side_effects.is_empty());
+
+        // Prepare the 'calls' and 'results' stacks for the state machine.
+        self.use_var_nonlocal(func, var, ty, ssa_block);
+
+        let value = self.run_state_machine(func, var, ty);
+        let side_effects = mem::replace(&mut self.side_effects, SideEffects::new());
+
+        (value, side_effects)
+    }
+
+    /// Resolve the minimal SSA Value of `var` in `block` by traversing predecessors.
+    ///
+    /// This function sets up state for `run_state_machine()` but does not execute it.
+    fn use_var_nonlocal(
+        &mut self,
+        func: &mut Function,
+        var: Variable,
+        ty: Type,
+        ssa_block: SSABlock,
+    ) {
+        // This function is split into two parts to appease the borrow checker.
+        // Part 1: With a mutable borrow of self, update the DataFlowGraph if necessary.
+        let case = match self.ssa_blocks[ssa_block] {
+            SSABlockData::BlockHeader(ref mut data) => {
+                // The block has multiple predecessors, so we append a `Block` parameter that
+                // will serve as a value.
+                if data.sealed {
+                    if data.predecessors.len() == 1 {
+                        // Optimize the common case of one predecessor: no param needed.
+                        UseVarCases::SealedOnePredecessor(data.predecessors[0].ssa_block)
+                    } else {
+                        // Break potential cycles by eagerly adding an operandless param.
+                        let val = func.dfg.append_block_param(data.block, ty);
+                        UseVarCases::SealedMultiplePredecessors(val, data.block)
+                    }
+                } else {
+                    let val = func.dfg.append_block_param(data.block, ty);
+                    data.undef_variables.push((var, val));
+                    UseVarCases::Unsealed(val)
+                }
+            }
+            SSABlockData::BlockBody { ssa_pred } => UseVarCases::SealedOnePredecessor(ssa_pred),
+        };
+
+        // Part 2: Prepare SSABuilder state for run_state_machine().
+        match case {
+            UseVarCases::SealedOnePredecessor(pred) => {
+                // Get the Value directly from the single predecessor.
+                self.calls.push(Call::FinishSealedOnePredecessor(ssa_block));
+                self.calls.push(Call::UseVar(pred));
+            }
+            UseVarCases::Unsealed(val) => {
+                // Define the operandless param added above to prevent lookup cycles.
+                self.def_var(var, val, ssa_block);
+
+                // Nothing more can be known at this point.
+                self.results.push(val);
+            }
+            UseVarCases::SealedMultiplePredecessors(val, block) => {
+                // Define the operandless param added above to prevent lookup cycles.
+                self.def_var(var, val, ssa_block);
+
+                // Look up a use_var for each predecessor.
+                self.begin_predecessors_lookup(val, block);
+            }
+        }
+    }
+
+    /// For blocks with a single predecessor, once we've determined the value,
+    /// record a local def for it for future queries to find.
+    fn finish_sealed_one_predecessor(&mut self, var: Variable, ssa_block: SSABlock) {
+        let val = *self.results.last().unwrap();
+        self.def_var(var, val, ssa_block);
+    }
+
+    /// Declares a new basic block belonging to the body of a certain `Block` and having `pred`
+    /// as a predecessor. `pred` is the only predecessor of the block and the block is sealed
+    /// at creation.
+    ///
+    /// To declare a `Block` header block, see `declare_block_header_block`.
+    pub fn declare_block_body_block(&mut self, ssa_pred: SSABlock) -> SSABlock {
+        self.ssa_blocks.push(SSABlockData::BlockBody { ssa_pred })
+    }
+
+    /// Declares a new basic block at the beginning of a `Block`. No predecessors are declared
+    /// here and the block is not sealed.
+    /// Predecessors have to be added with `declare_block_predecessor`.
+    pub fn declare_block_header_block(&mut self, block: Block) -> SSABlock {
+        let ssa_block = self
+            .ssa_blocks
+            .push(SSABlockData::BlockHeader(BlockHeaderSSABlockData {
+                predecessors: PredBlockSmallVec::new(),
+                sealed: false,
+                block,
+                undef_variables: Vec::new(),
+            }));
+        self.block_headers[block] = ssa_block.into();
+        ssa_block
+    }
+    /// Gets the header block corresponding to a `Block`; panics if the `Block` or the header
+    /// block isn't declared.
+    pub fn header_block(&self, block: Block) -> SSABlock {
+        self.block_headers
+            .get(block)
+            .expect("the block has not been declared")
+            .expand()
+            .expect("the header block has not been defined")
+    }
+
+    /// Declares a new predecessor for a `Block` header block and records the branch instruction
+    /// of the predecessor that leads to it.
+    ///
+    /// Note that the predecessor is a `SSABlock` and not a `Block`. This `SSABlock` must be
+    /// filled before being added as a predecessor.
+    /// Note that you must provide no jump arguments to the branch
+    /// instruction when you create it, since `SSABuilder` will fill them in for you.
+    ///
+    /// Callers are expected to avoid adding the same predecessor more than once in the case
+    /// of a jump table.
+    pub fn declare_block_predecessor(&mut self, block: Block, ssa_pred: SSABlock, inst: Inst) {
+        debug_assert!(!self.is_sealed(block));
+        let header_block = self.header_block(block);
+        self.ssa_blocks[header_block].add_predecessor(ssa_pred, inst)
+    }
+
+    /// Remove a previously declared Block predecessor by giving a reference to the jump
+    /// instruction. Returns the basic block containing the instruction.
+    ///
+    /// Note: use only when you know what you are doing; this might break the SSA construction.
+    pub fn remove_block_predecessor(&mut self, block: Block, inst: Inst) -> SSABlock {
+        debug_assert!(!self.is_sealed(block));
+        let header_block = self.header_block(block);
+        self.ssa_blocks[header_block].remove_predecessor(inst)
+    }
+
+    /// Completes the global value numbering for a `Block`, once all of its predecessors have
+    /// been declared.
+    ///
+    /// This method modifies the function's `Layout` by adding arguments to the `Block`s to
+    /// take into account the Phi functions placed by the SSA algorithm.
+    ///
+    /// Returns the list of newly created blocks for critical edge splitting.
+    pub fn seal_block_header_block(&mut self, block: Block, func: &mut Function) -> SideEffects {
+        self.seal_one_block_header_block(block, func);
+        mem::replace(&mut self.side_effects, SideEffects::new())
+    }
+
+    /// Completes the global value numbering for all `Block`s in `func`.
+    ///
+    /// It's more efficient to seal `Block`s as soon as possible, during
+    /// translation, but for frontends where this is impractical to do, this
+    /// function can be used at the end of translating all blocks to ensure
+    /// that everything is sealed.
+    pub fn seal_all_block_header_blocks(&mut self, func: &mut Function) -> SideEffects {
+        // Seal all `Block`s currently in the function. This can entail splitting
+        // and creation of new blocks, however such new blocks are sealed on
+        // the fly, so we don't need to account for them here.
+        for block in self.block_headers.keys() {
+            self.seal_one_block_header_block(block, func);
+        }
+        mem::replace(&mut self.side_effects, SideEffects::new())
+    }
+
+    /// Helper function for `seal_block_header_block` and
+    /// `seal_all_block_header_blocks`.
+    fn seal_one_block_header_block(&mut self, block: Block, func: &mut Function) {
+        let ssa_block = self.header_block(block);
+
+        let undef_vars = match self.ssa_blocks[ssa_block] {
+            SSABlockData::BlockBody { .. } => panic!("this should not happen"),
+            SSABlockData::BlockHeader(ref mut data) => {
+                debug_assert!(
+                    !data.sealed,
+                    "Attempting to seal {} which is already sealed.",
+                    block
+                );
+                debug_assert_eq!(block, data.block);
+                // Extract the undef_variables data from the block so that we
+                // can iterate over it without borrowing the whole builder.
+                mem::replace(&mut data.undef_variables, Vec::new())
+            }
+        };
+
+        // For each undef var we look up values in the predecessors and create a block parameter
+        // only if necessary.
+        for (var, val) in undef_vars {
+            let ty = func.dfg.value_type(val);
+            self.predecessors_lookup(func, val, var, ty, block);
+        }
+        self.mark_block_header_block_sealed(ssa_block);
+    }
+
+    /// Set the `sealed` flag for `block`.
+    fn mark_block_header_block_sealed(&mut self, ssa_block: SSABlock) {
+        // Then we mark the block as sealed.
+        match self.ssa_blocks[ssa_block] {
+            SSABlockData::BlockBody { .. } => panic!("this should not happen"),
+            SSABlockData::BlockHeader(ref mut data) => {
+                debug_assert!(!data.sealed);
+                debug_assert!(data.undef_variables.is_empty());
+                data.sealed = true;
+                // We could call data.predecessors.shrink_to_fit() here, if
+                // important, because no further predecessors will be added
+                // to this block.
+            }
+        }
+    }
+
+    /// Given the local SSA Value of a Variable in a Block, perform a recursive lookup on
+    /// predecessors to determine if it is redundant with another Value earlier in the CFG.
+    ///
+    /// If such a Value exists and is redundant, the local Value is replaced by the
+    /// corresponding non-local Value. If the original Value was a Block parameter,
+    /// the parameter may be removed if redundant. Parameters are placed eagerly by callers
+    /// to avoid infinite loops when looking up a Value for a Block that is in a CFG loop.
+    ///
+    /// Doing this lookup for each Value in each Block preserves SSA form during construction.
+    ///
+    /// Returns the chosen Value.
+    ///
+    /// ## Arguments
+    ///
+    /// `sentinel` is a dummy Block parameter inserted by `use_var_nonlocal()`.
+    /// Its purpose is to allow detection of CFG cycles while traversing predecessors.
+    ///
+    /// The `sentinel: Value` and the `ty: Type` describe the `var: Variable`
+    /// that is being looked up.
+    fn predecessors_lookup(
+        &mut self,
+        func: &mut Function,
+        sentinel: Value,
+        var: Variable,
+        ty: Type,
+        block: Block,
+    ) -> Value {
+        debug_assert!(self.calls.is_empty());
+        debug_assert!(self.results.is_empty());
+        // self.side_effects may be non-empty here so that callers can
+        // accumulate side effects over multiple calls.
+        self.begin_predecessors_lookup(sentinel, block);
+        self.run_state_machine(func, var, ty)
+    }
+
+    /// Set up state for `run_state_machine()` to initiate non-local use lookups
+    /// in all predecessors of `dest_block`, and arrange for a call to
+    /// `finish_predecessors_lookup` once they complete.
+    fn begin_predecessors_lookup(&mut self, sentinel: Value, dest_block: Block) {
+        self.calls
+            .push(Call::FinishPredecessorsLookup(sentinel, dest_block));
+        // Iterate over the predecessors.
+        let mut calls = mem::replace(&mut self.calls, Vec::new());
+        calls.extend(self.predecessors(dest_block).iter().rev().map(
+            |&PredBlock {
+                 ssa_block: pred, ..
+             }| Call::UseVar(pred),
+        ));
+        self.calls = calls;
+    }
+
+    /// Examine the values from the predecessors and compute a result value, creating
+    /// block parameters as needed.
+    fn finish_predecessors_lookup(
+        &mut self,
+        func: &mut Function,
+        sentinel: Value,
+        var: Variable,
+        dest_block: Block,
+    ) {
+        let mut pred_values: ZeroOneOrMore<Value> = ZeroOneOrMore::Zero;
+
+        // Determine how many predecessors are yielding unique, non-temporary Values.
+        let num_predecessors = self.predecessors(dest_block).len();
+        for &pred_val in self.results.iter().rev().take(num_predecessors) {
+            match pred_values {
+                ZeroOneOrMore::Zero => {
+                    if pred_val != sentinel {
+                        pred_values = ZeroOneOrMore::One(pred_val);
+                    }
+                }
+                ZeroOneOrMore::One(old_val) => {
+                    if pred_val != sentinel && pred_val != old_val {
+                        pred_values = ZeroOneOrMore::More;
+                        break;
+                    }
+                }
+                ZeroOneOrMore::More => {
+                    break;
+                }
+            }
+        }
+
+        // Those predecessors' Values have been examined: pop all their results.
+        self.results.truncate(self.results.len() - num_predecessors);
+
+        let result_val = match pred_values {
+            ZeroOneOrMore::Zero => {
+                // The variable is used but never defined before. This is an irregularity in the
+                // code, but rather than throwing an error we silently initialize the variable to
+                // 0. This will have no effect since this situation happens in unreachable code.
+                if !func.layout.is_block_inserted(dest_block) {
+                    func.layout.append_block(dest_block);
+                }
+                self.side_effects
+                    .instructions_added_to_blocks
+                    .push(dest_block);
+                let zero = emit_zero(
+                    func.dfg.value_type(sentinel),
+                    FuncCursor::new(func).at_first_insertion_point(dest_block),
+                );
+                func.dfg.remove_block_param(sentinel);
+                func.dfg.change_to_alias(sentinel, zero);
+                zero
+            }
+            ZeroOneOrMore::One(pred_val) => {
+                // Here all the predecessors use a single value to represent our variable,
+                // so we don't need to have it as a block argument.
+                // We need to replace all the occurrences of val with pred_val but since
+                // we can't afford a re-writing pass right now we just declare an alias.
+                // Resolve aliases eagerly so that we can check for cyclic aliasing,
+                // which can occur in unreachable code.
+                let mut resolved = func.dfg.resolve_aliases(pred_val);
+                if sentinel == resolved {
+                    // Cycle detected. Break it by creating a zero value.
+                    resolved = emit_zero(
+                        func.dfg.value_type(sentinel),
+                        FuncCursor::new(func).at_first_insertion_point(dest_block),
+                    );
+                }
+                func.dfg.remove_block_param(sentinel);
+                func.dfg.change_to_alias(sentinel, resolved);
+                resolved
+            }
+            ZeroOneOrMore::More => {
+                // There is disagreement in the predecessors on which value to use, so we have
+                // to keep the block argument. To avoid borrowing `self` for the whole loop,
+                // temporarily detach the predecessors list and replace it with an empty list.
+                let mut preds =
+                    mem::replace(self.predecessors_mut(dest_block), PredBlockSmallVec::new());
+                for &mut PredBlock {
+                    ssa_block: ref mut pred_ssa_block,
+                    branch: ref mut last_inst,
+                } in &mut preds
+                {
+                    // We already did a full `use_var` above, so we can do just the fast path.
+                    let ssa_block_map = self.variables.get(var).unwrap();
+                    let pred_val = ssa_block_map.get(*pred_ssa_block).unwrap().unwrap();
+                    let jump_arg = self.append_jump_argument(
+                        func,
+                        *last_inst,
+                        *pred_ssa_block,
+                        dest_block,
+                        pred_val,
+                        var,
+                    );
+                    if let Some((middle_block, middle_ssa_block, middle_jump_inst)) = jump_arg {
+                        *pred_ssa_block = middle_ssa_block;
+                        *last_inst = middle_jump_inst;
+                        self.side_effects.split_blocks_created.push(middle_block);
+                    }
+                }
+                // Now that we're done, move the predecessors list back.
+                debug_assert!(self.predecessors(dest_block).is_empty());
+                *self.predecessors_mut(dest_block) = preds;
+
+                sentinel
+            }
+        };
+
+        self.results.push(result_val);
+    }
+
+    /// Appends a jump argument to a jump instruction; returns the block created in case of
+    /// critical edge splitting.
+    fn append_jump_argument(
+        &mut self,
+        func: &mut Function,
+        jump_inst: Inst,
+        jump_inst_ssa_block: SSABlock,
+        dest_block: Block,
+        val: Value,
+        var: Variable,
+    ) -> Option<(Block, SSABlock, Inst)> {
+        match func.dfg.analyze_branch(jump_inst) {
+            BranchInfo::NotABranch => {
+                panic!("you have declared a non-branch instruction as a predecessor to a block");
+            }
+            // For a single destination, appending a jump argument to the instruction
+            // is sufficient.
+            BranchInfo::SingleDest(_, _) => {
+                func.dfg.append_inst_arg(jump_inst, val);
+                None
+            }
+            BranchInfo::Table(jt, default_block) => {
+                // In the case of a jump table, the situation is tricky because br_table doesn't
+                // support arguments.
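+                // Sketch of the rewrite performed below:
+                //
+                //     br_table val, jt -> dest_block
+                //
+                // becomes
+                //
+                //     br_table val, jt -> middle_block: jump dest_block(val)
+                //
+                // i.e. the jump argument is carried by a fresh middle block
+                // spliced into the critical edge.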
+ // We have to split the critical edge + let middle_block = func.dfg.make_block(); + func.layout.append_block(middle_block); + let middle_ssa_block = self.declare_block_header_block(middle_block); + self.ssa_blocks[middle_ssa_block].add_predecessor(jump_inst_ssa_block, jump_inst); + self.mark_block_header_block_sealed(middle_ssa_block); + + if let Some(default_block) = default_block { + if dest_block == default_block { + match func.dfg[jump_inst] { + InstructionData::BranchTable { + destination: ref mut dest, + .. + } => { + *dest = middle_block; + } + _ => panic!("should not happen"), + } + } + } + + for old_dest in func.jump_tables[jt].as_mut_slice() { + if *old_dest == dest_block { + *old_dest = middle_block; + } + } + let mut cur = FuncCursor::new(func).at_bottom(middle_block); + let middle_jump_inst = cur.ins().jump(dest_block, &[val]); + self.def_var(var, val, middle_ssa_block); + Some((middle_block, middle_ssa_block, middle_jump_inst)) + } + } + } + + /// Returns the list of `Block`s that have been declared as predecessors of the argument. + fn predecessors(&self, block: Block) -> &[PredBlock] { + let ssa_block = self.header_block(block); + match self.ssa_blocks[ssa_block] { + SSABlockData::BlockBody { .. } => panic!("should not happen"), + SSABlockData::BlockHeader(ref data) => &data.predecessors, + } + } + + /// Returns whether the given Block has any predecessor or not. + pub fn has_any_predecessors(&self, block: Block) -> bool { + !self.predecessors(block).is_empty() + } + + /// Same as predecessors, but for &mut. + fn predecessors_mut(&mut self, block: Block) -> &mut PredBlockSmallVec { + let ssa_block = self.header_block(block); + match self.ssa_blocks[ssa_block] { + SSABlockData::BlockBody { .. } => panic!("should not happen"), + SSABlockData::BlockHeader(ref mut data) => &mut data.predecessors, + } + } + + /// Returns `true` if and only if `seal_block_header_block` has been called on the argument. + pub fn is_sealed(&self, block: Block) -> bool { + match self.ssa_blocks[self.header_block(block)] { + SSABlockData::BlockBody { .. } => panic!("should not happen"), + SSABlockData::BlockHeader(ref data) => data.sealed, + } + } + + /// The main algorithm is naturally recursive: when there's a `use_var` in a + /// block with no corresponding local defs, it recurses and performs a + /// `use_var` in each predecessor. To avoid risking running out of callstack + /// space, we keep an explicit stack and use a small state machine rather + /// than literal recursion. + fn run_state_machine(&mut self, func: &mut Function, var: Variable, ty: Type) -> Value { + // Process the calls scheduled in `self.calls` until it is empty. 
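+        // Invariant: each `Call::UseVar` eventually pushes exactly one `Value`
+        // onto `self.results`, either directly or through the continuation
+        // calls it schedules, so exactly one result remains once `calls` drains.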
+ while let Some(call) = self.calls.pop() { + match call { + Call::UseVar(ssa_block) => { + // First we lookup for the current definition of the variable in this block + if let Some(var_defs) = self.variables.get(var) { + if let Some(val) = var_defs[ssa_block].expand() { + self.results.push(val); + continue; + } + } + self.use_var_nonlocal(func, var, ty, ssa_block); + } + Call::FinishSealedOnePredecessor(ssa_block) => { + self.finish_sealed_one_predecessor(var, ssa_block); + } + Call::FinishPredecessorsLookup(sentinel, dest_block) => { + self.finish_predecessors_lookup(func, sentinel, var, dest_block); + } + } + } + debug_assert_eq!(self.results.len(), 1); + self.results.pop().unwrap() + } +} + +#[cfg(test)] +mod tests { + use crate::ssa::SSABuilder; + use crate::Variable; + use cranelift_codegen::cursor::{Cursor, FuncCursor}; + use cranelift_codegen::entity::EntityRef; + use cranelift_codegen::ir::instructions::BranchInfo; + use cranelift_codegen::ir::types::*; + use cranelift_codegen::ir::{Function, Inst, InstBuilder, JumpTableData, Opcode}; + use cranelift_codegen::settings; + use cranelift_codegen::verify_function; + + #[test] + fn simple_block() { + let mut func = Function::new(); + let mut ssa = SSABuilder::new(); + let block0 = func.dfg.make_block(); + // Here is the pseudo-program we want to translate: + // x = 1; + // y = 2; + // z = x + y; + // z = x + z; + + let ssa_block = ssa.declare_block_header_block(block0); + let x_var = Variable::new(0); + let x_ssa = { + let mut cur = FuncCursor::new(&mut func); + cur.insert_block(block0); + cur.ins().iconst(I32, 1) + }; + ssa.def_var(x_var, x_ssa, ssa_block); + let y_var = Variable::new(1); + let y_ssa = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block0); + cur.ins().iconst(I32, 2) + }; + ssa.def_var(y_var, y_ssa, ssa_block); + + assert_eq!(ssa.use_var(&mut func, x_var, I32, ssa_block).0, x_ssa); + assert_eq!(ssa.use_var(&mut func, y_var, I32, ssa_block).0, y_ssa); + let z_var = Variable::new(2); + let x_use1 = ssa.use_var(&mut func, x_var, I32, ssa_block).0; + let y_use1 = ssa.use_var(&mut func, y_var, I32, ssa_block).0; + let z1_ssa = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block0); + cur.ins().iadd(x_use1, y_use1) + }; + ssa.def_var(z_var, z1_ssa, ssa_block); + assert_eq!(ssa.use_var(&mut func, z_var, I32, ssa_block).0, z1_ssa); + let x_use2 = ssa.use_var(&mut func, x_var, I32, ssa_block).0; + let z_use1 = ssa.use_var(&mut func, z_var, I32, ssa_block).0; + let z2_ssa = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block0); + cur.ins().iadd(x_use2, z_use1) + }; + ssa.def_var(z_var, z2_ssa, ssa_block); + assert_eq!(ssa.use_var(&mut func, z_var, I32, ssa_block).0, z2_ssa); + } + + #[test] + fn sequence_of_blocks() { + let mut func = Function::new(); + let mut ssa = SSABuilder::new(); + let block0 = func.dfg.make_block(); + let block1 = func.dfg.make_block(); + // Here is the pseudo-program we want to translate: + // block0: + // x = 1; + // y = 2; + // z = x + y; + // brnz y, block1; + // z = x + z; + // block1: + // y = x + y; + + let ssa_block0 = ssa.declare_block_header_block(block0); + let x_var = Variable::new(0); + let x_ssa = { + let mut cur = FuncCursor::new(&mut func); + cur.insert_block(block0); + cur.insert_block(block1); + cur.goto_bottom(block0); + cur.ins().iconst(I32, 1) + }; + ssa.def_var(x_var, x_ssa, ssa_block0); + let y_var = Variable::new(1); + let y_ssa = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block0); + cur.ins().iconst(I32, 2) + }; + ssa.def_var(y_var, y_ssa, 
ssa_block0); + assert_eq!(ssa.use_var(&mut func, x_var, I32, ssa_block0).0, x_ssa); + assert_eq!(ssa.use_var(&mut func, y_var, I32, ssa_block0).0, y_ssa); + let z_var = Variable::new(2); + let x_use1 = ssa.use_var(&mut func, x_var, I32, ssa_block0).0; + let y_use1 = ssa.use_var(&mut func, y_var, I32, ssa_block0).0; + let z1_ssa = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block0); + cur.ins().iadd(x_use1, y_use1) + }; + ssa.def_var(z_var, z1_ssa, ssa_block0); + assert_eq!(ssa.use_var(&mut func, z_var, I32, ssa_block0).0, z1_ssa); + let y_use2 = ssa.use_var(&mut func, y_var, I32, ssa_block0).0; + let jump_inst: Inst = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block0); + cur.ins().brnz(y_use2, block1, &[]) + }; + let ssa_block1 = ssa.declare_block_body_block(ssa_block0); + let x_use2 = ssa.use_var(&mut func, x_var, I32, ssa_block1).0; + assert_eq!(x_use2, x_ssa); + let z_use1 = ssa.use_var(&mut func, z_var, I32, ssa_block1).0; + assert_eq!(z_use1, z1_ssa); + let z2_ssa = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block0); + cur.ins().iadd(x_use2, z_use1) + }; + ssa.def_var(z_var, z2_ssa, ssa_block1); + assert_eq!(ssa.use_var(&mut func, z_var, I32, ssa_block1).0, z2_ssa); + ssa.seal_block_header_block(block0, &mut func); + let ssa_block2 = ssa.declare_block_header_block(block1); + ssa.declare_block_predecessor(block1, ssa_block0, jump_inst); + ssa.seal_block_header_block(block1, &mut func); + let x_use3 = ssa.use_var(&mut func, x_var, I32, ssa_block2).0; + assert_eq!(x_ssa, x_use3); + let y_use3 = ssa.use_var(&mut func, y_var, I32, ssa_block2).0; + assert_eq!(y_ssa, y_use3); + let y2_ssa = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block0); + cur.ins().iadd(x_use3, y_use3) + }; + ssa.def_var(y_var, y2_ssa, ssa_block2); + match func.dfg.analyze_branch(jump_inst) { + BranchInfo::SingleDest(dest, jump_args) => { + assert_eq!(dest, block1); + assert_eq!(jump_args.len(), 0); + } + _ => assert!(false), + }; + } + + #[test] + fn program_with_loop() { + let mut func = Function::new(); + let mut ssa = SSABuilder::new(); + let block0 = func.dfg.make_block(); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + // Here is the pseudo-program we want to translate: + // block0: + // x = 1; + // y = 2; + // z = x + y; + // jump block1 + // block1: + // z = z + y; + // brnz y, block1; + // z = z - x; + // return y + // block2: + // y = y - x + // jump block1 + + let ssa_block0 = ssa.declare_block_header_block(block0); + ssa.seal_block_header_block(block0, &mut func); + let x_var = Variable::new(0); + let x1 = { + let mut cur = FuncCursor::new(&mut func); + cur.insert_block(block0); + cur.insert_block(block1); + cur.insert_block(block2); + cur.goto_bottom(block0); + cur.ins().iconst(I32, 1) + }; + ssa.def_var(x_var, x1, ssa_block0); + assert_eq!(ssa.use_var(&mut func, x_var, I32, ssa_block0).0, x1); + let y_var = Variable::new(1); + let y1 = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block0); + cur.ins().iconst(I32, 2) + }; + ssa.def_var(y_var, y1, ssa_block0); + assert_eq!(ssa.use_var(&mut func, y_var, I32, ssa_block0).0, y1); + let z_var = Variable::new(2); + let x2 = ssa.use_var(&mut func, x_var, I32, ssa_block0).0; + assert_eq!(x2, x1); + let y2 = ssa.use_var(&mut func, y_var, I32, ssa_block0).0; + assert_eq!(y2, y1); + let z1 = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block0); + cur.ins().iadd(x2, y2) + }; + ssa.def_var(z_var, z1, ssa_block0); + let jump_block0_block1 = { + let mut cur = 
FuncCursor::new(&mut func).at_bottom(block0); + cur.ins().jump(block1, &[]) + }; + let ssa_block1 = ssa.declare_block_header_block(block1); + ssa.declare_block_predecessor(block1, ssa_block0, jump_block0_block1); + let z2 = ssa.use_var(&mut func, z_var, I32, ssa_block1).0; + let y3 = ssa.use_var(&mut func, y_var, I32, ssa_block1).0; + let z3 = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block1); + cur.ins().iadd(z2, y3) + }; + ssa.def_var(z_var, z3, ssa_block1); + let y4 = ssa.use_var(&mut func, y_var, I32, ssa_block1).0; + assert_eq!(y4, y3); + let jump_block1_block2 = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block1); + cur.ins().brnz(y4, block2, &[]) + }; + let ssa_block2 = ssa.declare_block_body_block(ssa_block1); + let z4 = ssa.use_var(&mut func, z_var, I32, ssa_block2).0; + assert_eq!(z4, z3); + let x3 = ssa.use_var(&mut func, x_var, I32, ssa_block2).0; + let z5 = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block1); + cur.ins().isub(z4, x3) + }; + ssa.def_var(z_var, z5, ssa_block2); + let y5 = ssa.use_var(&mut func, y_var, I32, ssa_block2).0; + assert_eq!(y5, y3); + { + let mut cur = FuncCursor::new(&mut func).at_bottom(block1); + cur.ins().return_(&[y5]) + }; + + let ssa_block3 = ssa.declare_block_header_block(block2); + ssa.declare_block_predecessor(block2, ssa_block1, jump_block1_block2); + ssa.seal_block_header_block(block2, &mut func); + let y6 = ssa.use_var(&mut func, y_var, I32, ssa_block3).0; + assert_eq!(y6, y3); + let x4 = ssa.use_var(&mut func, x_var, I32, ssa_block3).0; + assert_eq!(x4, x3); + let y7 = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block2); + cur.ins().isub(y6, x4) + }; + ssa.def_var(y_var, y7, ssa_block3); + let jump_block2_block1 = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block2); + cur.ins().jump(block1, &[]) + }; + + ssa.declare_block_predecessor(block1, ssa_block3, jump_block2_block1); + ssa.seal_block_header_block(block1, &mut func); + assert_eq!(func.dfg.block_params(block1)[0], z2); + assert_eq!(func.dfg.block_params(block1)[1], y3); + assert_eq!(func.dfg.resolve_aliases(x3), x1); + } + + #[test] + fn br_table_with_args() { + // This tests the on-demand splitting of critical edges for br_table with jump arguments + // + // Here is the pseudo-program we want to translate: + // + // function %f { + // jt = jump_table [block2, block1] + // block0: + // x = 1; + // br_table x, block2, jt + // block1: + // x = 2 + // jump block2 + // block2: + // x = x + 1 + // return + // } + + let mut func = Function::new(); + let mut ssa = SSABuilder::new(); + let mut jump_table = JumpTableData::new(); + let block0 = func.dfg.make_block(); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + + // block0: + // x = 1; + let ssa_block0 = ssa.declare_block_header_block(block0); + ssa.seal_block_header_block(block0, &mut func); + let x_var = Variable::new(0); + let x1 = { + let mut cur = FuncCursor::new(&mut func); + cur.insert_block(block0); + cur.insert_block(block1); + cur.insert_block(block2); + cur.goto_bottom(block0); + cur.ins().iconst(I32, 1) + }; + ssa.def_var(x_var, x1, ssa_block0); + + // jt = jump_table [block2, block1] + jump_table.push_entry(block2); + jump_table.push_entry(block1); + let jt = func.create_jump_table(jump_table); + + // block0: + // ... 
+ // br_table x, block2, jt + ssa.use_var(&mut func, x_var, I32, ssa_block0).0; + let br_table = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block0); + cur.ins().br_table(x1, block2, jt) + }; + + // block1: + // x = 2 + // jump block2 + let ssa_block1 = ssa.declare_block_header_block(block1); + ssa.seal_block_header_block(block1, &mut func); + let x2 = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block1); + cur.ins().iconst(I32, 2) + }; + ssa.def_var(x_var, x2, ssa_block1); + let jump_inst = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block1); + cur.ins().jump(block2, &[]) + }; + + // block2: + // x = x + 1 + // return + let ssa_block3 = ssa.declare_block_header_block(block2); + ssa.declare_block_predecessor(block2, ssa_block1, jump_inst); + ssa.declare_block_predecessor(block2, ssa_block0, br_table); + ssa.seal_block_header_block(block2, &mut func); + let ssa_block4 = ssa.declare_block_body_block(ssa_block3); + let x3 = ssa.use_var(&mut func, x_var, I32, ssa_block4).0; + let x4 = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block2); + cur.ins().iadd_imm(x3, 1) + }; + ssa.def_var(x_var, x4, ssa_block4); + { + let mut cur = FuncCursor::new(&mut func).at_bottom(block2); + cur.ins().return_(&[]) + }; + + let flags = settings::Flags::new(settings::builder()); + match verify_function(&func, &flags) { + Ok(()) => {} + Err(_errors) => { + #[cfg(feature = "std")] + panic!(_errors); + #[cfg(not(feature = "std"))] + panic!("function failed to verify"); + } + } + } + + #[test] + fn undef_values_reordering() { + let mut func = Function::new(); + let mut ssa = SSABuilder::new(); + let block0 = func.dfg.make_block(); + let block1 = func.dfg.make_block(); + // Here is the pseudo-program we want to translate: + // block0: + // x = 0 + // y = 1 + // z = 2 + // jump block1 + // block1: + // x = z + x + // y = y - x + // jump block1 + // + let ssa_block0 = ssa.declare_block_header_block(block0); + let x_var = Variable::new(0); + let y_var = Variable::new(1); + let z_var = Variable::new(2); + ssa.seal_block_header_block(block0, &mut func); + let x1 = { + let mut cur = FuncCursor::new(&mut func); + cur.insert_block(block0); + cur.insert_block(block1); + cur.goto_bottom(block0); + cur.ins().iconst(I32, 0) + }; + ssa.def_var(x_var, x1, ssa_block0); + let y1 = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block0); + cur.ins().iconst(I32, 1) + }; + ssa.def_var(y_var, y1, ssa_block0); + let z1 = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block0); + cur.ins().iconst(I32, 2) + }; + ssa.def_var(z_var, z1, ssa_block0); + let jump_inst = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block0); + cur.ins().jump(block1, &[]) + }; + let ssa_block1 = ssa.declare_block_header_block(block1); + ssa.declare_block_predecessor(block1, ssa_block0, jump_inst); + let z2 = ssa.use_var(&mut func, z_var, I32, ssa_block1).0; + assert_eq!(func.dfg.block_params(block1)[0], z2); + let x2 = ssa.use_var(&mut func, x_var, I32, ssa_block1).0; + assert_eq!(func.dfg.block_params(block1)[1], x2); + let x3 = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block1); + cur.ins().iadd(x2, z2) + }; + ssa.def_var(x_var, x3, ssa_block1); + let x4 = ssa.use_var(&mut func, x_var, I32, ssa_block1).0; + let y3 = ssa.use_var(&mut func, y_var, I32, ssa_block1).0; + assert_eq!(func.dfg.block_params(block1)[2], y3); + let y4 = { + let mut cur = FuncCursor::new(&mut func).at_bottom(block1); + cur.ins().isub(y3, x4) + }; + ssa.def_var(y_var, y4, ssa_block1); + let jump_inst = { + let mut 
cur = FuncCursor::new(&mut func).at_bottom(block1); + cur.ins().jump(block1, &[]) + }; + ssa.declare_block_predecessor(block1, ssa_block1, jump_inst); + ssa.seal_block_header_block(block1, &mut func); + // At sealing the "z" argument disappear but the remaining "x" and "y" args have to be + // in the right order. + assert_eq!(func.dfg.block_params(block1)[1], y3); + assert_eq!(func.dfg.block_params(block1)[0], x2); + } + + #[test] + fn undef() { + // Use vars of various types which have not been defined. + let mut func = Function::new(); + let mut ssa = SSABuilder::new(); + let block0 = func.dfg.make_block(); + let ssa_block = ssa.declare_block_header_block(block0); + ssa.seal_block_header_block(block0, &mut func); + let i32_var = Variable::new(0); + let f32_var = Variable::new(1); + let f64_var = Variable::new(2); + let b1_var = Variable::new(3); + let f32x4_var = Variable::new(4); + ssa.use_var(&mut func, i32_var, I32, ssa_block); + ssa.use_var(&mut func, f32_var, F32, ssa_block); + ssa.use_var(&mut func, f64_var, F64, ssa_block); + ssa.use_var(&mut func, b1_var, B1, ssa_block); + ssa.use_var(&mut func, f32x4_var, F32X4, ssa_block); + assert_eq!(func.dfg.num_block_params(block0), 0); + } + + #[test] + fn undef_in_entry() { + // Use a var which has not been defined. The search should hit the + // top of the entry block, and then fall back to inserting an iconst. + let mut func = Function::new(); + let mut ssa = SSABuilder::new(); + let block0 = func.dfg.make_block(); + let ssa_block = ssa.declare_block_header_block(block0); + ssa.seal_block_header_block(block0, &mut func); + let x_var = Variable::new(0); + assert_eq!(func.dfg.num_block_params(block0), 0); + ssa.use_var(&mut func, x_var, I32, ssa_block); + assert_eq!(func.dfg.num_block_params(block0), 0); + assert_eq!( + func.dfg[func.layout.first_inst(block0).unwrap()].opcode(), + Opcode::Iconst + ); + } + + #[test] + fn undef_in_entry_sealed_after() { + // Use a var which has not been defined, but the block is not sealed + // until afterward. Before sealing, the SSA builder should insert an + // block param; after sealing, it should be removed. 
+ let mut func = Function::new(); + let mut ssa = SSABuilder::new(); + let block0 = func.dfg.make_block(); + let ssa_block = ssa.declare_block_header_block(block0); + let x_var = Variable::new(0); + assert_eq!(func.dfg.num_block_params(block0), 0); + ssa.use_var(&mut func, x_var, I32, ssa_block); + assert_eq!(func.dfg.num_block_params(block0), 1); + ssa.seal_block_header_block(block0, &mut func); + assert_eq!(func.dfg.num_block_params(block0), 0); + assert_eq!( + func.dfg[func.layout.first_inst(block0).unwrap()].opcode(), + Opcode::Iconst + ); + } + + #[test] + fn unreachable_use() { + let mut func = Function::new(); + let mut ssa = SSABuilder::new(); + let block0 = func.dfg.make_block(); + let block1 = func.dfg.make_block(); + // Here is the pseudo-program we want to translate: + // block0: + // return + // block1: + // brz v1, block1 + // jump block1 + let _ssa_block0 = ssa.declare_block_header_block(block0); + ssa.seal_block_header_block(block0, &mut func); + let ssa_block1 = ssa.declare_block_header_block(block1); + let ssa_block2 = ssa.declare_block_body_block(ssa_block1); + { + let mut cur = FuncCursor::new(&mut func); + cur.insert_block(block0); + cur.insert_block(block1); + cur.goto_bottom(block0); + cur.ins().return_(&[]); + let x_var = Variable::new(0); + cur.goto_bottom(block1); + let val = ssa.use_var(&mut cur.func, x_var, I32, ssa_block1).0; + let brz = cur.ins().brz(val, block1, &[]); + ssa.declare_block_predecessor(block1, ssa_block1, brz); + let j = cur.ins().jump(block1, &[]); + ssa.declare_block_predecessor(block1, ssa_block2, j); + } + ssa.seal_block_header_block(block1, &mut func); + let flags = settings::Flags::new(settings::builder()); + match verify_function(&func, &flags) { + Ok(()) => {} + Err(_errors) => { + #[cfg(feature = "std")] + panic!(_errors); + #[cfg(not(feature = "std"))] + panic!("function failed to verify"); + } + } + } + + #[test] + fn unreachable_use_with_multiple_preds() { + let mut func = Function::new(); + let mut ssa = SSABuilder::new(); + let block0 = func.dfg.make_block(); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + // Here is the pseudo-program we want to translate: + // block0: + // return + // block1: + // brz v1, block2 + // jump block1 + // block2: + // jump block1 + let _ssa_block0 = ssa.declare_block_header_block(block0); + ssa.seal_block_header_block(block0, &mut func); + let ssa_block1 = ssa.declare_block_header_block(block1); + let ssa_block2 = ssa.declare_block_header_block(block2); + { + let mut cur = FuncCursor::new(&mut func); + let x_var = Variable::new(0); + cur.insert_block(block0); + cur.insert_block(block1); + cur.insert_block(block2); + cur.goto_bottom(block0); + cur.ins().return_(&[]); + cur.goto_bottom(block1); + let v = ssa.use_var(&mut cur.func, x_var, I32, ssa_block1).0; + let brz = cur.ins().brz(v, block2, &[]); + let j0 = cur.ins().jump(block1, &[]); + cur.goto_bottom(block2); + let j1 = cur.ins().jump(block1, &[]); + ssa.declare_block_predecessor(block1, ssa_block2, brz); + ssa.declare_block_predecessor(block1, ssa_block1, j0); + ssa.declare_block_predecessor(block2, ssa_block1, j1); + } + ssa.seal_block_header_block(block1, &mut func); + ssa.seal_block_header_block(block2, &mut func); + let flags = settings::Flags::new(settings::builder()); + match verify_function(&func, &flags) { + Ok(()) => {} + Err(_errors) => { + #[cfg(feature = "std")] + panic!(_errors); + #[cfg(not(feature = "std"))] + panic!("function failed to verify"); + } + } + } +} diff --git 
a/cranelift/frontend/src/switch.rs b/cranelift/frontend/src/switch.rs
new file mode 100644
index 0000000000..f444d9aacc
--- /dev/null
+++ b/cranelift/frontend/src/switch.rs
@@ -0,0 +1,491 @@
+use super::HashMap;
+use crate::frontend::FunctionBuilder;
+use alloc::vec::Vec;
+use cranelift_codegen::ir::condcodes::IntCC;
+use cranelift_codegen::ir::*;
+use log::debug;
+
+type EntryIndex = u64;
+
+/// Unlike with `br_table`, `Switch` cases may be sparse or non-0-based.
+/// They emit efficient code using branches, jump tables, or a combination of both.
+///
+/// # Example
+///
+/// ```rust
+/// # use cranelift_codegen::ir::types::*;
+/// # use cranelift_codegen::ir::{ExternalName, Function, Signature, InstBuilder};
+/// # use cranelift_codegen::isa::CallConv;
+/// # use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext, Switch};
+/// #
+/// # let mut sig = Signature::new(CallConv::SystemV);
+/// # let mut fn_builder_ctx = FunctionBuilderContext::new();
+/// # let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
+/// # let mut builder = FunctionBuilder::new(&mut func, &mut fn_builder_ctx);
+/// #
+/// # let entry = builder.create_block();
+/// # builder.switch_to_block(entry);
+/// #
+/// let block0 = builder.create_block();
+/// let block1 = builder.create_block();
+/// let block2 = builder.create_block();
+/// let fallback = builder.create_block();
+///
+/// let val = builder.ins().iconst(I32, 1);
+///
+/// let mut switch = Switch::new();
+/// switch.set_entry(0, block0);
+/// switch.set_entry(1, block1);
+/// switch.set_entry(7, block2);
+/// switch.emit(&mut builder, val, fallback);
+/// ```
+#[derive(Debug, Default)]
+pub struct Switch {
+    cases: HashMap<EntryIndex, Block>,
+}
+
+impl Switch {
+    /// Create a new empty switch
+    pub fn new() -> Self {
+        Self {
+            cases: HashMap::new(),
+        }
+    }
+
+    /// Set a switch entry
+    pub fn set_entry(&mut self, index: EntryIndex, block: Block) {
+        let prev = self.cases.insert(index, block);
+        assert!(
+            prev.is_none(),
+            "Tried to set the same entry {} twice",
+            index
+        );
+    }
+
+    /// Get a reference to all existing entries
+    pub fn entries(&self) -> &HashMap<EntryIndex, Block> {
+        &self.cases
+    }
+
+    /// Turn the `cases` `HashMap` into a list of `ContiguousCaseRange`s.
+    ///
+    /// # Postconditions
+    ///
+    /// * Every entry will be represented.
+    /// * The `ContiguousCaseRange`s will not overlap.
+    /// * Between two `ContiguousCaseRange`s there will be at least one entry index.
+    /// * No `ContiguousCaseRange`s will be empty.
+    fn collect_contiguous_case_ranges(self) -> Vec<ContiguousCaseRange> {
+        debug!("build_contiguous_case_ranges before: {:#?}", self.cases);
+        let mut cases = self.cases.into_iter().collect::<Vec<_>>();
+        cases.sort_by_key(|&(index, _)| index);
+
+        let mut contiguous_case_ranges: Vec<ContiguousCaseRange> = vec![];
+        let mut last_index = None;
+        for (index, block) in cases {
+            match last_index {
+                None => contiguous_case_ranges.push(ContiguousCaseRange::new(index)),
+                Some(last_index) => {
+                    if index > last_index + 1 {
+                        contiguous_case_ranges.push(ContiguousCaseRange::new(index));
+                    }
+                }
+            }
+            contiguous_case_ranges
+                .last_mut()
+                .unwrap()
+                .blocks
+                .push(block);
+            last_index = Some(index);
+        }
+
+        debug!(
+            "build_contiguous_case_ranges after: {:#?}",
+            contiguous_case_ranges
+        );
+
+        contiguous_case_ranges
+    }
+
+    /// Binary search for the right `ContiguousCaseRange`.
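+    ///
+    /// The ranges are recursively split in half until at most three remain;
+    /// `build_search_branches` then emits a linear compare-and-branch chain
+    /// for each such leaf.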
+    fn build_search_tree(
+        bx: &mut FunctionBuilder,
+        val: Value,
+        otherwise: Block,
+        contiguous_case_ranges: Vec<ContiguousCaseRange>,
+    ) -> Vec<(EntryIndex, Block, Vec<Block>)> {
+        let mut cases_and_jt_blocks = Vec::new();
+
+        // Avoid allocation in the common case
+        if contiguous_case_ranges.len() <= 3 {
+            Self::build_search_branches(
+                bx,
+                val,
+                otherwise,
+                contiguous_case_ranges,
+                &mut cases_and_jt_blocks,
+            );
+            return cases_and_jt_blocks;
+        }
+
+        let mut stack: Vec<(Option<Block>, Vec<ContiguousCaseRange>)> = Vec::new();
+        stack.push((None, contiguous_case_ranges));
+
+        while let Some((block, contiguous_case_ranges)) = stack.pop() {
+            if let Some(block) = block {
+                bx.switch_to_block(block);
+            }
+
+            if contiguous_case_ranges.len() <= 3 {
+                Self::build_search_branches(
+                    bx,
+                    val,
+                    otherwise,
+                    contiguous_case_ranges,
+                    &mut cases_and_jt_blocks,
+                );
+            } else {
+                let split_point = contiguous_case_ranges.len() / 2;
+                let mut left = contiguous_case_ranges;
+                let right = left.split_off(split_point);
+
+                let left_block = bx.create_block();
+                let right_block = bx.create_block();
+
+                let should_take_right_side = bx.ins().icmp_imm(
+                    IntCC::UnsignedGreaterThanOrEqual,
+                    val,
+                    right[0].first_index as i64,
+                );
+                bx.ins().brnz(should_take_right_side, right_block, &[]);
+                bx.ins().jump(left_block, &[]);
+
+                stack.push((Some(left_block), left));
+                stack.push((Some(right_block), right));
+            }
+        }
+
+        cases_and_jt_blocks
+    }
+
+    /// Linear search for the right `ContiguousCaseRange`.
+    fn build_search_branches(
+        bx: &mut FunctionBuilder,
+        val: Value,
+        otherwise: Block,
+        contiguous_case_ranges: Vec<ContiguousCaseRange>,
+        cases_and_jt_blocks: &mut Vec<(EntryIndex, Block, Vec<Block>)>,
+    ) {
+        let mut was_branch = false;
+        let ins_fallthrough_jump = |was_branch: bool, bx: &mut FunctionBuilder| {
+            if was_branch {
+                let block = bx.create_block();
+                bx.ins().jump(block, &[]);
+                bx.switch_to_block(block);
+            }
+        };
+        for ContiguousCaseRange {
+            first_index,
+            blocks,
+        } in contiguous_case_ranges.into_iter().rev()
+        {
+            match (blocks.len(), first_index) {
+                (1, 0) => {
+                    ins_fallthrough_jump(was_branch, bx);
+                    bx.ins().brz(val, blocks[0], &[]);
+                }
+                (1, _) => {
+                    ins_fallthrough_jump(was_branch, bx);
+                    let is_good_val = bx.ins().icmp_imm(IntCC::Equal, val, first_index as i64);
+                    bx.ins().brnz(is_good_val, blocks[0], &[]);
+                }
+                (_, 0) => {
+                    // if `first_index` is 0, then `icmp_imm uge val, first_index` is trivially true
+                    let jt_block = bx.create_block();
+                    bx.ins().jump(jt_block, &[]);
+                    cases_and_jt_blocks.push((first_index, jt_block, blocks));
+                    // `jump otherwise` below must not be hit, because the current block has been
+                    // filled above. This is the last iteration anyway, as 0 is the smallest
+                    // unsigned int, so just return here.
+                    return;
+                }
+                (_, _) => {
+                    ins_fallthrough_jump(was_branch, bx);
+                    let jt_block = bx.create_block();
+                    let is_good_val = bx.ins().icmp_imm(
+                        IntCC::UnsignedGreaterThanOrEqual,
+                        val,
+                        first_index as i64,
+                    );
+                    bx.ins().brnz(is_good_val, jt_block, &[]);
+                    cases_and_jt_blocks.push((first_index, jt_block, blocks));
+                }
+            }
+            was_branch = true;
+        }
+
+        bx.ins().jump(otherwise, &[]);
+    }
+
+    /// For every item in `cases_and_jt_blocks` this will create a jump table in the specified block.
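+    ///
+    /// When `first_index` is non-zero, the switch value is first rebased with
+    /// `iadd_imm` so that the emitted jump table can be indexed from zero.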
+    fn build_jump_tables(
+        bx: &mut FunctionBuilder,
+        val: Value,
+        otherwise: Block,
+        cases_and_jt_blocks: Vec<(EntryIndex, Block, Vec<Block>)>,
+    ) {
+        for (first_index, jt_block, blocks) in cases_and_jt_blocks.into_iter().rev() {
+            let mut jt_data = JumpTableData::new();
+            for block in blocks {
+                jt_data.push_entry(block);
+            }
+            let jump_table = bx.create_jump_table(jt_data);
+
+            bx.switch_to_block(jt_block);
+            let discr = if first_index == 0 {
+                val
+            } else {
+                bx.ins().iadd_imm(val, (first_index as i64).wrapping_neg())
+            };
+            bx.ins().br_table(discr, otherwise, jump_table);
+        }
+    }
+
+    /// Build the switch
+    ///
+    /// # Arguments
+    ///
+    /// * The function builder to emit to
+    /// * The value to switch on
+    /// * The default block
+    pub fn emit(self, bx: &mut FunctionBuilder, val: Value, otherwise: Block) {
+        // FIXME icmp(_imm) doesn't have encodings for i8 and i16 on x86(_64) yet
+        let val = match bx.func.dfg.value_type(val) {
+            types::I8 | types::I16 => bx.ins().uextend(types::I32, val),
+            _ => val,
+        };
+
+        let contiguous_case_ranges = self.collect_contiguous_case_ranges();
+        let cases_and_jt_blocks =
+            Self::build_search_tree(bx, val, otherwise, contiguous_case_ranges);
+        Self::build_jump_tables(bx, val, otherwise, cases_and_jt_blocks);
+    }
+}
+
+/// This represents a contiguous range of cases to switch on.
+///
+/// For example 10 => block1, 11 => block2, 12 => block7 will be represented as:
+///
+/// ```plain
+/// ContiguousCaseRange {
+///     first_index: 10,
+///     blocks: vec![Block::from_u32(1), Block::from_u32(2), Block::from_u32(7)]
+/// }
+/// ```
+#[derive(Debug)]
+struct ContiguousCaseRange {
+    /// The entry index of the first case. E.g. 10 when the entry indexes are 10, 11, 12 and 13.
+    first_index: EntryIndex,
+
+    /// The blocks to jump to, sorted in ascending order of entry index.
+    blocks: Vec<Block>,
+}
+
+impl ContiguousCaseRange {
+    fn new(first_index: EntryIndex) -> Self {
+        Self {
+            first_index,
+            blocks: Vec::new(),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::frontend::FunctionBuilderContext;
+    use alloc::string::ToString;
+    use cranelift_codegen::ir::Function;
+
+    macro_rules!
setup { + ($default:expr, [$($index:expr,)*]) => {{ + let mut func = Function::new(); + let mut func_ctx = FunctionBuilderContext::new(); + { + let mut bx = FunctionBuilder::new(&mut func, &mut func_ctx); + let block = bx.create_block(); + bx.switch_to_block(block); + let val = bx.ins().iconst(types::I8, 0); + let mut switch = Switch::new(); + $( + let block = bx.create_block(); + switch.set_entry($index, block); + )* + switch.emit(&mut bx, val, Block::with_number($default).unwrap()); + } + func + .to_string() + .trim_start_matches("function u0:0() fast {\n") + .trim_end_matches("\n}\n") + .to_string() + }}; + } + + #[test] + fn switch_zero() { + let func = setup!(0, [0,]); + assert_eq!( + func, + "block0: + v0 = iconst.i8 0 + v1 = uextend.i32 v0 + brz v1, block1 + jump block0" + ); + } + + #[test] + fn switch_single() { + let func = setup!(0, [1,]); + assert_eq!( + func, + "block0: + v0 = iconst.i8 0 + v1 = uextend.i32 v0 + v2 = icmp_imm eq v1, 1 + brnz v2, block1 + jump block0" + ); + } + + #[test] + fn switch_bool() { + let func = setup!(0, [0, 1,]); + assert_eq!( + func, + " jt0 = jump_table [block1, block2] + +block0: + v0 = iconst.i8 0 + v1 = uextend.i32 v0 + jump block3 + +block3: + br_table.i32 v1, block0, jt0" + ); + } + + #[test] + fn switch_two_gap() { + let func = setup!(0, [0, 2,]); + assert_eq!( + func, + "block0: + v0 = iconst.i8 0 + v1 = uextend.i32 v0 + v2 = icmp_imm eq v1, 2 + brnz v2, block2 + jump block3 + +block3: + brz.i32 v1, block1 + jump block0" + ); + } + + #[test] + fn switch_many() { + let func = setup!(0, [0, 1, 5, 7, 10, 11, 12,]); + assert_eq!( + func, + " jt0 = jump_table [block1, block2] + jt1 = jump_table [block5, block6, block7] + +block0: + v0 = iconst.i8 0 + v1 = uextend.i32 v0 + v2 = icmp_imm uge v1, 7 + brnz v2, block9 + jump block8 + +block9: + v3 = icmp_imm.i32 uge v1, 10 + brnz v3, block10 + jump block11 + +block11: + v4 = icmp_imm.i32 eq v1, 7 + brnz v4, block4 + jump block0 + +block8: + v5 = icmp_imm.i32 eq v1, 5 + brnz v5, block3 + jump block12 + +block12: + br_table.i32 v1, block0, jt0 + +block10: + v6 = iadd_imm.i32 v1, -10 + br_table v6, block0, jt1" + ); + } + + #[test] + fn switch_min_index_value() { + let func = setup!(0, [::core::i64::MIN as u64, 1,]); + assert_eq!( + func, + "block0: + v0 = iconst.i8 0 + v1 = uextend.i32 v0 + v2 = icmp_imm eq v1, 0x8000_0000_0000_0000 + brnz v2, block1 + jump block3 + +block3: + v3 = icmp_imm.i32 eq v1, 1 + brnz v3, block2 + jump block0" + ); + } + + #[test] + fn switch_max_index_value() { + let func = setup!(0, [::core::i64::MAX as u64, 1,]); + assert_eq!( + func, + "block0: + v0 = iconst.i8 0 + v1 = uextend.i32 v0 + v2 = icmp_imm eq v1, 0x7fff_ffff_ffff_ffff + brnz v2, block1 + jump block3 + +block3: + v3 = icmp_imm.i32 eq v1, 1 + brnz v3, block2 + jump block0" + ) + } + + #[test] + fn switch_optimal_codegen() { + let func = setup!(0, [-1i64 as u64, 0, 1,]); + assert_eq!( + func, + " jt0 = jump_table [block2, block3] + +block0: + v0 = iconst.i8 0 + v1 = uextend.i32 v0 + v2 = icmp_imm eq v1, -1 + brnz v2, block1 + jump block4 + +block4: + br_table.i32 v1, block0, jt0" + ); + } +} diff --git a/cranelift/frontend/src/variable.rs b/cranelift/frontend/src/variable.rs new file mode 100644 index 0000000000..9a40b9dfe9 --- /dev/null +++ b/cranelift/frontend/src/variable.rs @@ -0,0 +1,35 @@ +//! A basic `Variable` implementation. +//! +//! Frontends can use any indexing scheme they see fit and +//! generate the appropriate `Variable` instances. +//! +//! 
Note: The `Variable` is used by Cranelift to index into densely allocated
+//! arrays containing information about your mutable variables.
+//! Thus, make sure that `Variable` indexes are allocated contiguously,
+//! starting at `0`.
+
+use core::u32;
+use cranelift_codegen::entity::EntityRef;
+
+/// An opaque reference to a variable.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub struct Variable(u32);
+
+impl Variable {
+    /// Create a new Variable with the given index.
+    pub fn with_u32(index: u32) -> Self {
+        debug_assert!(index < u32::MAX);
+        Self(index)
+    }
+}
+
+impl EntityRef for Variable {
+    fn new(index: usize) -> Self {
+        debug_assert!(index < (u32::MAX as usize));
+        Self(index as u32)
+    }
+
+    fn index(self) -> usize {
+        self.0 as usize
+    }
+}
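A short sketch of how a frontend would use `Variable` together with the `FunctionBuilder` variable methods (`declare_var`, `def_var`, `use_var`); the helper below is the editor's illustration, not part of this patch:

```rust
use cranelift_codegen::ir::{types, InstBuilder};
use cranelift_frontend::{FunctionBuilder, Variable};

// Hypothetical helper: materialize a mutable i32 counter.
fn emit_counter(bx: &mut FunctionBuilder) {
    let block = bx.create_block();
    bx.switch_to_block(block);

    // Index 0: variable indexes must be dense and start at zero.
    let counter = Variable::with_u32(0);
    bx.declare_var(counter, types::I32);

    let zero = bx.ins().iconst(types::I32, 0);
    bx.def_var(counter, zero);

    // Reads go through `use_var`; the builder inserts block parameters as
    // needed to keep the IR in SSA form.
    let current = bx.use_var(counter);
    let next = bx.ins().iadd_imm(current, 1);
    bx.def_var(counter, next);
}
```

diff --git a/cranelift/media/crane-ferris.svg b/cranelift/media/crane-ferris.svg
new file mode 100644
index 0000000000..808fd75c3e
--- /dev/null
+++ b/cranelift/media/crane-ferris.svg
@@ -0,0 +1,265 @@
[SVG markup (265 lines of image/svg+xml vector data for the crane-Ferris logo) not reproduced here.]
diff --git a/cranelift/media/spidermonkey1.png b/cranelift/media/spidermonkey1.png
new file mode 100644
index 0000000000..0a8d0670cc
Binary files /dev/null and b/cranelift/media/spidermonkey1.png differ
diff --git a/cranelift/media/spidermonkey2.png b/cranelift/media/spidermonkey2.png
new file mode 100644
index 0000000000..8e1c928709
Binary files /dev/null and b/cranelift/media/spidermonkey2.png differ
diff --git a/cranelift/module/Cargo.toml b/cranelift/module/Cargo.toml
new file mode 100644
index 0000000000..22f124b713
--- /dev/null
+++ b/cranelift/module/Cargo.toml
@@ -0,0 +1,27 @@
+[package]
+name = "cranelift-module"
+version = "0.59.0"
+authors = ["The Cranelift Project Developers"]
+description = "Support for linking functions and data with Cranelift"
+repository = "https://github.com/bytecodealliance/cranelift"
+documentation = "https://cranelift.readthedocs.io/"
+categories = ["no-std"]
+license = "Apache-2.0 WITH LLVM-exception"
+readme = "README.md"
+edition = "2018"
+
+[dependencies]
+cranelift-codegen = { path = "../codegen", version = "0.59.0", default-features = false }
+cranelift-entity = { path = "../entity", version = "0.59.0" }
+hashbrown = { version = "0.6", optional = true }
+log = { version = "0.4.6", default-features = false }
+thiserror = "1.0.4"
+
+[features]
+default = ["std"]
+std = ["cranelift-codegen/std"]
+core = ["hashbrown", "cranelift-codegen/core"]
+
+[badges]
+maintenance = { status = "experimental" }
+travis-ci = { repository = "bytecodealliance/cranelift" }
diff --git a/cranelift/module/LICENSE b/cranelift/module/LICENSE
new file mode 100644
index 0000000000..f9d81955f4
--- /dev/null
+++ b/cranelift/module/LICENSE
@@ -0,0 +1,220 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity.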
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+
+
+--- LLVM Exceptions to the Apache 2.0 License ----
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
+
diff --git a/cranelift/module/README.md b/cranelift/module/README.md
new file mode 100644
index 0000000000..07e7bd998d
--- /dev/null
+++ b/cranelift/module/README.md
@@ -0,0 +1,23 @@
+This crate provides module-level functionality, which allows multiple
+functions and data to be emitted with
+[Cranelift](https://crates.io/crates/cranelift) and then linked together.
+
+This crate is structured as an optional layer on top of cranelift-codegen.
+It provides additional functionality, such as linking; however, users who
+require greater flexibility don't need to use it.
+
+A `Module` is a collection of functions and data objects that are linked
+together. `Backend` is a trait that defines an interface for backends
+that compile modules into various forms. Most users will use one of the
+following `Backend` implementations:
+
+ - `SimpleJITBackend`, provided by [cranelift-simplejit], which JITs
+   code to memory for direct execution.
+ - `ObjectBackend`, provided by [cranelift-object], which emits native
+   object files.
+ - `FaerieBackend`, provided by [cranelift-faerie], which emits native
+   object files.
+
+[cranelift-simplejit]: https://crates.io/crates/cranelift-simplejit
+[cranelift-object]: https://crates.io/crates/cranelift-object
+[cranelift-faerie]: https://crates.io/crates/cranelift-faerie
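The `Module` workflow the README describes is declare, then define, then finalize. A compact sketch under stated assumptions: it uses `cranelift-simplejit`'s `SimpleJITBuilder`/`SimpleJITBackend` and the `cranelift-frontend` builder, none of which are introduced by this patch, and the function name is invented:

```rust
use cranelift_codegen::ir::{types, AbiParam, InstBuilder};
use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext};
use cranelift_module::{default_libcall_names, Linkage, Module, ModuleResult};
use cranelift_simplejit::{SimpleJITBackend, SimpleJITBuilder};

// Build, define, and finalize a function returning the constant 42.
fn jit_answer() -> ModuleResult<*const u8> {
    let mut module: Module<SimpleJITBackend> =
        Module::new(SimpleJITBuilder::new(default_libcall_names()));

    // Declare first: name, linkage, and signature are fixed up front.
    let mut sig = module.make_signature();
    sig.returns.push(AbiParam::new(types::I32));
    let func_id = module.declare_function("answer", Linkage::Export, &sig)?;

    // Then define: build the body with cranelift-frontend.
    let mut ctx = module.make_context();
    ctx.func.signature = sig;
    let mut fb_ctx = FunctionBuilderContext::new();
    {
        let mut bx = FunctionBuilder::new(&mut ctx.func, &mut fb_ctx);
        let block = bx.create_block();
        bx.switch_to_block(block);
        bx.seal_block(block);
        let val = bx.ins().iconst(types::I32, 42);
        bx.ins().return_(&[val]);
        bx.finalize();
    }
    module.define_function(func_id, &mut ctx)?;

    // Finally, finalize; SimpleJIT hands back a pointer to executable code.
    module.finalize_definitions();
    Ok(module.get_finalized_function(func_id))
}
```

diff --git a/cranelift/module/src/backend.rs b/cranelift/module/src/backend.rs
new file mode 100644
index 0000000000..0fd8724afa
--- /dev/null
+++ b/cranelift/module/src/backend.rs
@@ -0,0 +1,196 @@
+//! Defines the `Backend` trait.
+
+use crate::DataContext;
+use crate::DataId;
+use crate::FuncId;
+use crate::Linkage;
+use crate::ModuleNamespace;
+use crate::ModuleResult;
+use crate::TrapSite;
+use core::marker;
+use cranelift_codegen::isa::TargetIsa;
+use cranelift_codegen::Context;
+use cranelift_codegen::{binemit, ir};
+
+use std::borrow::ToOwned;
+use std::boxed::Box;
+use std::string::String;
+use std::vec::Vec;
+
+/// A `Backend` implements the functionality needed to support a `Module`.
+///
+/// Three notable implementations of this trait are:
+/// - `SimpleJITBackend`, defined in [cranelift-simplejit], which JITs
+///   the contents of a `Module` to memory which can be directly executed.
+/// - `ObjectBackend`, defined in [cranelift-object], which writes the
+///   contents of a `Module` out as a native object file.
+/// - `FaerieBackend`, defined in [cranelift-faerie], which writes the
+///   contents of a `Module` out as a native object file.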
+///
+/// [cranelift-simplejit]: https://docs.rs/cranelift-simplejit/
+/// [cranelift-object]: https://docs.rs/cranelift-object/
+/// [cranelift-faerie]: https://docs.rs/cranelift-faerie/
+pub trait Backend
+where
+    Self: marker::Sized,
+{
+    /// A builder for constructing `Backend` instances.
+    type Builder;
+
+    /// The results of compiling a function.
+    type CompiledFunction;
+
+    /// The results of "compiling" a data object.
+    type CompiledData;
+
+    /// The completed output artifact for a function, if this is meaningful for
+    /// the `Backend`.
+    type FinalizedFunction;
+
+    /// The completed output artifact for a data object, if this is meaningful for
+    /// the `Backend`.
+    type FinalizedData;
+
+    /// This is an object returned by `Module`'s
+    /// [`finish`](struct.Module.html#method.finish) function,
+    /// if the `Backend` has a purpose for this.
+    type Product;
+
+    /// Create a new `Backend` instance.
+    fn new(_: Self::Builder) -> Self;
+
+    /// Return the `TargetIsa` to compile for.
+    fn isa(&self) -> &dyn TargetIsa;
+
+    /// Declare a function.
+    fn declare_function(&mut self, id: FuncId, name: &str, linkage: Linkage);
+
+    /// Declare a data object.
+    fn declare_data(
+        &mut self,
+        id: DataId,
+        name: &str,
+        linkage: Linkage,
+        writable: bool,
+        tls: bool,
+        align: Option<u8>,
+    );
+
+    /// Define a function, producing the function body from the given `Context`.
+    ///
+    /// Functions must be declared before being defined.
+    fn define_function(
+        &mut self,
+        id: FuncId,
+        name: &str,
+        ctx: &Context,
+        namespace: &ModuleNamespace<Self>,
+        code_size: u32,
+    ) -> ModuleResult<Self::CompiledFunction>;
+
+    /// Define a function, taking the function body from the given `bytes`.
+    ///
+    /// Functions must be declared before being defined.
+    fn define_function_bytes(
+        &mut self,
+        id: FuncId,
+        name: &str,
+        bytes: &[u8],
+        namespace: &ModuleNamespace<Self>,
+        traps: Vec<TrapSite>,
+    ) -> ModuleResult<Self::CompiledFunction>;
+
+    /// Define a data object, producing the data contents from the given `DataContext`.
+    ///
+    /// Data objects must be declared before being defined.
+    fn define_data(
+        &mut self,
+        id: DataId,
+        name: &str,
+        writable: bool,
+        tls: bool,
+        align: Option<u8>,
+        data_ctx: &DataContext,
+        namespace: &ModuleNamespace<Self>,
+    ) -> ModuleResult<Self::CompiledData>;
+
+    /// Write the address of `what` into the data for `data` at `offset`. `data` must refer to a
+    /// defined data object.
+    fn write_data_funcaddr(
+        &mut self,
+        data: &mut Self::CompiledData,
+        offset: usize,
+        what: ir::FuncRef,
+    );
+
+    /// Write the address of `what` plus `addend` into the data for `data` at `offset`. `data` must
+    /// refer to a defined data object.
+    fn write_data_dataaddr(
+        &mut self,
+        data: &mut Self::CompiledData,
+        offset: usize,
+        what: ir::GlobalValue,
+        addend: binemit::Addend,
+    );
+
+    /// Perform all outstanding relocations on the given function. This requires all `Local`
+    /// and `Export` entities referenced to be defined.
+    ///
+    /// This method is not relevant for `Backend` implementations that do not provide
+    /// `Backend::FinalizedFunction`.
+    fn finalize_function(
+        &mut self,
+        id: FuncId,
+        func: &Self::CompiledFunction,
+        namespace: &ModuleNamespace<Self>,
+    ) -> Self::FinalizedFunction;
+
+    /// Return the finalized artifact from the backend, if relevant.
+    fn get_finalized_function(&self, func: &Self::CompiledFunction) -> Self::FinalizedFunction;
+
+    /// Perform all outstanding relocations on the given data object. This requires all
+    /// `Local` and `Export` entities referenced to be defined.
+    ///
+    /// This method is not relevant for `Backend` implementations that do not provide
+    /// `Backend::FinalizedData`.
+    fn finalize_data(
+        &mut self,
+        id: DataId,
+        data: &Self::CompiledData,
+        namespace: &ModuleNamespace<Self>,
+    ) -> Self::FinalizedData;
+
+    /// Return the finalized artifact from the backend, if relevant.
+    fn get_finalized_data(&self, data: &Self::CompiledData) -> Self::FinalizedData;
+
+    /// "Publish" all finalized functions and data objects to their ultimate destinations.
+    ///
+    /// This method is not relevant for `Backend` implementations that do not provide
+    /// `Backend::FinalizedFunction` or `Backend::FinalizedData`.
+    fn publish(&mut self);
+
+    /// Consume this `Backend` and return a result. Some implementations may
+    /// provide additional functionality through this result.
+    fn finish(self, namespace: &ModuleNamespace<Self>) -> Self::Product;
+}
+
+/// Default names for `ir::LibCall`s. A function by this name is imported into the object as
+/// part of the translation of a `ir::ExternalName::LibCall` variant.
+pub fn default_libcall_names() -> Box<dyn Fn(ir::LibCall) -> String> {
+    Box::new(move |libcall| match libcall {
+        ir::LibCall::Probestack => "__cranelift_probestack".to_owned(),
+        ir::LibCall::CeilF32 => "ceilf".to_owned(),
+        ir::LibCall::CeilF64 => "ceil".to_owned(),
+        ir::LibCall::FloorF32 => "floorf".to_owned(),
+        ir::LibCall::FloorF64 => "floor".to_owned(),
+        ir::LibCall::TruncF32 => "truncf".to_owned(),
+        ir::LibCall::TruncF64 => "trunc".to_owned(),
+        ir::LibCall::NearestF32 => "nearbyintf".to_owned(),
+        ir::LibCall::NearestF64 => "nearbyint".to_owned(),
+        ir::LibCall::Memcpy => "memcpy".to_owned(),
+        ir::LibCall::Memset => "memset".to_owned(),
+        ir::LibCall::Memmove => "memmove".to_owned(),
+
+        ir::LibCall::ElfTlsGetAddr => "__tls_get_addr".to_owned(),
+    })
+}
diff --git a/cranelift/module/src/data_context.rs b/cranelift/module/src/data_context.rs
new file mode 100644
index 0000000000..94dbeb0b98
--- /dev/null
+++ b/cranelift/module/src/data_context.rs
@@ -0,0 +1,198 @@
+//! Defines `DataContext`.
+
+use cranelift_codegen::binemit::{Addend, CodeOffset};
+use cranelift_codegen::entity::PrimaryMap;
+use cranelift_codegen::ir;
+use std::boxed::Box;
+use std::vec::Vec;
+
+/// This specifies how data is to be initialized.
+#[derive(PartialEq, Eq, Debug)]
+pub enum Init {
+    /// This indicates that no initialization has been specified yet.
+    Uninitialized,
+    /// Initialize the data with all zeros.
+    Zeros {
+        /// The size of the data.
+        size: usize,
+    },
+    /// Initialize the data with the specified contents.
+    Bytes {
+        /// The contents, which also implies the size of the data.
+        contents: Box<[u8]>,
+    },
+}
+
+impl Init {
+    /// Return the size of the data to be initialized.
+    pub fn size(&self) -> usize {
+        match *self {
+            Self::Uninitialized => panic!("data size not initialized yet"),
+            Self::Zeros { size } => size,
+            Self::Bytes { ref contents } => contents.len(),
+        }
+    }
+}
+
+/// A description of a data object.
+pub struct DataDescription {
+    /// How the data should be initialized.
+    pub init: Init,
+    /// External function declarations.
+    pub function_decls: PrimaryMap<ir::FuncRef, ir::ExternalName>,
+    /// External data object declarations.
+    pub data_decls: PrimaryMap<ir::GlobalValue, ir::ExternalName>,
+    /// Function addresses to write at specified offsets.
+    pub function_relocs: Vec<(CodeOffset, ir::FuncRef)>,
+    /// Data addresses to write at specified offsets.
+    pub data_relocs: Vec<(CodeOffset, ir::GlobalValue, Addend)>,
+}
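The `DataContext` defined next wraps a `DataDescription` and is filled in before being handed to `Module::define_data`. A small sketch of filling one in, mirroring the crate's own unit test below; the size and offset are illustrative, and in real use `Module::declare_func_in_data` would normally produce the `ExternalName`:

```rust
use cranelift_codegen::ir;
use cranelift_module::DataContext;

// Describe a 64-byte zeroed object with one function-address fixup.
fn describe_data() -> DataContext {
    let mut data_ctx = DataContext::new();
    data_ctx.define_zeroinit(64);
    // Import a function symbol, then ask for its address to be written
    // into the object at byte offset 8.
    let callee = data_ctx.import_function(ir::ExternalName::user(0, 0));
    data_ctx.write_function_addr(8, callee);
    data_ctx
}
```

+/// This is to data objects what `cranelift_codegen::Context` is to functions.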
+pub struct DataContext { + description: DataDescription, +} + +impl DataContext { + /// Allocate a new context. + pub fn new() -> Self { + Self { + description: DataDescription { + init: Init::Uninitialized, + function_decls: PrimaryMap::new(), + data_decls: PrimaryMap::new(), + function_relocs: vec![], + data_relocs: vec![], + }, + } + } + + /// Clear all data structures in this context. + pub fn clear(&mut self) { + self.description.init = Init::Uninitialized; + self.description.function_decls.clear(); + self.description.data_decls.clear(); + self.description.function_relocs.clear(); + self.description.data_relocs.clear(); + } + + /// Define a zero-initialized object with the given size. + pub fn define_zeroinit(&mut self, size: usize) { + debug_assert_eq!(self.description.init, Init::Uninitialized); + self.description.init = Init::Zeros { size }; + } + + /// Define an object initialized with the given contents. + /// + /// TODO: Can we avoid a Box here? + pub fn define(&mut self, contents: Box<[u8]>) { + debug_assert_eq!(self.description.init, Init::Uninitialized); + self.description.init = Init::Bytes { contents }; + } + + /// Declare an external function import. + /// + /// Users of the `Module` API generally should call + /// `Module::declare_func_in_data` instead, as it takes care of generating + /// the appropriate `ExternalName`. + pub fn import_function(&mut self, name: ir::ExternalName) -> ir::FuncRef { + self.description.function_decls.push(name) + } + + /// Declares a global value import. + /// + /// TODO: Rename to import_data? + /// + /// Users of the `Module` API generally should call + /// `Module::declare_data_in_data` instead, as it takes care of generating + /// the appropriate `ExternalName`. + pub fn import_global_value(&mut self, name: ir::ExternalName) -> ir::GlobalValue { + self.description.data_decls.push(name) + } + + /// Write the address of `func` into the data at offset `offset`. + pub fn write_function_addr(&mut self, offset: CodeOffset, func: ir::FuncRef) { + self.description.function_relocs.push((offset, func)) + } + + /// Write the address of `data` into the data at offset `offset`. + pub fn write_data_addr(&mut self, offset: CodeOffset, data: ir::GlobalValue, addend: Addend) { + self.description.data_relocs.push((offset, data, addend)) + } + + /// Reference the initializer data. 
+ pub fn description(&self) -> &DataDescription { + debug_assert!( + self.description.init != Init::Uninitialized, + "data must be initialized first" + ); + &self.description + } +} + +#[cfg(test)] +mod tests { + use super::{DataContext, Init}; + use cranelift_codegen::ir; + + #[test] + fn basic_data_context() { + let mut data_ctx = DataContext::new(); + { + let description = &data_ctx.description; + assert_eq!(description.init, Init::Uninitialized); + assert!(description.function_decls.is_empty()); + assert!(description.data_decls.is_empty()); + assert!(description.function_relocs.is_empty()); + assert!(description.data_relocs.is_empty()); + } + + data_ctx.define_zeroinit(256); + + let _func_a = data_ctx.import_function(ir::ExternalName::user(0, 0)); + let func_b = data_ctx.import_function(ir::ExternalName::user(0, 1)); + let func_c = data_ctx.import_function(ir::ExternalName::user(1, 0)); + let _data_a = data_ctx.import_global_value(ir::ExternalName::user(2, 2)); + let data_b = data_ctx.import_global_value(ir::ExternalName::user(2, 3)); + + data_ctx.write_function_addr(8, func_b); + data_ctx.write_function_addr(16, func_c); + data_ctx.write_data_addr(32, data_b, 27); + + { + let description = data_ctx.description(); + assert_eq!(description.init, Init::Zeros { size: 256 }); + assert_eq!(description.function_decls.len(), 3); + assert_eq!(description.data_decls.len(), 2); + assert_eq!(description.function_relocs.len(), 2); + assert_eq!(description.data_relocs.len(), 1); + } + + data_ctx.clear(); + { + let description = &data_ctx.description; + assert_eq!(description.init, Init::Uninitialized); + assert!(description.function_decls.is_empty()); + assert!(description.data_decls.is_empty()); + assert!(description.function_relocs.is_empty()); + assert!(description.data_relocs.is_empty()); + } + + let contents = vec![33, 34, 35, 36]; + let contents_clone = contents.clone(); + data_ctx.define(contents.into_boxed_slice()); + { + let description = data_ctx.description(); + assert_eq!( + description.init, + Init::Bytes { + contents: contents_clone.into_boxed_slice() + } + ); + assert_eq!(description.function_decls.len(), 0); + assert_eq!(description.data_decls.len(), 0); + assert_eq!(description.function_relocs.len(), 0); + assert_eq!(description.data_relocs.len(), 0); + } + } +} diff --git a/cranelift/module/src/lib.rs b/cranelift/module/src/lib.rs new file mode 100644 index 0000000000..25a2759be4 --- /dev/null +++ b/cranelift/module/src/lib.rs @@ -0,0 +1,49 @@ +//! Top-level lib.rs for `cranelift_module`. 
+
+#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)]
+#![warn(unused_import_braces)]
+#![cfg_attr(feature = "std", deny(unstable_features))]
+#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))]
+#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))]
+#![cfg_attr(
+    feature = "cargo-clippy",
+    warn(
+        clippy::float_arithmetic,
+        clippy::mut_mut,
+        clippy::nonminimal_bool,
+        clippy::option_map_unwrap_or,
+        clippy::option_map_unwrap_or_else,
+        clippy::print_stdout,
+        clippy::unicode_not_nfc,
+        clippy::use_self
+    )
+)]
+#![no_std]
+
+#[cfg(not(feature = "std"))]
+#[macro_use]
+extern crate alloc as std;
+#[cfg(feature = "std")]
+#[macro_use]
+extern crate std;
+
+#[cfg(not(feature = "std"))]
+use hashbrown::{hash_map, HashMap};
+#[cfg(feature = "std")]
+use std::collections::{hash_map, HashMap};
+
+mod backend;
+mod data_context;
+mod module;
+mod traps;
+
+pub use crate::backend::{default_libcall_names, Backend};
+pub use crate::data_context::{DataContext, DataDescription, Init};
+pub use crate::module::{
+    DataId, FuncId, FuncOrDataId, Linkage, Module, ModuleError, ModuleFunction, ModuleNamespace,
+    ModuleResult,
+};
+pub use crate::traps::TrapSite;
+
+/// Version number of this crate.
+pub const VERSION: &str = env!("CARGO_PKG_VERSION");
diff --git a/cranelift/module/src/module.rs b/cranelift/module/src/module.rs
new file mode 100644
index 0000000000..5df095b610
--- /dev/null
+++ b/cranelift/module/src/module.rs
@@ -0,0 +1,782 @@
+//! Defines `Module` and related types.
+
+// TODO: Should `ir::Function` really have a `name`?
+
+// TODO: Factor out `ir::Function`'s `ext_funcs` and `global_values` into a struct
+// shared with `DataContext`?
+
+use super::HashMap;
+use crate::data_context::DataContext;
+use crate::traps::TrapSite;
+use crate::Backend;
+use cranelift_codegen::binemit::{self, CodeInfo};
+use cranelift_codegen::entity::{entity_impl, PrimaryMap};
+use cranelift_codegen::{ir, isa, CodegenError, Context};
+use log::info;
+use std::borrow::ToOwned;
+use std::convert::TryInto;
+use std::string::String;
+use std::vec::Vec;
+use thiserror::Error;
+
+/// A function identifier for use in the `Module` interface.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct FuncId(u32);
+entity_impl!(FuncId, "funcid");
+
+/// Function identifiers are namespace 0 in `ir::ExternalName`
+impl From<FuncId> for ir::ExternalName {
+    fn from(id: FuncId) -> Self {
+        Self::User {
+            namespace: 0,
+            index: id.0,
+        }
+    }
+}
+
+/// A data object identifier for use in the `Module` interface.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct DataId(u32);
+entity_impl!(DataId, "dataid");
+
+/// Data identifiers are namespace 1 in `ir::ExternalName`
+impl From<DataId> for ir::ExternalName {
+    fn from(id: DataId) -> Self {
+        Self::User {
+            namespace: 1,
+            index: id.0,
+        }
+    }
+}
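This namespace-0/namespace-1 split is what later lets `ModuleNamespace` tell functions and data apart from a bare `ir::ExternalName`. A quick illustration of the mapping; the assertion style is the editor's, assuming `ir::ExternalName`'s derived equality, and is not part of the patch:

```rust
use cranelift_codegen::entity::EntityRef;
use cranelift_codegen::ir;
use cranelift_module::{DataId, FuncId};

fn external_names() {
    let func = FuncId::new(7);
    let data = DataId::new(3);
    // Functions land in namespace 0, data objects in namespace 1.
    assert_eq!(ir::ExternalName::from(func), ir::ExternalName::user(0, 7));
    assert_eq!(ir::ExternalName::from(data), ir::ExternalName::user(1, 3));
}
```

+
+/// Linkage refers to where an entity is defined and who can see it.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum Linkage {
+    /// Defined outside of a module.
+    Import,
+    /// Defined inside the module, but not visible outside it.
+    Local,
+    /// Defined inside the module, visible outside it, and may be preempted.
+    Preemptible,
+    /// Defined inside the module, and visible outside it.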
+    Export,
+}
+
+impl Linkage {
+    fn merge(a: Self, b: Self) -> Self {
+        match a {
+            Self::Export => Self::Export,
+            Self::Preemptible => match b {
+                Self::Export => Self::Export,
+                _ => Self::Preemptible,
+            },
+            Self::Local => match b {
+                Self::Export => Self::Export,
+                Self::Preemptible => Self::Preemptible,
+                _ => Self::Local,
+            },
+            Self::Import => b,
+        }
+    }
+
+    /// Test whether this linkage can have a definition.
+    pub fn is_definable(self) -> bool {
+        match self {
+            Self::Import => false,
+            Self::Local | Self::Preemptible | Self::Export => true,
+        }
+    }
+
+    /// Test whether this linkage will have a definition that cannot be preempted.
+    pub fn is_final(self) -> bool {
+        match self {
+            Self::Import | Self::Preemptible => false,
+            Self::Local | Self::Export => true,
+        }
+    }
+}
+
+/// A declared name may refer to either a function or data declaration
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+pub enum FuncOrDataId {
+    /// When it's a FuncId
+    Func(FuncId),
+    /// When it's a DataId
+    Data(DataId),
+}
+
+/// Mapping to `ir::ExternalName` is trivial based on the `FuncId` and `DataId` mapping.
+impl From<FuncOrDataId> for ir::ExternalName {
+    fn from(id: FuncOrDataId) -> Self {
+        match id {
+            FuncOrDataId::Func(funcid) => Self::from(funcid),
+            FuncOrDataId::Data(dataid) => Self::from(dataid),
+        }
+    }
+}
+
+/// Information about a function which can be called.
+pub struct FunctionDeclaration {
+    /// The name of the function.
+    pub name: String,
+    /// The linkage of the function.
+    pub linkage: Linkage,
+    /// The signature of the function.
+    pub signature: ir::Signature,
+}
+
+/// Error messages for all `Module` and `Backend` methods
+#[derive(Error, Debug)]
+pub enum ModuleError {
+    /// Indicates an identifier was used before it was declared
+    #[error("Undeclared identifier: {0}")]
+    Undeclared(String),
+    /// Indicates an identifier was used as data/function first, but then used as the other
+    #[error("Incompatible declaration of identifier: {0}")]
+    IncompatibleDeclaration(String),
+    /// Indicates a function identifier was declared with a
+    /// different signature than declared previously
+    #[error("Function {0} signature {2:?} is incompatible with previous declaration {1:?}")]
+    IncompatibleSignature(String, ir::Signature, ir::Signature),
+    /// Indicates an identifier was defined more than once
+    #[error("Duplicate definition of identifier: {0}")]
+    DuplicateDefinition(String),
+    /// Indicates an identifier was defined, but was declared as an import
+    #[error("Invalid to define identifier declared as an import: {0}")]
+    InvalidImportDefinition(String),
+    /// Indicates a too-long function was defined
+    #[error("Function {0} exceeds the maximum function size")]
+    FunctionTooLarge(String),
+    /// Wraps a `cranelift-codegen` error
+    #[error("Compilation error: {0}")]
+    Compilation(#[from] CodegenError),
+    /// Wraps a generic error from a backend
+    #[error("Backend error: {0}")]
+    Backend(String),
+}
+
+/// A convenient alias for a `Result` that uses `ModuleError` as the error type.
+pub type ModuleResult<T> = Result<T, ModuleError>;
+
+/// A function belonging to a `Module`.
+pub struct ModuleFunction<B>
+where
+    B: Backend,
+{
+    /// The function declaration.
+    pub decl: FunctionDeclaration,
+    /// The compiled artifact, once it's available.
+    pub compiled: Option<B::CompiledFunction>,
+}
+
+impl<B> ModuleFunction<B>
+where
+    B: Backend,
+{
+    fn merge(&mut self, linkage: Linkage, sig: &ir::Signature) -> Result<(), ModuleError> {
+        self.decl.linkage = Linkage::merge(self.decl.linkage, linkage);
+        if &self.decl.signature != sig {
+            return Err(ModuleError::IncompatibleSignature(
+                self.decl.name.clone(),
+                self.decl.signature.clone(),
+                sig.clone(),
+            ));
+        }
+        Ok(())
+    }
+}
+
+/// Information about a data object which can be accessed.
+pub struct DataDeclaration {
+    /// The name of the data object.
+    pub name: String,
+    /// The linkage of the data object.
+    pub linkage: Linkage,
+    /// Whether the data object is writable.
+    pub writable: bool,
+    /// Whether the data object is thread-local.
+    pub tls: bool,
+    /// The alignment of the data object, if specified.
+    pub align: Option<u8>,
+}
+
+/// A data object belonging to a `Module`.
+struct ModuleData<B>
+where
+    B: Backend,
+{
+    /// The data object declaration.
+    decl: DataDeclaration,
+    /// The "compiled" artifact, once it's available.
+    compiled: Option<B::CompiledData>,
+}
+
+impl<B> ModuleData<B>
+where
+    B: Backend,
+{
+    fn merge(&mut self, linkage: Linkage, writable: bool, tls: bool, align: Option<u8>) {
+        self.decl.linkage = Linkage::merge(self.decl.linkage, linkage);
+        self.decl.writable = self.decl.writable || writable;
+        self.decl.align = self.decl.align.max(align);
+        assert_eq!(
+            self.decl.tls, tls,
+            "Can't change a TLS data object to a normal one, or vice versa",
+        );
+    }
+}
+
+/// The functions and data objects belonging to a module.
+struct ModuleContents<B>
+where
+    B: Backend,
+{
+    functions: PrimaryMap<FuncId, ModuleFunction<B>>,
+    data_objects: PrimaryMap<DataId, ModuleData<B>>,
+}
+
+impl<B> ModuleContents<B>
+where
+    B: Backend,
+{
+    fn get_function_id(&self, name: &ir::ExternalName) -> FuncId {
+        if let ir::ExternalName::User { namespace, index } = *name {
+            debug_assert_eq!(namespace, 0);
+            FuncId::from_u32(index)
+        } else {
+            panic!("unexpected ExternalName kind {}", name)
+        }
+    }
+
+    fn get_data_id(&self, name: &ir::ExternalName) -> DataId {
+        if let ir::ExternalName::User { namespace, index } = *name {
+            debug_assert_eq!(namespace, 1);
+            DataId::from_u32(index)
+        } else {
+            panic!("unexpected ExternalName kind {}", name)
+        }
+    }
+
+    fn get_function_info(&self, name: &ir::ExternalName) -> &ModuleFunction<B> {
+        &self.functions[self.get_function_id(name)]
+    }
+
+    /// Get the `ModuleData` for the data object named by `name`.
+    fn get_data_info(&self, name: &ir::ExternalName) -> &ModuleData<B> {
+        &self.data_objects[self.get_data_id(name)]
+    }
+}
+
+/// This provides a view to the state of a module which allows `ir::ExternalName`s to be translated
+/// into `FunctionDeclaration`s and `DataDeclaration`s.
+pub struct ModuleNamespace<'a, B: 'a>
+where
+    B: Backend,
+{
+    contents: &'a ModuleContents<B>,
+}
+
+impl<'a, B> ModuleNamespace<'a, B>
+where
+    B: Backend,
+{
+    /// Get the `FuncId` for the function named by `name`.
+    pub fn get_function_id(&self, name: &ir::ExternalName) -> FuncId {
+        self.contents.get_function_id(name)
+    }
+
+    /// Get the `DataId` for the data object named by `name`.
+    pub fn get_data_id(&self, name: &ir::ExternalName) -> DataId {
+        self.contents.get_data_id(name)
+    }
+
+    /// Get the `FunctionDeclaration` for the function named by `name`.
+    pub fn get_function_decl(&self, name: &ir::ExternalName) -> &FunctionDeclaration {
+        &self.contents.get_function_info(name).decl
+    }
+
+    /// Get the `DataDeclaration` for the data object named by `name`.
+    pub fn get_data_decl(&self, name: &ir::ExternalName) -> &DataDeclaration {
+        &self.contents.get_data_info(name).decl
+    }
+
+    /// Get the definition for the function named by `name`, along with its name
+    /// and signature.
+ pub fn get_function_definition( + &self, + name: &ir::ExternalName, + ) -> (Option<&B::CompiledFunction>, &str, &ir::Signature) { + let info = self.contents.get_function_info(name); + debug_assert!( + !info.decl.linkage.is_definable() || info.compiled.is_some(), + "Finalization requires a definition for function {}.", + name, + ); + debug_assert_eq!(info.decl.linkage.is_definable(), info.compiled.is_some()); + + ( + info.compiled.as_ref(), + &info.decl.name, + &info.decl.signature, + ) + } + + /// Get the definition for the data object named by `name`, along with its name + /// and writable flag + pub fn get_data_definition( + &self, + name: &ir::ExternalName, + ) -> (Option<&B::CompiledData>, &str, bool) { + let info = self.contents.get_data_info(name); + debug_assert!( + !info.decl.linkage.is_definable() || info.compiled.is_some(), + "Finalization requires a definition for data object {}.", + name, + ); + debug_assert_eq!(info.decl.linkage.is_definable(), info.compiled.is_some()); + + (info.compiled.as_ref(), &info.decl.name, info.decl.writable) + } + + /// Return whether `name` names a function, rather than a data object. + pub fn is_function(&self, name: &ir::ExternalName) -> bool { + if let ir::ExternalName::User { namespace, .. } = *name { + namespace == 0 + } else { + panic!("unexpected ExternalName kind {}", name) + } + } +} + +/// A `Module` is a utility for collecting functions and data objects, and linking them together. +pub struct Module +where + B: Backend, +{ + names: HashMap, + contents: ModuleContents, + functions_to_finalize: Vec, + data_objects_to_finalize: Vec, + backend: B, +} + +impl Module +where + B: Backend, +{ + /// Create a new `Module`. + pub fn new(backend_builder: B::Builder) -> Self { + Self { + names: HashMap::new(), + contents: ModuleContents { + functions: PrimaryMap::new(), + data_objects: PrimaryMap::new(), + }, + functions_to_finalize: Vec::new(), + data_objects_to_finalize: Vec::new(), + backend: B::new(backend_builder), + } + } + + /// Get the module identifier for a given name, if that name + /// has been declared. + pub fn get_name(&self, name: &str) -> Option { + self.names.get(name).cloned() + } + + /// Return the target information needed by frontends to produce Cranelift IR + /// for the current target. + pub fn target_config(&self) -> isa::TargetFrontendConfig { + self.backend.isa().frontend_config() + } + + /// Create a new `Context` initialized for use with this `Module`. + /// + /// This ensures that the `Context` is initialized with the default calling + /// convention for the `TargetIsa`. + pub fn make_context(&self) -> Context { + let mut ctx = Context::new(); + ctx.func.signature.call_conv = self.backend.isa().default_call_conv(); + ctx + } + + /// Clear the given `Context` and reset it for use with a new function. + /// + /// This ensures that the `Context` is initialized with the default calling + /// convention for the `TargetIsa`. + pub fn clear_context(&self, ctx: &mut Context) { + ctx.clear(); + ctx.func.signature.call_conv = self.backend.isa().default_call_conv(); + } + + /// Create a new empty `Signature` with the default calling convention for + /// the `TargetIsa`, to which parameter and return types can be added for + /// declaring a function to be called by this `Module`. + pub fn make_signature(&self) -> ir::Signature { + ir::Signature::new(self.backend.isa().default_call_conv()) + } + + /// Clear the given `Signature` and reset for use with a new function. 
+ /// + /// This ensures that the `Signature` is initialized with the default + /// calling convention for the `TargetIsa`. + pub fn clear_signature(&self, sig: &mut ir::Signature) { + sig.clear(self.backend.isa().default_call_conv()); + } + + /// Declare a function in this module. + pub fn declare_function( + &mut self, + name: &str, + linkage: Linkage, + signature: &ir::Signature, + ) -> ModuleResult { + // TODO: Can we avoid allocating names so often? + use super::hash_map::Entry::*; + match self.names.entry(name.to_owned()) { + Occupied(entry) => match *entry.get() { + FuncOrDataId::Func(id) => { + let existing = &mut self.contents.functions[id]; + existing.merge(linkage, signature)?; + self.backend + .declare_function(id, name, existing.decl.linkage); + Ok(id) + } + FuncOrDataId::Data(..) => { + Err(ModuleError::IncompatibleDeclaration(name.to_owned())) + } + }, + Vacant(entry) => { + let id = self.contents.functions.push(ModuleFunction { + decl: FunctionDeclaration { + name: name.to_owned(), + linkage, + signature: signature.clone(), + }, + compiled: None, + }); + entry.insert(FuncOrDataId::Func(id)); + self.backend.declare_function(id, name, linkage); + Ok(id) + } + } + } + + /// An iterator over functions that have been declared in this module. + pub fn declared_functions(&self) -> core::slice::Iter<'_, ModuleFunction> { + self.contents.functions.values() + } + + /// Declare a data object in this module. + pub fn declare_data( + &mut self, + name: &str, + linkage: Linkage, + writable: bool, + tls: bool, + align: Option, // An alignment bigger than 128 is unlikely + ) -> ModuleResult { + // TODO: Can we avoid allocating names so often? + use super::hash_map::Entry::*; + match self.names.entry(name.to_owned()) { + Occupied(entry) => match *entry.get() { + FuncOrDataId::Data(id) => { + let existing = &mut self.contents.data_objects[id]; + existing.merge(linkage, writable, tls, align); + self.backend.declare_data( + id, + name, + existing.decl.linkage, + existing.decl.writable, + existing.decl.tls, + existing.decl.align, + ); + Ok(id) + } + + FuncOrDataId::Func(..) => { + Err(ModuleError::IncompatibleDeclaration(name.to_owned())) + } + }, + Vacant(entry) => { + let id = self.contents.data_objects.push(ModuleData { + decl: DataDeclaration { + name: name.to_owned(), + linkage, + writable, + tls, + align, + }, + compiled: None, + }); + entry.insert(FuncOrDataId::Data(id)); + self.backend + .declare_data(id, name, linkage, writable, tls, align); + Ok(id) + } + } + } + + /// Use this when you're building the IR of a function to reference a function. + /// + /// TODO: Coalesce redundant decls and signatures. + /// TODO: Look into ways to reduce the risk of using a FuncRef in the wrong function. + pub fn declare_func_in_func(&self, func: FuncId, in_func: &mut ir::Function) -> ir::FuncRef { + let decl = &self.contents.functions[func].decl; + let signature = in_func.import_signature(decl.signature.clone()); + let colocated = decl.linkage.is_final(); + in_func.import_function(ir::ExtFuncData { + name: ir::ExternalName::user(0, func.as_u32()), + signature, + colocated, + }) + } + + /// Use this when you're building the IR of a function to reference a data object. + /// + /// TODO: Same as above. 
+ pub fn declare_data_in_func(&self, data: DataId, func: &mut ir::Function) -> ir::GlobalValue { + let decl = &self.contents.data_objects[data].decl; + let colocated = decl.linkage.is_final(); + func.create_global_value(ir::GlobalValueData::Symbol { + name: ir::ExternalName::user(1, data.as_u32()), + offset: ir::immediates::Imm64::new(0), + colocated, + tls: decl.tls, + }) + } + + /// TODO: Same as above. + pub fn declare_func_in_data(&self, func: FuncId, ctx: &mut DataContext) -> ir::FuncRef { + ctx.import_function(ir::ExternalName::user(0, func.as_u32())) + } + + /// TODO: Same as above. + pub fn declare_data_in_data(&self, data: DataId, ctx: &mut DataContext) -> ir::GlobalValue { + ctx.import_global_value(ir::ExternalName::user(1, data.as_u32())) + } + + /// Define a function, producing the function body from the given `Context`. + /// + /// Returns the size of the function's code and constant data. + /// + /// Note: After calling this function the given `Context` will contain the compiled function. + pub fn define_function( + &mut self, + func: FuncId, + ctx: &mut Context, + ) -> ModuleResult { + info!( + "defining function {}: {}", + func, + ctx.func.display(self.backend.isa()) + ); + let CodeInfo { total_size, .. } = ctx.compile(self.backend.isa())?; + let info = &self.contents.functions[func]; + if info.compiled.is_some() { + return Err(ModuleError::DuplicateDefinition(info.decl.name.clone())); + } + if !info.decl.linkage.is_definable() { + return Err(ModuleError::InvalidImportDefinition(info.decl.name.clone())); + } + + let compiled = Some(self.backend.define_function( + func, + &info.decl.name, + ctx, + &ModuleNamespace:: { + contents: &self.contents, + }, + total_size, + )?); + + self.contents.functions[func].compiled = compiled; + self.functions_to_finalize.push(func); + Ok(total_size) + } + + /// Define a function, taking the function body from the given `bytes`. + /// + /// This function is generally only useful if you need to precisely specify + /// the emitted instructions for some reason; otherwise, you should use + /// `define_function`. + /// + /// Returns the size of the function's code. + pub fn define_function_bytes( + &mut self, + func: FuncId, + bytes: &[u8], + traps: Vec, + ) -> ModuleResult { + info!("defining function {} with bytes", func); + let info = &self.contents.functions[func]; + if info.compiled.is_some() { + return Err(ModuleError::DuplicateDefinition(info.decl.name.clone())); + } + if !info.decl.linkage.is_definable() { + return Err(ModuleError::InvalidImportDefinition(info.decl.name.clone())); + } + + let total_size: u32 = match bytes.len().try_into() { + Ok(total_size) => total_size, + _ => Err(ModuleError::FunctionTooLarge(info.decl.name.clone()))?, + }; + + let compiled = Some(self.backend.define_function_bytes( + func, + &info.decl.name, + bytes, + &ModuleNamespace:: { + contents: &self.contents, + }, + traps, + )?); + + self.contents.functions[func].compiled = compiled; + self.functions_to_finalize.push(func); + Ok(total_size) + } + + /// Define a data object, producing the data contents from the given `DataContext`. 
+ pub fn define_data(&mut self, data: DataId, data_ctx: &DataContext) -> ModuleResult<()> { + let compiled = { + let info = &self.contents.data_objects[data]; + if info.compiled.is_some() { + return Err(ModuleError::DuplicateDefinition(info.decl.name.clone())); + } + if !info.decl.linkage.is_definable() { + return Err(ModuleError::InvalidImportDefinition(info.decl.name.clone())); + } + Some(self.backend.define_data( + data, + &info.decl.name, + info.decl.writable, + info.decl.tls, + info.decl.align, + data_ctx, + &ModuleNamespace:: { + contents: &self.contents, + }, + )?) + }; + self.contents.data_objects[data].compiled = compiled; + self.data_objects_to_finalize.push(data); + Ok(()) + } + + /// Write the address of `what` into the data for `data` at `offset`. `data` must refer to a + /// defined data object. + pub fn write_data_funcaddr(&mut self, data: DataId, offset: usize, what: ir::FuncRef) { + let info = &mut self.contents.data_objects[data]; + debug_assert!( + info.decl.linkage.is_definable(), + "imported data cannot contain references" + ); + self.backend.write_data_funcaddr( + &mut info + .compiled + .as_mut() + .expect("`data` must refer to a defined data object"), + offset, + what, + ); + } + + /// Write the address of `what` plus `addend` into the data for `data` at `offset`. `data` must + /// refer to a defined data object. + pub fn write_data_dataaddr( + &mut self, + data: DataId, + offset: usize, + what: ir::GlobalValue, + addend: binemit::Addend, + ) { + let info = &mut self.contents.data_objects[data]; + debug_assert!( + info.decl.linkage.is_definable(), + "imported data cannot contain references" + ); + self.backend.write_data_dataaddr( + &mut info + .compiled + .as_mut() + .expect("`data` must refer to a defined data object"), + offset, + what, + addend, + ); + } + + /// Finalize all functions and data objects that are defined but not yet finalized. + /// All symbols referenced in their bodies that are declared as needing a definition + /// must be defined by this point. + /// + /// Use `get_finalized_function` and `get_finalized_data` to obtain the final + /// artifacts. + /// + /// This method is not relevant for `Backend` implementations that do not provide + /// `Backend::FinalizedFunction` or `Backend::FinalizedData`. + pub fn finalize_definitions(&mut self) { + for func in self.functions_to_finalize.drain(..) { + let info = &self.contents.functions[func]; + debug_assert!(info.decl.linkage.is_definable()); + self.backend.finalize_function( + func, + info.compiled + .as_ref() + .expect("function must be compiled before it can be finalized"), + &ModuleNamespace:: { + contents: &self.contents, + }, + ); + } + for data in self.data_objects_to_finalize.drain(..) { + let info = &self.contents.data_objects[data]; + debug_assert!(info.decl.linkage.is_definable()); + self.backend.finalize_data( + data, + info.compiled + .as_ref() + .expect("data object must be compiled before it can be finalized"), + &ModuleNamespace:: { + contents: &self.contents, + }, + ); + } + self.backend.publish(); + } + + /// Return the finalized artifact from the backend, if it provides one. 
+    pub fn get_finalized_function(&mut self, func: FuncId) -> B::FinalizedFunction {
+        let info = &self.contents.functions[func];
+        debug_assert!(
+            !self.functions_to_finalize.iter().any(|x| *x == func),
+            "function not yet finalized"
+        );
+        self.backend.get_finalized_function(
+            info.compiled
+                .as_ref()
+                .expect("function must be compiled before it can be finalized"),
+        )
+    }
+
+    /// Return the finalized artifact from the backend, if it provides one.
+    pub fn get_finalized_data(&mut self, data: DataId) -> B::FinalizedData {
+        let info = &self.contents.data_objects[data];
+        debug_assert!(
+            !self.data_objects_to_finalize.iter().any(|x| *x == data),
+            "data object not yet finalized"
+        );
+        self.backend.get_finalized_data(
+            info.compiled
+                .as_ref()
+                .expect("data object must be compiled before it can be finalized"),
+        )
+    }
+
+    /// Return the target isa
+    pub fn isa(&self) -> &dyn isa::TargetIsa {
+        self.backend.isa()
+    }
+
+    /// Consume the module and return the resulting `Product`. Some `Backend`
+    /// implementations may provide additional functionality available after
+    /// a `Module` is complete.
+    pub fn finish(self) -> B::Product {
+        self.backend.finish(&ModuleNamespace::<B> {
+            contents: &self.contents,
+        })
+    }
+}
diff --git a/cranelift/module/src/traps.rs b/cranelift/module/src/traps.rs
new file mode 100644
index 0000000000..2344d4189b
--- /dev/null
+++ b/cranelift/module/src/traps.rs
@@ -0,0 +1,14 @@
+//! Defines `TrapSite`.
+
+use cranelift_codegen::{binemit, ir};
+
+/// Record of the arguments cranelift passes to `TrapSink::trap`.
+#[derive(Clone, Debug)]
+pub struct TrapSite {
+    /// Offset into function.
+    pub offset: binemit::CodeOffset,
+    /// Source location given to cranelift.
+    pub srcloc: ir::SourceLoc,
+    /// Trap code, as determined by cranelift.
+    pub code: ir::TrapCode,
+}
diff --git a/cranelift/native/Cargo.toml b/cranelift/native/Cargo.toml
new file mode 100644
index 0000000000..ae571edffe
--- /dev/null
+++ b/cranelift/native/Cargo.toml
@@ -0,0 +1,29 @@
+[package]
+name = "cranelift-native"
+version = "0.59.0"
+authors = ["The Cranelift Project Developers"]
+description = "Support for targeting the host with Cranelift"
+repository = "https://github.com/bytecodealliance/cranelift"
+categories = ["no-std"]
+license = "Apache-2.0 WITH LLVM-exception"
+readme = "README.md"
+edition = "2018"
+
+[dependencies]
+cranelift-codegen = { path = "../codegen", version = "0.59.0", default-features = false }
+target-lexicon = "0.10"
+
+[target.'cfg(any(target_arch = "x86", target_arch = "x86_64"))'.dependencies]
+raw-cpuid = "7.0.3"
+
+[features]
+default = ["std"]
+std = ["cranelift-codegen/std"]
+# When compiling with the "core" feature, nightly must be enabled.
+# Enabling the "nightly" feature for raw-cpuid avoids
+# linking in a C library.
+core = ["cranelift-codegen/core", "raw-cpuid/nightly"]
+
+[badges]
+maintenance = { status = "experimental" }
+travis-ci = { repository = "bytecodealliance/cranelift" }
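cranelift-native's job is to construct a `TargetIsa` for the machine the compiler is running on. A minimal sketch of the intended use, assuming the crate's `builder()` entry point together with cranelift-codegen's `settings` module (neither call is introduced by this patch):

```rust
use cranelift_codegen::isa::TargetIsa;
use cranelift_codegen::settings;

// Build a TargetIsa for the host CPU, with default flags; `builder()` fails
// on hosts Cranelift cannot target.
fn host_isa() -> Box<dyn TargetIsa> {
    let flags = settings::Flags::new(settings::builder());
    cranelift_native::builder()
        .expect("host machine is not a supported target")
        .finish(flags)
}
```

diff --git a/cranelift/native/LICENSE b/cranelift/native/LICENSE
new file mode 100644
index 0000000000..f9d81955f4
--- /dev/null
+++ b/cranelift/native/LICENSE
@@ -0,0 +1,220 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.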
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+
+
+--- LLVM Exceptions to the Apache 2.0 License ----
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
+
diff --git a/cranelift/native/README.md b/cranelift/native/README.md
new file mode 100644
index 0000000000..10f01bc906
--- /dev/null
+++ b/cranelift/native/README.md
@@ -0,0 +1,3 @@
+This crate performs autodetection of the host architecture, which can be used
+to configure [Cranelift](https://crates.io/crates/cranelift) to generate code
+specialized for the machine it's running on.
diff --git a/cranelift/native/src/lib.rs b/cranelift/native/src/lib.rs
new file mode 100644
index 0000000000..9ad873e166
--- /dev/null
+++ b/cranelift/native/src/lib.rs
@@ -0,0 +1,129 @@
+//! Performs autodetection of the host for the purposes of running
+//! Cranelift to generate code to run on the same machine.
+
+#![deny(
+    missing_docs,
+    trivial_numeric_casts,
+    unused_extern_crates,
+    unstable_features
+)]
+#![warn(unused_import_braces)]
+#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))]
+#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))]
+#![cfg_attr(
+    feature = "cargo-clippy",
+    warn(
+        clippy::float_arithmetic,
+        clippy::mut_mut,
+        clippy::nonminimal_bool,
+        clippy::option_map_unwrap_or,
+        clippy::option_map_unwrap_or_else,
+        clippy::print_stdout,
+        clippy::unicode_not_nfc,
+        clippy::use_self
+    )
+)]
+#![no_std]
+
+use cranelift_codegen::isa;
+use target_lexicon::Triple;
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+use raw_cpuid::CpuId;
+
+/// Return an `isa` builder configured for the current host machine, or
+/// `Err` with an explanatory message if the host machine is not supported
+/// in the current configuration.
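+///
+/// A minimal usage sketch, assuming default flag settings (this mirrors the
+/// test at the bottom of this file):
+///
+/// ```no_run
+/// use cranelift_codegen::settings;
+///
+/// let flag_builder = settings::builder();
+/// let isa_builder = cranelift_native::builder().expect("host machine should be supported");
+/// let isa = isa_builder.finish(settings::Flags::new(flag_builder));
+/// ```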
+pub fn builder() -> Result<isa::Builder, &'static str> {
+    let mut isa_builder = isa::lookup(Triple::host()).map_err(|err| match err {
+        isa::LookupError::SupportDisabled => "support for architecture disabled at compile time",
+        isa::LookupError::Unsupported => "unsupported architecture",
+    })?;
+
+    if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
+        parse_x86_cpuid(&mut isa_builder)?;
+    }
+
+    Ok(isa_builder)
+}
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn parse_x86_cpuid(isa_builder: &mut isa::Builder) -> Result<(), &'static str> {
+    use cranelift_codegen::settings::Configurable;
+    let cpuid = CpuId::new();
+
+    if let Some(info) = cpuid.get_feature_info() {
+        if !info.has_sse2() {
+            return Err("x86 support requires SSE2");
+        }
+        if info.has_sse3() {
+            isa_builder.enable("has_sse3").unwrap();
+        }
+        if info.has_ssse3() {
+            isa_builder.enable("has_ssse3").unwrap();
+        }
+        if info.has_sse41() {
+            isa_builder.enable("has_sse41").unwrap();
+        }
+        if info.has_sse42() {
+            isa_builder.enable("has_sse42").unwrap();
+        }
+        if info.has_popcnt() {
+            isa_builder.enable("has_popcnt").unwrap();
+        }
+        if info.has_avx() {
+            isa_builder.enable("has_avx").unwrap();
+        }
+    }
+    if let Some(info) = cpuid.get_extended_feature_info() {
+        if info.has_bmi1() {
+            isa_builder.enable("has_bmi1").unwrap();
+        }
+        if info.has_bmi2() {
+            isa_builder.enable("has_bmi2").unwrap();
+        }
+    }
+    if let Some(info) = cpuid.get_extended_function_info() {
+        if info.has_lzcnt() {
+            isa_builder.enable("has_lzcnt").unwrap();
+        }
+    }
+    Ok(())
+}
+
+#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+fn parse_x86_cpuid(_isa_builder: &mut isa::Builder) -> Result<(), &'static str> {
+    unreachable!();
+}
+
+#[cfg(test)]
+mod tests {
+    use super::builder;
+    use cranelift_codegen::isa::CallConv;
+    use cranelift_codegen::settings;
+
+    #[test]
+    fn test() {
+        if let Ok(isa_builder) = builder() {
+            let flag_builder = settings::builder();
+            let isa = isa_builder.finish(settings::Flags::new(flag_builder));
+            if cfg!(any(unix, target_os = "nebulet")) {
+                assert_eq!(isa.default_call_conv(), CallConv::SystemV);
+            } else if cfg!(windows) {
+                assert_eq!(isa.default_call_conv(), CallConv::WindowsFastcall);
+            }
+            if cfg!(target_pointer_width = "64") {
+                assert_eq!(isa.pointer_bits(), 64);
+            }
+            if cfg!(target_pointer_width = "32") {
+                assert_eq!(isa.pointer_bits(), 32);
+            }
+            if cfg!(target_pointer_width = "16") {
+                assert_eq!(isa.pointer_bits(), 16);
+            }
+        }
+    }
+}
+
+/// Version number of this crate.
+pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/cranelift/object/Cargo.toml b/cranelift/object/Cargo.toml new file mode 100644 index 0000000000..2ec0e3902c --- /dev/null +++ b/cranelift/object/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "cranelift-object" +version = "0.59.0" +authors = ["The Cranelift Project Developers"] +description = "Emit Cranelift output to native object files with `object`" +repository = "https://github.com/bytecodealliance/cranelift" +documentation = "https://cranelift.readthedocs.io/" +license = "Apache-2.0 WITH LLVM-exception" +readme = "README.md" +edition = "2018" + +[dependencies] +cranelift-module = { path = "../module", version = "0.59.0" } +object = { version = "0.17", default-features = false, features = ["write"] } +target-lexicon = "0.10" +goblin = "0.1.0" + +[dependencies.cranelift-codegen] +path = "../codegen" +version = "0.59.0" +default-features = false +features = ["std"] + +[badges] +maintenance = { status = "experimental" } +travis-ci = { repository = "bytecodealliance/cranelift" } diff --git a/cranelift/object/LICENSE b/cranelift/object/LICENSE new file mode 100644 index 0000000000..f9d81955f4 --- /dev/null +++ b/cranelift/object/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
+
diff --git a/cranelift/object/README.md b/cranelift/object/README.md
new file mode 100644
index 0000000000..3a0fc51356
--- /dev/null
+++ b/cranelift/object/README.md
@@ -0,0 +1,4 @@
+This crate contains a library that enables
+[Cranelift](https://crates.io/crates/cranelift)
+to emit native object (".o") files, using the
+[object](https://crates.io/crates/object) library.
diff --git a/cranelift/object/src/backend.rs b/cranelift/object/src/backend.rs
new file mode 100644
index 0000000000..dae26ea230
--- /dev/null
+++ b/cranelift/object/src/backend.rs
@@ -0,0 +1,640 @@
+//! Defines `ObjectBackend`.
+
+use crate::traps::ObjectTrapSink;
+use cranelift_codegen::binemit::{
+    Addend, CodeOffset, NullStackmapSink, NullTrapSink, Reloc, RelocSink,
+};
+use cranelift_codegen::entity::SecondaryMap;
+use cranelift_codegen::isa::TargetIsa;
+use cranelift_codegen::{self, binemit, ir};
+use cranelift_module::{
+    Backend, DataContext, DataDescription, DataId, FuncId, Init, Linkage, ModuleNamespace,
+    ModuleResult, TrapSite,
+};
+use object::write::{
+    Object, Relocation, SectionId, StandardSection, Symbol, SymbolId, SymbolSection,
+};
+use object::{RelocationEncoding, RelocationKind, SymbolFlags, SymbolKind, SymbolScope};
+use std::collections::HashMap;
+use std::mem;
+use target_lexicon::{BinaryFormat, PointerWidth};
+
+/// Setting to enable collection of traps. Setting this to `Enabled` in
+/// `ObjectBuilder` means that `ObjectProduct` will contain trap sites.
+#[derive(Debug)]
+pub enum ObjectTrapCollection {
+    /// `ObjectProduct::traps` will be empty.
+    Disabled,
+    /// `ObjectProduct::traps` will contain trap sites.
+    Enabled,
+}
+
+/// A builder for `ObjectBackend`.
+pub struct ObjectBuilder {
+    isa: Box<dyn TargetIsa>,
+    name: Vec<u8>,
+    collect_traps: ObjectTrapCollection,
+    libcall_names: Box<dyn Fn(ir::LibCall) -> String>,
+    function_alignment: u64,
+}
+
+impl ObjectBuilder {
+    /// Create a new `ObjectBuilder` using the given Cranelift target, which
+    /// can be passed to [`Module::new`](cranelift_module::Module::new).
+    ///
+    /// The `collect_traps` setting determines whether trap information is
+    /// collected in the `ObjectProduct`.
+    ///
+    /// The `libcall_names` function provides a way to translate `cranelift_codegen`'s `ir::LibCall`
+    /// enum to symbols. LibCalls are inserted in the IR as part of the legalization for certain
+    /// floating point instructions, and for stack probes. If you don't know what to use for this
+    /// argument, use `cranelift_module::default_libcall_names()`.
+    pub fn new<V: Into<Vec<u8>>>(
+        isa: Box<dyn TargetIsa>,
+        name: V,
+        collect_traps: ObjectTrapCollection,
+        libcall_names: Box<dyn Fn(ir::LibCall) -> String>,
+    ) -> Self {
+        Self {
+            isa,
+            name: name.into(),
+            collect_traps,
+            libcall_names,
+            function_alignment: 1,
+        }
+    }
+
+    /// Set the alignment used for functions.
+    pub fn function_alignment(&mut self, alignment: u64) -> &mut Self {
+        self.function_alignment = alignment;
+        self
+    }
+}
+
+/// An `ObjectBackend` implements `Backend` and emits ".o" files using the `object` library.
+///
+/// See the `ObjectBuilder` for a convenient way to construct `ObjectBackend` instances.
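+///
+/// A construction sketch; the `isa` value is assumed to come from e.g.
+/// `cranelift_native::builder()`, and the module name is illustrative:
+///
+/// ```ignore
+/// let builder = ObjectBuilder::new(
+///     isa,
+///     "example",
+///     ObjectTrapCollection::Disabled,
+///     cranelift_module::default_libcall_names(),
+/// );
+/// let mut module = cranelift_module::Module::<ObjectBackend>::new(builder);
+/// ```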
+pub struct ObjectBackend {
+    isa: Box<dyn TargetIsa>,
+    object: Object,
+    functions: SecondaryMap<FuncId, Option<SymbolId>>,
+    data_objects: SecondaryMap<DataId, Option<SymbolId>>,
+    traps: SecondaryMap<FuncId, Vec<TrapSite>>,
+    relocs: Vec<SymbolRelocs>,
+    libcalls: HashMap<ir::LibCall, SymbolId>,
+    libcall_names: Box<dyn Fn(ir::LibCall) -> String>,
+    collect_traps: ObjectTrapCollection,
+    function_alignment: u64,
+}
+
+impl Backend for ObjectBackend {
+    type Builder = ObjectBuilder;
+
+    type CompiledFunction = ObjectCompiledFunction;
+    type CompiledData = ObjectCompiledData;
+
+    // There's no need to return individual artifacts; we're writing them into
+    // the output file instead.
+    type FinalizedFunction = ();
+    type FinalizedData = ();
+
+    type Product = ObjectProduct;
+
+    /// Create a new `ObjectBackend` using the given Cranelift target.
+    fn new(builder: ObjectBuilder) -> Self {
+        let triple = builder.isa.triple();
+        let mut object = Object::new(triple.binary_format, triple.architecture);
+        object.add_file_symbol(builder.name);
+        Self {
+            isa: builder.isa,
+            object,
+            functions: SecondaryMap::new(),
+            data_objects: SecondaryMap::new(),
+            traps: SecondaryMap::new(),
+            relocs: Vec::new(),
+            libcalls: HashMap::new(),
+            libcall_names: builder.libcall_names,
+            collect_traps: builder.collect_traps,
+            function_alignment: builder.function_alignment,
+        }
+    }
+
+    fn isa(&self) -> &dyn TargetIsa {
+        &*self.isa
+    }
+
+    fn declare_function(&mut self, id: FuncId, name: &str, linkage: Linkage) {
+        let (scope, weak) = translate_linkage(linkage);
+
+        if let Some(function) = self.functions[id] {
+            let symbol = self.object.symbol_mut(function);
+            symbol.scope = scope;
+            symbol.weak = weak;
+        } else {
+            let symbol_id = self.object.add_symbol(Symbol {
+                name: name.as_bytes().to_vec(),
+                value: 0,
+                size: 0,
+                kind: SymbolKind::Text,
+                scope,
+                weak,
+                section: SymbolSection::Undefined,
+                flags: SymbolFlags::None,
+            });
+            self.functions[id] = Some(symbol_id);
+        }
+    }
+
+    fn declare_data(
+        &mut self,
+        id: DataId,
+        name: &str,
+        linkage: Linkage,
+        _writable: bool,
+        tls: bool,
+        _align: Option<u8>,
+    ) {
+        let kind = if tls {
+            SymbolKind::Tls
+        } else {
+            SymbolKind::Data
+        };
+        let (scope, weak) = translate_linkage(linkage);
+
+        if let Some(data) = self.data_objects[id] {
+            let symbol = self.object.symbol_mut(data);
+            symbol.kind = kind;
+            symbol.scope = scope;
+            symbol.weak = weak;
+        } else {
+            let symbol_id = self.object.add_symbol(Symbol {
+                name: name.as_bytes().to_vec(),
+                value: 0,
+                size: 0,
+                kind,
+                scope,
+                weak,
+                section: SymbolSection::Undefined,
+                flags: SymbolFlags::None,
+            });
+            self.data_objects[id] = Some(symbol_id);
+        }
+    }
+
+    fn define_function(
+        &mut self,
+        func_id: FuncId,
+        _name: &str,
+        ctx: &cranelift_codegen::Context,
+        _namespace: &ModuleNamespace<Self>,
+        code_size: u32,
+    ) -> ModuleResult<ObjectCompiledFunction> {
+        let mut code: Vec<u8> = vec![0; code_size as usize];
+        let mut reloc_sink = ObjectRelocSink::new(self.object.format());
+        let mut trap_sink = ObjectTrapSink::default();
+        let mut stackmap_sink = NullStackmapSink {};
+
+        if let ObjectTrapCollection::Enabled = self.collect_traps {
+            unsafe {
+                ctx.emit_to_memory(
+                    &*self.isa,
+                    code.as_mut_ptr(),
+                    &mut reloc_sink,
+                    &mut trap_sink,
+                    &mut stackmap_sink,
+                )
+            };
+        } else {
+            let mut trap_sink = NullTrapSink {};
+            unsafe {
+                ctx.emit_to_memory(
+                    &*self.isa,
+                    code.as_mut_ptr(),
+                    &mut reloc_sink,
+                    &mut trap_sink,
+                    &mut stackmap_sink,
+                )
+            };
+        }
+
+        let symbol = self.functions[func_id].unwrap();
+        let section = self.object.section_id(StandardSection::Text);
+        let offset = self
+            .object
+            .add_symbol_data(symbol, section, &code, self.function_alignment);
+        if !reloc_sink.relocs.is_empty() {
+            self.relocs.push(SymbolRelocs {
+                section,
+                offset,
+                relocs: reloc_sink.relocs,
+            });
+        }
+        self.traps[func_id] = trap_sink.sites;
+        Ok(ObjectCompiledFunction)
+    }
+
+    fn define_function_bytes(
+        &mut self,
+        func_id: FuncId,
+        _name: &str,
+        bytes: &[u8],
+        _namespace: &ModuleNamespace<Self>,
+        traps: Vec<TrapSite>,
+    ) -> ModuleResult<ObjectCompiledFunction> {
+        let symbol = self.functions[func_id].unwrap();
+        let section = self.object.section_id(StandardSection::Text);
+        let _offset = self
+            .object
+            .add_symbol_data(symbol, section, bytes, self.function_alignment);
+        self.traps[func_id] = traps;
+        Ok(ObjectCompiledFunction)
+    }
+
+    fn define_data(
+        &mut self,
+        data_id: DataId,
+        _name: &str,
+        writable: bool,
+        tls: bool,
+        align: Option<u8>,
+        data_ctx: &DataContext,
+        _namespace: &ModuleNamespace<Self>,
+    ) -> ModuleResult<ObjectCompiledData> {
+        let &DataDescription {
+            ref init,
+            ref function_decls,
+            ref data_decls,
+            ref function_relocs,
+            ref data_relocs,
+        } = data_ctx.description();
+
+        let reloc_size = match self.isa.triple().pointer_width().unwrap() {
+            PointerWidth::U16 => 16,
+            PointerWidth::U32 => 32,
+            PointerWidth::U64 => 64,
+        };
+        let mut relocs = Vec::new();
+        for &(offset, id) in function_relocs {
+            relocs.push(RelocRecord {
+                offset,
+                name: function_decls[id].clone(),
+                kind: RelocationKind::Absolute,
+                encoding: RelocationEncoding::Generic,
+                size: reloc_size,
+                addend: 0,
+            });
+        }
+        for &(offset, id, addend) in data_relocs {
+            relocs.push(RelocRecord {
+                offset,
+                name: data_decls[id].clone(),
+                kind: RelocationKind::Absolute,
+                encoding: RelocationEncoding::Generic,
+                size: reloc_size,
+                addend,
+            });
+        }
+
+        let symbol = self.data_objects[data_id].unwrap();
+        let section_kind = if let Init::Zeros { .. } = *init {
+            if tls {
+                StandardSection::UninitializedTls
+            } else {
+                StandardSection::UninitializedData
+            }
+        } else if tls {
+            StandardSection::Tls
+        } else if writable {
+            StandardSection::Data
+        } else if relocs.is_empty() {
+            StandardSection::ReadOnlyData
+        } else {
+            StandardSection::ReadOnlyDataWithRel
+        };
+        let section = self.object.section_id(section_kind);
+
+        let align = u64::from(align.unwrap_or(1));
+        let offset = match *init {
+            Init::Uninitialized => {
+                panic!("data is not initialized yet");
+            }
+            Init::Zeros { size } => self
+                .object
+                .add_symbol_bss(symbol, section, size as u64, align),
+            Init::Bytes { ref contents } => self
+                .object
+                .add_symbol_data(symbol, section, &contents, align),
+        };
+        if !relocs.is_empty() {
+            self.relocs.push(SymbolRelocs {
+                section,
+                offset,
+                relocs,
+            });
+        }
+        Ok(ObjectCompiledData)
+    }
+
+    fn write_data_funcaddr(
+        &mut self,
+        _data: &mut ObjectCompiledData,
+        _offset: usize,
+        _what: ir::FuncRef,
+    ) {
+        unimplemented!()
+    }
+
+    fn write_data_dataaddr(
+        &mut self,
+        _data: &mut ObjectCompiledData,
+        _offset: usize,
+        _what: ir::GlobalValue,
+        _usize: binemit::Addend,
+    ) {
+        unimplemented!()
+    }
+
+    fn finalize_function(
+        &mut self,
+        _id: FuncId,
+        _func: &ObjectCompiledFunction,
+        _namespace: &ModuleNamespace<Self>,
+    ) {
+        // Nothing to do.
+    }
+
+    fn get_finalized_function(&self, _func: &ObjectCompiledFunction) {
+        // Nothing to do.
+    }
+
+    fn finalize_data(
+        &mut self,
+        _id: DataId,
+        _data: &ObjectCompiledData,
+        _namespace: &ModuleNamespace<Self>,
+    ) {
+        // Nothing to do.
+    }
+
+    fn get_finalized_data(&self, _data: &ObjectCompiledData) {
+        // Nothing to do.
+    }
+
+    fn publish(&mut self) {
+        // Nothing to do.
+    }
+
+    fn finish(mut self, namespace: &ModuleNamespace<Self>) -> ObjectProduct {
+        let mut symbol_relocs = Vec::new();
+        mem::swap(&mut symbol_relocs, &mut self.relocs);
+        for symbol in symbol_relocs {
+            for &RelocRecord {
+                offset,
+                ref name,
+                kind,
+                encoding,
+                size,
+                addend,
+            } in &symbol.relocs
+            {
+                let target_symbol = self.get_symbol(namespace, name);
+                self.object
+                    .add_relocation(
+                        symbol.section,
+                        Relocation {
+                            offset: symbol.offset + u64::from(offset),
+                            size,
+                            kind,
+                            encoding,
+                            symbol: target_symbol,
+                            addend,
+                        },
+                    )
+                    .unwrap();
+            }
+        }
+
+        ObjectProduct {
+            object: self.object,
+            functions: self.functions,
+            data_objects: self.data_objects,
+            traps: self.traps,
+        }
+    }
+}
+
+impl ObjectBackend {
+    // This should only be called during `finish` because it creates
+    // symbols for missing libcalls.
+    fn get_symbol(
+        &mut self,
+        namespace: &ModuleNamespace<Self>,
+        name: &ir::ExternalName,
+    ) -> SymbolId {
+        match *name {
+            ir::ExternalName::User { .. } => {
+                if namespace.is_function(name) {
+                    let id = namespace.get_function_id(name);
+                    self.functions[id].unwrap()
+                } else {
+                    let id = namespace.get_data_id(name);
+                    self.data_objects[id].unwrap()
+                }
+            }
+            ir::ExternalName::LibCall(ref libcall) => {
+                let name = (self.libcall_names)(*libcall);
+                if let Some(symbol) = self.object.symbol_id(name.as_bytes()) {
+                    symbol
+                } else if let Some(symbol) = self.libcalls.get(libcall) {
+                    *symbol
+                } else {
+                    let symbol = self.object.add_symbol(Symbol {
+                        name: name.as_bytes().to_vec(),
+                        value: 0,
+                        size: 0,
+                        kind: SymbolKind::Text,
+                        scope: SymbolScope::Unknown,
+                        weak: false,
+                        section: SymbolSection::Undefined,
+                        flags: SymbolFlags::None,
+                    });
+                    self.libcalls.insert(*libcall, symbol);
+                    symbol
+                }
+            }
+            _ => panic!("invalid ExternalName {}", name),
+        }
+    }
+}
+
+fn translate_linkage(linkage: Linkage) -> (SymbolScope, bool) {
+    let scope = match linkage {
+        Linkage::Import => SymbolScope::Unknown,
+        Linkage::Local => SymbolScope::Compilation,
+        Linkage::Export | Linkage::Preemptible => SymbolScope::Dynamic,
+    };
+    // TODO: this matches rustc_codegen_cranelift, but may be wrong.
+    let weak = linkage == Linkage::Preemptible;
+    (scope, weak)
+}
+
+/// Handle for a compiled function; the code itself lives in the `Object`.
+pub struct ObjectCompiledFunction;
+/// Handle for a compiled data object; the bytes live in the `Object`.
+pub struct ObjectCompiledData;
+
+/// This is the output of `Module`'s
+/// [`finish`](../cranelift_module/struct.Module.html#method.finish) function.
+/// It contains the generated `Object` and other information produced during
+/// compilation.
+pub struct ObjectProduct {
+    /// Object artifact with all functions and data from the module defined.
+    pub object: Object,
+    /// Symbol IDs for functions (both declared and defined).
+    pub functions: SecondaryMap<FuncId, Option<SymbolId>>,
+    /// Symbol IDs for data objects (both declared and defined).
+    pub data_objects: SecondaryMap<DataId, Option<SymbolId>>,
+    /// Trap sites for defined functions.
+    pub traps: SecondaryMap<FuncId, Vec<TrapSite>>,
+}
+
+impl ObjectProduct {
+    /// Return the `SymbolId` for the given function.
+    #[inline]
+    pub fn function_symbol(&self, id: FuncId) -> SymbolId {
+        self.functions[id].unwrap()
+    }
+
+    /// Return the `SymbolId` for the given data object.
+    #[inline]
+    pub fn data_symbol(&self, id: DataId) -> SymbolId {
+        self.data_objects[id].unwrap()
+    }
+
+    /// Write the object bytes in memory.
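+    ///
+    /// A sketch of writing the result to disk; the `product` binding and
+    /// output path are illustrative:
+    ///
+    /// ```ignore
+    /// let bytes = product.emit().expect("failed to serialize object bytes");
+    /// std::fs::write("out.o", bytes).expect("failed to write out.o");
+    /// ```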
+    #[inline]
+    pub fn emit(self) -> Result<Vec<u8>, String> {
+        self.object.write()
+    }
+}
+
+#[derive(Clone)]
+struct SymbolRelocs {
+    section: SectionId,
+    offset: u64,
+    relocs: Vec<RelocRecord>,
+}
+
+#[derive(Clone)]
+struct RelocRecord {
+    offset: CodeOffset,
+    name: ir::ExternalName,
+    kind: RelocationKind,
+    encoding: RelocationEncoding,
+    size: u8,
+    addend: Addend,
+}
+
+struct ObjectRelocSink {
+    format: BinaryFormat,
+    relocs: Vec<RelocRecord>,
+}
+
+impl ObjectRelocSink {
+    fn new(format: BinaryFormat) -> Self {
+        Self {
+            format,
+            relocs: vec![],
+        }
+    }
+}
+
+impl RelocSink for ObjectRelocSink {
+    fn reloc_block(&mut self, _offset: CodeOffset, _reloc: Reloc, _block_offset: CodeOffset) {
+        unimplemented!();
+    }
+
+    fn reloc_external(
+        &mut self,
+        offset: CodeOffset,
+        reloc: Reloc,
+        name: &ir::ExternalName,
+        mut addend: Addend,
+    ) {
+        let (kind, encoding, size) = match reloc {
+            Reloc::Abs4 => (RelocationKind::Absolute, RelocationEncoding::Generic, 32),
+            Reloc::Abs8 => (RelocationKind::Absolute, RelocationEncoding::Generic, 64),
+            Reloc::X86PCRel4 => (RelocationKind::Relative, RelocationEncoding::Generic, 32),
+            Reloc::X86CallPCRel4 => (RelocationKind::Relative, RelocationEncoding::X86Branch, 32),
+            // TODO: Get Cranelift to tell us when we can use
+            // R_X86_64_GOTPCRELX/R_X86_64_REX_GOTPCRELX.
+            Reloc::X86CallPLTRel4 => (
+                RelocationKind::PltRelative,
+                RelocationEncoding::X86Branch,
+                32,
+            ),
+            Reloc::X86GOTPCRel4 => (RelocationKind::GotRelative, RelocationEncoding::Generic, 32),
+
+            Reloc::ElfX86_64TlsGd => {
+                assert_eq!(
+                    self.format,
+                    BinaryFormat::Elf,
+                    "ElfX86_64TlsGd is not supported for this file format"
+                );
+                (
+                    RelocationKind::Elf(goblin::elf64::reloc::R_X86_64_TLSGD),
+                    RelocationEncoding::Generic,
+                    32,
+                )
+            }
+            Reloc::MachOX86_64Tlv => {
+                assert_eq!(
+                    self.format,
+                    BinaryFormat::Macho,
+                    "MachOX86_64Tlv is not supported for this file format"
+                );
+                addend += 4; // X86_64_RELOC_TLV has an implicit addend of -4
+                (
+                    RelocationKind::MachO {
+                        value: goblin::mach::relocation::X86_64_RELOC_TLV,
+                        relative: true,
+                    },
+                    RelocationEncoding::Generic,
+                    32,
+                )
+            }
+            // FIXME
+            _ => unimplemented!(),
+        };
+        self.relocs.push(RelocRecord {
+            offset,
+            name: name.clone(),
+            kind,
+            encoding,
+            size,
+            addend,
+        });
+    }
+
+    fn reloc_jt(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::JumpTable) {
+        match reloc {
+            Reloc::X86PCRelRodata4 => {
+                // Not necessary to record this unless we are going to split apart code and its
+                // jumptbl/rodata.
+            }
+            _ => {
+                panic!("Unhandled reloc");
+            }
+        }
+    }
+
+    fn reloc_constant(&mut self, _offset: CodeOffset, reloc: Reloc, _constant: ir::ConstantOffset) {
+        match reloc {
+            Reloc::X86PCRelRodata4 => {
+                // Not necessary to record this unless we are going to split apart code and its
+                // jumptbl/rodata.
+            }
+            _ => {
+                panic!("Unhandled reloc");
+            }
+        }
+    }
+}
diff --git a/cranelift/object/src/lib.rs b/cranelift/object/src/lib.rs
new file mode 100644
index 0000000000..1542c0a191
--- /dev/null
+++ b/cranelift/object/src/lib.rs
@@ -0,0 +1,35 @@
+//! Top-level lib.rs for `cranelift_object`.
+//!
+//! Users of this module should not have to depend on `object` directly.
+ +#![deny( + missing_docs, + trivial_numeric_casts, + unused_extern_crates, + unstable_features +)] +#![warn(unused_import_braces)] +#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))] +#![cfg_attr( + feature = "cargo-clippy", + warn( + clippy::float_arithmetic, + clippy::mut_mut, + clippy::nonminimal_bool, + clippy::option_map_unwrap_or, + clippy::option_map_unwrap_or_else, + clippy::print_stdout, + clippy::unicode_not_nfc, + clippy::use_self + ) +)] + +mod backend; +mod traps; + +pub use crate::backend::{ObjectBackend, ObjectBuilder, ObjectProduct, ObjectTrapCollection}; +pub use crate::traps::ObjectTrapSink; + +/// Version number of this crate. +pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/cranelift/object/src/traps.rs b/cranelift/object/src/traps.rs new file mode 100644 index 0000000000..2fabfc626d --- /dev/null +++ b/cranelift/object/src/traps.rs @@ -0,0 +1,22 @@ +//! Records every `TrapCode` that cranelift outputs during code generation, +//! for every function in the module. This data may be useful at runtime. + +use cranelift_codegen::{binemit, ir}; +use cranelift_module::TrapSite; + +/// Record of the trap sites for a given function +#[derive(Default, Clone)] +pub struct ObjectTrapSink { + /// All trap sites collected in function + pub sites: Vec, +} + +impl binemit::TrapSink for ObjectTrapSink { + fn trap(&mut self, offset: binemit::CodeOffset, srcloc: ir::SourceLoc, code: ir::TrapCode) { + self.sites.push(TrapSite { + offset, + srcloc, + code, + }); + } +} diff --git a/cranelift/preopt/Cargo.toml b/cranelift/preopt/Cargo.toml new file mode 100644 index 0000000000..a62b990284 --- /dev/null +++ b/cranelift/preopt/Cargo.toml @@ -0,0 +1,28 @@ +[package] +authors = ["The Cranelift Project Developers"] +name = "cranelift-preopt" +version = "0.59.0" +description = "Support for optimizations in Cranelift" +license = "Apache-2.0 WITH LLVM-exception" +documentation = "https://cranelift.readthedocs.io/" +repository = "https://github.com/bytecodealliance/cranelift" +categories = ["no-std"] +readme = "README.md" +keywords = ["optimize", "compile", "compiler", "jit"] +edition = "2018" + +[dependencies] +cranelift-codegen = { path = "../codegen", version = "0.59.0", default-features = false } +cranelift-entity = { path = "../entity", version = "0.59.0" } +# This is commented out because it doesn't build on Rust 1.25.0, which +# cranelift currently supports. +# rustc_apfloat = { version = "0.1.2", default-features = false } + +[features] +default = ["std"] +std = ["cranelift-codegen/std"] +core = ["cranelift-codegen/core"] + +[badges] +maintenance = { status = "experimental" } +travis-ci = { repository = "bytecodealliance/cranelift" } diff --git a/cranelift/preopt/LICENSE b/cranelift/preopt/LICENSE new file mode 100644 index 0000000000..f9d81955f4 --- /dev/null +++ b/cranelift/preopt/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+
+
+--- LLVM Exceptions to the Apache 2.0 License ----
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
+
diff --git a/cranelift/preopt/README.md b/cranelift/preopt/README.md
new file mode 100644
index 0000000000..1c4f04dc64
--- /dev/null
+++ b/cranelift/preopt/README.md
@@ -0,0 +1 @@
+This crate performs early-stage optimizations on [Cranelift](https://crates.io/crates/cranelift) IR.
diff --git a/cranelift/preopt/src/constant_folding.rs b/cranelift/preopt/src/constant_folding.rs
new file mode 100644
index 0000000000..40d597eddc
--- /dev/null
+++ b/cranelift/preopt/src/constant_folding.rs
@@ -0,0 +1,257 @@
+//! Fold operations on constants at compile time.
+#![allow(clippy::float_arithmetic)]
+
+use cranelift_codegen::{
+    cursor::{Cursor, FuncCursor},
+    ir::{self, dfg::ValueDef, InstBuilder},
+};
+// use rustc_apfloat::{
+//     ieee::{Double, Single},
+//     Float,
+// };
+
+enum ConstImm {
+    Bool(bool),
+    I64(i64),
+    // Ieee32 and Ieee64 will eventually be replaced with `Single` and
+    // `Double` from the rustc_apfloat library.
+    Ieee32(f32),
+    Ieee64(f64),
+}
+
+impl ConstImm {
+    fn unwrap_i64(self) -> i64 {
+        if let Self::I64(imm) = self {
+            imm
+        } else {
+            panic!("self did not contain an `i64`.")
+        }
+    }
+
+    fn evaluate_truthiness(self) -> bool {
+        match self {
+            Self::Bool(b) => b,
+            Self::I64(imm) => imm != 0,
+            _ => panic!(
+                "only `ConstImm::Bool` and `ConstImm::I64` can be evaluated for \"truthiness\""
+            ),
+        }
+    }
+}
+
+/// Fold operations on constants.
+///
+/// It's important to note that this will not remove unused constants. It's
+/// assumed that the DCE pass will take care of them.
+pub fn fold_constants(func: &mut ir::Function) {
+    let mut pos = FuncCursor::new(func);
+
+    while let Some(_block) = pos.next_block() {
+        while let Some(inst) = pos.next_inst() {
+            use self::ir::InstructionData::*;
+            match pos.func.dfg[inst] {
+                Binary { opcode, args } => {
+                    fold_binary(&mut pos.func.dfg, inst, opcode, args);
+                }
+                Unary { opcode, arg } => {
+                    fold_unary(&mut pos.func.dfg, inst, opcode, arg);
+                }
+                Branch { opcode, .. } => {
+                    fold_branch(&mut pos, inst, opcode);
+                }
+                _ => {}
+            }
+        }
+    }
+}
+
+fn resolve_value_to_imm(dfg: &ir::DataFlowGraph, value: ir::Value) -> Option<ConstImm> {
+    let original = dfg.resolve_aliases(value);
+
+    let inst = match dfg.value_def(original) {
+        ValueDef::Result(inst, _) => inst,
+        ValueDef::Param(_, _) => return None,
+    };
+
+    use self::ir::{InstructionData::*, Opcode::*};
+    match dfg[inst] {
+        UnaryImm {
+            opcode: Iconst,
+            imm,
+        } => Some(ConstImm::I64(imm.into())),
+        UnaryIeee32 {
+            opcode: F32const,
+            imm,
+        } => {
+            // See https://doc.rust-lang.org/std/primitive.f32.html#method.from_bits for caveats.
+            let ieee_f32 = f32::from_bits(imm.bits());
+            Some(ConstImm::Ieee32(ieee_f32))
+        }
+        UnaryIeee64 {
+            opcode: F64const,
+            imm,
+        } => {
+            // See https://doc.rust-lang.org/std/primitive.f64.html#method.from_bits for caveats.
+            let ieee_f64 = f64::from_bits(imm.bits());
+            Some(ConstImm::Ieee64(ieee_f64))
+        }
+        UnaryBool {
+            opcode: Bconst,
+            imm,
+        } => Some(ConstImm::Bool(imm)),
+        _ => None,
+    }
+}
+
+fn evaluate_binary(opcode: ir::Opcode, imm0: ConstImm, imm1: ConstImm) -> Option<ConstImm> {
+    use core::num::Wrapping;
+
+    match opcode {
+        ir::Opcode::Iadd => {
+            let imm0 = Wrapping(imm0.unwrap_i64());
+            let imm1 = Wrapping(imm1.unwrap_i64());
+            Some(ConstImm::I64((imm0 + imm1).0))
+        }
+        ir::Opcode::Isub => {
+            let imm0 = Wrapping(imm0.unwrap_i64());
+            let imm1 = Wrapping(imm1.unwrap_i64());
+            Some(ConstImm::I64((imm0 - imm1).0))
+        }
+        ir::Opcode::Imul => {
+            let imm0 = Wrapping(imm0.unwrap_i64());
+            let imm1 = Wrapping(imm1.unwrap_i64());
+            Some(ConstImm::I64((imm0 * imm1).0))
+        }
+        ir::Opcode::Udiv => {
+            // `udiv` is an unsigned division, so divide the raw bit patterns as `u64`.
+            let imm0 = imm0.unwrap_i64() as u64;
+            let imm1 = imm1.unwrap_i64() as u64;
+            if imm1 == 0 {
+                panic!("Cannot divide by zero.")
+            }
+            Some(ConstImm::I64((imm0 / imm1) as i64))
+        }
+        ir::Opcode::Fadd => match (imm0, imm1) {
+            (ConstImm::Ieee32(imm0), ConstImm::Ieee32(imm1)) => Some(ConstImm::Ieee32(imm0 + imm1)),
+            (ConstImm::Ieee64(imm0), ConstImm::Ieee64(imm1)) => Some(ConstImm::Ieee64(imm0 + imm1)),
+            _ => unreachable!(),
+        },
+        ir::Opcode::Fsub => match (imm0, imm1) {
+            (ConstImm::Ieee32(imm0), ConstImm::Ieee32(imm1)) => Some(ConstImm::Ieee32(imm0 - imm1)),
+            (ConstImm::Ieee64(imm0), ConstImm::Ieee64(imm1)) => Some(ConstImm::Ieee64(imm0 - imm1)),
+            _ => unreachable!(),
+        },
+        ir::Opcode::Fmul => match (imm0, imm1) {
+            (ConstImm::Ieee32(imm0), ConstImm::Ieee32(imm1)) => Some(ConstImm::Ieee32(imm0 * imm1)),
+            (ConstImm::Ieee64(imm0), ConstImm::Ieee64(imm1)) => Some(ConstImm::Ieee64(imm0 * imm1)),
+            _ => unreachable!(),
+        },
+        ir::Opcode::Fdiv => match (imm0, imm1) {
+            (ConstImm::Ieee32(imm0), ConstImm::Ieee32(imm1)) => Some(ConstImm::Ieee32(imm0 / imm1)),
+            (ConstImm::Ieee64(imm0), ConstImm::Ieee64(imm1)) => Some(ConstImm::Ieee64(imm0 / imm1)),
+            _ => unreachable!(),
+        },
+        _ => None,
+    }
+}
+
+fn evaluate_unary(opcode: ir::Opcode, imm: ConstImm) -> Option<ConstImm> {
+    match opcode {
+        ir::Opcode::Fneg => match imm {
+            ConstImm::Ieee32(imm) => Some(ConstImm::Ieee32(-imm)),
+            ConstImm::Ieee64(imm) => Some(ConstImm::Ieee64(-imm)),
+            _ => unreachable!(),
+        },
+        ir::Opcode::Fabs => match imm {
+            ConstImm::Ieee32(imm) => Some(ConstImm::Ieee32(imm.abs())),
+            ConstImm::Ieee64(imm) => Some(ConstImm::Ieee64(imm.abs())),
+            _ => unreachable!(),
+        },
+        _ => None,
+    }
+}
+
+fn replace_inst(dfg: &mut ir::DataFlowGraph, inst: ir::Inst, const_imm: ConstImm) {
+    use self::ConstImm::*;
+    match const_imm {
+        I64(imm) => {
+            let typevar = dfg.ctrl_typevar(inst);
+            dfg.replace(inst).iconst(typevar, imm);
+        }
+        Ieee32(imm) => {
+            dfg.replace(inst)
+                .f32const(ir::immediates::Ieee32::with_bits(imm.to_bits()));
+        }
+        Ieee64(imm) => {
+            dfg.replace(inst)
+                .f64const(ir::immediates::Ieee64::with_bits(imm.to_bits()));
+        }
+        Bool(imm) => {
+            let typevar = dfg.ctrl_typevar(inst);
+            dfg.replace(inst).bconst(typevar, imm);
+        }
+    }
+}
+
+/// Fold a binary instruction.
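+///
+/// For example, `v2 = iadd v0, v1` where both `v0` and `v1` are `iconst`
+/// results is rewritten in place into `v2 = iconst` with the computed sum.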
+fn fold_binary( + dfg: &mut ir::DataFlowGraph, + inst: ir::Inst, + opcode: ir::Opcode, + args: [ir::Value; 2], +) { + let (imm0, imm1) = if let (Some(imm0), Some(imm1)) = ( + resolve_value_to_imm(dfg, args[0]), + resolve_value_to_imm(dfg, args[1]), + ) { + (imm0, imm1) + } else { + return; + }; + + if let Some(const_imm) = evaluate_binary(opcode, imm0, imm1) { + replace_inst(dfg, inst, const_imm); + } +} + +/// Fold a unary instruction. +fn fold_unary(dfg: &mut ir::DataFlowGraph, inst: ir::Inst, opcode: ir::Opcode, arg: ir::Value) { + let imm = if let Some(imm) = resolve_value_to_imm(dfg, arg) { + imm + } else { + return; + }; + + if let Some(const_imm) = evaluate_unary(opcode, imm) { + replace_inst(dfg, inst, const_imm); + } +} + +fn fold_branch(pos: &mut FuncCursor, inst: ir::Inst, opcode: ir::Opcode) { + let (cond, block, args) = { + let values = pos.func.dfg.inst_args(inst); + let inst_data = &pos.func.dfg[inst]; + ( + match resolve_value_to_imm(&pos.func.dfg, values[0]) { + Some(imm) => imm, + None => return, + }, + inst_data.branch_destination().unwrap(), + values[1..].to_vec(), + ) + }; + + let truthiness = cond.evaluate_truthiness(); + let branch_if_zero = match opcode { + ir::Opcode::Brz => true, + ir::Opcode::Brnz => false, + _ => unreachable!(), + }; + + if (branch_if_zero && !truthiness) || (!branch_if_zero && truthiness) { + pos.func.dfg.replace(inst).jump(block, &args); + // remove the rest of the block to avoid verifier errors + while let Some(next_inst) = pos.func.layout.next_inst(inst) { + pos.func.layout.remove_inst(next_inst); + } + } else { + pos.remove_inst_and_step_back(); + } +} diff --git a/cranelift/preopt/src/lib.rs b/cranelift/preopt/src/lib.rs new file mode 100644 index 0000000000..027460bc59 --- /dev/null +++ b/cranelift/preopt/src/lib.rs @@ -0,0 +1,47 @@ +//! Performs early-stage optimizations on Cranelift IR. + +#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)] +#![warn(unused_import_braces)] +#![cfg_attr(feature = "std", deny(unstable_features))] +#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))] +#![cfg_attr( + feature = "cargo-clippy", + warn( + clippy::float_arithmetic, + clippy::mut_mut, + clippy::nonminimal_bool, + clippy::option_map_unwrap_or, + clippy::option_map_unwrap_or_else, + clippy::print_stdout, + clippy::unicode_not_nfc, + clippy::use_self + ) +)] +#![no_std] + +mod constant_folding; + +use cranelift_codegen::{isa::TargetIsa, settings::FlagsOrIsa, CodegenResult, Context}; + +/// Optimize the function with available optimizations. +/// +/// Since this can be resource intensive (and code-size inflating), +/// it is separated from `Context::compile` to allow DCE to remove it +/// if it's not used. 
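+///
+/// A sketch of a typical invocation (hypothetical caller code; the `Context`
+/// and ISA setup are assumed to exist elsewhere):
+///
+/// ```ignore
+/// let mut ctx = cranelift_codegen::Context::for_function(func);
+/// cranelift_preopt::optimize(&mut ctx, isa)?;
+/// ```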
+pub fn optimize(ctx: &mut Context, isa: &dyn TargetIsa) -> CodegenResult<()> { + ctx.verify_if(isa)?; + fold_constants(ctx, isa)?; + + Ok(()) +} + +/// Fold constants +pub fn fold_constants<'a, FOI>(ctx: &mut Context, fisa: FOI) -> CodegenResult<()> +where + FOI: Into>, +{ + constant_folding::fold_constants(&mut ctx.func); + ctx.verify_if(fisa)?; + Ok(()) +} diff --git a/cranelift/reader/Cargo.toml b/cranelift/reader/Cargo.toml new file mode 100644 index 0000000000..8ecda5e776 --- /dev/null +++ b/cranelift/reader/Cargo.toml @@ -0,0 +1,18 @@ +[package] +authors = ["The Cranelift Project Developers"] +name = "cranelift-reader" +version = "0.59.0" +description = "Cranelift textual IR reader" +license = "Apache-2.0 WITH LLVM-exception" +documentation = "https://cranelift.readthedocs.io/" +repository = "https://github.com/bytecodealliance/cranelift" +readme = "README.md" +edition = "2018" + +[dependencies] +cranelift-codegen = { path = "../codegen", version = "0.59.0" } +target-lexicon = "0.10" + +[badges] +maintenance = { status = "experimental" } +travis-ci = { repository = "bytecodealliance/cranelift" } diff --git a/cranelift/reader/LICENSE b/cranelift/reader/LICENSE new file mode 100644 index 0000000000..f9d81955f4 --- /dev/null +++ b/cranelift/reader/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
+
diff --git a/cranelift/reader/README.md b/cranelift/reader/README.md
new file mode 100644
index 0000000000..094cd92314
--- /dev/null
+++ b/cranelift/reader/README.md
@@ -0,0 +1,3 @@
+This library supports reading .clif files. This functionality is needed
+for testing [Cranelift](https://crates.io/crates/cranelift), but is not essential
+for a JIT compiler.
diff --git a/cranelift/reader/src/error.rs b/cranelift/reader/src/error.rs
new file mode 100644
index 0000000000..bed2352e5c
--- /dev/null
+++ b/cranelift/reader/src/error.rs
@@ -0,0 +1,66 @@
+//! Define the `Location`, `ParseError`, and `ParseResult` types.
+
+#![macro_use]
+
+use std::fmt;
+
+/// The location of a `Token` or `Error`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub struct Location {
+    /// Line number. Command-line arguments are line 0 and source file
+    /// lines start from 1.
+    pub line_number: usize,
+}
+
+/// A parse error is returned when parsing fails.
+#[derive(Debug)]
+pub struct ParseError {
+    /// Location of the error.
+    pub location: Location,
+    /// Error message.
+    pub message: String,
+    /// Whether it's a warning or a plain error.
+    pub is_warning: bool,
+}
+
+impl fmt::Display for ParseError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if self.location.line_number == 0 {
+            write!(f, "command-line arguments: {}", self.message)
+        } else {
+            write!(f, "{}: {}", self.location.line_number, self.message)
+        }
+    }
+}
+
+/// Result of a parser operation. The `ParseError` variant includes a location.
+pub type ParseResult<T> = Result<T, ParseError>;
+
+// Create an `Err` variant of `ParseResult` from a location and `format!` args.
+macro_rules! err {
+    ( $loc:expr, $msg:expr ) => {
+        Err($crate::ParseError {
+            location: $loc.clone(),
+            message: $msg.to_string(),
+            is_warning: false,
+        })
+    };
+
+    ( $loc:expr, $fmt:expr, $( $arg:expr ),+ ) => {
+        Err($crate::ParseError {
+            location: $loc.clone(),
+            message: format!( $fmt, $( $arg ),+ ),
+            is_warning: false,
+        })
+    };
+}
+
+macro_rules! warn {
+    ( $loc:expr, $fmt:expr, $( $arg:expr ),+ ) => {
+        Err($crate::ParseError {
+            location: $loc.clone(),
+            message: format!($fmt, $( $arg ),+ ),
+            is_warning: true,
+        })
+    };
+}
diff --git a/cranelift/reader/src/isaspec.rs b/cranelift/reader/src/isaspec.rs
new file mode 100644
index 0000000000..ed16fd4416
--- /dev/null
+++ b/cranelift/reader/src/isaspec.rs
@@ -0,0 +1,69 @@
+//! Parsed representation of `set` and `isa` commands.
+//!
+//! A test case file can contain `set` commands that set ISA-independent settings, and it can
+//! contain `isa` commands that select an ISA and apply ISA-specific settings.
+//!
+//! If a test case file contains `isa` commands, the tests will only be run against the specified
+//! ISAs. If the file contains no `isa` commands, the tests will be run against all supported ISAs.
+
+use crate::error::{Location, ParseResult};
+use crate::testcommand::TestOption;
+use cranelift_codegen::isa::TargetIsa;
+use cranelift_codegen::settings::{Configurable, Flags, SetError};
+
+/// The ISA specifications in a `.clif` file.
+pub enum IsaSpec {
+    /// The parsed file does not contain any `isa` commands, but it may contain `set` commands
+    /// which are reflected in the finished `Flags` object.
+    None(Flags),
+
+    /// The parsed file does contain `isa` commands.
+    /// Each `isa` command is used to configure a `TargetIsa` trait object.
+    Some(Vec<Box<dyn TargetIsa>>),
+}
+
+impl IsaSpec {
+    /// If the `IsaSpec` contains exactly 1 `TargetIsa`, we return a reference to it.
+    pub fn unique_isa(&self) -> Option<&dyn TargetIsa> {
+        if let Self::Some(ref isa_vec) = *self {
+            if isa_vec.len() == 1 {
+                return Some(&*isa_vec[0]);
+            }
+        }
+        None
+    }
+}
+
+/// Parse an iterator of command line options and apply them to `config`.
+pub fn parse_options<'a, I>(
+    iter: I,
+    config: &mut dyn Configurable,
+    loc: Location,
+) -> ParseResult<()>
+where
+    I: Iterator<Item = &'a str>,
+{
+    for opt in iter.map(TestOption::new) {
+        match opt {
+            TestOption::Flag(name) => match config.enable(name) {
+                Ok(_) => {}
+                Err(SetError::BadName(name)) => return err!(loc, "unknown flag '{}'", name),
+                Err(_) => return err!(loc, "not a boolean flag: '{}'", opt),
+            },
+            TestOption::Value(name, value) => match config.set(name, value) {
+                Ok(_) => {}
+                Err(SetError::BadName(name)) => return err!(loc, "unknown setting '{}'", name),
+                Err(SetError::BadType) => return err!(loc, "invalid setting type: '{}'", opt),
+                Err(SetError::BadValue(expected)) => {
+                    return err!(
+                        loc,
+                        "invalid setting value for '{}', expected {}",
+                        opt,
+                        expected
+                    );
+                }
+            },
+        }
+    }
+    Ok(())
+}
diff --git a/cranelift/reader/src/lexer.rs b/cranelift/reader/src/lexer.rs
new file mode 100644
index 0000000000..1d2908a92f
--- /dev/null
+++ b/cranelift/reader/src/lexer.rs
@@ -0,0 +1,710 @@
+//! Lexical analysis for .clif files.
+
+use crate::error::Location;
+use cranelift_codegen::ir::types;
+use cranelift_codegen::ir::{Block, Value};
+#[allow(unused_imports, deprecated)]
+use std::ascii::AsciiExt;
+use std::str::CharIndices;
+use std::u16;
+
+/// A Token returned from the `Lexer`.
+///
+/// Some variants may contain references to the original source text, so the `Token` has the same
+/// lifetime as the source.
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+pub enum Token<'a> {
+    Comment(&'a str),
+    LPar,                 // '('
+    RPar,                 // ')'
+    LBrace,               // '{'
+    RBrace,               // '}'
+    LBracket,             // '['
+    RBracket,             // ']'
+    Minus,                // '-'
+    Plus,                 // '+'
+    Comma,                // ','
+    Dot,                  // '.'
+    Colon,                // ':'
+    Equal,                // '='
+    Not,                  // '!'
+    Arrow,                // '->'
+    Float(&'a str),       // Floating point immediate
+    Integer(&'a str),     // Integer immediate
+    Type(types::Type),    // i32, f32, b32x4, ...
+    Value(Value),         // v12, v7
+    Block(Block),         // block3
+    StackSlot(u32),       // ss3
+    GlobalValue(u32),     // gv3
+    Heap(u32),            // heap2
+    Table(u32),           // table2
+    JumpTable(u32),       // jt2
+    FuncRef(u32),         // fn2
+    SigRef(u32),          // sig2
+    UserRef(u32),         // u345
+    Name(&'a str),        // %9arbitrary_alphanum, %x3, %0, %function ...
+    String(&'a str),      // "arbitrary quoted string with no escape" ...
+    HexSequence(&'a str), // #89AF
+    Identifier(&'a str),  // Unrecognized identifier (opcode, enumerator, ...)
+    SourceLoc(&'a str),   // @00c7
+}
+
+/// A `Token` with an associated location.
+#[derive(Debug, PartialEq, Eq)]
+pub struct LocatedToken<'a> {
+    pub token: Token<'a>,
+    pub location: Location,
+}
+
+/// Wrap up a `Token` with the given location.
+fn token(token: Token, loc: Location) -> Result<LocatedToken, LocatedError> {
+    Ok(LocatedToken {
+        token,
+        location: loc,
+    })
+}
+
+/// An error from the lexical analysis.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum LexError {
+    InvalidChar,
+}
+
+/// A `LexError` with an associated Location.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct LocatedError {
+    pub error: LexError,
+    pub location: Location,
+}
+
+/// Wrap up a `LexError` with the given location.
+fn error<'a>(error: LexError, loc: Location) -> Result<LocatedToken<'a>, LocatedError> {
+    Err(LocatedError {
+        error,
+        location: loc,
+    })
+}
+
+/// Get the number of decimal digits at the end of `s`.
+fn trailing_digits(s: &str) -> usize {
+    // It's faster to iterate backwards over bytes, and we're only counting ASCII digits.
+    s.as_bytes()
+        .iter()
+        .rev()
+        .take_while(|&&b| b'0' <= b && b <= b'9')
+        .count()
+}
+
+/// Pre-parse a supposed entity name by splitting it into two parts: a head of lowercase ASCII
+/// letters and a numeric tail.
+pub fn split_entity_name(name: &str) -> Option<(&str, u32)> {
+    let (head, tail) = name.split_at(name.len() - trailing_digits(name));
+    if tail.len() > 1 && tail.starts_with('0') {
+        None
+    } else {
+        tail.parse().ok().map(|n| (head, n))
+    }
+}
+
+/// Lexical analysis.
+///
+/// A `Lexer` reads text from a `&str` and provides a sequence of tokens.
+///
+/// It also keeps track of a line number for error reporting.
+///
+pub struct Lexer<'a> {
+    // Complete source being processed.
+    source: &'a str,
+
+    // Iterator into `source`.
+    chars: CharIndices<'a>,
+
+    // Next character to be processed, or `None` at the end.
+    lookahead: Option<char>,
+
+    // Index into `source` of lookahead character.
+    pos: usize,
+
+    // Current line number.
+    line_number: usize,
+}
+
+impl<'a> Lexer<'a> {
+    pub fn new(s: &'a str) -> Self {
+        let mut lex = Self {
+            source: s,
+            chars: s.char_indices(),
+            lookahead: None,
+            pos: 0,
+            line_number: 1,
+        };
+        // Advance to the first char.
+        lex.next_ch();
+        lex
+    }
+
+    // Advance to the next character.
+    // Return the next lookahead character, or None when the end is encountered.
+    // Always update `lookahead` and `pos` to reflect the new position.
+    fn next_ch(&mut self) -> Option<char> {
+        if self.lookahead == Some('\n') {
+            self.line_number += 1;
+        }
+        match self.chars.next() {
+            Some((idx, ch)) => {
+                self.pos = idx;
+                self.lookahead = Some(ch);
+            }
+            None => {
+                self.pos = self.source.len();
+                self.lookahead = None;
+            }
+        }
+        self.lookahead
+    }
+
+    // Get the location corresponding to `lookahead`.
+    fn loc(&self) -> Location {
+        Location {
+            line_number: self.line_number,
+        }
+    }
+
+    // Starting from `lookahead`, are we looking at `prefix`?
+    fn looking_at(&self, prefix: &str) -> bool {
+        self.source[self.pos..].starts_with(prefix)
+    }
+
+    // Starting from `lookahead`, are we looking at a number?
+    fn looking_at_numeric(&self) -> bool {
+        if let Some(c) = self.lookahead {
+            if c.is_digit(10) {
+                return true;
+            }
+            match c {
+                '-' => return true,
+                '+' => return true,
+                '.' => return true,
+                _ => {}
+            }
+            if self.looking_at("NaN") || self.looking_at("Inf") || self.looking_at("sNaN") {
+                return true;
+            }
+        }
+        false
+    }
+
+    // Scan a single-char token.
+    fn scan_char(&mut self, tok: Token<'a>) -> Result<LocatedToken<'a>, LocatedError> {
+        assert_ne!(self.lookahead, None);
+        let loc = self.loc();
+        self.next_ch();
+        token(tok, loc)
+    }
+
+    // Scan a multi-char token.
+    fn scan_chars(
+        &mut self,
+        count: usize,
+        tok: Token<'a>,
+    ) -> Result<LocatedToken<'a>, LocatedError> {
+        let loc = self.loc();
+        for _ in 0..count {
+            assert_ne!(self.lookahead, None);
+            self.next_ch();
+        }
+        token(tok, loc)
+    }
+
+    /// Get the rest of the current line.
+    /// The next token returned by `next()` will be from the following lines.
+    pub fn rest_of_line(&mut self) -> &'a str {
+        let begin = self.pos;
+        loop {
+            match self.next_ch() {
+                None | Some('\n') => return &self.source[begin..self.pos],
+                _ => {}
+            }
+        }
+    }
+
+    // Scan a comment extending to the end of the current line.
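+    // The returned token text includes the leading ';' but not the trailing newline.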
+    fn scan_comment(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
+        let loc = self.loc();
+        let text = self.rest_of_line();
+        token(Token::Comment(text), loc)
+    }
+
+    // Scan a number token which can represent either an integer or floating point number.
+    //
+    // Accept the following forms:
+    //
+    // - `10`: Integer
+    // - `-10`: Integer
+    // - `0xff_00`: Integer
+    // - `0.0`: Float
+    // - `0x1.f`: Float
+    // - `-0x2.4`: Float
+    // - `0x0.4p-34`: Float
+    //
+    // This function does not filter out all invalid numbers. It depends on the context-sensitive
+    // decoding of the text for that. For example, the number of allowed digits in an `Ieee32` and
+    // an `Ieee64` constant differs.
+    fn scan_number(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
+        let begin = self.pos;
+        let loc = self.loc();
+        let mut is_float = false;
+
+        // Skip a leading sign.
+        match self.lookahead {
+            Some('-') => {
+                self.next_ch();
+                if !self.looking_at_numeric() {
+                    // If the next characters won't parse as a number, we return Token::Minus
+                    return token(Token::Minus, loc);
+                }
+            }
+            Some('+') => {
+                self.next_ch();
+                if !self.looking_at_numeric() {
+                    // If the next characters won't parse as a number, we return Token::Plus
+                    return token(Token::Plus, loc);
+                }
+            }
+            _ => {}
+        }
+
+        // Check for NaNs with payloads.
+        if self.looking_at("NaN:") || self.looking_at("sNaN:") {
+            // Skip the `NaN:` prefix, the loop below won't accept it.
+            // We expect a hexadecimal number to follow the colon.
+            while self.next_ch() != Some(':') {}
+            is_float = true;
+        } else if self.looking_at("NaN") || self.looking_at("Inf") {
+            // This is Inf or a default quiet NaN.
+            is_float = true;
+        }
+
+        // Look for the end of this number. Detect the radix point if there is one.
+        loop {
+            match self.next_ch() {
+                Some('-') | Some('_') => {}
+                Some('.') => is_float = true,
+                Some(ch) if ch.is_alphanumeric() => {}
+                _ => break,
+            }
+        }
+        let text = &self.source[begin..self.pos];
+        if is_float {
+            token(Token::Float(text), loc)
+        } else {
+            token(Token::Integer(text), loc)
+        }
+    }
+
+    // Scan a 'word', which is an identifier-like sequence of characters beginning with '_' or an
+    // alphabetic char, followed by zero or more alphanumeric or '_' characters.
+    fn scan_word(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
+        let begin = self.pos;
+        let loc = self.loc();
+
+        assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_alphabetic());
+        loop {
+            match self.next_ch() {
+                Some('_') => {}
+                Some(ch) if ch.is_alphanumeric() => {}
+                _ => break,
+            }
+        }
+        let text = &self.source[begin..self.pos];
+
+        // Look for numbered well-known entities like block15, v45, ...
+        token(
+            split_entity_name(text)
+                .and_then(|(prefix, number)| {
+                    Self::numbered_entity(prefix, number)
+                        .or_else(|| Self::value_type(text, prefix, number))
+                })
+                .unwrap_or_else(|| match text {
+                    "iflags" => Token::Type(types::IFLAGS),
+                    "fflags" => Token::Type(types::FFLAGS),
+                    _ => Token::Identifier(text),
+                }),
+            loc,
+        )
+    }
+
+    // If prefix is a well-known entity prefix and suffix is a valid entity number, return the
+    // decoded token.
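+    // For example, ("v", 12) decodes to `Token::Value(v12)` and ("block", 3)
+    // decodes to `Token::Block(block3)`.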
+    fn numbered_entity(prefix: &str, number: u32) -> Option<Token<'a>> {
+        match prefix {
+            "v" => Value::with_number(number).map(Token::Value),
+            "block" => Block::with_number(number).map(Token::Block),
+            "ss" => Some(Token::StackSlot(number)),
+            "gv" => Some(Token::GlobalValue(number)),
+            "heap" => Some(Token::Heap(number)),
+            "table" => Some(Token::Table(number)),
+            "jt" => Some(Token::JumpTable(number)),
+            "fn" => Some(Token::FuncRef(number)),
+            "sig" => Some(Token::SigRef(number)),
+            "u" => Some(Token::UserRef(number)),
+            _ => None,
+        }
+    }
+
+    // Recognize a scalar or vector type.
+    fn value_type(text: &str, prefix: &str, number: u32) -> Option<Token<'a>> {
+        let is_vector = prefix.ends_with('x');
+        let scalar = if is_vector {
+            &prefix[0..prefix.len() - 1]
+        } else {
+            text
+        };
+        let base_type = match scalar {
+            "i8" => types::I8,
+            "i16" => types::I16,
+            "i32" => types::I32,
+            "i64" => types::I64,
+            "i128" => types::I128,
+            "f32" => types::F32,
+            "f64" => types::F64,
+            "b1" => types::B1,
+            "b8" => types::B8,
+            "b16" => types::B16,
+            "b32" => types::B32,
+            "b64" => types::B64,
+            "b128" => types::B128,
+            "r32" => types::R32,
+            "r64" => types::R64,
+            _ => return None,
+        };
+        if is_vector {
+            if number <= u32::from(u16::MAX) {
+                base_type.by(number as u16).map(Token::Type)
+            } else {
+                None
+            }
+        } else {
+            Some(Token::Type(base_type))
+        }
+    }
+
+    fn scan_name(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
+        let loc = self.loc();
+        let begin = self.pos + 1;
+
+        assert_eq!(self.lookahead, Some('%'));
+
+        while let Some(c) = self.next_ch() {
+            if !(c.is_ascii() && c.is_alphanumeric() || c == '_') {
+                break;
+            }
+        }
+
+        let end = self.pos;
+        token(Token::Name(&self.source[begin..end]), loc)
+    }
+
+    /// Scan for a multi-line quoted string with no escape character.
+    fn scan_string(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
+        let loc = self.loc();
+        let begin = self.pos + 1;
+
+        assert_eq!(self.lookahead, Some('"'));
+
+        while let Some(c) = self.next_ch() {
+            if c == '"' {
+                break;
+            }
+        }
+
+        let end = self.pos;
+        if self.lookahead != Some('"') {
+            return error(LexError::InvalidChar, self.loc());
+        }
+        self.next_ch();
+        token(Token::String(&self.source[begin..end]), loc)
+    }
+
+    fn scan_hex_sequence(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
+        let loc = self.loc();
+        let begin = self.pos + 1;
+
+        assert_eq!(self.lookahead, Some('#'));
+
+        while let Some(c) = self.next_ch() {
+            if !char::is_digit(c, 16) {
+                break;
+            }
+        }
+
+        let end = self.pos;
+        token(Token::HexSequence(&self.source[begin..end]), loc)
+    }
+
+    fn scan_srcloc(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
+        let loc = self.loc();
+        let begin = self.pos + 1;
+
+        assert_eq!(self.lookahead, Some('@'));
+
+        while let Some(c) = self.next_ch() {
+            if !char::is_digit(c, 16) {
+                break;
+            }
+        }
+
+        let end = self.pos;
+        token(Token::SourceLoc(&self.source[begin..end]), loc)
+    }
+
+    /// Get the next token or a lexical error.
+    ///
+    /// Return None when the end of the source is encountered.
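+    ///
+    /// A sketch of the intended driving loop (hypothetical caller code):
+    ///
+    /// ```ignore
+    /// while let Some(result) = lexer.next() {
+    ///     match result {
+    ///         Ok(LocatedToken { token, location }) => { /* consume the token */ }
+    ///         Err(LocatedError { error, location }) => { /* report the error */ }
+    ///     }
+    /// }
+    /// ```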
+ #[allow(clippy::cognitive_complexity)] + pub fn next(&mut self) -> Option, LocatedError>> { + loop { + let loc = self.loc(); + return match self.lookahead { + None => None, + Some(';') => Some(self.scan_comment()), + Some('(') => Some(self.scan_char(Token::LPar)), + Some(')') => Some(self.scan_char(Token::RPar)), + Some('{') => Some(self.scan_char(Token::LBrace)), + Some('}') => Some(self.scan_char(Token::RBrace)), + Some('[') => Some(self.scan_char(Token::LBracket)), + Some(']') => Some(self.scan_char(Token::RBracket)), + Some(',') => Some(self.scan_char(Token::Comma)), + Some('.') => Some(self.scan_char(Token::Dot)), + Some(':') => Some(self.scan_char(Token::Colon)), + Some('=') => Some(self.scan_char(Token::Equal)), + Some('!') => Some(self.scan_char(Token::Not)), + Some('+') => Some(self.scan_number()), + Some('-') => { + if self.looking_at("->") { + Some(self.scan_chars(2, Token::Arrow)) + } else { + Some(self.scan_number()) + } + } + Some(ch) if ch.is_digit(10) => Some(self.scan_number()), + Some(ch) if ch.is_alphabetic() => { + if self.looking_at("NaN") || self.looking_at("Inf") { + Some(self.scan_number()) + } else { + Some(self.scan_word()) + } + } + Some('%') => Some(self.scan_name()), + Some('"') => Some(self.scan_string()), + Some('#') => Some(self.scan_hex_sequence()), + Some('@') => Some(self.scan_srcloc()), + Some(ch) if ch.is_whitespace() => { + self.next_ch(); + continue; + } + _ => { + // Skip invalid char, return error. + self.next_ch(); + Some(error(LexError::InvalidChar, loc)) + } + }; + } + } +} + +#[cfg(test)] +mod tests { + use super::trailing_digits; + use super::*; + use crate::error::Location; + use cranelift_codegen::ir::types; + use cranelift_codegen::ir::{Block, Value}; + + #[test] + fn digits() { + assert_eq!(trailing_digits(""), 0); + assert_eq!(trailing_digits("x"), 0); + assert_eq!(trailing_digits("0x"), 0); + assert_eq!(trailing_digits("x1"), 1); + assert_eq!(trailing_digits("1x1"), 1); + assert_eq!(trailing_digits("1x01"), 2); + } + + #[test] + fn entity_name() { + assert_eq!(split_entity_name(""), None); + assert_eq!(split_entity_name("x"), None); + assert_eq!(split_entity_name("x+"), None); + assert_eq!(split_entity_name("x+1"), Some(("x+", 1))); + assert_eq!(split_entity_name("x-1"), Some(("x-", 1))); + assert_eq!(split_entity_name("1"), Some(("", 1))); + assert_eq!(split_entity_name("x1"), Some(("x", 1))); + assert_eq!(split_entity_name("xy0"), Some(("xy", 0))); + // Reject this non-canonical form. + assert_eq!(split_entity_name("inst01"), None); + } + + fn token<'a>(token: Token<'a>, line: usize) -> Option, LocatedError>> { + Some(super::token(token, Location { line_number: line })) + } + + fn error<'a>(error: LexError, line: usize) -> Option, LocatedError>> { + Some(super::error(error, Location { line_number: line })) + } + + #[test] + fn make_lexer() { + let mut l1 = Lexer::new(""); + let mut l2 = Lexer::new(" "); + let mut l3 = Lexer::new("\n "); + + assert_eq!(l1.next(), None); + assert_eq!(l2.next(), None); + assert_eq!(l3.next(), None); + } + + #[test] + fn lex_comment() { + let mut lex = Lexer::new("; hello"); + assert_eq!(lex.next(), token(Token::Comment("; hello"), 1)); + assert_eq!(lex.next(), None); + + lex = Lexer::new("\n ;hello\n;foo"); + assert_eq!(lex.next(), token(Token::Comment(";hello"), 2)); + assert_eq!(lex.next(), token(Token::Comment(";foo"), 3)); + assert_eq!(lex.next(), None); + + // Scan a comment after an invalid char. 
+ let mut lex = Lexer::new("$; hello"); + assert_eq!(lex.next(), error(LexError::InvalidChar, 1)); + assert_eq!(lex.next(), token(Token::Comment("; hello"), 1)); + assert_eq!(lex.next(), None); + } + + #[test] + fn lex_chars() { + let mut lex = Lexer::new("(); hello\n = :{, }."); + assert_eq!(lex.next(), token(Token::LPar, 1)); + assert_eq!(lex.next(), token(Token::RPar, 1)); + assert_eq!(lex.next(), token(Token::Comment("; hello"), 1)); + assert_eq!(lex.next(), token(Token::Equal, 2)); + assert_eq!(lex.next(), token(Token::Colon, 2)); + assert_eq!(lex.next(), token(Token::LBrace, 2)); + assert_eq!(lex.next(), token(Token::Comma, 2)); + assert_eq!(lex.next(), token(Token::RBrace, 2)); + assert_eq!(lex.next(), token(Token::Dot, 2)); + assert_eq!(lex.next(), None); + } + + #[test] + fn lex_numbers() { + let mut lex = Lexer::new(" 0 2_000 -1,0xf -0x0 0.0 0x0.4p-34 NaN +5"); + assert_eq!(lex.next(), token(Token::Integer("0"), 1)); + assert_eq!(lex.next(), token(Token::Integer("2_000"), 1)); + assert_eq!(lex.next(), token(Token::Integer("-1"), 1)); + assert_eq!(lex.next(), token(Token::Comma, 1)); + assert_eq!(lex.next(), token(Token::Integer("0xf"), 1)); + assert_eq!(lex.next(), token(Token::Integer("-0x0"), 1)); + assert_eq!(lex.next(), token(Token::Float("0.0"), 1)); + assert_eq!(lex.next(), token(Token::Float("0x0.4p-34"), 1)); + assert_eq!(lex.next(), token(Token::Float("NaN"), 1)); + assert_eq!(lex.next(), token(Token::Integer("+5"), 1)); + assert_eq!(lex.next(), None); + } + + #[test] + fn lex_identifiers() { + let mut lex = Lexer::new( + "v0 v00 vx01 block1234567890 block5234567890 v1x vx1 vxvx4 \ + function0 function b1 i32x4 f32x5 \ + iflags fflags iflagss", + ); + assert_eq!( + lex.next(), + token(Token::Value(Value::with_number(0).unwrap()), 1) + ); + assert_eq!(lex.next(), token(Token::Identifier("v00"), 1)); + assert_eq!(lex.next(), token(Token::Identifier("vx01"), 1)); + assert_eq!( + lex.next(), + token(Token::Block(Block::with_number(1234567890).unwrap()), 1) + ); + assert_eq!(lex.next(), token(Token::Identifier("block5234567890"), 1)); + assert_eq!(lex.next(), token(Token::Identifier("v1x"), 1)); + assert_eq!(lex.next(), token(Token::Identifier("vx1"), 1)); + assert_eq!(lex.next(), token(Token::Identifier("vxvx4"), 1)); + assert_eq!(lex.next(), token(Token::Identifier("function0"), 1)); + assert_eq!(lex.next(), token(Token::Identifier("function"), 1)); + assert_eq!(lex.next(), token(Token::Type(types::B1), 1)); + assert_eq!(lex.next(), token(Token::Type(types::I32X4), 1)); + assert_eq!(lex.next(), token(Token::Identifier("f32x5"), 1)); + assert_eq!(lex.next(), token(Token::Type(types::IFLAGS), 1)); + assert_eq!(lex.next(), token(Token::Type(types::FFLAGS), 1)); + assert_eq!(lex.next(), token(Token::Identifier("iflagss"), 1)); + assert_eq!(lex.next(), None); + } + + #[test] + fn lex_hex_sequences() { + let mut lex = Lexer::new("#0 #DEADbeef123 #789"); + + assert_eq!(lex.next(), token(Token::HexSequence("0"), 1)); + assert_eq!(lex.next(), token(Token::HexSequence("DEADbeef123"), 1)); + assert_eq!(lex.next(), token(Token::HexSequence("789"), 1)); + } + + #[test] + fn lex_names() { + let mut lex = Lexer::new("%0 %x3 %function %123_abc %ss0 %v3 %block11 %_"); + + assert_eq!(lex.next(), token(Token::Name("0"), 1)); + assert_eq!(lex.next(), token(Token::Name("x3"), 1)); + assert_eq!(lex.next(), token(Token::Name("function"), 1)); + assert_eq!(lex.next(), token(Token::Name("123_abc"), 1)); + assert_eq!(lex.next(), token(Token::Name("ss0"), 1)); + assert_eq!(lex.next(), 
token(Token::Name("v3"), 1)); + assert_eq!(lex.next(), token(Token::Name("block11"), 1)); + assert_eq!(lex.next(), token(Token::Name("_"), 1)); + } + + #[test] + fn lex_strings() { + let mut lex = Lexer::new( + r#""" "0" "x3""function" "123 abc" "\" "start + and end on + different lines" "#, + ); + + assert_eq!(lex.next(), token(Token::String(""), 1)); + assert_eq!(lex.next(), token(Token::String("0"), 1)); + assert_eq!(lex.next(), token(Token::String("x3"), 1)); + assert_eq!(lex.next(), token(Token::String("function"), 1)); + assert_eq!(lex.next(), token(Token::String("123 abc"), 1)); + assert_eq!(lex.next(), token(Token::String(r#"\"#), 1)); + assert_eq!( + lex.next(), + token( + Token::String( + r#"start + and end on + different lines"# + ), + 1 + ) + ); + } + + #[test] + fn lex_userrefs() { + let mut lex = Lexer::new("u0 u1 u234567890 u9:8765"); + + assert_eq!(lex.next(), token(Token::UserRef(0), 1)); + assert_eq!(lex.next(), token(Token::UserRef(1), 1)); + assert_eq!(lex.next(), token(Token::UserRef(234567890), 1)); + assert_eq!(lex.next(), token(Token::UserRef(9), 1)); + assert_eq!(lex.next(), token(Token::Colon, 1)); + assert_eq!(lex.next(), token(Token::Integer("8765"), 1)); + assert_eq!(lex.next(), None); + } +} diff --git a/cranelift/reader/src/lib.rs b/cranelift/reader/src/lib.rs new file mode 100644 index 0000000000..f0922bf884 --- /dev/null +++ b/cranelift/reader/src/lib.rs @@ -0,0 +1,42 @@ +//! Cranelift file reader library. +//! +//! The `cranelift_reader` library supports reading .clif files. This functionality is needed for +//! testing Cranelift, but is not essential for a JIT compiler. + +#![deny( + missing_docs, + trivial_numeric_casts, + unused_extern_crates, + unstable_features +)] +#![warn(unused_import_braces)] +#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))] +#![cfg_attr( + feature = "cargo-clippy", + warn( + clippy::float_arithmetic, + clippy::mut_mut, + clippy::nonminimal_bool, + clippy::option_map_unwrap_or, + clippy::option_map_unwrap_or_else, + clippy::print_stdout, + clippy::unicode_not_nfc, + clippy::use_self + ) +)] + +pub use crate::error::{Location, ParseError, ParseResult}; +pub use crate::isaspec::{parse_options, IsaSpec}; +pub use crate::parser::{parse_functions, parse_test, ParseOptions}; +pub use crate::sourcemap::SourceMap; +pub use crate::testcommand::{TestCommand, TestOption}; +pub use crate::testfile::{Comment, Details, Feature, TestFile}; + +mod error; +mod isaspec; +mod lexer; +mod parser; +mod sourcemap; +mod testcommand; +mod testfile; diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs new file mode 100644 index 0000000000..02bf24ee8e --- /dev/null +++ b/cranelift/reader/src/parser.rs @@ -0,0 +1,3357 @@ +//! Parser for .clif files. 
+ +use crate::error::{Location, ParseError, ParseResult}; +use crate::isaspec; +use crate::lexer::{LexError, Lexer, LocatedError, LocatedToken, Token}; +use crate::sourcemap::SourceMap; +use crate::testcommand::TestCommand; +use crate::testfile::{Comment, Details, Feature, TestFile}; +use cranelift_codegen::entity::EntityRef; +use cranelift_codegen::ir; +use cranelift_codegen::ir::entities::AnyEntity; +use cranelift_codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64}; +use cranelift_codegen::ir::instructions::{InstructionData, InstructionFormat, VariableArgs}; +use cranelift_codegen::ir::types::INVALID; +use cranelift_codegen::ir::types::*; +use cranelift_codegen::ir::{ + AbiParam, ArgumentExtension, ArgumentLoc, Block, ConstantData, ExtFuncData, ExternalName, + FuncRef, Function, GlobalValue, GlobalValueData, Heap, HeapData, HeapStyle, JumpTable, + JumpTableData, MemFlags, Opcode, SigRef, Signature, StackSlot, StackSlotData, StackSlotKind, + Table, TableData, Type, Value, ValueLoc, +}; +use cranelift_codegen::isa::{self, CallConv, Encoding, RegUnit, TargetIsa}; +use cranelift_codegen::packed_option::ReservedValue; +use cranelift_codegen::{settings, timing}; +use std::mem; +use std::str::FromStr; +use std::{u16, u32}; +use target_lexicon::Triple; + +/// After some quick benchmarks a program should never have more than 100,000 blocks. +const MAX_BLOCKS_IN_A_FUNCTION: u32 = 100_000; + +/// Parse the entire `text` into a list of functions. +/// +/// Any test commands or target declarations are ignored. +pub fn parse_functions(text: &str) -> ParseResult> { + let _tt = timing::parse_text(); + parse_test(text, ParseOptions::default()) + .map(|file| file.functions.into_iter().map(|(func, _)| func).collect()) +} + +/// Options for configuring the parsing of filetests. +pub struct ParseOptions<'a> { + /// Compiler passes to run on the parsed functions. + pub passes: Option<&'a [String]>, + /// Target ISA for compiling the parsed functions, e.g. "x86_64 skylake". + pub target: Option<&'a str>, + /// Default calling convention used when none is specified for a parsed function. + pub default_calling_convention: CallConv, +} + +impl Default for ParseOptions<'_> { + fn default() -> Self { + Self { + passes: None, + target: None, + default_calling_convention: CallConv::Fast, + } + } +} + +/// Parse the entire `text` as a test case file. +/// +/// The returned `TestFile` contains direct references to substrings of `text`. +pub fn parse_test<'a>(text: &'a str, options: ParseOptions<'a>) -> ParseResult> { + let _tt = timing::parse_text(); + let mut parser = Parser::new(text); + + // Gather the preamble comments. + parser.start_gathering_comments(); + + let isa_spec: isaspec::IsaSpec; + let commands: Vec>; + + // Check for specified passes and target, if present throw out test commands/targets specified + // in file. + match options.passes { + Some(pass_vec) => { + parser.parse_test_commands(); + commands = parser.parse_cmdline_passes(pass_vec); + parser.parse_target_specs()?; + isa_spec = parser.parse_cmdline_target(options.target)?; + } + None => { + commands = parser.parse_test_commands(); + isa_spec = parser.parse_target_specs()?; + } + }; + let features = parser.parse_cranelift_features()?; + + // Decide between using the calling convention passed in the options or using the + // host's calling convention--if any tests are to be run on the host we should default to the + // host's calling convention. 
+ parser = if commands.iter().any(|tc| tc.command == "run") { + let host_default_calling_convention = CallConv::triple_default(&Triple::host()); + parser.with_default_calling_convention(host_default_calling_convention) + } else { + parser.with_default_calling_convention(options.default_calling_convention) + }; + + parser.token(); + parser.claim_gathered_comments(AnyEntity::Function); + + let preamble_comments = parser.take_comments(); + let functions = parser.parse_function_list(isa_spec.unique_isa())?; + + Ok(TestFile { + commands, + isa_spec, + features, + preamble_comments, + functions, + }) +} + +pub struct Parser<'a> { + lex: Lexer<'a>, + + lex_error: Option, + + /// Current lookahead token. + lookahead: Option>, + + /// Location of lookahead. + loc: Location, + + /// Are we gathering any comments that we encounter? + gathering_comments: bool, + + /// The gathered comments; claim them with `claim_gathered_comments`. + gathered_comments: Vec<&'a str>, + + /// Comments collected so far. + comments: Vec>, + + /// Default calling conventions; used when none is specified. + default_calling_convention: CallConv, +} + +/// Context for resolving references when parsing a single function. +struct Context<'a> { + function: Function, + map: SourceMap, + + /// Aliases to resolve once value definitions are known. + aliases: Vec, + + /// Reference to the unique_isa for things like parsing target-specific instruction encoding + /// information. This is only `Some` if exactly one set of `isa` directives were found in the + /// prologue (it is valid to have directives for multiple different targets, but in that case + /// we couldn't know which target the provided encodings are intended for) + unique_isa: Option<&'a dyn TargetIsa>, +} + +impl<'a> Context<'a> { + fn new(f: Function, unique_isa: Option<&'a dyn TargetIsa>) -> Self { + Self { + function: f, + map: SourceMap::new(), + unique_isa, + aliases: Vec::new(), + } + } + + // Get the index of a recipe name if it exists. + fn find_recipe_index(&self, recipe_name: &str) -> Option { + if let Some(unique_isa) = self.unique_isa { + unique_isa + .encoding_info() + .names + .iter() + .position(|&name| name == recipe_name) + .map(|idx| idx as u16) + } else { + None + } + } + + // Allocate a new stack slot. + fn add_ss(&mut self, ss: StackSlot, data: StackSlotData, loc: Location) -> ParseResult<()> { + self.map.def_ss(ss, loc)?; + while self.function.stack_slots.next_key().index() <= ss.index() { + self.function + .create_stack_slot(StackSlotData::new(StackSlotKind::SpillSlot, 0)); + } + self.function.stack_slots[ss] = data; + Ok(()) + } + + // Resolve a reference to a stack slot. + fn check_ss(&self, ss: StackSlot, loc: Location) -> ParseResult<()> { + if !self.map.contains_ss(ss) { + err!(loc, "undefined stack slot {}", ss) + } else { + Ok(()) + } + } + + // Allocate a global value slot. + fn add_gv(&mut self, gv: GlobalValue, data: GlobalValueData, loc: Location) -> ParseResult<()> { + self.map.def_gv(gv, loc)?; + while self.function.global_values.next_key().index() <= gv.index() { + self.function.create_global_value(GlobalValueData::Symbol { + name: ExternalName::testcase(""), + offset: Imm64::new(0), + colocated: false, + tls: false, + }); + } + self.function.global_values[gv] = data; + Ok(()) + } + + // Resolve a reference to a global value. + fn check_gv(&self, gv: GlobalValue, loc: Location) -> ParseResult<()> { + if !self.map.contains_gv(gv) { + err!(loc, "undefined global value {}", gv) + } else { + Ok(()) + } + } + + // Allocate a heap slot. 
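+    // As with the other `add_*` helpers above, entities may be declared out of
+    // order in the source text, so any skipped slots are first filled with
+    // placeholder data before the real data is stored.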
+ fn add_heap(&mut self, heap: Heap, data: HeapData, loc: Location) -> ParseResult<()> { + self.map.def_heap(heap, loc)?; + while self.function.heaps.next_key().index() <= heap.index() { + self.function.create_heap(HeapData { + base: GlobalValue::reserved_value(), + min_size: Uimm64::new(0), + offset_guard_size: Uimm64::new(0), + style: HeapStyle::Static { + bound: Uimm64::new(0), + }, + index_type: INVALID, + }); + } + self.function.heaps[heap] = data; + Ok(()) + } + + // Resolve a reference to a heap. + fn check_heap(&self, heap: Heap, loc: Location) -> ParseResult<()> { + if !self.map.contains_heap(heap) { + err!(loc, "undefined heap {}", heap) + } else { + Ok(()) + } + } + + // Allocate a table slot. + fn add_table(&mut self, table: Table, data: TableData, loc: Location) -> ParseResult<()> { + while self.function.tables.next_key().index() <= table.index() { + self.function.create_table(TableData { + base_gv: GlobalValue::reserved_value(), + min_size: Uimm64::new(0), + bound_gv: GlobalValue::reserved_value(), + element_size: Uimm64::new(0), + index_type: INVALID, + }); + } + self.function.tables[table] = data; + self.map.def_table(table, loc) + } + + // Resolve a reference to a table. + fn check_table(&self, table: Table, loc: Location) -> ParseResult<()> { + if !self.map.contains_table(table) { + err!(loc, "undefined table {}", table) + } else { + Ok(()) + } + } + + // Allocate a new signature. + fn add_sig( + &mut self, + sig: SigRef, + data: Signature, + loc: Location, + defaultcc: CallConv, + ) -> ParseResult<()> { + self.map.def_sig(sig, loc)?; + while self.function.dfg.signatures.next_key().index() <= sig.index() { + self.function.import_signature(Signature::new(defaultcc)); + } + self.function.dfg.signatures[sig] = data; + Ok(()) + } + + // Resolve a reference to a signature. + fn check_sig(&self, sig: SigRef, loc: Location) -> ParseResult<()> { + if !self.map.contains_sig(sig) { + err!(loc, "undefined signature {}", sig) + } else { + Ok(()) + } + } + + // Allocate a new external function. + fn add_fn(&mut self, fn_: FuncRef, data: ExtFuncData, loc: Location) -> ParseResult<()> { + self.map.def_fn(fn_, loc)?; + while self.function.dfg.ext_funcs.next_key().index() <= fn_.index() { + self.function.import_function(ExtFuncData { + name: ExternalName::testcase(""), + signature: SigRef::reserved_value(), + colocated: false, + }); + } + self.function.dfg.ext_funcs[fn_] = data; + Ok(()) + } + + // Resolve a reference to a function. + fn check_fn(&self, fn_: FuncRef, loc: Location) -> ParseResult<()> { + if !self.map.contains_fn(fn_) { + err!(loc, "undefined function {}", fn_) + } else { + Ok(()) + } + } + + // Allocate a new jump table. + fn add_jt(&mut self, jt: JumpTable, data: JumpTableData, loc: Location) -> ParseResult<()> { + self.map.def_jt(jt, loc)?; + while self.function.jump_tables.next_key().index() <= jt.index() { + self.function.create_jump_table(JumpTableData::new()); + } + self.function.jump_tables[jt] = data; + Ok(()) + } + + // Resolve a reference to a jump table. + fn check_jt(&self, jt: JumpTable, loc: Location) -> ParseResult<()> { + if !self.map.contains_jt(jt) { + err!(loc, "undefined jump table {}", jt) + } else { + Ok(()) + } + } + + // Allocate a new block. 
+ fn add_block(&mut self, block: Block, loc: Location) -> ParseResult { + self.map.def_block(block, loc)?; + while self.function.dfg.num_blocks() <= block.index() { + self.function.dfg.make_block(); + } + self.function.layout.append_block(block); + Ok(block) + } +} + +impl<'a> Parser<'a> { + /// Create a new `Parser` which reads `text`. The referenced text must outlive the parser. + pub fn new(text: &'a str) -> Self { + Self { + lex: Lexer::new(text), + lex_error: None, + lookahead: None, + loc: Location { line_number: 0 }, + gathering_comments: false, + gathered_comments: Vec::new(), + comments: Vec::new(), + default_calling_convention: CallConv::Fast, + } + } + + /// Modify the default calling convention; returns a new parser with the changed calling + /// convention. + pub fn with_default_calling_convention(self, default_calling_convention: CallConv) -> Self { + Self { + default_calling_convention, + ..self + } + } + + // Consume the current lookahead token and return it. + fn consume(&mut self) -> Token<'a> { + self.lookahead.take().expect("No token to consume") + } + + // Consume the whole line following the current lookahead token. + // Return the text of the line tail. + fn consume_line(&mut self) -> &'a str { + let rest = self.lex.rest_of_line(); + self.consume(); + rest + } + + // Get the current lookahead token, after making sure there is one. + fn token(&mut self) -> Option> { + // clippy says self.lookahead is immutable so this loop is either infinite or never + // running. I don't think this is true - self.lookahead is mutated in the loop body - so + // maybe this is a clippy bug? Either way, disable clippy for this. + #[cfg_attr(feature = "cargo-clippy", allow(clippy::while_immutable_condition))] + while self.lookahead == None { + match self.lex.next() { + Some(Ok(LocatedToken { token, location })) => { + match token { + Token::Comment(text) => { + if self.gathering_comments { + self.gathered_comments.push(text); + } + } + _ => self.lookahead = Some(token), + } + self.loc = location; + } + Some(Err(LocatedError { error, location })) => { + self.lex_error = Some(error); + self.loc = location; + break; + } + None => break, + } + } + self.lookahead + } + + // Enable gathering of all comments encountered. + fn start_gathering_comments(&mut self) { + debug_assert!(!self.gathering_comments); + self.gathering_comments = true; + debug_assert!(self.gathered_comments.is_empty()); + } + + // Claim the comments gathered up to the current position for the + // given entity. + fn claim_gathered_comments>(&mut self, entity: E) { + debug_assert!(self.gathering_comments); + let entity = entity.into(); + self.comments.extend( + self.gathered_comments + .drain(..) + .map(|text| Comment { entity, text }), + ); + self.gathering_comments = false; + } + + // Get the comments collected so far, clearing out the internal list. + fn take_comments(&mut self) -> Vec> { + debug_assert!(!self.gathering_comments); + mem::replace(&mut self.comments, Vec::new()) + } + + // Match and consume a token without payload. + fn match_token(&mut self, want: Token<'a>, err_msg: &str) -> ParseResult> { + if self.token() == Some(want) { + Ok(self.consume()) + } else { + err!(self.loc, err_msg) + } + } + + // If the next token is a `want`, consume it, otherwise do nothing. + fn optional(&mut self, want: Token<'a>) -> bool { + if self.token() == Some(want) { + self.consume(); + true + } else { + false + } + } + + // Match and consume a specific identifier string. 
+    // Match and consume a specific identifier string.
+    // Used for pseudo-keywords like "stack_slot" that only appear in certain contexts.
+    fn match_identifier(&mut self, want: &'static str, err_msg: &str) -> ParseResult<Token<'a>> {
+        if self.token() == Some(Token::Identifier(want)) {
+            Ok(self.consume())
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume a type.
+    fn match_type(&mut self, err_msg: &str) -> ParseResult<Type> {
+        if let Some(Token::Type(t)) = self.token() {
+            self.consume();
+            Ok(t)
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume a stack slot reference.
+    fn match_ss(&mut self, err_msg: &str) -> ParseResult<StackSlot> {
+        if let Some(Token::StackSlot(ss)) = self.token() {
+            self.consume();
+            if let Some(ss) = StackSlot::with_number(ss) {
+                return Ok(ss);
+            }
+        }
+        err!(self.loc, err_msg)
+    }
+
+    // Match and consume a global value reference.
+    fn match_gv(&mut self, err_msg: &str) -> ParseResult<GlobalValue> {
+        if let Some(Token::GlobalValue(gv)) = self.token() {
+            self.consume();
+            if let Some(gv) = GlobalValue::with_number(gv) {
+                return Ok(gv);
+            }
+        }
+        err!(self.loc, err_msg)
+    }
+
+    // Match and consume a function reference.
+    fn match_fn(&mut self, err_msg: &str) -> ParseResult<FuncRef> {
+        if let Some(Token::FuncRef(fnref)) = self.token() {
+            self.consume();
+            if let Some(fnref) = FuncRef::with_number(fnref) {
+                return Ok(fnref);
+            }
+        }
+        err!(self.loc, err_msg)
+    }
+
+    // Match and consume a signature reference.
+    fn match_sig(&mut self, err_msg: &str) -> ParseResult<SigRef> {
+        if let Some(Token::SigRef(sigref)) = self.token() {
+            self.consume();
+            if let Some(sigref) = SigRef::with_number(sigref) {
+                return Ok(sigref);
+            }
+        }
+        err!(self.loc, err_msg)
+    }
+
+    // Match and consume a heap reference.
+    fn match_heap(&mut self, err_msg: &str) -> ParseResult<Heap> {
+        if let Some(Token::Heap(heap)) = self.token() {
+            self.consume();
+            if let Some(heap) = Heap::with_number(heap) {
+                return Ok(heap);
+            }
+        }
+        err!(self.loc, err_msg)
+    }
+
+    // Match and consume a table reference.
+    fn match_table(&mut self, err_msg: &str) -> ParseResult<Table> {
+        if let Some(Token::Table(table)) = self.token() {
+            self.consume();
+            if let Some(table) = Table::with_number(table) {
+                return Ok(table);
+            }
+        }
+        err!(self.loc, err_msg)
+    }
+
+    // Match and consume a jump table reference.
+    fn match_jt(&mut self) -> ParseResult<JumpTable> {
+        if let Some(Token::JumpTable(jt)) = self.token() {
+            self.consume();
+            if let Some(jt) = JumpTable::with_number(jt) {
+                return Ok(jt);
+            }
+        }
+        err!(self.loc, "expected jump table number: jt«n»")
+    }
+
+    // Match and consume a block reference.
+    fn match_block(&mut self, err_msg: &str) -> ParseResult<Block> {
+        if let Some(Token::Block(block)) = self.token() {
+            self.consume();
+            Ok(block)
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume a value reference.
+    fn match_value(&mut self, err_msg: &str) -> ParseResult<Value> {
+        if let Some(Token::Value(v)) = self.token() {
+            self.consume();
+            Ok(v)
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    fn error(&self, message: &str) -> ParseError {
+        ParseError {
+            location: self.loc,
+            message: message.to_string(),
+            is_warning: false,
+        }
+    }
+
+    // Match and consume an Imm64 immediate.
+    fn match_imm64(&mut self, err_msg: &str) -> ParseResult<Imm64> {
+        if let Some(Token::Integer(text)) = self.token() {
+            self.consume();
+            // Lexer just gives us raw text that looks like an integer.
+            // Parse it as an Imm64 to check for overflow and other issues.
+            text.parse().map_err(|e| self.error(e))
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume a hexadecimal immediate.
+    fn match_hexadecimal_constant(&mut self, err_msg: &str) -> ParseResult<ConstantData> {
+        if let Some(Token::Integer(text)) = self.token() {
+            self.consume();
+            text.parse().map_err(|e| {
+                self.error(&format!(
+                    "expected hexadecimal immediate, failed to parse: {}",
+                    e
+                ))
+            })
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume either a hexadecimal Uimm128 immediate (e.g. 0x000102...) or its
+    // literal list form (e.g. [0 1 2...]).
+    fn match_constant_data(&mut self, controlling_type: Type) -> ParseResult<ConstantData> {
+        let expected_size = controlling_type.bytes() as usize;
+        let constant_data = if self.optional(Token::LBracket) {
+            // parse using a list of values, e.g. vconst.i32x4 [0 1 2 3]
+            let uimm128 = self.parse_literals_to_constant_data(controlling_type)?;
+            self.match_token(Token::RBracket, "expected a terminating right bracket")?;
+            uimm128
+        } else {
+            // parse using a hexadecimal value, e.g. 0x000102...
+            let uimm128 =
+                self.match_hexadecimal_constant("expected an immediate hexadecimal operand")?;
+            uimm128.expand_to(expected_size)
+        };
+
+        if constant_data.len() == expected_size {
+            Ok(constant_data)
+        } else {
+            Err(self.error(&format!(
+                "expected parsed constant to have {} bytes",
+                expected_size
+            )))
+        }
+    }
+
+    // Match and consume a Uimm64 immediate.
+    fn match_uimm64(&mut self, err_msg: &str) -> ParseResult<Uimm64> {
+        if let Some(Token::Integer(text)) = self.token() {
+            self.consume();
+            // Lexer just gives us raw text that looks like an integer.
+            // Parse it as an Uimm64 to check for overflow and other issues.
+            text.parse()
+                .map_err(|_| self.error("expected u64 decimal immediate"))
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume a Uimm32 immediate.
+    fn match_uimm32(&mut self, err_msg: &str) -> ParseResult<Uimm32> {
+        if let Some(Token::Integer(text)) = self.token() {
+            self.consume();
+            // Lexer just gives us raw text that looks like an integer.
+            // Parse it as an Uimm32 to check for overflow and other issues.
+            text.parse().map_err(|e| self.error(e))
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume a u8 immediate.
+    // This is used for lane numbers in SIMD vectors.
+    fn match_uimm8(&mut self, err_msg: &str) -> ParseResult<u8> {
+        if let Some(Token::Integer(text)) = self.token() {
+            self.consume();
+            // Lexer just gives us raw text that looks like an integer.
+            if text.starts_with("0x") {
+                // Parse it as a u8 in hexadecimal form.
+                u8::from_str_radix(&text[2..], 16)
+                    .map_err(|_| self.error("unable to parse u8 as a hexadecimal immediate"))
+            } else {
+                // Parse it as a u8 to check for overflow and other issues.
+                text.parse()
+                    .map_err(|_| self.error("expected u8 decimal immediate"))
+            }
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume a signed 16-bit immediate.
+    fn match_imm16(&mut self, err_msg: &str) -> ParseResult<i16> {
+        if let Some(Token::Integer(text)) = self.token() {
+            self.consume();
+            // Lexer just gives us raw text that looks like an integer.
+            // Parse it as a i16 to check for overflow and other issues.
+            text.parse()
+                .map_err(|_| self.error("expected i16 decimal immediate"))
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume an i32 immediate.
+    // This is used for stack argument byte offsets.
+    fn match_imm32(&mut self, err_msg: &str) -> ParseResult<i32> {
+        if let Some(Token::Integer(text)) = self.token() {
+            self.consume();
+            // Lexer just gives us raw text that looks like an integer.
+            // Parse it as a i32 to check for overflow and other issues.
+            text.parse()
+                .map_err(|_| self.error("expected i32 decimal immediate"))
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume an optional offset32 immediate.
+    //
+    // Note that this will match an empty string as an empty offset, and that if an offset is
+    // present, it must contain a sign.
+    fn optional_offset32(&mut self) -> ParseResult<Offset32> {
+        if let Some(Token::Integer(text)) = self.token() {
+            if text.starts_with('+') || text.starts_with('-') {
+                self.consume();
+                // Lexer just gives us raw text that looks like an integer.
+                // Parse it as an `Offset32` to check for overflow and other issues.
+                return text.parse().map_err(|e| self.error(e));
+            }
+        }
+        // An offset32 operand can be absent.
+        Ok(Offset32::new(0))
+    }
+
+    // Match and consume an optional offset immediate as an `Imm64`.
+    //
+    // Note that this will match an empty string as an empty offset, and that if an offset is
+    // present, it must contain a sign.
+    fn optional_offset_imm64(&mut self) -> ParseResult<Imm64> {
+        if let Some(Token::Integer(text)) = self.token() {
+            if text.starts_with('+') || text.starts_with('-') {
+                self.consume();
+                // Lexer just gives us raw text that looks like an integer.
+                // Parse it as an `Imm64` to check for overflow and other issues.
+                return text.parse().map_err(|e| self.error(e));
+            }
+        }
+        // If no explicit offset is present, the offset is 0.
+        Ok(Imm64::new(0))
+    }
+
+    // Match and consume an Ieee32 immediate.
+    fn match_ieee32(&mut self, err_msg: &str) -> ParseResult<Ieee32> {
+        if let Some(Token::Float(text)) = self.token() {
+            self.consume();
+            // Lexer just gives us raw text that looks like a float.
+            // Parse it as an Ieee32 to check for the right number of digits and other issues.
+            text.parse().map_err(|e| self.error(e))
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume an Ieee64 immediate.
+    fn match_ieee64(&mut self, err_msg: &str) -> ParseResult<Ieee64> {
+        if let Some(Token::Float(text)) = self.token() {
+            self.consume();
+            // Lexer just gives us raw text that looks like a float.
+            // Parse it as an Ieee64 to check for the right number of digits and other issues.
+            text.parse().map_err(|e| self.error(e))
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume a boolean immediate.
+    fn match_bool(&mut self, err_msg: &str) -> ParseResult<bool> {
+        if let Some(Token::Identifier(text)) = self.token() {
+            self.consume();
+            match text {
+                "true" => Ok(true),
+                "false" => Ok(false),
+                _ => err!(self.loc, err_msg),
+            }
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume an enumerated immediate, like one of the condition codes.
+    fn match_enum<T: FromStr>(&mut self, err_msg: &str) -> ParseResult<T> {
+        if let Some(Token::Identifier(text)) = self.token() {
+            self.consume();
+            text.parse().map_err(|_| self.error(err_msg))
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume a possibly empty sequence of memory operation flags.
+    fn optional_memflags(&mut self) -> MemFlags {
+        let mut flags = MemFlags::new();
+        while let Some(Token::Identifier(text)) = self.token() {
+            if flags.set_by_name(text) {
+                self.consume();
+            } else {
+                break;
+            }
+        }
+        flags
+    }
+
+    // Match and consume an identifier.
+    fn match_any_identifier(&mut self, err_msg: &str) -> ParseResult<&'a str> {
+        if let Some(Token::Identifier(text)) = self.token() {
+            self.consume();
+            Ok(text)
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume a HexSequence that fits into a u16.
+    // This is used for instruction encodings.
+    fn match_hex16(&mut self, err_msg: &str) -> ParseResult<u16> {
+        if let Some(Token::HexSequence(bits_str)) = self.token() {
+            self.consume();
+            // The only error we anticipate from this parse is overflow, the lexer should
+            // already have ensured that the string doesn't contain invalid characters, and
+            // isn't empty or negative.
+            u16::from_str_radix(bits_str, 16)
+                .map_err(|_| self.error("the hex sequence given overflows the u16 type"))
+        } else {
+            err!(self.loc, err_msg)
+        }
+    }
+
+    // Match and consume a register unit either by number `%15` or by name `%rax`.
+    fn match_regunit(&mut self, isa: Option<&dyn TargetIsa>) -> ParseResult<RegUnit> {
+        if let Some(Token::Name(name)) = self.token() {
+            self.consume();
+            match isa {
+                Some(isa) => isa
+                    .register_info()
+                    .parse_regunit(name)
+                    .ok_or_else(|| self.error("invalid register name")),
+                None => name
+                    .parse()
+                    .map_err(|_| self.error("invalid register number")),
+            }
+        } else {
+            match isa {
+                Some(isa) => err!(self.loc, "Expected {} register unit", isa.name()),
+                None => err!(self.loc, "Expected register unit number"),
+            }
+        }
+    }
+
+    /// Parse an optional source location.
+    ///
+    /// Return the default source location if no real location is present.
+    fn optional_srcloc(&mut self) -> ParseResult<ir::SourceLoc> {
+        if let Some(Token::SourceLoc(text)) = self.token() {
+            match u32::from_str_radix(text, 16) {
+                Ok(num) => {
+                    self.consume();
+                    Ok(ir::SourceLoc::new(num))
+                }
+                Err(_) => return err!(self.loc, "invalid source location: {}", text),
+            }
+        } else {
+            Ok(Default::default())
+        }
+    }
+
+    /// Parse a list of literals (i.e. integers, floats, booleans); e.g. `0 1 2 3`, usually as
+    /// part of something like `vconst.i32x4 [0 1 2 3]`.
+    fn parse_literals_to_constant_data(&mut self, ty: Type) -> ParseResult<ConstantData> {
+        macro_rules! consume {
+            ( $ty:ident, $match_fn:expr ) => {{
+                assert!($ty.is_vector());
+                let mut data = ConstantData::default();
+                for _ in 0..$ty.lane_count() {
+                    data = data.append($match_fn);
+                }
+                data
+            }};
+        }
+
+        fn boolean_to_vec(value: bool, ty: Type) -> Vec<u8> {
+            let lane_size = ty.bytes() / u32::from(ty.lane_count());
+            if lane_size < 1 {
+                panic!("The boolean lane must have a byte size greater than zero.");
+            }
+            let mut buffer = vec![0; lane_size as usize];
+            buffer[0] = if value { 1 } else { 0 };
+            buffer
+        }
+
+        if !ty.is_vector() {
+            err!(self.loc, "Expected a controlling vector type, not {}", ty)
+        } else {
+            let constant_data = match ty.lane_type() {
+                I8 => consume!(ty, self.match_uimm8("Expected an 8-bit unsigned integer")?),
+                I16 => consume!(ty, self.match_imm16("Expected a 16-bit integer")?),
+                I32 => consume!(ty, self.match_imm32("Expected a 32-bit integer")?),
+                I64 => consume!(ty, self.match_imm64("Expected a 64-bit integer")?),
+                F32 => consume!(ty, self.match_ieee32("Expected a 32-bit float")?),
+                F64 => consume!(ty, self.match_ieee64("Expected a 64-bit float")?),
+                b if b.is_bool() => consume!(
+                    ty,
+                    boolean_to_vec(self.match_bool("Expected a boolean")?, ty)
+                ),
+                _ => return err!(self.loc, "Expected a type of: float, int, bool"),
+            };
+            Ok(constant_data)
+        }
+    }
+
+    /// Parse a list of test command passes specified in command line.
+    pub fn parse_cmdline_passes(&mut self, passes: &'a [String]) -> Vec<TestCommand<'a>> {
+        let mut list = Vec::new();
+        for pass in passes {
+            list.push(TestCommand::new(pass));
+        }
+        list
+    }
+
+    /// Parse a list of test commands.
+    pub fn parse_test_commands(&mut self) -> Vec<TestCommand<'a>> {
+        let mut list = Vec::new();
+        while self.token() == Some(Token::Identifier("test")) {
+            list.push(TestCommand::new(self.consume_line()));
+        }
+        list
+    }
+
+    /// Parse a target spec.
+    ///
+    /// Accept the target from the command line for pass command.
+    ///
+    fn parse_cmdline_target(&mut self, target_pass: Option<&str>) -> ParseResult<isaspec::IsaSpec> {
+        // Were there any `target` commands specified?
+        let mut specified_target = false;
+
+        let mut targets = Vec::new();
+        let flag_builder = settings::builder();
+
+        if let Some(targ) = target_pass {
+            let loc = self.loc;
+            let triple = match Triple::from_str(targ) {
+                Ok(triple) => triple,
+                Err(err) => return err!(loc, err),
+            };
+            let isa_builder = match isa::lookup(triple) {
+                Err(isa::LookupError::SupportDisabled) => {
+                    return err!(loc, "support disabled target '{}'", targ);
+                }
+                Err(isa::LookupError::Unsupported) => {
+                    return warn!(loc, "unsupported target '{}'", targ);
+                }
+                Ok(b) => b,
+            };
+            specified_target = true;
+
+            // Construct a trait object with the aggregate settings.
+            targets.push(isa_builder.finish(settings::Flags::new(flag_builder.clone())));
+        }
+
+        if !specified_target {
+            // No `target` commands.
+            Ok(isaspec::IsaSpec::None(settings::Flags::new(flag_builder)))
+        } else {
+            Ok(isaspec::IsaSpec::Some(targets))
+        }
+    }
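+    // For reference (values invented): the two accepted spellings of a vector
+    // constant handled by `match_constant_data` and
+    // `parse_literals_to_constant_data` above are
+    //
+    //     v0 = vconst.i32x4 [0 1 2 3]
+    //     v0 = vconst.i32x4 0x01020304
+    //
+    // The hexadecimal form is expanded to the byte width of the controlling
+    // type and checked against it; the exact literals shown here are made up.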
+    /// Parse a list of target specs.
+    ///
+    /// Accept a mix of `target` and `set` command lines. The `set` commands are cumulative.
+    ///
+    fn parse_target_specs(&mut self) -> ParseResult<isaspec::IsaSpec> {
+        // Were there any `target` commands?
+        let mut seen_target = false;
+        // Location of last `set` command since the last `target`.
+        let mut last_set_loc = None;
+
+        let mut targets = Vec::new();
+        let mut flag_builder = settings::builder();
+
+        while let Some(Token::Identifier(command)) = self.token() {
+            match command {
+                "set" => {
+                    last_set_loc = Some(self.loc);
+                    isaspec::parse_options(
+                        self.consume_line().trim().split_whitespace(),
+                        &mut flag_builder,
+                        self.loc,
+                    )?;
+                }
+                "target" => {
+                    let loc = self.loc;
+                    // Grab the whole line so the lexer won't go looking for tokens on the
+                    // following lines.
+                    let mut words = self.consume_line().trim().split_whitespace();
+                    // Look for `target foo`.
+                    let target_name = match words.next() {
+                        Some(w) => w,
+                        None => return err!(loc, "expected target triple"),
+                    };
+                    let triple = match Triple::from_str(target_name) {
+                        Ok(triple) => triple,
+                        Err(err) => return err!(loc, err),
+                    };
+                    let mut isa_builder = match isa::lookup(triple) {
+                        Err(isa::LookupError::SupportDisabled) => {
+                            continue;
+                        }
+                        Err(isa::LookupError::Unsupported) => {
+                            return warn!(loc, "unsupported target '{}'", target_name);
+                        }
+                        Ok(b) => b,
+                    };
+                    last_set_loc = None;
+                    seen_target = true;
+                    // Apply the target-specific settings to `isa_builder`.
+                    isaspec::parse_options(words, &mut isa_builder, self.loc)?;
+
+                    // Construct a trait object with the aggregate settings.
+                    targets.push(isa_builder.finish(settings::Flags::new(flag_builder.clone())));
+                }
+                _ => break,
+            }
+        }
+
+        if !seen_target {
+            // No `target` commands, but we allow for `set` commands.
+            Ok(isaspec::IsaSpec::None(settings::Flags::new(flag_builder)))
+        } else if let Some(loc) = last_set_loc {
+            err!(
+                loc,
+                "dangling 'set' command after ISA specification has no effect."
+            )
+        } else {
+            Ok(isaspec::IsaSpec::Some(targets))
+        }
+    }
+
+    /// Parse a list of expected features that Cranelift should be compiled with, or without.
+    pub fn parse_cranelift_features(&mut self) -> ParseResult<Vec<Feature<'a>>> {
+        let mut list = Vec::new();
+        while self.token() == Some(Token::Identifier("feature")) {
+            self.consume();
+            let has = !self.optional(Token::Not);
+            match (self.token(), has) {
+                (Some(Token::String(flag)), true) => list.push(Feature::With(flag)),
+                (Some(Token::String(flag)), false) => list.push(Feature::Without(flag)),
+                (tok, _) => {
+                    return err!(
+                        self.loc,
+                        format!("Expected feature flag string, got {:?}", tok)
+                    )
+                }
+            }
+            self.consume();
+        }
+        Ok(list)
+    }
+
+    /// Parse a list of function definitions.
+    ///
+    /// This is the top-level parse function matching the whole contents of a file.
+    pub fn parse_function_list(
+        &mut self,
+        unique_isa: Option<&dyn TargetIsa>,
+    ) -> ParseResult<Vec<(Function, Details<'a>)>> {
+        let mut list = Vec::new();
+        while self.token().is_some() {
+            list.push(self.parse_function(unique_isa)?);
+        }
+        if let Some(err) = self.lex_error {
+            return match err {
+                LexError::InvalidChar => err!(self.loc, "invalid character"),
+            };
+        }
+        Ok(list)
+    }
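+    // Sketch of the kind of test-file header these functions consume (the
+    // specific commands, flags, and feature string are invented for
+    // illustration):
+    //
+    //     test compile
+    //     set opt_level=speed
+    //     target x86_64
+    //     feature !"some-feature"
+    //
+    //     function %foo() { ... }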
+    // Parse a whole function definition.
+    //
+    // function ::= * "function" name signature "{" preamble function-body "}"
+    //
+    fn parse_function(
+        &mut self,
+        unique_isa: Option<&dyn TargetIsa>,
+    ) -> ParseResult<(Function, Details<'a>)> {
+        // Begin gathering comments.
+        // Make sure we don't include any comments before the `function` keyword.
+        self.token();
+        debug_assert!(self.comments.is_empty());
+        self.start_gathering_comments();
+
+        self.match_identifier("function", "expected 'function'")?;
+
+        let location = self.loc;
+
+        // function ::= "function" * name signature "{" preamble function-body "}"
+        let name = self.parse_external_name()?;
+
+        // function ::= "function" name * signature "{" preamble function-body "}"
+        let sig = self.parse_signature(unique_isa)?;
+
+        let mut ctx = Context::new(Function::with_name_signature(name, sig), unique_isa);
+
+        // function ::= "function" name signature * "{" preamble function-body "}"
+        self.match_token(Token::LBrace, "expected '{' before function body")?;
+
+        self.token();
+        self.claim_gathered_comments(AnyEntity::Function);
+
+        // function ::= "function" name signature "{" * preamble function-body "}"
+        self.parse_preamble(&mut ctx)?;
+        // function ::= "function" name signature "{" preamble * function-body "}"
+        self.parse_function_body(&mut ctx)?;
+        // function ::= "function" name signature "{" preamble function-body * "}"
+        self.match_token(Token::RBrace, "expected '}' after function body")?;
+
+        // Collect any comments following the end of the function, then stop gathering comments.
+        self.start_gathering_comments();
+        self.token();
+        self.claim_gathered_comments(AnyEntity::Function);
+
+        let details = Details {
+            location,
+            comments: self.take_comments(),
+            map: ctx.map,
+        };
+
+        Ok((ctx.function, details))
+    }
+
+    // Parse an external name.
+    //
+    // For example, in a function decl, the parser would be in this state:
+    //
+    // function ::= "function" * name signature { ... }
+    //
+    fn parse_external_name(&mut self) -> ParseResult<ExternalName> {
+        match self.token() {
+            Some(Token::Name(s)) => {
+                self.consume();
+                s.parse()
+                    .map_err(|_| self.error("invalid test case or libcall name"))
+            }
+            Some(Token::UserRef(namespace)) => {
+                self.consume();
+                match self.token() {
+                    Some(Token::Colon) => {
+                        self.consume();
+                        match self.token() {
+                            Some(Token::Integer(index_str)) => {
+                                let index: u32 =
+                                    u32::from_str_radix(index_str, 10).map_err(|_| {
+                                        self.error("the integer given overflows the u32 type")
+                                    })?;
+                                self.consume();
+                                Ok(ExternalName::user(namespace, index))
+                            }
+                            _ => err!(self.loc, "expected integer"),
+                        }
+                    }
+                    _ => err!(self.loc, "expected colon"),
+                }
+            }
+            _ => err!(self.loc, "expected external name"),
+        }
+    }
+
+    // Parse a function signature.
+    //
+    // signature ::= * "(" [paramlist] ")" ["->" retlist] [callconv]
+    //
+    fn parse_signature(&mut self, unique_isa: Option<&dyn TargetIsa>) -> ParseResult<Signature> {
+        // Calling convention defaults to `fast`, but can be changed.
+        let mut sig = Signature::new(self.default_calling_convention);
+
+        self.match_token(Token::LPar, "expected function signature: ( args... )")?;
+        // signature ::= "(" * [abi-param-list] ")" ["->" retlist] [callconv]
+        if self.token() != Some(Token::RPar) {
+            sig.params = self.parse_abi_param_list(unique_isa)?;
+        }
+        self.match_token(Token::RPar, "expected ')' after function arguments")?;
+        if self.optional(Token::Arrow) {
+            sig.returns = self.parse_abi_param_list(unique_isa)?;
+        }
+
+        // The calling convention is optional.
+        if let Some(Token::Identifier(text)) = self.token() {
+            match text.parse() {
+                Ok(cc) => {
+                    self.consume();
+                    sig.call_conv = cc;
+                }
+                _ => return err!(self.loc, "unknown calling convention: {}", text),
+            }
+        }
+
+        Ok(sig)
+    }
+
+    // Parse list of function parameter / return value types.
+    //
+    // paramlist ::= * param { "," param }
+    //
+    fn parse_abi_param_list(
+        &mut self,
+        unique_isa: Option<&dyn TargetIsa>,
+    ) -> ParseResult<Vec<AbiParam>> {
+        let mut list = Vec::new();
+
+        // abi-param-list ::= * abi-param { "," abi-param }
+        list.push(self.parse_abi_param(unique_isa)?);
+
+        // abi-param-list ::= abi-param * { "," abi-param }
+        while self.optional(Token::Comma) {
+            // abi-param-list ::= abi-param { "," * abi-param }
+            list.push(self.parse_abi_param(unique_isa)?);
+        }
+
+        Ok(list)
+    }
+
+    // Parse a single argument type with flags.
+    fn parse_abi_param(&mut self, unique_isa: Option<&dyn TargetIsa>) -> ParseResult<AbiParam> {
+        // abi-param ::= * type { flag } [ argumentloc ]
+        let mut arg = AbiParam::new(self.match_type("expected parameter type")?);
+
+        // abi-param ::= type * { flag } [ argumentloc ]
+        while let Some(Token::Identifier(s)) = self.token() {
+            match s {
+                "uext" => arg.extension = ArgumentExtension::Uext,
+                "sext" => arg.extension = ArgumentExtension::Sext,
+                _ => {
+                    if let Ok(purpose) = s.parse() {
+                        arg.purpose = purpose;
+                    } else {
+                        break;
+                    }
+                }
+            }
+            self.consume();
+        }
+
+        // abi-param ::= type { flag } * [ argumentloc ]
+        arg.location = self.parse_argument_location(unique_isa)?;
+
+        Ok(arg)
+    }
+
+    // Parse an argument location specifier; either a register or a byte offset into the stack.
+    fn parse_argument_location(
+        &mut self,
+        unique_isa: Option<&dyn TargetIsa>,
+    ) -> ParseResult<ArgumentLoc> {
+        // argumentloc ::= '[' regname | uimm32 ']'
+        if self.optional(Token::LBracket) {
+            let result = match self.token() {
+                Some(Token::Name(name)) => {
+                    self.consume();
+                    if let Some(isa) = unique_isa {
+                        isa.register_info()
+                            .parse_regunit(name)
+                            .map(ArgumentLoc::Reg)
+                            .ok_or_else(|| self.error("invalid register name"))
+                    } else {
+                        err!(self.loc, "argument location requires exactly one isa")
+                    }
+                }
+                Some(Token::Integer(_)) => {
+                    let offset = self.match_imm32("expected stack argument byte offset")?;
+                    Ok(ArgumentLoc::Stack(offset))
+                }
+                Some(Token::Minus) => {
+                    self.consume();
+                    Ok(ArgumentLoc::Unassigned)
+                }
+                _ => err!(self.loc, "expected argument location"),
+            };
+
+            self.match_token(
+                Token::RBracket,
+                "expected ']' to end argument location annotation",
+            )?;
+
+            result
+        } else {
+            Ok(ArgumentLoc::Unassigned)
+        }
+    }
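+    // An illustrative header accepted by the signature grammar above (the
+    // function name, register names, and offsets are invented):
+    //
+    //     function %f(i32 uext [%rdi], i64 [0]) -> f32 [%xmm0] system_v { ... }
+    //
+    // The bracketed annotations are pre-assigned argument locations, and the
+    // trailing `system_v` overrides the default calling convention.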
+    // Parse the function preamble.
+    //
+    // preamble      ::= * { preamble-decl }
+    // preamble-decl ::= * stack-slot-decl
+    //                   * function-decl
+    //                   * signature-decl
+    //                   * jump-table-decl
+    //
+    // The parsed decls are added to `ctx` rather than returned.
+    fn parse_preamble(&mut self, ctx: &mut Context) -> ParseResult<()> {
+        loop {
+            match self.token() {
+                Some(Token::StackSlot(..)) => {
+                    self.start_gathering_comments();
+                    let loc = self.loc;
+                    self.parse_stack_slot_decl()
+                        .and_then(|(ss, dat)| ctx.add_ss(ss, dat, loc))
+                }
+                Some(Token::GlobalValue(..)) => {
+                    self.start_gathering_comments();
+                    self.parse_global_value_decl()
+                        .and_then(|(gv, dat)| ctx.add_gv(gv, dat, self.loc))
+                }
+                Some(Token::Heap(..)) => {
+                    self.start_gathering_comments();
+                    self.parse_heap_decl()
+                        .and_then(|(heap, dat)| ctx.add_heap(heap, dat, self.loc))
+                }
+                Some(Token::Table(..)) => {
+                    self.start_gathering_comments();
+                    self.parse_table_decl()
+                        .and_then(|(table, dat)| ctx.add_table(table, dat, self.loc))
+                }
+                Some(Token::SigRef(..)) => {
+                    self.start_gathering_comments();
+                    self.parse_signature_decl(ctx.unique_isa)
+                        .and_then(|(sig, dat)| {
+                            ctx.add_sig(sig, dat, self.loc, self.default_calling_convention)
+                        })
+                }
+                Some(Token::FuncRef(..)) => {
+                    self.start_gathering_comments();
+                    self.parse_function_decl(ctx)
+                        .and_then(|(fn_, dat)| ctx.add_fn(fn_, dat, self.loc))
+                }
+                Some(Token::JumpTable(..)) => {
+                    self.start_gathering_comments();
+                    self.parse_jump_table_decl()
+                        .and_then(|(jt, dat)| ctx.add_jt(jt, dat, self.loc))
+                }
+                // More to come..
+                _ => return Ok(()),
+            }?;
+        }
+    }
+
+    // Parse a stack slot decl.
+    //
+    // stack-slot-decl ::= * StackSlot(ss) "=" stack-slot-kind Bytes {"," stack-slot-flag}
+    // stack-slot-kind ::= "explicit_slot"
+    //                   | "spill_slot"
+    //                   | "incoming_arg"
+    //                   | "outgoing_arg"
+    fn parse_stack_slot_decl(&mut self) -> ParseResult<(StackSlot, StackSlotData)> {
+        let ss = self.match_ss("expected stack slot number: ss«n»")?;
+        self.match_token(Token::Equal, "expected '=' in stack slot declaration")?;
+        let kind = self.match_enum("expected stack slot kind")?;
+
+        // stack-slot-decl ::= StackSlot(ss) "=" stack-slot-kind * Bytes {"," stack-slot-flag}
+        let bytes: i64 = self
+            .match_imm64("expected byte-size in stack_slot decl")?
+            .into();
+        if bytes < 0 {
+            return err!(self.loc, "negative stack slot size");
+        }
+        if bytes > i64::from(u32::MAX) {
+            return err!(self.loc, "stack slot too large");
+        }
+        let mut data = StackSlotData::new(kind, bytes as u32);
+
+        // Take additional options.
+        while self.optional(Token::Comma) {
+            match self.match_any_identifier("expected stack slot flags")? {
+                "offset" => data.offset = Some(self.match_imm32("expected byte offset")?),
+                other => return err!(self.loc, "Unknown stack slot flag '{}'", other),
+            }
+        }
+
+        // Collect any trailing comments.
+        self.token();
+        self.claim_gathered_comments(ss);
+
+        // TBD: stack-slot-decl ::= StackSlot(ss) "=" stack-slot-kind Bytes * {"," stack-slot-flag}
+        Ok((ss, data))
+    }
+
+    // Parse a global value decl.
+    //
+    // global-val-decl ::= * GlobalValue(gv) "=" global-val-desc
+    // global-val-desc ::= "vmctx"
+    //                   | "load" "." type "notrap" "aligned" GlobalValue(base) [offset]
+    //                   | "iadd_imm" "(" GlobalValue(base) ")" imm64
+    //                   | "symbol" ["colocated"] name + imm64
+    //
+    fn parse_global_value_decl(&mut self) -> ParseResult<(GlobalValue, GlobalValueData)> {
+        let gv = self.match_gv("expected global value number: gv«n»")?;
+
+        self.match_token(Token::Equal, "expected '=' in global value declaration")?;
+
+        let data = match self.match_any_identifier("expected global value kind")? {
+            "vmctx" => GlobalValueData::VMContext,
+            "load" => {
+                self.match_token(
+                    Token::Dot,
+                    "expected '.' followed by type in load global value decl",
+                )?;
+                let global_type = self.match_type("expected load type")?;
+                let flags = self.optional_memflags();
+                let base = self.match_gv("expected global value: gv«n»")?;
+                let offset = self.optional_offset32()?;
+
+                if !(flags.notrap() && flags.aligned()) {
+                    return err!(self.loc, "global-value load must be notrap and aligned");
+                }
+                GlobalValueData::Load {
+                    base,
+                    offset,
+                    global_type,
+                    readonly: flags.readonly(),
+                }
+            }
+            "iadd_imm" => {
+                self.match_token(
+                    Token::Dot,
+                    "expected '.' followed by type in iadd_imm global value decl",
+                )?;
+                let global_type = self.match_type("expected iadd type")?;
+                let base = self.match_gv("expected global value: gv«n»")?;
+                self.match_token(
+                    Token::Comma,
+                    "expected ',' followed by rhs in iadd_imm global value decl",
+                )?;
+                let offset = self.match_imm64("expected iadd_imm immediate")?;
+                GlobalValueData::IAddImm {
+                    base,
+                    offset,
+                    global_type,
+                }
+            }
+            "symbol" => {
+                let colocated = self.optional(Token::Identifier("colocated"));
+                let tls = self.optional(Token::Identifier("tls"));
+                let name = self.parse_external_name()?;
+                let offset = self.optional_offset_imm64()?;
+                GlobalValueData::Symbol {
+                    name,
+                    offset,
+                    colocated,
+                    tls,
+                }
+            }
+            other => return err!(self.loc, "Unknown global value kind '{}'", other),
+        };
+
+        // Collect any trailing comments.
+        self.token();
+        self.claim_gathered_comments(gv);
+
+        Ok((gv, data))
+    }
+
+    // Parse a heap decl.
+    //
+    // heap-decl  ::= * Heap(heap) "=" heap-desc
+    // heap-desc  ::= heap-style heap-base { "," heap-attr }
+    // heap-style ::= "static" | "dynamic"
+    // heap-base  ::= GlobalValue(base)
+    // heap-attr  ::= "min" Imm64(bytes)
+    //              | "bound" Imm64(bytes)
+    //              | "offset_guard" Imm64(bytes)
+    //              | "index_type" type
+    //
+    fn parse_heap_decl(&mut self) -> ParseResult<(Heap, HeapData)> {
+        let heap = self.match_heap("expected heap number: heap«n»")?;
+        self.match_token(Token::Equal, "expected '=' in heap declaration")?;
+
+        let style_name = self.match_any_identifier("expected 'static' or 'dynamic'")?;
+
+        // heap-desc ::= heap-style * heap-base { "," heap-attr }
+        // heap-base ::= * GlobalValue(base)
+        let base = match self.token() {
+            Some(Token::GlobalValue(base_num)) => match GlobalValue::with_number(base_num) {
+                Some(gv) => gv,
+                None => return err!(self.loc, "invalid global value number for heap base"),
+            },
+            _ => return err!(self.loc, "expected heap base"),
+        };
+        self.consume();
+
+        let mut data = HeapData {
+            base,
+            min_size: 0.into(),
+            offset_guard_size: 0.into(),
+            style: HeapStyle::Static { bound: 0.into() },
+            index_type: ir::types::I32,
+        };
+
+        // heap-desc ::= heap-style heap-base * { "," heap-attr }
+        while self.optional(Token::Comma) {
+            match self.match_any_identifier("expected heap attribute name")? {
+                "min" => {
+                    data.min_size = self.match_uimm64("expected integer min size")?;
+                }
+                "bound" => {
+                    data.style = match style_name {
+                        "dynamic" => HeapStyle::Dynamic {
+                            bound_gv: self.match_gv("expected gv bound")?,
+                        },
+                        "static" => HeapStyle::Static {
+                            bound: self.match_uimm64("expected integer bound")?,
+                        },
+                        t => return err!(self.loc, "unknown heap style '{}'", t),
+                    };
+                }
+                "offset_guard" => {
+                    data.offset_guard_size =
+                        self.match_uimm64("expected integer offset-guard size")?;
+                }
+                "index_type" => {
+                    data.index_type = self.match_type("expected index type")?;
+                }
+                t => return err!(self.loc, "unknown heap attribute '{}'", t),
+            }
+        }
+
+        // Collect any trailing comments.
+        self.token();
+        self.claim_gathered_comments(heap);
+
+        Ok((heap, data))
+    }
+
+    // Parse a table decl.
+    //
+    // table-decl  ::= * Table(table) "=" table-desc
+    // table-desc  ::= table-style table-base { "," table-attr }
+    // table-style ::= "dynamic"
+    // table-base  ::= GlobalValue(base)
+    // table-attr  ::= "min" Imm64(bytes)
+    //               | "bound" Imm64(bytes)
+    //               | "element_size" Imm64(bytes)
+    //               | "index_type" type
+    //
+    fn parse_table_decl(&mut self) -> ParseResult<(Table, TableData)> {
+        let table = self.match_table("expected table number: table«n»")?;
+        self.match_token(Token::Equal, "expected '=' in table declaration")?;
+
+        let style_name = self.match_any_identifier("expected 'static' or 'dynamic'")?;
+
+        // table-desc ::= table-style * table-base { "," table-attr }
+        // table-base ::= * GlobalValue(base)
+        let base = match self.token() {
+            Some(Token::GlobalValue(base_num)) => match GlobalValue::with_number(base_num) {
+                Some(gv) => gv,
+                None => return err!(self.loc, "invalid global value number for table base"),
+            },
+            _ => return err!(self.loc, "expected table base"),
+        };
+        self.consume();
+
+        let mut data = TableData {
+            base_gv: base,
+            min_size: 0.into(),
+            bound_gv: GlobalValue::reserved_value(),
+            element_size: 0.into(),
+            index_type: ir::types::I32,
+        };
+
+        // table-desc ::= * { "," table-attr }
+        while self.optional(Token::Comma) {
+            match self.match_any_identifier("expected table attribute name")? {
+                "min" => {
+                    data.min_size = self.match_uimm64("expected integer min size")?;
+                }
+                "bound" => {
+                    data.bound_gv = match style_name {
+                        "dynamic" => self.match_gv("expected gv bound")?,
+                        t => return err!(self.loc, "unknown table style '{}'", t),
+                    };
+                }
+                "element_size" => {
+                    data.element_size = self.match_uimm64("expected integer element size")?;
+                }
+                "index_type" => {
+                    data.index_type = self.match_type("expected index type")?;
+                }
+                t => return err!(self.loc, "unknown table attribute '{}'", t),
+            }
+        }
+
+        // Collect any trailing comments.
+        self.token();
+        self.claim_gathered_comments(table);
+
+        Ok((table, data))
+    }
+
+    // Parse a signature decl.
+    //
+    // signature-decl ::= SigRef(sigref) "=" signature
+    //
+    fn parse_signature_decl(
+        &mut self,
+        unique_isa: Option<&dyn TargetIsa>,
+    ) -> ParseResult<(SigRef, Signature)> {
+        let sig = self.match_sig("expected signature number: sig«n»")?;
+        self.match_token(Token::Equal, "expected '=' in signature decl")?;
+        let data = self.parse_signature(unique_isa)?;
+
+        // Collect any trailing comments.
+        self.token();
+        self.claim_gathered_comments(sig);
+
+        Ok((sig, data))
+    }
+
+    // Parse a function decl.
+    //
+    // Two variants:
+    //
+    // function-decl     ::= FuncRef(fnref) "=" ["colocated"] name function-decl-sig
+    // function-decl-sig ::= SigRef(sig) | signature
+    //
+    // The first variant allocates a new signature reference. The second references an existing
+    // signature which must be declared first.
+    //
+    fn parse_function_decl(&mut self, ctx: &mut Context) -> ParseResult<(FuncRef, ExtFuncData)> {
+        let fn_ = self.match_fn("expected function number: fn«n»")?;
+        self.match_token(Token::Equal, "expected '=' in function decl")?;
+
+        let loc = self.loc;
+
+        // function-decl ::= FuncRef(fnref) "=" * ["colocated"] name function-decl-sig
+        let colocated = self.optional(Token::Identifier("colocated"));
+
+        // function-decl ::= FuncRef(fnref) "=" ["colocated"] * name function-decl-sig
+        let name = self.parse_external_name()?;
+
+        // function-decl ::= FuncRef(fnref) "=" ["colocated"] name * function-decl-sig
+        let data = match self.token() {
+            Some(Token::LPar) => {
+                // function-decl ::= FuncRef(fnref) "=" ["colocated"] name * signature
+                let sig = self.parse_signature(ctx.unique_isa)?;
+                let sigref = ctx.function.import_signature(sig);
+                ctx.map
+                    .def_entity(sigref.into(), loc)
+                    .expect("duplicate SigRef entities created");
+                ExtFuncData {
+                    name,
+                    signature: sigref,
+                    colocated,
+                }
+            }
+            Some(Token::SigRef(sig_src)) => {
+                let sig = match SigRef::with_number(sig_src) {
+                    None => {
+                        return err!(
+                            self.loc,
+                            "attempted to use invalid signature sig{}",
+                            sig_src
+                        );
+                    }
+                    Some(sig) => sig,
+                };
+                ctx.check_sig(sig, self.loc)?;
+                self.consume();
+                ExtFuncData {
+                    name,
+                    signature: sig,
+                    colocated,
+                }
+            }
+            _ => return err!(self.loc, "expected 'function' or sig«n» in function decl"),
+        };
+
+        // Collect any trailing comments.
+        self.token();
+        self.claim_gathered_comments(fn_);
+
+        Ok((fn_, data))
+    }
+
+    // Parse a jump table decl.
+    //
+    // jump-table-decl ::= * JumpTable(jt) "=" "jump_table" "[" jt-entry {"," jt-entry} "]"
+    fn parse_jump_table_decl(&mut self) -> ParseResult<(JumpTable, JumpTableData)> {
+        let jt = self.match_jt()?;
+        self.match_token(Token::Equal, "expected '=' in jump_table decl")?;
+        self.match_identifier("jump_table", "expected 'jump_table'")?;
+        self.match_token(Token::LBracket, "expected '[' before jump table contents")?;
+
+        let mut data = JumpTableData::new();
+
+        // jump-table-decl ::= JumpTable(jt) "=" "jump_table" "[" * Block(dest) {"," Block(dest)} "]"
+        match self.token() {
+            Some(Token::Block(dest)) => {
+                self.consume();
+                data.push_entry(dest);
+
+                loop {
+                    match self.token() {
+                        Some(Token::Comma) => {
+                            self.consume();
+                            if let Some(Token::Block(dest)) = self.token() {
+                                self.consume();
+                                data.push_entry(dest);
+                            } else {
+                                return err!(self.loc, "expected jump_table entry");
+                            }
+                        }
+                        Some(Token::RBracket) => break,
+                        _ => return err!(self.loc, "expected ']' after jump table contents"),
+                    }
+                }
+            }
+            Some(Token::RBracket) => (),
+            _ => return err!(self.loc, "expected jump_table entry"),
+        }
+
+        self.consume();
+
+        // Collect any trailing comments.
+        self.token();
+        self.claim_gathered_comments(jt);
+
+        Ok((jt, data))
+    }
+
+    // Parse a function body, add contents to `ctx`.
+    //
+    // function-body ::= * { extended-basic-block }
+    //
+    fn parse_function_body(&mut self, ctx: &mut Context) -> ParseResult<()> {
+        while self.token() != Some(Token::RBrace) {
+            self.parse_basic_block(ctx)?;
+        }
+
+        // Now that we've seen all defined values in the function, ensure that
+        // all references refer to a definition.
+        for block in &ctx.function.layout {
+            for inst in ctx.function.layout.block_insts(block) {
+                for value in ctx.function.dfg.inst_args(inst) {
+                    if !ctx.map.contains_value(*value) {
+                        return err!(
+                            ctx.map.location(AnyEntity::Inst(inst)).unwrap(),
+                            "undefined operand value {}",
+                            value
+                        );
+                    }
+                }
+            }
+        }
+
+        for alias in &ctx.aliases {
+            if !ctx.function.dfg.set_alias_type_for_parser(*alias) {
+                let loc = ctx.map.location(AnyEntity::Value(*alias)).unwrap();
+                return err!(loc, "alias cycle involving {}", alias);
+            }
+        }
+
+        Ok(())
+    }
+
+    // Parse a basic block, add contents to `ctx`.
+    //
+    // extended-basic-block ::= * block-header { instruction }
+    // block-header         ::= Block(block) [block-params] ":"
+    //
+    fn parse_basic_block(&mut self, ctx: &mut Context) -> ParseResult<()> {
+        // Collect comments for the next block.
+        self.start_gathering_comments();
+
+        let block_num = self.match_block("expected block header")?;
+        let block = ctx.add_block(block_num, self.loc)?;
+
+        if block_num.as_u32() >= MAX_BLOCKS_IN_A_FUNCTION {
+            return Err(self.error("too many blocks"));
+        }
+
+        if !self.optional(Token::Colon) {
+            // block-header ::= Block(block) [ * block-params ] ":"
+            self.parse_block_params(ctx, block)?;
+            self.match_token(Token::Colon, "expected ':' after block parameters")?;
+        }
+
+        // Collect any trailing comments.
+        self.token();
+        self.claim_gathered_comments(block);
+
+        // extended-basic-block ::= block-header * { instruction }
+        while match self.token() {
+            Some(Token::Value(_))
+            | Some(Token::Identifier(_))
+            | Some(Token::LBracket)
+            | Some(Token::SourceLoc(_)) => true,
+            _ => false,
+        } {
+            let srcloc = self.optional_srcloc()?;
+            let (encoding, result_locations) = self.parse_instruction_encoding(ctx)?;
+
+            // We need to parse instruction results here because they are shared
+            // between the parsing of value aliases and the parsing of instructions.
+            //
+            // inst-results ::= Value(v) { "," Value(v) }
+            let results = self.parse_inst_results()?;
+
+            for result in &results {
+                while ctx.function.dfg.num_values() <= result.index() {
+                    ctx.function.dfg.make_invalid_value_for_parser();
+                }
+            }
+
+            match self.token() {
+                Some(Token::Arrow) => {
+                    self.consume();
+                    self.parse_value_alias(&results, ctx)?;
+                }
+                Some(Token::Equal) => {
+                    self.consume();
+                    self.parse_instruction(
+                        &results,
+                        srcloc,
+                        encoding,
+                        result_locations,
+                        ctx,
+                        block,
+                    )?;
+                }
+                _ if !results.is_empty() => return err!(self.loc, "expected -> or ="),
+                _ => self.parse_instruction(
+                    &results,
+                    srcloc,
+                    encoding,
+                    result_locations,
+                    ctx,
+                    block,
+                )?,
+            }
+        }
+
+        Ok(())
+    }
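+    // For illustration, a block header with parameters as accepted by the
+    // grammar below (value numbers and types are invented):
+    //
+    //     block1(v10: i32, v11: f64):
+    //         ...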
+    // Parse a parenthesized list of block parameters and append them to `block`.
+    //
+    // block-params ::= * "(" block-param { "," block-param } ")"
+    fn parse_block_params(&mut self, ctx: &mut Context, block: Block) -> ParseResult<()> {
+        // block-params ::= * "(" block-param { "," block-param } ")"
+        self.match_token(Token::LPar, "expected '(' before block parameters")?;
+
+        // block-params ::= "(" * block-param { "," block-param } ")"
+        self.parse_block_param(ctx, block)?;
+
+        // block-params ::= "(" block-param * { "," block-param } ")"
+        while self.optional(Token::Comma) {
+            // block-params ::= "(" block-param { "," * block-param } ")"
+            self.parse_block_param(ctx, block)?;
+        }
+
+        // block-params ::= "(" block-param { "," block-param } * ")"
+        self.match_token(Token::RPar, "expected ')' after block parameters")?;
+
+        Ok(())
+    }
+
+    // Parse a single block parameter declaration, and append it to `block`.
+    //
+    // block-param ::= * Value(v) ":" Type(t) arg-loc?
+    // arg-loc     ::= "[" value-location "]"
+    //
+    fn parse_block_param(&mut self, ctx: &mut Context, block: Block) -> ParseResult<()> {
+        // block-param ::= * Value(v) ":" Type(t) arg-loc?
+        let v = self.match_value("block argument must be a value")?;
+        let v_location = self.loc;
+        // block-param ::= Value(v) * ":" Type(t) arg-loc?
+        self.match_token(Token::Colon, "expected ':' after block argument")?;
+        // block-param ::= Value(v) ":" * Type(t) arg-loc?
+
+        while ctx.function.dfg.num_values() <= v.index() {
+            ctx.function.dfg.make_invalid_value_for_parser();
+        }
+
+        let t = self.match_type("expected block argument type")?;
+        // Allocate the block argument.
+        ctx.function.dfg.append_block_param_for_parser(block, t, v);
+        ctx.map.def_value(v, v_location)?;
+
+        // block-param ::= Value(v) ":" Type(t) * arg-loc?
+        if self.optional(Token::LBracket) {
+            let loc = self.parse_value_location(ctx)?;
+            ctx.function.locations[v] = loc;
+            self.match_token(Token::RBracket, "expected ']' after value location")?;
+        }
+
+        Ok(())
+    }
+
+    fn parse_value_location(&mut self, ctx: &Context) -> ParseResult<ValueLoc> {
+        match self.token() {
+            Some(Token::StackSlot(src_num)) => {
+                self.consume();
+                let ss = match StackSlot::with_number(src_num) {
+                    None => {
+                        return err!(
+                            self.loc,
+                            "attempted to use invalid stack slot ss{}",
+                            src_num
+                        );
+                    }
+                    Some(ss) => ss,
+                };
+                ctx.check_ss(ss, self.loc)?;
+                Ok(ValueLoc::Stack(ss))
+            }
+            Some(Token::Name(name)) => {
+                self.consume();
+                if let Some(isa) = ctx.unique_isa {
+                    isa.register_info()
+                        .parse_regunit(name)
+                        .map(ValueLoc::Reg)
+                        .ok_or_else(|| self.error("invalid register value location"))
+                } else {
+                    err!(self.loc, "value location requires exactly one isa")
+                }
+            }
+            Some(Token::Minus) => {
+                self.consume();
+                Ok(ValueLoc::Unassigned)
+            }
+            _ => err!(self.loc, "invalid value location"),
+        }
+    }
+
+    fn parse_instruction_encoding(
+        &mut self,
+        ctx: &Context,
+    ) -> ParseResult<(Option<Encoding>, Option<Vec<ValueLoc>>)> {
+        let (mut encoding, mut result_locations) = (None, None);
+
+        // encoding ::= "[" encoding_literal result_locations "]"
+        if self.optional(Token::LBracket) {
+            // encoding_literal ::= "-" | Identifier HexSequence
+            if !self.optional(Token::Minus) {
+                let recipe = self.match_any_identifier("expected instruction encoding or '-'")?;
+                let bits = self.match_hex16("expected a hex sequence")?;
+
+                if let Some(recipe_index) = ctx.find_recipe_index(recipe) {
+                    encoding = Some(Encoding::new(recipe_index, bits));
+                } else if ctx.unique_isa.is_some() {
+                    return err!(self.loc, "invalid instruction recipe");
+                } else {
+                    // We allow encodings to be specified when there's no unique ISA purely
+                    // for convenience, e.g. when copy-pasting code for a test.
+                }
+            }
+
+            // result_locations ::= ("," ( "-" | names ) )?
+            // names ::= Name { "," Name }
+            if self.optional(Token::Comma) {
+                let mut results = Vec::new();
+
+                results.push(self.parse_value_location(ctx)?);
+                while self.optional(Token::Comma) {
+                    results.push(self.parse_value_location(ctx)?);
+                }
+
+                result_locations = Some(results);
+            }
+
+            self.match_token(
+                Token::RBracket,
+                "expected ']' to terminate instruction encoding",
+            )?;
+        }
+
+        Ok((encoding, result_locations))
+    }
+
+    // Parse instruction results and return them.
+    //
+    // inst-results ::= Value(v) { "," Value(v) }
+    //
+    fn parse_inst_results(&mut self) -> ParseResult<Vec<Value>> {
+        // Result value numbers.
+        let mut results = Vec::new();
+
+        // instruction  ::= * [inst-results "="] Opcode(opc) ["." Type] ...
+        // inst-results ::= * Value(v) { "," Value(v) }
+        if let Some(Token::Value(v)) = self.token() {
+            self.consume();
+
+            results.push(v);
+
+            // inst-results ::= Value(v) * { "," Value(v) }
+            while self.optional(Token::Comma) {
+                // inst-results ::= Value(v) { "," * Value(v) }
+                results.push(self.match_value("expected result value")?);
+            }
+        }
+
+        Ok(results)
+    }
+
+    // Parse a value alias, and append it to `block`.
+    //
+    // value_alias ::= [inst-results] "->" Value(v)
+    //
+    fn parse_value_alias(&mut self, results: &[Value], ctx: &mut Context) -> ParseResult<()> {
+        if results.len() != 1 {
+            return err!(self.loc, "wrong number of aliases");
+        }
+        let result = results[0];
+        let dest = self.match_value("expected value alias")?;
+
+        // Allow duplicate definitions of aliases, as long as they are identical.
+        if ctx.map.contains_value(result) {
+            if let Some(old) = ctx.function.dfg.value_alias_dest_for_serialization(result) {
+                if old != dest {
+                    return err!(
+                        self.loc,
+                        "value {} is already defined as an alias with destination {}",
+                        result,
+                        old
+                    );
+                }
+            } else {
+                return err!(self.loc, "value {} is already defined", result);
+            }
+        } else {
+            ctx.map.def_value(result, self.loc)?;
+        }
+
+        if !ctx.map.contains_value(dest) {
+            return err!(self.loc, "value {} is not yet defined", dest);
+        }
+
+        ctx.function
+            .dfg
+            .make_value_alias_for_serialization(dest, result);
+
+        ctx.aliases.push(result);
+        Ok(())
+    }
+
+    // Parse an instruction, append it to `block`.
+    //
+    // instruction ::= [inst-results "="] Opcode(opc) ["." Type] ...
+    //
+    fn parse_instruction(
+        &mut self,
+        results: &[Value],
+        srcloc: ir::SourceLoc,
+        encoding: Option<Encoding>,
+        result_locations: Option<Vec<ValueLoc>>,
+        ctx: &mut Context,
+        block: Block,
+    ) -> ParseResult<()> {
+        // Define the result values.
+        for val in results {
+            ctx.map.def_value(*val, self.loc)?;
+        }
+
+        // Collect comments for the next instruction.
+        self.start_gathering_comments();
+
+        // instruction ::= [inst-results "="] * Opcode(opc) ["." Type] ...
+        let opcode = if let Some(Token::Identifier(text)) = self.token() {
+            match text.parse() {
+                Ok(opc) => opc,
+                Err(msg) => return err!(self.loc, "{}: '{}'", msg, text),
+            }
+        } else {
+            return err!(self.loc, "expected instruction opcode");
+        };
+        let opcode_loc = self.loc;
+        self.consume();
+
+        // Look for a controlling type variable annotation.
+        // instruction ::= [inst-results "="] Opcode(opc) * ["." Type] ...
+        let explicit_ctrl_type = if self.optional(Token::Dot) {
+            Some(self.match_type("expected type after 'opcode.'")?)
+        } else {
+            None
+        };
+
+        // instruction ::= [inst-results "="] Opcode(opc) ["." Type] * ...
+        let inst_data = self.parse_inst_operands(ctx, opcode, explicit_ctrl_type)?;
+
+        // We're done parsing the instruction now.
+        //
+        // We still need to check that the number of result values in the source matches the opcode
+        // or function call signature. We also need to create values with the right type for all
+        // the instruction results.
+        let ctrl_typevar = self.infer_typevar(ctx, opcode, explicit_ctrl_type, &inst_data)?;
+        let inst = ctx.function.dfg.make_inst(inst_data);
+        let num_results =
+            ctx.function
+                .dfg
+                .make_inst_results_for_parser(inst, ctrl_typevar, results);
+        ctx.function.layout.append_inst(inst, block);
+        ctx.map
+            .def_entity(inst.into(), opcode_loc)
+            .expect("duplicate inst references created");
+
+        if !srcloc.is_default() {
+            ctx.function.srclocs[inst] = srcloc;
+        }
+
+        if let Some(encoding) = encoding {
+            ctx.function.encodings[inst] = encoding;
+        }
+
+        if results.len() != num_results {
+            return err!(
+                self.loc,
+                "instruction produces {} result values, {} given",
+                num_results,
+                results.len()
+            );
+        }
+
+        if let Some(ref result_locations) = result_locations {
+            if results.len() != result_locations.len() {
+                return err!(
+                    self.loc,
+                    "instruction produces {} result values, but {} locations were \
+                     specified",
+                    results.len(),
+                    result_locations.len()
+                );
+            }
+        }
+
+        if let Some(result_locations) = result_locations {
+            for (&value, loc) in ctx
+                .function
+                .dfg
+                .inst_results(inst)
+                .iter()
+                .zip(result_locations)
+            {
+                ctx.function.locations[value] = loc;
+            }
+        }
+
+        // Collect any trailing comments.
+        self.token();
+        self.claim_gathered_comments(inst);
+
+        Ok(())
+    }
+
+    // Type inference for polymorphic instructions.
+    //
+    // The controlling type variable can be specified explicitly as 'splat.i32x4 v5', or it can be
+    // inferred from `inst_data.typevar_operand` for some opcodes.
+    //
+    // Returns the controlling typevar for a polymorphic opcode, or `INVALID` for a
+    // non-polymorphic opcode.
+    fn infer_typevar(
+        &self,
+        ctx: &Context,
+        opcode: Opcode,
+        explicit_ctrl_type: Option<Type>,
+        inst_data: &InstructionData,
+    ) -> ParseResult<Type> {
+        let constraints = opcode.constraints();
+        let ctrl_type = match explicit_ctrl_type {
+            Some(t) => t,
+            None => {
+                if constraints.use_typevar_operand() {
+                    // This is an opcode that supports type inference, AND there was no
+                    // explicit type specified. Look up `ctrl_value` to see if it was defined
+                    // already.
+                    // TBD: If it is defined in another block, the type should have been
+                    // specified explicitly. It is unfortunate that the correctness of IR
+                    // depends on the layout of the blocks.
+                    let ctrl_src_value = inst_data
+                        .typevar_operand(&ctx.function.dfg.value_lists)
+                        .expect("Constraints <-> Format inconsistency");
+                    if !ctx.map.contains_value(ctrl_src_value) {
+                        return err!(
+                            self.loc,
+                            "type variable required for polymorphic opcode, e.g. '{}.{}'; \
+                             can't infer from {} which is not yet defined",
+                            opcode,
+                            constraints.ctrl_typeset().unwrap().example(),
+                            ctrl_src_value
+                        );
+                    }
+                    if !ctx.function.dfg.value_is_valid_for_parser(ctrl_src_value) {
+                        return err!(
+                            self.loc,
+                            "type variable required for polymorphic opcode, e.g. '{}.{}'; \
+                             can't infer from {} which is not yet resolved",
+                            opcode,
+                            constraints.ctrl_typeset().unwrap().example(),
+                            ctrl_src_value
+                        );
+                    }
+                    ctx.function.dfg.value_type(ctrl_src_value)
+                } else if constraints.is_polymorphic() {
+                    // This opcode does not support type inference, so the explicit type
+                    // variable is required.
+                    return err!(
+                        self.loc,
+                        "type variable required for polymorphic opcode, e.g. '{}.{}'",
+                        opcode,
+                        constraints.ctrl_typeset().unwrap().example()
+                    );
+                } else {
+                    // This is a non-polymorphic opcode. No typevar needed.
+                    INVALID
+                }
+            }
+        };
+
+        // Verify that `ctrl_type` is valid for the controlling type variable. We don't want to
+        // attempt deriving types from an incorrect basis.
+        // This is not a complete type check. The verifier does that.
+        if let Some(typeset) = constraints.ctrl_typeset() {
+            // This is a polymorphic opcode.
+            if !typeset.contains(ctrl_type) {
+                return err!(
+                    self.loc,
+                    "{} is not a valid typevar for {}",
+                    ctrl_type,
+                    opcode
+                );
+            }
+        // Treat it as a syntax error to specify a typevar on a non-polymorphic opcode.
+        } else if ctrl_type != INVALID {
+            return err!(self.loc, "{} does not take a typevar", opcode);
+        }
+
+        Ok(ctrl_type)
+    }
+
+    // Parse comma-separated value list into a VariableArgs struct.
+    //
+    // value_list ::= [ value { "," value } ]
+    //
+    fn parse_value_list(&mut self) -> ParseResult<VariableArgs> {
+        let mut args = VariableArgs::new();
+
+        if let Some(Token::Value(v)) = self.token() {
+            args.push(v);
+            self.consume();
+        } else {
+            return Ok(args);
+        }
+
+        while self.optional(Token::Comma) {
+            args.push(self.match_value("expected value in argument list")?);
+        }
+
+        Ok(args)
+    }
+
+    // Parse a `+`-separated value sequence into a VariableArgs struct.
+    fn parse_value_sequence(&mut self) -> ParseResult<VariableArgs> {
+        let mut args = VariableArgs::new();
+
+        if let Some(Token::Value(v)) = self.token() {
+            args.push(v);
+            self.consume();
+        } else {
+            return Ok(args);
+        }
+
+        while self.optional(Token::Plus) {
+            args.push(self.match_value("expected value in argument list")?);
+        }
+
+        Ok(args)
+    }
+
+    // Parse an optional value list enclosed in parentheses.
+    fn parse_opt_value_list(&mut self) -> ParseResult<VariableArgs> {
+        if !self.optional(Token::LPar) {
+            return Ok(VariableArgs::new());
+        }
+
+        let args = self.parse_value_list()?;
+
+        self.match_token(Token::RPar, "expected ')' after arguments")?;
+
+        Ok(args)
+    }
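+    // Operand syntax varies by instruction format; for example (all value
+    // numbers and operands here are invented):
+    //
+    //     v1 = iadd v2, v3                ; Binary
+    //     v4 = iconst.i32 7               ; UnaryImm with explicit ctrl type
+    //     brz v1, block2(v5, v6)          ; Branch with block arguments
+    //     v7 = call fn0(v1, v4)           ; Call through a declared FuncRef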
+    // Parse the operands following the instruction opcode.
+    // This depends on the format of the opcode.
+    fn parse_inst_operands(
+        &mut self,
+        ctx: &mut Context,
+        opcode: Opcode,
+        explicit_control_type: Option<Type>,
+    ) -> ParseResult<InstructionData> {
+        let idata = match opcode.format() {
+            InstructionFormat::Unary => InstructionData::Unary {
+                opcode,
+                arg: self.match_value("expected SSA value operand")?,
+            },
+            InstructionFormat::UnaryImm => InstructionData::UnaryImm {
+                opcode,
+                imm: self.match_imm64("expected immediate integer operand")?,
+            },
+            InstructionFormat::UnaryIeee32 => InstructionData::UnaryIeee32 {
+                opcode,
+                imm: self.match_ieee32("expected immediate 32-bit float operand")?,
+            },
+            InstructionFormat::UnaryIeee64 => InstructionData::UnaryIeee64 {
+                opcode,
+                imm: self.match_ieee64("expected immediate 64-bit float operand")?,
+            },
+            InstructionFormat::UnaryBool => InstructionData::UnaryBool {
+                opcode,
+                imm: self.match_bool("expected immediate boolean operand")?,
+            },
+            InstructionFormat::UnaryGlobalValue => {
+                let gv = self.match_gv("expected global value")?;
+                ctx.check_gv(gv, self.loc)?;
+                InstructionData::UnaryGlobalValue {
+                    opcode,
+                    global_value: gv,
+                }
+            }
+            InstructionFormat::Binary => {
+                let lhs = self.match_value("expected SSA value first operand")?;
+                self.match_token(Token::Comma, "expected ',' between operands")?;
+                let rhs = self.match_value("expected SSA value second operand")?;
+                InstructionData::Binary {
+                    opcode,
+                    args: [lhs, rhs],
+                }
+            }
+            InstructionFormat::BinaryImm => {
+                let lhs = self.match_value("expected SSA value first operand")?;
+                self.match_token(Token::Comma, "expected ',' between operands")?;
+                let rhs = self.match_imm64("expected immediate integer second operand")?;
+                InstructionData::BinaryImm {
+                    opcode,
+                    arg: lhs,
+                    imm: rhs,
+                }
+            }
+            InstructionFormat::Ternary => {
+                // Names here refer to the `select` instruction.
+                // This format is also used by `fma`.
+                let ctrl_arg = self.match_value("expected SSA value control operand")?;
+                self.match_token(Token::Comma, "expected ',' between operands")?;
+                let true_arg = self.match_value("expected SSA value true operand")?;
+                self.match_token(Token::Comma, "expected ',' between operands")?;
+                let false_arg = self.match_value("expected SSA value false operand")?;
+                InstructionData::Ternary {
+                    opcode,
+                    args: [ctrl_arg, true_arg, false_arg],
+                }
+            }
+            InstructionFormat::MultiAry => {
+                let args = self.parse_value_list()?;
+                InstructionData::MultiAry {
+                    opcode,
+                    args: args.into_value_list(&[], &mut ctx.function.dfg.value_lists),
+                }
+            }
+            InstructionFormat::NullAry => InstructionData::NullAry { opcode },
+            InstructionFormat::Jump => {
+                // Parse the destination block number.
+ let block_num = self.match_block("expected jump destination block")?; + let args = self.parse_opt_value_list()?; + InstructionData::Jump { + opcode, + destination: block_num, + args: args.into_value_list(&[], &mut ctx.function.dfg.value_lists), + } + } + InstructionFormat::Branch => { + let ctrl_arg = self.match_value("expected SSA value control operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let block_num = self.match_block("expected branch destination block")?; + let args = self.parse_opt_value_list()?; + InstructionData::Branch { + opcode, + destination: block_num, + args: args.into_value_list(&[ctrl_arg], &mut ctx.function.dfg.value_lists), + } + } + InstructionFormat::BranchInt => { + let cond = self.match_enum("expected intcc condition code")?; + let arg = self.match_value("expected SSA value first operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let block_num = self.match_block("expected branch destination block")?; + let args = self.parse_opt_value_list()?; + InstructionData::BranchInt { + opcode, + cond, + destination: block_num, + args: args.into_value_list(&[arg], &mut ctx.function.dfg.value_lists), + } + } + InstructionFormat::BranchFloat => { + let cond = self.match_enum("expected floatcc condition code")?; + let arg = self.match_value("expected SSA value first operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let block_num = self.match_block("expected branch destination block")?; + let args = self.parse_opt_value_list()?; + InstructionData::BranchFloat { + opcode, + cond, + destination: block_num, + args: args.into_value_list(&[arg], &mut ctx.function.dfg.value_lists), + } + } + InstructionFormat::BranchIcmp => { + let cond = self.match_enum("expected intcc condition code")?; + let lhs = self.match_value("expected SSA value first operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let rhs = self.match_value("expected SSA value second operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let block_num = self.match_block("expected branch destination block")?; + let args = self.parse_opt_value_list()?; + InstructionData::BranchIcmp { + opcode, + cond, + destination: block_num, + args: args.into_value_list(&[lhs, rhs], &mut ctx.function.dfg.value_lists), + } + } + InstructionFormat::BranchTable => { + let arg = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let block_num = self.match_block("expected branch destination block")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let table = self.match_jt()?; + ctx.check_jt(table, self.loc)?; + InstructionData::BranchTable { + opcode, + arg, + destination: block_num, + table, + } + } + InstructionFormat::BranchTableBase => { + let table = self.match_jt()?; + ctx.check_jt(table, self.loc)?; + InstructionData::BranchTableBase { opcode, table } + } + InstructionFormat::BranchTableEntry => { + let index = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let base = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let imm = self.match_uimm8("expected width")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let table = self.match_jt()?; + ctx.check_jt(table, self.loc)?; + InstructionData::BranchTableEntry { + opcode, + args: [index, 
base], + imm, + table, + } + } + InstructionFormat::IndirectJump => { + let arg = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let table = self.match_jt()?; + ctx.check_jt(table, self.loc)?; + InstructionData::IndirectJump { opcode, arg, table } + } + InstructionFormat::InsertLane => { + let lhs = self.match_value("expected SSA value first operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let lane = self.match_uimm8("expected lane number")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let rhs = self.match_value("expected SSA value last operand")?; + InstructionData::InsertLane { + opcode, + lane, + args: [lhs, rhs], + } + } + InstructionFormat::ExtractLane => { + let arg = self.match_value("expected SSA value last operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let lane = self.match_uimm8("expected lane number")?; + InstructionData::ExtractLane { opcode, lane, arg } + } + InstructionFormat::UnaryConst => match explicit_control_type { + None => { + return err!( + self.loc, + "Expected {:?} to have a controlling type variable, e.g. inst.i32x4", + opcode + ) + } + Some(controlling_type) => { + let uimm128 = self.match_constant_data(controlling_type)?; + let constant_handle = ctx.function.dfg.constants.insert(uimm128); + InstructionData::UnaryConst { + opcode, + constant_handle, + } + } + }, + InstructionFormat::Shuffle => { + let a = self.match_value("expected SSA value first operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let b = self.match_value("expected SSA value second operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let uimm128 = self.match_constant_data(I8X16)?; + let mask = ctx.function.dfg.immediates.push(uimm128); + InstructionData::Shuffle { + opcode, + mask, + args: [a, b], + } + } + InstructionFormat::IntCompare => { + let cond = self.match_enum("expected intcc condition code")?; + let lhs = self.match_value("expected SSA value first operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let rhs = self.match_value("expected SSA value second operand")?; + InstructionData::IntCompare { + opcode, + cond, + args: [lhs, rhs], + } + } + InstructionFormat::IntCompareImm => { + let cond = self.match_enum("expected intcc condition code")?; + let lhs = self.match_value("expected SSA value first operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let rhs = self.match_imm64("expected immediate second operand")?; + InstructionData::IntCompareImm { + opcode, + cond, + arg: lhs, + imm: rhs, + } + } + InstructionFormat::IntCond => { + let cond = self.match_enum("expected intcc condition code")?; + let arg = self.match_value("expected SSA value")?; + InstructionData::IntCond { opcode, cond, arg } + } + InstructionFormat::FloatCompare => { + let cond = self.match_enum("expected floatcc condition code")?; + let lhs = self.match_value("expected SSA value first operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let rhs = self.match_value("expected SSA value second operand")?; + InstructionData::FloatCompare { + opcode, + cond, + args: [lhs, rhs], + } + } + InstructionFormat::FloatCond => { + let cond = self.match_enum("expected floatcc condition code")?; + let arg = self.match_value("expected SSA value")?; + InstructionData::FloatCond { opcode, cond, arg } + } + 
InstructionFormat::IntSelect => { + let cond = self.match_enum("expected intcc condition code")?; + let guard = self.match_value("expected SSA value first operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let v_true = self.match_value("expected SSA value second operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let v_false = self.match_value("expected SSA value third operand")?; + InstructionData::IntSelect { + opcode, + cond, + args: [guard, v_true, v_false], + } + } + InstructionFormat::Call => { + let func_ref = self.match_fn("expected function reference")?; + ctx.check_fn(func_ref, self.loc)?; + self.match_token(Token::LPar, "expected '(' before arguments")?; + let args = self.parse_value_list()?; + self.match_token(Token::RPar, "expected ')' after arguments")?; + InstructionData::Call { + opcode, + func_ref, + args: args.into_value_list(&[], &mut ctx.function.dfg.value_lists), + } + } + InstructionFormat::CallIndirect => { + let sig_ref = self.match_sig("expected signature reference")?; + ctx.check_sig(sig_ref, self.loc)?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let callee = self.match_value("expected SSA value callee operand")?; + self.match_token(Token::LPar, "expected '(' before arguments")?; + let args = self.parse_value_list()?; + self.match_token(Token::RPar, "expected ')' after arguments")?; + InstructionData::CallIndirect { + opcode, + sig_ref, + args: args.into_value_list(&[callee], &mut ctx.function.dfg.value_lists), + } + } + InstructionFormat::FuncAddr => { + let func_ref = self.match_fn("expected function reference")?; + ctx.check_fn(func_ref, self.loc)?; + InstructionData::FuncAddr { opcode, func_ref } + } + InstructionFormat::StackLoad => { + let ss = self.match_ss("expected stack slot number: ss«n»")?; + ctx.check_ss(ss, self.loc)?; + let offset = self.optional_offset32()?; + InstructionData::StackLoad { + opcode, + stack_slot: ss, + offset, + } + } + InstructionFormat::StackStore => { + let arg = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let ss = self.match_ss("expected stack slot number: ss«n»")?; + ctx.check_ss(ss, self.loc)?; + let offset = self.optional_offset32()?; + InstructionData::StackStore { + opcode, + arg, + stack_slot: ss, + offset, + } + } + InstructionFormat::HeapAddr => { + let heap = self.match_heap("expected heap identifier")?; + ctx.check_heap(heap, self.loc)?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let arg = self.match_value("expected SSA value heap address")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let imm = self.match_uimm32("expected 32-bit integer size")?; + InstructionData::HeapAddr { + opcode, + heap, + arg, + imm, + } + } + InstructionFormat::TableAddr => { + let table = self.match_table("expected table identifier")?; + ctx.check_table(table, self.loc)?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let arg = self.match_value("expected SSA value table address")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let offset = self.optional_offset32()?; + InstructionData::TableAddr { + opcode, + table, + arg, + offset, + } + } + InstructionFormat::Load => { + let flags = self.optional_memflags(); + let addr = self.match_value("expected SSA value address")?; + let offset = self.optional_offset32()?; + InstructionData::Load { + opcode, + flags, + arg: addr, + offset, + } + } 
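+            // Note that `optional_memflags` and `optional_offset32` both accept
+            // an empty match, so (as a sketch of the textual form) both
+            // `v1 = load.i32 v0` and `v1 = load.i32 aligned v0+8` are handled
+            // by the `Load` arm above.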
+ InstructionFormat::LoadComplex => { + let flags = self.optional_memflags(); + let args = self.parse_value_sequence()?; + let offset = self.optional_offset32()?; + InstructionData::LoadComplex { + opcode, + flags, + args: args.into_value_list(&[], &mut ctx.function.dfg.value_lists), + offset, + } + } + InstructionFormat::Store => { + let flags = self.optional_memflags(); + let arg = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let addr = self.match_value("expected SSA value address")?; + let offset = self.optional_offset32()?; + InstructionData::Store { + opcode, + flags, + args: [arg, addr], + offset, + } + } + + InstructionFormat::StoreComplex => { + let flags = self.optional_memflags(); + let src = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let args = self.parse_value_sequence()?; + let offset = self.optional_offset32()?; + InstructionData::StoreComplex { + opcode, + flags, + args: args.into_value_list(&[src], &mut ctx.function.dfg.value_lists), + offset, + } + } + InstructionFormat::RegMove => { + let arg = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let src = self.match_regunit(ctx.unique_isa)?; + self.match_token(Token::Arrow, "expected '->' between register units")?; + let dst = self.match_regunit(ctx.unique_isa)?; + InstructionData::RegMove { + opcode, + arg, + src, + dst, + } + } + InstructionFormat::CopySpecial => { + let src = self.match_regunit(ctx.unique_isa)?; + self.match_token(Token::Arrow, "expected '->' between register units")?; + let dst = self.match_regunit(ctx.unique_isa)?; + InstructionData::CopySpecial { opcode, src, dst } + } + InstructionFormat::CopyToSsa => InstructionData::CopyToSsa { + opcode, + src: self.match_regunit(ctx.unique_isa)?, + }, + InstructionFormat::RegSpill => { + let arg = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let src = self.match_regunit(ctx.unique_isa)?; + self.match_token(Token::Arrow, "expected '->' before destination stack slot")?; + let dst = self.match_ss("expected stack slot number: ss«n»")?; + ctx.check_ss(dst, self.loc)?; + InstructionData::RegSpill { + opcode, + arg, + src, + dst, + } + } + InstructionFormat::RegFill => { + let arg = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let src = self.match_ss("expected stack slot number: ss«n»")?; + ctx.check_ss(src, self.loc)?; + self.match_token( + Token::Arrow, + "expected '->' before destination register units", + )?; + let dst = self.match_regunit(ctx.unique_isa)?; + InstructionData::RegFill { + opcode, + arg, + src, + dst, + } + } + InstructionFormat::Trap => { + let code = self.match_enum("expected trap code")?; + InstructionData::Trap { opcode, code } + } + InstructionFormat::CondTrap => { + let arg = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let code = self.match_enum("expected trap code")?; + InstructionData::CondTrap { opcode, arg, code } + } + InstructionFormat::IntCondTrap => { + let cond = self.match_enum("expected intcc condition code")?; + let arg = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let code = self.match_enum("expected trap code")?; + 
InstructionData::IntCondTrap { + opcode, + cond, + arg, + code, + } + } + InstructionFormat::FloatCondTrap => { + let cond = self.match_enum("expected floatcc condition code")?; + let arg = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let code = self.match_enum("expected trap code")?; + InstructionData::FloatCondTrap { + opcode, + cond, + arg, + code, + } + } + }; + Ok(idata) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::error::ParseError; + use crate::isaspec::IsaSpec; + use crate::testfile::{Comment, Details}; + use cranelift_codegen::ir::entities::AnyEntity; + use cranelift_codegen::ir::types; + use cranelift_codegen::ir::StackSlotKind; + use cranelift_codegen::ir::{ArgumentExtension, ArgumentPurpose}; + use cranelift_codegen::isa::CallConv; + + #[test] + fn argument_type() { + let mut p = Parser::new("i32 sext"); + let arg = p.parse_abi_param(None).unwrap(); + assert_eq!(arg.value_type, types::I32); + assert_eq!(arg.extension, ArgumentExtension::Sext); + assert_eq!(arg.purpose, ArgumentPurpose::Normal); + let ParseError { + location, + message, + is_warning, + } = p.parse_abi_param(None).unwrap_err(); + assert_eq!(location.line_number, 1); + assert_eq!(message, "expected parameter type"); + assert!(!is_warning); + } + + #[test] + fn aliases() { + let (func, details) = Parser::new( + "function %qux() system_v { + block0: + v4 = iconst.i8 6 + v3 -> v4 + v1 = iadd_imm v3, 17 + }", + ) + .parse_function(None) + .unwrap(); + assert_eq!(func.name.to_string(), "%qux"); + let v4 = details.map.lookup_str("v4").unwrap(); + assert_eq!(v4.to_string(), "v4"); + let v3 = details.map.lookup_str("v3").unwrap(); + assert_eq!(v3.to_string(), "v3"); + match v3 { + AnyEntity::Value(v3) => { + let aliased_to = func.dfg.resolve_aliases(v3); + assert_eq!(aliased_to.to_string(), "v4"); + } + _ => panic!("expected value: {}", v3), + } + } + + #[test] + fn signature() { + let sig = Parser::new("()system_v").parse_signature(None).unwrap(); + assert_eq!(sig.params.len(), 0); + assert_eq!(sig.returns.len(), 0); + assert_eq!(sig.call_conv, CallConv::SystemV); + + let sig2 = Parser::new("(i8 uext, f32, f64, i32 sret) -> i32 sext, f64 baldrdash_system_v") + .parse_signature(None) + .unwrap(); + assert_eq!( + sig2.to_string(), + "(i8 uext, f32, f64, i32 sret) -> i32 sext, f64 baldrdash_system_v" + ); + assert_eq!(sig2.call_conv, CallConv::BaldrdashSystemV); + + // Old-style signature without a calling convention. + assert_eq!( + Parser::new("()").parse_signature(None).unwrap().to_string(), + "() fast" + ); + assert_eq!( + Parser::new("() notacc") + .parse_signature(None) + .unwrap_err() + .to_string(), + "1: unknown calling convention: notacc" + ); + + // `void` is not recognized as a type by the lexer. It should not appear in files. + assert_eq!( + Parser::new("() -> void") + .parse_signature(None) + .unwrap_err() + .to_string(), + "1: expected parameter type" + ); + assert_eq!( + Parser::new("i8 -> i8") + .parse_signature(None) + .unwrap_err() + .to_string(), + "1: expected function signature: ( args... 
)" + ); + assert_eq!( + Parser::new("(i8 -> i8") + .parse_signature(None) + .unwrap_err() + .to_string(), + "1: expected ')' after function arguments" + ); + } + + #[test] + fn stack_slot_decl() { + let (func, _) = Parser::new( + "function %foo() system_v { + ss3 = incoming_arg 13 + ss1 = spill_slot 1 + }", + ) + .parse_function(None) + .unwrap(); + assert_eq!(func.name.to_string(), "%foo"); + let mut iter = func.stack_slots.keys(); + let _ss0 = iter.next().unwrap(); + let ss1 = iter.next().unwrap(); + assert_eq!(ss1.to_string(), "ss1"); + assert_eq!(func.stack_slots[ss1].kind, StackSlotKind::SpillSlot); + assert_eq!(func.stack_slots[ss1].size, 1); + let _ss2 = iter.next().unwrap(); + let ss3 = iter.next().unwrap(); + assert_eq!(ss3.to_string(), "ss3"); + assert_eq!(func.stack_slots[ss3].kind, StackSlotKind::IncomingArg); + assert_eq!(func.stack_slots[ss3].size, 13); + assert_eq!(iter.next(), None); + + // Catch duplicate definitions. + assert_eq!( + Parser::new( + "function %bar() system_v { + ss1 = spill_slot 13 + ss1 = spill_slot 1 + }", + ) + .parse_function(None) + .unwrap_err() + .to_string(), + "3: duplicate entity: ss1" + ); + } + + #[test] + fn block_header() { + let (func, _) = Parser::new( + "function %blocks() system_v { + block0: + block4(v3: i32): + }", + ) + .parse_function(None) + .unwrap(); + assert_eq!(func.name.to_string(), "%blocks"); + + let mut blocks = func.layout.blocks(); + + let block0 = blocks.next().unwrap(); + assert_eq!(func.dfg.block_params(block0), &[]); + + let block4 = blocks.next().unwrap(); + let block4_args = func.dfg.block_params(block4); + assert_eq!(block4_args.len(), 1); + assert_eq!(func.dfg.value_type(block4_args[0]), types::I32); + } + + #[test] + fn duplicate_block() { + let ParseError { + location, + message, + is_warning, + } = Parser::new( + "function %blocks() system_v { + block0: + block0: + return 2", + ) + .parse_function(None) + .unwrap_err(); + + assert_eq!(location.line_number, 3); + assert_eq!(message, "duplicate entity: block0"); + assert!(!is_warning); + } + + #[test] + fn number_of_blocks() { + let ParseError { + location, + message, + is_warning, + } = Parser::new( + "function %a() { + block100000:", + ) + .parse_function(None) + .unwrap_err(); + + assert_eq!(location.line_number, 2); + assert_eq!(message, "too many blocks"); + assert!(!is_warning); + } + + #[test] + fn duplicate_jt() { + let ParseError { + location, + message, + is_warning, + } = Parser::new( + "function %blocks() system_v { + jt0 = jump_table [] + jt0 = jump_table []", + ) + .parse_function(None) + .unwrap_err(); + + assert_eq!(location.line_number, 3); + assert_eq!(message, "duplicate entity: jt0"); + assert!(!is_warning); + } + + #[test] + fn duplicate_ss() { + let ParseError { + location, + message, + is_warning, + } = Parser::new( + "function %blocks() system_v { + ss0 = explicit_slot 8 + ss0 = explicit_slot 8", + ) + .parse_function(None) + .unwrap_err(); + + assert_eq!(location.line_number, 3); + assert_eq!(message, "duplicate entity: ss0"); + assert!(!is_warning); + } + + #[test] + fn duplicate_gv() { + let ParseError { + location, + message, + is_warning, + } = Parser::new( + "function %blocks() system_v { + gv0 = vmctx + gv0 = vmctx", + ) + .parse_function(None) + .unwrap_err(); + + assert_eq!(location.line_number, 3); + assert_eq!(message, "duplicate entity: gv0"); + assert!(!is_warning); + } + + #[test] + fn duplicate_heap() { + let ParseError { + location, + message, + is_warning, + } = Parser::new( + "function %blocks() system_v { + heap0 = static 
gv0, min 0x1000, bound 0x10_0000, offset_guard 0x1000 + heap0 = static gv0, min 0x1000, bound 0x10_0000, offset_guard 0x1000", + ) + .parse_function(None) + .unwrap_err(); + + assert_eq!(location.line_number, 3); + assert_eq!(message, "duplicate entity: heap0"); + assert!(!is_warning); + } + + #[test] + fn duplicate_sig() { + let ParseError { + location, + message, + is_warning, + } = Parser::new( + "function %blocks() system_v { + sig0 = () + sig0 = ()", + ) + .parse_function(None) + .unwrap_err(); + + assert_eq!(location.line_number, 3); + assert_eq!(message, "duplicate entity: sig0"); + assert!(!is_warning); + } + + #[test] + fn duplicate_fn() { + let ParseError { + location, + message, + is_warning, + } = Parser::new( + "function %blocks() system_v { + sig0 = () + fn0 = %foo sig0 + fn0 = %foo sig0", + ) + .parse_function(None) + .unwrap_err(); + + assert_eq!(location.line_number, 4); + assert_eq!(message, "duplicate entity: fn0"); + assert!(!is_warning); + } + + #[test] + fn comments() { + let (func, Details { comments, .. }) = Parser::new( + "; before + function %comment() system_v { ; decl + ss10 = outgoing_arg 13 ; stackslot. + ; Still stackslot. + jt10 = jump_table [block0] + ; Jumptable + block0: ; Basic block + trap user42; Instruction + } ; Trailing. + ; More trailing.", + ) + .parse_function(None) + .unwrap(); + assert_eq!(func.name.to_string(), "%comment"); + assert_eq!(comments.len(), 8); // no 'before' comment. + assert_eq!( + comments[0], + Comment { + entity: AnyEntity::Function, + text: "; decl", + } + ); + assert_eq!(comments[1].entity.to_string(), "ss10"); + assert_eq!(comments[2].entity.to_string(), "ss10"); + assert_eq!(comments[2].text, "; Still stackslot."); + assert_eq!(comments[3].entity.to_string(), "jt10"); + assert_eq!(comments[3].text, "; Jumptable"); + assert_eq!(comments[4].entity.to_string(), "block0"); + assert_eq!(comments[4].text, "; Basic block"); + + assert_eq!(comments[5].entity.to_string(), "inst0"); + assert_eq!(comments[5].text, "; Instruction"); + + assert_eq!(comments[6].entity, AnyEntity::Function); + assert_eq!(comments[7].entity, AnyEntity::Function); + } + + #[test] + fn test_file() { + let tf = parse_test( + r#"; before + test cfg option=5 + test verify + set enable_float=false + feature "foo" + feature !"bar" + ; still preamble + function %comment() system_v {}"#, + ParseOptions::default(), + ) + .unwrap(); + assert_eq!(tf.commands.len(), 2); + assert_eq!(tf.commands[0].command, "cfg"); + assert_eq!(tf.commands[1].command, "verify"); + match tf.isa_spec { + IsaSpec::None(s) => { + assert!(s.enable_verifier()); + assert!(!s.enable_float()); + } + _ => panic!("unexpected ISAs"), + } + assert_eq!(tf.features[0], Feature::With(&"foo")); + assert_eq!(tf.features[1], Feature::Without(&"bar")); + assert_eq!(tf.preamble_comments.len(), 2); + assert_eq!(tf.preamble_comments[0].text, "; before"); + assert_eq!(tf.preamble_comments[1].text, "; still preamble"); + assert_eq!(tf.functions.len(), 1); + assert_eq!(tf.functions[0].0.name.to_string(), "%comment"); + } + + #[test] + #[cfg(feature = "riscv")] + fn isa_spec() { + assert!(parse_test( + "target + function %foo() system_v {}", + ParseOptions::default() + ) + .is_err()); + + assert!(parse_test( + "target riscv32 + set enable_float=false + function %foo() system_v {}", + ParseOptions::default() + ) + .is_err()); + + match parse_test( + "set enable_float=false + isa riscv + function %foo() system_v {}", + ParseOptions::default(), + ) + .unwrap() + .isa_spec + { + IsaSpec::None(_) => panic!("Expected 
some ISA"), + IsaSpec::Some(v) => { + assert_eq!(v.len(), 1); + assert_eq!(v[0].name(), "riscv"); + } + } + } + + #[test] + fn user_function_name() { + // Valid characters in the name: + let func = Parser::new( + "function u1:2() system_v { + block0: + trap int_divz + }", + ) + .parse_function(None) + .unwrap() + .0; + assert_eq!(func.name.to_string(), "u1:2"); + + // Invalid characters in the name: + let mut parser = Parser::new( + "function u123:abc() system_v { + block0: + trap stk_ovf + }", + ); + assert!(parser.parse_function(None).is_err()); + + // Incomplete function names should not be valid: + let mut parser = Parser::new( + "function u() system_v { + block0: + trap int_ovf + }", + ); + assert!(parser.parse_function(None).is_err()); + + let mut parser = Parser::new( + "function u0() system_v { + block0: + trap int_ovf + }", + ); + assert!(parser.parse_function(None).is_err()); + + let mut parser = Parser::new( + "function u0:() system_v { + block0: + trap int_ovf + }", + ); + assert!(parser.parse_function(None).is_err()); + } + + #[test] + fn change_default_calling_convention() { + let code = "function %test() { + block0: + return + }"; + + // By default the parser will use the fast calling convention if none is specified. + let mut parser = Parser::new(code); + assert_eq!( + parser.parse_function(None).unwrap().0.signature.call_conv, + CallConv::Fast + ); + + // However, we can specify a different calling convention to be the default. + let mut parser = Parser::new(code).with_default_calling_convention(CallConv::Cold); + assert_eq!( + parser.parse_function(None).unwrap().0.signature.call_conv, + CallConv::Cold + ); + } + + #[test] + fn u8_as_hex() { + fn parse_as_uimm8(text: &str) -> ParseResult { + Parser::new(text).match_uimm8("unable to parse u8") + } + + assert_eq!(parse_as_uimm8("0").unwrap(), 0); + assert_eq!(parse_as_uimm8("0xff").unwrap(), 255); + assert!(parse_as_uimm8("-1").is_err()); + assert!(parse_as_uimm8("0xffa").is_err()); + } + + #[test] + fn uimm128() { + macro_rules! parse_as_constant_data { + ($text:expr, $type:expr) => {{ + Parser::new($text).parse_literals_to_constant_data($type) + }}; + } + macro_rules! can_parse_as_constant_data { + ($text:expr, $type:expr) => {{ + assert!(parse_as_constant_data!($text, $type).is_ok()) + }}; + } + macro_rules! 
cannot_parse_as_constant_data {
+            ($text:expr, $type:expr) => {{
+                assert!(parse_as_constant_data!($text, $type).is_err())
+            }};
+        }
+
+        can_parse_as_constant_data!("1 2 3 4", I32X4);
+        can_parse_as_constant_data!("1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16", I8X16);
+        can_parse_as_constant_data!("0x1.1 0x2.2 0x3.3 0x4.4", F32X4);
+        can_parse_as_constant_data!("true false true false true false true false", B16X8);
+        can_parse_as_constant_data!("0 -1", I64X2);
+        can_parse_as_constant_data!("true false", B64X2);
+        can_parse_as_constant_data!("true true true true true", B32X4); // note that parse_literals_to_constant_data will leave extra tokens unconsumed
+
+        cannot_parse_as_constant_data!("0x0 0x1 0x2 0x3", I32X4);
+        cannot_parse_as_constant_data!("1 2 3", I32X4);
+        cannot_parse_as_constant_data!(" ", F32X4);
+    }
+
+    #[test]
+    fn parse_constant_from_booleans() {
+        let c = Parser::new("true false true false")
+            .parse_literals_to_constant_data(B32X4)
+            .unwrap();
+        assert_eq!(
+            c.into_vec(),
+            [1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
+        )
+    }
+}
diff --git a/cranelift/reader/src/sourcemap.rs b/cranelift/reader/src/sourcemap.rs
new file mode 100644
index 0000000000..126fd219c3
--- /dev/null
+++ b/cranelift/reader/src/sourcemap.rs
@@ -0,0 +1,239 @@
+//! Source map associating entities with their source locations.
+//!
+//! When the parser reads in a source file, it records the locations of the
+//! definitions of entities like instructions, blocks, and values.
+//!
+//! The `SourceMap` struct defined in this module makes this mapping available
+//! to parser clients.
+
+use crate::error::{Location, ParseResult};
+use crate::lexer::split_entity_name;
+use cranelift_codegen::ir::entities::AnyEntity;
+use cranelift_codegen::ir::{
+    Block, FuncRef, GlobalValue, Heap, JumpTable, SigRef, StackSlot, Table, Value,
+};
+use std::collections::HashMap;
+
+/// Mapping from entity names to source locations.
+#[derive(Debug, Default)]
+pub struct SourceMap {
+    // Store locations for entities, including instructions.
+    locations: HashMap<AnyEntity, Location>,
+}
+
+/// Read-only interface which is exposed outside the parser crate.
+impl SourceMap {
+    /// Look up a value entity.
+    pub fn contains_value(&self, v: Value) -> bool {
+        self.locations.contains_key(&v.into())
+    }
+
+    /// Look up a block entity.
+    pub fn contains_block(&self, block: Block) -> bool {
+        self.locations.contains_key(&block.into())
+    }
+
+    /// Look up a stack slot entity.
+    pub fn contains_ss(&self, ss: StackSlot) -> bool {
+        self.locations.contains_key(&ss.into())
+    }
+
+    /// Look up a global value entity.
+    pub fn contains_gv(&self, gv: GlobalValue) -> bool {
+        self.locations.contains_key(&gv.into())
+    }
+
+    /// Look up a heap entity.
+    pub fn contains_heap(&self, heap: Heap) -> bool {
+        self.locations.contains_key(&heap.into())
+    }
+
+    /// Look up a table entity.
+    pub fn contains_table(&self, table: Table) -> bool {
+        self.locations.contains_key(&table.into())
+    }
+
+    /// Look up a signature entity.
+    pub fn contains_sig(&self, sig: SigRef) -> bool {
+        self.locations.contains_key(&sig.into())
+    }
+
+    /// Look up a function entity.
+    pub fn contains_fn(&self, fn_: FuncRef) -> bool {
+        self.locations.contains_key(&fn_.into())
+    }
+
+    /// Look up a jump table entity.
+    pub fn contains_jt(&self, jt: JumpTable) -> bool {
+        self.locations.contains_key(&jt.into())
+    }
+
+    /// Look up an entity by source name.
+    /// Returns the entity reference corresponding to `name`, if it exists.
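+    ///
+    /// For example (mirroring the tests at the bottom of this file),
+    /// `lookup_str("v4")` returns the `AnyEntity` for value `v4` once `v4`
+    /// has been defined, while `lookup_str("v0")` returns `None` if `v0`
+    /// never was.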
+    pub fn lookup_str(&self, name: &str) -> Option<AnyEntity> {
+        split_entity_name(name).and_then(|(ent, num)| match ent {
+            "v" => Value::with_number(num).and_then(|v| {
+                if !self.contains_value(v) {
+                    None
+                } else {
+                    Some(v.into())
+                }
+            }),
+            "block" => Block::with_number(num).and_then(|block| {
+                if !self.contains_block(block) {
+                    None
+                } else {
+                    Some(block.into())
+                }
+            }),
+            "ss" => StackSlot::with_number(num).and_then(|ss| {
+                if !self.contains_ss(ss) {
+                    None
+                } else {
+                    Some(ss.into())
+                }
+            }),
+            "gv" => GlobalValue::with_number(num).and_then(|gv| {
+                if !self.contains_gv(gv) {
+                    None
+                } else {
+                    Some(gv.into())
+                }
+            }),
+            "heap" => Heap::with_number(num).and_then(|heap| {
+                if !self.contains_heap(heap) {
+                    None
+                } else {
+                    Some(heap.into())
+                }
+            }),
+            "table" => Table::with_number(num).and_then(|table| {
+                if !self.contains_table(table) {
+                    None
+                } else {
+                    Some(table.into())
+                }
+            }),
+            "sig" => SigRef::with_number(num).and_then(|sig| {
+                if !self.contains_sig(sig) {
+                    None
+                } else {
+                    Some(sig.into())
+                }
+            }),
+            "fn" => FuncRef::with_number(num).and_then(|fn_| {
+                if !self.contains_fn(fn_) {
+                    None
+                } else {
+                    Some(fn_.into())
+                }
+            }),
+            "jt" => JumpTable::with_number(num).and_then(|jt| {
+                if !self.contains_jt(jt) {
+                    None
+                } else {
+                    Some(jt.into())
+                }
+            }),
+            _ => None,
+        })
+    }
+
+    /// Get the source location where an entity was defined.
+    pub fn location(&self, entity: AnyEntity) -> Option<Location> {
+        self.locations.get(&entity).cloned()
+    }
+}
+
+impl SourceMap {
+    /// Create a new empty `SourceMap`.
+    pub fn new() -> Self {
+        Self {
+            locations: HashMap::new(),
+        }
+    }
+
+    /// Define the value `entity`.
+    pub fn def_value(&mut self, entity: Value, loc: Location) -> ParseResult<()> {
+        self.def_entity(entity.into(), loc)
+    }
+
+    /// Define the block `entity`.
+    pub fn def_block(&mut self, entity: Block, loc: Location) -> ParseResult<()> {
+        self.def_entity(entity.into(), loc)
+    }
+
+    /// Define the stack slot `entity`.
+    pub fn def_ss(&mut self, entity: StackSlot, loc: Location) -> ParseResult<()> {
+        self.def_entity(entity.into(), loc)
+    }
+
+    /// Define the global value `entity`.
+    pub fn def_gv(&mut self, entity: GlobalValue, loc: Location) -> ParseResult<()> {
+        self.def_entity(entity.into(), loc)
+    }
+
+    /// Define the heap `entity`.
+    pub fn def_heap(&mut self, entity: Heap, loc: Location) -> ParseResult<()> {
+        self.def_entity(entity.into(), loc)
+    }
+
+    /// Define the table `entity`.
+    pub fn def_table(&mut self, entity: Table, loc: Location) -> ParseResult<()> {
+        self.def_entity(entity.into(), loc)
+    }
+
+    /// Define the signature `entity`.
+    pub fn def_sig(&mut self, entity: SigRef, loc: Location) -> ParseResult<()> {
+        self.def_entity(entity.into(), loc)
+    }
+
+    /// Define the external function `entity`.
+    pub fn def_fn(&mut self, entity: FuncRef, loc: Location) -> ParseResult<()> {
+        self.def_entity(entity.into(), loc)
+    }
+
+    /// Define the jump table `entity`.
+    pub fn def_jt(&mut self, entity: JumpTable, loc: Location) -> ParseResult<()> {
+        self.def_entity(entity.into(), loc)
+    }
+
+    /// Define an entity. This can be used for instructions whose numbers never
+    /// appear in source, or implicitly defined signatures.
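+    ///
+    /// Defining the same entity twice is reported as a parse error at the
+    /// second definition, e.g. `duplicate entity: ss1` for a stack slot
+    /// declared twice.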
+    pub fn def_entity(&mut self, entity: AnyEntity, loc: Location) -> ParseResult<()> {
+        if self.locations.insert(entity, loc).is_some() {
+            err!(loc, "duplicate entity: {}", entity)
+        } else {
+            Ok(())
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::{parse_test, ParseOptions};
+
+    #[test]
+    fn details() {
+        let tf = parse_test(
+            "function %detail() {
+                 ss10 = incoming_arg 13
+                 jt10 = jump_table [block0]
+                 block0(v4: i32, v7: i32):
+                     v10 = iadd v4, v7
+             }",
+            ParseOptions::default(),
+        )
+        .unwrap();
+        let map = &tf.functions[0].1.map;
+
+        assert_eq!(map.lookup_str("v0"), None);
+        assert_eq!(map.lookup_str("ss1"), None);
+        assert_eq!(map.lookup_str("ss10").unwrap().to_string(), "ss10");
+        assert_eq!(map.lookup_str("jt10").unwrap().to_string(), "jt10");
+        assert_eq!(map.lookup_str("block0").unwrap().to_string(), "block0");
+        assert_eq!(map.lookup_str("v4").unwrap().to_string(), "v4");
+        assert_eq!(map.lookup_str("v7").unwrap().to_string(), "v7");
+        assert_eq!(map.lookup_str("v10").unwrap().to_string(), "v10");
+    }
+}
diff --git a/cranelift/reader/src/testcommand.rs b/cranelift/reader/src/testcommand.rs
new file mode 100644
index 0000000000..17896a070c
--- /dev/null
+++ b/cranelift/reader/src/testcommand.rs
@@ -0,0 +1,103 @@
+//! Test commands.
+//!
+//! A `.clif` file can begin with one or more *test commands* which specify what is to be tested.
+//! The general syntax is:
+//!
+//! ```text
+//! test <command> [options]...
+//! ```
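+//!
+//! For example, `test cfg option=5` names the command `cfg` and carries a
+//! single `option=5` value.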
+//!
+//! The options are either a single identifier flag, or setting values like `identifier=value`.
+//!
+//! The parser does not understand the test commands or which options are valid. It simply parses
+//! the general format into a `TestCommand` data structure.
+
+use std::fmt::{self, Display, Formatter};
+
+/// A command appearing in a test file.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub struct TestCommand<'a> {
+    /// The command name as a string.
+    pub command: &'a str,
+    /// The options following the command name.
+    pub options: Vec<TestOption<'a>>,
+}
+
+/// An option on a test command.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub enum TestOption<'a> {
+    /// Single identifier flag: `foo`.
+    Flag(&'a str),
+    /// A value assigned to an identifier: `foo=bar`.
+    Value(&'a str, &'a str),
+}
+
+impl<'a> TestCommand<'a> {
+    /// Create a new TestCommand by parsing `s`.
+    /// The returned command contains references into `s`.
+    pub fn new(s: &'a str) -> Self {
+        let mut parts = s.split_whitespace();
+        let cmd = parts.next().unwrap_or("");
+        Self {
+            command: cmd,
+            options: parts
+                .filter(|s| !s.is_empty())
+                .map(TestOption::new)
+                .collect(),
+        }
+    }
+}
+
+impl<'a> Display for TestCommand<'a> {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        write!(f, "{}", self.command)?;
+        for opt in &self.options {
+            write!(f, " {}", opt)?;
+        }
+        writeln!(f)
+    }
+}
+
+impl<'a> TestOption<'a> {
+    /// Create a new TestOption by parsing `s`.
+    /// The returned option contains references into `s`.
+    pub fn new(s: &'a str) -> Self {
+        match s.find('=') {
+            None => TestOption::Flag(s),
+            Some(p) => TestOption::Value(&s[0..p], &s[p + 1..]),
+        }
+    }
+}
+
+impl<'a> Display for TestOption<'a> {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        match *self {
+            TestOption::Flag(s) => write!(f, "{}", s),
+            TestOption::Value(s, v) => write!(f, "{}={}", s, v),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parse_option() {
+        assert_eq!(TestOption::new(""), TestOption::Flag(""));
+        assert_eq!(TestOption::new("foo"), TestOption::Flag("foo"));
+        assert_eq!(TestOption::new("foo=bar"), TestOption::Value("foo", "bar"));
+    }
+
+    #[test]
+    fn parse_command() {
+        assert_eq!(&TestCommand::new("").to_string(), "\n");
+        assert_eq!(&TestCommand::new("cat").to_string(), "cat\n");
+        assert_eq!(&TestCommand::new("cat ").to_string(), "cat\n");
+        assert_eq!(&TestCommand::new("cat 1 ").to_string(), "cat 1\n");
+        assert_eq!(
+            &TestCommand::new("cat one=4 two t").to_string(),
+            "cat one=4 two t\n"
+        );
+    }
+}
diff --git a/cranelift/reader/src/testfile.rs b/cranelift/reader/src/testfile.rs
new file mode 100644
index 0000000000..4cfdd8f3e5
--- /dev/null
+++ b/cranelift/reader/src/testfile.rs
@@ -0,0 +1,73 @@
+//! Data structures representing a parsed test file.
+//!
+//! A test file is a `.clif` file which contains test commands and settings for running a
+//! file-based test case.
+//!
+
+use crate::error::Location;
+use crate::isaspec::IsaSpec;
+use crate::sourcemap::SourceMap;
+use crate::testcommand::TestCommand;
+use cranelift_codegen::ir::entities::AnyEntity;
+use cranelift_codegen::ir::Function;
+
+/// A parsed test case.
+///
+/// This is the result of parsing a `.clif` file which contains a number of test commands and ISA
+/// specs followed by the functions that should be tested.
+pub struct TestFile<'a> {
+    /// `test foo ...` lines.
+    pub commands: Vec<TestCommand<'a>>,
+    /// `isa bar ...` lines.
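+    /// (For example, a preamble of `set enable_float=false` followed by
+    /// `isa riscv` parses into an `IsaSpec::Some` carrying one configured ISA.)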
+    pub isa_spec: IsaSpec,
+    /// `feature ...` lines
+    pub features: Vec<Feature<'a>>,
+    /// Comments appearing before the first function.
+    /// These are all tagged as 'Function' scope for lack of a better entity.
+    pub preamble_comments: Vec<Comment<'a>>,
+    /// Parsed functions and additional details about each function.
+    pub functions: Vec<(Function, Details<'a>)>,
+}
+
+/// Additional details about a function parsed from a text string.
+/// These are useful for detecting test commands embedded in comments etc.
+/// The details do not affect the semantics of the function.
+#[derive(Debug)]
+pub struct Details<'a> {
+    /// Location of the `function` keyword that begins this function.
+    pub location: Location,
+    /// Annotation comments that appeared inside or after the function.
+    pub comments: Vec<Comment<'a>>,
+    /// Mapping of entity numbers to source locations.
+    pub map: SourceMap,
+}
+
+/// A comment in a parsed function.
+///
+/// The comment belongs to the immediately preceding entity, whether that is a block header, an
+/// instruction, or one of the preamble declarations.
+///
+/// Comments appearing inside the function but before the preamble, as well as comments appearing
+/// after the function are tagged as `AnyEntity::Function`.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub struct Comment<'a> {
+    /// The entity this comment is attached to.
+    /// Comments always follow their entity.
+    pub entity: AnyEntity,
+    /// Text of the comment, including the leading `;`.
+    pub text: &'a str,
+}
+
+/// A Cranelift feature in a test file preamble.
+///
+/// This represents the expectation of the test case. Before running any of the
+/// functions of the test file, the feature set should be compared with the
+/// feature set used to compile Cranelift. If there are any differences, then the
+/// test file should be skipped.
+#[derive(PartialEq, Eq, Debug)]
pub enum Feature<'a> {
+    /// `feature "..."` lines
+    With(&'a str),
+    /// `feature ! "..."` lines.
+    Without(&'a str),
+}
diff --git a/cranelift/rustc.md b/cranelift/rustc.md
new file mode 100644
index 0000000000..a887ca5b80
--- /dev/null
+++ b/cranelift/rustc.md
@@ -0,0 +1,73 @@
+Cranelift in Rustc
+==================
+
+One goal for Cranelift is to be usable as a backend suitable for
+compiling Rust in debug mode. This mode doesn't require a lot of
+mid-level optimization, but it does want very fast compile times, and
+this matches up fairly well with what we expect Cranelift's initial
+strengths and weaknesses to be. Cranelift is being designed to take
+aggressive advantage of multiple cores, and to be very efficient with
+its use of memory.
+
+Another goal is a "pretty good" backend. The idea here is to do the work
+to get MIR-level inlining enabled, do some basic optimizations in
+Cranelift to capture the low-hanging fruit, and then use that along with
+good low-level optimizations to produce code which has a chance of being
+decently fast, with quite fast compile times. It obviously wouldn't
+compete with LLVM-based release builds in terms of optimization, but for
+some users, completely unoptimized code is too slow to test with, so a
+"pretty good" mode might be good enough.
+
+There's plenty of work to do to achieve these goals, and if we achieve
+them, we'll have enabled a Rust compiler written entirely in Rust, and
+enabled faster Rust compile times for important use cases.
+
+See [issues tagged "rustc"](https://github.com/bytecodealliance/cranelift/labels/goal%3Arustc)
+for a list of some of the things that will be needed.
+ +With all that said, there is a potential goal beyond that, which is to +build a full optimizing release-capable backend. We can't predict how +far Cranelift will go yet, but we do have some crazy ideas about what +such a thing might look like, including: + + - Take advantage of Rust language properties in the optimizer. With + LLVM, Rust is able to use annotations to describe some of its + aliasing guarantees, however the annotations are awkward and + limited. An optimizer that can represent the core aliasing + relationships that Rust provides directly has the potential to be + very powerful without the need for complex alias analysis logic. + Unsafe blocks are an interesting challenge, however in many simple + cases, like `Vec`, it may be possible to recover what the optimizer + needs to know. + - Design for superoptimization. Traditionally, compiler development + teams have spent many years of manual effort to identify patterns of + code that can be matched and replaced. Superoptimizers have been + contributing some to this effort, but in the future, we may be able + to reverse roles. Superoptimizers will do the bulk of the work, and + humans will contribute specialized optimizations that + superoptimizers miss. This has the potential to take a new optimizer + from scratch to diminishing-returns territory with much less manual + effort. + - Build an optimizer IR without the constraints of fast-debug-build + compilation. Cranelift's base IR is focused on Codegen, so a + full-strength optimizer would either use an IR layer on top of it + (possibly using cranelift-entity's flexible `SecondaryMap`s), or + possibly an independent IR that could be translated to/from the base + IR. Either way, this overall architecture would keep the optimizer + out of the way of the non-optimizing build path, which keeps that + path fast and simple, and gives the optimizer more flexibility. If we + then want to base the IR on a powerful data structure like the + Value State Dependence Graph (VSDG), we can do so with fewer + compromises. + +And, these ideas build on each other. For example, one of the challenges +for dependence-graph-oriented IRs like the VSDG is getting good enough +memory dependence information. But if we can get high-quality aliasing +information directly from the Rust front-end, we should be in great +shape. As another example, it's often harder for superoptimizers to +reason about control flow than expression graphs. But, graph-oriented +IRs like the VSDG represent control flow as control dependencies. It's +difficult to say how powerful this combination will be until we try it, +but if nothing else, it should be very convenient to express +pattern-matching over a single graph that includes both data and control +dependencies. 
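The "IR layer on top" idea above can be sketched with nothing but the public
`cranelift-entity` API; the `Node` entity and the cost table below are invented
for illustration and are not part of this patch:

```rust
use cranelift_entity::{entity_impl, PrimaryMap, SecondaryMap};

/// A hypothetical optimizer-side entity (illustration only).
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub struct Node(u32);
entity_impl!(Node, "node");

fn main() {
    // The primary map owns the entities and hands out compact keys...
    let mut nodes: PrimaryMap<Node, &str> = PrimaryMap::new();
    let a = nodes.push("iadd");
    let b = nodes.push("imul");

    // ...while a secondary map attaches extra per-entity data on the side.
    // Keys that were never written read back as the default value.
    let mut cost: SecondaryMap<Node, u32> = SecondaryMap::new();
    cost[b] = 4;
    assert_eq!(cost[a], 0); // never written: default
    assert_eq!(cost[b], 4);
}
```

Because reads of unwritten keys fall back to a default, a layer built this way
can grow incrementally without touching the base IR's own storage.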
diff --git a/cranelift/serde/Cargo.toml b/cranelift/serde/Cargo.toml new file mode 100644 index 0000000000..6ab2ae0045 --- /dev/null +++ b/cranelift/serde/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "cranelift-serde" +version = "0.59.0" +authors = ["The Cranelift Project Developers"] +description = "Serializer/Deserializer for Cranelift IR" +repository = "https://github.com/bytecodealliance/cranelift" +license = "Apache-2.0 WITH LLVM-exception" +readme = "README.md" +keywords = ["webassembly", "serde"] +edition = "2018" + +[[bin]] +name = "clif-json" +path = "src/clif-json.rs" + +[dependencies] +clap = "2.32.0" +serde = "1.0.8" +serde_derive = "1.0.75" +serde_json = "1.0.26" +cranelift-codegen = { path = "../codegen", version = "0.59.0" } +cranelift-reader = { path = "../reader", version = "0.59.0" } + +[badges] +maintenance = { status = "experimental" } +travis-ci = { repository = "bytecodealliance/cranelift" } diff --git a/cranelift/serde/LICENSE b/cranelift/serde/LICENSE new file mode 100644 index 0000000000..f9d81955f4 --- /dev/null +++ b/cranelift/serde/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
+
diff --git a/cranelift/serde/README.md b/cranelift/serde/README.md
new file mode 100644
index 0000000000..b6ce8c62fd
--- /dev/null
+++ b/cranelift/serde/README.md
@@ -0,0 +1,32 @@
+This crate performs serialization of the [Cranelift](https://crates.io/crates/cranelift) IR.
+
+This crate provides optional support for serializing and deserializing Cranelift IR into JSON
+format.
+
+Status
+------
+
+Cranelift IR can be serialized into JSON.
+
+Deserialization is a work in progress: it currently deserializes into the serde-friendly data
+structure rather than the actual Cranelift IR data structure.
+
+
+Building and Using Cranelift Serde
+----------------------------------
+
+clif-json usage:
+
+    clif-json serialize [-p] <file>
+    clif-json deserialize <file>
+
+where the -p flag outputs Cranelift IR as pretty JSON.
+
+For example, to build and use clif-json:
+
+``` {.sourceCode .sh}
+cd cranelift-serde
+cargo build
+clif-json serialize -p test.clif
+```
+
diff --git a/cranelift/serde/src/clif-json.rs b/cranelift/serde/src/clif-json.rs
new file mode 100644
index 0000000000..34f97cc971
--- /dev/null
+++ b/cranelift/serde/src/clif-json.rs
@@ -0,0 +1,114 @@
+//! Utility for `cranelift_serde`.
+
+#![deny(
+    missing_docs,
+    trivial_numeric_casts,
+    unused_extern_crates,
+    unstable_features
+)]
+#![warn(unused_import_braces)]
+#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))]
+#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))]
+#![cfg_attr(
+    feature = "cargo-clippy",
+    warn(
+        clippy::float_arithmetic,
+        clippy::mut_mut,
+        clippy::nonminimal_bool,
+        clippy::option_map_unwrap_or,
+        clippy::option_map_unwrap_or_else,
+        clippy::unicode_not_nfc,
+        clippy::use_self
+    )
+)]
+
+use clap::{App, Arg, SubCommand};
+use cranelift_reader::parse_functions;
+use std::fs::File;
+use std::io::prelude::*;
+use std::io::{self, Write};
+use std::process;
+
+mod serde_clif_json;
+
+fn call_ser(file: &str, pretty: bool) -> Result<(), String> {
+    let ret_of_parse = parse_functions(file);
+    match ret_of_parse {
+        Ok(funcs) => {
+            let ser_funcs = serde_clif_json::SerObj::new(&funcs);
+            let ser_str = if pretty {
+                serde_json::to_string_pretty(&ser_funcs).unwrap()
+            } else {
+                serde_json::to_string(&ser_funcs).unwrap()
+            };
+            println!("{}", ser_str);
+            Ok(())
+        }
+        Err(_pe) => Err("There was a parsing error".to_string()),
+    }
+}
+
+fn call_de(file: &File) -> Result<(), String> {
+    let de: serde_clif_json::SerObj = match serde_json::from_reader(file) {
+        Result::Ok(val) => val,
+        Result::Err(err) => panic!("{}", err),
+    };
+    println!("{:?}", de);
+    Ok(())
+}
+
+fn main() {
+    let matches = App::new("Cranelift JSON serializer/deserializer utility")
+        .subcommand(
+            SubCommand::with_name("serialize")
+                .display_order(1)
+                .about("Serializes Cranelift IR into JSON.")
+                .arg(Arg::with_name("pretty").short("p").help("pretty json"))
+                .arg(
+                    Arg::with_name("FILE")
+                        .required(true)
+                        .value_name("FILE")
+                        .help("Input file for serialization"),
+                ),
+        )
+        .subcommand(
+            SubCommand::with_name("deserialize")
+                .about("Deserializes Cranelift IR into JSON.")
+                .arg(
+                    Arg::with_name("FILE")
+                        .required(true)
+                        .value_name("FILE")
+                        .help("Input file for deserialization"),
+                ),
+        )
+        .get_matches();
+
+    let res_serde = match matches.subcommand() {
+        ("serialize", Some(m)) => {
+            let mut file =
+                File::open(m.value_of("FILE").unwrap()).expect("Unable to open the file");
+            let mut contents = String::new();
+            file.read_to_string(&mut contents)
+                .expect("Unable to
read the file"); + + match m.occurrences_of("pretty") { + 0 => call_ser(&contents, false), + _ => call_ser(&contents, true), + } + } + ("deserialize", Some(m)) => { + let file = File::open(m.value_of("FILE").unwrap()).expect("Unable to open the file"); + call_de(&file) + } + _ => Err("Invalid subcommand.".to_string()), + }; + + if let Err(mut msg) = res_serde { + if !msg.ends_with('\n') { + msg.push('\n'); + } + io::stdout().flush().expect("flushing stdout"); + io::stderr().write_all(msg.as_bytes()).unwrap(); + process::exit(1); + } +} diff --git a/cranelift/serde/src/serde_clif_json.rs b/cranelift/serde/src/serde_clif_json.rs new file mode 100644 index 0000000000..2d950cf3a8 --- /dev/null +++ b/cranelift/serde/src/serde_clif_json.rs @@ -0,0 +1,910 @@ +use cranelift_codegen::ir::{Block, Function, Inst, InstructionData, Signature}; +use serde_derive::{Deserialize, Serialize}; + +/// Serializable version of the original Cranelift IR +#[derive(Clone, Debug, Hash, PartialEq, Eq, Serialize, Deserialize)] +pub enum SerInstData { + Unary { + opcode: String, + arg: String, + }, + UnaryImm { + opcode: String, + imm: String, + }, + UnaryIeee32 { + opcode: String, + imm: String, + }, + UnaryIeee64 { + opcode: String, + imm: String, + }, + UnaryBool { + opcode: String, + imm: bool, + }, + UnaryGlobalValue { + opcode: String, + global_value: String, + }, + Binary { + opcode: String, + args: [String; 2], + }, + BinaryImm { + opcode: String, + arg: String, + imm: String, + }, + Ternary { + opcode: String, + args: [String; 3], + }, + MultiAry { + opcode: String, + args: Vec, + }, + NullAry { + opcode: String, + }, + InsertLane { + opcode: String, + args: [String; 2], + lane: String, + }, + ExtractLane { + opcode: String, + arg: String, + lane: String, + }, + Shuffle { + opcode: String, + args: [String; 2], + mask: String, + }, + IntCompare { + opcode: String, + args: [String; 2], + cond: String, + }, + IntCompareImm { + opcode: String, + arg: String, + cond: String, + imm: String, + }, + IntCond { + opcode: String, + arg: String, + cond: String, + }, + FloatCompare { + opcode: String, + args: [String; 2], + cond: String, + }, + FloatCond { + opcode: String, + arg: String, + cond: String, + }, + IntSelect { + opcode: String, + args: [String; 3], + cond: String, + }, + Jump { + opcode: String, + args: Vec, + destination: String, + }, + Branch { + opcode: String, + args: Vec, + destination: String, + }, + BranchInt { + opcode: String, + args: Vec, + cond: String, + destination: String, + }, + BranchFloat { + opcode: String, + args: Vec, + cond: String, + destination: String, + }, + BranchIcmp { + opcode: String, + args: Vec, + cond: String, + destination: String, + }, + BranchTable { + opcode: String, + arg: String, + destination: String, + table: String, + }, + BranchTableEntry { + opcode: String, + args: [String; 2], + imm: String, + table: String, + }, + BranchTableBase { + opcode: String, + table: String, + }, + IndirectJump { + opcode: String, + arg: String, + table: String, + }, + Call { + opcode: String, + args: Vec, + func_ref: String, + }, + CallIndirect { + opcode: String, + args: Vec, + sig_ref: String, + }, + FuncAddr { + opcode: String, + func_ref: String, + }, + Load { + opcode: String, + arg: String, + flags: String, + offset: String, + }, + LoadComplex { + opcode: String, + args: Vec, + flags: String, + offset: String, + }, + Store { + opcode: String, + args: [String; 2], + flags: String, + offset: String, + }, + StoreComplex { + opcode: String, + args: Vec, + flags: String, + offset: String, 
+ }, + StackLoad { + opcode: String, + stack_slot: String, + offset: String, + }, + StackStore { + opcode: String, + arg: String, + stack_slot: String, + offset: String, + }, + HeapAddr { + opcode: String, + arg: String, + heap: String, + imm: String, + }, + TableAddr { + opcode: String, + arg: String, + table: String, + offset: String, + }, + RegMove { + opcode: String, + arg: String, + src: String, + dst: String, + }, + CopySpecial { + opcode: String, + src: String, + dst: String, + }, + CopyToSsa { + opcode: String, + src: String, + }, + RegSpill { + opcode: String, + arg: String, + src: String, + dst: String, + }, + RegFill { + opcode: String, + arg: String, + src: String, + dst: String, + }, + Trap { + opcode: String, + code: String, + }, + CondTrap { + opcode: String, + arg: String, + code: String, + }, + IntCondTrap { + opcode: String, + arg: String, + cond: String, + code: String, + }, + FloatCondTrap { + opcode: String, + arg: String, + cond: String, + code: String, + }, +} + +/// Convert Cranelift IR instructions to JSON format. +pub fn get_inst_data(inst_index: Inst, func: &Function) -> SerInstData { + let inst = &func.dfg[inst_index]; + match *inst { + InstructionData::Unary { opcode, arg } => SerInstData::Unary { + opcode: opcode.to_string(), + arg: arg.to_string(), + }, + InstructionData::UnaryImm { opcode, imm } => SerInstData::UnaryImm { + opcode: opcode.to_string(), + imm: imm.to_string(), + }, + InstructionData::UnaryIeee32 { opcode, imm } => SerInstData::UnaryIeee32 { + opcode: opcode.to_string(), + imm: imm.to_string(), + }, + InstructionData::UnaryIeee64 { opcode, imm } => SerInstData::UnaryIeee64 { + opcode: opcode.to_string(), + imm: imm.to_string(), + }, + InstructionData::UnaryBool { opcode, imm } => SerInstData::UnaryBool { + opcode: opcode.to_string(), + imm, + }, + InstructionData::UnaryGlobalValue { + opcode, + global_value, + } => SerInstData::UnaryGlobalValue { + opcode: opcode.to_string(), + global_value: global_value.to_string(), + }, + InstructionData::Binary { opcode, args } => { + let hold_args = [args[0].to_string(), args[1].to_string()]; + SerInstData::Binary { + opcode: opcode.to_string(), + args: hold_args, + } + } + InstructionData::BinaryImm { opcode, arg, imm } => SerInstData::BinaryImm { + opcode: opcode.to_string(), + arg: arg.to_string(), + imm: imm.to_string(), + }, + InstructionData::Ternary { opcode, args } => { + let hold_args = [ + args[0].to_string(), + args[1].to_string(), + args[2].to_string(), + ]; + SerInstData::Ternary { + opcode: opcode.to_string(), + args: hold_args, + } + } + InstructionData::MultiAry { opcode, ref args } => { + let mut hold_args = Vec::new(); + let args_iter = args.as_slice(&func.dfg.value_lists); + for arg in args_iter { + hold_args.push(arg.to_string()); + } + + SerInstData::MultiAry { + opcode: opcode.to_string(), + args: hold_args, + } + } + InstructionData::NullAry { opcode } => SerInstData::NullAry { + opcode: opcode.to_string(), + }, + InstructionData::InsertLane { opcode, args, lane } => { + let hold_args = [args[0].to_string(), args[1].to_string()]; + SerInstData::InsertLane { + opcode: opcode.to_string(), + args: hold_args, + lane: lane.to_string(), + } + } + InstructionData::ExtractLane { opcode, arg, lane } => SerInstData::ExtractLane { + opcode: opcode.to_string(), + arg: arg.to_string(), + lane: lane.to_string(), + }, + InstructionData::UnaryConst { + opcode, + constant_handle, + } => { + let constant = func.dfg.constants.get(constant_handle); + SerInstData::UnaryImm { + opcode: opcode.to_string(), 
+ imm: format!("{:?}", constant), + } + } + InstructionData::Shuffle { opcode, args, mask } => { + let mask = func + .dfg + .immediates + .get(mask) + .expect("Expected shuffle mask to already be inserted in immediate mapping"); + SerInstData::Shuffle { + opcode: opcode.to_string(), + args: [args[0].to_string(), args[1].to_string()], + mask: format!("{:?}", mask), + } + } + InstructionData::IntCompare { opcode, args, cond } => { + let hold_args = [args[0].to_string(), args[1].to_string()]; + SerInstData::IntCompare { + opcode: opcode.to_string(), + args: hold_args, + cond: cond.to_string(), + } + } + InstructionData::IntCompareImm { + opcode, + arg, + cond, + imm, + } => SerInstData::IntCompareImm { + opcode: opcode.to_string(), + arg: arg.to_string(), + cond: cond.to_string(), + imm: imm.to_string(), + }, + InstructionData::IntCond { opcode, arg, cond } => SerInstData::IntCond { + opcode: opcode.to_string(), + arg: arg.to_string(), + cond: cond.to_string(), + }, + InstructionData::FloatCompare { opcode, args, cond } => { + let hold_args = [args[0].to_string(), args[1].to_string()]; + SerInstData::FloatCompare { + opcode: opcode.to_string(), + args: hold_args, + cond: cond.to_string(), + } + } + InstructionData::FloatCond { opcode, arg, cond } => SerInstData::FloatCond { + opcode: opcode.to_string(), + arg: arg.to_string(), + cond: cond.to_string(), + }, + InstructionData::IntSelect { opcode, args, cond } => { + let hold_args = [ + args[0].to_string(), + args[1].to_string(), + args[2].to_string(), + ]; + SerInstData::IntSelect { + opcode: opcode.to_string(), + args: hold_args, + cond: cond.to_string(), + } + } + InstructionData::Jump { + opcode, + ref args, + destination, + } => { + let mut hold_args = Vec::new(); + let args_iter = args.as_slice(&func.dfg.value_lists); + for arg in args_iter { + hold_args.push(arg.to_string()); + } + SerInstData::Jump { + opcode: opcode.to_string(), + args: hold_args, + destination: destination.to_string(), + } + } + InstructionData::Branch { + opcode, + ref args, + destination, + } => { + let mut hold_args = Vec::new(); + let args_iter = args.as_slice(&func.dfg.value_lists); + for arg in args_iter { + hold_args.push(arg.to_string()); + } + SerInstData::Branch { + opcode: opcode.to_string(), + args: hold_args, + destination: destination.to_string(), + } + } + InstructionData::BranchInt { + opcode, + ref args, + cond, + destination, + } => { + let mut hold_args = Vec::new(); + let args_iter = args.as_slice(&func.dfg.value_lists); + for arg in args_iter { + hold_args.push(arg.to_string()); + } + SerInstData::BranchInt { + opcode: opcode.to_string(), + args: hold_args, + cond: cond.to_string(), + destination: destination.to_string(), + } + } + InstructionData::BranchFloat { + opcode, + ref args, + cond, + destination, + } => { + let mut hold_args = Vec::new(); + let args_iter = args.as_slice(&func.dfg.value_lists); + for arg in args_iter { + hold_args.push(arg.to_string()); + } + SerInstData::BranchFloat { + opcode: opcode.to_string(), + args: hold_args, + cond: cond.to_string(), + destination: destination.to_string(), + } + } + InstructionData::BranchIcmp { + opcode, + ref args, + cond, + destination, + } => { + let mut hold_args = Vec::new(); + let args_iter = args.as_slice(&func.dfg.value_lists); + for arg in args_iter { + hold_args.push(arg.to_string()); + } + SerInstData::BranchIcmp { + opcode: opcode.to_string(), + args: hold_args, + cond: cond.to_string(), + destination: destination.to_string(), + } + } + InstructionData::BranchTable { + opcode, + arg, 
+ destination, + table, + } => SerInstData::BranchTable { + opcode: opcode.to_string(), + arg: arg.to_string(), + destination: destination.to_string(), + table: table.to_string(), + }, + InstructionData::BranchTableBase { opcode, table } => SerInstData::BranchTableBase { + opcode: opcode.to_string(), + table: table.to_string(), + }, + InstructionData::BranchTableEntry { + opcode, + args, + imm, + table, + } => { + let hold_args = [args[0].to_string(), args[1].to_string()]; + SerInstData::BranchTableEntry { + opcode: opcode.to_string(), + args: hold_args, + imm: imm.to_string(), + table: table.to_string(), + } + } + InstructionData::IndirectJump { opcode, arg, table } => SerInstData::IndirectJump { + opcode: opcode.to_string(), + arg: arg.to_string(), + table: table.to_string(), + }, + InstructionData::Call { + opcode, + ref args, + func_ref, + } => { + let mut hold_args = Vec::new(); + let args_iter = args.as_slice(&func.dfg.value_lists); + for arg in args_iter { + hold_args.push(arg.to_string()); + } + SerInstData::Call { + opcode: opcode.to_string(), + args: hold_args, + func_ref: func_ref.to_string(), + } + } + InstructionData::CallIndirect { + opcode, + ref args, + sig_ref, + } => { + let mut hold_args = Vec::new(); + let args_iter = args.as_slice(&func.dfg.value_lists); + for arg in args_iter { + hold_args.push(arg.to_string()); + } + SerInstData::CallIndirect { + opcode: opcode.to_string(), + args: hold_args, + sig_ref: sig_ref.to_string(), + } + } + InstructionData::FuncAddr { opcode, func_ref } => SerInstData::FuncAddr { + opcode: opcode.to_string(), + func_ref: func_ref.to_string(), + }, + InstructionData::Load { + opcode, + arg, + flags, + offset, + } => SerInstData::Load { + opcode: opcode.to_string(), + arg: arg.to_string(), + flags: flags.to_string(), + offset: offset.to_string(), + }, + InstructionData::LoadComplex { + opcode, + ref args, + flags, + offset, + } => { + let mut hold_args = Vec::new(); + let args_iter = args.as_slice(&func.dfg.value_lists); + for arg in args_iter { + hold_args.push(arg.to_string()); + } + SerInstData::LoadComplex { + opcode: opcode.to_string(), + args: hold_args, + flags: flags.to_string(), + offset: offset.to_string(), + } + } + InstructionData::Store { + opcode, + args, + flags, + offset, + } => { + let hold_args = [args[0].to_string(), args[1].to_string()]; + SerInstData::Store { + opcode: opcode.to_string(), + args: hold_args, + flags: flags.to_string(), + offset: offset.to_string(), + } + } + InstructionData::StoreComplex { + opcode, + ref args, + flags, + offset, + } => { + let mut hold_args = Vec::new(); + let args_iter = args.as_slice(&func.dfg.value_lists); + for arg in args_iter { + hold_args.push(arg.to_string()); + } + SerInstData::StoreComplex { + opcode: opcode.to_string(), + args: hold_args, + flags: flags.to_string(), + offset: offset.to_string(), + } + } + InstructionData::StackLoad { + opcode, + stack_slot, + offset, + } => SerInstData::StackLoad { + opcode: opcode.to_string(), + stack_slot: stack_slot.to_string(), + offset: offset.to_string(), + }, + InstructionData::StackStore { + opcode, + arg, + stack_slot, + offset, + } => SerInstData::StackStore { + opcode: opcode.to_string(), + arg: arg.to_string(), + stack_slot: stack_slot.to_string(), + offset: offset.to_string(), + }, + InstructionData::HeapAddr { + opcode, + arg, + heap, + imm, + } => SerInstData::HeapAddr { + opcode: opcode.to_string(), + arg: arg.to_string(), + heap: heap.to_string(), + imm: imm.to_string(), + }, + InstructionData::TableAddr { + opcode, + arg, + 
table,
+            offset,
+        } => SerInstData::TableAddr {
+            opcode: opcode.to_string(),
+            arg: arg.to_string(),
+            table: table.to_string(),
+            offset: offset.to_string(),
+        },
+        InstructionData::RegMove {
+            opcode,
+            arg,
+            src,
+            dst,
+        } => SerInstData::RegMove {
+            opcode: opcode.to_string(),
+            arg: arg.to_string(),
+            src: src.to_string(),
+            dst: dst.to_string(),
+        },
+        InstructionData::CopySpecial { opcode, src, dst } => SerInstData::CopySpecial {
+            opcode: opcode.to_string(),
+            src: src.to_string(),
+            dst: dst.to_string(),
+        },
+        InstructionData::CopyToSsa { opcode, src } => SerInstData::CopyToSsa {
+            opcode: opcode.to_string(),
+            src: src.to_string(),
+        },
+        InstructionData::RegSpill {
+            opcode,
+            arg,
+            src,
+            dst,
+        } => SerInstData::RegSpill {
+            opcode: opcode.to_string(),
+            arg: arg.to_string(),
+            src: src.to_string(),
+            dst: dst.to_string(),
+        },
+        InstructionData::RegFill {
+            opcode,
+            arg,
+            src,
+            dst,
+        } => SerInstData::RegFill {
+            opcode: opcode.to_string(),
+            arg: arg.to_string(),
+            src: src.to_string(),
+            dst: dst.to_string(),
+        },
+        InstructionData::Trap { opcode, code } => SerInstData::Trap {
+            opcode: opcode.to_string(),
+            code: code.to_string(),
+        },
+        InstructionData::CondTrap { opcode, arg, code } => SerInstData::CondTrap {
+            opcode: opcode.to_string(),
+            arg: arg.to_string(),
+            code: code.to_string(),
+        },
+        InstructionData::IntCondTrap {
+            opcode,
+            arg,
+            cond,
+            code,
+        } => SerInstData::IntCondTrap {
+            opcode: opcode.to_string(),
+            arg: arg.to_string(),
+            cond: cond.to_string(),
+            code: code.to_string(),
+        },
+        InstructionData::FloatCondTrap {
+            opcode,
+            arg,
+            cond,
+            code,
+        } => SerInstData::FloatCondTrap {
+            opcode: opcode.to_string(),
+            arg: arg.to_string(),
+            cond: cond.to_string(),
+            code: code.to_string(),
+        },
+    }
+}
+
+/// Serializable version of Cranelift IR instructions.
+#[derive(Clone, Deserialize, Serialize, Debug)]
+pub struct SerInst {
+    pub inst_name: String,
+    pub inst_data: SerInstData,
+}
+
+impl SerInst {
+    pub fn new(inst: Inst, func: &Function) -> Self {
+        Self {
+            inst_name: inst.to_string(),
+            inst_data: get_inst_data(inst, func),
+        }
+    }
+}
+
+/// Serializable version of Cranelift IR Blocks.
+#[derive(Clone, Deserialize, Serialize, Debug)]
+pub struct SerBlock {
+    pub block: String,
+    pub params: Vec<String>,
+    pub insts: Vec<SerInst>,
+}
+
+impl SerBlock {
+    pub fn new(name: String) -> Self {
+        Self {
+            block: name,
+            params: Vec::new(),
+            insts: Vec::new(),
+        }
+    }
+}
+
+pub fn populate_inst(func: &Function, block: Block) -> Vec<SerInst> {
+    let mut ser_vec: Vec<SerInst> = Vec::new();
+    let ret_iter = func.layout.block_insts(block);
+    for inst in ret_iter {
+        let ser_inst: SerInst = SerInst::new(inst, &func);
+        ser_vec.push(ser_inst);
+    }
+    ser_vec
+}
+
+/// Translate Block parameters into serializable parameters.
+pub fn populate_params(func: &Function, block: Block) -> Vec<String> {
+    let mut ser_vec: Vec<String> = Vec::new();
+    let parameters = func.dfg.block_params(block);
+    for param in parameters {
+        ser_vec.push(param.to_string());
+    }
+    ser_vec
+}
+
+/// Serializable Data Flow Graph.
+#[derive(Deserialize, Serialize, Debug)]
+pub struct SerDataFlowGraph {
+    blocks: Vec<SerBlock>,
+}
+
+/// Serialize all parts of the Cranelift Block data structure; this includes the
+/// name, parameters, and instructions.
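+///
+/// A sketch of how this is meant to be driven (`clif_source` is assumed to be a
+/// CLIF text string; `parse_functions` comes from `cranelift-reader`; not
+/// compiled as a doctest):
+///
+/// ```ignore
+/// let funcs = cranelift_reader::parse_functions(clif_source).unwrap();
+/// let ser_blocks = populate_blocks(&funcs[0]);
+/// ```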
+pub fn populate_blocks(func: &Function) -> Vec<SerBlock> {
+    let mut block_vec: Vec<SerBlock> = Vec::new();
+    for block in func.layout.blocks() {
+        let mut ser_block: SerBlock = SerBlock::new(block.to_string());
+        ser_block.params = populate_params(&func, block);
+        ser_block.insts = populate_inst(&func, block);
+        block_vec.push(ser_block);
+    }
+    block_vec
+}
+
+/// Serializable Cranelift IR data flow graph, including all blocks.
+impl SerDataFlowGraph {
+    pub fn create_new(func: &Function) -> Self {
+        Self {
+            blocks: populate_blocks(func),
+        }
+    }
+
+    pub fn new(func: &Function) -> Self {
+        Self::create_new(func)
+    }
+}
+
+/// Serializable signature including function parameters and returns.
+#[derive(Serialize, Deserialize, Debug)]
+pub struct SerSignature {
+    pub func_params: Vec<String>,
+    pub func_returns: Vec<String>,
+}
+
+impl SerSignature {
+    /// Create the serializable signature data structure from a Cranelift IR function signature.
+    fn create_new(sig: &Signature) -> Self {
+        let mut params_vec: Vec<String> = Vec::new();
+        let mut returns_vec: Vec<String> = Vec::new();
+        for param in &sig.params {
+            params_vec.push(param.to_string());
+        }
+        for ret in &sig.returns {
+            returns_vec.push(ret.to_string());
+        }
+        Self {
+            func_params: params_vec,
+            func_returns: returns_vec,
+        }
+    }
+
+    pub fn new(func: &Function) -> Self {
+        Self::create_new(&func.signature)
+    }
+}
+
+/// Serializable Function type, including name, signature, global values, and data flow graph.
+#[derive(Serialize, Deserialize, Debug)]
+pub struct SerFunction {
+    pub name: String,
+    pub signature: SerSignature,
+    pub globals: Vec<String>,
+    pub dfg: SerDataFlowGraph,
+}
+
+impl SerFunction {
+    /// Creates serializable global values, as well as the function's signature, name, and data
+    /// flow graph.
+    fn create_new(func: &Function) -> Self {
+        let mut global_vec: Vec<String> = Vec::new();
+        for (glob_name, _) in func.global_values.iter() {
+            global_vec.push(glob_name.to_string());
+        }
+        Self {
+            name: func.name.to_string(),
+            signature: SerSignature::new(&func),
+            globals: global_vec,
+            dfg: SerDataFlowGraph::new(&func),
+        }
+    }
+
+    pub fn new(func: &Function) -> Self {
+        Self::create_new(func)
+    }
+}
+
+/// A `SerObj` is required for deserialization; it contains all of the functions from the file
+/// being serialized. Each file yields one `SerObj`, with every `SerFunction` contained inside it.
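+///
+/// A sketch of the intended JSON round-trip (`serde_json` is already a
+/// dependency of this crate; `funcs` is assumed to come from
+/// `cranelift_reader::parse_functions`):
+///
+/// ```ignore
+/// let obj = SerObj::new(&funcs);
+/// let json = serde_json::to_string_pretty(&obj).unwrap();
+/// let round_tripped: SerObj = serde_json::from_str(&json).unwrap();
+/// ```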
+#[derive(Serialize, Deserialize, Debug)]
+pub struct SerObj {
+    pub functions: Vec<SerFunction>,
+}
+
+impl SerObj {
+    fn create_new(funcs: Vec<SerFunction>) -> Self {
+        Self { functions: funcs }
+    }
+
+    pub fn new(funcs: &[Function]) -> Self {
+        let mut func_vec: Vec<SerFunction> = Vec::new();
+        for func in funcs {
+            let ser_func: SerFunction = SerFunction::new(&func);
+            func_vec.push(ser_func);
+        }
+        Self::create_new(func_vec)
+    }
+}
diff --git a/cranelift/simplejit/Cargo.toml b/cranelift/simplejit/Cargo.toml
new file mode 100644
index 0000000000..845551ff3a
--- /dev/null
+++ b/cranelift/simplejit/Cargo.toml
@@ -0,0 +1,41 @@
+[package]
+name = "cranelift-simplejit"
+version = "0.59.0"
+authors = ["The Cranelift Project Developers"]
+description = "A simple JIT library backed by Cranelift"
+repository = "https://github.com/bytecodealliance/cranelift"
+documentation = "https://cranelift.readthedocs.io/"
+license = "Apache-2.0 WITH LLVM-exception"
+readme = "README.md"
+edition = "2018"
+
+[dependencies]
+cranelift-module = { path = "../module", version = "0.59.0" }
+cranelift-native = { path = "../native", version = "0.59.0" }
+region = "2.0.0"
+libc = { version = "0.2.42" }
+errno = "0.2.4"
+target-lexicon = "0.10"
+memmap = { version = "0.7.0", optional = true }
+
+[dependencies.cranelift-codegen]
+path = "../codegen"
+version = "0.59.0"
+default-features = false
+features = ["std"]
+
+[target.'cfg(target_os = "windows")'.dependencies]
+winapi = { version = "0.3", features = ["winbase", "memoryapi"] }
+
+[features]
+selinux-fix = ['memmap']
+default = []
+
+[dev-dependencies]
+cranelift = { path = "../umbrella", version = "0.59.0" }
+cranelift-frontend = { path = "../frontend", version = "0.59.0" }
+cranelift-entity = { path = "../entity", version = "0.59.0" }
+
+[badges]
+maintenance = { status = "experimental" }
+travis-ci = { repository = "bytecodealliance/cranelift" }
diff --git a/cranelift/simplejit/LICENSE b/cranelift/simplejit/LICENSE
new file mode 100644
index 0000000000..f9d81955f4
--- /dev/null
+++ b/cranelift/simplejit/LICENSE
@@ -0,0 +1,220 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
+
diff --git a/cranelift/simplejit/README.md b/cranelift/simplejit/README.md
new file mode 100644
index 0000000000..d4f393cb31
--- /dev/null
+++ b/cranelift/simplejit/README.md
@@ -0,0 +1,8 @@
+This crate provides a simple JIT library that uses
+[Cranelift](https://crates.io/crates/cranelift).
+
+This crate is extremely experimental.
+
+See the [example program] for a brief overview of how to use this.
+
+[example program]: https://github.com/bytecodealliance/cranelift/tree/master/cranelift-simplejit/examples/simplejit-minimal.rs
diff --git a/cranelift/simplejit/examples/simplejit-minimal.rs b/cranelift/simplejit/examples/simplejit-minimal.rs
new file mode 100644
index 0000000000..ade2cd3422
--- /dev/null
+++ b/cranelift/simplejit/examples/simplejit-minimal.rs
@@ -0,0 +1,79 @@
+use cranelift::prelude::*;
+use cranelift_module::{default_libcall_names, Linkage, Module};
+use cranelift_simplejit::{SimpleJITBackend, SimpleJITBuilder};
+use std::mem;
+
+fn main() {
+    let mut module: Module<SimpleJITBackend> =
+        Module::new(SimpleJITBuilder::new(default_libcall_names()));
+    let mut ctx = module.make_context();
+    let mut func_ctx = FunctionBuilderContext::new();
+
+    let mut sig_a = module.make_signature();
+    sig_a.params.push(AbiParam::new(types::I32));
+    sig_a.returns.push(AbiParam::new(types::I32));
+
+    let mut sig_b = module.make_signature();
+    sig_b.returns.push(AbiParam::new(types::I32));
+
+    let func_a = module
+        .declare_function("a", Linkage::Local, &sig_a)
+        .unwrap();
+    let func_b = module
+        .declare_function("b", Linkage::Local, &sig_b)
+        .unwrap();
+
+    ctx.func.signature = sig_a;
+    ctx.func.name = ExternalName::user(0, func_a.as_u32());
+    {
+        let mut bcx: FunctionBuilder = FunctionBuilder::new(&mut ctx.func, &mut func_ctx);
+        let block = bcx.create_block();
+
+        bcx.switch_to_block(block);
+        bcx.append_block_params_for_function_params(block);
+        let param = bcx.block_params(block)[0];
+        let cst = bcx.ins().iconst(types::I32, 37);
+        let add = bcx.ins().iadd(cst, param);
+        bcx.ins().return_(&[add]);
+        bcx.seal_all_blocks();
+        bcx.finalize();
+    }
+    module.define_function(func_a, &mut ctx).unwrap();
+    module.clear_context(&mut ctx);
+
+    ctx.func.signature = sig_b;
+    ctx.func.name = ExternalName::user(0, func_b.as_u32());
+    {
+        let mut bcx: FunctionBuilder = FunctionBuilder::new(&mut ctx.func, &mut func_ctx);
+        let block = bcx.create_block();
+
+        bcx.switch_to_block(block);
+        let local_func = module.declare_func_in_func(func_a, &mut bcx.func);
+        let arg = bcx.ins().iconst(types::I32, 5);
+        let call = bcx.ins().call(local_func, &[arg]);
+        let value = {
+            let results = bcx.inst_results(call);
+            assert_eq!(results.len(), 1);
+            results[0].clone()
+        };
+        bcx.ins().return_(&[value]);
+        bcx.seal_all_blocks();
+        bcx.finalize();
+    }
+    module.define_function(func_b, &mut ctx).unwrap();
+    module.clear_context(&mut ctx);
+
+    // Perform linking.
+    module.finalize_definitions();
+
+    // Get a raw pointer to the generated code.
+    let code_b = module.get_finalized_function(func_b);
+
+    // Cast it to a rust function pointer type.
+    let ptr_b = unsafe { mem::transmute::<_, fn() -> u32>(code_b) };
+
+    // Call it!
+    let res = ptr_b();
+
+    assert_eq!(res, 42);
+}
diff --git a/cranelift/simplejit/src/backend.rs b/cranelift/simplejit/src/backend.rs
new file mode 100644
index 0000000000..08bc565133
--- /dev/null
+++ b/cranelift/simplejit/src/backend.rs
@@ -0,0 +1,687 @@
+//! Defines `SimpleJITBackend`.
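+//!
+//! A rough sketch of how this backend is driven (not a doctest; `func_id` is
+//! assumed to have been declared earlier, and `examples/simplejit-minimal.rs`
+//! in this crate shows a complete program):
+//!
+//! ```ignore
+//! let builder = SimpleJITBuilder::new(cranelift_module::default_libcall_names());
+//! let mut module: Module<SimpleJITBackend> = Module::new(builder);
+//! // ...declare and define functions through `module`...
+//! module.finalize_definitions();
+//! let code = module.get_finalized_function(func_id);
+//! ```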
+
+use crate::memory::Memory;
+use cranelift_codegen::binemit::{
+    Addend, CodeOffset, NullTrapSink, Reloc, RelocSink, Stackmap, StackmapSink,
+};
+use cranelift_codegen::isa::TargetIsa;
+use cranelift_codegen::{self, ir, settings};
+use cranelift_module::{
+    Backend, DataContext, DataDescription, DataId, FuncId, Init, Linkage, ModuleNamespace,
+    ModuleResult, TrapSite,
+};
+use cranelift_native;
+#[cfg(not(windows))]
+use libc;
+use std::collections::HashMap;
+use std::ffi::CString;
+use std::io::Write;
+use std::ptr;
+use target_lexicon::PointerWidth;
+#[cfg(windows)]
+use winapi;
+
+const EXECUTABLE_DATA_ALIGNMENT: u8 = 0x10;
+const WRITABLE_DATA_ALIGNMENT: u8 = 0x8;
+const READONLY_DATA_ALIGNMENT: u8 = 0x1;
+
+/// A builder for `SimpleJITBackend`.
+pub struct SimpleJITBuilder {
+    isa: Box<dyn TargetIsa>,
+    symbols: HashMap<String, *const u8>,
+    libcall_names: Box<dyn Fn(ir::LibCall) -> String>,
+}
+
+impl SimpleJITBuilder {
+    /// Create a new `SimpleJITBuilder`.
+    ///
+    /// The `libcall_names` function provides a way to translate `cranelift_codegen`'s `ir::LibCall`
+    /// enum to symbols. LibCalls are inserted in the IR as part of the legalization for certain
+    /// floating point instructions, and for stack probes. If you don't know what to use for this
+    /// argument, use `cranelift_module::default_libcall_names()`.
+    pub fn new(libcall_names: Box<dyn Fn(ir::LibCall) -> String>) -> Self {
+        let flag_builder = settings::builder();
+        let isa_builder = cranelift_native::builder().unwrap_or_else(|msg| {
+            panic!("host machine is not supported: {}", msg);
+        });
+        let isa = isa_builder.finish(settings::Flags::new(flag_builder));
+        Self::with_isa(isa, libcall_names)
+    }
+
+    /// Create a new `SimpleJITBuilder` with an arbitrary target. This is mainly
+    /// useful for testing.
+    ///
+    /// SimpleJIT requires a `TargetIsa` configured for non-PIC.
+    ///
+    /// To create a `SimpleJITBuilder` for native use, use the `new` constructor
+    /// instead.
+    ///
+    /// The `libcall_names` function provides a way to translate `cranelift_codegen`'s `ir::LibCall`
+    /// enum to symbols. LibCalls are inserted in the IR as part of the legalization for certain
+    /// floating point instructions, and for stack probes. If you don't know what to use for this
+    /// argument, use `cranelift_module::default_libcall_names()`.
+    pub fn with_isa(
+        isa: Box<dyn TargetIsa>,
+        libcall_names: Box<dyn Fn(ir::LibCall) -> String>,
+    ) -> Self {
+        debug_assert!(!isa.flags().is_pic(), "SimpleJIT requires non-PIC code");
+        let symbols = HashMap::new();
+        Self {
+            isa,
+            symbols,
+            libcall_names,
+        }
+    }
+
+    /// Define a symbol in the internal symbol table.
+    ///
+    /// The JIT will use the symbol table to resolve names that are declared,
+    /// but not defined, in the module being compiled. A common example is
+    /// external functions. With this method, functions and data can be exposed
+    /// to the code being compiled which are defined by the host.
+    ///
+    /// If a symbol is defined more than once, the most recent definition will
+    /// be retained.
+    ///
+    /// If the JIT fails to find a symbol in its internal table, it will fall
+    /// back to a platform-specific search (this typically involves searching
+    /// the current process for public symbols, followed by searching the
+    /// platform's C runtime).
+    pub fn symbol<K>(&mut self, name: K, ptr: *const u8) -> &Self
+    where
+        K: Into<String>,
+    {
+        self.symbols.insert(name.into(), ptr);
+        self
+    }
+
+    /// Define multiple symbols in the internal symbol table.
+    ///
+    /// Using this is equivalent to calling `symbol` on each element.
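+    ///
+    /// For illustration only (the symbol name and host function here are
+    /// invented):
+    ///
+    /// ```ignore
+    /// extern "C" fn host_add(a: i32, b: i32) -> i32 { a + b }
+    /// builder.symbols(vec![("host_add", host_add as *const u8)]);
+    /// ```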
+    pub fn symbols<It, K>(&mut self, symbols: It) -> &Self
+    where
+        It: IntoIterator<Item = (K, *const u8)>,
+        K: Into<String>,
+    {
+        for (name, ptr) in symbols {
+            self.symbols.insert(name.into(), ptr);
+        }
+        self
+    }
+}
+
+/// A `SimpleJITBackend` implements `Backend` and emits code and data into memory where it can be
+/// directly called and accessed.
+///
+/// See the `SimpleJITBuilder` for a convenient way to construct `SimpleJITBackend` instances.
+pub struct SimpleJITBackend {
+    isa: Box<dyn TargetIsa>,
+    symbols: HashMap<String, *const u8>,
+    libcall_names: Box<dyn Fn(ir::LibCall) -> String>,
+    memory: SimpleJITMemoryHandle,
+}
+
+/// A record of a relocation to perform.
+struct RelocRecord {
+    offset: CodeOffset,
+    reloc: Reloc,
+    name: ir::ExternalName,
+    addend: Addend,
+}
+
+struct StackmapRecord {
+    #[allow(dead_code)]
+    offset: CodeOffset,
+    #[allow(dead_code)]
+    stackmap: Stackmap,
+}
+
+pub struct SimpleJITCompiledFunction {
+    code: *mut u8,
+    size: usize,
+    relocs: Vec<RelocRecord>,
+}
+
+pub struct SimpleJITCompiledData {
+    storage: *mut u8,
+    size: usize,
+    relocs: Vec<RelocRecord>,
+}
+
+/// A handle to allow freeing memory allocated by the `Backend`.
+pub struct SimpleJITMemoryHandle {
+    code: Memory,
+    readonly: Memory,
+    writable: Memory,
+}
+
+impl SimpleJITBackend {
+    fn lookup_symbol(&self, name: &str) -> *const u8 {
+        match self.symbols.get(name) {
+            Some(&ptr) => ptr,
+            None => lookup_with_dlsym(name),
+        }
+    }
+
+    fn get_definition(
+        &self,
+        namespace: &ModuleNamespace<Self>,
+        name: &ir::ExternalName,
+    ) -> *const u8 {
+        match *name {
+            ir::ExternalName::User { .. } => {
+                if namespace.is_function(name) {
+                    let (def, name_str, _signature) = namespace.get_function_definition(&name);
+                    match def {
+                        Some(compiled) => compiled.code,
+                        None => self.lookup_symbol(name_str),
+                    }
+                } else {
+                    let (def, name_str, _writable) = namespace.get_data_definition(&name);
+                    match def {
+                        Some(compiled) => compiled.storage,
+                        None => self.lookup_symbol(name_str),
+                    }
+                }
+            }
+            ir::ExternalName::LibCall(ref libcall) => {
+                let sym = (self.libcall_names)(*libcall);
+                self.lookup_symbol(&sym)
+            }
+            _ => panic!("invalid ExternalName {}", name),
+        }
+    }
+
+    fn record_function_for_perf(&self, ptr: *mut u8, size: usize, name: &str) {
+        // The Linux perf tool supports JIT code via a /tmp/perf-$PID.map file,
+        // which contains memory regions and their associated names. If we
+        // are profiling with perf and saving binaries to PERF_BUILDID_DIR
+        // for post-profile analysis, write information about each function
+        // we define.
+        if cfg!(target_os = "linux") && ::std::env::var_os("PERF_BUILDID_DIR").is_some() {
+            let mut map_file = ::std::fs::OpenOptions::new()
+                .create(true)
+                .append(true)
+                .open(format!("/tmp/perf-{}.map", ::std::process::id()))
+                .unwrap();
+
+            let _ = writeln!(map_file, "{:x} {:x} {}", ptr as usize, size, name);
+        }
+    }
+}
+
+impl<'simple_jit_backend> Backend for SimpleJITBackend {
+    type Builder = SimpleJITBuilder;
+
+    /// SimpleJIT compiled function and data objects may have outstanding
+    /// relocations that need to be performed before the memory can be used.
+    /// These relocations are performed within `finalize_function` and
+    /// `finalize_data`.
+    type CompiledFunction = SimpleJITCompiledFunction;
+    type CompiledData = SimpleJITCompiledData;
+
+    /// SimpleJIT emits code and data into memory, and provides raw pointers
+    /// to them. They are valid for the remainder of the program's life, unless
+    /// [`free_memory`] is used.
+    ///
+    /// [`free_memory`]: #method.free_memory
+    type FinalizedFunction = *const u8;
+    type FinalizedData = (*mut u8, usize);
+
+    /// SimpleJIT emits code and data into memory as it processes them, so it
+    /// doesn't need to provide anything after the `Module` is complete.
+    /// The handle object that is returned can optionally be used to free
+    /// allocated memory if required.
+    type Product = SimpleJITMemoryHandle;
+
+    /// Create a new `SimpleJITBackend`.
+    fn new(builder: SimpleJITBuilder) -> Self {
+        let memory = SimpleJITMemoryHandle {
+            code: Memory::new(),
+            readonly: Memory::new(),
+            writable: Memory::new(),
+        };
+
+        Self {
+            isa: builder.isa,
+            symbols: builder.symbols,
+            libcall_names: builder.libcall_names,
+            memory,
+        }
+    }
+
+    fn isa(&self) -> &dyn TargetIsa {
+        &*self.isa
+    }
+
+    fn declare_function(&mut self, _id: FuncId, _name: &str, _linkage: Linkage) {
+        // Nothing to do.
+    }
+
+    fn declare_data(
+        &mut self,
+        _id: DataId,
+        _name: &str,
+        _linkage: Linkage,
+        _writable: bool,
+        tls: bool,
+        _align: Option<u8>,
+    ) {
+        assert!(!tls, "SimpleJIT doesn't yet support TLS");
+        // Nothing to do.
+    }
+
+    fn define_function(
+        &mut self,
+        _id: FuncId,
+        name: &str,
+        ctx: &cranelift_codegen::Context,
+        _namespace: &ModuleNamespace<Self>,
+        code_size: u32,
+    ) -> ModuleResult<Self::CompiledFunction> {
+        let size = code_size as usize;
+        let ptr = self
+            .memory
+            .code
+            .allocate(size, EXECUTABLE_DATA_ALIGNMENT)
+            .expect("TODO: handle OOM etc.");
+
+        self.record_function_for_perf(ptr, size, name);
+
+        let mut reloc_sink = SimpleJITRelocSink::new();
+        // Ignore traps for now. For now, frontends should just avoid generating code
+        // that traps.
+        let mut trap_sink = NullTrapSink {};
+        let mut stackmap_sink = SimpleJITStackmapSink::new();
+        unsafe {
+            ctx.emit_to_memory(
+                &*self.isa,
+                ptr,
+                &mut reloc_sink,
+                &mut trap_sink,
+                &mut stackmap_sink,
+            )
+        };
+
+        Ok(Self::CompiledFunction {
+            code: ptr,
+            size,
+            relocs: reloc_sink.relocs,
+        })
+    }
+
+    fn define_function_bytes(
+        &mut self,
+        _id: FuncId,
+        name: &str,
+        bytes: &[u8],
+        _namespace: &ModuleNamespace<Self>,
+        _traps: Vec<TrapSite>,
+    ) -> ModuleResult<Self::CompiledFunction> {
+        let size = bytes.len();
+        let ptr = self
+            .memory
+            .code
+            .allocate(size, EXECUTABLE_DATA_ALIGNMENT)
+            .expect("TODO: handle OOM etc.");
+
+        self.record_function_for_perf(ptr, size, name);
+
+        unsafe {
+            ptr::copy_nonoverlapping(bytes.as_ptr(), ptr, size);
+        }
+
+        Ok(Self::CompiledFunction {
+            code: ptr,
+            size,
+            relocs: vec![],
+        })
+    }
+
+    fn define_data(
+        &mut self,
+        _id: DataId,
+        _name: &str,
+        writable: bool,
+        tls: bool,
+        align: Option<u8>,
+        data: &DataContext,
+        _namespace: &ModuleNamespace<Self>,
+    ) -> ModuleResult<Self::CompiledData> {
+        assert!(!tls, "SimpleJIT doesn't yet support TLS");
+
+        let &DataDescription {
+            ref init,
+            ref function_decls,
+            ref data_decls,
+            ref function_relocs,
+            ref data_relocs,
+        } = data.description();
+
+        let size = init.size();
+        let storage = if writable {
+            self.memory
+                .writable
+                .allocate(size, align.unwrap_or(WRITABLE_DATA_ALIGNMENT))
+                .expect("TODO: handle OOM etc.")
+        } else {
+            self.memory
+                .readonly
+                .allocate(size, align.unwrap_or(READONLY_DATA_ALIGNMENT))
+                .expect("TODO: handle OOM etc.")
+        };
+
+        match *init {
+            Init::Uninitialized => {
+                panic!("data is not initialized yet");
+            }
+            Init::Zeros { ..
} => {
+                unsafe { ptr::write_bytes(storage, 0, size) };
+            }
+            Init::Bytes { ref contents } => {
+                let src = contents.as_ptr();
+                unsafe { ptr::copy_nonoverlapping(src, storage, size) };
+            }
+        }
+
+        let reloc = match self.isa.triple().pointer_width().unwrap() {
+            PointerWidth::U16 => panic!(),
+            PointerWidth::U32 => Reloc::Abs4,
+            PointerWidth::U64 => Reloc::Abs8,
+        };
+        let mut relocs = Vec::new();
+        for &(offset, id) in function_relocs {
+            relocs.push(RelocRecord {
+                reloc,
+                offset,
+                name: function_decls[id].clone(),
+                addend: 0,
+            });
+        }
+        for &(offset, id, addend) in data_relocs {
+            relocs.push(RelocRecord {
+                reloc,
+                offset,
+                name: data_decls[id].clone(),
+                addend,
+            });
+        }
+
+        Ok(Self::CompiledData {
+            storage,
+            size,
+            relocs,
+        })
+    }
+
+    fn write_data_funcaddr(
+        &mut self,
+        _data: &mut Self::CompiledData,
+        _offset: usize,
+        _what: ir::FuncRef,
+    ) {
+        unimplemented!();
+    }
+
+    fn write_data_dataaddr(
+        &mut self,
+        _data: &mut Self::CompiledData,
+        _offset: usize,
+        _what: ir::GlobalValue,
+        _usize: Addend,
+    ) {
+        unimplemented!();
+    }
+
+    fn finalize_function(
+        &mut self,
+        _id: FuncId,
+        func: &Self::CompiledFunction,
+        namespace: &ModuleNamespace<Self>,
+    ) -> Self::FinalizedFunction {
+        use std::ptr::write_unaligned;
+
+        for &RelocRecord {
+            reloc,
+            offset,
+            ref name,
+            addend,
+        } in &func.relocs
+        {
+            let ptr = func.code;
+            debug_assert!((offset as usize) < func.size);
+            let at = unsafe { ptr.offset(offset as isize) };
+            let base = self.get_definition(namespace, name);
+            // TODO: Handle overflow.
+            let what = unsafe { base.offset(addend as isize) };
+            match reloc {
+                Reloc::Abs4 => {
+                    // TODO: Handle overflow.
+                    #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
+                    unsafe {
+                        write_unaligned(at as *mut u32, what as u32)
+                    };
+                }
+                Reloc::Abs8 => {
+                    #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
+                    unsafe {
+                        write_unaligned(at as *mut u64, what as u64)
+                    };
+                }
+                Reloc::X86PCRel4 | Reloc::X86CallPCRel4 => {
+                    // TODO: Handle overflow.
+                    let pcrel = ((what as isize) - (at as isize)) as i32;
+                    #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
+                    unsafe {
+                        write_unaligned(at as *mut i32, pcrel)
+                    };
+                }
+                Reloc::X86GOTPCRel4 | Reloc::X86CallPLTRel4 => panic!("unexpected PIC relocation"),
+                _ => unimplemented!(),
+            }
+        }
+        func.code
+    }
+
+    fn get_finalized_function(&self, func: &Self::CompiledFunction) -> Self::FinalizedFunction {
+        func.code
+    }
+
+    fn finalize_data(
+        &mut self,
+        _id: DataId,
+        data: &Self::CompiledData,
+        namespace: &ModuleNamespace<Self>,
+    ) -> Self::FinalizedData {
+        use std::ptr::write_unaligned;
+
+        for &RelocRecord {
+            reloc,
+            offset,
+            ref name,
+            addend,
+        } in &data.relocs
+        {
+            let ptr = data.storage;
+            debug_assert!((offset as usize) < data.size);
+            let at = unsafe { ptr.offset(offset as isize) };
+            let base = self.get_definition(namespace, name);
+            // TODO: Handle overflow.
+            let what = unsafe { base.offset(addend as isize) };
+            match reloc {
+                Reloc::Abs4 => {
+                    // TODO: Handle overflow.
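+                    // An absolute 4-byte relocation: the truncated low 32 bits
+                    // of the target address are patched into the data at `at`.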
+                    #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
+                    unsafe {
+                        write_unaligned(at as *mut u32, what as u32)
+                    };
+                }
+                Reloc::Abs8 => {
+                    #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
+                    unsafe {
+                        write_unaligned(at as *mut u64, what as u64)
+                    };
+                }
+                Reloc::X86PCRel4
+                | Reloc::X86CallPCRel4
+                | Reloc::X86GOTPCRel4
+                | Reloc::X86CallPLTRel4 => panic!("unexpected text relocation in data"),
+                _ => unimplemented!(),
+            }
+        }
+        (data.storage, data.size)
+    }
+
+    fn get_finalized_data(&self, data: &Self::CompiledData) -> Self::FinalizedData {
+        (data.storage, data.size)
+    }
+
+    fn publish(&mut self) {
+        // Now that we're done patching, prepare the memory for execution!
+        self.memory.readonly.set_readonly();
+        self.memory.code.set_readable_and_executable();
+    }
+
+    /// SimpleJIT emits code and data into memory as it processes them. This
+    /// method performs no additional processing, but returns a handle which
+    /// allows freeing the allocated memory. Otherwise said memory is leaked
+    /// to enable safe handling of the resulting pointers.
+    ///
+    /// This method does not need to be called when access to the memory
+    /// handle is not required.
+    fn finish(self, _namespace: &ModuleNamespace<Self>) -> Self::Product {
+        self.memory
+    }
+}
+
+#[cfg(not(windows))]
+fn lookup_with_dlsym(name: &str) -> *const u8 {
+    let c_str = CString::new(name).unwrap();
+    let c_str_ptr = c_str.as_ptr();
+    let sym = unsafe { libc::dlsym(libc::RTLD_DEFAULT, c_str_ptr) };
+    if sym.is_null() {
+        panic!("can't resolve symbol {}", name);
+    }
+    sym as *const u8
+}
+
+#[cfg(windows)]
+fn lookup_with_dlsym(name: &str) -> *const u8 {
+    const MSVCRT_DLL: &[u8] = b"msvcrt.dll\0";
+
+    let c_str = CString::new(name).unwrap();
+    let c_str_ptr = c_str.as_ptr();
+
+    unsafe {
+        let handles = [
+            // try to find the searched symbol in the currently running executable
+            ptr::null_mut(),
+            // try to find the searched symbol in local c runtime
+            winapi::um::libloaderapi::GetModuleHandleA(MSVCRT_DLL.as_ptr() as *const i8),
+        ];
+
+        for handle in &handles {
+            let addr = winapi::um::libloaderapi::GetProcAddress(*handle, c_str_ptr);
+            if addr.is_null() {
+                continue;
+            }
+            return addr as *const u8;
+        }
+
+        let msg = if handles[1].is_null() {
+            "(msvcrt not loaded)"
+        } else {
+            ""
+        };
+        panic!("cannot resolve address of symbol {} {}", name, msg);
+    }
+}
+
+impl SimpleJITMemoryHandle {
+    /// Free memory allocated for code and data segments of compiled functions.
+    ///
+    /// # Safety
+    ///
+    /// Because this function invalidates any pointers retrieved from the
+    /// corresponding module, it should only be used when none of the functions
+    /// from that module are currently executing and none of the `fn` pointers
+    /// are called afterwards.
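+    ///
+    /// A sketch of the intended teardown order (assuming a
+    /// `module: Module<SimpleJITBackend>` whose `finish()` returned this
+    /// handle; not compiled as a doctest):
+    ///
+    /// ```ignore
+    /// let mut handle = module.finish();
+    /// // ...last use of any function pointer obtained from the module...
+    /// unsafe { handle.free_memory() };
+    /// ```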
+    pub unsafe fn free_memory(&mut self) {
+        self.code.free_memory();
+        self.readonly.free_memory();
+        self.writable.free_memory();
+    }
+}
+
+struct SimpleJITRelocSink {
+    pub relocs: Vec<RelocRecord>,
+}
+
+impl SimpleJITRelocSink {
+    pub fn new() -> Self {
+        Self { relocs: Vec::new() }
+    }
+}
+
+impl RelocSink for SimpleJITRelocSink {
+    fn reloc_block(&mut self, _offset: CodeOffset, _reloc: Reloc, _block_offset: CodeOffset) {
+        unimplemented!();
+    }
+
+    fn reloc_external(
+        &mut self,
+        offset: CodeOffset,
+        reloc: Reloc,
+        name: &ir::ExternalName,
+        addend: Addend,
+    ) {
+        self.relocs.push(RelocRecord {
+            offset,
+            reloc,
+            name: name.clone(),
+            addend,
+        });
+    }
+
+    fn reloc_jt(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::JumpTable) {
+        match reloc {
+            Reloc::X86PCRelRodata4 => {
+                // Not necessary to record this unless we are going to split apart code and its
+                // jumptbl/rodata.
+            }
+            _ => {
+                panic!("Unhandled reloc");
+            }
+        }
+    }
+
+    fn reloc_constant(&mut self, _offset: CodeOffset, reloc: Reloc, _constant: ir::ConstantOffset) {
+        match reloc {
+            Reloc::X86PCRelRodata4 => {
+                // Not necessary to record this unless we are going to split apart code and its
+                // jumptbl/rodata.
+            }
+            _ => {
+                panic!("Unhandled reloc");
+            }
+        }
+    }
+}
+
+struct SimpleJITStackmapSink {
+    pub stackmaps: Vec<StackmapRecord>,
+}
+
+impl SimpleJITStackmapSink {
+    pub fn new() -> Self {
+        Self {
+            stackmaps: Vec::new(),
+        }
+    }
+}
+
+impl StackmapSink for SimpleJITStackmapSink {
+    fn add_stackmap(&mut self, offset: CodeOffset, stackmap: Stackmap) {
+        self.stackmaps.push(StackmapRecord { offset, stackmap });
+    }
+}
diff --git a/cranelift/simplejit/src/lib.rs b/cranelift/simplejit/src/lib.rs
new file mode 100644
index 0000000000..d907964cff
--- /dev/null
+++ b/cranelift/simplejit/src/lib.rs
@@ -0,0 +1,32 @@
+//! Top-level lib.rs for `cranelift_simplejit`.
+
+#![deny(
+    missing_docs,
+    trivial_numeric_casts,
+    unused_extern_crates,
+    unstable_features
+)]
+#![warn(unused_import_braces)]
+#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))]
+#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))]
+#![cfg_attr(
+    feature = "cargo-clippy",
+    warn(
+        clippy::float_arithmetic,
+        clippy::mut_mut,
+        clippy::nonminimal_bool,
+        clippy::option_map_unwrap_or,
+        clippy::option_map_unwrap_or_else,
+        clippy::print_stdout,
+        clippy::unicode_not_nfc,
+        clippy::use_self
+    )
+)]
+
+mod backend;
+mod memory;
+
+pub use crate::backend::{SimpleJITBackend, SimpleJITBuilder};
+
+/// Version number of this crate.
+pub const VERSION: &str = env!("CARGO_PKG_VERSION");
diff --git a/cranelift/simplejit/src/memory.rs b/cranelift/simplejit/src/memory.rs
new file mode 100644
index 0000000000..68c3fd768c
--- /dev/null
+++ b/cranelift/simplejit/src/memory.rs
@@ -0,0 +1,258 @@
+#[cfg(not(feature = "selinux-fix"))]
+use errno;
+
+#[cfg(not(any(feature = "selinux-fix", windows)))]
+use libc;
+
+#[cfg(feature = "selinux-fix")]
+use memmap::MmapMut;
+
+use region;
+use std::mem;
+use std::ptr;
+
+/// Round `size` up to the nearest multiple of `page_size`.
+fn round_up_to_page_size(size: usize, page_size: usize) -> usize {
+    (size + (page_size - 1)) & !(page_size - 1)
+}
+
+/// A simple struct consisting of a pointer and length.
+struct PtrLen {
+    #[cfg(feature = "selinux-fix")]
+    map: Option<MmapMut>,
+
+    ptr: *mut u8,
+    len: usize,
+}
+
+impl PtrLen {
+    /// Create a new empty `PtrLen`.
+    fn new() -> Self {
+        Self {
+            #[cfg(feature = "selinux-fix")]
+            map: None,
+
+            ptr: ptr::null_mut(),
+            len: 0,
+        }
+    }
+
+    /// Create a new `PtrLen` pointing to at least `size` bytes of memory,
+    /// suitably sized and aligned for memory protection.
+    #[cfg(all(not(target_os = "windows"), feature = "selinux-fix"))]
+    fn with_size(size: usize) -> Result<Self, String> {
+        let page_size = region::page::size();
+        let alloc_size = round_up_to_page_size(size, page_size);
+        let map = MmapMut::map_anon(alloc_size);
+
+        match map {
+            Ok(mut map) => {
+                // The order here is important; we assign the pointer first to get
+                // around compile time borrow errors.
+                Ok(Self {
+                    ptr: map.as_mut_ptr(),
+                    map: Some(map),
+                    len: alloc_size,
+                })
+            }
+            Err(e) => Err(e.to_string()),
+        }
+    }
+
+    #[cfg(all(not(target_os = "windows"), not(feature = "selinux-fix")))]
+    fn with_size(size: usize) -> Result<Self, String> {
+        let mut ptr = ptr::null_mut();
+        let page_size = region::page::size();
+        let alloc_size = round_up_to_page_size(size, page_size);
+        unsafe {
+            let err = libc::posix_memalign(&mut ptr, page_size, alloc_size);
+
+            if err == 0 {
+                Ok(Self {
+                    ptr: ptr as *mut u8,
+                    len: alloc_size,
+                })
+            } else {
+                Err(errno::Errno(err).to_string())
+            }
+        }
+    }
+
+    #[cfg(target_os = "windows")]
+    fn with_size(size: usize) -> Result<Self, String> {
+        use winapi::um::memoryapi::VirtualAlloc;
+        use winapi::um::winnt::{MEM_COMMIT, MEM_RESERVE, PAGE_READWRITE};
+
+        let page_size = region::page::size();
+
+        // VirtualAlloc always rounds up to the next multiple of the page size
+        let ptr = unsafe {
+            VirtualAlloc(
+                ptr::null_mut(),
+                size,
+                MEM_COMMIT | MEM_RESERVE,
+                PAGE_READWRITE,
+            )
+        };
+        if !ptr.is_null() {
+            Ok(Self {
+                ptr: ptr as *mut u8,
+                len: round_up_to_page_size(size, page_size),
+            })
+        } else {
+            Err(errno::errno().to_string())
+        }
+    }
+}
+
+// `MMapMut` from `cfg(feature = "selinux-fix")` already deallocates properly.
+#[cfg(all(not(target_os = "windows"), not(feature = "selinux-fix")))]
+impl Drop for PtrLen {
+    fn drop(&mut self) {
+        if !self.ptr.is_null() {
+            unsafe {
+                region::protect(self.ptr, self.len, region::Protection::ReadWrite)
+                    .expect("unable to unprotect memory");
+                libc::free(self.ptr as _);
+            }
+        }
+    }
+}
+
+// TODO: add a `Drop` impl for `cfg(target_os = "windows")`
+
+/// JIT memory manager. This manages pages of suitably aligned and
+/// accessible memory. Memory will be leaked by default to have
+/// function pointers remain valid for the remainder of the
+/// program's life.
+pub struct Memory {
+    allocations: Vec<PtrLen>,
+    executable: usize,
+    current: PtrLen,
+    position: usize,
+}
+
+impl Memory {
+    pub fn new() -> Self {
+        Self {
+            allocations: Vec::new(),
+            executable: 0,
+            current: PtrLen::new(),
+            position: 0,
+        }
+    }
+
+    fn finish_current(&mut self) {
+        self.allocations
+            .push(mem::replace(&mut self.current, PtrLen::new()));
+        self.position = 0;
+    }
+
+    /// TODO: Use a proper error type.
+    pub fn allocate(&mut self, size: usize, align: u8) -> Result<*mut u8, String> {
+        if self.position % align as usize != 0 {
+            self.position += align as usize - self.position % align as usize;
+            debug_assert!(self.position % align as usize == 0);
+        }
+
+        if size <= self.current.len - self.position {
+            // TODO: Ensure overflow is not possible.
+            let ptr = unsafe { self.current.ptr.add(self.position) };
+            self.position += size;
+            return Ok(ptr);
+        }
+
+        self.finish_current();
+
+        // TODO: Allocate more at a time.
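+        // Slow path: the current region is exhausted, so retire it and start a
+        // fresh one sized to this request; the request consumes it from offset 0.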
+        self.current = PtrLen::with_size(size)?;
+        self.position = size;
+        Ok(self.current.ptr)
+    }
+
+    /// Set all memory allocated in this `Memory` up to now as readable and executable.
+    pub fn set_readable_and_executable(&mut self) {
+        self.finish_current();
+
+        #[cfg(feature = "selinux-fix")]
+        {
+            for &PtrLen { ref map, ptr, len } in &self.allocations[self.executable..] {
+                if len != 0 && map.is_some() {
+                    unsafe {
+                        region::protect(ptr, len, region::Protection::ReadExecute)
+                            .expect("unable to make memory readable+executable");
+                    }
+                }
+            }
+        }
+
+        #[cfg(not(feature = "selinux-fix"))]
+        {
+            for &PtrLen { ptr, len } in &self.allocations[self.executable..] {
+                if len != 0 {
+                    unsafe {
+                        region::protect(ptr, len, region::Protection::ReadExecute)
+                            .expect("unable to make memory readable+executable");
+                    }
+                }
+            }
+        }
+    }
+
+    /// Set all memory allocated in this `Memory` up to now as readonly.
+    pub fn set_readonly(&mut self) {
+        self.finish_current();
+
+        #[cfg(feature = "selinux-fix")]
+        {
+            for &PtrLen { ref map, ptr, len } in &self.allocations[self.executable..] {
+                if len != 0 && map.is_some() {
+                    unsafe {
+                        region::protect(ptr, len, region::Protection::Read)
+                            .expect("unable to make memory readonly");
+                    }
+                }
+            }
+        }
+
+        #[cfg(not(feature = "selinux-fix"))]
+        {
+            for &PtrLen { ptr, len } in &self.allocations[self.executable..] {
+                if len != 0 {
+                    unsafe {
+                        region::protect(ptr, len, region::Protection::Read)
+                            .expect("unable to make memory readonly");
+                    }
+                }
+            }
+        }
+    }
+
+    /// Frees all allocated memory regions that would be leaked otherwise.
+    /// Likely to invalidate existing function pointers, causing unsafety.
+    pub unsafe fn free_memory(&mut self) {
+        self.allocations.clear();
+    }
+}
+
+impl Drop for Memory {
+    fn drop(&mut self) {
+        // leak memory to guarantee validity of function pointers
+        mem::replace(&mut self.allocations, Vec::new())
+            .into_iter()
+            .for_each(mem::forget);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_round_up_to_page_size() {
+        assert_eq!(round_up_to_page_size(0, 4096), 0);
+        assert_eq!(round_up_to_page_size(1, 4096), 4096);
+        assert_eq!(round_up_to_page_size(4096, 4096), 4096);
+        assert_eq!(round_up_to_page_size(4097, 4096), 8192);
+    }
+}
diff --git a/cranelift/simplejit/tests/basic.rs b/cranelift/simplejit/tests/basic.rs
new file mode 100644
index 0000000000..0e8ea0aa9d
--- /dev/null
+++ b/cranelift/simplejit/tests/basic.rs
@@ -0,0 +1,197 @@
+use cranelift_codegen::ir::*;
+use cranelift_codegen::isa::CallConv;
+use cranelift_codegen::{ir::types::I16, Context};
+use cranelift_entity::EntityRef;
+use cranelift_frontend::*;
+use cranelift_module::*;
+use cranelift_simplejit::*;
+
+#[test]
+fn error_on_incompatible_sig_in_declare_function() {
+    let mut module: Module<SimpleJITBackend> =
+        Module::new(SimpleJITBuilder::new(default_libcall_names()));
+    let mut sig = Signature {
+        params: vec![AbiParam::new(types::I64)],
+        returns: vec![],
+        call_conv: CallConv::SystemV,
+    };
+    module
+        .declare_function("abc", Linkage::Local, &sig)
+        .unwrap();
+    sig.params[0] = AbiParam::new(types::I32);
+    module
+        .declare_function("abc", Linkage::Local, &sig)
+        .err()
+        .unwrap(); // Make sure this is an error
+}
+
+fn define_simple_function(module: &mut Module<SimpleJITBackend>) -> FuncId {
+    let sig = Signature {
+        params: vec![],
+        returns: vec![],
+        call_conv: CallConv::SystemV,
+    };
+
+    let func_id = module
+        .declare_function("abc", Linkage::Local, &sig)
+        .unwrap();
+
+    let mut ctx = Context::new();
+    ctx.func = Function::with_name_signature(ExternalName::user(0, func_id.as_u32()), sig);
+    let mut func_ctx = FunctionBuilderContext::new();
+    {
+        let mut bcx: FunctionBuilder = FunctionBuilder::new(&mut ctx.func, &mut func_ctx);
+        let block = bcx.create_block();
+        bcx.switch_to_block(block);
+        bcx.ins().return_(&[]);
+    }
+
+    module.define_function(func_id, &mut ctx).unwrap();
+
+    func_id
+}
+
+#[test]
+fn double_finalize() {
+    let mut module: Module<SimpleJITBackend> =
+        Module::new(SimpleJITBuilder::new(default_libcall_names()));
+
+    define_simple_function(&mut module);
+    module.finalize_definitions();
+
+    // Calling `finalize_definitions` a second time without any new definitions
+    // should have no effect.
+    module.finalize_definitions();
+}
+
+#[test]
+#[should_panic(expected = "Result::unwrap()` on an `Err` value: DuplicateDefinition(\"abc\")")]
+fn panic_on_define_after_finalize() {
+    let mut module: Module<SimpleJITBackend> =
+        Module::new(SimpleJITBuilder::new(default_libcall_names()));
+
+    define_simple_function(&mut module);
+    module.finalize_definitions();
+    define_simple_function(&mut module);
+}
+
+#[test]
+fn switch_error() {
+    use cranelift_codegen::settings;
+
+    let sig = Signature {
+        params: vec![AbiParam::new(types::I32)],
+        returns: vec![AbiParam::new(types::I32)],
+        call_conv: CallConv::SystemV,
+    };
+
+    let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
+
+    let mut func_ctx = FunctionBuilderContext::new();
+    {
+        let mut bcx: FunctionBuilder = FunctionBuilder::new(&mut func, &mut func_ctx);
+        let start = bcx.create_block();
+        let bb0 = bcx.create_block();
+        let bb1 = bcx.create_block();
+        let bb2 = bcx.create_block();
+        let bb3 = bcx.create_block();
+        println!("{} {} {} {} {}", start, bb0, bb1, bb2, bb3);
+
+        bcx.declare_var(Variable::new(0), types::I32);
+        bcx.declare_var(Variable::new(1), types::I32);
+        let in_val = bcx.append_block_param(start, types::I32);
+        bcx.switch_to_block(start);
+        bcx.def_var(Variable::new(0), in_val);
+        bcx.ins().jump(bb0, &[]);
+
+        bcx.switch_to_block(bb0);
+        let discr = bcx.use_var(Variable::new(0));
+        let mut switch = cranelift_frontend::Switch::new();
+        for &(index, bb) in &[
+            (9, bb1),
+            (13, bb1),
+            (10, bb1),
+            (92, bb1),
+            (39, bb1),
+            (34, bb1),
+        ] {
+            switch.set_entry(index, bb);
+        }
+        switch.emit(&mut bcx, discr, bb2);
+
+        bcx.switch_to_block(bb1);
+        let v = bcx.use_var(Variable::new(0));
+        bcx.def_var(Variable::new(1), v);
+        bcx.ins().jump(bb3, &[]);
+
+        bcx.switch_to_block(bb2);
+        let v = bcx.use_var(Variable::new(0));
+        bcx.def_var(Variable::new(1), v);
+        bcx.ins().jump(bb3, &[]);
+
+        bcx.switch_to_block(bb3);
+        let r = bcx.use_var(Variable::new(1));
+        bcx.ins().return_(&[r]);
+
+        bcx.seal_all_blocks();
+        bcx.finalize();
+    }
+
+    let flags = settings::Flags::new(settings::builder());
+    match cranelift_codegen::verify_function(&func, &flags) {
+        Ok(_) => {}
+        Err(err) => {
+            let pretty_error =
+                cranelift_codegen::print_errors::pretty_verifier_error(&func, None, None, err);
+            panic!("pretty_error:\n{}", pretty_error);
+        }
+    }
+}
+
+#[test]
+fn libcall_function() {
+    let mut module: Module<SimpleJITBackend> =
+        Module::new(SimpleJITBuilder::new(default_libcall_names()));
+
+    let sig = Signature {
+        params: vec![],
+        returns: vec![],
+        call_conv: CallConv::SystemV,
+    };
+
+    let func_id = module
+        .declare_function("function", Linkage::Local, &sig)
+        .unwrap();
+
+    let mut ctx = Context::new();
+    ctx.func = Function::with_name_signature(ExternalName::user(0, func_id.as_u32()), sig);
+    let mut func_ctx = FunctionBuilderContext::new();
+    {
+        let mut bcx: FunctionBuilder = FunctionBuilder::new(&mut ctx.func, &mut
func_ctx); + let block = bcx.create_block(); + bcx.switch_to_block(block); + + let int = module.target_config().pointer_type(); + let zero = bcx.ins().iconst(I16, 0); + let size = bcx.ins().iconst(int, 10); + + let mut signature = module.make_signature(); + signature.params.push(AbiParam::new(int)); + signature.returns.push(AbiParam::new(int)); + let callee = module + .declare_function("malloc", Linkage::Import, &signature) + .expect("declare malloc function"); + let local_callee = module.declare_func_in_func(callee, &mut bcx.func); + let argument_exprs = vec![size]; + let call = bcx.ins().call(local_callee, &argument_exprs); + let buffer = bcx.inst_results(call)[0]; + + bcx.call_memset(module.target_config(), buffer, zero, size); + + bcx.ins().return_(&[]); + } + + module.define_function(func_id, &mut ctx).unwrap(); + + module.finalize_definitions(); +} diff --git a/cranelift/spidermonkey.md b/cranelift/spidermonkey.md new file mode 100644 index 0000000000..516aa84a81 --- /dev/null +++ b/cranelift/spidermonkey.md @@ -0,0 +1,40 @@ +Cranelift in SpiderMonkey +========================= + +[SpiderMonkey](https://developer.mozilla.org/en-US/docs/Mozilla/Projects/SpiderMonkey) +is the JavaScript and WebAssembly engine in Firefox. Cranelift is +designed to be used in SpiderMonkey with the goal of enabling better +code generation for ARM's 32-bit and 64-bit architectures, and building +a framework for improved low-level code optimizations in the future. + +Phase 1: WebAssembly +-------------------- + +SpiderMonkey currently has two WebAssembly compilers: The tier 1 +baseline compiler (not shown below) and the tier 2 compiler using the +IonMonkey JavaScript compiler's optimizations and register allocation. + +![Cranelift in SpiderMonkey phase 1](media/spidermonkey1.png) + +In phase 1, Cranelift aims to replace the IonMonkey-based tier 2 +compiler for WebAssembly only. It will still be orchestrated by the +BaldrMonkey engine and compile WebAssembly modules on multiple threads. +Cranelift translates binary wasm functions directly into its own +intermediate representation, and it generates binary machine code +without depending on SpiderMonkey's macro assembler. + +Phase 2: IonMonkey +------------------ + +The IonMonkey JIT compiler is designed to compile JavaScript code. It +uses two separate intermediate representations to do that: + + - MIR is used for optimizations that are specific to JavaScript JIT + compilation. It has good support for JS types and the special tricks + needed to make JS fast. + - LIR is used for register allocation. + +![Cranelift in SpiderMonkey phase 2](media/spidermonkey2.png) + +Cranelift has its own register allocator, so the LIR representation can +be skipped when using Cranelift as a backend for IonMonkey. diff --git a/cranelift/src/bugpoint.rs b/cranelift/src/bugpoint.rs new file mode 100644 index 0000000000..b409143be4 --- /dev/null +++ b/cranelift/src/bugpoint.rs @@ -0,0 +1,960 @@ +//! CLI tool to reduce Cranelift IR files crashing during compilation. 
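+//!
+//! The reducer works by repeatedly applying mutations (removing instructions and
+//! blocks, replacing instructions with constants or traps, merging blocks) and
+//! keeping each mutation that still makes the compiler crash. As an illustrative
+//! invocation (file name hypothetical):
+//!
+//! ```sh
+//! clif-util bugpoint --target x86_64 crash.clif
+//! ```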
+ +use crate::disasm::{PrintRelocs, PrintStackmaps, PrintTraps}; +use crate::utils::{parse_sets_and_triple, read_to_string}; +use cranelift_codegen::cursor::{Cursor, FuncCursor}; +use cranelift_codegen::flowgraph::ControlFlowGraph; +use cranelift_codegen::ir::types::{F32, F64}; +use cranelift_codegen::ir::{ + self, Block, FuncRef, Function, GlobalValueData, Inst, InstBuilder, InstructionData, + StackSlots, TrapCode, +}; +use cranelift_codegen::isa::TargetIsa; +use cranelift_codegen::Context; +use cranelift_entity::PrimaryMap; +use cranelift_reader::{parse_test, ParseOptions}; +use std::collections::HashMap; +use std::path::Path; + +use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}; + +pub fn run( + filename: &str, + flag_set: &[String], + flag_isa: &str, + verbose: bool, +) -> Result<(), String> { + let parsed = parse_sets_and_triple(flag_set, flag_isa)?; + let fisa = parsed.as_fisa(); + + let path = Path::new(&filename).to_path_buf(); + + let buffer = read_to_string(&path).map_err(|e| format!("{}: {}", filename, e))?; + let test_file = + parse_test(&buffer, ParseOptions::default()).map_err(|e| format!("{}: {}", filename, e))?; + + // If we have an isa from the command-line, use that. Otherwise if the + // file contains a unique isa, use that. + let isa = if let Some(isa) = fisa.isa { + isa + } else if let Some(isa) = test_file.isa_spec.unique_isa() { + isa + } else { + return Err(String::from("compilation requires a target isa")); + }; + + std::env::set_var("RUST_BACKTRACE", "0"); // Disable backtraces to reduce verbosity + + for (func, _) in test_file.functions { + let (orig_block_count, orig_inst_count) = (block_count(&func), inst_count(&func)); + + match reduce(isa, func, verbose) { + Ok((func, crash_msg)) => { + println!("Crash message: {}", crash_msg); + println!("\n{}", func); + println!( + "{} blocks {} insts -> {} blocks {} insts", + orig_block_count, + orig_inst_count, + block_count(&func), + inst_count(&func) + ); + } + Err(err) => println!("Warning: {}", err), + } + } + + Ok(()) +} + +enum ProgressStatus { + /// The mutation raised or reduced the amount of instructions or blocks. + ExpandedOrShrinked, + + /// The mutation only changed an instruction. Performing another round of mutations may only + /// reduce the test case if another mutation shrank the test case. + Changed, + + /// No need to re-test if the program crashes, because the mutation had no effect, but we want + /// to keep on iterating. + Skip, +} + +trait Mutator { + fn name(&self) -> &'static str; + fn mutation_count(&self, func: &Function) -> usize; + fn mutate(&mut self, func: Function) -> Option<(Function, String, ProgressStatus)>; + + /// Gets called when the returned mutated function kept on causing the crash. This can be used + /// to update position of the next item to look at. Does nothing by default. + fn did_crash(&mut self) {} +} + +/// Try to remove instructions. 
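+///
+/// This walks the function with a block/instruction cursor: each `mutate` call
+/// removes the instruction the cursor previously pointed at, and drops a block
+/// entirely once its last instruction is gone.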
+struct RemoveInst { + block: Block, + inst: Inst, +} + +impl RemoveInst { + fn new(func: &Function) -> Self { + let first_block = func.layout.entry_block().unwrap(); + let first_inst = func.layout.first_inst(first_block).unwrap(); + Self { + block: first_block, + inst: first_inst, + } + } +} + +impl Mutator for RemoveInst { + fn name(&self) -> &'static str { + "remove inst" + } + + fn mutation_count(&self, func: &Function) -> usize { + inst_count(func) + } + + fn mutate(&mut self, mut func: Function) -> Option<(Function, String, ProgressStatus)> { + next_inst_ret_prev(&func, &mut self.block, &mut self.inst).map(|(prev_block, prev_inst)| { + func.layout.remove_inst(prev_inst); + let msg = if func.layout.block_insts(prev_block).next().is_none() { + // Make sure empty blocks are removed, as `next_inst_ret_prev` depends on non empty blocks + func.layout.remove_block(prev_block); + format!("Remove inst {} and empty block {}", prev_inst, prev_block) + } else { + format!("Remove inst {}", prev_inst) + }; + (func, msg, ProgressStatus::ExpandedOrShrinked) + }) + } +} + +/// Try to replace instructions with `iconst` or `fconst`. +struct ReplaceInstWithConst { + block: Block, + inst: Inst, +} + +impl ReplaceInstWithConst { + fn new(func: &Function) -> Self { + let first_block = func.layout.entry_block().unwrap(); + let first_inst = func.layout.first_inst(first_block).unwrap(); + Self { + block: first_block, + inst: first_inst, + } + } + + fn const_for_type<'f, T: InstBuilder<'f>>(builder: T, ty: ir::Type) -> &'static str { + // Try to keep the result type consistent, and default to an integer type + // otherwise: this will cover all the cases for f32/f64 and integer types, or + // create verifier errors otherwise. + if ty == F32 { + builder.f32const(0.0); + "f32const" + } else if ty == F64 { + builder.f64const(0.0); + "f64const" + } else { + builder.iconst(ty, 0); + "iconst" + } + } +} + +impl Mutator for ReplaceInstWithConst { + fn name(&self) -> &'static str { + "replace inst with const" + } + + fn mutation_count(&self, func: &Function) -> usize { + inst_count(func) + } + + fn mutate(&mut self, mut func: Function) -> Option<(Function, String, ProgressStatus)> { + next_inst_ret_prev(&func, &mut self.block, &mut self.inst).map( + |(_prev_block, prev_inst)| { + let num_results = func.dfg.inst_results(prev_inst).len(); + + let opcode = func.dfg[prev_inst].opcode(); + if num_results == 0 + || opcode == ir::Opcode::Iconst + || opcode == ir::Opcode::F32const + || opcode == ir::Opcode::F64const + { + return (func, format!(""), ProgressStatus::Skip); + } + + if num_results == 1 { + let ty = func.dfg.value_type(func.dfg.first_result(prev_inst)); + let new_inst_name = Self::const_for_type(func.dfg.replace(prev_inst), ty); + return ( + func, + format!("Replace inst {} with {}.", prev_inst, new_inst_name), + ProgressStatus::Changed, + ); + } + + // At least 2 results. Replace each instruction with as many const instructions as + // there are results. + let mut pos = FuncCursor::new(&mut func).at_inst(prev_inst); + + // Copy result SSA names into our own vector; otherwise we couldn't mutably borrow pos + // in the loop below. + let results = pos.func.dfg.inst_results(prev_inst).to_vec(); + + // Detach results from the previous instruction, since we're going to reuse them. 
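+                // Reusing the detached result values with `with_results` below keeps
+                // all downstream uses pointing at valid definitions, so no use sites
+                // need to be rewritten.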
+                pos.func.dfg.clear_results(prev_inst);
+
+                let mut inst_names = Vec::new();
+                for r in results {
+                    let ty = pos.func.dfg.value_type(r);
+                    let builder = pos.ins().with_results([Some(r)]);
+                    let new_inst_name = Self::const_for_type(builder, ty);
+                    inst_names.push(new_inst_name);
+                }
+
+                // Remove the instruction.
+                assert_eq!(pos.remove_inst(), prev_inst);
+
+                (
+                    func,
+                    format!("Replace inst {} with {}", prev_inst, inst_names.join(" / ")),
+                    ProgressStatus::ExpandedOrShrinked,
+                )
+            },
+        )
+    }
+}
+
+/// Try to replace instructions with `trap`.
+struct ReplaceInstWithTrap {
+    block: Block,
+    inst: Inst,
+}
+
+impl ReplaceInstWithTrap {
+    fn new(func: &Function) -> Self {
+        let first_block = func.layout.entry_block().unwrap();
+        let first_inst = func.layout.first_inst(first_block).unwrap();
+        Self {
+            block: first_block,
+            inst: first_inst,
+        }
+    }
+}
+
+impl Mutator for ReplaceInstWithTrap {
+    fn name(&self) -> &'static str {
+        "replace inst with trap"
+    }
+
+    fn mutation_count(&self, func: &Function) -> usize {
+        inst_count(func)
+    }
+
+    fn mutate(&mut self, mut func: Function) -> Option<(Function, String, ProgressStatus)> {
+        next_inst_ret_prev(&func, &mut self.block, &mut self.inst).map(
+            |(_prev_block, prev_inst)| {
+                let status = if func.dfg[prev_inst].opcode() == ir::Opcode::Trap {
+                    ProgressStatus::Skip
+                } else {
+                    func.dfg.replace(prev_inst).trap(TrapCode::User(0));
+                    ProgressStatus::Changed
+                };
+                (
+                    func,
+                    format!("Replace inst {} with trap", prev_inst),
+                    status,
+                )
+            },
+        )
+    }
+}
+
+/// Try to remove a block.
+struct RemoveBlock {
+    block: Block,
+}
+
+impl RemoveBlock {
+    fn new(func: &Function) -> Self {
+        Self {
+            block: func.layout.entry_block().unwrap(),
+        }
+    }
+}
+
+impl Mutator for RemoveBlock {
+    fn name(&self) -> &'static str {
+        "remove block"
+    }
+
+    fn mutation_count(&self, func: &Function) -> usize {
+        block_count(func)
+    }
+
+    fn mutate(&mut self, mut func: Function) -> Option<(Function, String, ProgressStatus)> {
+        func.layout.next_block(self.block).map(|next_block| {
+            self.block = next_block;
+            while let Some(inst) = func.layout.last_inst(self.block) {
+                func.layout.remove_inst(inst);
+            }
+            func.layout.remove_block(self.block);
+            (
+                func,
+                format!("Remove block {}", next_block),
+                ProgressStatus::ExpandedOrShrinked,
+            )
+        })
+    }
+}
+
+/// Try to remove unused entities.
+struct RemoveUnusedEntities {
+    kind: u32,
+}
+
+impl RemoveUnusedEntities {
+    fn new() -> Self {
+        Self { kind: 0 }
+    }
+}
+
+impl Mutator for RemoveUnusedEntities {
+    fn name(&self) -> &'static str {
+        "remove unused entities"
+    }
+
+    fn mutation_count(&self, _func: &Function) -> usize {
+        4
+    }
+
+    #[allow(clippy::cognitive_complexity)]
+    fn mutate(&mut self, mut func: Function) -> Option<(Function, String, ProgressStatus)> {
+        let name = match self.kind {
+            0 => {
+                let mut ext_func_usage_map = HashMap::new();
+                for block in func.layout.blocks() {
+                    for inst in func.layout.block_insts(block) {
+                        match func.dfg[inst] {
+                            // Add new cases when there are new instruction formats taking a `FuncRef`.
+                            InstructionData::Call { func_ref, .. }
+                            | InstructionData::FuncAddr { func_ref, ..
} => { + ext_func_usage_map + .entry(func_ref) + .or_insert_with(Vec::new) + .push(inst); + } + _ => {} + } + } + } + + let mut ext_funcs = PrimaryMap::new(); + + for (func_ref, ext_func_data) in func.dfg.ext_funcs.clone().into_iter() { + if let Some(func_ref_usage) = ext_func_usage_map.get(&func_ref) { + let new_func_ref = ext_funcs.push(ext_func_data.clone()); + for &inst in func_ref_usage { + match func.dfg[inst] { + // Keep in sync with the above match. + InstructionData::Call { + ref mut func_ref, .. + } + | InstructionData::FuncAddr { + ref mut func_ref, .. + } => { + *func_ref = new_func_ref; + } + _ => unreachable!(), + } + } + } + } + + func.dfg.ext_funcs = ext_funcs; + + "Remove unused ext funcs" + } + 1 => { + #[derive(Copy, Clone)] + enum SigRefUser { + Instruction(Inst), + ExtFunc(FuncRef), + } + + let mut signatures_usage_map = HashMap::new(); + for block in func.layout.blocks() { + for inst in func.layout.block_insts(block) { + // Add new cases when there are new instruction formats taking a `SigRef`. + if let InstructionData::CallIndirect { sig_ref, .. } = func.dfg[inst] { + signatures_usage_map + .entry(sig_ref) + .or_insert_with(Vec::new) + .push(SigRefUser::Instruction(inst)); + } + } + } + for (func_ref, ext_func_data) in func.dfg.ext_funcs.iter() { + signatures_usage_map + .entry(ext_func_data.signature) + .or_insert_with(Vec::new) + .push(SigRefUser::ExtFunc(func_ref)); + } + + let mut signatures = PrimaryMap::new(); + + for (sig_ref, sig_data) in func.dfg.signatures.clone().into_iter() { + if let Some(sig_ref_usage) = signatures_usage_map.get(&sig_ref) { + let new_sig_ref = signatures.push(sig_data.clone()); + for &sig_ref_user in sig_ref_usage { + match sig_ref_user { + SigRefUser::Instruction(inst) => match func.dfg[inst] { + // Keep in sync with the above match. + InstructionData::CallIndirect { + ref mut sig_ref, .. + } => { + *sig_ref = new_sig_ref; + } + _ => unreachable!(), + }, + SigRefUser::ExtFunc(func_ref) => { + func.dfg.ext_funcs[func_ref].signature = new_sig_ref; + } + } + } + } + } + + func.dfg.signatures = signatures; + + "Remove unused signatures" + } + 2 => { + let mut stack_slot_usage_map = HashMap::new(); + for block in func.layout.blocks() { + for inst in func.layout.block_insts(block) { + match func.dfg[inst] { + // Add new cases when there are new instruction formats taking a `StackSlot`. + InstructionData::StackLoad { stack_slot, .. } + | InstructionData::StackStore { stack_slot, .. } => { + stack_slot_usage_map + .entry(stack_slot) + .or_insert_with(Vec::new) + .push(inst); + } + + InstructionData::RegSpill { dst, .. } => { + stack_slot_usage_map + .entry(dst) + .or_insert_with(Vec::new) + .push(inst); + } + InstructionData::RegFill { src, .. } => { + stack_slot_usage_map + .entry(src) + .or_insert_with(Vec::new) + .push(inst); + } + _ => {} + } + } + } + + let mut stack_slots = StackSlots::new(); + + for (stack_slot, stack_slot_data) in func.stack_slots.clone().iter() { + if let Some(stack_slot_usage) = stack_slot_usage_map.get(&stack_slot) { + let new_stack_slot = stack_slots.push(stack_slot_data.clone()); + for &inst in stack_slot_usage { + match &mut func.dfg[inst] { + // Keep in sync with the above match. + InstructionData::StackLoad { stack_slot, .. } + | InstructionData::StackStore { stack_slot, .. } => { + *stack_slot = new_stack_slot; + } + InstructionData::RegSpill { dst, .. } => { + *dst = new_stack_slot; + } + InstructionData::RegFill { src, .. 
} => {
+                                    *src = new_stack_slot;
+                                }
+                                _ => unreachable!(),
+                            }
+                        }
+                    }
+                }
+
+                func.stack_slots = stack_slots;
+
+                "Remove unused stack slots"
+            }
+            3 => {
+                let mut global_value_usage_map = HashMap::new();
+                for block in func.layout.blocks() {
+                    for inst in func.layout.block_insts(block) {
+                        // Add new cases when there are new instruction formats taking a `GlobalValue`.
+                        if let InstructionData::UnaryGlobalValue { global_value, .. } =
+                            func.dfg[inst]
+                        {
+                            global_value_usage_map
+                                .entry(global_value)
+                                .or_insert_with(Vec::new)
+                                .push(inst);
+                        }
+                    }
+                }
+
+                for (_global_value, global_value_data) in func.global_values.iter() {
+                    match *global_value_data {
+                        GlobalValueData::VMContext | GlobalValueData::Symbol { .. } => {}
+                        // These can create cyclic references, which cause complications. Just skip
+                        // the global value removal for now.
+                        // FIXME Handle them in a better way.
+                        GlobalValueData::Load { .. } | GlobalValueData::IAddImm { .. } => {
+                            return None
+                        }
+                    }
+                }
+
+                let mut global_values = PrimaryMap::new();
+
+                for (global_value, global_value_data) in func.global_values.clone().into_iter() {
+                    if let Some(global_value_usage) = global_value_usage_map.get(&global_value) {
+                        let new_global_value = global_values.push(global_value_data.clone());
+                        for &inst in global_value_usage {
+                            match &mut func.dfg[inst] {
+                                // Keep in sync with the above match.
+                                InstructionData::UnaryGlobalValue { global_value, .. } => {
+                                    *global_value = new_global_value;
+                                }
+                                _ => unreachable!(),
+                            }
+                        }
+                    }
+                }
+
+                func.global_values = global_values;
+
+                "Remove unused global values"
+            }
+            _ => return None,
+        };
+        self.kind += 1;
+        Some((func, name.to_owned(), ProgressStatus::Changed))
+    }
+}
+
+struct MergeBlocks {
+    block: Block,
+    prev_block: Option<Block>,
+}
+
+impl MergeBlocks {
+    fn new(func: &Function) -> Self {
+        Self {
+            block: func.layout.entry_block().unwrap(),
+            prev_block: None,
+        }
+    }
+}
+
+impl Mutator for MergeBlocks {
+    fn name(&self) -> &'static str {
+        "merge blocks"
+    }
+
+    fn mutation_count(&self, func: &Function) -> usize {
+        // N blocks may result in at most N-1 merges.
+        block_count(func) - 1
+    }
+
+    fn mutate(&mut self, mut func: Function) -> Option<(Function, String, ProgressStatus)> {
+        let block = match func.layout.next_block(self.block) {
+            Some(block) => block,
+            None => return None,
+        };
+
+        self.block = block;
+
+        let mut cfg = ControlFlowGraph::new();
+        cfg.compute(&func);
+
+        if cfg.pred_iter(block).count() != 1 {
+            return Some((
+                func,
+                format!("did nothing for {}", block),
+                ProgressStatus::Skip,
+            ));
+        }
+
+        let pred = cfg.pred_iter(block).next().unwrap();
+
+        // If the branch instruction that led us to this block is preceded by another branch
+        // instruction, then we have a conditional jump sequence that we should not break by
+        // replacing the second instruction with more of them.
+        if let Some(pred_pred_inst) = func.layout.prev_inst(pred.inst) {
+            if func.dfg[pred_pred_inst].opcode().is_branch() {
+                return Some((
+                    func,
+                    format!("did nothing for {}", block),
+                    ProgressStatus::Skip,
+                ));
+            }
+        }
+
+        assert!(func.dfg.block_params(block).len() == func.dfg.inst_variable_args(pred.inst).len());
+
+        // If there were any block parameters in block, then the last instruction in pred will
+        // fill these parameters. Make the block params aliases of the terminator arguments.
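+        // Aliasing the detached parameters (rather than rewriting every use site)
+        // lets the data flow graph resolve the old references to the new values.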
+        for (block_param, arg) in func
+            .dfg
+            .detach_block_params(block)
+            .as_slice(&func.dfg.value_lists)
+            .iter()
+            .cloned()
+            .zip(func.dfg.inst_variable_args(pred.inst).iter().cloned())
+            .collect::<Vec<_>>()
+        {
+            if block_param != arg {
+                func.dfg.change_to_alias(block_param, arg);
+            }
+        }
+
+        // Remove the terminator branch to the current block.
+        func.layout.remove_inst(pred.inst);
+
+        // Move all the instructions to the predecessor.
+        while let Some(inst) = func.layout.first_inst(block) {
+            func.layout.remove_inst(inst);
+            func.layout.append_inst(inst, pred.block);
+        }
+
+        // Remove the predecessor block.
+        func.layout.remove_block(block);
+
+        // Record the previous block: if we caused a crash (as signaled by a call to did_crash),
+        // then we'll start back at this block.
+        self.prev_block = Some(pred.block);
+
+        Some((
+            func,
+            format!("merged {} and {}", pred.block, block),
+            ProgressStatus::ExpandedOrShrinked,
+        ))
+    }
+
+    fn did_crash(&mut self) {
+        self.block = self.prev_block.unwrap();
+    }
+}
+
+fn next_inst_ret_prev(
+    func: &Function,
+    block: &mut Block,
+    inst: &mut Inst,
+) -> Option<(Block, Inst)> {
+    let prev = (*block, *inst);
+    if let Some(next_inst) = func.layout.next_inst(*inst) {
+        *inst = next_inst;
+        return Some(prev);
+    }
+    if let Some(next_block) = func.layout.next_block(*block) {
+        *block = next_block;
+        *inst = func.layout.first_inst(*block).expect("no inst");
+        return Some(prev);
+    }
+    None
+}
+
+fn block_count(func: &Function) -> usize {
+    func.layout.blocks().count()
+}
+
+fn inst_count(func: &Function) -> usize {
+    func.layout
+        .blocks()
+        .map(|block| func.layout.block_insts(block).count())
+        .sum()
+}
+
+fn resolve_aliases(func: &mut Function) {
+    for block in func.layout.blocks() {
+        for inst in func.layout.block_insts(block) {
+            func.dfg.resolve_aliases_in_arguments(inst);
+        }
+    }
+}
+
+fn reduce(
+    isa: &dyn TargetIsa,
+    mut func: Function,
+    verbose: bool,
+) -> Result<(Function, String), String> {
+    let mut context = CrashCheckContext::new(isa);
+
+    match context.check_for_crash(&func) {
+        CheckResult::Succeed => {
+            return Err(
+                "Given function compiled successfully or gave a verifier error.".to_string(),
+            );
+        }
+        CheckResult::Crash(_) => {}
+    }
+
+    resolve_aliases(&mut func);
+
+    let progress_bar = ProgressBar::with_draw_target(0, ProgressDrawTarget::stdout());
+    progress_bar.set_style(
+        ProgressStyle::default_bar().template("{bar:60} {prefix:40} {pos:>4}/{len:>4} {msg}"),
+    );
+
+    for pass_idx in 0..100 {
+        let mut should_keep_reducing = false;
+        let mut phase = 0;
+
+        loop {
+            let mut mutator: Box<dyn Mutator> = match phase {
+                0 => Box::new(RemoveInst::new(&func)),
+                1 => Box::new(ReplaceInstWithConst::new(&func)),
+                2 => Box::new(ReplaceInstWithTrap::new(&func)),
+                3 => Box::new(RemoveBlock::new(&func)),
+                4 => Box::new(RemoveUnusedEntities::new()),
+                5 => Box::new(MergeBlocks::new(&func)),
+                _ => break,
+            };
+
+            progress_bar.set_prefix(&format!("pass {} phase {}", pass_idx, mutator.name()));
+            progress_bar.set_length(mutator.mutation_count(&func) as u64);
+
+            // Reset progress bar.
+            progress_bar.set_position(0);
+            progress_bar.set_draw_delta(0);
+
+            for _ in 0..10000 {
+                progress_bar.inc(1);
+
+                let (mutated_func, msg, mutation_kind) = match mutator.mutate(func.clone()) {
+                    Some(res) => res,
+                    None => {
+                        break;
+                    }
+                };
+
+                if let ProgressStatus::Skip = mutation_kind {
+                    // The mutator didn't change anything, but we want to try more mutator
+                    // iterations.
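+                    // Skipping also avoids re-running the crash check, which is the
+                    // expensive part of each iteration.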
+                    continue;
+                }
+
+                progress_bar.set_message(&msg);
+
+                match context.check_for_crash(&mutated_func) {
+                    CheckResult::Succeed => {
+                        // Mutating didn't hit the problem anymore, discard changes.
+                        continue;
+                    }
+                    CheckResult::Crash(_) => {
+                        // Panic remained while mutating, make changes definitive.
+                        func = mutated_func;
+
+                        // Notify the mutator that the mutation was successful.
+                        mutator.did_crash();
+
+                        let verb = match mutation_kind {
+                            ProgressStatus::ExpandedOrShrinked => {
+                                should_keep_reducing = true;
+                                "shrink"
+                            }
+                            ProgressStatus::Changed => "changed",
+                            ProgressStatus::Skip => unreachable!(),
+                        };
+                        if verbose {
+                            progress_bar.println(format!("{}: {}", msg, verb));
+                        }
+                    }
+                }
+            }
+
+            phase += 1;
+        }
+
+        progress_bar.println(format!(
+            "After pass {}, remaining insts/blocks: {}/{} ({})",
+            pass_idx,
+            inst_count(&func),
+            block_count(&func),
+            if should_keep_reducing {
+                "will keep reducing"
+            } else {
+                "stop reducing"
+            }
+        ));
+
+        if !should_keep_reducing {
+            // No new shrinking opportunities have been found this pass. This means none will ever
+            // be found. Skip the rest of the passes over the function.
+            break;
+        }
+    }
+
+    progress_bar.finish();
+
+    let crash_msg = match context.check_for_crash(&func) {
+        CheckResult::Succeed => unreachable!("Used to crash, but doesn't anymore???"),
+        CheckResult::Crash(crash_msg) => crash_msg,
+    };
+
+    Ok((func, crash_msg))
+}
+
+struct CrashCheckContext<'a> {
+    /// Cached `Context`, to prevent repeated allocation.
+    context: Context,
+
+    /// Cached code memory, to prevent repeated allocation.
+    code_memory: Vec<u8>,
+
+    /// The target isa to compile for.
+    isa: &'a dyn TargetIsa,
+}
+
+fn get_panic_string(panic: Box<dyn std::any::Any + Send>) -> String {
+    let panic = match panic.downcast::<&'static str>() {
+        Ok(panic_msg) => {
+            return panic_msg.to_string();
+        }
+        Err(panic) => panic,
+    };
+    match panic.downcast::<String>() {
+        Ok(panic_msg) => *panic_msg,
+        Err(_) => "Box<Any>".to_string(),
+    }
+}
+
+enum CheckResult {
+    /// The function compiled fine, or the verifier noticed an error.
+    Succeed,
+
+    /// The compilation of the function panicked.
+    Crash(String),
+}
+
+impl<'a> CrashCheckContext<'a> {
+    fn new(isa: &'a dyn TargetIsa) -> Self {
+        CrashCheckContext {
+            context: Context::new(),
+            code_memory: Vec::new(),
+            isa,
+        }
+    }
+
+    #[cfg_attr(test, allow(unreachable_code))]
+    fn check_for_crash(&mut self, func: &Function) -> CheckResult {
+        self.context.clear();
+        self.code_memory.clear();
+
+        self.context.func = func.clone();
+
+        use std::io::Write;
+        std::io::stdout().flush().unwrap(); // Flush stdout to sync with panic messages on stderr
+
+        match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+            cranelift_codegen::verifier::verify_function(&func, self.isa).err()
+        })) {
+            Ok(Some(_)) => return CheckResult::Succeed,
+            Ok(None) => {}
+            // The verifier panicked. Compiling it will probably give the same panic.
+            // We treat it as succeeding to make it possible to reduce for the actual error.
+            // FIXME prevent verifier panic on removing block0.
+            Err(_) => return CheckResult::Succeed,
+        }
+
+        #[cfg(test)]
+        {
+            // For testing purposes we emulate a panic caused by the existence of
+            // a `call` instruction.
+            let contains_call = func.layout.blocks().any(|block| {
+                func.layout
+                    .block_insts(block)
+                    .any(|inst| match func.dfg[inst] {
+                        InstructionData::Call { ..
} => true, + _ => false, + }) + }); + if contains_call { + return CheckResult::Crash("test crash".to_string()); + } else { + return CheckResult::Succeed; + } + } + + let old_panic_hook = std::panic::take_hook(); + std::panic::set_hook(Box::new(|_| {})); // silence panics + + let res = match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let mut relocs = PrintRelocs::new(false); + let mut traps = PrintTraps::new(false); + let mut stackmaps = PrintStackmaps::new(false); + + let _ = self.context.compile_and_emit( + self.isa, + &mut self.code_memory, + &mut relocs, + &mut traps, + &mut stackmaps, + ); + })) { + Ok(()) => CheckResult::Succeed, + Err(err) => CheckResult::Crash(get_panic_string(err)), + }; + + std::panic::set_hook(old_panic_hook); + + res + } +} + +#[cfg(test)] +mod tests { + use super::*; + use cranelift_reader::ParseOptions; + + #[test] + fn test_reduce() { + const TEST: &str = include_str!("../tests/bugpoint_test.clif"); + const EXPECTED: &str = include_str!("../tests/bugpoint_test_expected.clif"); + + let test_file = parse_test(TEST, ParseOptions::default()).unwrap(); + + // If we have an isa from the command-line, use that. Otherwise if the + // file contains a unique isa, use that. + let isa = test_file.isa_spec.unique_isa().expect("Unknown isa"); + + for (func, _) in test_file.functions { + let (reduced_func, crash_msg) = + reduce(isa, func, false).expect("Couldn't reduce test case"); + assert_eq!(crash_msg, "test crash"); + + let (func_reduced_twice, crash_msg) = + reduce(isa, reduced_func.clone(), false).expect("Couldn't re-reduce test case"); + assert_eq!(crash_msg, "test crash"); + + assert_eq!( + block_count(&func_reduced_twice), + block_count(&reduced_func), + "reduction wasn't maximal for blocks" + ); + assert_eq!( + inst_count(&func_reduced_twice), + inst_count(&reduced_func), + "reduction wasn't maximal for insts" + ); + + assert_eq!(format!("{}", reduced_func), EXPECTED.replace("\r\n", "\n")); + } + } +} diff --git a/cranelift/src/cat.rs b/cranelift/src/cat.rs new file mode 100644 index 0000000000..4477f10222 --- /dev/null +++ b/cranelift/src/cat.rs @@ -0,0 +1,32 @@ +//! The `cat` sub-command. +//! +//! Read a sequence of Cranelift IR files and print them again to stdout. This has the effect of +//! normalizing formatting and removing comments. + +use crate::utils::read_to_string; +use crate::CommandResult; +use cranelift_reader::parse_functions; + +pub fn run(files: &[String]) -> CommandResult { + for (i, f) in files.iter().enumerate() { + if i != 0 { + println!(); + } + cat_one(f)? 
+ } + Ok(()) +} + +fn cat_one(filename: &str) -> CommandResult { + let buffer = read_to_string(&filename).map_err(|e| format!("{}: {}", filename, e))?; + let items = parse_functions(&buffer).map_err(|e| format!("{}: {}", filename, e))?; + + for (idx, func) in items.into_iter().enumerate() { + if idx != 0 { + println!(); + } + print!("{}", func); + } + + Ok(()) +} diff --git a/cranelift/src/clif-util.rs b/cranelift/src/clif-util.rs new file mode 100755 index 0000000000..4066ef0fda --- /dev/null +++ b/cranelift/src/clif-util.rs @@ -0,0 +1,335 @@ +#![deny(trivial_numeric_casts)] +#![warn(unused_import_braces, unstable_features, unused_extern_crates)] +#![cfg_attr( + feature = "cargo-clippy", + warn( + clippy::float_arithmetic, + clippy::mut_mut, + clippy::nonminimal_bool, + clippy::option_map_unwrap_or, + clippy::option_map_unwrap_or_else, + clippy::unicode_not_nfc, + clippy::use_self + ) +)] + +use clap::{App, Arg, SubCommand}; +use cranelift_codegen::dbg::LOG_FILENAME_PREFIX; +use cranelift_codegen::VERSION; +use std::io::{self, Write}; +use std::option::Option; +use std::process; + +mod bugpoint; +mod cat; +mod compile; +mod disasm; +mod print_cfg; +mod run; +mod utils; + +#[cfg(feature = "wasm")] +mod wasm; + +/// A command either succeeds or fails with an error message. +pub type CommandResult = Result<(), String>; + +fn add_input_file_arg<'a>() -> clap::Arg<'a, 'a> { + Arg::with_name("file") + .default_value("-") + .multiple(true) + .value_name("file") + .help("Specify file(s) to be used for test. Defaults to reading from stdin.") +} + +fn add_single_input_file_arg<'a>() -> clap::Arg<'a, 'a> { + Arg::with_name("single-file") + .required(true) + .value_name("single-file") + .help("Specify a file to be used. Use '-' for stdin.") +} + +fn add_pass_arg<'a>() -> clap::Arg<'a, 'a> { + Arg::with_name("pass") + .required(true) + .multiple(true) + .value_name("pass") + .help("Specify pass(s) to be run on test file") +} + +fn add_verbose_flag<'a>() -> clap::Arg<'a, 'a> { + Arg::with_name("verbose").short("v").help("Be more verbose") +} + +fn add_time_flag<'a>() -> clap::Arg<'a, 'a> { + Arg::with_name("time-passes") + .short("T") + .help("Print pass timing report for test") +} + +fn add_size_flag<'a>() -> clap::Arg<'a, 'a> { + Arg::with_name("print-size") + .short("X") + .help("Print bytecode size") +} + +fn add_disasm_flag<'a>() -> clap::Arg<'a, 'a> { + Arg::with_name("disasm") + .long("disasm") + .short("D") + .help("Print machine code disassembly") +} + +fn add_set_flag<'a>() -> clap::Arg<'a, 'a> { + Arg::with_name("set") + .long("set") + .takes_value(true) + .multiple(true) + .help("Configure Cranelift settings") +} + +fn add_target_flag<'a>() -> clap::Arg<'a, 'a> { + Arg::with_name("target") + .takes_value(true) + .long("target") + .help("Specify the Cranelift target") +} + +fn add_print_flag<'a>() -> clap::Arg<'a, 'a> { + Arg::with_name("print") + .short("p") + .help("Print the resulting Cranelift IR") +} + +fn add_debug_flag<'a>() -> clap::Arg<'a, 'a> { + Arg::with_name("debug") + .short("d") + .help("Enable debug output on stderr/stdout") +} + +fn add_just_decode_flag<'a>() -> clap::Arg<'a, 'a> { + Arg::with_name("just-decode") + .short("t") + .help("Just decode into Cranelift IR") +} + +fn add_check_translation_flag<'a>() -> clap::Arg<'a, 'a> { + Arg::with_name("check-translation") + .short("c") + .help("Just checks the correctness of Cranelift IR translated from WebAssembly") +} + +/// Returns a vector of clap value options and changes these options into a vector of strings +fn 
get_vec(argument_vec: Option<clap::Values>) -> Vec<String> {
+    let mut ret_vec: Vec<String> = Vec::new();
+    if let Some(clap_vec) = argument_vec {
+        for val in clap_vec {
+            ret_vec.push(val.to_string());
+        }
+    }
+
+    ret_vec
+}
+
+fn add_wasm_or_compile<'a>(cmd: &str) -> clap::App<'a, 'a> {
+    let about_str = match cmd {
+        "wasm" => "Compiles Wasm binary/text into Cranelift IR and then into target language",
+        "compile" => "Compiles Cranelift IR into target language",
+        _ => panic!("Invalid command"),
+    };
+
+    SubCommand::with_name(cmd)
+        .about(about_str)
+        .arg(add_verbose_flag())
+        .arg(add_print_flag())
+        .arg(add_time_flag())
+        .arg(add_size_flag())
+        .arg(add_disasm_flag())
+        .arg(add_set_flag())
+        .arg(add_target_flag())
+        .arg(add_input_file_arg())
+        .arg(add_debug_flag())
+        .arg(add_just_decode_flag())
+        .arg(add_check_translation_flag())
+}
+
+fn handle_debug_flag(debug: bool) {
+    if debug {
+        pretty_env_logger::init();
+    } else {
+        file_per_thread_logger::initialize(LOG_FILENAME_PREFIX);
+    }
+}
+
+fn main() {
+    let app_cmds = App::new("Cranelift code generator utility")
+        .version(VERSION)
+        .subcommand(
+            SubCommand::with_name("test")
+                .about("Run Cranelift tests")
+                .arg(add_verbose_flag())
+                .arg(add_time_flag())
+                .arg(add_input_file_arg())
+                .arg(add_debug_flag()),
+        )
+        .subcommand(
+            SubCommand::with_name("run")
+                .about("Execute CLIF code and verify with test expressions")
+                .arg(add_verbose_flag())
+                .arg(add_input_file_arg())
+                .arg(add_debug_flag()),
+        )
+        .subcommand(
+            SubCommand::with_name("cat")
+                .about("Outputs .clif file")
+                .arg(add_input_file_arg())
+                .arg(add_debug_flag()),
+        )
+        .subcommand(
+            SubCommand::with_name("print-cfg")
+                .about("Prints out cfg in dot format")
+                .arg(add_input_file_arg())
+                .arg(add_debug_flag()),
+        )
+        .subcommand(add_wasm_or_compile("compile"))
+        .subcommand(
+            add_wasm_or_compile("wasm").arg(
+                Arg::with_name("value-ranges")
+                    .long("value-ranges")
+                    .help("Display values ranges and their locations"),
+            ),
+        )
+        .subcommand(
+            SubCommand::with_name("pass")
+                .about("Run specified pass(s) on an input file.")
+                .arg(add_single_input_file_arg())
+                .arg(add_target_flag())
+                .arg(add_pass_arg())
+                .arg(add_debug_flag())
+                .arg(add_time_flag()),
+        )
+        .subcommand(
+            SubCommand::with_name("bugpoint")
+                .about("Reduce size of clif file causing panic during compilation.")
+                .arg(add_single_input_file_arg())
+                .arg(add_set_flag())
+                .arg(add_target_flag())
+                .arg(add_verbose_flag()),
+        );
+
+    let res_util = match app_cmds.get_matches().subcommand() {
+        ("cat", Some(rest_cmd)) => {
+            handle_debug_flag(rest_cmd.is_present("debug"));
+            cat::run(&get_vec(rest_cmd.values_of("file")))
+        }
+        ("test", Some(rest_cmd)) => {
+            handle_debug_flag(rest_cmd.is_present("debug"));
+            cranelift_filetests::run(
+                rest_cmd.is_present("verbose"),
+                rest_cmd.is_present("time-passes"),
+                &get_vec(rest_cmd.values_of("file")),
+            )
+            .map(|_time| ())
+        }
+        ("run", Some(rest_cmd)) => {
+            handle_debug_flag(rest_cmd.is_present("debug"));
+            run::run(
+                get_vec(rest_cmd.values_of("file")),
+                rest_cmd.is_present("verbose"),
+            )
+            .map(|_time| ())
+        }
+        ("pass", Some(rest_cmd)) => {
+            handle_debug_flag(rest_cmd.is_present("debug"));
+
+            let mut target_val: &str = "";
+            if let Some(clap_target) = rest_cmd.value_of("target") {
+                target_val = clap_target;
+            }
+
+            // Can be unwrapped because 'single-file' is required
+            cranelift_filetests::run_passes(
+                rest_cmd.is_present("verbose"),
+                rest_cmd.is_present("time-passes"),
+                &get_vec(rest_cmd.values_of("pass")),
+                target_val,
rest_cmd.value_of("single-file").unwrap(), + ) + .map(|_time| ()) + } + ("print-cfg", Some(rest_cmd)) => { + handle_debug_flag(rest_cmd.is_present("debug")); + print_cfg::run(&get_vec(rest_cmd.values_of("file"))) + } + ("compile", Some(rest_cmd)) => { + handle_debug_flag(rest_cmd.is_present("debug")); + + let mut target_val: &str = ""; + if let Some(clap_target) = rest_cmd.value_of("target") { + target_val = clap_target; + } + + compile::run( + get_vec(rest_cmd.values_of("file")), + rest_cmd.is_present("print"), + rest_cmd.is_present("disasm"), + rest_cmd.is_present("time-passes"), + &get_vec(rest_cmd.values_of("set")), + target_val, + ) + } + ("wasm", Some(rest_cmd)) => { + handle_debug_flag(rest_cmd.is_present("debug")); + + #[cfg(feature = "wasm")] + let result = { + let mut target_val: &str = ""; + if let Some(clap_target) = rest_cmd.value_of("target") { + target_val = clap_target; + } + + wasm::run( + get_vec(rest_cmd.values_of("file")), + rest_cmd.is_present("verbose"), + rest_cmd.is_present("just-decode"), + rest_cmd.is_present("check-translation"), + rest_cmd.is_present("print"), + rest_cmd.is_present("disasm"), + &get_vec(rest_cmd.values_of("set")), + target_val, + rest_cmd.is_present("print-size"), + rest_cmd.is_present("time-passes"), + rest_cmd.is_present("value-ranges"), + ) + }; + + #[cfg(not(feature = "wasm"))] + let result = Err("Error: clif-util was compiled without wasm support.".to_owned()); + + result + } + ("bugpoint", Some(rest_cmd)) => { + let mut target_val: &str = ""; + if let Some(clap_target) = rest_cmd.value_of("target") { + target_val = clap_target; + } + + bugpoint::run( + rest_cmd.value_of("single-file").unwrap(), + &get_vec(rest_cmd.values_of("set")), + target_val, + rest_cmd.is_present("verbose"), + ) + } + _ => Err("Invalid subcommand.".to_owned()), + }; + + if let Err(mut msg) = res_util { + if !msg.ends_with('\n') { + msg.push('\n'); + } + io::stdout().flush().expect("flushing stdout"); + io::stderr().write_all(msg.as_bytes()).unwrap(); + process::exit(1); + } +} diff --git a/cranelift/src/compile.rs b/cranelift/src/compile.rs new file mode 100644 index 0000000000..7d888f3113 --- /dev/null +++ b/cranelift/src/compile.rs @@ -0,0 +1,96 @@ +//! CLI tool to read Cranelift IR files and compile them into native code. 
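+//!
+//! For example, to compile a file for a given target and print the disassembly,
+//! an invocation might look like (file name hypothetical):
+//!
+//! ```sh
+//! clif-util compile --target x86_64 -D test.clif
+//! ```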
+
+use crate::disasm::{print_all, PrintRelocs, PrintStackmaps, PrintTraps};
+use crate::utils::{parse_sets_and_triple, read_to_string};
+use cranelift_codegen::print_errors::pretty_error;
+use cranelift_codegen::settings::FlagsOrIsa;
+use cranelift_codegen::timing;
+use cranelift_codegen::Context;
+use cranelift_reader::{parse_test, ParseOptions};
+use std::path::Path;
+use std::path::PathBuf;
+
+pub fn run(
+    files: Vec<String>,
+    flag_print: bool,
+    flag_disasm: bool,
+    flag_report_times: bool,
+    flag_set: &[String],
+    flag_isa: &str,
+) -> Result<(), String> {
+    let parsed = parse_sets_and_triple(flag_set, flag_isa)?;
+
+    for filename in files {
+        let path = Path::new(&filename);
+        let name = String::from(path.as_os_str().to_string_lossy());
+        handle_module(
+            flag_print,
+            flag_disasm,
+            flag_report_times,
+            &path.to_path_buf(),
+            &name,
+            parsed.as_fisa(),
+        )?;
+    }
+    Ok(())
+}
+
+fn handle_module(
+    flag_print: bool,
+    flag_disasm: bool,
+    flag_report_times: bool,
+    path: &PathBuf,
+    name: &str,
+    fisa: FlagsOrIsa,
+) -> Result<(), String> {
+    let buffer = read_to_string(&path).map_err(|e| format!("{}: {}", name, e))?;
+    let test_file =
+        parse_test(&buffer, ParseOptions::default()).map_err(|e| format!("{}: {}", name, e))?;
+
+    // If we have an isa from the command-line, use that. Otherwise if the
+    // file contains a unique isa, use that.
+    let isa = if let Some(isa) = fisa.isa {
+        isa
+    } else if let Some(isa) = test_file.isa_spec.unique_isa() {
+        isa
+    } else {
+        return Err(String::from("compilation requires a target isa"));
+    };
+
+    for (func, _) in test_file.functions {
+        let mut context = Context::new();
+        context.func = func;
+
+        let mut relocs = PrintRelocs::new(flag_print);
+        let mut traps = PrintTraps::new(flag_print);
+        let mut stackmaps = PrintStackmaps::new(flag_print);
+        let mut mem = vec![];
+
+        // Compile and encode the result to machine code.
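+        // The sinks passed to `compile_and_emit` below record relocations, traps,
+        // and stackmaps as the backend emits them, so they can be printed next to
+        // the disassembly.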
+ let code_info = context + .compile_and_emit(isa, &mut mem, &mut relocs, &mut traps, &mut stackmaps) + .map_err(|err| pretty_error(&context.func, Some(isa), err))?; + + if flag_print { + println!("{}", context.func.display(isa)); + } + + if flag_disasm { + print_all( + isa, + &mem, + code_info.code_size, + code_info.jumptables_size + code_info.rodata_size, + &relocs, + &traps, + &stackmaps, + )?; + } + } + + if flag_report_times { + print!("{}", timing::take_current()); + } + + Ok(()) +} diff --git a/cranelift/src/disasm.rs b/cranelift/src/disasm.rs new file mode 100644 index 0000000000..ba80e3d57f --- /dev/null +++ b/cranelift/src/disasm.rs @@ -0,0 +1,258 @@ +use cfg_if::cfg_if; +use cranelift_codegen::isa::TargetIsa; +use cranelift_codegen::{binemit, ir}; +use std::fmt::Write; + +pub struct PrintRelocs { + pub flag_print: bool, + pub text: String, +} + +impl PrintRelocs { + pub fn new(flag_print: bool) -> Self { + Self { + flag_print, + text: String::new(), + } + } +} + +impl binemit::RelocSink for PrintRelocs { + fn reloc_block( + &mut self, + where_: binemit::CodeOffset, + r: binemit::Reloc, + offset: binemit::CodeOffset, + ) { + if self.flag_print { + writeln!( + &mut self.text, + "reloc_block: {} {} at {}", + r, offset, where_ + ) + .unwrap(); + } + } + + fn reloc_external( + &mut self, + where_: binemit::CodeOffset, + r: binemit::Reloc, + name: &ir::ExternalName, + addend: binemit::Addend, + ) { + if self.flag_print { + writeln!( + &mut self.text, + "reloc_external: {} {} {} at {}", + r, name, addend, where_ + ) + .unwrap(); + } + } + + fn reloc_jt(&mut self, where_: binemit::CodeOffset, r: binemit::Reloc, jt: ir::JumpTable) { + if self.flag_print { + writeln!(&mut self.text, "reloc_jt: {} {} at {}", r, jt, where_).unwrap(); + } + } + + fn reloc_constant( + &mut self, + code_offset: binemit::CodeOffset, + reloc: binemit::Reloc, + constant: ir::ConstantOffset, + ) { + if self.flag_print { + writeln!( + &mut self.text, + "reloc_constant: {} {} at {}", + reloc, constant, code_offset + ) + .unwrap(); + } + } +} + +pub struct PrintTraps { + pub flag_print: bool, + pub text: String, +} + +impl PrintTraps { + pub fn new(flag_print: bool) -> Self { + Self { + flag_print, + text: String::new(), + } + } +} + +impl binemit::TrapSink for PrintTraps { + fn trap(&mut self, offset: binemit::CodeOffset, _srcloc: ir::SourceLoc, code: ir::TrapCode) { + if self.flag_print { + writeln!(&mut self.text, "trap: {} at {}", code, offset).unwrap(); + } + } +} + +pub struct PrintStackmaps { + pub flag_print: bool, + pub text: String, +} + +impl PrintStackmaps { + pub fn new(flag_print: bool) -> Self { + Self { + flag_print, + text: String::new(), + } + } +} + +impl binemit::StackmapSink for PrintStackmaps { + fn add_stackmap(&mut self, offset: binemit::CodeOffset, _: binemit::Stackmap) { + if self.flag_print { + writeln!(&mut self.text, "add_stackmap at {}", offset).unwrap(); + } + } +} + +cfg_if! 
{ + if #[cfg(feature = "disas")] { + use capstone::prelude::*; + use target_lexicon::Architecture; + + fn get_disassembler(isa: &dyn TargetIsa) -> Result { + let cs = match isa.triple().architecture { + Architecture::Riscv32 | Architecture::Riscv64 => { + return Err(String::from("No disassembler for RiscV")) + } + Architecture::I386 | Architecture::I586 | Architecture::I686 => Capstone::new() + .x86() + .mode(arch::x86::ArchMode::Mode32) + .build(), + Architecture::X86_64 => Capstone::new() + .x86() + .mode(arch::x86::ArchMode::Mode64) + .build(), + Architecture::Arm(arm) => { + if arm.is_thumb() { + Capstone::new() + .arm() + .mode(arch::arm::ArchMode::Thumb) + .build() + } else { + Capstone::new() + .arm() + .mode(arch::arm::ArchMode::Arm) + .build() + } + } + Architecture::Aarch64 {..} => Capstone::new() + .arm64() + .mode(arch::arm64::ArchMode::Arm) + .build(), + _ => return Err(String::from("Unknown ISA")), + }; + + cs.map_err(|err| err.to_string()) + } + + pub fn print_disassembly(isa: &dyn TargetIsa, mem: &[u8]) -> Result<(), String> { + let cs = get_disassembler(isa)?; + + println!("\nDisassembly of {} bytes:", mem.len()); + let insns = cs.disasm_all(&mem, 0x0).unwrap(); + for i in insns.iter() { + let mut line = String::new(); + + write!(&mut line, "{:4x}:\t", i.address()).unwrap(); + + let mut bytes_str = String::new(); + let mut len = 0; + let mut first = true; + for b in i.bytes() { + if !first { + write!(&mut bytes_str, " ").unwrap(); + } + write!(&mut bytes_str, "{:02x}", b).unwrap(); + len += 1; + first = false; + } + write!(&mut line, "{:21}\t", bytes_str).unwrap(); + if len > 8 { + write!(&mut line, "\n\t\t\t\t").unwrap(); + } + + if let Some(s) = i.mnemonic() { + write!(&mut line, "{}\t", s).unwrap(); + } + + if let Some(s) = i.op_str() { + write!(&mut line, "{}", s).unwrap(); + } + + println!("{}", line); + } + Ok(()) + } + } else { + pub fn print_disassembly(_: &dyn TargetIsa, _: &[u8]) -> Result<(), String> { + println!("\nNo disassembly available."); + Ok(()) + } + } +} + +pub fn print_all( + isa: &dyn TargetIsa, + mem: &[u8], + code_size: u32, + rodata_size: u32, + relocs: &PrintRelocs, + traps: &PrintTraps, + stackmaps: &PrintStackmaps, +) -> Result<(), String> { + print_bytes(&mem); + print_disassembly(isa, &mem[0..code_size as usize])?; + print_readonly_data(&mem[code_size as usize..(code_size + rodata_size) as usize]); + println!("\n{}\n{}\n{}", &relocs.text, &traps.text, &stackmaps.text); + Ok(()) +} + +pub fn print_bytes(mem: &[u8]) { + print!(".byte "); + let mut first = true; + for byte in mem.iter() { + if first { + first = false; + } else { + print!(", "); + } + print!("{}", byte); + } + println!(); +} + +pub fn print_readonly_data(mem: &[u8]) { + if mem.is_empty() { + return; + } + + println!("\nFollowed by {} bytes of read-only data:", mem.len()); + + for (i, byte) in mem.iter().enumerate() { + if i % 16 == 0 { + if i != 0 { + println!(); + } + print!("{:4}: ", i); + } + if i % 4 == 0 { + print!(" "); + } + print!("{:02x} ", byte); + } + println!(); +} diff --git a/cranelift/src/print_cfg.rs b/cranelift/src/print_cfg.rs new file mode 100644 index 0000000000..2f739f9f9d --- /dev/null +++ b/cranelift/src/print_cfg.rs @@ -0,0 +1,33 @@ +//! The `print-cfg` sub-command. +//! +//! Read a series of Cranelift IR files and print their control flow graphs +//! in graphviz format. 
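+//!
+//! The output is plain graphviz, so it can be piped into `dot`, for example:
+//!
+//! ```sh
+//! clif-util print-cfg test.clif | dot -Tsvg > cfg.svg
+//! ```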
+
+use crate::utils::read_to_string;
+use crate::CommandResult;
+use cranelift_codegen::cfg_printer::CFGPrinter;
+use cranelift_reader::parse_functions;
+
+pub fn run(files: &[String]) -> CommandResult {
+    for (i, f) in files.iter().enumerate() {
+        if i != 0 {
+            println!();
+        }
+        print_cfg(f)?
+    }
+    Ok(())
+}
+
+fn print_cfg(filename: &str) -> CommandResult {
+    let buffer = read_to_string(filename).map_err(|e| format!("{}: {}", filename, e))?;
+    let items = parse_functions(&buffer).map_err(|e| format!("{}: {}", filename, e))?;
+
+    for (idx, func) in items.into_iter().enumerate() {
+        if idx != 0 {
+            println!();
+        }
+        print!("{}", CFGPrinter::new(&func));
+    }
+
+    Ok(())
+}
diff --git a/cranelift/src/run.rs b/cranelift/src/run.rs
new file mode 100644
index 0000000000..0f553c8702
--- /dev/null
+++ b/cranelift/src/run.rs
@@ -0,0 +1,132 @@
+//! CLI tool to compile Cranelift IR files to native code in memory and execute them.
+
+use crate::utils::read_to_string;
+use cranelift_codegen::isa::{CallConv, TargetIsa};
+use cranelift_filetests::FunctionRunner;
+use cranelift_native::builder as host_isa_builder;
+use cranelift_reader::{parse_test, Details, IsaSpec, ParseOptions};
+use std::path::PathBuf;
+use target_lexicon::Triple;
+use walkdir::WalkDir;
+
+pub fn run(files: Vec<String>, flag_print: bool) -> Result<(), String> {
+    let stdin_exist = files.iter().find(|file| *file == "-").is_some();
+    let filtered_files = files
+        .iter()
+        .filter(|file| *file != "-")
+        .map(|file| file.to_string())
+        .collect::<Vec<_>>();
+    let mut total = 0;
+    let mut errors = 0;
+    let mut special_files: Vec<PathBuf> = vec![];
+    if stdin_exist {
+        special_files.push("-".into());
+    }
+    for file in iterate_files(filtered_files).chain(special_files) {
+        total += 1;
+        match run_single_file(&file) {
+            Ok(_) => {
+                if flag_print {
+                    println!("{}", file.to_string_lossy());
+                }
+            }
+            Err(e) => {
+                if flag_print {
+                    println!("{}: {}", file.to_string_lossy(), e);
+                }
+                errors += 1;
+            }
+        }
+    }
+
+    if flag_print {
+        match total {
+            0 => println!("0 files"),
+            1 => println!("1 file"),
+            n => println!("{} files", n),
+        }
+    }
+
+    match errors {
+        0 => Ok(()),
+        1 => Err(String::from("1 failure")),
+        n => Err(format!("{} failures", n)),
+    }
+}
+
+/// Iterate over all of the files passed as arguments, recursively iterating through directories
+fn iterate_files(files: Vec<String>) -> impl Iterator<Item = PathBuf> {
+    files
+        .into_iter()
+        .flat_map(WalkDir::new)
+        .filter(|f| match f {
+            Ok(d) => {
+                // filter out hidden files (starting with .)
+                !d.file_name().to_str().map_or(false, |s| s.starts_with('.'))
+                    // filter out directories
+                    && !d.file_type().is_dir()
+            }
+            Err(e) => {
+                println!("Unable to read file: {}", e);
+                false
+            }
+        })
+        .map(|f| {
+            f.expect("This should not happen: we have already filtered out the errors")
+                .into_path()
+        })
+}
+
+/// Run all functions in a file that are succeeded by "run:" comments
+fn run_single_file(path: &PathBuf) -> Result<(), String> {
+    let file_contents = read_to_string(&path).map_err(|e| e.to_string())?;
+    run_file_contents(file_contents)
+}
+
+/// Main body of `run_single_file` separated for testing
+fn run_file_contents(file_contents: String) -> Result<(), String> {
+    let options = ParseOptions {
+        default_calling_convention: CallConv::triple_default(&Triple::host()), // use the host's default calling convention
+        ..ParseOptions::default()
+    };
+    let test_file = parse_test(&file_contents, options).map_err(|e| e.to_string())?;
+    for (func, Details { comments, .. }) in test_file.functions {
+        if comments.iter().any(|c| c.text.contains("run")) {
+            let isa = create_target_isa(&test_file.isa_spec)?;
+            FunctionRunner::new(func, isa).run()?
+        }
+    }
+    Ok(())
+}
+
+/// Build an ISA based on the current machine running this code (the host)
+fn create_target_isa(isa_spec: &IsaSpec) -> Result<Box<dyn TargetIsa>, String> {
+    if let IsaSpec::None(flags) = isa_spec {
+        // build an ISA for the current machine
+        let builder = host_isa_builder()?;
+        Ok(builder.finish(flags.clone()))
+    } else {
+        Err(String::from("A target ISA was specified in the file but should not have been--only the host ISA can be used for running CLIF files"))
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn nop() {
+        let code = String::from(
+            "
+            function %test() -> b8 {
+            block0:
+                nop
+                v1 = bconst.b8 true
+                return v1
+            }
+            ; run
+            ",
+        );
+        run_file_contents(code).unwrap()
+    }
+}
diff --git a/cranelift/src/utils.rs b/cranelift/src/utils.rs
new file mode 100644
index 0000000000..bb5a2ac485
--- /dev/null
+++ b/cranelift/src/utils.rs
@@ -0,0 +1,82 @@
+//! Utility functions.
+
+use cranelift_codegen::isa;
+use cranelift_codegen::isa::TargetIsa;
+use cranelift_codegen::settings::{self, FlagsOrIsa};
+use cranelift_reader::{parse_options, Location};
+use std::fs::File;
+use std::io::{self, Read};
+use std::path::Path;
+use std::str::FromStr;
+use target_lexicon::Triple;
+
+/// Read an entire file into a string.
+pub fn read_to_string<P: AsRef<Path>>(path: P) -> io::Result<String> {
+    let mut buffer = String::new();
+    if path.as_ref() == Path::new("-") {
+        let stdin = io::stdin();
+        let mut stdin = stdin.lock();
+        stdin.read_to_string(&mut buffer)?;
+    } else {
+        let mut file = File::open(path)?;
+        file.read_to_string(&mut buffer)?;
+    }
+    Ok(buffer)
+}
+
+/// Like `FlagsOrIsa`, but holds ownership.
+pub enum OwnedFlagsOrIsa {
+    Flags(settings::Flags),
+    Isa(Box<dyn TargetIsa>),
+}
+
+impl OwnedFlagsOrIsa {
+    /// Produce a FlagsOrIsa reference.
+    pub fn as_fisa(&self) -> FlagsOrIsa {
+        match *self {
+            Self::Flags(ref flags) => FlagsOrIsa::from(flags),
+            Self::Isa(ref isa) => FlagsOrIsa::from(&**isa),
+        }
+    }
+}
+
+/// Parse "set" and "triple" commands.
+pub fn parse_sets_and_triple(
+    flag_set: &[String],
+    flag_triple: &str,
+) -> Result<OwnedFlagsOrIsa, String> {
+    let mut flag_builder = settings::builder();
+    parse_options(
+        flag_set.iter().map(|x| x.as_str()),
+        &mut flag_builder,
+        Location { line_number: 0 },
+    )
+    .map_err(|err| err.to_string())?;
+
+    let mut words = flag_triple.trim().split_whitespace();
+    // Look for `target foo`.
+    if let Some(triple_name) = words.next() {
+        let triple = match Triple::from_str(triple_name) {
+            Ok(triple) => triple,
+            Err(parse_error) => return Err(parse_error.to_string()),
+        };
+        let mut isa_builder = isa::lookup(triple).map_err(|err| match err {
+            isa::LookupError::SupportDisabled => {
+                format!("support for triple '{}' is disabled", triple_name)
+            }
+            isa::LookupError::Unsupported => format!(
+                "support for triple '{}' is not implemented yet",
+                triple_name
+            ),
+        })?;
+        // Apply the ISA-specific settings to `isa_builder`.
+        parse_options(words, &mut isa_builder, Location { line_number: 0 })
+            .map_err(|err| err.to_string())?;
+
+        Ok(OwnedFlagsOrIsa::Isa(
+            isa_builder.finish(settings::Flags::new(flag_builder)),
+        ))
+    } else {
+        Ok(OwnedFlagsOrIsa::Flags(settings::Flags::new(flag_builder)))
+    }
+}
diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs
new file mode 100644
index 0000000000..f93471393e
--- /dev/null
+++ b/cranelift/src/wasm.rs
@@ -0,0 +1,271 @@
+//!
diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs new file mode 100644 index 0000000000..f93471393e --- /dev/null +++ b/cranelift/src/wasm.rs @@ -0,0 +1,271 @@ +//! CLI tool to use the functions provided by the [cranelift-wasm](../cranelift_wasm/index.html) +//! crate. +//! +//! Reads Wasm binary/text files, translates the functions' code to Cranelift IR. +#![cfg_attr( + feature = "cargo-clippy", + allow(clippy::too_many_arguments, clippy::cognitive_complexity) +)] + +use crate::disasm::{print_all, PrintRelocs, PrintStackmaps, PrintTraps}; +use crate::utils::parse_sets_and_triple; +use cranelift_codegen::ir::DisplayFunctionAnnotations; +use cranelift_codegen::print_errors::{pretty_error, pretty_verifier_error}; +use cranelift_codegen::settings::FlagsOrIsa; +use cranelift_codegen::timing; +use cranelift_codegen::Context; +use cranelift_entity::EntityRef; +use cranelift_wasm::{translate_module, DummyEnvironment, FuncIndex, ReturnMode}; +use std::io::Read; +use std::path::Path; +use std::path::PathBuf; +use term; + +macro_rules! vprintln { + ($x: expr, $($tts:tt)*) => { + if $x { + println!($($tts)*); + } + } +} + +macro_rules! vprint { + ($x: expr, $($tts:tt)*) => { + if $x { + print!($($tts)*); + } + } +} + +pub fn run( + files: Vec<String>, + flag_verbose: bool, + flag_just_decode: bool, + flag_check_translation: bool, + flag_print: bool, + flag_print_disasm: bool, + flag_set: &[String], + flag_triple: &str, + flag_print_size: bool, + flag_report_times: bool, + flag_calc_value_ranges: bool, +) -> Result<(), String> { + let parsed = parse_sets_and_triple(flag_set, flag_triple)?; + + for filename in files { + let path = Path::new(&filename); + let name = String::from(path.as_os_str().to_string_lossy()); + handle_module( + flag_verbose, + flag_just_decode, + flag_check_translation, + flag_print, + flag_print_size, + flag_print_disasm, + flag_report_times, + flag_calc_value_ranges, + &path.to_path_buf(), + &name, + parsed.as_fisa(), + )?; + } + Ok(()) +} + +fn handle_module( + flag_verbose: bool, + flag_just_decode: bool, + flag_check_translation: bool, + flag_print: bool, + flag_print_size: bool, + flag_print_disasm: bool, + flag_report_times: bool, + flag_calc_value_ranges: bool, + path: &PathBuf, + name: &str, + fisa: FlagsOrIsa, +) -> Result<(), String> { + let mut terminal = term::stdout().unwrap(); + let _ = terminal.fg(term::color::YELLOW); + vprint!(flag_verbose, "Handling: "); + let _ = terminal.reset(); + vprintln!(flag_verbose, "\"{}\"", name); + let _ = terminal.fg(term::color::MAGENTA); + vprint!(flag_verbose, "Translating... "); + let _ = terminal.reset(); + + let module_binary = if path.to_str() == Some("-") { + let stdin = std::io::stdin(); + let mut buf = Vec::new(); + stdin + .lock() + .read_to_end(&mut buf) + .map_err(|e| e.to_string())?; + wat::parse_bytes(&buf) + .map_err(|err| format!("{:?}", err))? + .into() + } else { + wat::parse_file(path).map_err(|err| format!("{:?}", err))?
+ }; + + let isa = match fisa.isa { + Some(isa) => isa, + None => { + return Err(String::from( + "Error: the wasm command requires an explicit isa.", + )); + } + }; + + let debug_info = flag_calc_value_ranges; + let mut dummy_environ = + DummyEnvironment::new(isa.frontend_config(), ReturnMode::NormalReturns, debug_info); + translate_module(&module_binary, &mut dummy_environ).map_err(|e| e.to_string())?; + + let _ = terminal.fg(term::color::GREEN); + vprintln!(flag_verbose, "ok"); + let _ = terminal.reset(); + + if flag_just_decode { + if !flag_print { + return Ok(()); + } + + let num_func_imports = dummy_environ.get_num_func_imports(); + for (def_index, func) in dummy_environ.info.function_bodies.iter() { + let func_index = num_func_imports + def_index.index(); + let mut context = Context::new(); + context.func = func.clone(); + if let Some(start_func) = dummy_environ.info.start_func { + if func_index == start_func.index() { + println!("; Selected as wasm start function"); + } + } + vprintln!(flag_verbose, ""); + for export_name in + &dummy_environ.info.functions[FuncIndex::new(func_index)].export_names + { + println!("; Exported as \"{}\"", export_name); + } + println!("{}", context.func.display(None)); + vprintln!(flag_verbose, ""); + } + let _ = terminal.reset(); + return Ok(()); + } + + let _ = terminal.fg(term::color::MAGENTA); + if flag_check_translation { + vprint!(flag_verbose, "Checking... "); + } else { + vprint!(flag_verbose, "Compiling... "); + } + let _ = terminal.reset(); + + if flag_print_size { + vprintln!(flag_verbose, ""); + } + + let num_func_imports = dummy_environ.get_num_func_imports(); + let mut total_module_code_size = 0; + let mut context = Context::new(); + for (def_index, func) in dummy_environ.info.function_bodies.iter() { + context.func = func.clone(); + + let mut saved_sizes = None; + let func_index = num_func_imports + def_index.index(); + let mut mem = vec![]; + let mut relocs = PrintRelocs::new(flag_print); + let mut traps = PrintTraps::new(flag_print); + let mut stackmaps = PrintStackmaps::new(flag_print); + if flag_check_translation { + if let Err(errors) = context.verify(fisa) { + return Err(pretty_verifier_error(&context.func, fisa.isa, None, errors)); + } + } else { + let code_info = context + .compile_and_emit(isa, &mut mem, &mut relocs, &mut traps, &mut stackmaps) + .map_err(|err| pretty_error(&context.func, fisa.isa, err))?; + + if flag_print_size { + println!( + "Function #{} code size: {} bytes", + func_index, code_info.total_size, + ); + total_module_code_size += code_info.total_size; + println!( + "Function #{} bytecode size: {} bytes", + func_index, + dummy_environ.func_bytecode_sizes[def_index.index()] + ); + } + + if flag_print_disasm { + saved_sizes = Some(( + code_info.code_size, + code_info.jumptables_size + code_info.rodata_size, + )); + } + } + + if flag_print { + vprintln!(flag_verbose, ""); + if let Some(start_func) = dummy_environ.info.start_func { + if func_index == start_func.index() { + println!("; Selected as wasm start function"); + } + } + for export_name in + &dummy_environ.info.functions[FuncIndex::new(func_index)].export_names + { + println!("; Exported as \"{}\"", export_name); + } + let value_ranges = if flag_calc_value_ranges { + Some( + context + .build_value_labels_ranges(isa) + .expect("value location ranges"), + ) + } else { + None + }; + println!( + "{}", + context.func.display_with(DisplayFunctionAnnotations { + isa: fisa.isa, + value_ranges: value_ranges.as_ref(), + }) + ); + vprintln!(flag_verbose, ""); + } + + if 
let Some((code_size, rodata_size)) = saved_sizes { + print_all( + isa, + &mem, + code_size, + rodata_size, + &relocs, + &traps, + &stackmaps, + )?; + } + + context.clear(); + } + + if !flag_check_translation && flag_print_size { + println!("Total module code size: {} bytes", total_module_code_size); + let total_bytecode_size: usize = dummy_environ.func_bytecode_sizes.iter().sum(); + println!("Total module bytecode size: {} bytes", total_bytecode_size); + } + + if flag_report_times { + println!("{}", timing::take_current()); + } + + let _ = terminal.fg(term::color::GREEN); + vprintln!(flag_verbose, "ok"); + let _ = terminal.reset(); + Ok(()) +} diff --git a/cranelift/tests/bugpoint_test.clif b/cranelift/tests/bugpoint_test.clif new file mode 100644 index 0000000000..b2e9acc37e --- /dev/null +++ b/cranelift/tests/bugpoint_test.clif @@ -0,0 +1,1912 @@ +test compile +set is_pic +target x86_64-unknown-linux-gnu + +function u0:0(i64, i64, i64) system_v { + + + ss0 = explicit_slot 16 + ss1 = explicit_slot 1 ss2 = explicit_slot 16 ss3 = explicit_slot 1 ss4 = explicit_slot 16 ss5 = explicit_slot 8 ss6 = explicit_slot 16 ss7 = explicit_slot 16 ss8 = explicit_slot 16 ss9 = explicit_slot 16 ss10 = explicit_slot 16 ss11 = explicit_slot 16 ss12 = explicit_slot 16 ss13 = explicit_slot 16 ss14 = explicit_slot 16 ss15 = explicit_slot 16 ss16 = explicit_slot 16 ss17 = explicit_slot 16 ss18 = explicit_slot 24 ss19 = explicit_slot 4 ss20 = explicit_slot 4 ss21 = explicit_slot 4 ss22 = explicit_slot 4 ss23 = explicit_slot 16 ss24 = explicit_slot 16 ss25 = explicit_slot 16 ss26 = explicit_slot 16 ss27 = explicit_slot 48 ss28 = explicit_slot 16 ss29 = explicit_slot 16 ss30 = explicit_slot 32 ss31 = explicit_slot 16 ss32 = explicit_slot 8 ss33 = explicit_slot 8 ss34 = explicit_slot 16 ss35 = explicit_slot 16 ss36 = explicit_slot 16 ss37 = explicit_slot 48 ss38 = explicit_slot 16 ss39 = explicit_slot 16 ss40 = explicit_slot 32 ss41 = explicit_slot 16 ss42 = explicit_slot 8 ss43 = explicit_slot 8 ss44 = explicit_slot 16 ss45 = explicit_slot 16 ss46 = explicit_slot 16 ss47 = explicit_slot 16 ss48 = explicit_slot 16 ss49 = explicit_slot 16 ss50 = explicit_slot 16 ss51 = explicit_slot 8 ss52 = explicit_slot 4 ss53 = explicit_slot 4 ss54 = explicit_slot 16 ss55 = explicit_slot 16 ss56 = explicit_slot 16 ss57 = explicit_slot 2 ss58 = explicit_slot 4 ss59 = explicit_slot 2 ss60 = explicit_slot 16 ss61 = explicit_slot 16 ss62 = explicit_slot 16 ss63 = explicit_slot 16 ss64 = explicit_slot 16 ss65 = explicit_slot 16 ss66 = explicit_slot 16 ss67 = explicit_slot 16 ss68 = explicit_slot 8 ss69 = explicit_slot 16 ss70 = explicit_slot 16 ss71 = explicit_slot 48 ss72 = explicit_slot 16 ss73 = explicit_slot 16 ss74 = explicit_slot 32 ss75 = explicit_slot 16 ss76 = explicit_slot 8 ss77 = explicit_slot 8 ss78 = explicit_slot 16 ss79 = explicit_slot 16 ss80 = explicit_slot 16 ss81 = explicit_slot 48 ss82 = explicit_slot 16 ss83 = explicit_slot 16 ss84 = explicit_slot 32 ss85 = explicit_slot 16 ss86 = explicit_slot 8 ss87 = explicit_slot 8 ss88 = explicit_slot 16 ss89 = explicit_slot 16 ss90 = explicit_slot 4 ss91 = explicit_slot 16 ss92 = explicit_slot 16 ss93 = explicit_slot 16 ss94 = explicit_slot 16 ss95 = explicit_slot 16 ss96 = explicit_slot 16 ss97 = explicit_slot 2 ss98 = explicit_slot 16 ss99 = explicit_slot 16 ss100 = explicit_slot 16 ss101 = explicit_slot 16 ss102 = explicit_slot 16 ss103 = explicit_slot 16 ss104 = explicit_slot 8 ss105 = explicit_slot 16 ss106 = explicit_slot 16 ss107 = explicit_slot 4 ss108 = 
explicit_slot 16 + ss109 = explicit_slot 16 + ss110 = explicit_slot 16 + ss111 = explicit_slot 16 + ss112 = explicit_slot 4 + ss113 = explicit_slot 4 + ss114 = explicit_slot 4 + ss115 = explicit_slot 4 + ss116 = explicit_slot 16 + ss117 = explicit_slot 16 + ss118 = explicit_slot 16 + ss119 = explicit_slot 16 + ss120 = explicit_slot 16 + ss121 = explicit_slot 4 + ss122 = explicit_slot 4 + ss123 = explicit_slot 16 + ss124 = explicit_slot 16 + ss125 = explicit_slot 16 + ss126 = explicit_slot 2 + ss127 = explicit_slot 16 + ss128 = explicit_slot 16 + ss129 = explicit_slot 16 + ss130 = explicit_slot 16 + ss131 = explicit_slot 16 + ss132 = explicit_slot 4 + ss133 = explicit_slot 16 + ss134 = explicit_slot 16 + ss135 = explicit_slot 16 + ss136 = explicit_slot 16 + ss137 = explicit_slot 16 + ss138 = explicit_slot 16 + ss139 = explicit_slot 2 + ss140 = explicit_slot 16 + ss141 = explicit_slot 16 + ss142 = explicit_slot 16 + ss143 = explicit_slot 16 + ss144 = explicit_slot 4 + gv0 = symbol colocated u1:22 + gv1 = symbol colocated u1:23 + gv2 = symbol colocated u1:24 + gv3 = symbol colocated u1:23 + gv4 = symbol colocated u1:25 + gv5 = symbol colocated u1:23 + gv6 = symbol colocated u1:26 + gv7 = symbol colocated u1:23 + gv8 = symbol colocated u1:27 + gv9 = symbol colocated u1:23 + gv10 = symbol colocated u1:28 + gv11 = symbol colocated u1:23 + gv12 = symbol colocated u1:29 + gv13 = symbol colocated u1:30 + gv14 = symbol colocated u1:31 + gv15 = symbol colocated u1:23 + gv16 = symbol colocated u1:29 + gv17 = symbol colocated u1:32 + gv18 = symbol colocated u1:32 + gv19 = symbol colocated u1:32 + gv20 = symbol colocated u1:32 + gv21 = symbol colocated u1:32 + gv22 = symbol colocated u1:33 + gv23 = symbol colocated u1:34 + gv24 = symbol colocated u1:23 + gv25 = symbol colocated u1:35 + gv26 = symbol colocated u1:36 + gv27 = symbol colocated u1:23 + gv28 = symbol colocated u1:29 + gv29 = symbol colocated u1:32 + gv30 = symbol colocated u1:37 + gv31 = symbol colocated u1:38 + gv32 = symbol colocated u1:30 + gv33 = symbol colocated u1:32 + gv34 = symbol colocated u1:32 + gv35 = symbol colocated u1:29 + gv36 = symbol colocated u1:32 + gv37 = symbol colocated u1:30 + gv38 = symbol colocated u1:32 + gv39 = symbol colocated u1:39 + gv40 = symbol colocated u1:40 + gv41 = symbol colocated u1:41 + gv42 = symbol colocated u1:23 + gv43 = symbol colocated u1:29 + gv44 = symbol colocated u1:42 + gv45 = symbol colocated u1:29 + gv46 = symbol colocated u1:30 + gv47 = symbol colocated u1:29 + gv48 = symbol colocated u1:30 + gv49 = symbol colocated u1:32 + gv50 = symbol colocated u1:43 + gv51 = symbol colocated u1:44 + gv52 = symbol colocated u1:45 + gv53 = symbol colocated u1:23 + gv54 = symbol colocated u1:46 + gv55 = symbol colocated u1:47 + gv56 = symbol colocated u1:48 + gv57 = symbol colocated u1:23 + gv58 = symbol colocated u1:32 + gv59 = symbol colocated u1:39 + gv60 = symbol colocated u1:49 + gv61 = symbol colocated u1:49 + gv62 = symbol colocated u1:49 + gv63 = symbol colocated u1:38 + gv64 = symbol colocated u1:30 + gv65 = symbol colocated u1:32 + gv66 = symbol colocated u1:50 + gv67 = symbol colocated u1:23 + gv68 = symbol colocated u1:29 + gv69 = symbol colocated u1:51 + gv70 = symbol colocated u1:29 + gv71 = symbol colocated u1:30 + gv72 = symbol colocated u1:32 + gv73 = symbol colocated u1:49 + gv74 = symbol colocated u1:32 + sig0 = (i64) system_v + sig1 = (i64) system_v + sig2 = (i64) system_v + sig3 = (i64) system_v + sig4 = (i64) system_v + sig5 = (i64) system_v + sig6 = (i64, i64, i64) system_v + sig7 
= (i64) -> i8 system_v + sig8 = (i64) system_v + sig9 = (i64) system_v + sig10 = (i64, i64, i64) system_v + sig11 = (i64) -> i8 system_v + sig12 = (i64) system_v + sig13 = (i64) system_v + sig14 = (i64) -> i64 system_v + sig15 = (i64) system_v + sig16 = (i64) system_v + sig17 = (i64) system_v + sig18 = (i64) system_v + sig19 = (i64) system_v + sig20 = (i64) system_v + sig21 = (i64) system_v + sig22 = (i64, i64) system_v + sig23 = (i64) system_v + sig24 = (i64, i64, i16) system_v + sig25 = (i64, i64, i16) system_v + sig26 = (i64) system_v + sig27 = (i64) system_v + sig28 = (i64) system_v + sig29 = (i64) system_v + sig30 = (i64, i16, i16) system_v + sig31 = (i64, i64, i64) system_v + sig32 = (i64, i64, i64) system_v + sig33 = (i64, i64, i64) system_v + sig34 = (i64, i64) -> i8 system_v + sig35 = (i64, i64, i64) system_v + sig36 = (i64, i64) -> i8 system_v + sig37 = (i64, i64, i64) system_v + sig38 = (i64, i64, i64) system_v + sig39 = (i64, i64) system_v + sig40 = (i64) system_v + sig41 = (i64, i64) -> i8 system_v + sig42 = (i64, i64, i64) system_v + sig43 = (i64, i64) -> i8 system_v + sig44 = (i64, i64, i64) system_v + sig45 = (i64, i64, i64) system_v + sig46 = (i64, i64) system_v + sig47 = (i64) system_v + sig48 = (i64) system_v + sig49 = (i64) system_v + sig50 = (i64) system_v + sig51 = (i64) system_v + sig52 = (i64) system_v + sig53 = (i64) system_v + sig54 = (i64, i32) system_v + sig55 = (i64) system_v + sig56 = (i64) system_v + sig57 = (i64) system_v + sig58 = (i64) system_v + sig59 = (i64) system_v + sig60 = (i64) system_v + sig61 = (i64) system_v + sig62 = (i64) system_v + sig63 = (i64) system_v + sig64 = (i64) system_v + sig65 = (i64) system_v + sig66 = (i64) system_v + sig67 = (i64) system_v + sig68 = (i64) system_v + sig69 = (i64) system_v + sig70 = (i64, i64, i64) system_v + sig71 = (i64) system_v + sig72 = (i64, i64, i16, i64, i64, i64, i64, i64) system_v + sig73 = (i64, i64) -> i8 system_v + sig74 = (i64, i64, i64) system_v + sig75 = (i64, i64) -> i8 system_v + sig76 = (i64, i64, i64) system_v + sig77 = (i64, i64, i64) system_v + sig78 = (i64, i64) system_v + sig79 = (i64) system_v + sig80 = (i64, i64) -> i8 system_v + sig81 = (i64, i64, i64) system_v + sig82 = (i64, i64) -> i8 system_v + sig83 = (i64, i64, i64) system_v + sig84 = (i64, i64, i64) system_v + sig85 = (i64, i64) system_v + sig86 = (i64) system_v + sig87 = (i64) system_v + sig88 = (i64) system_v + sig89 = (i64) system_v + sig90 = (i64) system_v + sig91 = (i64) system_v + sig92 = (i64) system_v + sig93 = (i64) system_v + sig94 = (i64) system_v + sig95 = (i64) system_v + sig96 = (i64) system_v + sig97 = (i64) system_v + sig98 = (i64) system_v + sig99 = (i64) system_v + sig100 = (i64) system_v + sig101 = (i64, i64, i64) system_v + sig102 = (i64) system_v + sig103 = (i64) system_v + sig104 = (i64, i64, i16, i64, i64, i64, i64, i64) system_v + sig105 = (i64) system_v + fn0 = u0:83 sig0 + fn1 = u0:13 sig1 + fn2 = u0:83 sig2 + fn3 = u0:13 sig3 + fn4 = u0:83 sig4 + fn5 = u0:13 sig5 + fn6 = u0:84 sig6 + fn7 = u0:85 sig7 + fn8 = u0:83 sig8 + fn9 = u0:13 sig9 + fn10 = u0:86 sig10 fn11 = u0:85 sig11 fn12 = u0:83 sig12 fn13 = u0:13 sig13 + fn14 = u0:16 sig14 fn15 = u0:83 sig15 fn16 = u0:13 sig16 + fn17 = u0:13 sig17 + fn18 = u0:13 sig18 + fn19 = u0:83 sig19 fn20 = u0:13 sig20 + fn21 = u0:13 sig21 + fn22 = u0:87 sig22 fn23 = u0:13 sig23 + fn24 = u0:88 sig24 fn25 = u0:88 sig25 fn26 = u0:13 sig26 + fn27 = u0:13 sig27 + fn28 = u0:13 sig28 + fn29 = u0:13 sig29 + fn30 = u0:89 sig30 fn31 = u0:90 sig31 fn32 = u0:90 sig32 fn33 = u0:90 
sig33 fn34 = u0:91 sig34 fn35 = u0:92 sig35 fn36 = u0:91 sig36 fn37 = u0:92 sig37 fn38 = u0:11 sig38 fn39 = u0:12 sig39 fn40 = u0:13 sig40 + fn41 = u0:91 sig41 fn42 = u0:92 sig42 fn43 = u0:91 sig43 fn44 = u0:92 sig44 fn45 = u0:11 sig45 fn46 = u0:12 sig46 fn47 = u0:13 sig47 + fn48 = u0:13 sig48 + fn49 = u0:13 sig49 + fn50 = u0:13 sig50 + fn51 = u0:13 sig51 + fn52 = u0:13 sig52 + fn53 = u0:13 sig53 + fn54 = u0:93 sig54 fn55 = u0:13 sig55 + fn56 = u0:13 sig56 + fn57 = u0:13 sig57 + fn58 = u0:13 sig58 + fn59 = u0:13 sig59 + fn60 = u0:13 sig60 + fn61 = u0:13 sig61 + fn62 = u0:83 sig62 fn63 = u0:13 sig63 + fn64 = u0:13 sig64 + fn65 = u0:13 sig65 + fn66 = u0:13 sig66 + fn67 = u0:13 sig67 + fn68 = u0:13 sig68 + fn69 = u0:13 sig69 + fn70 = u0:94 sig70 fn71 = u0:13 sig71 + fn72 = u0:95 sig72 fn73 = u0:96 sig73 fn74 = u0:97 sig74 fn75 = u0:96 sig75 fn76 = u0:97 sig76 fn77 = u0:11 sig77 fn78 = u0:12 sig78 fn79 = u0:13 sig79 + fn80 = u0:91 sig80 fn81 = u0:92 sig81 fn82 = u0:91 sig82 fn83 = u0:92 sig83 fn84 = u0:11 sig84 fn85 = u0:12 sig85 fn86 = u0:13 sig86 + fn87 = u0:13 sig87 + fn88 = u0:13 sig88 + fn89 = u0:13 sig89 + fn90 = u0:13 sig90 + fn91 = u0:13 sig91 + fn92 = u0:13 sig92 + fn93 = u0:13 sig93 + fn94 = u0:13 sig94 + fn95 = u0:83 sig95 fn96 = u0:13 sig96 + fn97 = u0:13 sig97 + fn98 = u0:13 sig98 + fn99 = u0:13 sig99 + fn100 = u0:13 sig100 + fn101 = u0:94 sig101 + fn102 = u0:13 sig102 + fn103 = u0:13 sig103 + fn104 = u0:95 sig104 + +block0(v0: i64, v1: i64, v2: i64): + v113 -> v1 + v124 -> v1 + v136 -> v1 + v148 -> v1 + v160 -> v1 + v185 -> v1 + v222 -> v1 + v237 -> v1 + v241 -> v1 + v256 -> v1 + v262 -> v1 + v3, v4 = x86_sdivmodx v0, v1, v2 + store aligned v4, v3 + v5 = load.i64 aligned v2+8 + store aligned v5, v3+8 + v6 = stack_addr.i64 ss1 + v7 = stack_addr.i64 ss2 + v8 = stack_addr.i64 ss3 + v9 = stack_addr.i64 ss4 + v10 = stack_addr.i64 ss5 + v11 = stack_addr.i64 ss6 + v12 = stack_addr.i64 ss7 + v13 = stack_addr.i64 ss8 + v14 = stack_addr.i64 ss9 + v15 = stack_addr.i64 ss10 + v16 = stack_addr.i64 ss11 + v17 = stack_addr.i64 ss12 + v18 = stack_addr.i64 ss13 + v19 = stack_addr.i64 ss14 + v20 = stack_addr.i64 ss15 + v21 = stack_addr.i64 ss16 + v22 = stack_addr.i64 ss17 + v23 = stack_addr.i64 ss18 + v24 = stack_addr.i64 ss19 + v25 = stack_addr.i64 ss20 + v26 = stack_addr.i64 ss21 + v27 = stack_addr.i64 ss22 + v28 = stack_addr.i64 ss23 + v29 = stack_addr.i64 ss24 + v30 = stack_addr.i64 ss25 + v31 = stack_addr.i64 ss26 + v32 = stack_addr.i64 ss27 + v33 = stack_addr.i64 ss28 + v34 = stack_addr.i64 ss29 + v35 = stack_addr.i64 ss30 + v36 = stack_addr.i64 ss31 + v37 = stack_addr.i64 ss32 + v38 = stack_addr.i64 ss33 + v39 = stack_addr.i64 ss34 + v40 = stack_addr.i64 ss35 + v41 = stack_addr.i64 ss36 + v42 = stack_addr.i64 ss37 + v43 = stack_addr.i64 ss38 + v44 = stack_addr.i64 ss39 + v45 = stack_addr.i64 ss40 + v46 = stack_addr.i64 ss41 + v47 = stack_addr.i64 ss42 + v48 = stack_addr.i64 ss43 + v49 = stack_addr.i64 ss44 + v50 = stack_addr.i64 ss45 + v51 = stack_addr.i64 ss46 + v52 = stack_addr.i64 ss47 + v53 = stack_addr.i64 ss48 + v54 = stack_addr.i64 ss49 + v55 = stack_addr.i64 ss50 + v56 = stack_addr.i64 ss51 + v57 = stack_addr.i64 ss52 + v58 = stack_addr.i64 ss53 + v59 = stack_addr.i64 ss54 + v60 = stack_addr.i64 ss55 + v61 = stack_addr.i64 ss56 + v62 = stack_addr.i64 ss57 + v63 = stack_addr.i64 ss58 + v64 = stack_addr.i64 ss59 + v65 = stack_addr.i64 ss60 + v66 = stack_addr.i64 ss61 + v67 = stack_addr.i64 ss62 + v68 = stack_addr.i64 ss63 + v69 = stack_addr.i64 ss64 + v70 = stack_addr.i64 ss65 + v71 = 
stack_addr.i64 ss66 + v72 = stack_addr.i64 ss67 + v73 = stack_addr.i64 ss68 + v74 = stack_addr.i64 ss69 + v75 = stack_addr.i64 ss70 + v76 = stack_addr.i64 ss71 + v77 = stack_addr.i64 ss72 + v78 = stack_addr.i64 ss73 + v79 = stack_addr.i64 ss74 + v80 = stack_addr.i64 ss75 + v81 = stack_addr.i64 ss76 + v82 = stack_addr.i64 ss77 + v83 = stack_addr.i64 ss78 + v84 = stack_addr.i64 ss79 + v85 = stack_addr.i64 ss80 + v86 = stack_addr.i64 ss81 + v87 = stack_addr.i64 ss82 + v88 = stack_addr.i64 ss83 + v89 = stack_addr.i64 ss84 + v90 = stack_addr.i64 ss85 + v91 = stack_addr.i64 ss86 + v92 = stack_addr.i64 ss87 + v93 = stack_addr.i64 ss88 + v94 = stack_addr.i64 ss89 + v95 = stack_addr.i64 ss90 + v96 = stack_addr.i64 ss91 + v97 = stack_addr.i64 ss92 + v98 = stack_addr.i64 ss93 + v99 = stack_addr.i64 ss94 + v100 = stack_addr.i64 ss95 + v101 = stack_addr.i64 ss96 + v102 = stack_addr.i64 ss97 + v103 = stack_addr.i64 ss98 + v104 = stack_addr.i64 ss99 + v105 = stack_addr.i64 ss100 + v106 = stack_addr.i64 ss101 + v107 = stack_addr.i64 ss102 + v108 = stack_addr.i64 ss103 + v109 = stack_addr.i64 ss104 + v110 = stack_addr.i64 ss105 + v111 = stack_addr.i64 ss106 + v112 = stack_addr.i64 ss107 + jump block1 + +block1: + v114 = load.i64 v113 + v115 = iconst.i64 0 + v116 = icmp ugt v114, v115 + v117 = bint.i8 v116 + v118 = uextend.i32 v117 + v119 = icmp_imm eq v118, 0 + v120 = bint.i8 v119 + v121 = uextend.i32 v120 + brz v121, block3 + jump block2 + +block2: + v122 = global_value.i64 gv0 + v123 = global_value.i64 gv1 + trap user65535 + +block3: + v125 = iadd_imm.i64 v124, 8 + v126 = load.i64 v125 + v127 = iconst.i64 0 + v128 = icmp ugt v126, v127 + v129 = bint.i8 v128 + v130 = uextend.i32 v129 + v131 = icmp_imm eq v130, 0 + v132 = bint.i8 v131 + v133 = uextend.i32 v132 + brz v133, block5 + jump block4 + +block4: + v134 = global_value.i64 gv2 + v135 = global_value.i64 gv3 + trap user65535 + +block5: + v137 = iadd_imm.i64 v136, 16 + v138 = load.i64 v137+42 + v139 = iconst.i64 0 + v140 = icmp ugt v138, v139 + v141 = bint.i8 v140 + v142 = uextend.i32 v141 + v143 = icmp_imm eq v142, 0 + v144 = bint.i8 v143 + v145 = uextend.i32 v144 + brz v145, block7 + jump block6 + +block6: + v146 = global_value.i64 gv4 + v147 = global_value.i64 gv5 + trap user65535 + +block7: + v149 = load.i64 v148 + v150 = iadd_imm.i64 v148, 16 + v151 = load.i64 v150 + call fn6(v7, v149, v151) + jump block8 + +block8: + v152 = call fn7(v7) + jump block9 + +block9: + v153 = load.i8 v6 + v154 = uextend.i32 v153 + v155 = icmp_imm eq v154, 0 + v156 = bint.i8 v155 + v157 = uextend.i32 v156 + brz v157, block11 + jump block10 + +block10: + v158 = global_value.i64 gv6 + v159 = global_value.i64 gv7 + trap user65535 + +block11: + v161 = load.i64 v160 + v162 = iadd_imm.i64 v160, 8 + v163 = load.i64 v162 + call fn10(v9, v161, v163) + jump block12 + +block12: + v164 = call fn11(v9) + jump block13 + +block13: + v165 = load.i8 v8 + v166 = uextend.i32 v165 + v167 = icmp_imm eq v166, 0 + v168 = bint.i8 v167 + v169 = uextend.i32 v168 + brz v169, block15 + jump block14 + +block14: + v170 = global_value.i64 gv8 + v171 = global_value.i64 gv9 + trap user65535 + +block15: + v172 = load.i64 aligned v3 + v173 = load.i64 aligned v3+8 + v174 = call fn14(v11) + jump block16 + +block16: + v175 = iconst.i64 17 + v176 = load.i64 v10 + v177 = icmp uge v176, v175 + v178 = bint.i8 v177 + v179 = uextend.i32 v178 + v180 = icmp_imm eq v179, 0 + v181 = bint.i8 v180 + v182 = uextend.i32 v181 + brz v182, block18 + jump block17 + +block17: + v183 = global_value.i64 gv10 + v184 = 
global_value.i64 gv11 + trap user65535 + +block18: + v186 = load.i64 v185 + v187 = iadd_imm.i64 v185, 16 + v188 = load.i64 v187 + v189 = iadd v186, v188 + v190 = iconst.i8 0 + v191 = stack_addr.i64 ss108 + v192 = stack_addr.i64 ss108 + v193 = load.i64 aligned v192 + v194 = load.i64 aligned v192+8 + v195 = iadd_imm.i64 v12, 8 + v196 = load.i8 v195 + v197 = uextend.i32 v196 + brz v197, block19 + jump block164 + +block164: + v198 = global_value.i64 gv12 + trap user0 + +block19: + v199 = load.i64 v12 + v213 -> v199 + v200 = iconst.i64 1 + v201 = iconst.i32 61 + v202 = ishl v200, v201 + v203 = iconst.i8 0 + v204 = stack_addr.i64 ss109 + v205 = stack_addr.i64 ss109 + v206 = load.i64 aligned v205 + v207 = load.i64 aligned v205+8 + v208 = iadd_imm.i64 v13, 8 + v209 = load.i8 v208 + v210 = uextend.i32 v209 + brz v210, block20 + jump block163 + +block163: + v211 = global_value.i64 gv13 + trap user0 + +block20: + v212 = load.i64 v13 + v214 = icmp.i64 ult v213, v212 + v215 = bint.i8 v214 + v216 = uextend.i32 v215 + v217 = icmp_imm eq v216, 0 + v218 = bint.i8 v217 + v219 = uextend.i32 v218 + brz v219, block22 + jump block21 + +block21: + v220 = global_value.i64 gv14 + v221 = global_value.i64 gv15 + trap user65535 + +block22: + v223 = load.i64 v222 + v224 = iadd_imm.i64 v222, 16 + v225 = load.i64 v224 + v226 = iadd v223, v225 + v227 = iconst.i8 0 + v228 = stack_addr.i64 ss110 + v229 = stack_addr.i64 ss110 + v230 = load.i64 aligned v229 + v231 = load.i64 aligned v229+8 + v232 = iadd_imm.i64 v16, 8 + v233 = load.i8 v232 + v234 = uextend.i32 v233 + brz v234, block23 + jump block162 + +block162: + v235 = global_value.i64 gv16 + trap user0 + +block23: + v236 = load.i64 v16 + v238 = iadd_imm.i64 v237, 24 + v239 = load.i16 v238 + v240 = iadd_imm.i64 v15, 8 + call fn22(v14, v15) + jump block24 + +block24: + v242 = load.i64 v241 + v243 = iadd_imm.i64 v241, 8 + v244 = load.i64 v243 + v245 = isub v242, v244 + v246 = iconst.i8 0 + v247 = stack_addr.i64 ss111 + v248 = stack_addr.i64 ss111 + v249 = load.i64 aligned v248 + v250 = load.i64 aligned v248+8 + v251 = iadd_imm.i64 v19, 8 + v252 = load.i8 v251 + v253 = uextend.i32 v252 + brz v253, block25 + jump block161 + +block161: + v254 = global_value.i64 gv17 + trap user0 + +block25: + v255 = load.i64 v19 + v257 = iadd_imm.i64 v256, 24 + v258 = load.i16 v257 + v259 = iadd_imm.i64 v18, 8 + v260 = iadd_imm.i64 v14, 8 + v261 = load.i16 v260 + call fn24(v17, v18, v261) + jump block26 + +block26: + v263 = load.i64 v262 + v264 = iadd_imm.i64 v262, 24 + v265 = load.i16 v264 + v266 = iadd_imm.i64 v21, 8 + v267 = iadd_imm.i64 v14, 8 + v268 = load.i16 v267 + call fn25(v20, v21, v268) + jump block27 + +block27: + v269 = iadd_imm.i64 v14, 8 + v270 = load.i16 v269 + v271 = iconst.i16 -60 + v272 = isub v271, v270 + v273 = iconst.i8 0 + v274 = stack_addr.i64 ss112 + v275 = stack_addr.i64 ss112 + v276 = load.i32 aligned v275 + v277 = iadd_imm.i64 v24, 2 + v278 = load.i8 v277 + v279 = uextend.i32 v278 + brz v279, block28 + jump block160 + +block160: + v280 = global_value.i64 gv18 + trap user0 + +block28: + v281 = load.i16 v24 + v282 = iconst.i16 64 + v283 = isub v281, v282 + v284 = iconst.i8 0 + v285 = stack_addr.i64 ss113 + v286 = stack_addr.i64 ss113 + v287 = load.i32 aligned v286 + v288 = iadd_imm.i64 v25, 2 + v289 = load.i8 v288 + v290 = uextend.i32 v289 + brz v290, block29 + jump block159 + +block159: + v291 = global_value.i64 gv19 + trap user0 + +block29: + v292 = load.i16 v25 + v317 -> v292 + v293 = iadd_imm.i64 v14, 8 + v294 = load.i16 v293 + v295 = iconst.i16 -32 + v296 = isub 
v295, v294 + v297 = iconst.i8 0 + v298 = stack_addr.i64 ss114 + v299 = stack_addr.i64 ss114 + v300 = load.i32 aligned v299 + v301 = iadd_imm.i64 v26, 2 + v302 = load.i8 v301 + v303 = uextend.i32 v302 + brz v303, block30 + jump block158 + +block158: + v304 = global_value.i64 gv20 + trap user0 + +block30: + v305 = load.i16 v26 + v306 = iconst.i16 64 + v307 = isub v305, v306 + v308 = iconst.i8 0 + v309 = stack_addr.i64 ss115 + v310 = stack_addr.i64 ss115 + v311 = load.i32 aligned v310 + v312 = iadd_imm.i64 v27, 2 + v313 = load.i8 v312 + v314 = uextend.i32 v313 + brz v314, block31 + jump block157 + +block157: + v315 = global_value.i64 gv21 + trap user0 + +block31: + v316 = load.i16 v27 + call fn30(v23, v317, v316) + jump block32 + +block32: + v318 = load.i16 v23 + v1007 -> v318 + v319 = iadd_imm.i64 v23, 8 + v320 = load.i64 aligned v319 + v321 = load.i64 aligned v319+8 + call fn31(v28, v14, v22) + jump block33 + +block33: + call fn32(v29, v17, v22) + jump block34 + +block34: + call fn33(v30, v20, v22) + jump block35 + +block35: + v322 = iconst.i8 1 + v323 = uextend.i32 v322 + brz v323, block42 + jump block36 + +block36: + v324 = iadd_imm.i64 v28, 8 + v325 = iadd_imm.i64 v29, 8 + v326 = iadd_imm.i64 v31, 8 + v327 = load.i64 v31 + v340 -> v327 + v328 = iadd_imm.i64 v31, 8 + v329 = load.i64 v328 + v341 -> v329 + v330 = load.i16 v327 + v331 = load.i16 v329 + v332 = icmp eq v330, v331 + v333 = bint.i8 v332 + v334 = uextend.i32 v333 + v335 = icmp_imm eq v334, 0 + v336 = bint.i8 v335 + v337 = uextend.i32 v336 + brz v337, block38 + jump block37 + +block37: + v338 = global_value.i64 gv22 + v339 = iconst.i64 3 + v342 = iadd_imm.i64 v36, 8 + v343 = load.i64 v36 + v344 = iadd_imm.i64 v36, 8 + v345 = load.i64 v344 + v347 -> v345 + v346 = func_addr.i64 fn34 + call fn35(v39, v343, v346) + jump block39 + +block38: + jump block42 + +block39: + v348 = func_addr.i64 fn36 + call fn37(v40, v347, v348) + jump block40 + +block40: + v349 = iconst.i64 0 + v350 = imul_imm v349, 16 + v351 = iadd.i64 v35, v350 + v352 = load.i64 aligned v39 + v353 = load.i64 aligned v39+8 + v354 = iconst.i64 1 + v355 = imul_imm v354, 16 + v356 = iadd.i64 v35, v355 + v357 = load.i64 aligned v40 + v358 = load.i64 aligned v40+8 + v359 = iconst.i64 2 + call fn38(v32, v33, v34) + jump block41 + +block41: + v360 = global_value.i64 gv23 + call fn39(v32, v360) + v361 = global_value.i64 gv24 + trap user65535 + +block42: + v362 = iconst.i8 1 + v363 = uextend.i32 v362 + brz v363, block49(v1007) + jump block43 + +block43: + v364 = iadd_imm.i64 v28, 8 + v365 = iadd_imm.i64 v30, 8 + v366 = iadd_imm.i64 v41, 8 + v367 = load.i64 v41 + v380 -> v367 + v368 = iadd_imm.i64 v41, 8 + v369 = load.i64 v368 + v381 -> v369 + v370 = load.i16 v367 + v371 = load.i16 v369 + v372 = icmp eq v370, v371 + v373 = bint.i8 v372 + v374 = uextend.i32 v373 + v375 = icmp_imm eq v374, 0 + v376 = bint.i8 v375 + v377 = uextend.i32 v376 + brz v377, block45 + jump block44 + +block44: + v378 = global_value.i64 gv25 + v379 = iconst.i64 3 + v382 = iadd_imm.i64 v46, 8 + v383 = load.i64 v46 + v384 = iadd_imm.i64 v46, 8 + v385 = load.i64 v384 + v387 -> v385 + v386 = func_addr.i64 fn41 + call fn42(v49, v383, v386) + jump block46 + +block45: + jump block49(v1007) + +block46: + v388 = func_addr.i64 fn43 + call fn44(v50, v387, v388) + jump block47 + +block47: + v389 = iconst.i64 0 + v390 = imul_imm v389, 16 + v391 = iadd.i64 v45, v390 + v392 = load.i64 aligned v49 + v393 = load.i64 aligned v49+8 + v394 = iconst.i64 1 + v395 = imul_imm v394, 16 + v396 = iadd.i64 v45, v395 + v397 = load.i64 
aligned v50 + v398 = load.i64 aligned v50+8 + v399 = iconst.i64 2 + call fn45(v42, v43, v44) + jump block48 + +block48: + v400 = global_value.i64 gv26 + call fn46(v42, v400) + v401 = global_value.i64 gv27 + trap user65535 + +block49(v1006: i16): + v486 -> v1006 + v402 = load.i64 v28 + v403 = iconst.i64 1 + v404 = iadd v402, v403 + v405 = iconst.i8 0 + v406 = stack_addr.i64 ss116 + v407 = stack_addr.i64 ss116 + v408 = load.i64 aligned v407 + v409 = load.i64 aligned v407+8 + v410 = iadd_imm.i64 v51, 8 + v411 = load.i8 v410 + v412 = uextend.i32 v411 + brz v412, block50 + jump block156 + +block156: + v413 = global_value.i64 gv28 + trap user0 + +block50: + v414 = load.i64 v51 + v439 -> v414 + v452 -> v414 + v478 -> v414 + v508 -> v414 + v415 = load.i64 v29 + v416 = iconst.i64 1 + v417 = isub v415, v416 + v418 = iconst.i8 0 + v419 = stack_addr.i64 ss117 + v420 = stack_addr.i64 ss117 + v421 = load.i64 aligned v420 + v422 = load.i64 aligned v420+8 + v423 = iadd_imm.i64 v52, 8 + v424 = load.i8 v423 + v425 = uextend.i32 v424 + brz v425, block51 + jump block155 + +block155: + v426 = global_value.i64 gv29 + trap user0 + +block51: + v427 = load.i64 v52 + v509 -> v427 + v428 = iadd_imm.i64 v28, 8 + v429 = load.i16 v428 + v435 -> v429 + v430 = iconst.i16 0xffff_ffff_ffff_8000 + v431 = icmp eq v429, v430 + v432 = bint.i8 v431 + v433 = uextend.i32 v432 + brz v433, block52 + jump block154 + +block154: + v434 = global_value.i64 gv30 + trap user0 + +block52: + v436 = iconst.i16 0 + v437 = isub v436, v435 + v438 = sextend.i64 v437 + v453 -> v438 + v521 -> v438 + v440 = ushr.i64 v439, v438 + v441 = iconst.i8 0 + v442 = stack_addr.i64 ss118 + v443 = stack_addr.i64 ss118 + v444 = load.i64 aligned v443 + v445 = load.i64 aligned v443+8 + v446 = iadd_imm.i64 v53, 8 + v447 = load.i8 v446 + v448 = uextend.i32 v447 + brz v448, block53 + jump block153 + +block153: + v449 = global_value.i64 gv31 + trap user0 + +block53: + v450 = load.i64 v53 + v451 = ireduce.i32 v450 + v480 -> v451 + v551 -> v451 + v454 = iconst.i64 1 + v455 = ishl v454, v453 + v456 = iconst.i8 0 + v457 = stack_addr.i64 ss119 + v458 = stack_addr.i64 ss119 + v459 = load.i64 aligned v458 + v460 = load.i64 aligned v458+8 + v461 = iadd_imm.i64 v54, 8 + v462 = load.i8 v461 + v463 = uextend.i32 v462 + brz v463, block54 + jump block152 + +block152: + v464 = global_value.i64 gv32 + trap user0 + +block54: + v465 = load.i64 v54 + v466 = iconst.i64 1 + v467 = isub v465, v466 + v468 = iconst.i8 0 + v469 = stack_addr.i64 ss120 + v470 = stack_addr.i64 ss120 + v471 = load.i64 aligned v470 + v472 = load.i64 aligned v470+8 + v473 = iadd_imm.i64 v55, 8 + v474 = load.i8 v473 + v475 = uextend.i32 v474 + brz v475, block55 + jump block151 + +block151: + v476 = global_value.i64 gv33 + trap user0 + +block55: + v477 = load.i64 v55 + v479 = band.i64 v478, v477 + call fn54(v56, v480) + jump block56 + +block56: + v481 = load.i8 v56 + v548 -> v481 + v482 = iadd_imm.i64 v56, 4 + v483 = load.i32 v482 + v550 -> v483 + v484 = iconst.i64 0 + v485 = uextend.i16 v481 + v487 = isub v485, v486 + v488 = iconst.i8 0 + v489 = stack_addr.i64 ss121 + v490 = stack_addr.i64 ss121 + v491 = load.i32 aligned v490 + v492 = iadd_imm.i64 v57, 2 + v493 = load.i8 v492 + v494 = uextend.i32 v493 + brz v494, block57 + jump block150 + +block150: + v495 = global_value.i64 gv34 + trap user0 + +block57: + v496 = load.i16 v57 + v497 = iconst.i16 1 + v498 = iadd v496, v497 + v499 = iconst.i8 0 + v500 = stack_addr.i64 ss122 + v501 = stack_addr.i64 ss122 + v502 = load.i32 aligned v501 + v503 = iadd_imm.i64 v58, 2 + 
v504 = load.i8 v503 + v505 = uextend.i32 v504 + brz v505, block58 + jump block149 + +block149: + v506 = global_value.i64 gv35 + trap user0 + +block58: + v507 = load.i16 v58 + v510 = isub.i64 v508, v509 + v511 = iconst.i8 0 + v512 = stack_addr.i64 ss123 + v513 = stack_addr.i64 ss123 + v514 = load.i64 aligned v513 + v515 = load.i64 aligned v513+8 + v516 = iadd_imm.i64 v59, 8 + v517 = load.i8 v516 + v518 = uextend.i32 v517 + brz v518, block59 + jump block148 + +block148: + v519 = global_value.i64 gv36 + trap user0 + +block59: + v520 = load.i64 v59 + v546 -> v520 + v522 = iconst.i64 1 + v523 = ishl v522, v521 + v524 = iconst.i8 0 + v525 = stack_addr.i64 ss124 + v526 = stack_addr.i64 ss124 + v527 = load.i64 aligned v526 + v528 = load.i64 aligned v526+8 + v529 = iadd_imm.i64 v60, 8 + v530 = load.i8 v529 + v531 = uextend.i32 v530 + brz v531, block60 + jump block147 + +block147: + v532 = global_value.i64 gv37 + trap user0 + +block60: + v533 = load.i64 v60 + v534 = iconst.i64 1 + v535 = isub v533, v534 + v536 = iconst.i8 0 + v537 = stack_addr.i64 ss125 + v538 = stack_addr.i64 ss125 + v539 = load.i64 aligned v538 + v540 = load.i64 aligned v538+8 + v541 = iadd_imm.i64 v61, 8 + v542 = load.i8 v541 + v543 = uextend.i32 v542 + brz v543, block61 + jump block146 + +block146: + v544 = global_value.i64 gv38 + trap user0 + +block61: + v545 = load.i64 v61 + v547 = band.i64 v546, v545 + v549 = uextend.i16 v548 + jump block62(v551, v484, v521, v479, v520, v507, v508, v548, v547) + +block62(v552: i32, v1009: i64, v1013: i64, v1016: i64, v1019: i64, v1022: i16, v1025: i64, v1028: i8, v1033: i64): + v559 -> v552 + v562 -> v552 + v569 -> v552 + v596 -> v1009 + v605 -> v1009 + v609 -> v1009 + v1008 -> v1009 + v624 -> v1013 + v654 -> v1013 + v1012 -> v1013 + v1014 -> v1013 + v1041 -> v1013 + v636 -> v1016 + v1015 -> v1016 + v1017 -> v1016 + v1030 -> v1016 + v648 -> v1019 + v676 -> v1019 + v693 -> v1019 + v1018 -> v1019 + v1020 -> v1019 + v674 -> v1022 + v691 -> v1022 + v1021 -> v1022 + v1023 -> v1022 + v1054 -> v1022 + v677 -> v1025 + v1024 -> v1025 + v1026 -> v1025 + v1059 -> v1025 + v696 -> v1028 + v1027 -> v1028 + v1029 -> v1028 + v1031 -> v1033 + v1032 -> v1033 + v1034 -> v1033 + v553 = load.i32 v63 + v560 -> v553 + v554 = iconst.i32 0 + v555 = icmp eq v553, v554 + v556 = bint.i8 v555 + v557 = uextend.i32 v556 + brz v557, block63 + jump block145 + +block145: + v558 = global_value.i64 gv39 + trap user0 + +block63: + v561 = udiv.i32 v559, v560 + v574 -> v561 + v563 = load.i32 v63 + v570 -> v563 + v564 = iconst.i32 0 + v565 = icmp eq v563, v564 + v566 = bint.i8 v565 + v567 = uextend.i32 v566 + brz v567, block64 + jump block144 + +block144: + v568 = global_value.i64 gv40 + trap user0 + +block64: + v571 = urem.i32 v569, v570 + v622 -> v571 + v803 -> v571 + v1011 -> v571 + v572 = iconst.i8 1 + v573 = uextend.i32 v572 + brz v573, block68(v561) + jump block65 + +block65: + v575 = iconst.i32 10 + v576 = icmp.i32 ult v574, v575 + v577 = bint.i8 v576 + v578 = uextend.i32 v577 + v579 = icmp_imm eq v578, 0 + v580 = bint.i8 v579 + v581 = uextend.i32 v580 + brz v581, block67 + jump block66 + +block66: + v582 = global_value.i64 gv41 + v583 = global_value.i64 gv42 + trap user65535 + +block67: + jump block68(v574) + +block68(v584: i32): + v585 = ireduce.i8 v584 + v586 = iconst.i8 48 + v587 = iadd v586, v585 + v588 = iconst.i8 0 + v589 = stack_addr.i64 ss126 + v590 = stack_addr.i64 ss126 + v591 = load.i16 aligned v590 + v592 = iadd_imm.i64 v64, 1 + v593 = load.i8 v592 + v594 = uextend.i32 v593 + brz v594, block69 + jump block143 + 
+block143: + v595 = global_value.i64 gv43 + trap user0 + +block69: + v597 = load.i64 v3 + v598 = load.i64 v3+8 + v599 = icmp.i64 ult v596, v598 + v600 = bint.i8 v599 + v601 = uextend.i32 v600 + brnz v601, block70 + jump block142 + +block142: + v602 = global_value.i64 gv44 + trap user0 + +block70: + v603 = load.i64 v3 + v604 = load.i64 v3+8 + v606 = imul_imm.i64 v605, 1 + v607 = iadd v603, v606 + v608 = load.i8 aligned v64 + v610 = iconst.i64 1 + v611 = iadd.i64 v609, v610 + v612 = iconst.i8 0 + v613 = stack_addr.i64 ss127 + v614 = stack_addr.i64 ss127 + v615 = load.i64 aligned v614 + v616 = load.i64 aligned v614+8 + v617 = iadd_imm.i64 v65, 8 + v618 = load.i8 v617 + v619 = uextend.i32 v618 + brz v619, block71 + jump block141 + +block141: + v620 = global_value.i64 gv45 + trap user0 + +block71: + v621 = load.i64 v65 + v668 -> v621 + v695 -> v621 + v1010 -> v621 + v1046 -> v621 + v623 = uextend.i64 v622 + v625 = ishl v623, v624 + v626 = iconst.i8 0 + v627 = stack_addr.i64 ss128 + v628 = stack_addr.i64 ss128 + v629 = load.i64 aligned v628 + v630 = load.i64 aligned v628+8 + v631 = iadd_imm.i64 v66, 8 + v632 = load.i8 v631 + v633 = uextend.i32 v632 + brz v633, block72 + jump block140 + +block140: + v634 = global_value.i64 gv46 + trap user0 + +block72: + v635 = load.i64 v66 + v637 = iadd v635, v636 + v638 = iconst.i8 0 + v639 = stack_addr.i64 ss129 + v640 = stack_addr.i64 ss129 + v641 = load.i64 aligned v640 + v642 = load.i64 aligned v640+8 + v643 = iadd_imm.i64 v67, 8 + v644 = load.i8 v643 + v645 = uextend.i32 v644 + brz v645, block73 + jump block139 + +block139: + v646 = global_value.i64 gv47 + trap user0 + +block73: + v647 = load.i64 v67 + v675 -> v647 + v692 -> v647 + v649 = icmp ult v647, v648 + v650 = bint.i8 v649 + v651 = uextend.i32 v650 + brz v651, block80 + jump block74 + +block74: + v652 = load.i32 v63 + v653 = uextend.i64 v652 + v655 = ishl v653, v654 + v656 = iconst.i8 0 + v657 = stack_addr.i64 ss130 + v658 = stack_addr.i64 ss130 + v659 = load.i64 aligned v658 + v660 = load.i64 aligned v658+8 + v661 = iadd_imm.i64 v68, 8 + v662 = load.i8 v661 + v663 = uextend.i32 v662 + brz v663, block75 + jump block138 + +block138: + v664 = global_value.i64 gv48 + trap user0 + +block75: + v665 = load.i64 v68 + v690 -> v665 + v666 = load.i64 aligned v3 + v667 = load.i64 aligned v3+8 + v669 = load.i64 v73 + call fn70(v71, v72, v669) + jump block76 + +block76: + v670 = load.i64 aligned v71 + v671 = load.i64 aligned v71+8 + v672 = load.i64 aligned v70 + v673 = load.i64 aligned v70+8 + v678 = load.i64 v30 + v679 = isub.i64 v677, v678 + v680 = iconst.i8 0 + v681 = stack_addr.i64 ss131 + v682 = stack_addr.i64 ss131 + v683 = load.i64 aligned v682 + v684 = load.i64 aligned v682+8 + v685 = iadd_imm.i64 v74, 8 + v686 = load.i8 v685 + v687 = uextend.i32 v686 + brz v687, block77 + jump block137 + +block137: + v688 = global_value.i64 gv49 + trap user0 + +block77: + v689 = load.i64 v74 + v694 = iconst.i64 1 + call fn72(v0, v69, v691, v692, v693, v689, v690, v694) + jump block78 + +block78: + jump block79 + +block79: + return + +block80: + v697 = uextend.i64 v696 + v698 = icmp.i64 ugt v695, v697 + v699 = bint.i8 v698 + v700 = uextend.i32 v699 + brz v700, block96 + jump block81 + +block81: + v701 = iconst.i8 1 + v702 = uextend.i32 v701 + brz v702, block88 + jump block82 + +block82: + v703 = global_value.i64 gv50 + v704 = iadd_imm.i64 v75, 8 + v705 = load.i64 v75 + v718 -> v705 + v706 = iadd_imm.i64 v75, 8 + v707 = load.i64 v706 + v719 -> v707 + v708 = load.i32 v705 + v709 = load.i32 v707 + v710 = icmp eq v708, v709 
+ v711 = bint.i8 v710 + v712 = uextend.i32 v711 + v713 = icmp_imm eq v712, 0 + v714 = bint.i8 v713 + v715 = uextend.i32 v714 + brz v715, block84 + jump block83 + +block83: + v716 = global_value.i64 gv51 + v717 = iconst.i64 3 + v720 = iadd_imm.i64 v80, 8 + v721 = load.i64 v80 + v722 = iadd_imm.i64 v80, 8 + v723 = load.i64 v722 + v725 -> v723 + v724 = func_addr.i64 fn73 + call fn74(v83, v721, v724) + jump block85 + +block84: + jump block88 + +block85: + v726 = func_addr.i64 fn75 + call fn76(v84, v725, v726) + jump block86 + +block86: + v727 = iconst.i64 0 + v728 = imul_imm v727, 16 + v729 = iadd.i64 v79, v728 + v730 = load.i64 aligned v83 + v731 = load.i64 aligned v83+8 + v732 = iconst.i64 1 + v733 = imul_imm v732, 16 + v734 = iadd.i64 v79, v733 + v735 = load.i64 aligned v84 + v736 = load.i64 aligned v84+8 + v737 = iconst.i64 2 + call fn77(v76, v77, v78) + jump block87 + +block87: + v738 = global_value.i64 gv52 + call fn78(v76, v738) + v739 = global_value.i64 gv53 + trap user65535 + +block88: + v740 = iconst.i8 1 + v741 = uextend.i32 v740 + brz v741, block95(v1030, v1031, v1041, v1046, v1054, v1059) + jump block89 + +block89: + v742 = global_value.i64 gv54 + v743 = iadd_imm.i64 v85, 8 + v744 = load.i64 v85 + v757 -> v744 + v745 = iadd_imm.i64 v85, 8 + v746 = load.i64 v745 + v758 -> v746 + v747 = load.i16 v744 + v748 = load.i16 v746 + v749 = icmp eq v747, v748 + v750 = bint.i8 v749 + v751 = uextend.i32 v750 + v752 = icmp_imm eq v751, 0 + v753 = bint.i8 v752 + v754 = uextend.i32 v753 + brz v754, block91 + jump block90 + +block90: + v755 = global_value.i64 gv55 + v756 = iconst.i64 3 + v759 = iadd_imm.i64 v90, 8 + v760 = load.i64 v90 + v761 = iadd_imm.i64 v90, 8 + v762 = load.i64 v761 + v764 -> v762 + v763 = func_addr.i64 fn80 + call fn81(v93, v760, v763) + jump block92 + +block91: + jump block95(v1030, v1031, v1041, v1046, v1054, v1059) + +block92: + v765 = func_addr.i64 fn82 + call fn83(v94, v764, v765) + jump block93 + +block93: + v766 = iconst.i64 0 + v767 = imul_imm v766, 16 + v768 = iadd.i64 v89, v767 + v769 = load.i64 aligned v93 + v770 = load.i64 aligned v93+8 + v771 = iconst.i64 1 + v772 = imul_imm v771, 16 + v773 = iadd.i64 v89, v772 + v774 = load.i64 aligned v94 + v775 = load.i64 aligned v94+8 + v776 = iconst.i64 2 + call fn84(v86, v87, v88) + jump block94 + +block94: + v777 = global_value.i64 gv56 + call fn85(v86, v777) + v778 = global_value.i64 gv57 + trap user65535 + +block95(v779: i64, v780: i64, v1040: i64, v1045: i64, v1053: i16, v1058: i64): + v781 = iconst.i64 1 + jump block99(v779, v780, v781, v1040, v1045, v1053, v1058) + +block96: + v782 = iconst.i16 1 + v783 = load.i16 v62 + v784 = isub v783, v782 + v785 = iconst.i8 0 + v786 = stack_addr.i64 ss132 + v787 = stack_addr.i64 ss132 + v788 = load.i32 aligned v787 + v789 = iadd_imm.i64 v95, 2 + v790 = load.i8 v789 + v791 = uextend.i32 v790 + brz v791, block97 + jump block136 + +block136: + v792 = global_value.i64 gv58 + trap user0 + +block97: + v793 = load.i16 aligned v95 + v794 = iconst.i32 10 + v795 = iconst.i32 0 + v796 = icmp eq v794, v795 + v797 = bint.i8 v796 + v798 = uextend.i32 v797 + brz v798, block98 + jump block135 + +block135: + v799 = global_value.i64 gv59 + trap user0 + +block98: + v800 = iconst.i32 10 + v801 = load.i32 v63 + v802 = udiv v801, v800 + jump block62(v803, v1010, v1014, v1017, v1020, v1023, v1026, v1029, v1034) + +block99(v804: i64, v1035: i64, v1037: i64, v1039: i64, v1044: i64, v1052: i16, v1057: i64): + v817 -> v1035 + v830 -> v1037 + v844 -> v1039 + v857 -> v1039 + v939 -> v1039 + v1042 -> v1039 + 
v1050 -> v1039 + v908 -> v1044 + v917 -> v1044 + v921 -> v1044 + v1043 -> v1044 + v960 -> v1052 + v990 -> v1052 + v1051 -> v1052 + v1055 -> v1052 + v963 -> v1057 + v1056 -> v1057 + v1060 -> v1057 + v805 = iconst.i64 10 + v806 = imul v804, v805 + v807 = iconst.i8 0 + v808 = stack_addr.i64 ss133 + v809 = stack_addr.i64 ss133 + v810 = load.i64 aligned v809 + v811 = load.i64 aligned v809+8 + v812 = iadd_imm.i64 v96, 8 + v813 = load.i8 v812 + v814 = uextend.i32 v813 + brz v814, block100 + jump block134 + +block134: + v815 = global_value.i64 gv60 + trap user0 + +block100: + v816 = load.i64 v96 + v843 -> v816 + v856 -> v816 + v882 -> v816 + v818 = iconst.i64 10 + v819 = imul.i64 v817, v818 + v820 = iconst.i8 0 + v821 = stack_addr.i64 ss134 + v822 = stack_addr.i64 ss134 + v823 = load.i64 aligned v822 + v824 = load.i64 aligned v822+8 + v825 = iadd_imm.i64 v97, 8 + v826 = load.i8 v825 + v827 = uextend.i32 v826 + brz v827, block101 + jump block133 + +block133: + v828 = global_value.i64 gv61 + trap user0 + +block101: + v829 = load.i64 v97 + v935 -> v829 + v962 -> v829 + v992 -> v829 + v1036 -> v829 + v1049 -> v829 + v831 = iconst.i64 10 + v832 = imul.i64 v830, v831 + v833 = iconst.i8 0 + v834 = stack_addr.i64 ss135 + v835 = stack_addr.i64 ss135 + v836 = load.i64 aligned v835 + v837 = load.i64 aligned v835+8 + v838 = iadd_imm.i64 v98, 8 + v839 = load.i8 v838 + v840 = uextend.i32 v839 + brz v840, block102 + jump block132 + +block132: + v841 = global_value.i64 gv62 + trap user0 + +block102: + v842 = load.i64 v98 + v976 -> v842 + v989 -> v842 + v1038 -> v842 + v1061 -> v842 + v845 = ushr.i64 v843, v844 + v846 = iconst.i8 0 + v847 = stack_addr.i64 ss136 + v848 = stack_addr.i64 ss136 + v849 = load.i64 aligned v848 + v850 = load.i64 aligned v848+8 + v851 = iadd_imm.i64 v99, 8 + v852 = load.i8 v851 + v853 = uextend.i32 v852 + brz v853, block103 + jump block131 + +block131: + v854 = global_value.i64 gv63 + trap user0 + +block103: + v855 = load.i64 v99 + v886 -> v855 + v858 = iconst.i64 1 + v859 = ishl v858, v857 + v860 = iconst.i8 0 + v861 = stack_addr.i64 ss137 + v862 = stack_addr.i64 ss137 + v863 = load.i64 aligned v862 + v864 = load.i64 aligned v862+8 + v865 = iadd_imm.i64 v100, 8 + v866 = load.i8 v865 + v867 = uextend.i32 v866 + brz v867, block104 + jump block130 + +block130: + v868 = global_value.i64 gv64 + trap user0 + +block104: + v869 = load.i64 v100 + v870 = iconst.i64 1 + v871 = isub v869, v870 + v872 = iconst.i8 0 + v873 = stack_addr.i64 ss138 + v874 = stack_addr.i64 ss138 + v875 = load.i64 aligned v874 + v876 = load.i64 aligned v874+8 + v877 = iadd_imm.i64 v101, 8 + v878 = load.i8 v877 + v879 = uextend.i32 v878 + brz v879, block105 + jump block129 + +block129: + v880 = global_value.i64 gv65 + trap user0 + +block105: + v881 = load.i64 v101 + v883 = band.i64 v882, v881 + v934 -> v883 + v961 -> v883 + v991 -> v883 + v1005 -> v883 + v1048 -> v883 + v884 = iconst.i8 1 + v885 = uextend.i32 v884 + brz v885, block109(v855) + jump block106 + +block106: + v887 = iconst.i64 10 + v888 = icmp.i64 ult v886, v887 + v889 = bint.i8 v888 + v890 = uextend.i32 v889 + v891 = icmp_imm eq v890, 0 + v892 = bint.i8 v891 + v893 = uextend.i32 v892 + brz v893, block108 + jump block107 + +block107: + v894 = global_value.i64 gv66 + v895 = global_value.i64 gv67 + trap user65535 + +block108: + jump block109(v886) + +block109(v896: i64): + v897 = ireduce.i8 v896 + v898 = iconst.i8 48 + v899 = iadd v898, v897 + v900 = iconst.i8 0 + v901 = stack_addr.i64 ss139 + v902 = stack_addr.i64 ss139 + v903 = load.i16 aligned v902 + v904 = 
iadd_imm.i64 v102, 1 + v905 = load.i8 v904 + v906 = uextend.i32 v905 + brz v906, block110 + jump block128 + +block128: + v907 = global_value.i64 gv68 + trap user0 + +block110: + v909 = load.i64 v3 + v910 = load.i64 v3+8 + v911 = icmp.i64 ult v908, v910 + v912 = bint.i8 v911 + v913 = uextend.i32 v912 + brnz v913, block111 + jump block127 + +block127: + v914 = global_value.i64 gv69 + trap user0 + +block111: + v915 = load.i64 v3 + v916 = load.i64 v3+8 + v918 = imul_imm.i64 v917, 1 + v919 = iadd v915, v918 + v920 = load.i8 aligned v102 + v922 = iconst.i64 1 + v923 = iadd.i64 v921, v922 + v924 = iconst.i8 0 + v925 = stack_addr.i64 ss140 + v926 = stack_addr.i64 ss140 + v927 = load.i64 aligned v926 + v928 = load.i64 aligned v926+8 + v929 = iadd_imm.i64 v103, 8 + v930 = load.i8 v929 + v931 = uextend.i32 v930 + brz v931, block112 + jump block126 + +block126: + v932 = global_value.i64 gv70 + trap user0 + +block112: + v933 = load.i64 v103 + v954 -> v933 + v1047 -> v933 + v936 = icmp.i64 ult v934, v935 + v937 = bint.i8 v936 + v938 = uextend.i32 v937 + brz v938, block119 + jump block113 + +block113: + v940 = iconst.i64 1 + v941 = ishl v940, v939 + v942 = iconst.i8 0 + v943 = stack_addr.i64 ss141 + v944 = stack_addr.i64 ss141 + v945 = load.i64 aligned v944 + v946 = load.i64 aligned v944+8 + v947 = iadd_imm.i64 v104, 8 + v948 = load.i8 v947 + v949 = uextend.i32 v948 + brz v949, block114 + jump block125 + +block125: + v950 = global_value.i64 gv71 + trap user0 + +block114: + v951 = load.i64 v104 + v988 -> v951 + v952 = load.i64 aligned v3 + v953 = load.i64 aligned v3+8 + v955 = load.i64 v109 + call fn101(v107, v108, v955) + jump block115 + +block115: + v956 = load.i64 aligned v107 + v957 = load.i64 aligned v107+8 + v958 = load.i64 aligned v106 + v959 = load.i64 aligned v106+8 + v964 = load.i64 v30 + v965 = isub.i64 v963, v964 + v966 = iconst.i8 0 + v967 = stack_addr.i64 ss142 + v968 = stack_addr.i64 ss142 + v969 = load.i64 aligned v968 + v970 = load.i64 aligned v968+8 + v971 = iadd_imm.i64 v110, 8 + v972 = load.i8 v971 + v973 = uextend.i32 v972 + brz v973, block116 + jump block123 + +block123: + v974 = global_value.i64 gv72 + trap user0 + +block116: + v975 = load.i64 v110 + v977 = imul v975, v976 + v978 = iconst.i8 0 + v979 = stack_addr.i64 ss143 + v980 = stack_addr.i64 ss143 + v981 = load.i64 aligned v980 + v982 = load.i64 aligned v980+8 + v983 = iadd_imm.i64 v111, 8 + v984 = load.i8 v983 + v985 = uextend.i32 v984 + brz v985, block117 + jump block122 + +block122: + v986 = global_value.i64 gv73 + trap user0 + +block117: + v987 = load.i64 v111 + call fn104(v0, v105, v990, v991, v992, v987, v988, v989) + jump block118 + +block118: + jump block79 + +block119: + v993 = iconst.i16 1 + v994 = load.i16 v62 + v995 = isub v994, v993 + v996 = iconst.i8 0 + v997 = stack_addr.i64 ss144 + v998 = stack_addr.i64 ss144 + v999 = load.i32 aligned v998 + v1000 = iadd_imm.i64 v112, 2 + v1001 = load.i8 v1000 + v1002 = uextend.i32 v1001 + brz v1002, block120 + jump block121 + +block121: + v1003 = global_value.i64 gv74 + trap user0 + +block120: + v1004 = load.i16 aligned v112 + jump block99(v1005, v1036, v1038, v1042, v1047, v1055, v1060) +} diff --git a/cranelift/tests/bugpoint_test_expected.clif b/cranelift/tests/bugpoint_test_expected.clif new file mode 100644 index 0000000000..b2ca38a064 --- /dev/null +++ b/cranelift/tests/bugpoint_test_expected.clif @@ -0,0 +1,76 @@ +function u0:0(i64, i64, i64) system_v { + sig0 = (i64, i64, i16, i64, i64, i64, i64, i64) system_v + fn0 = u0:95 sig0 + +block0(v0: i64, v1: i64, v2: i64): + 
v113 -> v1 + v124 -> v1 + v136 -> v1 + v148 -> v1 + v160 -> v1 + v185 -> v1 + v222 -> v1 + v237 -> v1 + v241 -> v1 + v256 -> v1 + v262 -> v1 + v105 = iconst.i64 0 + trap user0 + +block99(v804: i64, v1035: i64, v1037: i64, v1039: i64, v1044: i64, v1052: i16, v1057: i64): + v817 -> v1035 + v830 -> v1037 + v844 -> v1039 + v857 -> v1039 + v939 -> v1039 + v1042 -> v1039 + v1050 -> v1039 + v908 -> v1044 + v917 -> v1044 + v921 -> v1044 + v1043 -> v1044 + v960 -> v1052 + v990 -> v1052 + v1051 -> v1052 + v1055 -> v1052 + v963 -> v1057 + v1056 -> v1057 + v1060 -> v1057 + trap user0 + +block101: + v829 = iconst.i64 0 + v935 -> v829 + v962 -> v829 + v992 -> v829 + v1036 -> v829 + v1049 -> v829 + trap user0 + +block102: + v842 = iconst.i64 0 + v976 -> v842 + v989 -> v842 + v1038 -> v842 + v1061 -> v842 + trap user0 + +block105: + v883 = iconst.i64 0 + v934 -> v883 + v961 -> v883 + v991 -> v883 + v1005 -> v883 + v1048 -> v883 + trap user0 + +block114: + v951 = iconst.i64 0 + v988 -> v951 + trap user0 + +block117: + v987 = iconst.i64 0 + call fn0(v0, v105, v1052, v883, v829, v987, v951, v842) + trap user0 +} diff --git a/cranelift/tests/filetests.rs b/cranelift/tests/filetests.rs new file mode 100644 index 0000000000..a633461109 --- /dev/null +++ b/cranelift/tests/filetests.rs @@ -0,0 +1,6 @@ +#[test] +fn filetests() { + // Run all the filetests in the following directories. + cranelift_filetests::run(false, false, &["filetests".into(), "docs".into()]) + .expect("test harness"); +} diff --git a/cranelift/umbrella/Cargo.toml b/cranelift/umbrella/Cargo.toml new file mode 100644 index 0000000000..f3a1aa8976 --- /dev/null +++ b/cranelift/umbrella/Cargo.toml @@ -0,0 +1,25 @@ +[package] +authors = ["The Cranelift Project Developers"] +name = "cranelift" +version = "0.59.0" +description = "Umbrella for commonly-used cranelift crates" +license = "Apache-2.0 WITH LLVM-exception" +documentation = "https://cranelift.readthedocs.io/" +repository = "https://github.com/bytecodealliance/cranelift" +categories = ["no-std"] +readme = "README.md" +keywords = ["compile", "compiler", "jit"] +edition = "2018" + +[dependencies] +cranelift-codegen = { path = "../codegen", version = "0.59.0", default-features = false } +cranelift-frontend = { path = "../frontend", version = "0.59.0", default-features = false } + +[features] +default = ["std"] +std = ["cranelift-codegen/std", "cranelift-frontend/std"] +core = ["cranelift-codegen/core", "cranelift-frontend/core"] + +[badges] +maintenance = { status = "experimental" } +travis-ci = { repository = "bytecodealliance/cranelift" } diff --git a/cranelift/umbrella/LICENSE b/cranelift/umbrella/LICENSE new file mode 100644 index 0000000000..f9d81955f4 --- /dev/null +++ b/cranelift/umbrella/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + diff --git a/cranelift/umbrella/README.md b/cranelift/umbrella/README.md new file mode 100644 index 0000000000..134b3140bf --- /dev/null +++ b/cranelift/umbrella/README.md @@ -0,0 +1,3 @@ +This is an umbrella crate which contains no code of its own, but pulls in +other cranelift library crates to provide a convenient one-line dependency, +and a prelude, for common use cases. diff --git a/cranelift/umbrella/src/lib.rs b/cranelift/umbrella/src/lib.rs new file mode 100644 index 0000000000..46582c9555 --- /dev/null +++ b/cranelift/umbrella/src/lib.rs @@ -0,0 +1,50 @@ +//! Cranelift umbrella crate, providing a convenient one-line dependency. + +#![deny( + missing_docs, + trivial_numeric_casts, + unused_extern_crates, + unstable_features +)] +#![warn(unused_import_braces)] +#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))] +#![cfg_attr( + feature = "cargo-clippy", + warn( + clippy::float_arithmetic, + clippy::mut_mut, + clippy::nonminimal_bool, + clippy::option_map_unwrap_or, + clippy::option_map_unwrap_or_else, + clippy::print_stdout, + clippy::unicode_not_nfc, + clippy::use_self + ) +)] +#![no_std] + +/// Provide these crates, renamed to reduce stutter. +pub use cranelift_codegen as codegen; +pub use cranelift_frontend as frontend; + +/// A prelude providing convenient access to commonly-used cranelift features. Use +/// as `use cranelift::prelude::*`. +pub mod prelude { + pub use crate::codegen; + pub use crate::codegen::entity::EntityRef; + pub use crate::codegen::ir::condcodes::{FloatCC, IntCC}; + pub use crate::codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Uimm64}; + pub use crate::codegen::ir::types; + pub use crate::codegen::ir::{ + AbiParam, Block, ExtFuncData, ExternalName, GlobalValueData, InstBuilder, JumpTableData, + MemFlags, Signature, StackSlotData, StackSlotKind, TrapCode, Type, Value, + }; + pub use crate::codegen::isa; + pub use crate::codegen::settings::{self, Configurable}; + + pub use crate::frontend::{FunctionBuilder, FunctionBuilderContext, Variable}; +} + +/// Version number of this crate. 
+pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/cranelift/wasm/Cargo.toml b/cranelift/wasm/Cargo.toml new file mode 100644 index 0000000000..bc5bb83f7b --- /dev/null +++ b/cranelift/wasm/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "cranelift-wasm" +version = "0.59.0" +authors = ["The Cranelift Project Developers"] +description = "Translator from WebAssembly to Cranelift IR" +repository = "https://github.com/bytecodealliance/cranelift" +license = "Apache-2.0 WITH LLVM-exception" +categories = ["no-std", "wasm"] +readme = "README.md" +keywords = ["webassembly", "wasm"] +edition = "2018" + +[dependencies] +wasmparser = { version = "0.51.0", default-features = false } +cranelift-codegen = { path = "../codegen", version = "0.59.0", default-features = false } +cranelift-entity = { path = "../entity", version = "0.59.0" } +cranelift-frontend = { path = "../frontend", version = "0.59.0", default-features = false } +hashbrown = { version = "0.6", optional = true } +log = { version = "0.4.6", default-features = false } +serde = { version = "1.0.94", features = ["derive"], optional = true } +thiserror = "1.0.4" + +[dev-dependencies] +wat = "1.0.9" +target-lexicon = "0.10" + +[features] +default = ["std"] +std = ["cranelift-codegen/std", "cranelift-frontend/std"] +core = ["hashbrown", "cranelift-codegen/core", "cranelift-frontend/core"] +enable-serde = ["serde"] + +[badges] +maintenance = { status = "experimental" } +travis-ci = { repository = "bytecodealliance/cranelift" } diff --git a/cranelift/wasm/LICENSE b/cranelift/wasm/LICENSE new file mode 100644 index 0000000000..f9d81955f4 --- /dev/null +++ b/cranelift/wasm/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
+
diff --git a/cranelift/wasm/README.md b/cranelift/wasm/README.md
new file mode 100644
index 0000000000..7ea2f293d4
--- /dev/null
+++ b/cranelift/wasm/README.md
@@ -0,0 +1,8 @@
+This crate performs the translation from a wasm module in binary format to the
+in-memory form of the [Cranelift IR].
+
+If you're looking for a complete WebAssembly implementation that uses this
+library, see [Wasmtime].
+
+[Wasmtime]: https://github.com/bytecodealliance/wasmtime
+[Cranelift IR]: https://cranelift.readthedocs.io/en/latest/ir.html
diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs
new file mode 100644
index 0000000000..da2a7ed186
--- /dev/null
+++ b/cranelift/wasm/src/code_translator.rs
@@ -0,0 +1,2045 @@
+//! This module contains the bulk of the interesting code performing the translation between
+//! WebAssembly and Cranelift IR.
+//!
+//! The translation is done in one pass, opcode by opcode. Two main data structures are used during
+//! code translation: the value stack and the control stack. The value stack mimics the execution
+//! of the WebAssembly stack machine: each instruction result is pushed onto the stack and
+//! instruction arguments are popped off the stack. Similarly, when encountering a control flow
+//! block, it is pushed onto the control stack and popped off when encountering the corresponding
+//! `End`.
+//!
+//! Another data structure, the translation state, records whether the current code is reachable
+//! and whether a return needs to be inserted at the end of the function.
+//!
+//! Some of the WebAssembly instructions need information about the environment for which they
+//! are being translated:
+//!
+//! - the loads and stores need the memory base address;
+//! - the `get_global` and `set_global` instructions depend on how the globals are implemented;
+//! - `memory.size` and `memory.grow` are runtime functions;
+//! - `call_indirect` has to translate the function index into the address of the function it
+//!   refers to;
+//!
+//! That is why `translate_function_body` takes an object implementing the `FuncEnvironment`
+//! trait as argument.
+use super::{hash_map, HashMap};
+use crate::environ::{FuncEnvironment, GlobalVariable, ReturnMode, WasmResult};
+use crate::state::{ControlStackFrame, ElseData, FuncTranslationState, ModuleTranslationState};
+use crate::translation_utils::{
+    block_with_params, blocktype_params_results, f32_translation, f64_translation,
+};
+use crate::translation_utils::{FuncIndex, GlobalIndex, MemoryIndex, SignatureIndex, TableIndex};
+use crate::wasm_unsupported;
+use core::{i32, u32};
+use cranelift_codegen::ir::condcodes::{FloatCC, IntCC};
+use cranelift_codegen::ir::types::*;
+use cranelift_codegen::ir::{
+    self, ConstantData, InstBuilder, JumpTableData, MemFlags, Value, ValueLabel,
+};
+use cranelift_codegen::packed_option::ReservedValue;
+use cranelift_frontend::{FunctionBuilder, Variable};
+use std::vec::Vec;
+use wasmparser::{MemoryImmediate, Operator};
+
+// Clippy warns about "flags: _" but it's important to document that the flags field is ignored
+#[cfg_attr(
+    feature = "cargo-clippy",
+    allow(clippy::unneeded_field_pattern, clippy::cognitive_complexity)
+)]
+/// Translates a single wasm operator into Cranelift IR instructions, updating the value and
+/// control stacks as needed.
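+///
+/// As an illustration of the stack discipline described in the module docs (a
+/// conceptual trace with made-up value names, not literal output), the body
+/// `(i32.add (local.get 0) (local.get 1))` is translated one operator at a time:
+///
+/// ```text
+/// local.get 0  =>  v0 = use_var(var0)   ; value stack: [v0]
+/// local.get 1  =>  v1 = use_var(var1)   ; value stack: [v0, v1]
+/// i32.add      =>  v2 = iadd v0, v1     ; value stack: [v2]
+/// ```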
+pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
+    module_translation_state: &ModuleTranslationState,
+    op: &Operator,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+    environ: &mut FE,
+) -> WasmResult<()> {
+    if !state.reachable {
+        translate_unreachable_operator(module_translation_state, &op, builder, state, environ)?;
+        return Ok(());
+    }
+
+    // This big match handles all Wasm code operators.
+    match op {
+        /********************************** Locals ****************************************
+         *  `get_local` and `set_local` are treated as non-SSA variables and will completely
+         *  disappear in the Cranelift code.
+         ***********************************************************************************/
+        Operator::LocalGet { local_index } => {
+            let val = builder.use_var(Variable::with_u32(*local_index));
+            state.push1(val);
+            let label = ValueLabel::from_u32(*local_index);
+            builder.set_val_label(val, label);
+        }
+        Operator::LocalSet { local_index } => {
+            let val = state.pop1();
+            builder.def_var(Variable::with_u32(*local_index), val);
+            let label = ValueLabel::from_u32(*local_index);
+            builder.set_val_label(val, label);
+        }
+        Operator::LocalTee { local_index } => {
+            let val = state.peek1();
+            builder.def_var(Variable::with_u32(*local_index), val);
+            let label = ValueLabel::from_u32(*local_index);
+            builder.set_val_label(val, label);
+        }
+        /********************************** Globals ****************************************
+         *  `get_global` and `set_global` are handled by the environment.
+         ***********************************************************************************/
+        Operator::GlobalGet { global_index } => {
+            let val = match state.get_global(builder.func, *global_index, environ)? {
+                GlobalVariable::Const(val) => val,
+                GlobalVariable::Memory { gv, offset, ty } => {
+                    let addr = builder.ins().global_value(environ.pointer_type(), gv);
+                    let flags = ir::MemFlags::trusted();
+                    builder.ins().load(ty, flags, addr, offset)
+                }
+                GlobalVariable::Custom => environ.translate_custom_global_get(
+                    builder.cursor(),
+                    GlobalIndex::from_u32(*global_index),
+                )?,
+            };
+            state.push1(val);
+        }
+        Operator::GlobalSet { global_index } => {
+            match state.get_global(builder.func, *global_index, environ)? {
+                GlobalVariable::Const(_) => panic!("global #{} is a constant", *global_index),
+                GlobalVariable::Memory { gv, offset, ty } => {
+                    let addr = builder.ins().global_value(environ.pointer_type(), gv);
+                    let flags = ir::MemFlags::trusted();
+                    let val = state.pop1();
+                    debug_assert_eq!(ty, builder.func.dfg.value_type(val));
+                    builder.ins().store(flags, val, addr, offset);
+                }
+                GlobalVariable::Custom => {
+                    let val = state.pop1();
+                    environ.translate_custom_global_set(
+                        builder.cursor(),
+                        GlobalIndex::from_u32(*global_index),
+                        val,
+                    )?;
+                }
+            }
+        }
+        /********************************* Stack misc ***************************************
+         *  `drop`, `nop`, `unreachable` and `select`.
+         ***********************************************************************************/
+        Operator::Drop => {
+            state.pop1();
+        }
+        Operator::Select => {
+            let (arg1, arg2, cond) = state.pop3();
+            state.push1(builder.ins().select(cond, arg1, arg2));
+        }
+        Operator::TypedSelect { ty: _ } => {
+            // We ignore the explicit type parameter as it is only needed for
+            // validation, which we require to have been performed before
+            // translation.
+            let (arg1, arg2, cond) = state.pop3();
+            state.push1(builder.ins().select(cond, arg1, arg2));
+        }
+        Operator::Nop => {
+            // We do nothing
+        }
+        Operator::Unreachable => {
+            builder.ins().trap(ir::TrapCode::UnreachableCodeReached);
+            state.reachable = false;
+        }
+        /***************************** Control flow blocks **********************************
+         *  When starting a control flow block, we create a new `Block` that will hold the code
+         *  after the block, and we push a frame on the control stack. Depending on the type
+         *  of block, we create a new `Block` for the body of the block with an associated
+         *  jump instruction.
+         *
+         *  The `End` instruction pops the last control frame from the control stack, seals
+         *  the destination block (since `br` instructions targeting it only appear inside the
+         *  block and have already been translated) and modifies the value stack to use the
+         *  `Block`'s possible argument values.
+         ***********************************************************************************/
+        Operator::Block { ty } => {
+            let (params, results) = blocktype_params_results(module_translation_state, *ty)?;
+            let next = block_with_params(builder, results, environ)?;
+            state.push_block(next, params.len(), results.len());
+        }
+        Operator::Loop { ty } => {
+            let (params, results) = blocktype_params_results(module_translation_state, *ty)?;
+            let loop_body = block_with_params(builder, params, environ)?;
+            let next = block_with_params(builder, results, environ)?;
+            builder.ins().jump(loop_body, state.peekn(params.len()));
+            state.push_loop(loop_body, next, params.len(), results.len());
+
+            // Pop the initial `Block` actuals and replace them with the `Block`'s
+            // params since control flow joins at the top of the loop.
+            state.popn(params.len());
+            state
+                .stack
+                .extend_from_slice(builder.block_params(loop_body));
+
+            builder.switch_to_block(loop_body);
+            environ.translate_loop_header(builder.cursor())?;
+        }
+        Operator::If { ty } => {
+            let val = state.pop1();
+
+            let (params, results) = blocktype_params_results(module_translation_state, *ty)?;
+            let (destination, else_data) = if params == results {
+                // It is possible there is no `else` block, so we will only
+                // allocate a block for it if/when we find the `else`. For now,
+                // if the condition isn't true, we jump directly to the
+                // destination block following the whole `if...end`. If we do end
+                // up discovering an `else`, then we will allocate a block for it
+                // and go back and patch the jump.
+                let destination = block_with_params(builder, results, environ)?;
+                let branch_inst = builder
+                    .ins()
+                    .brz(val, destination, state.peekn(params.len()));
+                (destination, ElseData::NoElse { branch_inst })
+            } else {
+                // The `if` type signature is not valid without an `else` block,
+                // so we eagerly allocate the `else` block here.
+                let destination = block_with_params(builder, results, environ)?;
+                let else_block = block_with_params(builder, params, environ)?;
+                builder
+                    .ins()
+                    .brz(val, else_block, state.peekn(params.len()));
+                builder.seal_block(else_block);
+                (destination, ElseData::WithElse { else_block })
+            };
+
+            let next_block = builder.create_block();
+            builder.ins().jump(next_block, &[]);
+            builder.seal_block(next_block); // Only predecessor is the current block.
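+            // `destination` is deliberately left unsealed at this point: branches to it
+            // may still be added while the consequent (and a possible `else`) is being
+            // translated, so it is only sealed once the matching `End` is reached.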
+            builder.switch_to_block(next_block);
+
+            // Here we append an argument to a Block targeted by an argumentless jump
+            // instruction. There are in fact two cases:
+            // - either the If does not have an Else clause, in which case ty = EmptyBlock
+            //   and we add nothing;
+            // - or the If has an Else clause, in which case the destination of this jump
+            //   instruction will be changed later when we translate the Else operator.
+            state.push_if(destination, else_data, params.len(), results.len(), *ty);
+        }
+        Operator::Else => {
+            let i = state.control_stack.len() - 1;
+            match state.control_stack[i] {
+                ControlStackFrame::If {
+                    ref else_data,
+                    head_is_reachable,
+                    ref mut consequent_ends_reachable,
+                    num_return_values,
+                    blocktype,
+                    destination,
+                    ..
+                } => {
+                    // We finished the consequent, so record its final
+                    // reachability state.
+                    debug_assert!(consequent_ends_reachable.is_none());
+                    *consequent_ends_reachable = Some(state.reachable);
+
+                    if head_is_reachable {
+                        // We have a branch from the head of the `if` to the `else`.
+                        state.reachable = true;
+
+                        // Ensure we have a block for the `else` block (it may have
+                        // already been pre-allocated, see `ElseData` for details).
+                        let else_block = match *else_data {
+                            ElseData::NoElse { branch_inst } => {
+                                let (params, _results) =
+                                    blocktype_params_results(module_translation_state, blocktype)?;
+                                debug_assert_eq!(params.len(), num_return_values);
+                                let else_block = block_with_params(builder, params, environ)?;
+                                builder.ins().jump(destination, state.peekn(params.len()));
+                                state.popn(params.len());
+
+                                builder.change_jump_destination(branch_inst, else_block);
+                                builder.seal_block(else_block);
+                                else_block
+                            }
+                            ElseData::WithElse { else_block } => {
+                                builder
+                                    .ins()
+                                    .jump(destination, state.peekn(num_return_values));
+                                state.popn(num_return_values);
+                                else_block
+                            }
+                        };
+
+                        // You might be expecting that we push the parameters for this
+                        // `else` block here, something like this:
+                        //
+                        //     state.pushn(&control_stack_frame.params);
+                        //
+                        // We don't do that because they are already on the top of the stack
+                        // for us: we pushed the parameters twice when we saw the initial
+                        // `if` so that we wouldn't have to save the parameters in the
+                        // `ControlStackFrame` as another `Vec` allocation.
+
+                        builder.switch_to_block(else_block);
+
+                        // We don't bother updating the control frame's `ElseData`
+                        // to `WithElse` because nothing else will read it.
+                    }
+                }
+                _ => unreachable!(),
+            }
+        }
+        Operator::End => {
+            let frame = state.control_stack.pop().unwrap();
+            let next_block = frame.following_code();
+
+            if !builder.is_unreachable() || !builder.is_pristine() {
+                let return_count = frame.num_return_values();
+                let return_args = state.peekn_mut(return_count);
+                let next_block_types = builder.func.dfg.block_param_types(next_block);
+                bitcast_arguments(return_args, &next_block_types, builder);
+                builder.ins().jump(frame.following_code(), return_args);
+                // You might expect that if we just finished an `if` block that
+                // didn't have a corresponding `else` block, then we would clean
+                // up our duplicate set of parameters that we pushed earlier
+                // right here. However, we don't have to explicitly do that,
+                // since we truncate the stack back to the original height
+                // below.
+            }
+            builder.switch_to_block(next_block);
+            builder.seal_block(next_block);
+            // If it is a loop, we also have to seal the loop body block.
+            if let ControlStackFrame::Loop { header, .. } = frame {
+                builder.seal_block(header)
+            }
+            state.stack.truncate(frame.original_stack_size());
+            state
+                .stack
+                .extend_from_slice(builder.block_params(next_block));
+        }
+        /**************************** Branch instructions *********************************
+         * The branch instructions all have as arguments a target nesting level, which
+         * corresponds to how many control stack frames we have to pop to get the
+         * destination `Block`.
+         *
+         * Once the destination `Block` is found, we sometimes have to declare a certain depth
+         * of the stack unreachable, because some branch instructions are terminators.
+         *
+         * The `br_table` case is much more complicated because Cranelift's `br_table` instruction
+         * does not support jump arguments like all the other branch instructions. That is why, in
+         * the case where we would use jump arguments for every other branch instruction, we
+         * need to split the critical edges leaving the `br_table`s by creating one `Block` per
+         * table destination; the `br_table` will point to these newly created `Block`s and these
+         * `Block`s contain only a jump instruction pointing to the final destination, this time with
+         * jump arguments.
+         *
+         * This system is also implemented in Cranelift's SSA construction algorithm, because
+         * `use_var` located in a destination `Block` of a `br_table` might trigger the addition
+         * of jump arguments in each predecessor branch instruction, one of which might be a
+         * `br_table`.
+         ***********************************************************************************/
+        Operator::Br { relative_depth } => {
+            let i = state.control_stack.len() - 1 - (*relative_depth as usize);
+            let (return_count, br_destination) = {
+                let frame = &mut state.control_stack[i];
+                // We signal that all the code that follows until the next End is unreachable.
+                frame.set_branched_to_exit();
+                let return_count = if frame.is_loop() {
+                    0
+                } else {
+                    frame.num_return_values()
+                };
+                (return_count, frame.br_destination())
+            };
+
+            // Bitcast any vector arguments to their default type, I8X16, before jumping.
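+            // (A wasm `v128` block parameter is created with the default vector type,
+            // I8X16, so a value currently held as another vector type, e.g. F32X4, must
+            // be bitcast back before the jump for its type to match the destination's
+            // parameter type; values whose types already match are left untouched.)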
+ let destination_args = state.peekn_mut(return_count); + let destination_types = builder.func.dfg.block_param_types(br_destination); + bitcast_arguments( + destination_args, + &destination_types[..return_count], + builder, + ); + + builder.ins().jump(br_destination, destination_args); + state.popn(return_count); + state.reachable = false; + } + Operator::BrIf { relative_depth } => translate_br_if(*relative_depth, builder, state), + Operator::BrTable { table } => { + let (depths, default) = table.read_table()?; + let mut min_depth = default; + for depth in &*depths { + if *depth < min_depth { + min_depth = *depth; + } + } + let jump_args_count = { + let i = state.control_stack.len() - 1 - (min_depth as usize); + let min_depth_frame = &state.control_stack[i]; + if min_depth_frame.is_loop() { + 0 + } else { + min_depth_frame.num_return_values() + } + }; + let val = state.pop1(); + let mut data = JumpTableData::with_capacity(depths.len()); + if jump_args_count == 0 { + // No jump arguments + for depth in &*depths { + let block = { + let i = state.control_stack.len() - 1 - (*depth as usize); + let frame = &mut state.control_stack[i]; + frame.set_branched_to_exit(); + frame.br_destination() + }; + data.push_entry(block); + } + let jt = builder.create_jump_table(data); + let block = { + let i = state.control_stack.len() - 1 - (default as usize); + let frame = &mut state.control_stack[i]; + frame.set_branched_to_exit(); + frame.br_destination() + }; + builder.ins().br_table(val, block, jt); + } else { + // Here we have jump arguments, but Cranelift's br_table doesn't support them + // We then proceed to split the edges going out of the br_table + let return_count = jump_args_count; + let mut dest_block_sequence = vec![]; + let mut dest_block_map = HashMap::new(); + for depth in &*depths { + let branch_block = match dest_block_map.entry(*depth as usize) { + hash_map::Entry::Occupied(entry) => *entry.get(), + hash_map::Entry::Vacant(entry) => { + let block = builder.create_block(); + dest_block_sequence.push((*depth as usize, block)); + *entry.insert(block) + } + }; + data.push_entry(branch_block); + } + let default_branch_block = match dest_block_map.entry(default as usize) { + hash_map::Entry::Occupied(entry) => *entry.get(), + hash_map::Entry::Vacant(entry) => { + let block = builder.create_block(); + dest_block_sequence.push((default as usize, block)); + *entry.insert(block) + } + }; + let jt = builder.create_jump_table(data); + builder.ins().br_table(val, default_branch_block, jt); + for (depth, dest_block) in dest_block_sequence { + builder.switch_to_block(dest_block); + builder.seal_block(dest_block); + let real_dest_block = { + let i = state.control_stack.len() - 1 - depth; + let frame = &mut state.control_stack[i]; + frame.set_branched_to_exit(); + frame.br_destination() + }; + + // Bitcast any vector arguments to their default type, I8X16, before jumping. 
+ let destination_args = state.peekn_mut(return_count); + let destination_types = builder.func.dfg.block_param_types(real_dest_block); + bitcast_arguments( + destination_args, + &destination_types[..return_count], + builder, + ); + + builder.ins().jump(real_dest_block, destination_args); + } + state.popn(return_count); + } + state.reachable = false; + } + Operator::Return => { + let (return_count, br_destination) = { + let frame = &mut state.control_stack[0]; + frame.set_branched_to_exit(); + let return_count = frame.num_return_values(); + (return_count, frame.br_destination()) + }; + { + let return_args = state.peekn_mut(return_count); + let return_types = wasm_param_types(&builder.func.signature.returns, |i| { + environ.is_wasm_return(&builder.func.signature, i) + }); + bitcast_arguments(return_args, &return_types, builder); + match environ.return_mode() { + ReturnMode::NormalReturns => builder.ins().return_(return_args), + ReturnMode::FallthroughReturn => { + builder.ins().jump(br_destination, return_args) + } + }; + } + state.popn(return_count); + state.reachable = false; + } + /************************************ Calls **************************************** + * The call instructions pop off their arguments from the stack and append their + * return values to it. `call_indirect` needs environment support because there is an + * argument referring to an index in the external functions table of the module. + ************************************************************************************/ + Operator::Call { function_index } => { + let (fref, num_args) = state.get_direct_func(builder.func, *function_index, environ)?; + + // Bitcast any vector arguments to their default type, I8X16, before calling. + let callee_signature = + &builder.func.dfg.signatures[builder.func.dfg.ext_funcs[fref].signature]; + let args = state.peekn_mut(num_args); + let types = wasm_param_types(&callee_signature.params, |i| { + environ.is_wasm_parameter(&callee_signature, i) + }); + bitcast_arguments(args, &types, builder); + + let call = environ.translate_call( + builder.cursor(), + FuncIndex::from_u32(*function_index), + fref, + args, + )?; + let inst_results = builder.inst_results(call); + debug_assert_eq!( + inst_results.len(), + builder.func.dfg.signatures[builder.func.dfg.ext_funcs[fref].signature] + .returns + .len(), + "translate_call results should match the call signature" + ); + state.popn(num_args); + state.pushn(inst_results); + } + Operator::CallIndirect { index, table_index } => { + // `index` is the index of the function's signature and `table_index` is the index of + // the table to search the function in. + let (sigref, num_args) = state.get_indirect_sig(builder.func, *index, environ)?; + let table = state.get_table(builder.func, *table_index, environ)?; + let callee = state.pop1(); + + // Bitcast any vector arguments to their default type, I8X16, before calling. 
+            let callee_signature = &builder.func.dfg.signatures[sigref];
+            let args = state.peekn_mut(num_args);
+            let types = wasm_param_types(&callee_signature.params, |i| {
+                environ.is_wasm_parameter(&callee_signature, i)
+            });
+            bitcast_arguments(args, &types, builder);
+
+            let call = environ.translate_call_indirect(
+                builder.cursor(),
+                TableIndex::from_u32(*table_index),
+                table,
+                SignatureIndex::from_u32(*index),
+                sigref,
+                callee,
+                state.peekn(num_args),
+            )?;
+            let inst_results = builder.inst_results(call);
+            debug_assert_eq!(
+                inst_results.len(),
+                builder.func.dfg.signatures[sigref].returns.len(),
+                "translate_call_indirect results should match the call signature"
+            );
+            state.popn(num_args);
+            state.pushn(inst_results);
+        }
+        /******************************* Memory management ***********************************
+         * Memory management is handled by the environment. It is usually translated into
+         * calls to special functions.
+         ************************************************************************************/
+        Operator::MemoryGrow { reserved } => {
+            // The WebAssembly MVP only supports one linear memory, but we expect the reserved
+            // argument to be a memory index.
+            let heap_index = MemoryIndex::from_u32(*reserved);
+            let heap = state.get_heap(builder.func, *reserved, environ)?;
+            let val = state.pop1();
+            state.push1(environ.translate_memory_grow(builder.cursor(), heap_index, heap, val)?)
+        }
+        Operator::MemorySize { reserved } => {
+            let heap_index = MemoryIndex::from_u32(*reserved);
+            let heap = state.get_heap(builder.func, *reserved, environ)?;
+            state.push1(environ.translate_memory_size(builder.cursor(), heap_index, heap)?);
+        }
+        /******************************* Load instructions ***********************************
+         * Wasm specifies an integer alignment flag but we drop it in Cranelift.
+         * The memory base address is provided by the environment.
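+         *
+         * As a rough sketch (the exact instruction sequence is chosen by `translate_load`
+         * together with the environment's heap description), an `i32.load offset=8` of
+         * index `v0` might become:
+         *
+         *     v1 = heap_addr.i64 heap0, v0, 1
+         *     v2 = load.i32 v1+8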
+ ************************************************************************************/ + Operator::I32Load8U { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_load(*offset, ir::Opcode::Uload8, I32, builder, state, environ)?; + } + Operator::I32Load16U { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_load(*offset, ir::Opcode::Uload16, I32, builder, state, environ)?; + } + Operator::I32Load8S { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_load(*offset, ir::Opcode::Sload8, I32, builder, state, environ)?; + } + Operator::I32Load16S { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_load(*offset, ir::Opcode::Sload16, I32, builder, state, environ)?; + } + Operator::I64Load8U { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_load(*offset, ir::Opcode::Uload8, I64, builder, state, environ)?; + } + Operator::I64Load16U { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_load(*offset, ir::Opcode::Uload16, I64, builder, state, environ)?; + } + Operator::I64Load8S { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_load(*offset, ir::Opcode::Sload8, I64, builder, state, environ)?; + } + Operator::I64Load16S { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_load(*offset, ir::Opcode::Sload16, I64, builder, state, environ)?; + } + Operator::I64Load32S { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_load(*offset, ir::Opcode::Sload32, I64, builder, state, environ)?; + } + Operator::I64Load32U { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_load(*offset, ir::Opcode::Uload32, I64, builder, state, environ)?; + } + Operator::I32Load { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_load(*offset, ir::Opcode::Load, I32, builder, state, environ)?; + } + Operator::F32Load { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_load(*offset, ir::Opcode::Load, F32, builder, state, environ)?; + } + Operator::I64Load { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_load(*offset, ir::Opcode::Load, I64, builder, state, environ)?; + } + Operator::F64Load { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_load(*offset, ir::Opcode::Load, F64, builder, state, environ)?; + } + Operator::V128Load { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_load(*offset, ir::Opcode::Load, I8X16, builder, state, environ)?; + } + /****************************** Store instructions *********************************** + * Wasm specifies an integer alignment flag but we drop it in Cranelift. + * The memory base address is provided by the environment. 
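+         *
+         * As with loads, the address computation is delegated to `translate_store` and
+         * the environment; per operator, only the store opcode (`Store`, `Istore8`, ...)
+         * and the immediate offset differ.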
+ ************************************************************************************/ + Operator::I32Store { + memarg: MemoryImmediate { flags: _, offset }, + } + | Operator::I64Store { + memarg: MemoryImmediate { flags: _, offset }, + } + | Operator::F32Store { + memarg: MemoryImmediate { flags: _, offset }, + } + | Operator::F64Store { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_store(*offset, ir::Opcode::Store, builder, state, environ)?; + } + Operator::I32Store8 { + memarg: MemoryImmediate { flags: _, offset }, + } + | Operator::I64Store8 { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_store(*offset, ir::Opcode::Istore8, builder, state, environ)?; + } + Operator::I32Store16 { + memarg: MemoryImmediate { flags: _, offset }, + } + | Operator::I64Store16 { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_store(*offset, ir::Opcode::Istore16, builder, state, environ)?; + } + Operator::I64Store32 { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_store(*offset, ir::Opcode::Istore32, builder, state, environ)?; + } + Operator::V128Store { + memarg: MemoryImmediate { flags: _, offset }, + } => { + translate_store(*offset, ir::Opcode::Store, builder, state, environ)?; + } + /****************************** Nullary Operators ************************************/ + Operator::I32Const { value } => state.push1(builder.ins().iconst(I32, i64::from(*value))), + Operator::I64Const { value } => state.push1(builder.ins().iconst(I64, *value)), + Operator::F32Const { value } => { + state.push1(builder.ins().f32const(f32_translation(*value))); + } + Operator::F64Const { value } => { + state.push1(builder.ins().f64const(f64_translation(*value))); + } + /******************************* Unary Operators *************************************/ + Operator::I32Clz | Operator::I64Clz => { + let arg = state.pop1(); + state.push1(builder.ins().clz(arg)); + } + Operator::I32Ctz | Operator::I64Ctz => { + let arg = state.pop1(); + state.push1(builder.ins().ctz(arg)); + } + Operator::I32Popcnt | Operator::I64Popcnt => { + let arg = state.pop1(); + state.push1(builder.ins().popcnt(arg)); + } + Operator::I64ExtendI32S => { + let val = state.pop1(); + state.push1(builder.ins().sextend(I64, val)); + } + Operator::I64ExtendI32U => { + let val = state.pop1(); + state.push1(builder.ins().uextend(I64, val)); + } + Operator::I32WrapI64 => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I32, val)); + } + Operator::F32Sqrt | Operator::F64Sqrt => { + let arg = state.pop1(); + state.push1(builder.ins().sqrt(arg)); + } + Operator::F32Ceil | Operator::F64Ceil => { + let arg = state.pop1(); + state.push1(builder.ins().ceil(arg)); + } + Operator::F32Floor | Operator::F64Floor => { + let arg = state.pop1(); + state.push1(builder.ins().floor(arg)); + } + Operator::F32Trunc | Operator::F64Trunc => { + let arg = state.pop1(); + state.push1(builder.ins().trunc(arg)); + } + Operator::F32Nearest | Operator::F64Nearest => { + let arg = state.pop1(); + state.push1(builder.ins().nearest(arg)); + } + Operator::F32Abs | Operator::F64Abs => { + let val = state.pop1(); + state.push1(builder.ins().fabs(val)); + } + Operator::F32Neg | Operator::F64Neg => { + let arg = state.pop1(); + state.push1(builder.ins().fneg(arg)); + } + Operator::F64ConvertI64U | Operator::F64ConvertI32U => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_from_uint(F64, val)); + } + Operator::F64ConvertI64S | Operator::F64ConvertI32S => { + let val = state.pop1(); + 
state.push1(builder.ins().fcvt_from_sint(F64, val)); + } + Operator::F32ConvertI64S | Operator::F32ConvertI32S => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_from_sint(F32, val)); + } + Operator::F32ConvertI64U | Operator::F32ConvertI32U => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_from_uint(F32, val)); + } + Operator::F64PromoteF32 => { + let val = state.pop1(); + state.push1(builder.ins().fpromote(F64, val)); + } + Operator::F32DemoteF64 => { + let val = state.pop1(); + state.push1(builder.ins().fdemote(F32, val)); + } + Operator::I64TruncF64S | Operator::I64TruncF32S => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_sint(I64, val)); + } + Operator::I32TruncF64S | Operator::I32TruncF32S => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_sint(I32, val)); + } + Operator::I64TruncF64U | Operator::I64TruncF32U => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_uint(I64, val)); + } + Operator::I32TruncF64U | Operator::I32TruncF32U => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_uint(I32, val)); + } + Operator::I64TruncSatF64S | Operator::I64TruncSatF32S => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_sint_sat(I64, val)); + } + Operator::I32TruncSatF64S | Operator::I32TruncSatF32S => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_sint_sat(I32, val)); + } + Operator::I64TruncSatF64U | Operator::I64TruncSatF32U => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_uint_sat(I64, val)); + } + Operator::I32TruncSatF64U | Operator::I32TruncSatF32U => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_uint_sat(I32, val)); + } + Operator::F32ReinterpretI32 => { + let val = state.pop1(); + state.push1(builder.ins().bitcast(F32, val)); + } + Operator::F64ReinterpretI64 => { + let val = state.pop1(); + state.push1(builder.ins().bitcast(F64, val)); + } + Operator::I32ReinterpretF32 => { + let val = state.pop1(); + state.push1(builder.ins().bitcast(I32, val)); + } + Operator::I64ReinterpretF64 => { + let val = state.pop1(); + state.push1(builder.ins().bitcast(I64, val)); + } + Operator::I32Extend8S => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I8, val)); + let val = state.pop1(); + state.push1(builder.ins().sextend(I32, val)); + } + Operator::I32Extend16S => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I16, val)); + let val = state.pop1(); + state.push1(builder.ins().sextend(I32, val)); + } + Operator::I64Extend8S => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I8, val)); + let val = state.pop1(); + state.push1(builder.ins().sextend(I64, val)); + } + Operator::I64Extend16S => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I16, val)); + let val = state.pop1(); + state.push1(builder.ins().sextend(I64, val)); + } + Operator::I64Extend32S => { + let val = state.pop1(); + state.push1(builder.ins().ireduce(I32, val)); + let val = state.pop1(); + state.push1(builder.ins().sextend(I64, val)); + } + /****************************** Binary Operators ************************************/ + Operator::I32Add | Operator::I64Add => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().iadd(arg1, arg2)); + } + Operator::I32And | Operator::I64And | Operator::V128And => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().band(arg1, arg2)); + } + Operator::I32Or | Operator::I64Or | Operator::V128Or => { + let (arg1, arg2) = state.pop2(); + 
state.push1(builder.ins().bor(arg1, arg2)); + } + Operator::I32Xor | Operator::I64Xor | Operator::V128Xor => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().bxor(arg1, arg2)); + } + Operator::I32Shl | Operator::I64Shl => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().ishl(arg1, arg2)); + } + Operator::I32ShrS | Operator::I64ShrS => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().sshr(arg1, arg2)); + } + Operator::I32ShrU | Operator::I64ShrU => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().ushr(arg1, arg2)); + } + Operator::I32Rotl | Operator::I64Rotl => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().rotl(arg1, arg2)); + } + Operator::I32Rotr | Operator::I64Rotr => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().rotr(arg1, arg2)); + } + Operator::F32Add | Operator::F64Add => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().fadd(arg1, arg2)); + } + Operator::I32Sub | Operator::I64Sub => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().isub(arg1, arg2)); + } + Operator::F32Sub | Operator::F64Sub => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().fsub(arg1, arg2)); + } + Operator::I32Mul | Operator::I64Mul => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().imul(arg1, arg2)); + } + Operator::F32Mul | Operator::F64Mul => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().fmul(arg1, arg2)); + } + Operator::F32Div | Operator::F64Div => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().fdiv(arg1, arg2)); + } + Operator::I32DivS | Operator::I64DivS => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().sdiv(arg1, arg2)); + } + Operator::I32DivU | Operator::I64DivU => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().udiv(arg1, arg2)); + } + Operator::I32RemS | Operator::I64RemS => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().srem(arg1, arg2)); + } + Operator::I32RemU | Operator::I64RemU => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().urem(arg1, arg2)); + } + Operator::F32Min | Operator::F64Min => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().fmin(arg1, arg2)); + } + Operator::F32Max | Operator::F64Max => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().fmax(arg1, arg2)); + } + Operator::F32Copysign | Operator::F64Copysign => { + let (arg1, arg2) = state.pop2(); + state.push1(builder.ins().fcopysign(arg1, arg2)); + } + /**************************** Comparison Operators **********************************/ + Operator::I32LtS | Operator::I64LtS => { + translate_icmp(IntCC::SignedLessThan, builder, state) + } + Operator::I32LtU | Operator::I64LtU => { + translate_icmp(IntCC::UnsignedLessThan, builder, state) + } + Operator::I32LeS | Operator::I64LeS => { + translate_icmp(IntCC::SignedLessThanOrEqual, builder, state) + } + Operator::I32LeU | Operator::I64LeU => { + translate_icmp(IntCC::UnsignedLessThanOrEqual, builder, state) + } + Operator::I32GtS | Operator::I64GtS => { + translate_icmp(IntCC::SignedGreaterThan, builder, state) + } + Operator::I32GtU | Operator::I64GtU => { + translate_icmp(IntCC::UnsignedGreaterThan, builder, state) + } + Operator::I32GeS | Operator::I64GeS => { + translate_icmp(IntCC::SignedGreaterThanOrEqual, builder, state) + } + Operator::I32GeU | Operator::I64GeU => { + translate_icmp(IntCC::UnsignedGreaterThanOrEqual, builder, state) + } + Operator::I32Eqz | Operator::I64Eqz => { 
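+            // `icmp_imm` yields a boolean (`b1`) value, while a wasm comparison result
+            // is a 32-bit integer, so `bint` widens the flag back to an I32 0-or-1.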
+ let arg = state.pop1(); + let val = builder.ins().icmp_imm(IntCC::Equal, arg, 0); + state.push1(builder.ins().bint(I32, val)); + } + Operator::I32Eq | Operator::I64Eq => translate_icmp(IntCC::Equal, builder, state), + Operator::F32Eq | Operator::F64Eq => translate_fcmp(FloatCC::Equal, builder, state), + Operator::I32Ne | Operator::I64Ne => translate_icmp(IntCC::NotEqual, builder, state), + Operator::F32Ne | Operator::F64Ne => translate_fcmp(FloatCC::NotEqual, builder, state), + Operator::F32Gt | Operator::F64Gt => translate_fcmp(FloatCC::GreaterThan, builder, state), + Operator::F32Ge | Operator::F64Ge => { + translate_fcmp(FloatCC::GreaterThanOrEqual, builder, state) + } + Operator::F32Lt | Operator::F64Lt => translate_fcmp(FloatCC::LessThan, builder, state), + Operator::F32Le | Operator::F64Le => { + translate_fcmp(FloatCC::LessThanOrEqual, builder, state) + } + Operator::RefNull => state.push1(builder.ins().null(environ.reference_type())), + Operator::RefIsNull => { + let arg = state.pop1(); + let val = builder.ins().is_null(arg); + let val_int = builder.ins().bint(I32, val); + state.push1(val_int); + } + Operator::RefFunc { function_index } => { + state.push1(environ.translate_ref_func(builder.cursor(), *function_index)?); + } + Operator::AtomicNotify { .. } + | Operator::I32AtomicWait { .. } + | Operator::I64AtomicWait { .. } + | Operator::I32AtomicLoad { .. } + | Operator::I64AtomicLoad { .. } + | Operator::I32AtomicLoad8U { .. } + | Operator::I32AtomicLoad16U { .. } + | Operator::I64AtomicLoad8U { .. } + | Operator::I64AtomicLoad16U { .. } + | Operator::I64AtomicLoad32U { .. } + | Operator::I32AtomicStore { .. } + | Operator::I64AtomicStore { .. } + | Operator::I32AtomicStore8 { .. } + | Operator::I32AtomicStore16 { .. } + | Operator::I64AtomicStore8 { .. } + | Operator::I64AtomicStore16 { .. } + | Operator::I64AtomicStore32 { .. } + | Operator::I32AtomicRmwAdd { .. } + | Operator::I64AtomicRmwAdd { .. } + | Operator::I32AtomicRmw8AddU { .. } + | Operator::I32AtomicRmw16AddU { .. } + | Operator::I64AtomicRmw8AddU { .. } + | Operator::I64AtomicRmw16AddU { .. } + | Operator::I64AtomicRmw32AddU { .. } + | Operator::I32AtomicRmwSub { .. } + | Operator::I64AtomicRmwSub { .. } + | Operator::I32AtomicRmw8SubU { .. } + | Operator::I32AtomicRmw16SubU { .. } + | Operator::I64AtomicRmw8SubU { .. } + | Operator::I64AtomicRmw16SubU { .. } + | Operator::I64AtomicRmw32SubU { .. } + | Operator::I32AtomicRmwAnd { .. } + | Operator::I64AtomicRmwAnd { .. } + | Operator::I32AtomicRmw8AndU { .. } + | Operator::I32AtomicRmw16AndU { .. } + | Operator::I64AtomicRmw8AndU { .. } + | Operator::I64AtomicRmw16AndU { .. } + | Operator::I64AtomicRmw32AndU { .. } + | Operator::I32AtomicRmwOr { .. } + | Operator::I64AtomicRmwOr { .. } + | Operator::I32AtomicRmw8OrU { .. } + | Operator::I32AtomicRmw16OrU { .. } + | Operator::I64AtomicRmw8OrU { .. } + | Operator::I64AtomicRmw16OrU { .. } + | Operator::I64AtomicRmw32OrU { .. } + | Operator::I32AtomicRmwXor { .. } + | Operator::I64AtomicRmwXor { .. } + | Operator::I32AtomicRmw8XorU { .. } + | Operator::I32AtomicRmw16XorU { .. } + | Operator::I64AtomicRmw8XorU { .. } + | Operator::I64AtomicRmw16XorU { .. } + | Operator::I64AtomicRmw32XorU { .. } + | Operator::I32AtomicRmwXchg { .. } + | Operator::I64AtomicRmwXchg { .. } + | Operator::I32AtomicRmw8XchgU { .. } + | Operator::I32AtomicRmw16XchgU { .. } + | Operator::I64AtomicRmw8XchgU { .. } + | Operator::I64AtomicRmw16XchgU { .. } + | Operator::I64AtomicRmw32XchgU { .. } + | Operator::I32AtomicRmwCmpxchg { .. 
} + | Operator::I64AtomicRmwCmpxchg { .. } + | Operator::I32AtomicRmw8CmpxchgU { .. } + | Operator::I32AtomicRmw16CmpxchgU { .. } + | Operator::I64AtomicRmw8CmpxchgU { .. } + | Operator::I64AtomicRmw16CmpxchgU { .. } + | Operator::I64AtomicRmw32CmpxchgU { .. } + | Operator::AtomicFence { .. } => { + return Err(wasm_unsupported!("proposed thread operator {:?}", op)); + } + Operator::MemoryCopy => { + // The WebAssembly MVP only supports one linear memory and + // wasmparser will ensure that the memory indices specified are + // zero. + let heap_index = MemoryIndex::from_u32(0); + let heap = state.get_heap(builder.func, 0, environ)?; + let len = state.pop1(); + let src = state.pop1(); + let dest = state.pop1(); + environ.translate_memory_copy(builder.cursor(), heap_index, heap, dest, src, len)?; + } + Operator::MemoryFill => { + // The WebAssembly MVP only supports one linear memory and + // wasmparser will ensure that the memory index specified is + // zero. + let heap_index = MemoryIndex::from_u32(0); + let heap = state.get_heap(builder.func, 0, environ)?; + let len = state.pop1(); + let val = state.pop1(); + let dest = state.pop1(); + environ.translate_memory_fill(builder.cursor(), heap_index, heap, dest, val, len)?; + } + Operator::MemoryInit { segment } => { + // The WebAssembly MVP only supports one linear memory and + // wasmparser will ensure that the memory index specified is + // zero. + let heap_index = MemoryIndex::from_u32(0); + let heap = state.get_heap(builder.func, 0, environ)?; + let len = state.pop1(); + let src = state.pop1(); + let dest = state.pop1(); + environ.translate_memory_init( + builder.cursor(), + heap_index, + heap, + *segment, + dest, + src, + len, + )?; + } + Operator::DataDrop { segment } => { + environ.translate_data_drop(builder.cursor(), *segment)?; + } + Operator::TableSize { table: index } => { + let table = state.get_table(builder.func, *index, environ)?; + state.push1(environ.translate_table_size( + builder.cursor(), + TableIndex::from_u32(*index), + table, + )?); + } + Operator::TableGrow { table } => { + let delta = state.pop1(); + let init_value = state.pop1(); + state.push1(environ.translate_table_grow( + builder.cursor(), + *table, + delta, + init_value, + )?); + } + Operator::TableGet { table } => { + let index = state.pop1(); + state.push1(environ.translate_table_get(builder.cursor(), *table, index)?); + } + Operator::TableSet { table } => { + let value = state.pop1(); + let index = state.pop1(); + environ.translate_table_set(builder.cursor(), *table, value, index)?; + } + Operator::TableCopy { + dst_table: dst_table_index, + src_table: src_table_index, + } => { + let dst_table = state.get_table(builder.func, *dst_table_index, environ)?; + let src_table = state.get_table(builder.func, *src_table_index, environ)?; + let len = state.pop1(); + let src = state.pop1(); + let dest = state.pop1(); + environ.translate_table_copy( + builder.cursor(), + TableIndex::from_u32(*dst_table_index), + dst_table, + TableIndex::from_u32(*src_table_index), + src_table, + dest, + src, + len, + )?; + } + Operator::TableFill { table } => { + let len = state.pop1(); + let val = state.pop1(); + let dest = state.pop1(); + environ.translate_table_fill(builder.cursor(), *table, dest, val, len)?; + } + Operator::TableInit { + segment, + table: table_index, + } => { + let table = state.get_table(builder.func, *table_index, environ)?; + let len = state.pop1(); + let src = state.pop1(); + let dest = state.pop1(); + environ.translate_table_init( + builder.cursor(), + *segment, 
+                TableIndex::from_u32(*table_index),
+                table,
+                dest,
+                src,
+                len,
+            )?;
+        }
+        Operator::ElemDrop { segment } => {
+            environ.translate_elem_drop(builder.cursor(), *segment)?;
+        }
+        Operator::V128Const { value } => {
+            let data = value.bytes().to_vec().into();
+            let handle = builder.func.dfg.constants.insert(data);
+            let value = builder.ins().vconst(I8X16, handle);
+            // The v128.const is typed in CLIF as an I8X16 but may be raw_bitcast
+            // to a different type before use.
+            state.push1(value)
+        }
+        Operator::I8x16Splat | Operator::I16x8Splat => {
+            let reduced = builder.ins().ireduce(type_of(op).lane_type(), state.pop1());
+            let splatted = builder.ins().splat(type_of(op), reduced);
+            state.push1(splatted)
+        }
+        Operator::I32x4Splat
+        | Operator::I64x2Splat
+        | Operator::F32x4Splat
+        | Operator::F64x2Splat => {
+            let splatted = builder.ins().splat(type_of(op), state.pop1());
+            state.push1(splatted)
+        }
+        Operator::V8x16LoadSplat {
+            memarg: MemoryImmediate { flags: _, offset },
+        }
+        | Operator::V16x8LoadSplat {
+            memarg: MemoryImmediate { flags: _, offset },
+        }
+        | Operator::V32x4LoadSplat {
+            memarg: MemoryImmediate { flags: _, offset },
+        }
+        | Operator::V64x2LoadSplat {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => {
+            // TODO: For spec compliance, this is initially implemented as a combination of `load +
+            // splat` but could be implemented eventually as a single instruction (`load_splat`).
+            // See https://github.com/bytecodealliance/cranelift/issues/1348.
+            translate_load(
+                *offset,
+                ir::Opcode::Load,
+                type_of(op).lane_type(),
+                builder,
+                state,
+                environ,
+            )?;
+            let splatted = builder.ins().splat(type_of(op), state.pop1());
+            state.push1(splatted)
+        }
+        Operator::I8x16ExtractLaneS { lane } | Operator::I16x8ExtractLaneS { lane } => {
+            let vector = pop1_with_bitcast(state, type_of(op), builder);
+            let extracted = builder.ins().extractlane(vector, lane.clone());
+            state.push1(builder.ins().sextend(I32, extracted))
+        }
+        Operator::I8x16ExtractLaneU { lane } | Operator::I16x8ExtractLaneU { lane } => {
+            let vector = pop1_with_bitcast(state, type_of(op), builder);
+            state.push1(builder.ins().extractlane(vector, lane.clone()));
+            // On x86, PEXTRB zeroes the upper bits of the destination register of
+            // extractlane, so the uextend is elided; of course, this depends on
+            // extractlane being legalized to a PEXTRB.
+        }
+        Operator::I32x4ExtractLane { lane }
+        | Operator::I64x2ExtractLane { lane }
+        | Operator::F32x4ExtractLane { lane }
+        | Operator::F64x2ExtractLane { lane } => {
+            let vector = pop1_with_bitcast(state, type_of(op), builder);
+            state.push1(builder.ins().extractlane(vector, lane.clone()))
+        }
+        Operator::I8x16ReplaceLane { lane }
+        | Operator::I16x8ReplaceLane { lane }
+        | Operator::I32x4ReplaceLane { lane }
+        | Operator::I64x2ReplaceLane { lane }
+        | Operator::F32x4ReplaceLane { lane }
+        | Operator::F64x2ReplaceLane { lane } => {
+            let (vector, replacement_value) = state.pop2();
+            let original_vector_type = builder.func.dfg.value_type(vector);
+            let vector = optionally_bitcast_vector(vector, type_of(op), builder);
+            let replaced_vector = builder
+                .ins()
+                .insertlane(vector, lane.clone(), replacement_value);
+            state.push1(optionally_bitcast_vector(
+                replaced_vector,
+                original_vector_type,
+                builder,
+            ))
+        }
+        Operator::V8x16Shuffle { lanes, ..
} => { + let (a, b) = pop2_with_bitcast(state, I8X16, builder); + let lanes = ConstantData::from(lanes.as_ref()); + let mask = builder.func.dfg.immediates.push(lanes); + let shuffled = builder.ins().shuffle(a, b, mask); + state.push1(shuffled) + // At this point the original types of a and b are lost; users of this value (i.e. this + // WASM-to-CLIF translator) may need to raw_bitcast for type-correctness. This is due + // to WASM using the less specific v128 type for certain operations and more specific + // types (e.g. i8x16) for others. + } + Operator::I8x16Add | Operator::I16x8Add | Operator::I32x4Add | Operator::I64x2Add => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().iadd(a, b)) + } + Operator::I8x16AddSaturateS | Operator::I16x8AddSaturateS => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().sadd_sat(a, b)) + } + Operator::I8x16AddSaturateU | Operator::I16x8AddSaturateU => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().uadd_sat(a, b)) + } + Operator::I8x16Sub | Operator::I16x8Sub | Operator::I32x4Sub | Operator::I64x2Sub => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().isub(a, b)) + } + Operator::I8x16SubSaturateS | Operator::I16x8SubSaturateS => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().ssub_sat(a, b)) + } + Operator::I8x16SubSaturateU | Operator::I16x8SubSaturateU => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().usub_sat(a, b)) + } + Operator::I8x16MinS | Operator::I16x8MinS | Operator::I32x4MinS => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().imin(a, b)) + } + Operator::I8x16MinU | Operator::I16x8MinU | Operator::I32x4MinU => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().umin(a, b)) + } + Operator::I8x16MaxS | Operator::I16x8MaxS | Operator::I32x4MaxS => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().imax(a, b)) + } + Operator::I8x16MaxU | Operator::I16x8MaxU | Operator::I32x4MaxU => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().umax(a, b)) + } + Operator::I8x16RoundingAverageU | Operator::I16x8RoundingAverageU => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().avg_round(a, b)) + } + Operator::I8x16Neg | Operator::I16x8Neg | Operator::I32x4Neg | Operator::I64x2Neg => { + let a = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().ineg(a)) + } + Operator::I16x8Mul | Operator::I32x4Mul => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().imul(a, b)) + } + Operator::V128AndNot => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().band_not(a, b)) + } + Operator::V128Not => { + let a = state.pop1(); + state.push1(builder.ins().bnot(a)); + } + Operator::I16x8Shl | Operator::I32x4Shl | Operator::I64x2Shl => { + let (a, b) = state.pop2(); + let bitcast_a = optionally_bitcast_vector(a, type_of(op), builder); + let bitwidth = i64::from(builder.func.dfg.value_type(a).bits()); + // The spec expects to shift with `b mod lanewidth`; so, e.g., for 16 bit lane-width + // we do `b AND 15`; this means fewer instructions than `iconst + urem`. 
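+            // For example, with 32-bit lanes a shift amount of 35 must behave
+            // like 35 % 32 == 3; because the lane width is a power of two,
+            // `35 & (32 - 1)` yields the same result with a single `band_imm`.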
+ let b_mod_bitwidth = builder.ins().band_imm(b, bitwidth - 1); + state.push1(builder.ins().ishl(bitcast_a, b_mod_bitwidth)) + } + Operator::I16x8ShrU | Operator::I32x4ShrU | Operator::I64x2ShrU => { + let (a, b) = state.pop2(); + let bitcast_a = optionally_bitcast_vector(a, type_of(op), builder); + let bitwidth = i64::from(builder.func.dfg.value_type(a).bits()); + // The spec expects to shift with `b mod lanewidth`; so, e.g., for 16 bit lane-width + // we do `b AND 15`; this means fewer instructions than `iconst + urem`. + let b_mod_bitwidth = builder.ins().band_imm(b, bitwidth - 1); + state.push1(builder.ins().ushr(bitcast_a, b_mod_bitwidth)) + } + Operator::I16x8ShrS | Operator::I32x4ShrS => { + let (a, b) = state.pop2(); + let bitcast_a = optionally_bitcast_vector(a, type_of(op), builder); + let bitwidth = i64::from(builder.func.dfg.value_type(a).bits()); + // The spec expects to shift with `b mod lanewidth`; so, e.g., for 16 bit lane-width + // we do `b AND 15`; this means fewer instructions than `iconst + urem`. + let b_mod_bitwidth = builder.ins().band_imm(b, bitwidth - 1); + state.push1(builder.ins().sshr(bitcast_a, b_mod_bitwidth)) + } + Operator::V128Bitselect => { + let (a, b, c) = state.pop3(); + let bitcast_a = optionally_bitcast_vector(a, I8X16, builder); + let bitcast_b = optionally_bitcast_vector(b, I8X16, builder); + let bitcast_c = optionally_bitcast_vector(c, I8X16, builder); + // The CLIF operand ordering is slightly different and the types of all three + // operands must match (hence the bitcast). + state.push1(builder.ins().bitselect(bitcast_c, bitcast_a, bitcast_b)) + } + Operator::I8x16AnyTrue + | Operator::I16x8AnyTrue + | Operator::I32x4AnyTrue + | Operator::I64x2AnyTrue => { + let a = pop1_with_bitcast(state, type_of(op), builder); + let bool_result = builder.ins().vany_true(a); + state.push1(builder.ins().bint(I32, bool_result)) + } + Operator::I8x16AllTrue + | Operator::I16x8AllTrue + | Operator::I32x4AllTrue + | Operator::I64x2AllTrue => { + let a = pop1_with_bitcast(state, type_of(op), builder); + let bool_result = builder.ins().vall_true(a); + state.push1(builder.ins().bint(I32, bool_result)) + } + Operator::I8x16Eq | Operator::I16x8Eq | Operator::I32x4Eq => { + translate_vector_icmp(IntCC::Equal, type_of(op), builder, state) + } + Operator::I8x16Ne | Operator::I16x8Ne | Operator::I32x4Ne => { + translate_vector_icmp(IntCC::NotEqual, type_of(op), builder, state) + } + Operator::I8x16GtS | Operator::I16x8GtS | Operator::I32x4GtS => { + translate_vector_icmp(IntCC::SignedGreaterThan, type_of(op), builder, state) + } + Operator::I8x16LtS | Operator::I16x8LtS | Operator::I32x4LtS => { + translate_vector_icmp(IntCC::SignedLessThan, type_of(op), builder, state) + } + Operator::I8x16GtU | Operator::I16x8GtU | Operator::I32x4GtU => { + translate_vector_icmp(IntCC::UnsignedGreaterThan, type_of(op), builder, state) + } + Operator::I8x16LtU | Operator::I16x8LtU | Operator::I32x4LtU => { + translate_vector_icmp(IntCC::UnsignedLessThan, type_of(op), builder, state) + } + Operator::I8x16GeS | Operator::I16x8GeS | Operator::I32x4GeS => { + translate_vector_icmp(IntCC::SignedGreaterThanOrEqual, type_of(op), builder, state) + } + Operator::I8x16LeS | Operator::I16x8LeS | Operator::I32x4LeS => { + translate_vector_icmp(IntCC::SignedLessThanOrEqual, type_of(op), builder, state) + } + Operator::I8x16GeU | Operator::I16x8GeU | Operator::I32x4GeU => translate_vector_icmp( + IntCC::UnsignedGreaterThanOrEqual, + type_of(op), + builder, + state, + ), + Operator::I8x16LeU | 
Operator::I16x8LeU | Operator::I32x4LeU => { + translate_vector_icmp(IntCC::UnsignedLessThanOrEqual, type_of(op), builder, state) + } + Operator::F32x4Eq | Operator::F64x2Eq => { + translate_vector_fcmp(FloatCC::Equal, type_of(op), builder, state) + } + Operator::F32x4Ne | Operator::F64x2Ne => { + translate_vector_fcmp(FloatCC::NotEqual, type_of(op), builder, state) + } + Operator::F32x4Lt | Operator::F64x2Lt => { + translate_vector_fcmp(FloatCC::LessThan, type_of(op), builder, state) + } + Operator::F32x4Gt | Operator::F64x2Gt => { + translate_vector_fcmp(FloatCC::GreaterThan, type_of(op), builder, state) + } + Operator::F32x4Le | Operator::F64x2Le => { + translate_vector_fcmp(FloatCC::LessThanOrEqual, type_of(op), builder, state) + } + Operator::F32x4Ge | Operator::F64x2Ge => { + translate_vector_fcmp(FloatCC::GreaterThanOrEqual, type_of(op), builder, state) + } + Operator::F32x4Add | Operator::F64x2Add => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fadd(a, b)) + } + Operator::F32x4Sub | Operator::F64x2Sub => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fsub(a, b)) + } + Operator::F32x4Mul | Operator::F64x2Mul => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fmul(a, b)) + } + Operator::F32x4Div | Operator::F64x2Div => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fdiv(a, b)) + } + Operator::F32x4Max | Operator::F64x2Max => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fmax(a, b)) + } + Operator::F32x4Min | Operator::F64x2Min => { + let (a, b) = pop2_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fmin(a, b)) + } + Operator::F32x4Sqrt | Operator::F64x2Sqrt => { + let a = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().sqrt(a)) + } + Operator::F32x4Neg | Operator::F64x2Neg => { + let a = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fneg(a)) + } + Operator::F32x4Abs | Operator::F64x2Abs => { + let a = pop1_with_bitcast(state, type_of(op), builder); + state.push1(builder.ins().fabs(a)) + } + Operator::I8x16Shl + | Operator::I8x16ShrS + | Operator::I8x16ShrU + | Operator::I8x16Mul + | Operator::I64x2Mul + | Operator::I64x2ShrS + | Operator::I32x4TruncSatF32x4S + | Operator::I32x4TruncSatF32x4U + | Operator::I64x2TruncSatF64x2S + | Operator::I64x2TruncSatF64x2U + | Operator::F32x4ConvertI32x4S + | Operator::F32x4ConvertI32x4U + | Operator::F64x2ConvertI64x2S + | Operator::F64x2ConvertI64x2U { .. } + | Operator::I8x16NarrowI16x8S { .. } + | Operator::I8x16NarrowI16x8U { .. } + | Operator::I16x8NarrowI32x4S { .. } + | Operator::I16x8NarrowI32x4U { .. } + | Operator::I16x8WidenLowI8x16S { .. } + | Operator::I16x8WidenHighI8x16S { .. } + | Operator::I16x8WidenLowI8x16U { .. } + | Operator::I16x8WidenHighI8x16U { .. } + | Operator::I32x4WidenLowI16x8S { .. } + | Operator::I32x4WidenHighI16x8S { .. } + | Operator::I32x4WidenLowI16x8U { .. } + | Operator::I32x4WidenHighI16x8U { .. } + | Operator::V8x16Swizzle + | Operator::I16x8Load8x8S { .. } + | Operator::I16x8Load8x8U { .. } + | Operator::I32x4Load16x4S { .. } + | Operator::I32x4Load16x4U { .. } + | Operator::I64x2Load32x2S { .. } + | Operator::I64x2Load32x2U { .. 
} => {
+            return Err(wasm_unsupported!("proposed SIMD operator {:?}", op));
+        }
+    };
+    Ok(())
+}
+
+// Clippy warns us of some fields we are deliberately ignoring
+#[cfg_attr(feature = "cargo-clippy", allow(clippy::unneeded_field_pattern))]
+/// Deals with a Wasm instruction located in an unreachable portion of the code. Most of these
+/// instructions are dropped, but special ones like `End` or `Else` signal the potential end of
+/// the unreachable portion, so the translation state must be updated accordingly.
+fn translate_unreachable_operator<FE: FuncEnvironment + ?Sized>(
+    module_translation_state: &ModuleTranslationState,
+    op: &Operator,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+    environ: &mut FE,
+) -> WasmResult<()> {
+    debug_assert!(!state.reachable);
+    match *op {
+        Operator::If { ty } => {
+            // Push a placeholder control stack entry. The if isn't reachable,
+            // so we don't have any branches anywhere.
+            state.push_if(
+                ir::Block::reserved_value(),
+                ElseData::NoElse {
+                    branch_inst: ir::Inst::reserved_value(),
+                },
+                0,
+                0,
+                ty,
+            );
+        }
+        Operator::Loop { ty: _ } | Operator::Block { ty: _ } => {
+            state.push_block(ir::Block::reserved_value(), 0, 0);
+        }
+        Operator::Else => {
+            let i = state.control_stack.len() - 1;
+            match state.control_stack[i] {
+                ControlStackFrame::If {
+                    ref else_data,
+                    head_is_reachable,
+                    ref mut consequent_ends_reachable,
+                    blocktype,
+                    ..
+                } => {
+                    debug_assert!(consequent_ends_reachable.is_none());
+                    *consequent_ends_reachable = Some(state.reachable);
+
+                    if head_is_reachable {
+                        // We have a branch from the head of the `if` to the `else`.
+                        state.reachable = true;
+
+                        let else_block = match *else_data {
+                            ElseData::NoElse { branch_inst } => {
+                                let (params, _results) =
+                                    blocktype_params_results(module_translation_state, blocktype)?;
+                                let else_block = block_with_params(builder, params, environ)?;
+
+                                // We change the target of the branch instruction.
+                                builder.change_jump_destination(branch_inst, else_block);
+                                builder.seal_block(else_block);
+                                else_block
+                            }
+                            ElseData::WithElse { else_block } => else_block,
+                        };
+
+                        builder.switch_to_block(else_block);
+
+                        // Again, no need to push the parameters for the `else`,
+                        // since we already did when we saw the original `if`. See
+                        // the comment for translating `Operator::Else` in
+                        // `translate_operator` for details.
+                    }
+                }
+                _ => unreachable!(),
+            }
+        }
+        Operator::End => {
+            let stack = &mut state.stack;
+            let control_stack = &mut state.control_stack;
+            let frame = control_stack.pop().unwrap();
+
+            // Truncate the value stack back to this frame's original size,
+            // dropping any values that were left behind by unreachable code
+            // that has not been translated.
+            stack.truncate(frame.original_stack_size());
+
+            let reachable_anyway = match frame {
+                // If it is a loop we also have to seal the body loop block
+                ControlStackFrame::Loop { header, .. } => {
+                    builder.seal_block(header);
+                    // And loops can't have branches to the end.
+                    false
+                }
+                // If we never set `consequent_ends_reachable` then that means
+                // we are finishing the consequent now, and there was no
+                // `else`. Whether the following block is reachable depends only
+                // on if the head was reachable.
+                ControlStackFrame::If {
+                    head_is_reachable,
+                    consequent_ends_reachable: None,
+                    ..
+                } => head_is_reachable,
+                // Since we are only in this function when in unreachable code,
+                // we know that the alternative just ended unreachable. Whether
+                // the following block is reachable depends on if the consequent
+                // ended reachable or not.
+                ControlStackFrame::If {
+                    head_is_reachable,
+                    consequent_ends_reachable: Some(consequent_ends_reachable),
+                    ..
+                } => head_is_reachable && consequent_ends_reachable,
+                // All other control constructs are already handled.
+                _ => false,
+            };
+
+            if frame.exit_is_branched_to() || reachable_anyway {
+                builder.switch_to_block(frame.following_code());
+                builder.seal_block(frame.following_code());
+
+                // And add the return values of the block but only if the next block is reachable
+                // (which corresponds to testing if the stack depth is 1)
+                stack.extend_from_slice(builder.block_params(frame.following_code()));
+                state.reachable = true;
+            }
+        }
+        _ => {
+            // We don't translate because this is unreachable code
+        }
+    }
+
+    Ok(())
+}
+
+/// Get the address+offset to use for a heap access.
+fn get_heap_addr(
+    heap: ir::Heap,
+    addr32: ir::Value,
+    offset: u32,
+    addr_ty: Type,
+    builder: &mut FunctionBuilder,
+) -> (ir::Value, i32) {
+    use core::cmp::min;
+
+    let mut adjusted_offset = u64::from(offset);
+    let offset_guard_size: u64 = builder.func.heaps[heap].offset_guard_size.into();
+
+    // Generate `heap_addr` instructions that are friendly to CSE by checking offsets that are
+    // multiples of the offset-guard size. Add one to make sure that we check the pointer itself
+    // is in bounds.
+    if offset_guard_size != 0 {
+        adjusted_offset = adjusted_offset / offset_guard_size * offset_guard_size;
+    }
+
+    // For accesses on the outer skirts of the offset-guard pages, we expect that we get a trap
+    // even if the access goes beyond the offset-guard pages. This is because the first byte
+    // pointed to is inside the offset-guard pages.
+    let check_size = min(u64::from(u32::MAX), 1 + adjusted_offset) as u32;
+    let base = builder.ins().heap_addr(addr_ty, heap, addr32, check_size);
+
+    // Native load/store instructions take a signed `Offset32` immediate, so adjust the base
+    // pointer if necessary.
+    if offset > i32::MAX as u32 {
+        // Offset doesn't fit in the load/store instruction.
+        let adj = builder.ins().iadd_imm(base, i64::from(i32::MAX) + 1);
+        (adj, (offset - (i32::MAX as u32 + 1)) as i32)
+    } else {
+        (base, offset as i32)
+    }
+}
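// Editor's note: a standalone sketch (hypothetical helper and test, not part of
// this patch) of the offset-rounding arithmetic above, showing why it makes the
// emitted `heap_addr` instructions CSE-friendly.
fn cse_friendly_offset(offset: u64, offset_guard_size: u64) -> u64 {
    // Snap the offset down to a multiple of the offset-guard size, exactly as
    // `get_heap_addr` does.
    if offset_guard_size != 0 {
        offset / offset_guard_size * offset_guard_size
    } else {
        offset
    }
}

#[test]
fn offsets_collapse_to_guard_multiples() {
    // With a 2 GiB guard region, all small constant offsets round down to 0, so
    // accesses at `base+4` and `base+8` produce identical bounds checks.
    assert_eq!(cse_friendly_offset(4, 0x8000_0000), 0);
    assert_eq!(cse_friendly_offset(8, 0x8000_0000), 0);
    assert_eq!(cse_friendly_offset(0x8000_0004, 0x8000_0000), 0x8000_0000);
}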
+/// Translate a load instruction.
+fn translate_load<FE: FuncEnvironment + ?Sized>(
+    offset: u32,
+    opcode: ir::Opcode,
+    result_ty: Type,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+    environ: &mut FE,
+) -> WasmResult<()> {
+    let addr32 = state.pop1();
+    // We don't yet support multiple linear memories.
+    let heap = state.get_heap(builder.func, 0, environ)?;
+    let (base, offset) = get_heap_addr(heap, addr32, offset, environ.pointer_type(), builder);
+    // Note that we don't set `is_aligned` here, even if the load instruction's
+    // alignment immediate says it's aligned, because WebAssembly's immediate
+    // field is just a hint, while Cranelift's aligned flag needs a guarantee.
+    let flags = MemFlags::new();
+    let (load, dfg) = builder
+        .ins()
+        .Load(opcode, result_ty, flags, offset.into(), base);
+    state.push1(dfg.first_result(load));
+    Ok(())
+}
+
+/// Translate a store instruction.
+fn translate_store<FE: FuncEnvironment + ?Sized>(
+    offset: u32,
+    opcode: ir::Opcode,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+    environ: &mut FE,
+) -> WasmResult<()> {
+    let (addr32, val) = state.pop2();
+    let val_ty = builder.func.dfg.value_type(val);
+
+    // We don't yet support multiple linear memories.
+    let heap = state.get_heap(builder.func, 0, environ)?;
+    let (base, offset) = get_heap_addr(heap, addr32, offset, environ.pointer_type(), builder);
+    // See the comments in `translate_load` about the flags.
+    let flags = MemFlags::new();
+    builder
+        .ins()
+        .Store(opcode, val_ty, flags, offset.into(), val, base);
+    Ok(())
+}
+
+fn translate_icmp(cc: IntCC, builder: &mut FunctionBuilder, state: &mut FuncTranslationState) {
+    let (arg0, arg1) = state.pop2();
+    let val = builder.ins().icmp(cc, arg0, arg1);
+    state.push1(builder.ins().bint(I32, val));
+}
+
+fn translate_vector_icmp(
+    cc: IntCC,
+    needed_type: Type,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+) {
+    let (a, b) = state.pop2();
+    let bitcast_a = optionally_bitcast_vector(a, needed_type, builder);
+    let bitcast_b = optionally_bitcast_vector(b, needed_type, builder);
+    state.push1(builder.ins().icmp(cc, bitcast_a, bitcast_b))
+}
+
+fn translate_fcmp(cc: FloatCC, builder: &mut FunctionBuilder, state: &mut FuncTranslationState) {
+    let (arg0, arg1) = state.pop2();
+    let val = builder.ins().fcmp(cc, arg0, arg1);
+    state.push1(builder.ins().bint(I32, val));
+}
+
+fn translate_vector_fcmp(
+    cc: FloatCC,
+    needed_type: Type,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+) {
+    let (a, b) = state.pop2();
+    let bitcast_a = optionally_bitcast_vector(a, needed_type, builder);
+    let bitcast_b = optionally_bitcast_vector(b, needed_type, builder);
+    state.push1(builder.ins().fcmp(cc, bitcast_a, bitcast_b))
+}
+
+fn translate_br_if(
+    relative_depth: u32,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+) {
+    let val = state.pop1();
+    let (br_destination, inputs) = translate_br_if_args(relative_depth, state);
+
+    // Bitcast any vector arguments to their default type, I8X16, before jumping.
+    let destination_types = builder.func.dfg.block_param_types(br_destination);
+    bitcast_arguments(inputs, &destination_types[..inputs.len()], builder);
+
+    builder.ins().brnz(val, br_destination, inputs);
+
+    let next_block = builder.create_block();
+    builder.ins().jump(next_block, &[]);
+    builder.seal_block(next_block); // The only predecessor is the current block.
+    builder.switch_to_block(next_block);
+}
+
+fn translate_br_if_args(
+    relative_depth: u32,
+    state: &mut FuncTranslationState,
+) -> (ir::Block, &mut [ir::Value]) {
+    let i = state.control_stack.len() - 1 - (relative_depth as usize);
+    let (return_count, br_destination) = {
+        let frame = &mut state.control_stack[i];
+        // The values returned by the branch are still available for the reachable
+        // code that comes after it
+        frame.set_branched_to_exit();
+        let return_count = if frame.is_loop() {
+            frame.num_param_values()
+        } else {
+            frame.num_return_values()
+        };
+        (return_count, frame.br_destination())
+    };
+    let inputs = state.peekn_mut(return_count);
+    (br_destination, inputs)
+}
+
+/// Determine the returned value type of a WebAssembly operator
+fn type_of(operator: &Operator) -> Type {
+    match operator {
+        Operator::V128Load { .. }
+        | Operator::V128Store { .. }
+        | Operator::V128Const { .. }
+        | Operator::V128Not
+        | Operator::V128And
+        | Operator::V128AndNot
+        | Operator::V128Or
+        | Operator::V128Xor
+        | Operator::V128Bitselect => I8X16, // default type representing V128
+
+        Operator::V8x16Shuffle { .. }
+        | Operator::I8x16Splat
+        | Operator::V8x16LoadSplat { .. }
+        | Operator::I8x16ExtractLaneS { .. }
+        | Operator::I8x16ExtractLaneU { .. }
+        | Operator::I8x16ReplaceLane { ..
} + | Operator::I8x16Eq + | Operator::I8x16Ne + | Operator::I8x16LtS + | Operator::I8x16LtU + | Operator::I8x16GtS + | Operator::I8x16GtU + | Operator::I8x16LeS + | Operator::I8x16LeU + | Operator::I8x16GeS + | Operator::I8x16GeU + | Operator::I8x16Neg + | Operator::I8x16AnyTrue + | Operator::I8x16AllTrue + | Operator::I8x16Shl + | Operator::I8x16ShrS + | Operator::I8x16ShrU + | Operator::I8x16Add + | Operator::I8x16AddSaturateS + | Operator::I8x16AddSaturateU + | Operator::I8x16Sub + | Operator::I8x16SubSaturateS + | Operator::I8x16SubSaturateU + | Operator::I8x16MinS + | Operator::I8x16MinU + | Operator::I8x16MaxS + | Operator::I8x16MaxU + | Operator::I8x16RoundingAverageU + | Operator::I8x16Mul => I8X16, + + Operator::I16x8Splat + | Operator::V16x8LoadSplat { .. } + | Operator::I16x8ExtractLaneS { .. } + | Operator::I16x8ExtractLaneU { .. } + | Operator::I16x8ReplaceLane { .. } + | Operator::I16x8Eq + | Operator::I16x8Ne + | Operator::I16x8LtS + | Operator::I16x8LtU + | Operator::I16x8GtS + | Operator::I16x8GtU + | Operator::I16x8LeS + | Operator::I16x8LeU + | Operator::I16x8GeS + | Operator::I16x8GeU + | Operator::I16x8Neg + | Operator::I16x8AnyTrue + | Operator::I16x8AllTrue + | Operator::I16x8Shl + | Operator::I16x8ShrS + | Operator::I16x8ShrU + | Operator::I16x8Add + | Operator::I16x8AddSaturateS + | Operator::I16x8AddSaturateU + | Operator::I16x8Sub + | Operator::I16x8SubSaturateS + | Operator::I16x8SubSaturateU + | Operator::I16x8MinS + | Operator::I16x8MinU + | Operator::I16x8MaxS + | Operator::I16x8MaxU + | Operator::I16x8RoundingAverageU + | Operator::I16x8Mul => I16X8, + + Operator::I32x4Splat + | Operator::V32x4LoadSplat { .. } + | Operator::I32x4ExtractLane { .. } + | Operator::I32x4ReplaceLane { .. } + | Operator::I32x4Eq + | Operator::I32x4Ne + | Operator::I32x4LtS + | Operator::I32x4LtU + | Operator::I32x4GtS + | Operator::I32x4GtU + | Operator::I32x4LeS + | Operator::I32x4LeU + | Operator::I32x4GeS + | Operator::I32x4GeU + | Operator::I32x4Neg + | Operator::I32x4AnyTrue + | Operator::I32x4AllTrue + | Operator::I32x4Shl + | Operator::I32x4ShrS + | Operator::I32x4ShrU + | Operator::I32x4Add + | Operator::I32x4Sub + | Operator::I32x4Mul + | Operator::I32x4MinS + | Operator::I32x4MinU + | Operator::I32x4MaxS + | Operator::I32x4MaxU + | Operator::F32x4ConvertI32x4S + | Operator::F32x4ConvertI32x4U => I32X4, + + Operator::I64x2Splat + | Operator::V64x2LoadSplat { .. } + | Operator::I64x2ExtractLane { .. } + | Operator::I64x2ReplaceLane { .. } + | Operator::I64x2Neg + | Operator::I64x2AnyTrue + | Operator::I64x2AllTrue + | Operator::I64x2Shl + | Operator::I64x2ShrS + | Operator::I64x2ShrU + | Operator::I64x2Add + | Operator::I64x2Sub + | Operator::F64x2ConvertI64x2S + | Operator::F64x2ConvertI64x2U => I64X2, + + Operator::F32x4Splat + | Operator::F32x4ExtractLane { .. } + | Operator::F32x4ReplaceLane { .. } + | Operator::F32x4Eq + | Operator::F32x4Ne + | Operator::F32x4Lt + | Operator::F32x4Gt + | Operator::F32x4Le + | Operator::F32x4Ge + | Operator::F32x4Abs + | Operator::F32x4Neg + | Operator::F32x4Sqrt + | Operator::F32x4Add + | Operator::F32x4Sub + | Operator::F32x4Mul + | Operator::F32x4Div + | Operator::F32x4Min + | Operator::F32x4Max + | Operator::I32x4TruncSatF32x4S + | Operator::I32x4TruncSatF32x4U => F32X4, + + Operator::F64x2Splat + | Operator::F64x2ExtractLane { .. } + | Operator::F64x2ReplaceLane { .. 
}
+        | Operator::F64x2Eq
+        | Operator::F64x2Ne
+        | Operator::F64x2Lt
+        | Operator::F64x2Gt
+        | Operator::F64x2Le
+        | Operator::F64x2Ge
+        | Operator::F64x2Abs
+        | Operator::F64x2Neg
+        | Operator::F64x2Sqrt
+        | Operator::F64x2Add
+        | Operator::F64x2Sub
+        | Operator::F64x2Mul
+        | Operator::F64x2Div
+        | Operator::F64x2Min
+        | Operator::F64x2Max
+        | Operator::I64x2TruncSatF64x2S
+        | Operator::I64x2TruncSatF64x2U => F64X2,
+
+        _ => unimplemented!(
+            "Currently only SIMD instructions are mapped to their return type; the \
+             following instruction is not mapped: {:?}",
+            operator
+        ),
+    }
+}
+
+/// Some SIMD operations only operate on I8X16 in CLIF; this will convert them to that type by
+/// adding a raw_bitcast if necessary.
+pub fn optionally_bitcast_vector(
+    value: Value,
+    needed_type: Type,
+    builder: &mut FunctionBuilder,
+) -> Value {
+    if builder.func.dfg.value_type(value) != needed_type {
+        builder.ins().raw_bitcast(needed_type, value)
+    } else {
+        value
+    }
+}
+
+/// A helper for popping and bitcasting a single value; since SIMD values can lose their type by
+/// using v128 (i.e. CLIF's I8X16) we must re-type the values using a bitcast to avoid CLIF
+/// typing issues.
+fn pop1_with_bitcast(
+    state: &mut FuncTranslationState,
+    needed_type: Type,
+    builder: &mut FunctionBuilder,
+) -> Value {
+    optionally_bitcast_vector(state.pop1(), needed_type, builder)
+}
+
+/// A helper for popping and bitcasting two values; since SIMD values can lose their type by
+/// using v128 (i.e. CLIF's I8X16) we must re-type the values using a bitcast to avoid CLIF
+/// typing issues.
+fn pop2_with_bitcast(
+    state: &mut FuncTranslationState,
+    needed_type: Type,
+    builder: &mut FunctionBuilder,
+) -> (Value, Value) {
+    let (a, b) = state.pop2();
+    let bitcast_a = optionally_bitcast_vector(a, needed_type, builder);
+    let bitcast_b = optionally_bitcast_vector(b, needed_type, builder);
+    (bitcast_a, bitcast_b)
+}
+
+/// A helper for bitcasting a sequence of values (e.g. function arguments). If a value is a
+/// vector type that does not match its expected type, this will modify the value in place to point
+/// to the result of a `raw_bitcast`. This conversion is necessary to translate Wasm code that
+/// uses `V128` as function parameters (or implicitly in block parameters) and still use specific
+/// CLIF types (e.g. `I32X4`) in the function body.
+pub fn bitcast_arguments(
+    arguments: &mut [Value],
+    expected_types: &[Type],
+    builder: &mut FunctionBuilder,
+) {
+    assert_eq!(arguments.len(), expected_types.len());
+    for (i, t) in expected_types.iter().enumerate() {
+        if t.is_vector() {
+            assert!(
+                builder.func.dfg.value_type(arguments[i]).is_vector(),
+                "unexpected type mismatch: expected {}, argument {} was actually of type {}",
+                t,
+                arguments[i],
+                builder.func.dfg.value_type(arguments[i])
+            );
+            arguments[i] = optionally_bitcast_vector(arguments[i], *t, builder)
+        }
+    }
+}
+
+/// A helper to extract the `Type` of each parameter in `params` for which `is_wasm` returns
+/// true, i.e. the wasm-level parameters; typically paired with `is_wasm_return` or
+/// `is_wasm_parameter`.
+pub fn wasm_param_types(params: &[ir::AbiParam], is_wasm: impl Fn(usize) -> bool) -> Vec<Type> {
+    let mut ret = Vec::with_capacity(params.len());
+    for (i, param) in params.iter().enumerate() {
+        if is_wasm(i) {
+            ret.push(param.value_type);
+        }
+    }
+    ret
+}
diff --git a/cranelift/wasm/src/environ/dummy.rs b/cranelift/wasm/src/environ/dummy.rs
new file mode 100644
index 0000000000..4ebf0bdc29
--- /dev/null
+++ b/cranelift/wasm/src/environ/dummy.rs
@@ -0,0 +1,733 @@
+//! "Dummy" implementations of `ModuleEnvironment` and `FuncEnvironment` for testing
+//! wasm translation. For complete implementations of `ModuleEnvironment` and
+//! `FuncEnvironment`, see [wasmtime-environ] in [Wasmtime].
+//!
+//! [wasmtime-environ]: https://crates.io/crates/wasmtime-environ
+//! [Wasmtime]: https://github.com/bytecodealliance/wasmtime
+
+use crate::environ::{
+    FuncEnvironment, GlobalVariable, ModuleEnvironment, ReturnMode, TargetEnvironment, WasmResult,
+};
+use crate::func_translator::FuncTranslator;
+use crate::state::ModuleTranslationState;
+use crate::translation_utils::{
+    DefinedFuncIndex, FuncIndex, Global, GlobalIndex, Memory, MemoryIndex, PassiveDataIndex,
+    PassiveElemIndex, SignatureIndex, Table, TableIndex,
+};
+use core::convert::TryFrom;
+use cranelift_codegen::cursor::FuncCursor;
+use cranelift_codegen::ir::immediates::{Offset32, Uimm64};
+use cranelift_codegen::ir::types::*;
+use cranelift_codegen::ir::{self, InstBuilder};
+use cranelift_codegen::isa::TargetFrontendConfig;
+use cranelift_entity::{EntityRef, PrimaryMap, SecondaryMap};
+use std::boxed::Box;
+use std::string::String;
+use std::vec::Vec;
+
+/// Compute an `ir::ExternalName` for a given wasm function index.
+fn get_func_name(func_index: FuncIndex) -> ir::ExternalName {
+    ir::ExternalName::user(0, func_index.as_u32())
+}
+
+/// A collection of names under which a given entity is exported.
+pub struct Exportable<T> {
+    /// A wasm entity.
+    pub entity: T,
+
+    /// Names under which the entity is exported.
+    pub export_names: Vec<String>,
+}
+
+impl<T> Exportable<T> {
+    pub fn new(entity: T) -> Self {
+        Self {
+            entity,
+            export_names: Vec::new(),
+        }
+    }
+}
+
+/// The main state belonging to a `DummyEnvironment`. This is split out from
+/// `DummyEnvironment` to allow it to be borrowed separately from the
+/// `FuncTranslator` field.
+pub struct DummyModuleInfo {
+    /// Target description relevant to frontends producing Cranelift IR.
+    config: TargetFrontendConfig,
+
+    /// Signatures as provided by `declare_signature`.
+    pub signatures: PrimaryMap<SignatureIndex, ir::Signature>,
+
+    /// Module and field names of imported functions as provided by `declare_func_import`.
+    pub imported_funcs: Vec<(String, String)>,
+
+    /// Module and field names of imported globals as provided by `declare_global_import`.
+    pub imported_globals: Vec<(String, String)>,
+
+    /// Module and field names of imported tables as provided by `declare_table_import`.
+    pub imported_tables: Vec<(String, String)>,
+
+    /// Module and field names of imported memories as provided by `declare_memory_import`.
+    pub imported_memories: Vec<(String, String)>,
+
+    /// Functions, imported and local.
+    pub functions: PrimaryMap<FuncIndex, Exportable<SignatureIndex>>,
+
+    /// Function bodies.
+    pub function_bodies: PrimaryMap<DefinedFuncIndex, ir::Function>,
+
+    /// Tables as provided by `declare_table`.
+    pub tables: PrimaryMap<TableIndex, Exportable<Table>>,
+
+    /// Memories as provided by `declare_memory`.
+    pub memories: PrimaryMap<MemoryIndex, Exportable<Memory>>,
+
+    /// Globals as provided by `declare_global`.
+    pub globals: PrimaryMap<GlobalIndex, Exportable<Global>>,
+
+    /// The start function.
+    pub start_func: Option<FuncIndex>,
+}
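// Editor's note: a minimal usage sketch (not part of this patch) of how the
// dummy environment is driven. It assumes the crate's `translate_module` entry
// point; the target configuration values here are arbitrary.
use cranelift_codegen::isa::{CallConv, TargetFrontendConfig};
use cranelift_wasm::{translate_module, DummyEnvironment, ReturnMode};
use target_lexicon::PointerWidth;

fn inspect(wasm: &[u8]) -> cranelift_wasm::WasmResult<()> {
    let config = TargetFrontendConfig {
        default_call_conv: CallConv::SystemV,
        pointer_width: PointerWidth::U64,
    };
    let mut env = DummyEnvironment::new(config, ReturnMode::NormalReturns, false);
    translate_module(wasm, &mut env)?;
    // `env.info.function_bodies` now holds the translated CLIF functions.
    Ok(())
}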
+
+impl DummyModuleInfo {
+    /// Creates a new `DummyModuleInfo` instance.
+    pub fn new(config: TargetFrontendConfig) -> Self {
+        Self {
+            config,
+            signatures: PrimaryMap::new(),
+            imported_funcs: Vec::new(),
+            imported_globals: Vec::new(),
+            imported_tables: Vec::new(),
+            imported_memories: Vec::new(),
+            functions: PrimaryMap::new(),
+            function_bodies: PrimaryMap::new(),
+            tables: PrimaryMap::new(),
+            memories: PrimaryMap::new(),
+            globals: PrimaryMap::new(),
+            start_func: None,
+        }
+    }
+}
+
+/// This `ModuleEnvironment` implementation is a "naïve" one, doing essentially nothing and
+/// emitting placeholders when forced to. Don't try to execute code translated for this
+/// environment: it exists mainly for translation debugging purposes.
+pub struct DummyEnvironment {
+    /// Module information.
+    pub info: DummyModuleInfo,
+
+    /// Function translation.
+    trans: FuncTranslator,
+
+    /// Vector of wasm bytecode size for each function.
+    pub func_bytecode_sizes: Vec<usize>,
+
+    /// How to return from functions.
+    return_mode: ReturnMode,
+
+    /// Instructs to collect debug data during translation.
+    debug_info: bool,
+
+    /// Function names.
+    function_names: SecondaryMap<FuncIndex, String>,
+}
+
+impl DummyEnvironment {
+    /// Creates a new `DummyEnvironment` instance.
+    pub fn new(config: TargetFrontendConfig, return_mode: ReturnMode, debug_info: bool) -> Self {
+        Self {
+            info: DummyModuleInfo::new(config),
+            trans: FuncTranslator::new(),
+            func_bytecode_sizes: Vec::new(),
+            return_mode,
+            debug_info,
+            function_names: SecondaryMap::new(),
+        }
+    }
+
+    /// Return a `DummyFuncEnvironment` for translating functions within this
+    /// `DummyEnvironment`.
+    pub fn func_env(&self) -> DummyFuncEnvironment {
+        DummyFuncEnvironment::new(&self.info, self.return_mode)
+    }
+
+    fn get_func_type(&self, func_index: FuncIndex) -> SignatureIndex {
+        self.info.functions[func_index].entity
+    }
+
+    /// Return the number of imported functions within this `DummyEnvironment`.
+    pub fn get_num_func_imports(&self) -> usize {
+        self.info.imported_funcs.len()
+    }
+
+    /// Return the name of the function, if a name for the function with
+    /// the corresponding index exists.
+    pub fn get_func_name(&self, func_index: FuncIndex) -> Option<&str> {
+        self.function_names.get(func_index).map(String::as_ref)
+    }
+}
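// Editor's note: an illustrative sketch (not part of this patch) of what the
// `vmctx_sig` helper below produces for a Wasm signature `(i32) -> i32` on a
// 64-bit target; the calling convention here is arbitrary.
use cranelift_codegen::ir::{types, AbiParam, ArgumentPurpose, Signature};
use cranelift_codegen::isa::CallConv;

fn amended_signature() -> Signature {
    let mut sig = Signature::new(CallConv::SystemV);
    sig.params.push(AbiParam::new(types::I32));
    sig.returns.push(AbiParam::new(types::I32));
    // A hidden `vmctx` pointer parameter is appended after the Wasm-level
    // parameters, mirroring `vmctx_sig`.
    sig.params.push(AbiParam::special(types::I64, ArgumentPurpose::VMContext));
    sig
}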
+
+/// The `FuncEnvironment` implementation for use by the `DummyEnvironment`.
+pub struct DummyFuncEnvironment<'dummy_environment> {
+    pub mod_info: &'dummy_environment DummyModuleInfo,
+
+    return_mode: ReturnMode,
+}
+
+impl<'dummy_environment> DummyFuncEnvironment<'dummy_environment> {
+    pub fn new(mod_info: &'dummy_environment DummyModuleInfo, return_mode: ReturnMode) -> Self {
+        Self {
+            mod_info,
+            return_mode,
+        }
+    }
+
+    // Create a signature for `sigidx` amended with a `vmctx` argument after the standard wasm
+    // arguments.
+    fn vmctx_sig(&self, sigidx: SignatureIndex) -> ir::Signature {
+        let mut sig = self.mod_info.signatures[sigidx].clone();
+        sig.params.push(ir::AbiParam::special(
+            self.pointer_type(),
+            ir::ArgumentPurpose::VMContext,
+        ));
+        sig
+    }
+}
+
+impl<'dummy_environment> TargetEnvironment for DummyFuncEnvironment<'dummy_environment> {
+    fn target_config(&self) -> TargetFrontendConfig {
+        self.mod_info.config
+    }
+}
+
+impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environment> {
+    fn return_mode(&self) -> ReturnMode {
+        self.return_mode
+    }
+
+    fn make_global(
+        &mut self,
+        func: &mut ir::Function,
+        index: GlobalIndex,
+    ) -> WasmResult<GlobalVariable> {
+        // Just create a dummy `vmctx` global.
+        let offset = i32::try_from((index.index() * 8) + 8).unwrap().into();
+        let vmctx = func.create_global_value(ir::GlobalValueData::VMContext {});
+        Ok(GlobalVariable::Memory {
+            gv: vmctx,
+            offset,
+            ty: self.mod_info.globals[index].entity.ty,
+        })
+    }
+
+    fn make_heap(&mut self, func: &mut ir::Function, _index: MemoryIndex) -> WasmResult<ir::Heap> {
+        // Create a static heap whose base address is stored at `vmctx+0`.
+        let addr = func.create_global_value(ir::GlobalValueData::VMContext);
+        let gv = func.create_global_value(ir::GlobalValueData::Load {
+            base: addr,
+            offset: Offset32::new(0),
+            global_type: self.pointer_type(),
+            readonly: true,
+        });
+
+        Ok(func.create_heap(ir::HeapData {
+            base: gv,
+            min_size: 0.into(),
+            offset_guard_size: 0x8000_0000.into(),
+            style: ir::HeapStyle::Static {
+                bound: 0x1_0000_0000.into(),
+            },
+            index_type: I32,
+        }))
+    }
+
+    fn make_table(&mut self, func: &mut ir::Function, _index: TableIndex) -> WasmResult<ir::Table> {
+        // Create a table whose base address is stored at `vmctx+0`.
+        let vmctx = func.create_global_value(ir::GlobalValueData::VMContext);
+        let base_gv = func.create_global_value(ir::GlobalValueData::Load {
+            base: vmctx,
+            offset: Offset32::new(0),
+            global_type: self.pointer_type(),
+            readonly: true, // when tables in wasm become "growable", revisit whether this can be readonly or not.
+        });
+        let bound_gv = func.create_global_value(ir::GlobalValueData::Load {
+            base: vmctx,
+            offset: Offset32::new(0),
+            global_type: I32,
+            readonly: true,
+        });
+
+        Ok(func.create_table(ir::TableData {
+            base_gv,
+            min_size: Uimm64::new(0),
+            bound_gv,
+            element_size: Uimm64::from(u64::from(self.pointer_bytes()) * 2),
+            index_type: I32,
+        }))
+    }
+
+    fn make_indirect_sig(
+        &mut self,
+        func: &mut ir::Function,
+        index: SignatureIndex,
+    ) -> WasmResult<ir::SigRef> {
+        // A real implementation would probably change the calling convention and add `vmctx` and
+        // signature index arguments.
+        Ok(func.import_signature(self.vmctx_sig(index)))
+    }
+
+    fn make_direct_func(
+        &mut self,
+        func: &mut ir::Function,
+        index: FuncIndex,
+    ) -> WasmResult<ir::FuncRef> {
+        let sigidx = self.mod_info.functions[index].entity;
+        // A real implementation would probably add a `vmctx` argument.
+        // And maybe attempt some signature de-duplication.
+        let signature = func.import_signature(self.vmctx_sig(sigidx));
+        let name = get_func_name(index);
+        Ok(func.import_function(ir::ExtFuncData {
+            name,
+            signature,
+            colocated: false,
+        }))
+    }
+
+    fn translate_call_indirect(
+        &mut self,
+        mut pos: FuncCursor,
+        _table_index: TableIndex,
+        _table: ir::Table,
+        _sig_index: SignatureIndex,
+        sig_ref: ir::SigRef,
+        callee: ir::Value,
+        call_args: &[ir::Value],
+    ) -> WasmResult<ir::Inst> {
+        // Pass the current function's vmctx parameter on to the callee.
+        let vmctx = pos
+            .func
+            .special_param(ir::ArgumentPurpose::VMContext)
+            .expect("Missing vmctx parameter");
+
+        // The `callee` value is an index into a table of function pointers.
+        // Apparently, that table is stored at absolute address 0 in this dummy environment.
+        // TODO: Generate bounds checking code.
+        let ptr = self.pointer_type();
+        let callee_offset = if ptr == I32 {
+            pos.ins().imul_imm(callee, 4)
+        } else {
+            let ext = pos.ins().uextend(I64, callee);
+            pos.ins().imul_imm(ext, 4)
+        };
+        let mflags = ir::MemFlags::trusted();
+        let func_ptr = pos.ins().load(ptr, mflags, callee_offset, 0);
+
+        // Build a value list for the indirect call instruction containing the callee, call_args,
+        // and the vmctx parameter.
+        let mut args = ir::ValueList::default();
+        args.push(func_ptr, &mut pos.func.dfg.value_lists);
+        args.extend(call_args.iter().cloned(), &mut pos.func.dfg.value_lists);
+        args.push(vmctx, &mut pos.func.dfg.value_lists);
+
+        Ok(pos
+            .ins()
+            .CallIndirect(ir::Opcode::CallIndirect, INVALID, sig_ref, args)
+            .0)
+    }
+
+    fn translate_call(
+        &mut self,
+        mut pos: FuncCursor,
+        _callee_index: FuncIndex,
+        callee: ir::FuncRef,
+        call_args: &[ir::Value],
+    ) -> WasmResult<ir::Inst> {
+        // Pass the current function's vmctx parameter on to the callee.
+        let vmctx = pos
+            .func
+            .special_param(ir::ArgumentPurpose::VMContext)
+            .expect("Missing vmctx parameter");
+
+        // Build a value list for the call instruction containing the call_args and the vmctx
+        // parameter.
+        let mut args = ir::ValueList::default();
+        args.extend(call_args.iter().cloned(), &mut pos.func.dfg.value_lists);
+        args.push(vmctx, &mut pos.func.dfg.value_lists);
+
+        Ok(pos.ins().Call(ir::Opcode::Call, INVALID, callee, args).0)
+    }
+
+    fn translate_memory_grow(
+        &mut self,
+        mut pos: FuncCursor,
+        _index: MemoryIndex,
+        _heap: ir::Heap,
+        _val: ir::Value,
+    ) -> WasmResult<ir::Value> {
+        Ok(pos.ins().iconst(I32, -1))
+    }
+
+    fn translate_memory_size(
+        &mut self,
+        mut pos: FuncCursor,
+        _index: MemoryIndex,
+        _heap: ir::Heap,
+    ) -> WasmResult<ir::Value> {
+        Ok(pos.ins().iconst(I32, -1))
+    }
+
+    fn translate_memory_copy(
+        &mut self,
+        _pos: FuncCursor,
+        _index: MemoryIndex,
+        _heap: ir::Heap,
+        _dst: ir::Value,
+        _src: ir::Value,
+        _len: ir::Value,
+    ) -> WasmResult<()> {
+        Ok(())
+    }
+
+    fn translate_memory_fill(
+        &mut self,
+        _pos: FuncCursor,
+        _index: MemoryIndex,
+        _heap: ir::Heap,
+        _dst: ir::Value,
+        _val: ir::Value,
+        _len: ir::Value,
+    ) -> WasmResult<()> {
+        Ok(())
+    }
+
+    fn translate_memory_init(
+        &mut self,
+        _pos: FuncCursor,
+        _index: MemoryIndex,
+        _heap: ir::Heap,
+        _seg_index: u32,
+        _dst: ir::Value,
+        _src: ir::Value,
+        _len: ir::Value,
+    ) -> WasmResult<()> {
+        Ok(())
+    }
+
+    fn translate_data_drop(&mut self, _pos: FuncCursor, _seg_index: u32) -> WasmResult<()> {
+        Ok(())
+    }
+
+    fn translate_table_size(
+        &mut self,
+        mut pos: FuncCursor,
+        _index: TableIndex,
+        _table: ir::Table,
+    ) -> WasmResult<ir::Value> {
+        Ok(pos.ins().iconst(I32, -1))
+    }
+
+    fn translate_table_grow(
+        &mut self,
+        mut pos: FuncCursor,
+        _table_index: u32,
+        _delta: ir::Value,
+        _init_value: ir::Value,
+    ) -> WasmResult<ir::Value> {
+        Ok(pos.ins().iconst(I32, -1))
+    }
+
+    fn translate_table_get(
+        &mut self,
+        mut pos: FuncCursor,
+        _table_index: u32,
+        _index: ir::Value,
+    ) -> WasmResult<ir::Value> {
+        Ok(pos.ins().null(self.reference_type()))
+    }
+
+    fn translate_table_set(
+        &mut self,
+        _pos: FuncCursor,
+        _table_index: u32,
+        _value: ir::Value,
+        _index: ir::Value,
+    ) -> WasmResult<()> {
+        Ok(())
+    }
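+    // The remaining hooks below are deliberately inert (no-ops or placeholder
+    // results): code translated with the dummy environment is only meant to be
+    // inspected, never executed.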
+    fn translate_table_copy(
+        &mut self,
+        _pos: FuncCursor,
+        _dst_index: TableIndex,
+        _dst_table: ir::Table,
+        _src_index: TableIndex,
+        _src_table: ir::Table,
+        _dst: ir::Value,
+        _src: ir::Value,
+        _len: ir::Value,
+    ) -> WasmResult<()> {
+        Ok(())
+    }
+
+    fn translate_table_fill(
+        &mut self,
+        _pos: FuncCursor,
+        _table_index: u32,
+        _dst: ir::Value,
+        _val: ir::Value,
+        _len: ir::Value,
+    ) -> WasmResult<()> {
+        Ok(())
+    }
+
+    fn translate_table_init(
+        &mut self,
+        _pos: FuncCursor,
+        _seg_index: u32,
+        _table_index: TableIndex,
+        _table: ir::Table,
+        _dst: ir::Value,
+        _src: ir::Value,
+        _len: ir::Value,
+    ) -> WasmResult<()> {
+        Ok(())
+    }
+
+    fn translate_elem_drop(&mut self, _pos: FuncCursor, _seg_index: u32) -> WasmResult<()> {
+        Ok(())
+    }
+
+    fn translate_ref_func(
+        &mut self,
+        mut pos: FuncCursor,
+        _func_index: u32,
+    ) -> WasmResult<ir::Value> {
+        Ok(pos.ins().null(self.reference_type()))
+    }
+
+    fn translate_custom_global_get(
+        &mut self,
+        mut pos: FuncCursor,
+        _global_index: GlobalIndex,
+    ) -> WasmResult<ir::Value> {
+        Ok(pos.ins().iconst(I32, -1))
+    }
+
+    fn translate_custom_global_set(
+        &mut self,
+        _pos: FuncCursor,
+        _global_index: GlobalIndex,
+        _val: ir::Value,
+    ) -> WasmResult<()> {
+        Ok(())
+    }
+}
+
+impl TargetEnvironment for DummyEnvironment {
+    fn target_config(&self) -> TargetFrontendConfig {
+        self.info.config
+    }
+}
+
+impl<'data> ModuleEnvironment<'data> for DummyEnvironment {
+    fn declare_signature(&mut self, sig: ir::Signature) -> WasmResult<()> {
+        self.info.signatures.push(sig);
+        Ok(())
+    }
+
+    fn declare_func_import(
+        &mut self,
+        sig_index: SignatureIndex,
+        module: &'data str,
+        field: &'data str,
+    ) -> WasmResult<()> {
+        assert_eq!(
+            self.info.functions.len(),
+            self.info.imported_funcs.len(),
+            "Imported functions must be declared first"
+        );
+        self.info.functions.push(Exportable::new(sig_index));
+        self.info
+            .imported_funcs
+            .push((String::from(module), String::from(field)));
+        Ok(())
+    }
+
+    fn declare_func_type(&mut self, sig_index: SignatureIndex) -> WasmResult<()> {
+        self.info.functions.push(Exportable::new(sig_index));
+        Ok(())
+    }
+
+    fn declare_global(&mut self, global: Global) -> WasmResult<()> {
+        self.info.globals.push(Exportable::new(global));
+        Ok(())
+    }
+
+    fn declare_global_import(
+        &mut self,
+        global: Global,
+        module: &'data str,
+        field: &'data str,
+    ) -> WasmResult<()> {
+        self.info.globals.push(Exportable::new(global));
+        self.info
+            .imported_globals
+            .push((String::from(module), String::from(field)));
+        Ok(())
+    }
+
+    fn declare_table(&mut self, table: Table) -> WasmResult<()> {
+        self.info.tables.push(Exportable::new(table));
+        Ok(())
+    }
+
+    fn declare_table_import(
+        &mut self,
+        table: Table,
+        module: &'data str,
+        field: &'data str,
+    ) -> WasmResult<()> {
+        self.info.tables.push(Exportable::new(table));
+        self.info
+            .imported_tables
+            .push((String::from(module), String::from(field)));
+        Ok(())
+    }
+
+    fn declare_table_elements(
+        &mut self,
+        _table_index: TableIndex,
+        _base: Option<GlobalIndex>,
+        _offset: usize,
+        _elements: Box<[FuncIndex]>,
+    ) -> WasmResult<()> {
+        // We do nothing
+        Ok(())
+    }
+
+    fn declare_passive_element(
+        &mut self,
+        _elem_index: PassiveElemIndex,
+        _segments: Box<[FuncIndex]>,
+    ) -> WasmResult<()> {
+        Ok(())
+    }
+
+    fn declare_passive_data(
+        &mut self,
+        _elem_index: PassiveDataIndex,
+        _segments: &'data [u8],
+    ) -> WasmResult<()> {
+        Ok(())
+    }
+
+    fn declare_memory(&mut self, memory: Memory) -> WasmResult<()> {
+        self.info.memories.push(Exportable::new(memory));
+        Ok(())
+    }
+
+    fn declare_memory_import(
+        &mut self,
+        memory: Memory,
+        module: &'data str,
+        field: &'data str,
+    ) -> WasmResult<()> {
+        self.info.memories.push(Exportable::new(memory));
+        self.info
+            .imported_memories
+            .push((String::from(module), String::from(field)));
+        Ok(())
+    }
+
+    fn declare_data_initialization(
+        &mut self,
+        _memory_index: MemoryIndex,
+        _base: Option<GlobalIndex>,
+        _offset: usize,
+        _data: &'data [u8],
+    ) -> WasmResult<()> {
+        // We do nothing
+        Ok(())
+    }
+
+    fn declare_func_export(&mut self, func_index: FuncIndex, name: &'data str) -> WasmResult<()> {
+        self.info.functions[func_index]
+            .export_names
+            .push(String::from(name));
+        Ok(())
+    }
+
+    fn declare_table_export(
+        &mut self,
+        table_index: TableIndex,
+        name: &'data str,
+    ) -> WasmResult<()> {
+        self.info.tables[table_index]
+            .export_names
+            .push(String::from(name));
+        Ok(())
+    }
+
+    fn declare_memory_export(
+        &mut self,
+        memory_index: MemoryIndex,
+        name: &'data str,
+    ) -> WasmResult<()> {
+        self.info.memories[memory_index]
+            .export_names
+            .push(String::from(name));
+        Ok(())
+    }
+
+    fn declare_global_export(
+        &mut self,
+        global_index: GlobalIndex,
+        name: &'data str,
+    ) -> WasmResult<()> {
+        self.info.globals[global_index]
+            .export_names
+            .push(String::from(name));
+        Ok(())
+    }
+
+    fn declare_start_func(&mut self, func_index: FuncIndex) -> WasmResult<()> {
+        debug_assert!(self.info.start_func.is_none());
+        self.info.start_func = Some(func_index);
+        Ok(())
+    }
+
+    fn define_function_body(
+        &mut self,
+        module_translation_state: &ModuleTranslationState,
+        body_bytes: &'data [u8],
+        body_offset: usize,
+    ) -> WasmResult<()> {
+        let func = {
+            let mut func_environ = DummyFuncEnvironment::new(&self.info, self.return_mode);
+            let func_index =
+                FuncIndex::new(self.get_num_func_imports() + self.info.function_bodies.len());
+            let name = get_func_name(func_index);
+            let sig = func_environ.vmctx_sig(self.get_func_type(func_index));
+            let mut func = ir::Function::with_name_signature(name, sig);
+            if self.debug_info {
+                func.collect_debug_info();
+            }
+            self.trans.translate(
+                module_translation_state,
+                body_bytes,
+                body_offset,
+                &mut func,
+                &mut func_environ,
+            )?;
+            func
+        };
+        self.func_bytecode_sizes.push(body_bytes.len());
+        self.info.function_bodies.push(func);
+        Ok(())
+    }
+
+    fn declare_func_name(&mut self, func_index: FuncIndex, name: &'data str) -> WasmResult<()> {
+        self.function_names[func_index] = String::from(name);
+        Ok(())
+    }
+}
diff --git a/cranelift/wasm/src/environ/mod.rs b/cranelift/wasm/src/environ/mod.rs
new file mode 100644
index 0000000000..1cdb0b292a
--- /dev/null
+++ b/cranelift/wasm/src/environ/mod.rs
@@ -0,0 +1,11 @@
+//! Support for configurable wasm translation.
+
+mod dummy;
+#[macro_use]
+mod spec;
+
+pub use crate::environ::dummy::DummyEnvironment;
+pub use crate::environ::spec::{
+    FuncEnvironment, GlobalVariable, ModuleEnvironment, ReturnMode, TargetEnvironment, WasmError,
+    WasmResult,
+};
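// Editor's note: an illustrative sketch (hypothetical type, not part of this
// patch) showing that the trait split defined in `spec.rs` below lets an
// embedder start from nothing more than a bare target description.
use cranelift_codegen::isa::TargetFrontendConfig;
use cranelift_wasm::TargetEnvironment;

struct MinimalEnv {
    config: TargetFrontendConfig,
}

impl TargetEnvironment for MinimalEnv {
    fn target_config(&self) -> TargetFrontendConfig {
        self.config
    }
    // `pointer_type`, `pointer_bytes`, and `reference_type` are inherited from
    // the trait's default methods.
}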
+//! [Wasmtime]: https://github.com/bytecodealliance/wasmtime
+
+use crate::state::{FuncTranslationState, ModuleTranslationState};
+use crate::translation_utils::{
+    FuncIndex, Global, GlobalIndex, Memory, MemoryIndex, PassiveDataIndex, PassiveElemIndex,
+    SignatureIndex, Table, TableIndex,
+};
+use core::convert::From;
+use cranelift_codegen::cursor::FuncCursor;
+use cranelift_codegen::ir::immediates::Offset32;
+use cranelift_codegen::ir::{self, InstBuilder};
+use cranelift_codegen::isa::TargetFrontendConfig;
+use cranelift_frontend::FunctionBuilder;
+use std::boxed::Box;
+use thiserror::Error;
+use wasmparser::BinaryReaderError;
+use wasmparser::Operator;
+
+/// The value of a WebAssembly global variable.
+#[derive(Clone, Copy)]
+pub enum GlobalVariable {
+    /// This is a constant global with a value known at compile time.
+    Const(ir::Value),
+
+    /// This is a variable in memory that should be referenced through a `GlobalValue`.
+    Memory {
+        /// The address of the global variable storage.
+        gv: ir::GlobalValue,
+        /// An offset to add to the address.
+        offset: Offset32,
+        /// The global variable's type.
+        ty: ir::Type,
+    },
+
+    /// This is a global variable that needs to be handled by the environment.
+    Custom,
+}
+
+/// A WebAssembly translation error.
+///
+/// When a WebAssembly function can't be translated, one of these error codes will be returned
+/// to describe the failure.
+#[derive(Error, Debug)]
+pub enum WasmError {
+    /// The input WebAssembly code is invalid.
+    ///
+    /// This error code is used by a WebAssembly translator when it encounters invalid WebAssembly
+    /// code. This should never happen for validated WebAssembly code.
+    #[error("Invalid input WebAssembly code at offset {offset}: {message}")]
+    InvalidWebAssembly {
+        /// A string describing the validation error.
+        message: std::string::String,
+        /// The bytecode offset where the error occurred.
+        offset: usize,
+    },
+
+    /// A feature used by the WebAssembly code is not supported by the embedding environment.
+    ///
+    /// Embedding environments may have their own limitations and feature restrictions.
+    #[error("Unsupported feature: {0}")]
+    Unsupported(std::string::String),
+
+    /// An implementation limit was exceeded.
+    ///
+    /// Cranelift can compile very large and complicated functions, but the [implementation has
+    /// limits][limits] that cause compilation to fail when they are exceeded.
+    ///
+    /// [limits]: https://cranelift.readthedocs.io/en/latest/ir.html#implementation-limits
+    #[error("Implementation limit exceeded")]
+    ImplLimitExceeded,
+
+    /// Any user-defined error.
+    #[error("User error: {0}")]
+    User(std::string::String),
+}
+
+/// Return an `Err(WasmError::Unsupported(msg))` where `msg` is the string built by calling
+/// `format!` on the arguments to this macro.
+#[macro_export]
+macro_rules! wasm_unsupported {
+    ($($arg:tt)*) => { $crate::environ::WasmError::Unsupported(format!($($arg)*)) }
+}
+
+impl From<BinaryReaderError> for WasmError {
+    /// Convert from a `BinaryReaderError` to a `WasmError`.
+    fn from(e: BinaryReaderError) -> Self {
+        Self::InvalidWebAssembly {
+            message: e.message().into(),
+            offset: e.offset(),
+        }
+    }
+}
+
+/// A convenient alias for a `Result` that uses `WasmError` as the error type.
+pub type WasmResult<T> = Result<T, WasmError>;
+
+/// How to return from functions.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum ReturnMode {
+    /// Use normal return instructions as needed.
+    NormalReturns,
+    /// Use a single fallthrough return at the end of the function.
+    FallthroughReturn,
+}
+
+/// Environment affecting the translation of a WebAssembly module.
+pub trait TargetEnvironment {
+    /// Get the information needed to produce Cranelift IR for the given target.
+    fn target_config(&self) -> TargetFrontendConfig;
+
+    /// Get the Cranelift integer type to use for native pointers.
+    ///
+    /// This returns `I64` for 64-bit architectures and `I32` for 32-bit architectures.
+    fn pointer_type(&self) -> ir::Type {
+        ir::Type::int(u16::from(self.target_config().pointer_bits())).unwrap()
+    }
+
+    /// Get the size of a native pointer, in bytes.
+    fn pointer_bytes(&self) -> u8 {
+        self.target_config().pointer_bytes()
+    }
+
+    /// Get the Cranelift reference type to use for native references.
+    ///
+    /// This returns `R64` for 64-bit architectures and `R32` for 32-bit architectures.
+    fn reference_type(&self) -> ir::Type {
+        match self.pointer_type() {
+            ir::types::I32 => ir::types::R32,
+            ir::types::I64 => ir::types::R64,
+            _ => panic!("unsupported pointer type"),
+        }
+    }
+}
+
+/// Environment affecting the translation of a single WebAssembly function.
+///
+/// A `FuncEnvironment` trait object is required to translate a WebAssembly function to Cranelift
+/// IR. The function environment provides information about the WebAssembly module as well as the
+/// runtime environment.
+pub trait FuncEnvironment: TargetEnvironment {
+    /// Is the given parameter of the given function a wasm-level parameter, as opposed to a hidden
+    /// parameter added for use by the implementation?
+    fn is_wasm_parameter(&self, signature: &ir::Signature, index: usize) -> bool {
+        signature.params[index].purpose == ir::ArgumentPurpose::Normal
+    }
+
+    /// Is the given return of the given function a wasm-level return, as
+    /// opposed to a hidden return added for use by the implementation?
+    fn is_wasm_return(&self, signature: &ir::Signature, index: usize) -> bool {
+        signature.returns[index].purpose == ir::ArgumentPurpose::Normal
+    }
+
+    /// Should the code be structured to use a single `fallthrough_return` instruction at the end
+    /// of the function body, rather than `return` instructions as needed? This is used by VMs
+    /// to append custom epilogues.
+    fn return_mode(&self) -> ReturnMode {
+        ReturnMode::NormalReturns
+    }
+
+    /// Set up the necessary preamble definitions in `func` to access the global variable
+    /// identified by `index`.
+    ///
+    /// The index space covers both imported globals and globals defined by the module.
+    ///
+    /// Return the global variable reference that should be used to access the global and the
+    /// WebAssembly type of the global.
+    fn make_global(
+        &mut self,
+        func: &mut ir::Function,
+        index: GlobalIndex,
+    ) -> WasmResult<GlobalVariable>;
+
+    /// Set up the necessary preamble definitions in `func` to access the linear memory identified
+    /// by `index`.
+    ///
+    /// The index space covers both imported and locally declared memories.
+    fn make_heap(&mut self, func: &mut ir::Function, index: MemoryIndex) -> WasmResult<ir::Heap>;
+
+    /// Set up the necessary preamble definitions in `func` to access the table identified
+    /// by `index`.
+    ///
+    /// The index space covers both imported and locally declared tables.
+    fn make_table(&mut self, func: &mut ir::Function, index: TableIndex) -> WasmResult<ir::Table>;
+
+    /// Set up a signature definition in the preamble of `func` that can be used for an indirect
+    /// call with signature `index`.
+    ///
+    /// The signature may contain additional arguments needed for an indirect call, but the
+    /// arguments marked as `ArgumentPurpose::Normal` must correspond to the WebAssembly signature
+    /// arguments.
+    ///
+    /// The signature will only be used for indirect calls, even if the module has direct function
+    /// calls with the same WebAssembly type.
+    fn make_indirect_sig(
+        &mut self,
+        func: &mut ir::Function,
+        index: SignatureIndex,
+    ) -> WasmResult<ir::SigRef>;
+
+    /// Set up an external function definition in the preamble of `func` that can be used to
+    /// directly call the function `index`.
+    ///
+    /// The index space covers both imported functions and functions defined in the current module.
+    ///
+    /// The function's signature may contain additional arguments needed for a direct call, but the
+    /// arguments marked as `ArgumentPurpose::Normal` must correspond to the WebAssembly signature
+    /// arguments.
+    ///
+    /// The function's signature will only be used for direct calls, even if the module has
+    /// indirect calls with the same WebAssembly type.
+    fn make_direct_func(
+        &mut self,
+        func: &mut ir::Function,
+        index: FuncIndex,
+    ) -> WasmResult<ir::FuncRef>;
+
+    /// Translate a `call_indirect` WebAssembly instruction at `pos`.
+    ///
+    /// Insert instructions at `pos` for an indirect call to the function `callee` in the table
+    /// `table_index` with WebAssembly signature `sig_index`. The `callee` value will have type
+    /// `i32`.
+    ///
+    /// The signature `sig_ref` was previously created by `make_indirect_sig()`.
+    ///
+    /// Return the call instruction whose results are the WebAssembly return values.
+    #[cfg_attr(feature = "cargo-clippy", allow(clippy::too_many_arguments))]
+    fn translate_call_indirect(
+        &mut self,
+        pos: FuncCursor,
+        table_index: TableIndex,
+        table: ir::Table,
+        sig_index: SignatureIndex,
+        sig_ref: ir::SigRef,
+        callee: ir::Value,
+        call_args: &[ir::Value],
+    ) -> WasmResult<ir::Inst>;
+
+    /// Translate a `call` WebAssembly instruction at `pos`.
+    ///
+    /// Insert instructions at `pos` for a direct call to the function `callee_index`.
+    ///
+    /// The function reference `callee` was previously created by `make_direct_func()`.
+    ///
+    /// Return the call instruction whose results are the WebAssembly return values.
+    fn translate_call(
+        &mut self,
+        mut pos: FuncCursor,
+        _callee_index: FuncIndex,
+        callee: ir::FuncRef,
+        call_args: &[ir::Value],
+    ) -> WasmResult<ir::Inst> {
+        Ok(pos.ins().call(callee, call_args))
+    }
+
+    /// Translate a `memory.grow` WebAssembly instruction.
+    ///
+    /// The `index` provided identifies the linear memory to grow, and `heap` is the heap reference
+    /// returned by `make_heap` for the same index.
+    ///
+    /// The `val` value is the requested memory size in pages.
+    ///
+    /// Returns the old size (in pages) of the memory.
+    fn translate_memory_grow(
+        &mut self,
+        pos: FuncCursor,
+        index: MemoryIndex,
+        heap: ir::Heap,
+        val: ir::Value,
+    ) -> WasmResult<ir::Value>;
+
+    /// Translates a `memory.size` WebAssembly instruction.
+    ///
+    /// The `index` provided identifies the linear memory to query, and `heap` is the heap reference
+    /// returned by `make_heap` for the same index.
+    ///
+    /// Returns the size in pages of the memory.
+    fn translate_memory_size(
+        &mut self,
+        pos: FuncCursor,
+        index: MemoryIndex,
+        heap: ir::Heap,
+    ) -> WasmResult<ir::Value>;
+
+    /// Translate a `memory.copy` WebAssembly instruction.
+    ///
+    /// The `index` provided identifies the linear memory to query, and `heap` is the heap reference
+    /// returned by `make_heap` for the same index.
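+    ///
+    /// A typical implementation lowers this to a call into the runtime. As a purely
+    /// illustrative sketch (the `memcpy_libcall` helper below is hypothetical, not part of
+    /// this trait):
+    ///
+    /// ```ignore
+    /// fn translate_memory_copy(
+    ///     &mut self,
+    ///     mut pos: FuncCursor,
+    ///     _index: MemoryIndex,
+    ///     _heap: ir::Heap,
+    ///     dst: ir::Value,
+    ///     src: ir::Value,
+    ///     len: ir::Value,
+    /// ) -> WasmResult<()> {
+    ///     // Resolve a runtime helper once, then emit a plain call to it.
+    ///     let libcall = self.memcpy_libcall(pos.func)?; // hypothetical helper
+    ///     pos.ins().call(libcall, &[dst, src, len]);
+    ///     Ok(())
+    /// }
+    /// ```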
+    fn translate_memory_copy(
+        &mut self,
+        pos: FuncCursor,
+        index: MemoryIndex,
+        heap: ir::Heap,
+        dst: ir::Value,
+        src: ir::Value,
+        len: ir::Value,
+    ) -> WasmResult<()>;
+
+    /// Translate a `memory.fill` WebAssembly instruction.
+    ///
+    /// The `index` provided identifies the linear memory to query, and `heap` is the heap reference
+    /// returned by `make_heap` for the same index.
+    fn translate_memory_fill(
+        &mut self,
+        pos: FuncCursor,
+        index: MemoryIndex,
+        heap: ir::Heap,
+        dst: ir::Value,
+        val: ir::Value,
+        len: ir::Value,
+    ) -> WasmResult<()>;
+
+    /// Translate a `memory.init` WebAssembly instruction.
+    ///
+    /// The `index` provided identifies the linear memory to query, and `heap` is the heap reference
+    /// returned by `make_heap` for the same index. `seg_index` is the index of the segment to copy
+    /// from.
+    #[allow(clippy::too_many_arguments)]
+    fn translate_memory_init(
+        &mut self,
+        pos: FuncCursor,
+        index: MemoryIndex,
+        heap: ir::Heap,
+        seg_index: u32,
+        dst: ir::Value,
+        src: ir::Value,
+        len: ir::Value,
+    ) -> WasmResult<()>;
+
+    /// Translate a `data.drop` WebAssembly instruction.
+    fn translate_data_drop(&mut self, pos: FuncCursor, seg_index: u32) -> WasmResult<()>;
+
+    /// Translate a `table.size` WebAssembly instruction.
+    fn translate_table_size(
+        &mut self,
+        pos: FuncCursor,
+        index: TableIndex,
+        table: ir::Table,
+    ) -> WasmResult<ir::Value>;
+
+    /// Translate a `table.grow` WebAssembly instruction.
+    fn translate_table_grow(
+        &mut self,
+        pos: FuncCursor,
+        table_index: u32,
+        delta: ir::Value,
+        init_value: ir::Value,
+    ) -> WasmResult<ir::Value>;
+
+    /// Translate a `table.get` WebAssembly instruction.
+    fn translate_table_get(
+        &mut self,
+        pos: FuncCursor,
+        table_index: u32,
+        index: ir::Value,
+    ) -> WasmResult<ir::Value>;
+
+    /// Translate a `table.set` WebAssembly instruction.
+    fn translate_table_set(
+        &mut self,
+        pos: FuncCursor,
+        table_index: u32,
+        value: ir::Value,
+        index: ir::Value,
+    ) -> WasmResult<()>;
+
+    /// Translate a `table.copy` WebAssembly instruction.
+    #[allow(clippy::too_many_arguments)]
+    fn translate_table_copy(
+        &mut self,
+        pos: FuncCursor,
+        dst_table_index: TableIndex,
+        dst_table: ir::Table,
+        src_table_index: TableIndex,
+        src_table: ir::Table,
+        dst: ir::Value,
+        src: ir::Value,
+        len: ir::Value,
+    ) -> WasmResult<()>;
+
+    /// Translate a `table.fill` WebAssembly instruction.
+    fn translate_table_fill(
+        &mut self,
+        pos: FuncCursor,
+        table_index: u32,
+        dst: ir::Value,
+        val: ir::Value,
+        len: ir::Value,
+    ) -> WasmResult<()>;
+
+    /// Translate a `table.init` WebAssembly instruction.
+    #[allow(clippy::too_many_arguments)]
+    fn translate_table_init(
+        &mut self,
+        pos: FuncCursor,
+        seg_index: u32,
+        table_index: TableIndex,
+        table: ir::Table,
+        dst: ir::Value,
+        src: ir::Value,
+        len: ir::Value,
+    ) -> WasmResult<()>;
+
+    /// Translate an `elem.drop` WebAssembly instruction.
+    fn translate_elem_drop(&mut self, pos: FuncCursor, seg_index: u32) -> WasmResult<()>;
+
+    /// Translate a `ref.func` WebAssembly instruction.
+    fn translate_ref_func(&mut self, pos: FuncCursor, func_index: u32) -> WasmResult<ir::Value>;
+
+    /// Translate a `global.get` WebAssembly instruction at `pos` for a global
+    /// that is custom.
+    fn translate_custom_global_get(
+        &mut self,
+        pos: FuncCursor,
+        global_index: GlobalIndex,
+    ) -> WasmResult<ir::Value>;
+
+    /// Translate a `global.set` WebAssembly instruction at `pos` for a global
+    /// that is custom.
+ fn translate_custom_global_set( + &mut self, + pos: FuncCursor, + global_index: GlobalIndex, + val: ir::Value, + ) -> WasmResult<()>; + + /// Emit code at the beginning of every wasm loop. + /// + /// This can be used to insert explicit interrupt or safepoint checking at + /// the beginnings of loops. + fn translate_loop_header(&mut self, _pos: FuncCursor) -> WasmResult<()> { + // By default, don't emit anything. + Ok(()) + } + + /// Optional callback for the `FunctionEnvironment` performing this translation to maintain + /// internal state or prepare custom state for the operator to translate + fn before_translate_operator( + &mut self, + _op: &Operator, + _builder: &mut FunctionBuilder, + _state: &FuncTranslationState, + ) -> WasmResult<()> { + Ok(()) + } + + /// Optional callback for the `FunctionEnvironment` performing this translation to maintain + /// internal state or finalize custom state for the operator that was translated + fn after_translate_operator( + &mut self, + _op: &Operator, + _builder: &mut FunctionBuilder, + _state: &FuncTranslationState, + ) -> WasmResult<()> { + Ok(()) + } +} + +/// An object satisfying the `ModuleEnvironment` trait can be passed as argument to the +/// [`translate_module`](fn.translate_module.html) function. These methods should not be called +/// by the user, they are only for `cranelift-wasm` internal use. +pub trait ModuleEnvironment<'data>: TargetEnvironment { + /// Provides the number of signatures up front. By default this does nothing, but + /// implementations can use this to preallocate memory if desired. + fn reserve_signatures(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } + + /// Declares a function signature to the environment. + fn declare_signature(&mut self, sig: ir::Signature) -> WasmResult<()>; + + /// Provides the number of imports up front. By default this does nothing, but + /// implementations can use this to preallocate memory if desired. + fn reserve_imports(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } + + /// Declares a function import to the environment. + fn declare_func_import( + &mut self, + sig_index: SignatureIndex, + module: &'data str, + field: &'data str, + ) -> WasmResult<()>; + + /// Declares a table import to the environment. + fn declare_table_import( + &mut self, + table: Table, + module: &'data str, + field: &'data str, + ) -> WasmResult<()>; + + /// Declares a memory import to the environment. + fn declare_memory_import( + &mut self, + memory: Memory, + module: &'data str, + field: &'data str, + ) -> WasmResult<()>; + + /// Declares a global import to the environment. + fn declare_global_import( + &mut self, + global: Global, + module: &'data str, + field: &'data str, + ) -> WasmResult<()>; + + /// Notifies the implementation that all imports have been declared. + fn finish_imports(&mut self) -> WasmResult<()> { + Ok(()) + } + + /// Provides the number of defined functions up front. By default this does nothing, but + /// implementations can use this to preallocate memory if desired. + fn reserve_func_types(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } + + /// Declares the type (signature) of a local function in the module. + fn declare_func_type(&mut self, sig_index: SignatureIndex) -> WasmResult<()>; + + /// Provides the number of defined tables up front. By default this does nothing, but + /// implementations can use this to preallocate memory if desired. + fn reserve_tables(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } + + /// Declares a table to the environment. 
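+    ///
+    /// Implementations usually just record the declaration for later use; the
+    /// `DummyEnvironment` in this crate, for instance, effectively does (sketch):
+    ///
+    /// ```ignore
+    /// fn declare_table(&mut self, table: Table) -> WasmResult<()> {
+    ///     self.info.tables.push(Exportable::new(table));
+    ///     Ok(())
+    /// }
+    /// ```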
+ fn declare_table(&mut self, table: Table) -> WasmResult<()>; + + /// Provides the number of defined memories up front. By default this does nothing, but + /// implementations can use this to preallocate memory if desired. + fn reserve_memories(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } + + /// Declares a memory to the environment + fn declare_memory(&mut self, memory: Memory) -> WasmResult<()>; + + /// Provides the number of defined globals up front. By default this does nothing, but + /// implementations can use this to preallocate memory if desired. + fn reserve_globals(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } + + /// Declares a global to the environment. + fn declare_global(&mut self, global: Global) -> WasmResult<()>; + + /// Provides the number of exports up front. By default this does nothing, but + /// implementations can use this to preallocate memory if desired. + fn reserve_exports(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } + + /// Declares a function export to the environment. + fn declare_func_export(&mut self, func_index: FuncIndex, name: &'data str) -> WasmResult<()>; + + /// Declares a table export to the environment. + fn declare_table_export(&mut self, table_index: TableIndex, name: &'data str) + -> WasmResult<()>; + + /// Declares a memory export to the environment. + fn declare_memory_export( + &mut self, + memory_index: MemoryIndex, + name: &'data str, + ) -> WasmResult<()>; + + /// Declares a global export to the environment. + fn declare_global_export( + &mut self, + global_index: GlobalIndex, + name: &'data str, + ) -> WasmResult<()>; + + /// Notifies the implementation that all exports have been declared. + fn finish_exports(&mut self) -> WasmResult<()> { + Ok(()) + } + + /// Declares the optional start function. + fn declare_start_func(&mut self, index: FuncIndex) -> WasmResult<()>; + + /// Provides the number of element initializers up front. By default this does nothing, but + /// implementations can use this to preallocate memory if desired. + fn reserve_table_elements(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } + + /// Fills a declared table with references to functions in the module. + fn declare_table_elements( + &mut self, + table_index: TableIndex, + base: Option, + offset: usize, + elements: Box<[FuncIndex]>, + ) -> WasmResult<()>; + + /// Declare a passive element segment. + fn declare_passive_element( + &mut self, + index: PassiveElemIndex, + elements: Box<[FuncIndex]>, + ) -> WasmResult<()>; + + /// Provides the number of passive data segments up front. + /// + /// By default this does nothing, but implementations may use this to + /// pre-allocate memory if desired. + fn reserve_passive_data(&mut self, count: u32) -> WasmResult<()> { + let _ = count; + Ok(()) + } + + /// Declare a passive data segment. + fn declare_passive_data( + &mut self, + data_index: PassiveDataIndex, + data: &'data [u8], + ) -> WasmResult<()>; + + /// Provides the contents of a function body. + /// + /// Note there's no `reserve_function_bodies` function because the number of + /// functions is already provided by `reserve_func_types`. + fn define_function_body( + &mut self, + module_translation_state: &ModuleTranslationState, + body_bytes: &'data [u8], + body_offset: usize, + ) -> WasmResult<()>; + + /// Provides the number of data initializers up front. By default this does nothing, but + /// implementations can use this to preallocate memory if desired. 
+ fn reserve_data_initializers(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } + + /// Fills a declared memory with bytes at module instantiation. + fn declare_data_initialization( + &mut self, + memory_index: MemoryIndex, + base: Option, + offset: usize, + data: &'data [u8], + ) -> WasmResult<()>; + + /// Declares the name of a function to the environment. + /// + /// By default this does nothing, but implementations can use this to read + /// the function name subsection of the custom name section if desired. + fn declare_func_name(&mut self, _func_index: FuncIndex, _name: &'data str) -> WasmResult<()> { + Ok(()) + } + + /// Indicates that a custom section has been found in the wasm file + fn custom_section(&mut self, _name: &'data str, _data: &'data [u8]) -> WasmResult<()> { + Ok(()) + } +} diff --git a/cranelift/wasm/src/func_translator.rs b/cranelift/wasm/src/func_translator.rs new file mode 100644 index 0000000000..27431be148 --- /dev/null +++ b/cranelift/wasm/src/func_translator.rs @@ -0,0 +1,433 @@ +//! Stand-alone WebAssembly to Cranelift IR translator. +//! +//! This module defines the `FuncTranslator` type which can translate a single WebAssembly +//! function to Cranelift IR guided by a `FuncEnvironment` which provides information about the +//! WebAssembly module and the runtime environment. + +use crate::code_translator::{bitcast_arguments, translate_operator, wasm_param_types}; +use crate::environ::{FuncEnvironment, ReturnMode, WasmResult}; +use crate::state::{FuncTranslationState, ModuleTranslationState}; +use crate::translation_utils::get_vmctx_value_label; +use crate::wasm_unsupported; +use cranelift_codegen::entity::EntityRef; +use cranelift_codegen::ir::{self, Block, InstBuilder, ValueLabel}; +use cranelift_codegen::timing; +use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext, Variable}; +use log::info; +use wasmparser::{self, BinaryReader}; + +/// WebAssembly to Cranelift IR function translator. +/// +/// A `FuncTranslator` is used to translate a binary WebAssembly function into Cranelift IR guided +/// by a `FuncEnvironment` object. A single translator instance can be reused to translate multiple +/// functions which will reduce heap allocation traffic. +pub struct FuncTranslator { + func_ctx: FunctionBuilderContext, + state: FuncTranslationState, +} + +impl FuncTranslator { + /// Create a new translator. + pub fn new() -> Self { + Self { + func_ctx: FunctionBuilderContext::new(), + state: FuncTranslationState::new(), + } + } + + /// Translate a binary WebAssembly function. + /// + /// The `code` slice contains the binary WebAssembly *function code* as it appears in the code + /// section of a WebAssembly module, not including the initial size of the function code. The + /// slice is expected to contain two parts: + /// + /// - The declaration of *locals*, and + /// - The function *body* as an expression. + /// + /// See [the WebAssembly specification][wasm]. + /// + /// [wasm]: https://webassembly.github.io/spec/core/binary/modules.html#code-section + /// + /// The Cranelift IR function `func` should be completely empty except for the `func.signature` + /// and `func.name` fields. The signature may contain special-purpose arguments which are not + /// regarded as WebAssembly local variables. Any signature arguments marked as + /// `ArgumentPurpose::Normal` are made accessible as WebAssembly local variables. 
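+    ///
+    /// A typical call mirrors the unit tests at the bottom of this file (sketch; `BODY`
+    /// holds the raw function-code bytes and `env` implements `FuncEnvironment`):
+    ///
+    /// ```ignore
+    /// let mut trans = FuncTranslator::new();
+    /// let state = ModuleTranslationState::new();
+    /// trans.translate(&state, &BODY, 0, &mut ctx.func, &mut env)?;
+    /// ```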
+ /// + pub fn translate( + &mut self, + module_translation_state: &ModuleTranslationState, + code: &[u8], + code_offset: usize, + func: &mut ir::Function, + environ: &mut FE, + ) -> WasmResult<()> { + self.translate_from_reader( + module_translation_state, + BinaryReader::new_with_offset(code, code_offset), + func, + environ, + ) + } + + /// Translate a binary WebAssembly function from a `BinaryReader`. + pub fn translate_from_reader( + &mut self, + module_translation_state: &ModuleTranslationState, + mut reader: BinaryReader, + func: &mut ir::Function, + environ: &mut FE, + ) -> WasmResult<()> { + let _tt = timing::wasm_translate_function(); + info!( + "translate({} bytes, {}{})", + reader.bytes_remaining(), + func.name, + func.signature + ); + debug_assert_eq!(func.dfg.num_blocks(), 0, "Function must be empty"); + debug_assert_eq!(func.dfg.num_insts(), 0, "Function must be empty"); + + // This clears the `FunctionBuilderContext`. + let mut builder = FunctionBuilder::new(func, &mut self.func_ctx); + builder.set_srcloc(cur_srcloc(&reader)); + let entry_block = builder.create_block(); + builder.append_block_params_for_function_params(entry_block); + builder.switch_to_block(entry_block); // This also creates values for the arguments. + builder.seal_block(entry_block); // Declare all predecessors known. + + // Make sure the entry block is inserted in the layout before we make any callbacks to + // `environ`. The callback functions may need to insert things in the entry block. + builder.ensure_inserted_block(); + + let num_params = declare_wasm_parameters(&mut builder, entry_block, environ); + + // Set up the translation state with a single pushed control block representing the whole + // function and its return values. + let exit_block = builder.create_block(); + builder.append_block_params_for_function_returns(exit_block); + self.state.initialize(&builder.func.signature, exit_block); + + parse_local_decls(&mut reader, &mut builder, num_params, environ)?; + parse_function_body( + module_translation_state, + reader, + &mut builder, + &mut self.state, + environ, + )?; + + builder.finalize(); + Ok(()) + } +} + +/// Declare local variables for the signature parameters that correspond to WebAssembly locals. +/// +/// Return the number of local variables declared. +fn declare_wasm_parameters( + builder: &mut FunctionBuilder, + entry_block: Block, + environ: &FE, +) -> usize { + let sig_len = builder.func.signature.params.len(); + let mut next_local = 0; + for i in 0..sig_len { + let param_type = builder.func.signature.params[i]; + // There may be additional special-purpose parameters in addition to the normal WebAssembly + // signature parameters. For example, a `vmctx` pointer. + if environ.is_wasm_parameter(&builder.func.signature, i) { + // This is a normal WebAssembly signature parameter, so create a local for it. + let local = Variable::new(next_local); + builder.declare_var(local, param_type.value_type); + next_local += 1; + + let param_value = builder.block_params(entry_block)[i]; + builder.def_var(local, param_value); + } + if param_type.purpose == ir::ArgumentPurpose::VMContext { + let param_value = builder.block_params(entry_block)[i]; + builder.set_val_label(param_value, get_vmctx_value_label()); + } + } + + next_local +} + +/// Parse the local variable declarations that precede the function body. +/// +/// Declare local variables, starting from `num_params`. 
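+///
+/// In the binary format, the declarations are a vector of `(count, type)` runs; for
+/// example the bytes `0x01 0x01 0x7f` declare a single run of one `i32` local (this is
+/// exactly the encoding used by the `infloop` test below).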
+fn parse_local_decls( + reader: &mut BinaryReader, + builder: &mut FunctionBuilder, + num_params: usize, + environ: &mut FE, +) -> WasmResult<()> { + let mut next_local = num_params; + let local_count = reader.read_local_count()?; + + let mut locals_total = 0; + for _ in 0..local_count { + builder.set_srcloc(cur_srcloc(reader)); + let (count, ty) = reader.read_local_decl(&mut locals_total)?; + declare_locals(builder, count, ty, &mut next_local, environ)?; + } + + Ok(()) +} + +/// Declare `count` local variables of the same type, starting from `next_local`. +/// +/// Fail of too many locals are declared in the function, or if the type is not valid for a local. +fn declare_locals( + builder: &mut FunctionBuilder, + count: u32, + wasm_type: wasmparser::Type, + next_local: &mut usize, + environ: &mut FE, +) -> WasmResult<()> { + // All locals are initialized to 0. + use wasmparser::Type::*; + let zeroval = match wasm_type { + I32 => builder.ins().iconst(ir::types::I32, 0), + I64 => builder.ins().iconst(ir::types::I64, 0), + F32 => builder.ins().f32const(ir::immediates::Ieee32::with_bits(0)), + F64 => builder.ins().f64const(ir::immediates::Ieee64::with_bits(0)), + V128 => { + let constant_handle = builder.func.dfg.constants.insert([0; 16].to_vec().into()); + builder.ins().vconst(ir::types::I8X16, constant_handle) + } + NullRef => builder.ins().null(environ.reference_type()), + AnyRef => builder.ins().null(environ.reference_type()), + AnyFunc => builder.ins().null(environ.reference_type()), + ty => return Err(wasm_unsupported!("unsupported local type {:?}", ty)), + }; + + let ty = builder.func.dfg.value_type(zeroval); + for _ in 0..count { + let local = Variable::new(*next_local); + builder.declare_var(local, ty); + builder.def_var(local, zeroval); + builder.set_val_label(zeroval, ValueLabel::new(*next_local)); + *next_local += 1; + } + Ok(()) +} + +/// Parse the function body in `reader`. +/// +/// This assumes that the local variable declarations have already been parsed and function +/// arguments and locals are declared in the builder. +fn parse_function_body( + module_translation_state: &ModuleTranslationState, + mut reader: BinaryReader, + builder: &mut FunctionBuilder, + state: &mut FuncTranslationState, + environ: &mut FE, +) -> WasmResult<()> { + // The control stack is initialized with a single block representing the whole function. + debug_assert_eq!(state.control_stack.len(), 1, "State not initialized"); + + // Keep going until the final `End` operator which pops the outermost block. + while !state.control_stack.is_empty() { + builder.set_srcloc(cur_srcloc(&reader)); + let op = reader.read_operator()?; + environ.before_translate_operator(&op, builder, state)?; + translate_operator(module_translation_state, &op, builder, state, environ)?; + environ.after_translate_operator(&op, builder, state)?; + } + + // The final `End` operator left us in the exit block where we need to manually add a return + // instruction. + // + // If the exit block is unreachable, it may not have the correct arguments, so we would + // generate a return instruction that doesn't match the signature. 
+ if state.reachable { + debug_assert!(builder.is_pristine()); + if !builder.is_unreachable() { + match environ.return_mode() { + ReturnMode::NormalReturns => { + let return_types = wasm_param_types(&builder.func.signature.returns, |i| { + environ.is_wasm_return(&builder.func.signature, i) + }); + bitcast_arguments(&mut state.stack, &return_types, builder); + builder.ins().return_(&state.stack) + } + ReturnMode::FallthroughReturn => builder.ins().fallthrough_return(&state.stack), + }; + } + } + + // Discard any remaining values on the stack. Either we just returned them, + // or the end of the function is unreachable. + state.stack.clear(); + + debug_assert!(reader.eof()); + + Ok(()) +} + +/// Get the current source location from a reader. +fn cur_srcloc(reader: &BinaryReader) -> ir::SourceLoc { + // We record source locations as byte code offsets relative to the beginning of the file. + // This will wrap around if byte code is larger than 4 GB. + ir::SourceLoc::new(reader.original_position() as u32) +} + +#[cfg(test)] +mod tests { + use super::{FuncTranslator, ReturnMode}; + use crate::environ::DummyEnvironment; + use crate::ModuleTranslationState; + use cranelift_codegen::ir::types::I32; + use cranelift_codegen::{ir, isa, settings, Context}; + use log::debug; + use target_lexicon::PointerWidth; + + #[test] + fn small1() { + // Implicit return. + // + // (func $small1 (param i32) (result i32) + // (i32.add (get_local 0) (i32.const 1)) + // ) + const BODY: [u8; 7] = [ + 0x00, // local decl count + 0x20, 0x00, // get_local 0 + 0x41, 0x01, // i32.const 1 + 0x6a, // i32.add + 0x0b, // end + ]; + + let mut trans = FuncTranslator::new(); + let flags = settings::Flags::new(settings::builder()); + let runtime = DummyEnvironment::new( + isa::TargetFrontendConfig { + default_call_conv: isa::CallConv::Fast, + pointer_width: PointerWidth::U64, + }, + ReturnMode::NormalReturns, + false, + ); + + let module_translation_state = ModuleTranslationState::new(); + let mut ctx = Context::new(); + + ctx.func.name = ir::ExternalName::testcase("small1"); + ctx.func.signature.params.push(ir::AbiParam::new(I32)); + ctx.func.signature.returns.push(ir::AbiParam::new(I32)); + + trans + .translate( + &module_translation_state, + &BODY, + 0, + &mut ctx.func, + &mut runtime.func_env(), + ) + .unwrap(); + debug!("{}", ctx.func.display(None)); + ctx.verify(&flags).unwrap(); + } + + #[test] + fn small2() { + // Same as above, but with an explicit return instruction. 
+ // + // (func $small2 (param i32) (result i32) + // (return (i32.add (get_local 0) (i32.const 1))) + // ) + const BODY: [u8; 8] = [ + 0x00, // local decl count + 0x20, 0x00, // get_local 0 + 0x41, 0x01, // i32.const 1 + 0x6a, // i32.add + 0x0f, // return + 0x0b, // end + ]; + + let mut trans = FuncTranslator::new(); + let flags = settings::Flags::new(settings::builder()); + let runtime = DummyEnvironment::new( + isa::TargetFrontendConfig { + default_call_conv: isa::CallConv::Fast, + pointer_width: PointerWidth::U64, + }, + ReturnMode::NormalReturns, + false, + ); + + let module_translation_state = ModuleTranslationState::new(); + let mut ctx = Context::new(); + + ctx.func.name = ir::ExternalName::testcase("small2"); + ctx.func.signature.params.push(ir::AbiParam::new(I32)); + ctx.func.signature.returns.push(ir::AbiParam::new(I32)); + + trans + .translate( + &module_translation_state, + &BODY, + 0, + &mut ctx.func, + &mut runtime.func_env(), + ) + .unwrap(); + debug!("{}", ctx.func.display(None)); + ctx.verify(&flags).unwrap(); + } + + #[test] + fn infloop() { + // An infinite loop, no return instructions. + // + // (func $infloop (result i32) + // (local i32) + // (loop (result i32) + // (i32.add (get_local 0) (i32.const 1)) + // (set_local 0) + // (br 0) + // ) + // ) + const BODY: [u8; 16] = [ + 0x01, // 1 local decl. + 0x01, 0x7f, // 1 i32 local. + 0x03, 0x7f, // loop i32 + 0x20, 0x00, // get_local 0 + 0x41, 0x01, // i32.const 0 + 0x6a, // i32.add + 0x21, 0x00, // set_local 0 + 0x0c, 0x00, // br 0 + 0x0b, // end + 0x0b, // end + ]; + + let mut trans = FuncTranslator::new(); + let flags = settings::Flags::new(settings::builder()); + let runtime = DummyEnvironment::new( + isa::TargetFrontendConfig { + default_call_conv: isa::CallConv::Fast, + pointer_width: PointerWidth::U64, + }, + ReturnMode::NormalReturns, + false, + ); + + let module_translation_state = ModuleTranslationState::new(); + let mut ctx = Context::new(); + + ctx.func.name = ir::ExternalName::testcase("infloop"); + ctx.func.signature.returns.push(ir::AbiParam::new(I32)); + + trans + .translate( + &module_translation_state, + &BODY, + 0, + &mut ctx.func, + &mut runtime.func_env(), + ) + .unwrap(); + debug!("{}", ctx.func.display(None)); + ctx.verify(&flags).unwrap(); + } +} diff --git a/cranelift/wasm/src/lib.rs b/cranelift/wasm/src/lib.rs new file mode 100644 index 0000000000..b98c95b466 --- /dev/null +++ b/cranelift/wasm/src/lib.rs @@ -0,0 +1,75 @@ +//! Performs translation from a wasm module in binary format to the in-memory form +//! of Cranelift IR. More particularly, it translates the code of all the functions bodies and +//! interacts with an environment implementing the +//! [`ModuleEnvironment`](trait.ModuleEnvironment.html) +//! trait to deal with tables, globals and linear memory. +//! +//! The crate provides a `DummyEnvironment` struct that will allow to translate the code of the +//! functions but will fail at execution. +//! +//! The main function of this module is [`translate_module`](fn.translate_module.html). 
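+//!
+//! A minimal end-to-end use looks roughly like this (sketch; error handling and the
+//! target configuration are elided):
+//!
+//! ```ignore
+//! use cranelift_wasm::{translate_module, DummyEnvironment, ReturnMode};
+//!
+//! let mut env = DummyEnvironment::new(target_config, ReturnMode::NormalReturns, false);
+//! let state = translate_module(&wasm_bytes, &mut env)?;
+//! // The collected module metadata and translated function bodies now live in the
+//! // environment's `info`.
+//! ```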
+ +#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)] +#![warn(unused_import_braces)] +#![cfg_attr(feature = "std", deny(unstable_features))] +#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::new_without_default))] +#![cfg_attr( + feature = "cargo-clippy", + warn( + clippy::float_arithmetic, + clippy::mut_mut, + clippy::nonminimal_bool, + clippy::option_map_unwrap_or, + clippy::option_map_unwrap_or_else, + clippy::print_stdout, + clippy::unicode_not_nfc, + clippy::use_self + ) +)] +#![no_std] + +#[cfg(not(feature = "std"))] +#[macro_use] +extern crate alloc as std; +#[cfg(feature = "std")] +#[macro_use] +extern crate std; + +#[cfg(not(feature = "std"))] +use hashbrown::{ + hash_map, + hash_map::Entry::{Occupied, Vacant}, + HashMap, +}; +#[cfg(feature = "std")] +use std::collections::{ + hash_map, + hash_map::Entry::{Occupied, Vacant}, + HashMap, +}; + +mod code_translator; +mod environ; +mod func_translator; +mod module_translator; +mod sections_translator; +mod state; +mod translation_utils; + +pub use crate::environ::{ + DummyEnvironment, FuncEnvironment, GlobalVariable, ModuleEnvironment, ReturnMode, + TargetEnvironment, WasmError, WasmResult, +}; +pub use crate::func_translator::FuncTranslator; +pub use crate::module_translator::translate_module; +pub use crate::state::func_state::FuncTranslationState; +pub use crate::state::module_state::ModuleTranslationState; +pub use crate::translation_utils::{ + get_vmctx_value_label, DefinedFuncIndex, DefinedGlobalIndex, DefinedMemoryIndex, + DefinedTableIndex, FuncIndex, Global, GlobalIndex, GlobalInit, Memory, MemoryIndex, + PassiveDataIndex, PassiveElemIndex, SignatureIndex, Table, TableElementType, TableIndex, +}; + +/// Version number of this crate. +pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/cranelift/wasm/src/module_translator.rs b/cranelift/wasm/src/module_translator.rs new file mode 100644 index 0000000000..e31e6f09f8 --- /dev/null +++ b/cranelift/wasm/src/module_translator.rs @@ -0,0 +1,93 @@ +//! Translation skeleton that traverses the whole WebAssembly module and call helper functions +//! to deal with each part of it. +use crate::environ::{ModuleEnvironment, WasmResult}; +use crate::sections_translator::{ + parse_code_section, parse_data_section, parse_element_section, parse_export_section, + parse_function_section, parse_global_section, parse_import_section, parse_memory_section, + parse_name_section, parse_start_section, parse_table_section, parse_type_section, +}; +use crate::state::ModuleTranslationState; +use cranelift_codegen::timing; +use wasmparser::{CustomSectionContent, ModuleReader, SectionContent}; + +/// Translate a sequence of bytes forming a valid Wasm binary into a list of valid Cranelift IR +/// [`Function`](cranelift_codegen::ir::Function). +pub fn translate_module<'data>( + data: &'data [u8], + environ: &mut dyn ModuleEnvironment<'data>, +) -> WasmResult { + let _tt = timing::wasm_translate_module(); + let mut reader = ModuleReader::new(data)?; + let mut module_translation_state = ModuleTranslationState::new(); + + while !reader.eof() { + let section = reader.read()?; + match section.content()? 
{ + SectionContent::Type(types) => { + parse_type_section(types, &mut module_translation_state, environ)?; + } + + SectionContent::Import(imports) => { + parse_import_section(imports, environ)?; + } + + SectionContent::Function(functions) => { + parse_function_section(functions, environ)?; + } + + SectionContent::Table(tables) => { + parse_table_section(tables, environ)?; + } + + SectionContent::Memory(memories) => { + parse_memory_section(memories, environ)?; + } + + SectionContent::Global(globals) => { + parse_global_section(globals, environ)?; + } + + SectionContent::Export(exports) => { + parse_export_section(exports, environ)?; + } + + SectionContent::Start(start) => { + parse_start_section(start, environ)?; + } + + SectionContent::Element(elements) => { + parse_element_section(elements, environ)?; + } + + SectionContent::Code(code) => { + parse_code_section(code, &module_translation_state, environ)?; + } + + SectionContent::Data(data) => { + parse_data_section(data, environ)?; + } + + SectionContent::DataCount(count) => { + environ.reserve_passive_data(count)?; + } + + SectionContent::Custom { + name, + binary, + content, + } => match content { + Some(CustomSectionContent::Name(names)) => { + parse_name_section(names, environ)?; + } + _ => { + let mut reader = binary.clone(); + let len = reader.bytes_remaining(); + let payload = reader.read_bytes(len)?; + environ.custom_section(name, payload)?; + } + }, + } + } + + Ok(module_translation_state) +} diff --git a/cranelift/wasm/src/sections_translator.rs b/cranelift/wasm/src/sections_translator.rs new file mode 100644 index 0000000000..6759a60c3f --- /dev/null +++ b/cranelift/wasm/src/sections_translator.rs @@ -0,0 +1,463 @@ +//! Helper functions to gather information for each of the non-function sections of a +//! WebAssembly module. +//! +//! The code of these helper functions is straightforward since they only read metadata +//! about linear memories, tables, globals, etc. and store them for later use. +//! +//! The special case of the initialize expressions for table elements offsets or global variables +//! is handled, according to the semantics of WebAssembly, to only specific expressions that are +//! interpreted on the fly. +use crate::environ::{ModuleEnvironment, WasmError, WasmResult}; +use crate::state::ModuleTranslationState; +use crate::translation_utils::{ + tabletype_to_type, type_to_type, FuncIndex, Global, GlobalIndex, GlobalInit, Memory, + MemoryIndex, PassiveDataIndex, PassiveElemIndex, SignatureIndex, Table, TableElementType, + TableIndex, +}; +use crate::{wasm_unsupported, HashMap}; +use core::convert::TryFrom; +use cranelift_codegen::ir::immediates::V128Imm; +use cranelift_codegen::ir::{self, AbiParam, Signature}; +use cranelift_entity::packed_option::ReservedValue; +use cranelift_entity::EntityRef; +use std::boxed::Box; +use std::vec::Vec; +use wasmparser::{ + self, CodeSectionReader, Data, DataKind, DataSectionReader, Element, ElementItem, ElementItems, + ElementKind, ElementSectionReader, Export, ExportSectionReader, ExternalKind, FuncType, + FunctionSectionReader, GlobalSectionReader, GlobalType, ImportSectionEntryType, + ImportSectionReader, MemorySectionReader, MemoryType, NameSectionReader, Naming, NamingReader, + Operator, TableSectionReader, Type, TypeSectionReader, +}; + +/// Parses the Type section of the wasm module. 
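+///
+/// Each wasm function type is converted to a Cranelift `Signature` using the module's
+/// default calling convention; e.g. `(func (param i32 i64) (result f32))` becomes,
+/// roughly:
+///
+/// ```ignore
+/// let mut sig = Signature::new(default_call_conv);
+/// sig.params.push(AbiParam::new(ir::types::I32));
+/// sig.params.push(AbiParam::new(ir::types::I64));
+/// sig.returns.push(AbiParam::new(ir::types::F32));
+/// ```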
+pub fn parse_type_section( + types: TypeSectionReader, + module_translation_state: &mut ModuleTranslationState, + environ: &mut dyn ModuleEnvironment, +) -> WasmResult<()> { + let count = types.get_count(); + module_translation_state.wasm_types.reserve(count as usize); + environ.reserve_signatures(count)?; + + for entry in types { + match entry? { + FuncType { + form: wasmparser::Type::Func, + params, + returns, + } => { + let mut sig = + Signature::new(ModuleEnvironment::target_config(environ).default_call_conv); + sig.params.extend(params.iter().map(|ty| { + let cret_arg: ir::Type = type_to_type(*ty, environ) + .expect("only numeric types are supported in function signatures"); + AbiParam::new(cret_arg) + })); + sig.returns.extend(returns.iter().map(|ty| { + let cret_arg: ir::Type = type_to_type(*ty, environ) + .expect("only numeric types are supported in function signatures"); + AbiParam::new(cret_arg) + })); + environ.declare_signature(sig)?; + module_translation_state.wasm_types.push((params, returns)); + } + ty => { + return Err(wasm_unsupported!( + "unsupported type in type section: {:?}", + ty + )) + } + } + } + Ok(()) +} + +/// Parses the Import section of the wasm module. +pub fn parse_import_section<'data>( + imports: ImportSectionReader<'data>, + environ: &mut dyn ModuleEnvironment<'data>, +) -> WasmResult<()> { + environ.reserve_imports(imports.get_count())?; + + for entry in imports { + let import = entry?; + let module_name = import.module; + let field_name = import.field; + + match import.ty { + ImportSectionEntryType::Function(sig) => { + environ.declare_func_import( + SignatureIndex::from_u32(sig), + module_name, + field_name, + )?; + } + ImportSectionEntryType::Memory(MemoryType { + limits: ref memlimits, + shared, + }) => { + environ.declare_memory_import( + Memory { + minimum: memlimits.initial, + maximum: memlimits.maximum, + shared, + }, + module_name, + field_name, + )?; + } + ImportSectionEntryType::Global(ref ty) => { + environ.declare_global_import( + Global { + ty: type_to_type(ty.content_type, environ).unwrap(), + mutability: ty.mutable, + initializer: GlobalInit::Import, + }, + module_name, + field_name, + )?; + } + ImportSectionEntryType::Table(ref tab) => { + environ.declare_table_import( + Table { + ty: match tabletype_to_type(tab.element_type, environ)? { + Some(t) => TableElementType::Val(t), + None => TableElementType::Func, + }, + minimum: tab.limits.initial, + maximum: tab.limits.maximum, + }, + module_name, + field_name, + )?; + } + } + } + + environ.finish_imports()?; + Ok(()) +} + +/// Parses the Function section of the wasm module. +pub fn parse_function_section( + functions: FunctionSectionReader, + environ: &mut dyn ModuleEnvironment, +) -> WasmResult<()> { + let num_functions = functions.get_count(); + if num_functions == std::u32::MAX { + // We reserve `u32::MAX` for our own use in cranelift-entity. + return Err(WasmError::ImplLimitExceeded); + } + + environ.reserve_func_types(num_functions)?; + + for entry in functions { + let sigindex = entry?; + environ.declare_func_type(SignatureIndex::from_u32(sigindex))?; + } + + Ok(()) +} + +/// Parses the Table section of the wasm module. +pub fn parse_table_section( + tables: TableSectionReader, + environ: &mut dyn ModuleEnvironment, +) -> WasmResult<()> { + environ.reserve_tables(tables.get_count())?; + + for entry in tables { + let table = entry?; + environ.declare_table(Table { + ty: match tabletype_to_type(table.element_type, environ)? 
{ + Some(t) => TableElementType::Val(t), + None => TableElementType::Func, + }, + minimum: table.limits.initial, + maximum: table.limits.maximum, + })?; + } + + Ok(()) +} + +/// Parses the Memory section of the wasm module. +pub fn parse_memory_section( + memories: MemorySectionReader, + environ: &mut dyn ModuleEnvironment, +) -> WasmResult<()> { + environ.reserve_memories(memories.get_count())?; + + for entry in memories { + let memory = entry?; + environ.declare_memory(Memory { + minimum: memory.limits.initial, + maximum: memory.limits.maximum, + shared: memory.shared, + })?; + } + + Ok(()) +} + +/// Parses the Global section of the wasm module. +pub fn parse_global_section( + globals: GlobalSectionReader, + environ: &mut dyn ModuleEnvironment, +) -> WasmResult<()> { + environ.reserve_globals(globals.get_count())?; + + for entry in globals { + let wasmparser::Global { + ty: GlobalType { + content_type, + mutable, + }, + init_expr, + } = entry?; + let mut init_expr_reader = init_expr.get_binary_reader(); + let initializer = match init_expr_reader.read_operator()? { + Operator::I32Const { value } => GlobalInit::I32Const(value), + Operator::I64Const { value } => GlobalInit::I64Const(value), + Operator::F32Const { value } => GlobalInit::F32Const(value.bits()), + Operator::F64Const { value } => GlobalInit::F64Const(value.bits()), + Operator::V128Const { value } => { + GlobalInit::V128Const(V128Imm::from(value.bytes().to_vec().as_slice())) + } + Operator::RefNull => GlobalInit::RefNullConst, + Operator::RefFunc { function_index } => { + GlobalInit::RefFunc(FuncIndex::from_u32(function_index)) + } + Operator::GlobalGet { global_index } => { + GlobalInit::GetGlobal(GlobalIndex::from_u32(global_index)) + } + ref s => { + return Err(wasm_unsupported!( + "unsupported init expr in global section: {:?}", + s + )); + } + }; + let global = Global { + ty: type_to_type(content_type, environ).unwrap(), + mutability: mutable, + initializer, + }; + environ.declare_global(global)?; + } + + Ok(()) +} + +/// Parses the Export section of the wasm module. +pub fn parse_export_section<'data>( + exports: ExportSectionReader<'data>, + environ: &mut dyn ModuleEnvironment<'data>, +) -> WasmResult<()> { + environ.reserve_exports(exports.get_count())?; + + for entry in exports { + let Export { + field, + ref kind, + index, + } = entry?; + + // The input has already been validated, so we should be able to + // assume valid UTF-8 and use `from_utf8_unchecked` if performance + // becomes a concern here. + let index = index as usize; + match *kind { + ExternalKind::Function => environ.declare_func_export(FuncIndex::new(index), field)?, + ExternalKind::Table => environ.declare_table_export(TableIndex::new(index), field)?, + ExternalKind::Memory => { + environ.declare_memory_export(MemoryIndex::new(index), field)? + } + ExternalKind::Global => { + environ.declare_global_export(GlobalIndex::new(index), field)? + } + } + } + + environ.finish_exports()?; + Ok(()) +} + +/// Parses the Start section of the wasm module. +pub fn parse_start_section(index: u32, environ: &mut dyn ModuleEnvironment) -> WasmResult<()> { + environ.declare_start_func(FuncIndex::from_u32(index))?; + Ok(()) +} + +fn read_elems(items: &ElementItems) -> WasmResult> { + let items_reader = items.get_items_reader()?; + let mut elems = Vec::with_capacity(usize::try_from(items_reader.get_count()).unwrap()); + for item in items_reader { + let elem = match item? 
{ + ElementItem::Null => FuncIndex::reserved_value(), + ElementItem::Func(index) => FuncIndex::from_u32(index), + }; + elems.push(elem); + } + Ok(elems.into_boxed_slice()) +} + +/// Parses the Element section of the wasm module. +pub fn parse_element_section<'data>( + elements: ElementSectionReader<'data>, + environ: &mut dyn ModuleEnvironment, +) -> WasmResult<()> { + environ.reserve_table_elements(elements.get_count())?; + + for (index, entry) in elements.into_iter().enumerate() { + let Element { kind, items, ty } = entry?; + if ty != Type::AnyFunc { + return Err(wasm_unsupported!( + "unsupported table element type: {:?}", + ty + )); + } + let segments = read_elems(&items)?; + match kind { + ElementKind::Active { + table_index, + init_expr, + } => { + let mut init_expr_reader = init_expr.get_binary_reader(); + let (base, offset) = match init_expr_reader.read_operator()? { + Operator::I32Const { value } => (None, value as u32 as usize), + Operator::GlobalGet { global_index } => { + (Some(GlobalIndex::from_u32(global_index)), 0) + } + ref s => { + return Err(wasm_unsupported!( + "unsupported init expr in element section: {:?}", + s + )); + } + }; + environ.declare_table_elements( + TableIndex::from_u32(table_index), + base, + offset, + segments, + )? + } + ElementKind::Passive => { + let index = PassiveElemIndex::from_u32(index as u32); + environ.declare_passive_element(index, segments)?; + } + ElementKind::Declared => return Err(wasm_unsupported!("element kind declared")), + } + } + Ok(()) +} + +/// Parses the Code section of the wasm module. +pub fn parse_code_section<'data>( + code: CodeSectionReader<'data>, + module_translation_state: &ModuleTranslationState, + environ: &mut dyn ModuleEnvironment<'data>, +) -> WasmResult<()> { + for body in code { + let mut reader = body?.get_binary_reader(); + let size = reader.bytes_remaining(); + let offset = reader.original_position(); + environ.define_function_body(module_translation_state, reader.read_bytes(size)?, offset)?; + } + Ok(()) +} + +/// Parses the Data section of the wasm module. +pub fn parse_data_section<'data>( + data: DataSectionReader<'data>, + environ: &mut dyn ModuleEnvironment<'data>, +) -> WasmResult<()> { + environ.reserve_data_initializers(data.get_count())?; + + for (index, entry) in data.into_iter().enumerate() { + let Data { kind, data } = entry?; + match kind { + DataKind::Active { + memory_index, + init_expr, + } => { + let mut init_expr_reader = init_expr.get_binary_reader(); + let (base, offset) = match init_expr_reader.read_operator()? { + Operator::I32Const { value } => (None, value as u32 as usize), + Operator::GlobalGet { global_index } => { + (Some(GlobalIndex::from_u32(global_index)), 0) + } + ref s => { + return Err(wasm_unsupported!( + "unsupported init expr in data section: {:?}", + s + )) + } + }; + environ.declare_data_initialization( + MemoryIndex::from_u32(memory_index), + base, + offset, + data, + )?; + } + DataKind::Passive => { + let index = PassiveDataIndex::from_u32(index as u32); + environ.declare_passive_data(index, data)?; + } + } + } + + Ok(()) +} + +/// Parses the Name section of the wasm module. 
+pub fn parse_name_section<'data>( + mut names: NameSectionReader<'data>, + environ: &mut dyn ModuleEnvironment<'data>, +) -> WasmResult<()> { + while let Ok(subsection) = names.read() { + match subsection { + wasmparser::Name::Function(function_subsection) => { + if let Some(function_names) = function_subsection + .get_map() + .ok() + .and_then(parse_function_name_subsection) + { + for (index, name) in function_names { + environ.declare_func_name(index, name)?; + } + } + return Ok(()); + } + wasmparser::Name::Local(_) | wasmparser::Name::Module(_) => {} + }; + } + Ok(()) +} + +fn parse_function_name_subsection( + mut naming_reader: NamingReader<'_>, +) -> Option> { + let mut function_names = HashMap::new(); + for _ in 0..naming_reader.get_count() { + let Naming { index, name } = naming_reader.read().ok()?; + if index == std::u32::MAX { + // We reserve `u32::MAX` for our own use in cranelift-entity. + return None; + } + + if function_names + .insert(FuncIndex::from_u32(index), name) + .is_some() + { + // If the function index has been previously seen, then we + // break out of the loop and early return `None`, because these + // should be unique. + return None; + } + } + Some(function_names) +} diff --git a/cranelift/wasm/src/state/func_state.rs b/cranelift/wasm/src/state/func_state.rs new file mode 100644 index 0000000000..81a0d35e97 --- /dev/null +++ b/cranelift/wasm/src/state/func_state.rs @@ -0,0 +1,514 @@ +//! WebAssembly module and function translation state. +//! +//! The `ModuleTranslationState` struct defined in this module is used to keep track of data about +//! the whole WebAssembly module, such as the decoded type signatures. +//! +//! The `FuncTranslationState` struct defined in this module is used to keep track of the WebAssembly +//! value and control stacks during the translation of a single function. + +use crate::environ::{FuncEnvironment, GlobalVariable, WasmResult}; +use crate::translation_utils::{FuncIndex, GlobalIndex, MemoryIndex, SignatureIndex, TableIndex}; +use crate::{HashMap, Occupied, Vacant}; +use cranelift_codegen::ir::{self, Block, Inst, Value}; +use std::vec::Vec; + +/// Information about the presence of an associated `else` for an `if`, or the +/// lack thereof. +#[derive(Debug)] +pub enum ElseData { + /// The `if` does not already have an `else` block. + /// + /// This doesn't mean that it will never have an `else`, just that we + /// haven't seen it yet. + NoElse { + /// If we discover that we need an `else` block, this is the jump + /// instruction that needs to be fixed up to point to the new `else` + /// block rather than the destination block after the `if...end`. + branch_inst: Inst, + }, + + /// We have already allocated an `else` block. + /// + /// Usually we don't know whether we will hit an `if .. end` or an `if + /// .. else .. end`, but sometimes we can tell based on the block's type + /// signature that the signature is not valid if there isn't an `else`. In + /// these cases, we pre-allocate the `else` block. + WithElse { + /// This is the `else` block. + else_block: Block, + }, +} + +/// A control stack frame can be an `if`, a `block` or a `loop`, each one having the following +/// fields: +/// +/// - `destination`: reference to the `Block` that will hold the code after the control block; +/// - `num_return_values`: number of values returned by the control block; +/// - `original_stack_size`: size of the value stack at the beginning of the control block. 
+/// +/// Moreover, the `if` frame has the `branch_inst` field that points to the `brz` instruction +/// separating the `true` and `false` branch. The `loop` frame has a `header` field that references +/// the `Block` that contains the beginning of the body of the loop. +#[derive(Debug)] +pub enum ControlStackFrame { + If { + destination: Block, + else_data: ElseData, + num_param_values: usize, + num_return_values: usize, + original_stack_size: usize, + exit_is_branched_to: bool, + blocktype: wasmparser::TypeOrFuncType, + /// Was the head of the `if` reachable? + head_is_reachable: bool, + /// What was the reachability at the end of the consequent? + /// + /// This is `None` until we're finished translating the consequent, and + /// is set to `Some` either by hitting an `else` when we will begin + /// translating the alternative, or by hitting an `end` in which case + /// there is no alternative. + consequent_ends_reachable: Option, + // Note: no need for `alternative_ends_reachable` because that is just + // `state.reachable` when we hit the `end` in the `if .. else .. end`. + }, + Block { + destination: Block, + num_param_values: usize, + num_return_values: usize, + original_stack_size: usize, + exit_is_branched_to: bool, + }, + Loop { + destination: Block, + header: Block, + num_param_values: usize, + num_return_values: usize, + original_stack_size: usize, + }, +} + +/// Helper methods for the control stack objects. +impl ControlStackFrame { + pub fn num_return_values(&self) -> usize { + match *self { + Self::If { + num_return_values, .. + } + | Self::Block { + num_return_values, .. + } + | Self::Loop { + num_return_values, .. + } => num_return_values, + } + } + pub fn num_param_values(&self) -> usize { + match *self { + Self::If { + num_param_values, .. + } + | Self::Block { + num_param_values, .. + } + | Self::Loop { + num_param_values, .. + } => num_param_values, + } + } + pub fn following_code(&self) -> Block { + match *self { + Self::If { destination, .. } + | Self::Block { destination, .. } + | Self::Loop { destination, .. } => destination, + } + } + pub fn br_destination(&self) -> Block { + match *self { + Self::If { destination, .. } | Self::Block { destination, .. } => destination, + Self::Loop { header, .. } => header, + } + } + pub fn original_stack_size(&self) -> usize { + match *self { + Self::If { + original_stack_size, + .. + } + | Self::Block { + original_stack_size, + .. + } + | Self::Loop { + original_stack_size, + .. + } => original_stack_size, + } + } + pub fn is_loop(&self) -> bool { + match *self { + Self::If { .. } | Self::Block { .. } => false, + Self::Loop { .. } => true, + } + } + + pub fn exit_is_branched_to(&self) -> bool { + match *self { + Self::If { + exit_is_branched_to, + .. + } + | Self::Block { + exit_is_branched_to, + .. + } => exit_is_branched_to, + Self::Loop { .. } => false, + } + } + + pub fn set_branched_to_exit(&mut self) { + match *self { + Self::If { + ref mut exit_is_branched_to, + .. + } + | Self::Block { + ref mut exit_is_branched_to, + .. + } => *exit_is_branched_to = true, + Self::Loop { .. } => {} + } + } +} + +/// Contains information passed along during a function's translation and that records: +/// +/// - The current value and control stacks. +/// - The depth of the two unreachable control blocks stacks, that are manipulated when translating +/// unreachable code; +pub struct FuncTranslationState { + /// A stack of values corresponding to the active values in the input wasm function at this + /// point. 
+
+/// Contains information passed along during a function's translation and that records:
+///
+/// - The current value and control stacks.
+/// - The depth of the two unreachable control block stacks, which are manipulated when
+///   translating unreachable code.
+pub struct FuncTranslationState {
+    /// A stack of values corresponding to the active values in the input wasm function at this
+    /// point.
+    pub(crate) stack: Vec<Value>,
+    /// A stack of active control flow operations at this point in the input wasm function.
+    pub(crate) control_stack: Vec<ControlStackFrame>,
+    /// Is the current translation state still reachable? This is false when translating operators
+    /// like End, Return, or Unreachable.
+    pub(crate) reachable: bool,
+
+    // Map of global variables that have already been created by `FuncEnvironment::make_global`.
+    globals: HashMap<GlobalIndex, GlobalVariable>,
+
+    // Map of heaps that have been created by `FuncEnvironment::make_heap`.
+    heaps: HashMap<MemoryIndex, ir::Heap>,
+
+    // Map of tables that have been created by `FuncEnvironment::make_table`.
+    tables: HashMap<TableIndex, ir::Table>,
+
+    // Map of indirect call signatures that have been created by
+    // `FuncEnvironment::make_indirect_sig()`.
+    // Stores both the signature reference and the number of WebAssembly arguments.
+    signatures: HashMap<SignatureIndex, (ir::SigRef, usize)>,
+
+    // Imported and local functions that have been created by
+    // `FuncEnvironment::make_direct_func()`.
+    // Stores both the function reference and the number of WebAssembly arguments.
+    functions: HashMap<FuncIndex, (ir::FuncRef, usize)>,
+}
+
+// Public methods that are exposed to non-`cranelift_wasm` API consumers.
+impl FuncTranslationState {
+    /// True if the current translation state expresses reachable code, false if it is unreachable.
+    #[inline]
+    pub fn reachable(&self) -> bool {
+        self.reachable
+    }
+}
+
+impl FuncTranslationState {
+    /// Construct a new, empty, `FuncTranslationState`.
+    pub(crate) fn new() -> Self {
+        Self {
+            stack: Vec::new(),
+            control_stack: Vec::new(),
+            reachable: true,
+            globals: HashMap::new(),
+            heaps: HashMap::new(),
+            tables: HashMap::new(),
+            signatures: HashMap::new(),
+            functions: HashMap::new(),
+        }
+    }
+
+    fn clear(&mut self) {
+        debug_assert!(self.stack.is_empty());
+        debug_assert!(self.control_stack.is_empty());
+        self.reachable = true;
+        self.globals.clear();
+        self.heaps.clear();
+        self.tables.clear();
+        self.signatures.clear();
+        self.functions.clear();
+    }
+
+    /// Initialize the state for compiling a function with the given signature.
+    ///
+    /// This resets the state to containing only a single block representing the whole function.
+    /// The exit block is the last block in the function which will contain the return instruction.
+    pub(crate) fn initialize(&mut self, sig: &ir::Signature, exit_block: Block) {
+        self.clear();
+        self.push_block(
+            exit_block,
+            0,
+            sig.returns
+                .iter()
+                .filter(|arg| arg.purpose == ir::ArgumentPurpose::Normal)
+                .count(),
+        );
+    }
+
+    /// Push a value.
+    pub(crate) fn push1(&mut self, val: Value) {
+        self.stack.push(val);
+    }
+
+    /// Push multiple values.
+    pub(crate) fn pushn(&mut self, vals: &[Value]) {
+        self.stack.extend_from_slice(vals);
+    }
+
+    /// Pop one value.
+    pub(crate) fn pop1(&mut self) -> Value {
+        self.stack
+            .pop()
+            .expect("attempted to pop a value from an empty stack")
+    }
+
+    /// Peek at the top of the stack without popping it.
+    pub(crate) fn peek1(&self) -> Value {
+        *self
+            .stack
+            .last()
+            .expect("attempted to peek at a value on an empty stack")
+    }
+
+    /// Pop two values. Return them in the order they were pushed.
+    pub(crate) fn pop2(&mut self) -> (Value, Value) {
+        let v2 = self.stack.pop().unwrap();
+        let v1 = self.stack.pop().unwrap();
+        (v1, v2)
+    }
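Note the ordering contract here: `pop2` (and `pop3` below) hand values back in the order they were pushed, which is the operand order wasm operators expect. A tiny self-contained model of that discipline over a plain `Vec`, not the real `FuncTranslationState`:

// Minimal model of the operand-stack discipline: `pop2` returns values in
// push order, which is the order wasm binary operators consume them.
struct Stack(Vec<i32>);

impl Stack {
    fn push1(&mut self, v: i32) {
        self.0.push(v);
    }
    fn pop2(&mut self) -> (i32, i32) {
        let v2 = self.0.pop().expect("stack underflow");
        let v1 = self.0.pop().expect("stack underflow");
        (v1, v2) // v1 was pushed before v2
    }
}

fn main() {
    let mut s = Stack(Vec::new());
    s.push1(6); // lhs of e.g. `i32.sub`
    s.push1(4); // rhs
    let (lhs, rhs) = s.pop2();
    assert_eq!((lhs, rhs), (6, 4)); // computes 6 - 4, not 4 - 6
}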
+
+    /// Pop three values. Return them in the order they were pushed.
+    pub(crate) fn pop3(&mut self) -> (Value, Value, Value) {
+        let v3 = self.stack.pop().unwrap();
+        let v2 = self.stack.pop().unwrap();
+        let v1 = self.stack.pop().unwrap();
+        (v1, v2, v3)
+    }
+
+    /// Helper to ensure the stack size is at least as big as `n`; note that since this uses
+    /// `debug_assert!`, the check is compiled out of optimized (release) builds.
+    #[inline]
+    fn ensure_length_is_at_least(&self, n: usize) {
+        debug_assert!(
+            n <= self.stack.len(),
+            "attempted to access {} values but stack only has {} values",
+            n,
+            self.stack.len()
+        )
+    }
+
+    /// Pop the top `n` values on the stack.
+    ///
+    /// The popped values are not returned. Use `peekn` to look at them before popping.
+    pub(crate) fn popn(&mut self, n: usize) {
+        self.ensure_length_is_at_least(n);
+        let new_len = self.stack.len() - n;
+        self.stack.truncate(new_len);
+    }
+
+    /// Peek at the top `n` values on the stack in the order they were pushed.
+    pub(crate) fn peekn(&self, n: usize) -> &[Value] {
+        self.ensure_length_is_at_least(n);
+        &self.stack[self.stack.len() - n..]
+    }
+
+    /// Peek at the top `n` values on the stack in the order they were pushed.
+    pub(crate) fn peekn_mut(&mut self, n: usize) -> &mut [Value] {
+        self.ensure_length_is_at_least(n);
+        let len = self.stack.len();
+        &mut self.stack[len - n..]
+    }
+
+    /// Push a block on the control stack.
+    pub(crate) fn push_block(
+        &mut self,
+        following_code: Block,
+        num_param_types: usize,
+        num_result_types: usize,
+    ) {
+        debug_assert!(num_param_types <= self.stack.len());
+        self.control_stack.push(ControlStackFrame::Block {
+            destination: following_code,
+            original_stack_size: self.stack.len() - num_param_types,
+            num_param_values: num_param_types,
+            num_return_values: num_result_types,
+            exit_is_branched_to: false,
+        });
+    }
+
+    /// Push a loop on the control stack.
+    pub(crate) fn push_loop(
+        &mut self,
+        header: Block,
+        following_code: Block,
+        num_param_types: usize,
+        num_result_types: usize,
+    ) {
+        debug_assert!(num_param_types <= self.stack.len());
+        self.control_stack.push(ControlStackFrame::Loop {
+            header,
+            destination: following_code,
+            original_stack_size: self.stack.len() - num_param_types,
+            num_param_values: num_param_types,
+            num_return_values: num_result_types,
+        });
+    }
+
+    /// Push an if on the control stack.
+    pub(crate) fn push_if(
+        &mut self,
+        destination: Block,
+        else_data: ElseData,
+        num_param_types: usize,
+        num_result_types: usize,
+        blocktype: wasmparser::TypeOrFuncType,
+    ) {
+        debug_assert!(num_param_types <= self.stack.len());
+
+        // Push a second copy of our `if`'s parameters on the stack. This lets
+        // us avoid saving them on the side in the `ControlStackFrame` for our
+        // `else` block (if it exists), which would require a second heap
+        // allocation. See also the comment in `translate_operator` for
+        // `Operator::Else`.
+        self.stack.reserve(num_param_types);
+        for i in (self.stack.len() - num_param_types)..self.stack.len() {
+            let val = self.stack[i];
+            self.stack.push(val);
+        }
+
+        self.control_stack.push(ControlStackFrame::If {
+            destination,
+            else_data,
+            original_stack_size: self.stack.len() - num_param_types,
+            num_param_values: num_param_types,
+            num_return_values: num_result_types,
+            exit_is_branched_to: false,
+            head_is_reachable: self.reachable,
+            consequent_ends_reachable: None,
+            blocktype,
+        });
+    }
+}
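The comment in `push_if` is worth unpacking: duplicating the `if` parameters on the value stack itself, after a single `reserve`, gives the eventual `else` arm a pristine copy of its inputs without allocating a side buffer in the frame. Here is the same duplication step in isolation, over a plain `Vec<u32>`:

// Duplicate the top `n` values in place so a later consumer can find a
// pristine copy, with one reservation and no temporary Vec. The range is
// evaluated once up front, so pushing inside the loop is safe.
fn dup_top_n(stack: &mut Vec<u32>, n: usize) {
    assert!(n <= stack.len());
    stack.reserve(n);
    for i in (stack.len() - n)..stack.len() {
        let val = stack[i];
        stack.push(val);
    }
}

fn main() {
    let mut stack = vec![7, 8, 9];
    dup_top_n(&mut stack, 2);
    assert_eq!(stack, vec![7, 8, 9, 8, 9]);
}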
+
+/// Methods for handling entity references.
+impl FuncTranslationState {
+    /// Get the `GlobalVariable` reference that should be used to access the global variable
+    /// `index`. Create the reference if necessary.
+    /// Also return the WebAssembly type of the global.
+    pub(crate) fn get_global<FE: FuncEnvironment + ?Sized>(
+        &mut self,
+        func: &mut ir::Function,
+        index: u32,
+        environ: &mut FE,
+    ) -> WasmResult<GlobalVariable> {
+        let index = GlobalIndex::from_u32(index);
+        match self.globals.entry(index) {
+            Occupied(entry) => Ok(*entry.get()),
+            Vacant(entry) => Ok(*entry.insert(environ.make_global(func, index)?)),
+        }
+    }
+
+    /// Get the `Heap` reference that should be used to access linear memory `index`.
+    /// Create the reference if necessary.
+    pub(crate) fn get_heap<FE: FuncEnvironment + ?Sized>(
+        &mut self,
+        func: &mut ir::Function,
+        index: u32,
+        environ: &mut FE,
+    ) -> WasmResult<ir::Heap> {
+        let index = MemoryIndex::from_u32(index);
+        match self.heaps.entry(index) {
+            Occupied(entry) => Ok(*entry.get()),
+            Vacant(entry) => Ok(*entry.insert(environ.make_heap(func, index)?)),
+        }
+    }
+
+    /// Get the `Table` reference that should be used to access table `index`.
+    /// Create the reference if necessary.
+    pub(crate) fn get_table<FE: FuncEnvironment + ?Sized>(
+        &mut self,
+        func: &mut ir::Function,
+        index: u32,
+        environ: &mut FE,
+    ) -> WasmResult<ir::Table> {
+        let index = TableIndex::from_u32(index);
+        match self.tables.entry(index) {
+            Occupied(entry) => Ok(*entry.get()),
+            Vacant(entry) => Ok(*entry.insert(environ.make_table(func, index)?)),
+        }
+    }
+
+    /// Get the `SigRef` reference that should be used to make an indirect call with signature
+    /// `index`. Also return the number of WebAssembly arguments in the signature.
+    ///
+    /// Create the signature if necessary.
+    pub(crate) fn get_indirect_sig<FE: FuncEnvironment + ?Sized>(
+        &mut self,
+        func: &mut ir::Function,
+        index: u32,
+        environ: &mut FE,
+    ) -> WasmResult<(ir::SigRef, usize)> {
+        let index = SignatureIndex::from_u32(index);
+        match self.signatures.entry(index) {
+            Occupied(entry) => Ok(*entry.get()),
+            Vacant(entry) => {
+                let sig = environ.make_indirect_sig(func, index)?;
+                Ok(*entry.insert((sig, num_wasm_parameters(environ, &func.dfg.signatures[sig]))))
+            }
+        }
+    }
+
+    /// Get the `FuncRef` reference that should be used to make a direct call to function
+    /// `index`. Also return the number of WebAssembly arguments in the signature.
+    ///
+    /// Create the function reference if necessary.
+    pub(crate) fn get_direct_func<FE: FuncEnvironment + ?Sized>(
+        &mut self,
+        func: &mut ir::Function,
+        index: u32,
+        environ: &mut FE,
+    ) -> WasmResult<(ir::FuncRef, usize)> {
+        let index = FuncIndex::from_u32(index);
+        match self.functions.entry(index) {
+            Occupied(entry) => Ok(*entry.get()),
+            Vacant(entry) => {
+                let fref = environ.make_direct_func(func, index)?;
+                let sig = func.dfg.ext_funcs[fref].signature;
+                Ok(*entry.insert((
+                    fref,
+                    num_wasm_parameters(environ, &func.dfg.signatures[sig]),
+                )))
+            }
+        }
+    }
+}
+
+fn num_wasm_parameters<FE: FuncEnvironment + ?Sized>(
+    environ: &FE,
+    signature: &ir::Signature,
+) -> usize {
+    (0..signature.params.len())
+        .filter(|index| environ.is_wasm_parameter(signature, *index))
+        .count()
+}
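All of these getters share one shape: an entry-API cache, so each entity reference is created at most once per function no matter how many times the wasm code touches the same global, memory, or table. A std-only sketch of that memoization pattern; `make_thing` here is a hypothetical stand-in for the `FuncEnvironment::make_*` callbacks:

use std::collections::hash_map::{Entry, HashMap};

// Memoize an expensive "declare once" step, mirroring how get_global /
// get_heap / get_table cache the results of the environment callbacks.
fn get_or_make(cache: &mut HashMap<u32, String>, index: u32) -> &String {
    match cache.entry(index) {
        Entry::Occupied(e) => e.into_mut(),
        Entry::Vacant(e) => e.insert(make_thing(index)),
    }
}

// Hypothetical creator; the real code calls into the embedder instead.
fn make_thing(index: u32) -> String {
    format!("entity #{}", index)
}

fn main() {
    let mut cache = HashMap::new();
    assert_eq!(get_or_make(&mut cache, 3), "entity #3");
    assert_eq!(get_or_make(&mut cache, 3), "entity #3"); // cached, not rebuilt
    assert_eq!(cache.len(), 1);
}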
diff --git a/cranelift/wasm/src/state/mod.rs b/cranelift/wasm/src/state/mod.rs
new file mode 100644
index 0000000000..730dc8beb5
--- /dev/null
+++ b/cranelift/wasm/src/state/mod.rs
@@ -0,0 +1,14 @@
+//! WebAssembly module and function translation state.
+//!
+//! The `ModuleTranslationState` struct defined in this module is used to keep track of data about
+//! the whole WebAssembly module, such as the decoded type signatures.
+//!
+//! The `FuncTranslationState` struct defined in this module is used to keep track of the WebAssembly
+//! value and control stacks during the translation of a single function.
+
+pub(crate) mod func_state;
+pub(crate) mod module_state;
+
+// Re-export for convenience.
+pub(crate) use func_state::*;
+pub(crate) use module_state::*;
diff --git a/cranelift/wasm/src/state/module_state.rs b/cranelift/wasm/src/state/module_state.rs
new file mode 100644
index 0000000000..e997305858
--- /dev/null
+++ b/cranelift/wasm/src/state/module_state.rs
@@ -0,0 +1,31 @@
+use crate::translation_utils::SignatureIndex;
+use cranelift_entity::PrimaryMap;
+use std::boxed::Box;
+
+/// Map of signatures to a function's parameter and return types.
+pub(crate) type WasmTypes =
+    PrimaryMap<SignatureIndex, (Box<[wasmparser::Type]>, Box<[wasmparser::Type]>)>;
+
+/// Contains information decoded from the Wasm module that must be referenced
+/// during each Wasm function's translation.
+///
+/// This is only for data that is maintained by `cranelift-wasm` itself, as
+/// opposed to being maintained by the embedder. Data that is maintained by the
+/// embedder is represented with `ModuleEnvironment`.
+#[derive(Debug)]
+pub struct ModuleTranslationState {
+    /// A map containing a Wasm module's original, raw signatures.
+    ///
+    /// This is used for translating multi-value Wasm blocks inside functions,
+    /// which are encoded to refer to their type signature via index.
+    pub(crate) wasm_types: WasmTypes,
+}
+
+impl ModuleTranslationState {
+    /// Creates a new empty ModuleTranslationState.
+    pub fn new() -> Self {
+        Self {
+            wasm_types: PrimaryMap::new(),
+        }
+    }
+}
diff --git a/cranelift/wasm/src/translation_utils.rs b/cranelift/wasm/src/translation_utils.rs
new file mode 100644
index 0000000000..8a19761a88
--- /dev/null
+++ b/cranelift/wasm/src/translation_utils.rs
@@ -0,0 +1,252 @@
+//! Helper functions and structures for the translation.
+use crate::environ::{TargetEnvironment, WasmResult};
+use crate::state::ModuleTranslationState;
+use crate::wasm_unsupported;
+use core::u32;
+use cranelift_codegen::entity::entity_impl;
+use cranelift_codegen::ir;
+use cranelift_codegen::ir::immediates::V128Imm;
+use cranelift_frontend::FunctionBuilder;
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
+use wasmparser;
+
+/// Index type of a function (imported or defined) inside the WebAssembly module.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct FuncIndex(u32);
+entity_impl!(FuncIndex);
+
+/// Index type of a defined function inside the WebAssembly module.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+pub struct DefinedFuncIndex(u32);
+entity_impl!(DefinedFuncIndex);
+
+/// Index type of a defined table inside the WebAssembly module.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+pub struct DefinedTableIndex(u32);
+entity_impl!(DefinedTableIndex);
+
+/// Index type of a defined memory inside the WebAssembly module.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+pub struct DefinedMemoryIndex(u32);
+entity_impl!(DefinedMemoryIndex);
+
+/// Index type of a defined global inside the WebAssembly module.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+pub struct DefinedGlobalIndex(u32);
+entity_impl!(DefinedGlobalIndex);
+
+/// Index type of a table (imported or defined) inside the WebAssembly module.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+pub struct TableIndex(u32);
+entity_impl!(TableIndex);
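These `entity_impl!` newtypes keep the different wasm index spaces from being mixed up at the type level while still packing into a `u32`; `cranelift-entity` reserves `u32::MAX` as a null sentinel, which is why the name-section parser earlier rejects that index outright. A hand-rolled miniature of the idea (the real macro and `PrimaryMap` live in `cranelift-entity` and provide much more than this):

// A typed u32 index plus a map addressed by it; `u32::MAX` stays reserved.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
struct FuncIndex(u32);

impl FuncIndex {
    fn from_u32(x: u32) -> Self {
        debug_assert!(x != u32::MAX, "u32::MAX is reserved as a sentinel");
        FuncIndex(x)
    }
    fn as_u32(self) -> u32 {
        self.0
    }
}

// A toy PrimaryMap: push values, get back densely allocated typed indices.
struct PrimaryMap<V>(Vec<V>);

impl<V> PrimaryMap<V> {
    fn push(&mut self, v: V) -> FuncIndex {
        self.0.push(v);
        FuncIndex::from_u32((self.0.len() - 1) as u32)
    }
    fn get(&self, k: FuncIndex) -> &V {
        &self.0[k.as_u32() as usize]
    }
}

fn main() {
    let mut names = PrimaryMap(Vec::new());
    let k = names.push("main");
    assert_eq!(*names.get(k), "main");
}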
+
+/// Index type of a global variable (imported or defined) inside the WebAssembly module.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+pub struct GlobalIndex(u32);
+entity_impl!(GlobalIndex);
+
+/// Index type of a linear memory (imported or defined) inside the WebAssembly module.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+pub struct MemoryIndex(u32);
+entity_impl!(MemoryIndex);
+
+/// Index type of a signature (imported or defined) inside the WebAssembly module.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+pub struct SignatureIndex(u32);
+entity_impl!(SignatureIndex);
+
+/// Index type of a passive data segment inside the WebAssembly module.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+pub struct PassiveDataIndex(u32);
+entity_impl!(PassiveDataIndex);
+
+/// Index type of a passive element segment inside the WebAssembly module.
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+pub struct PassiveElemIndex(u32);
+entity_impl!(PassiveElemIndex);
+
+/// WebAssembly global.
+#[derive(Debug, Clone, Copy, Hash)]
+pub struct Global {
+    /// The type of the value stored in the global.
+    pub ty: ir::Type,
+    /// A flag indicating whether the value may change at runtime.
+    pub mutability: bool,
+    /// The source of the initial value.
+    pub initializer: GlobalInit,
+}
+
+/// Globals are initialized via the `const` operators or by referring to another import.
+#[derive(Debug, Clone, Copy, Hash)]
+pub enum GlobalInit {
+    /// An `i32.const`.
+    I32Const(i32),
+    /// An `i64.const`.
+    I64Const(i64),
+    /// An `f32.const`.
+    F32Const(u32),
+    /// An `f64.const`.
+    F64Const(u64),
+    /// A `vconst`.
+    V128Const(V128Imm),
+    /// A `global.get` of another global.
+    GetGlobal(GlobalIndex),
+    /// A `ref.null`.
+    RefNullConst,
+    /// A `ref.func <index>`.
+    RefFunc(FuncIndex),
+    /// The global is imported from, and thus initialized by, a different module.
+    Import,
+}
+
+/// WebAssembly table.
+#[derive(Debug, Clone, Copy, Hash)]
+pub struct Table {
+    /// The type of data stored in elements of the table.
+    pub ty: TableElementType,
+    /// The minimum number of elements in the table.
+    pub minimum: u32,
+    /// The maximum number of elements in the table.
+    pub maximum: Option<u32>,
+}
+
+/// WebAssembly table element. Can be a function or a scalar type.
+#[derive(Debug, Clone, Copy, Hash)]
+pub enum TableElementType {
+    /// A scalar type.
+    Val(ir::Type),
+    /// A function.
+    Func,
+}
+
+/// WebAssembly linear memory.
+#[derive(Debug, Clone, Copy, Hash)]
+pub struct Memory {
+    /// The minimum number of pages in the memory.
+    pub minimum: u32,
+    /// The maximum number of pages in the memory.
+    pub maximum: Option<u32>,
+    /// Whether the memory may be shared between multiple threads.
+    pub shared: bool,
+}
+
+/// Helper function translating wasmparser types to Cranelift types when possible.
+pub fn type_to_type<PE: TargetEnvironment + ?Sized>(
+    ty: wasmparser::Type,
+    environ: &PE,
+) -> WasmResult<ir::Type> {
+    match ty {
+        wasmparser::Type::I32 => Ok(ir::types::I32),
+        wasmparser::Type::I64 => Ok(ir::types::I64),
+        wasmparser::Type::F32 => Ok(ir::types::F32),
+        wasmparser::Type::F64 => Ok(ir::types::F64),
+        wasmparser::Type::V128 => Ok(ir::types::I8X16),
+        wasmparser::Type::AnyRef | wasmparser::Type::AnyFunc | wasmparser::Type::NullRef => {
+            Ok(environ.reference_type())
+        }
+        ty => Err(wasm_unsupported!("type_to_type: wasm type {:?}", ty)),
+    }
+}
+
+/// Helper function translating wasmparser possible table types to Cranelift types when possible,
+/// or None for Func tables.
+pub fn tabletype_to_type<PE: TargetEnvironment + ?Sized>(
+    ty: wasmparser::Type,
+    environ: &PE,
+) -> WasmResult<Option<ir::Type>> {
+    match ty {
+        wasmparser::Type::I32 => Ok(Some(ir::types::I32)),
+        wasmparser::Type::I64 => Ok(Some(ir::types::I64)),
+        wasmparser::Type::F32 => Ok(Some(ir::types::F32)),
+        wasmparser::Type::F64 => Ok(Some(ir::types::F64)),
+        wasmparser::Type::V128 => Ok(Some(ir::types::I8X16)),
+        wasmparser::Type::AnyRef => Ok(Some(environ.reference_type())),
+        wasmparser::Type::AnyFunc => Ok(None),
+        ty => Err(wasm_unsupported!(
+            "tabletype_to_type: table wasm type {:?}",
+            ty
+        )),
+    }
+}
+
+/// Get the parameter and result types for the given Wasm blocktype.
+pub fn blocktype_params_results(
+    module_translation_state: &ModuleTranslationState,
+    ty_or_ft: wasmparser::TypeOrFuncType,
+) -> WasmResult<(&[wasmparser::Type], &[wasmparser::Type])> {
+    Ok(match ty_or_ft {
+        wasmparser::TypeOrFuncType::Type(ty) => match ty {
+            wasmparser::Type::I32 => (&[], &[wasmparser::Type::I32]),
+            wasmparser::Type::I64 => (&[], &[wasmparser::Type::I64]),
+            wasmparser::Type::F32 => (&[], &[wasmparser::Type::F32]),
+            wasmparser::Type::F64 => (&[], &[wasmparser::Type::F64]),
+            wasmparser::Type::V128 => (&[], &[wasmparser::Type::V128]),
+            wasmparser::Type::AnyRef => (&[], &[wasmparser::Type::AnyRef]),
+            wasmparser::Type::AnyFunc => (&[], &[wasmparser::Type::AnyFunc]),
+            wasmparser::Type::NullRef => (&[], &[wasmparser::Type::NullRef]),
+            wasmparser::Type::EmptyBlockType => (&[], &[]),
+            ty => return Err(wasm_unsupported!("blocktype_params_results: type {:?}", ty)),
+        },
+        wasmparser::TypeOrFuncType::FuncType(ty_index) => {
+            let sig_idx = SignatureIndex::from_u32(ty_index);
+            let (ref params, ref returns) = module_translation_state.wasm_types[sig_idx];
+            (&*params, &*returns)
+        }
+    })
+}
+
+/// Create a `Block` with the given Wasm parameters.
+pub fn block_with_params<PE: TargetEnvironment + ?Sized>(
+    builder: &mut FunctionBuilder,
+    params: &[wasmparser::Type],
+    environ: &PE,
+) -> WasmResult<ir::Block> {
+    let block = builder.create_block();
+    for ty in params.iter() {
+        match ty {
+            wasmparser::Type::I32 => {
+                builder.append_block_param(block, ir::types::I32);
+            }
+            wasmparser::Type::I64 => {
+                builder.append_block_param(block, ir::types::I64);
+            }
+            wasmparser::Type::F32 => {
+                builder.append_block_param(block, ir::types::F32);
+            }
+            wasmparser::Type::F64 => {
+                builder.append_block_param(block, ir::types::F64);
+            }
+            wasmparser::Type::AnyRef | wasmparser::Type::AnyFunc | wasmparser::Type::NullRef => {
+                builder.append_block_param(block, environ.reference_type());
+            }
+            wasmparser::Type::V128 => {
+                builder.append_block_param(block, ir::types::I8X16);
+            }
+            ty => {
+                return Err(wasm_unsupported!(
+                    "block_with_params: type {:?} in multi-value block's signature",
+                    ty
+                ))
+            }
+        }
+    }
+    Ok(block)
+}
+
+/// Turns a `wasmparser` `f32` into a `Cranelift` one.
+pub fn f32_translation(x: wasmparser::Ieee32) -> ir::immediates::Ieee32 {
+    ir::immediates::Ieee32::with_bits(x.bits())
+}
+
+/// Turns a `wasmparser` `f64` into a `Cranelift` one.
+pub fn f64_translation(x: wasmparser::Ieee64) -> ir::immediates::Ieee64 {
+    ir::immediates::Ieee64::with_bits(x.bits())
+}
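`f32_translation` and `f64_translation` deliberately copy raw bit patterns rather than converting values, so NaN payloads and negative zero survive the trip from the wasm binary into CLIF immediates. A std-only illustration of why the bitwise route matters:

// Floats must travel as bits: value-level handling would not distinguish
// NaN payloads or -0.0, but `to_bits`/`from_bits` round-trips exactly.
fn main() {
    let bits: u32 = 0x7fc0_1234; // a quiet NaN with a nonzero payload
    let f = f32::from_bits(bits);
    assert!(f.is_nan());
    assert_eq!(f.to_bits(), bits); // bit pattern intact, payload included

    // Signed zero also only survives at the bit level: -0.0 == 0.0 as
    // values, yet their encodings differ.
    assert!((-0.0f32) == 0.0f32);
    assert_ne!((-0.0f32).to_bits(), 0.0f32.to_bits());
}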
+
+/// Special VMContext value label. It is tracked as the 0xffff_fffe label.
+pub fn get_vmctx_value_label() -> ir::ValueLabel {
+    const VMCTX_LABEL: u32 = 0xffff_fffe;
+    ir::ValueLabel::from_u32(VMCTX_LABEL)
+}
diff --git a/cranelift/wasm/tests/wasm_testsuite.rs b/cranelift/wasm/tests/wasm_testsuite.rs
new file mode 100644
index 0000000000..e371a84882
--- /dev/null
+++ b/cranelift/wasm/tests/wasm_testsuite.rs
@@ -0,0 +1,104 @@
+use cranelift_codegen::isa;
+use cranelift_codegen::print_errors::pretty_verifier_error;
+use cranelift_codegen::settings::{self, Flags};
+use cranelift_codegen::verifier;
+use cranelift_wasm::{translate_module, DummyEnvironment, FuncIndex, ReturnMode};
+use std::fs;
+use std::fs::File;
+use std::io;
+use std::io::prelude::*;
+use std::path::Path;
+use std::str::FromStr;
+use target_lexicon::triple;
+
+#[test]
+fn testsuite() {
+    let mut paths: Vec<_> = fs::read_dir("../wasmtests")
+        .unwrap()
+        .map(|r| r.unwrap())
+        .filter(|p| {
+            // Ignore files starting with `.`, which could be editor temporary files
+            if let Some(stem) = p.path().file_stem() {
+                if let Some(stemstr) = stem.to_str() {
+                    return !stemstr.starts_with('.');
+                }
+            }
+            false
+        })
+        .collect();
+    paths.sort_by_key(|dir| dir.path());
+    let flags = Flags::new(settings::builder());
+    for path in paths {
+        let path = path.path();
+        println!("=== {} ===", path.display());
+        let data = read_module(&path);
+        handle_module(data, &flags, ReturnMode::NormalReturns);
+    }
+}
+
+#[test]
+fn use_fallthrough_return() {
+    let flags = Flags::new(settings::builder());
+    let path = Path::new("../wasmtests/use_fallthrough_return.wat");
+    let data = read_module(&path);
+    handle_module(data, &flags, ReturnMode::FallthroughReturn);
+}
+
+#[test]
+fn use_name_section() {
+    let data = wat::parse_str(
+        r#"
+        (module $module_name
+            (func $func_name (local $loc_name i32)
+            )
+        )"#,
+    )
+    .unwrap();
+
+    let flags = Flags::new(settings::builder());
+    let triple = triple!("riscv64");
+    let isa = isa::lookup(triple).unwrap().finish(flags.clone());
+    let return_mode = ReturnMode::NormalReturns;
+    let mut dummy_environ = DummyEnvironment::new(isa.frontend_config(), return_mode, false);
+
+    translate_module(data.as_ref(), &mut dummy_environ).unwrap();
+
+    assert_eq!(
+        dummy_environ.get_func_name(FuncIndex::from_u32(0)).unwrap(),
+        "func_name"
+    );
+}
+
+fn read_file(path: &Path) -> io::Result<Vec<u8>> {
+    let mut buf: Vec<u8> = Vec::new();
+    let mut file = File::open(path)?;
+    file.read_to_end(&mut buf)?;
+    Ok(buf)
+}
+
+fn read_module(path: &Path) -> Vec<u8> {
+    match path.extension() {
+        None => {
+            panic!("the file extension is not wasm or wat");
+        }
+        Some(ext) => match ext.to_str() {
+            Some("wasm") => read_file(path).expect("error reading wasm file"),
+            Some("wat") => wat::parse_file(path).expect("failed to parse wat"),
+            None | Some(&_) => panic!("the file extension for {:?} is not wasm or wat", path),
+        },
+    }
+}
+
+fn handle_module(data: Vec<u8>, flags: &Flags, return_mode: ReturnMode) {
+    let triple = triple!("riscv64");
+    let isa = isa::lookup(triple).unwrap().finish(flags.clone());
+    let mut dummy_environ = DummyEnvironment::new(isa.frontend_config(), return_mode, false);
+
+    translate_module(&data, &mut dummy_environ).unwrap();
+
+    for func in dummy_environ.info.function_bodies.values() {
+        verifier::verify_function(func, &*isa)
+            .map_err(|errors| panic!(pretty_verifier_error(func, Some(&*isa), None, errors)))
+            .unwrap();
+    }
+}
diff --git a/cranelift/wasmtests/arith.wat b/cranelift/wasmtests/arith.wat
new file mode 100644
index 0000000000..fa7115696b
--- /dev/null
+++ b/cranelift/wasmtests/arith.wat
@@ -0,0 +1,13
@@ +(module + (memory 1) + (func $main (local i32) + (set_local 0 (i32.sub (i32.const 4) (i32.const 4))) + (if + (get_local 0) + (then unreachable) + (else (drop (i32.mul (i32.const 6) (get_local 0)))) + ) + ) + (start $main) + (data (i32.const 0) "abcdefgh") +) diff --git a/cranelift/wasmtests/br_table.wat b/cranelift/wasmtests/br_table.wat new file mode 100644 index 0000000000..75444fa49c --- /dev/null +++ b/cranelift/wasmtests/br_table.wat @@ -0,0 +1,30 @@ +(module + (func (result i32) + (block (result i32) + (block (result i32) + (block (result i32) + (br_table 0 1 2 3 (i32.const 42) (i32.const 0)) + ) + ) + ) + ) + (func (result i32) + (block (result i32) + (block (result i32) + (block (result i32) + (br_table 3 2 1 0 (i32.const 42) (i32.const 0)) + ) + ) + ) + ) + (func (result i32) + (block (result i32) + (br_table 0 0 1 1 (i32.const 42) (i32.const 0)) + ) + ) + (func (result i32) + (block (result i32) + (br_table 1 1 0 0 (i32.const 42) (i32.const 0)) + ) + ) +) diff --git a/cranelift/wasmtests/call-simd.wat b/cranelift/wasmtests/call-simd.wat new file mode 100644 index 0000000000..61834d86bd --- /dev/null +++ b/cranelift/wasmtests/call-simd.wat @@ -0,0 +1,14 @@ +(module + (func $main + (v128.const i32x4 1 2 3 4) + (v128.const i32x4 1 2 3 4) + (call $add) + drop + ) + (func $add (param $a v128) (param $b v128) (result v128) + (local.get $a) + (local.get $b) + (i32x4.add) + ) + (start $main) +) diff --git a/cranelift/wasmtests/call.wat b/cranelift/wasmtests/call.wat new file mode 100644 index 0000000000..e8640d2342 --- /dev/null +++ b/cranelift/wasmtests/call.wat @@ -0,0 +1,10 @@ +(module + (func $main (local i32) + (set_local 0 (i32.const 0)) + (drop (call $inc)) + ) + (func $inc (result i32) + (i32.const 1) + ) + (start $main) +) diff --git a/cranelift/wasmtests/embenchen_fannkuch.wat b/cranelift/wasmtests/embenchen_fannkuch.wat new file mode 100644 index 0000000000..c61b5e6e2f --- /dev/null +++ b/cranelift/wasmtests/embenchen_fannkuch.wat @@ -0,0 +1,16725 @@ +(module + (type $0 (func (param i32 i32 i32) (result i32))) + (type $1 (func (param i32) (result i32))) + (type $2 (func (param i32))) + (type $3 (func (result i32))) + (type $4 (func (param i32 i32) (result i32))) + (type $5 (func (param i32 i32))) + (type $6 (func)) + (type $7 (func (param i32 i32 i32 i32 i32) (result i32))) + (type $8 (func (param i32 i32 i32))) + (type $9 (func (param i64 i32) (result i32))) + (type $10 (func (param i32 i32 i32 i32 i32))) + (type $11 (func (param f64 i32) (result f64))) + (type $12 (func (param i32 i32 i32 i32) (result i32))) + (import "env" "memory" (memory $16 2048 2048)) + (data (i32.const 1024) "\04\04\00\00\05") + (data (i32.const 1040) "\01") + (data (i32.const 1064) "\01\00\00\00\02\00\00\00<\10\00\00\00\04") + (data (i32.const 1088) "\01") + (data (i32.const 1103) "\n\ff\ff\ff\ff") + (data (i32.const 1140) "error: %d\n\00Pfannkuchen(%d) = %d.\n\00%d\00\11\00\n\00\11\11\11\00\00\00\00\05\00\00\00\00\00\00\t\00\00\00\00\0b") + (data (i32.const 1209) "\11\00\0f\n\11\11\11\03\n\07\00\01\13\t\0b\0b\00\00\t\06\0b\00\00\0b\00\06\11\00\00\00\11\11\11") + (data (i32.const 1258) "\0b") + (data (i32.const 1267) "\11\00\n\n\11\11\11\00\n\00\00\02\00\t\0b\00\00\00\t\00\0b\00\00\0b") + (data (i32.const 1316) "\0c") + (data (i32.const 1328) "\0c\00\00\00\00\0c\00\00\00\00\t\0c\00\00\00\00\00\0c\00\00\0c") + (data (i32.const 1374) "\0e") + (data (i32.const 1386) "\0d\00\00\00\04\0d\00\00\00\00\t\0e\00\00\00\00\00\0e\00\00\0e") + (data (i32.const 1432) "\10") + (data (i32.const 1444) 
"\0f\00\00\00\00\0f\00\00\00\00\t\10\00\00\00\00\00\10\00\00\10\00\00\12\00\00\00\12\12\12") + (data (i32.const 1499) "\12\00\00\00\12\12\12\00\00\00\00\00\00\t") + (data (i32.const 1548) "\0b") + (data (i32.const 1560) "\n\00\00\00\00\n\00\00\00\00\t\0b\00\00\00\00\00\0b\00\00\0b") + (data (i32.const 1606) "\0c") + (data (i32.const 1618) "\0c\00\00\00\00\0c\00\00\00\00\t\0c\00\00\00\00\00\0c\00\00\0c\00\000123456789ABCDEF-+ 0X0x\00(null)\00-0X+0X 0X-0x+0x 0x\00inf\00INF\00nan\00NAN\00.\00T!\"\19\0d\01\02\03\11K\1c\0c\10\04\0b\1d\12\1e\'hnopqb \05\06\0f\13\14\15\1a\08\16\07($\17\18\t\n\0e\1b\1f%#\83\82}&*+<=>?CGJMXYZ[\\]^_`acdefgijklrstyz{|\00Illegal byte sequence\00Domain error\00Result not representable\00Not a tty\00Permission denied\00Operation not permitted\00No such file or directory\00No such process\00File exists\00Value too large for data type\00No space left on device\00Out of memory\00Resource busy\00Interrupted system call\00Resource temporarily unavailable\00Invalid seek\00Cross-device link\00Read-only file system\00Directory not empty\00Connection reset by peer\00Operation timed out\00Connection refused\00Host is down\00Host is unreachable\00Address in use\00Broken pipe\00I/O error\00No such device or address\00Block device required\00No such device\00Not a directory\00Is a directory\00Text file busy\00Exec format error\00Invalid argument\00Argument list too long\00Symbolic link loop\00Filename too long\00Too many open files in system\00No file descriptors available\00Bad file descriptor\00No child process\00Bad address\00File too large\00Too many links\00No locks available\00Resource deadlock would occur\00State not recoverable\00Previous owner died\00Operation canceled\00Function not implemented\00No message of desired type\00Identifier removed\00Device not a stream\00No data available\00Device timeout\00Out of streams resources\00Link has been severed\00Protocol error\00Bad message\00File descriptor in bad state\00Not a socket\00Destination address required\00Message too large\00Protocol wrong type for socket\00Protocol not available\00Protocol not supported\00Socket type not supported\00Not supported\00Protocol family not supported\00Address family not supported by protocol\00Address not available\00Network is down\00Network unreachable\00Connection reset by network\00Connection aborted\00No buffer space available\00Socket is connected\00Socket not connected\00Cannot send after socket shutdown\00Operation already in progress\00Operation in progress\00Stale file handle\00Remote I/O error\00Quota exceeded\00No medium found\00Wrong medium type\00No error information") + (import "env" "table" (table $timport$17 8 8 funcref)) + (elem (global.get $gimport$19) $45 $9 $46 $14 $10 $15 $47 $16) + (import "env" "DYNAMICTOP_PTR" (global $gimport$0 i32)) + (import "env" "STACKTOP" (global $gimport$1 i32)) + (import "env" "STACK_MAX" (global $gimport$2 i32)) + (import "env" "memoryBase" (global $gimport$18 i32)) + (import "env" "tableBase" (global $gimport$19 i32)) + (import "env" "abort" (func $fimport$3 (param i32))) + (import "env" "enlargeMemory" (func $fimport$4 (result i32))) + (import "env" "getTotalMemory" (func $fimport$5 (result i32))) + (import "env" "abortOnCannotGrowMemory" (func $fimport$6 (result i32))) + (import "env" "_pthread_cleanup_pop" (func $fimport$7 (param i32))) + (import "env" "___syscall6" (func $fimport$8 (param i32 i32) (result i32))) + (import "env" "_pthread_cleanup_push" (func $fimport$9 (param i32 i32))) + (import "env" "_abort" (func $fimport$10)) + 
(import "env" "___setErrNo" (func $fimport$11 (param i32))) + (import "env" "_emscripten_memcpy_big" (func $fimport$12 (param i32 i32 i32) (result i32))) + (import "env" "___syscall54" (func $fimport$13 (param i32 i32) (result i32))) + (import "env" "___syscall140" (func $fimport$14 (param i32 i32) (result i32))) + (import "env" "___syscall146" (func $fimport$15 (param i32 i32) (result i32))) + (global $global$0 (mut i32) (global.get $gimport$0)) + (global $global$1 (mut i32) (global.get $gimport$1)) + (global $global$2 (mut i32) (global.get $gimport$2)) + (global $global$3 (mut i32) (i32.const 0)) + (global $global$4 (mut i32) (i32.const 0)) + (global $global$5 (mut i32) (i32.const 0)) + (export "_sbrk" (func $38)) + (export "_free" (func $36)) + (export "_main" (func $8)) + (export "_pthread_self" (func $41)) + (export "_memset" (func $39)) + (export "_malloc" (func $35)) + (export "_memcpy" (func $40)) + (export "___errno_location" (func $12)) + (export "runPostSets" (func $37)) + (export "stackAlloc" (func $0)) + (export "stackSave" (func $1)) + (export "stackRestore" (func $2)) + (export "establishStackSpace" (func $3)) + (export "setThrew" (func $4)) + (export "setTempRet0" (func $5)) + (export "getTempRet0" (func $6)) + (export "dynCall_ii" (func $42)) + (export "dynCall_iiii" (func $43)) + (export "dynCall_vi" (func $44)) + (func $0 (; 13 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (block $label$1 (result i32) + (local.set $1 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (local.get $0) + ) + ) + (global.set $global$1 + (i32.and + (i32.add + (global.get $global$1) + (i32.const 15) + ) + (i32.const -16) + ) + ) + (local.get $1) + ) + ) + (func $1 (; 14 ;) (type $3) (result i32) + (global.get $global$1) + ) + (func $2 (; 15 ;) (type $2) (param $0 i32) + (global.set $global$1 + (local.get $0) + ) + ) + (func $3 (; 16 ;) (type $5) (param $0 i32) (param $1 i32) + (block $label$1 + (global.set $global$1 + (local.get $0) + ) + (global.set $global$2 + (local.get $1) + ) + ) + ) + (func $4 (; 17 ;) (type $5) (param $0 i32) (param $1 i32) + (if + (i32.eqz + (global.get $global$3) + ) + (block + (global.set $global$3 + (local.get $0) + ) + (global.set $global$4 + (local.get $1) + ) + ) + ) + ) + (func $5 (; 18 ;) (type $2) (param $0 i32) + (global.set $global$5 + (local.get $0) + ) + ) + (func $6 (; 19 ;) (type $3) (result i32) + (global.get $global$5) + ) + (func $7 (; 20 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (local $16 i32) + (local $17 i32) + (block $label$1 (result i32) + (local.set $3 + (call $35 + (local.tee $15 + (i32.shl + (local.tee $4 + (i32.load offset=4 + (local.get $0) + ) + ) + (i32.const 2) + ) + ) + ) + ) + (local.set $6 + (call $35 + (local.get $15) + ) + ) + (local.set $10 + (call $35 + (local.get $15) + ) + ) + (if + (local.tee $2 + (i32.gt_s + (local.get $4) + (i32.const 0) + ) + ) + (block + (local.set $1 + (i32.const 0) + ) + (loop $label$3 + (i32.store + (i32.add + (local.get $3) + (i32.shl + (local.get $1) + (i32.const 2) + ) + ) + (local.get $1) + ) + (br_if $label$3 + (i32.ne + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.get $4) + ) + ) + ) + (i32.store + (i32.add + (local.get $3) + (i32.shl + (local.tee $0 + 
(i32.load + (local.get $0) + ) + ) + (i32.const 2) + ) + ) + (local.tee $11 + (i32.add + (local.get $4) + (i32.const -1) + ) + ) + ) + (i32.store + (local.tee $14 + (i32.add + (local.get $3) + (i32.shl + (local.get $11) + (i32.const 2) + ) + ) + ) + (local.get $0) + ) + (if + (local.get $2) + (block + (local.set $0 + (i32.const 0) + ) + (local.set $1 + (local.get $4) + ) + (loop $label$5 + (block $label$6 + (if + (i32.gt_s + (local.get $1) + (i32.const 1) + ) + (loop $label$8 + (i32.store + (i32.add + (local.get $10) + (i32.shl + (local.tee $2 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i32.const 2) + ) + ) + (local.get $1) + ) + (if + (i32.gt_s + (local.get $2) + (i32.const 1) + ) + (block + (local.set $1 + (local.get $2) + ) + (br $label$8) + ) + (local.set $2 + (i32.const 1) + ) + ) + ) + (local.set $2 + (local.get $1) + ) + ) + (if + (local.tee $7 + (i32.load + (local.get $3) + ) + ) + (if + (i32.ne + (i32.load + (local.get $14) + ) + (local.get $11) + ) + (block + (drop + (call $40 + (local.get $6) + (local.get $3) + (local.get $15) + ) + ) + (local.set $8 + (i32.const 0) + ) + (local.set $9 + (i32.load + (local.get $6) + ) + ) + (loop $label$14 + (if + (i32.gt_s + (local.tee $1 + (i32.add + (local.get $9) + (i32.const -1) + ) + ) + (i32.const 1) + ) + (block + (local.set $5 + (i32.const 1) + ) + (loop $label$16 + (local.set $17 + (i32.load + (local.tee $12 + (i32.add + (local.get $6) + (i32.shl + (local.get $5) + (i32.const 2) + ) + ) + ) + ) + ) + (i32.store + (local.get $12) + (i32.load + (local.tee $12 + (i32.add + (local.get $6) + (i32.shl + (local.get $1) + (i32.const 2) + ) + ) + ) + ) + ) + (i32.store + (local.get $12) + (local.get $17) + ) + (br_if $label$16 + (i32.lt_s + (local.tee $5 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + ) + ) + ) + ) + ) + (local.set $5 + (i32.add + (local.get $8) + (i32.const 1) + ) + ) + (local.set $1 + (i32.load + (local.tee $12 + (i32.add + (local.get $6) + (i32.shl + (local.get $9) + (i32.const 2) + ) + ) + ) + ) + ) + (i32.store + (local.get $12) + (local.get $9) + ) + (if + (local.get $1) + (block + (local.set $8 + (local.get $5) + ) + (local.set $9 + (local.get $1) + ) + (br $label$14) + ) + ) + ) + (if + (i32.le_s + (local.get $0) + (local.get $8) + ) + (local.set $0 + (local.get $5) + ) + ) + ) + ) + ) + (if + (i32.lt_s + (local.get $2) + (local.get $11) + ) + (local.set $1 + (local.get $2) + ) + (block + (local.set $1 + (i32.const 31) + ) + (br $label$6) + ) + ) + (loop $label$21 + (if + (i32.gt_s + (local.get $1) + (i32.const 0) + ) + (block + (local.set $2 + (i32.const 0) + ) + (loop $label$23 + (i32.store + (i32.add + (local.get $3) + (i32.shl + (local.get $2) + (i32.const 2) + ) + ) + (i32.load + (i32.add + (local.get $3) + (i32.shl + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (i32.const 2) + ) + ) + ) + ) + (br_if $label$23 + (i32.lt_s + (local.get $2) + (local.get $1) + ) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + (local.set $2 + (i32.const 0) + ) + ) + (i32.store + (i32.add + (local.get $3) + (i32.shl + (local.get $2) + (i32.const 2) + ) + ) + (local.get $7) + ) + (local.set $5 + (i32.load + (local.tee $2 + (i32.add + (local.get $10) + (i32.shl + (local.get $1) + (i32.const 2) + ) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $5) + (i32.const -1) + ) + ) + (br_if $label$5 + (i32.gt_s + (local.get $5) + (i32.const 1) + ) + ) + (if + (i32.lt_s + (local.tee $1 + (i32.add + (local.get $1) + 
(i32.const 1) + ) + ) + (local.get $11) + ) + (block + (local.set $7 + (i32.load + (local.get $3) + ) + ) + (br $label$21) + ) + (block + (local.set $1 + (i32.const 31) + ) + (br $label$6) + ) + ) + ) + ) + ) + (if + (i32.eq + (local.get $1) + (i32.const 31) + ) + (block + (call $36 + (local.get $3) + ) + (call $36 + (local.get $6) + ) + (call $36 + (local.get $10) + ) + (return + (local.get $0) + ) + ) + ) + ) + (block + (local.set $16 + (local.get $14) + ) + (local.set $13 + (local.get $11) + ) + ) + ) + ) + (block + (i32.store + (i32.add + (local.get $3) + (i32.shl + (local.tee $0 + (i32.load + (local.get $0) + ) + ) + (i32.const 2) + ) + ) + (local.tee $13 + (i32.add + (local.get $4) + (i32.const -1) + ) + ) + ) + (i32.store + (local.tee $16 + (i32.add + (local.get $3) + (i32.shl + (local.get $13) + (i32.const 2) + ) + ) + ) + (local.get $0) + ) + ) + ) + (local.set $0 + (i32.const 0) + ) + (local.set $1 + (local.get $4) + ) + (loop $label$30 + (block $label$31 + (if + (i32.gt_s + (local.get $1) + (i32.const 1) + ) + (loop $label$33 + (i32.store + (i32.add + (local.get $10) + (i32.shl + (local.tee $2 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i32.const 2) + ) + ) + (local.get $1) + ) + (if + (i32.gt_s + (local.get $2) + (i32.const 1) + ) + (block + (local.set $1 + (local.get $2) + ) + (br $label$33) + ) + (local.set $2 + (i32.const 1) + ) + ) + ) + (local.set $2 + (local.get $1) + ) + ) + (if + (local.tee $9 + (i32.load + (local.get $3) + ) + ) + (if + (i32.ne + (i32.load + (local.get $16) + ) + (local.get $13) + ) + (block + (local.set $5 + (i32.const 0) + ) + (local.set $8 + (i32.load + (local.get $6) + ) + ) + (loop $label$39 + (if + (i32.gt_s + (local.tee $1 + (i32.add + (local.get $8) + (i32.const -1) + ) + ) + (i32.const 1) + ) + (block + (local.set $4 + (i32.const 1) + ) + (loop $label$41 + (local.set $14 + (i32.load + (local.tee $7 + (i32.add + (local.get $6) + (i32.shl + (local.get $4) + (i32.const 2) + ) + ) + ) + ) + ) + (i32.store + (local.get $7) + (i32.load + (local.tee $7 + (i32.add + (local.get $6) + (i32.shl + (local.get $1) + (i32.const 2) + ) + ) + ) + ) + ) + (i32.store + (local.get $7) + (local.get $14) + ) + (br_if $label$41 + (i32.lt_s + (local.tee $4 + (i32.add + (local.get $4) + (i32.const 1) + ) + ) + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + ) + ) + ) + ) + ) + (local.set $4 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + (local.set $1 + (i32.load + (local.tee $7 + (i32.add + (local.get $6) + (i32.shl + (local.get $8) + (i32.const 2) + ) + ) + ) + ) + ) + (i32.store + (local.get $7) + (local.get $8) + ) + (if + (local.get $1) + (block + (local.set $5 + (local.get $4) + ) + (local.set $8 + (local.get $1) + ) + (br $label$39) + ) + ) + ) + (if + (i32.le_s + (local.get $0) + (local.get $5) + ) + (local.set $0 + (local.get $4) + ) + ) + ) + ) + ) + (if + (i32.lt_s + (local.get $2) + (local.get $13) + ) + (local.set $1 + (local.get $2) + ) + (block + (local.set $1 + (i32.const 31) + ) + (br $label$31) + ) + ) + (loop $label$46 + (if + (i32.gt_s + (local.get $1) + (i32.const 0) + ) + (block + (local.set $2 + (i32.const 0) + ) + (loop $label$48 + (i32.store + (i32.add + (local.get $3) + (i32.shl + (local.get $2) + (i32.const 2) + ) + ) + (i32.load + (i32.add + (local.get $3) + (i32.shl + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (i32.const 2) + ) + ) + ) + ) + (br_if $label$48 + (i32.lt_s + (local.get $2) + (local.get $1) + ) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + (local.set $2 + 
(i32.const 0) + ) + ) + (i32.store + (i32.add + (local.get $3) + (i32.shl + (local.get $2) + (i32.const 2) + ) + ) + (local.get $9) + ) + (local.set $4 + (i32.load + (local.tee $2 + (i32.add + (local.get $10) + (i32.shl + (local.get $1) + (i32.const 2) + ) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $4) + (i32.const -1) + ) + ) + (br_if $label$30 + (i32.gt_s + (local.get $4) + (i32.const 1) + ) + ) + (if + (i32.lt_s + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.get $13) + ) + (block + (local.set $9 + (i32.load + (local.get $3) + ) + ) + (br $label$46) + ) + (block + (local.set $1 + (i32.const 31) + ) + (br $label$31) + ) + ) + ) + ) + ) + (if + (i32.eq + (local.get $1) + (i32.const 31) + ) + (block + (call $36 + (local.get $3) + ) + (call $36 + (local.get $6) + ) + (call $36 + (local.get $10) + ) + (return + (local.get $0) + ) + ) + ) + (i32.const 0) + ) + ) + (func $8 (; 21 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (block $label$1 (result i32) + (local.set $5 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 32) + ) + ) + (local.set $7 + (i32.add + (local.get $5) + (i32.const 16) + ) + ) + (local.set $10 + (i32.add + (local.get $5) + (i32.const 8) + ) + ) + (local.set $2 + (local.get $5) + ) + (block $label$2 + (block $label$3 + (br_if $label$3 + (i32.le_s + (local.get $0) + (i32.const 1) + ) + ) + (block $label$4 + (block $label$5 + (block $label$6 + (block $label$7 + (block $label$8 + (block $label$9 + (block $label$10 + (br_table $label$5 $label$10 $label$8 $label$9 $label$7 $label$6 $label$4 + (i32.sub + (local.tee $0 + (i32.load8_s + (i32.load offset=4 + (local.get $1) + ) + ) + ) + (i32.const 48) + ) + ) + ) + (local.set $3 + (i32.const 9) + ) + (br $label$2) + ) + (br $label$3) + ) + (local.set $3 + (i32.const 10) + ) + (br $label$2) + ) + (local.set $3 + (i32.const 11) + ) + (br $label$2) + ) + (local.set $3 + (i32.const 12) + ) + (br $label$2) + ) + (global.set $global$1 + (local.get $5) + ) + (return + (i32.const 0) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $0) + (i32.const -48) + ) + ) + (drop + (call $33 + (i32.const 1140) + (local.get $2) + ) + ) + (global.set $global$1 + (local.get $5) + ) + (return + (i32.const -1) + ) + ) + (local.set $3 + (i32.const 11) + ) + ) + (local.set $6 + (i32.add + (local.get $3) + (i32.const -1) + ) + ) + (local.set $2 + (i32.const 0) + ) + (local.set $0 + (i32.const 0) + ) + (loop $label$11 + (i32.store + (local.tee $1 + (call $35 + (i32.const 12) + ) + ) + (local.get $0) + ) + (i32.store offset=4 + (local.get $1) + (local.get $3) + ) + (i32.store offset=8 + (local.get $1) + (local.get $2) + ) + (if + (i32.ne + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (local.get $6) + ) + (block + (local.set $2 + (local.get $1) + ) + (br $label$11) + ) + ) + ) + (local.set $4 + (call $35 + (local.tee $0 + (i32.shl + (local.get $3) + (i32.const 2) + ) + ) + ) + ) + (local.set $8 + (call $35 + (local.get $0) + ) + ) + (local.set $0 + (i32.const 0) + ) + (loop $label$13 + (i32.store + (i32.add + (local.get $4) + (i32.shl + (local.get $0) + (i32.const 2) + ) + ) + (local.get $0) + ) + (br_if $label$13 + (i32.ne + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (local.get $3) + ) + ) + ) + (local.set $0 + (local.get $3) + ) + 
(local.set $6 + (i32.const 30) + ) + (loop $label$14 + (block $label$15 + (local.set $2 + (i32.const 0) + ) + (loop $label$16 + (i32.store + (local.get $10) + (i32.add + (i32.load + (i32.add + (local.get $4) + (i32.shl + (local.get $2) + (i32.const 2) + ) + ) + ) + (i32.const 1) + ) + ) + (drop + (call $33 + (i32.const 1174) + (local.get $10) + ) + ) + (br_if $label$16 + (i32.ne + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (local.get $3) + ) + ) + ) + (drop + (call $34 + (i32.const 10) + ) + ) + (if + (i32.gt_s + (local.get $0) + (i32.const 1) + ) + (loop $label$18 + (i32.store + (i32.add + (local.get $8) + (i32.shl + (local.tee $2 + (i32.add + (local.get $0) + (i32.const -1) + ) + ) + (i32.const 2) + ) + ) + (local.get $0) + ) + (if + (i32.gt_s + (local.get $2) + (i32.const 1) + ) + (block + (local.set $0 + (local.get $2) + ) + (br $label$18) + ) + (local.set $0 + (i32.const 1) + ) + ) + ) + (br_if $label$15 + (i32.eq + (local.get $0) + (local.get $3) + ) + ) + ) + (local.set $6 + (i32.add + (local.get $6) + (i32.const -1) + ) + ) + (loop $label$22 + (block $label$23 + (local.set $9 + (i32.load + (local.get $4) + ) + ) + (if + (i32.gt_s + (local.get $0) + (i32.const 0) + ) + (block + (local.set $2 + (i32.const 0) + ) + (loop $label$25 + (i32.store + (i32.add + (local.get $4) + (i32.shl + (local.get $2) + (i32.const 2) + ) + ) + (i32.load + (i32.add + (local.get $4) + (i32.shl + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (i32.const 2) + ) + ) + ) + ) + (br_if $label$25 + (i32.lt_s + (local.get $2) + (local.get $0) + ) + ) + (local.set $2 + (local.get $0) + ) + ) + ) + (local.set $2 + (i32.const 0) + ) + ) + (i32.store + (i32.add + (local.get $4) + (i32.shl + (local.get $2) + (i32.const 2) + ) + ) + (local.get $9) + ) + (local.set $2 + (i32.load + (local.tee $9 + (i32.add + (local.get $8) + (i32.shl + (local.get $0) + (i32.const 2) + ) + ) + ) + ) + ) + (i32.store + (local.get $9) + (i32.add + (local.get $2) + (i32.const -1) + ) + ) + (br_if $label$23 + (i32.gt_s + (local.get $2) + (i32.const 1) + ) + ) + (br_if $label$22 + (i32.ne + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (local.get $3) + ) + ) + (br $label$15) + ) + ) + (br_if $label$14 + (local.get $6) + ) + ) + ) + (call $36 + (local.get $4) + ) + (call $36 + (local.get $8) + ) + (if + (local.get $1) + (block + (local.set $0 + (i32.const 0) + ) + (loop $label$28 + (if + (i32.lt_s + (local.get $0) + (local.tee $2 + (call $7 + (local.get $1) + ) + ) + ) + (local.set $0 + (local.get $2) + ) + ) + (local.set $2 + (i32.load offset=8 + (local.get $1) + ) + ) + (call $36 + (local.get $1) + ) + (if + (local.get $2) + (block + (local.set $1 + (local.get $2) + ) + (br $label$28) + ) + ) + ) + ) + (local.set $0 + (i32.const 0) + ) + ) + (i32.store + (local.get $7) + (local.get $3) + ) + (i32.store offset=4 + (local.get $7) + (local.get $0) + ) + (drop + (call $33 + (i32.const 1151) + (local.get $7) + ) + ) + (global.set $global$1 + (local.get $5) + ) + (i32.const 0) + ) + ) + (func $9 (; 22 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (i32.store + (local.tee $2 + (local.get $1) + ) + (i32.load offset=60 + (local.get $0) + ) + ) + (local.set $0 + (call $11 + (call $fimport$8 + (i32.const 6) + (local.get $2) + ) + ) + ) + (global.set $global$1 + (local.get $1) + ) + (local.get $0) + 
) + ) + (func $10 (; 23 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (block $label$1 (result i32) + (local.set $4 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 32) + ) + ) + (i32.store + (local.tee $3 + (local.get $4) + ) + (i32.load offset=60 + (local.get $0) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.const 0) + ) + (i32.store offset=8 + (local.get $3) + (local.get $1) + ) + (i32.store offset=12 + (local.get $3) + (local.tee $0 + (i32.add + (local.get $4) + (i32.const 20) + ) + ) + ) + (i32.store offset=16 + (local.get $3) + (local.get $2) + ) + (local.set $0 + (if (result i32) + (i32.lt_s + (call $11 + (call $fimport$14 + (i32.const 140) + (local.get $3) + ) + ) + (i32.const 0) + ) + (block (result i32) + (i32.store + (local.get $0) + (i32.const -1) + ) + (i32.const -1) + ) + (i32.load + (local.get $0) + ) + ) + ) + (global.set $global$1 + (local.get $4) + ) + (local.get $0) + ) + ) + (func $11 (; 24 ;) (type $1) (param $0 i32) (result i32) + (if (result i32) + (i32.gt_u + (local.get $0) + (i32.const -4096) + ) + (block (result i32) + (i32.store + (call $12) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + (i32.const -1) + ) + (local.get $0) + ) + ) + (func $12 (; 25 ;) (type $3) (result i32) + (i32.const 3648) + ) + (func $13 (; 26 ;) (type $2) (param $0 i32) + (nop) + ) + (func $14 (; 27 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (block $label$1 (result i32) + (local.set $4 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 80) + ) + ) + (local.set $3 + (local.get $4) + ) + (local.set $5 + (i32.add + (local.get $4) + (i32.const 12) + ) + ) + (i32.store offset=36 + (local.get $0) + (i32.const 3) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 64) + ) + ) + (block + (i32.store + (local.get $3) + (i32.load offset=60 + (local.get $0) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.const 21505) + ) + (i32.store offset=8 + (local.get $3) + (local.get $5) + ) + (if + (call $fimport$13 + (i32.const 54) + (local.get $3) + ) + (i32.store8 offset=75 + (local.get $0) + (i32.const -1) + ) + ) + ) + ) + (local.set $0 + (call $15 + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (global.set $global$1 + (local.get $4) + ) + (local.get $0) + ) + ) + (func $15 (; 28 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (block $label$1 (result i32) + (local.set $8 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 48) + ) + ) + (local.set $9 + (i32.add + (local.get $8) + (i32.const 16) + ) + ) + (local.set $10 + (local.get $8) + ) + (i32.store + (local.tee $3 + (i32.add + (local.get $8) + (i32.const 32) + ) + ) + (local.tee $4 + (i32.load + (local.tee $6 + (i32.add + (local.get $0) + (i32.const 28) + ) + ) + ) + ) + ) + (i32.store offset=4 + (local.get $3) + (local.tee $5 + (i32.sub + (i32.load + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + (local.get $4) + ) + ) + ) + (i32.store offset=8 + (local.get $3) + (local.get $1) + ) + (i32.store offset=12 + (local.get $3) + (local.get $2) + ) + 
(local.set $13 + (i32.add + (local.get $0) + (i32.const 60) + ) + ) + (local.set $14 + (i32.add + (local.get $0) + (i32.const 44) + ) + ) + (local.set $1 + (local.get $3) + ) + (local.set $4 + (i32.const 2) + ) + (local.set $12 + (i32.add + (local.get $5) + (local.get $2) + ) + ) + (block $label$2 + (block $label$3 + (block $label$4 + (loop $label$5 + (if + (i32.load + (i32.const 3604) + ) + (block + (call $fimport$9 + (i32.const 1) + (local.get $0) + ) + (i32.store + (local.get $10) + (i32.load + (local.get $13) + ) + ) + (i32.store offset=4 + (local.get $10) + (local.get $1) + ) + (i32.store offset=8 + (local.get $10) + (local.get $4) + ) + (local.set $3 + (call $11 + (call $fimport$15 + (i32.const 146) + (local.get $10) + ) + ) + ) + (call $fimport$7 + (i32.const 0) + ) + ) + (block + (i32.store + (local.get $9) + (i32.load + (local.get $13) + ) + ) + (i32.store offset=4 + (local.get $9) + (local.get $1) + ) + (i32.store offset=8 + (local.get $9) + (local.get $4) + ) + (local.set $3 + (call $11 + (call $fimport$15 + (i32.const 146) + (local.get $9) + ) + ) + ) + ) + ) + (br_if $label$4 + (i32.eq + (local.get $12) + (local.get $3) + ) + ) + (br_if $label$3 + (i32.lt_s + (local.get $3) + (i32.const 0) + ) + ) + (local.set $5 + (if (result i32) + (i32.gt_u + (local.get $3) + (local.tee $5 + (i32.load offset=4 + (local.get $1) + ) + ) + ) + (block (result i32) + (i32.store + (local.get $6) + (local.tee $7 + (i32.load + (local.get $14) + ) + ) + ) + (i32.store + (local.get $11) + (local.get $7) + ) + (local.set $7 + (i32.load offset=12 + (local.get $1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (local.set $4 + (i32.add + (local.get $4) + (i32.const -1) + ) + ) + (i32.sub + (local.get $3) + (local.get $5) + ) + ) + (if (result i32) + (i32.eq + (local.get $4) + (i32.const 2) + ) + (block (result i32) + (i32.store + (local.get $6) + (i32.add + (i32.load + (local.get $6) + ) + (local.get $3) + ) + ) + (local.set $7 + (local.get $5) + ) + (local.set $4 + (i32.const 2) + ) + (local.get $3) + ) + (block (result i32) + (local.set $7 + (local.get $5) + ) + (local.get $3) + ) + ) + ) + ) + (i32.store + (local.get $1) + (i32.add + (i32.load + (local.get $1) + ) + (local.get $5) + ) + ) + (i32.store offset=4 + (local.get $1) + (i32.sub + (local.get $7) + (local.get $5) + ) + ) + (local.set $12 + (i32.sub + (local.get $12) + (local.get $3) + ) + ) + (br $label$5) + ) + ) + (i32.store offset=16 + (local.get $0) + (i32.add + (local.tee $1 + (i32.load + (local.get $14) + ) + ) + (i32.load offset=48 + (local.get $0) + ) + ) + ) + (i32.store + (local.get $6) + (local.get $1) + ) + (i32.store + (local.get $11) + (local.get $1) + ) + (br $label$2) + ) + (i32.store offset=16 + (local.get $0) + (i32.const 0) + ) + (i32.store + (local.get $6) + (i32.const 0) + ) + (i32.store + (local.get $11) + (i32.const 0) + ) + (i32.store + (local.get $0) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (local.set $2 + (if (result i32) + (i32.eq + (local.get $4) + (i32.const 2) + ) + (i32.const 0) + (i32.sub + (local.get $2) + (i32.load offset=4 + (local.get $1) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $8) + ) + (local.get $2) + ) + ) + (func $16 (; 29 ;) (type $2) (param $0 i32) + (if + (i32.eqz + (i32.load offset=68 + (local.get $0) + ) + ) + (call $13 + (local.get $0) + ) + ) + ) + (func $17 (; 30 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (block $label$1 (result i32) + 
(local.set $5 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (block $label$2 + (block $label$3 + (block $label$4 + (if + (i32.and + (local.tee $4 + (i32.ne + (local.get $2) + (i32.const 0) + ) + ) + (i32.ne + (i32.and + (local.get $0) + (i32.const 3) + ) + (i32.const 0) + ) + ) + (block + (local.set $4 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (local.set $3 + (local.get $2) + ) + (local.set $2 + (local.get $0) + ) + (loop $label$6 + (if + (i32.eq + (i32.load8_s + (local.get $2) + ) + (i32.shr_s + (i32.shl + (local.get $4) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (block + (local.set $0 + (local.get $3) + ) + (br $label$3) + ) + ) + (br_if $label$6 + (i32.and + (local.tee $0 + (i32.ne + (local.tee $3 + (i32.add + (local.get $3) + (i32.const -1) + ) + ) + (i32.const 0) + ) + ) + (i32.ne + (i32.and + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (i32.const 3) + ) + (i32.const 0) + ) + ) + ) + (br $label$4) + ) + ) + (block + (local.set $3 + (local.get $2) + ) + (local.set $2 + (local.get $0) + ) + (local.set $0 + (local.get $4) + ) + ) + ) + ) + (if + (local.get $0) + (block + (local.set $0 + (local.get $3) + ) + (br $label$3) + ) + (local.set $0 + (i32.const 0) + ) + ) + (br $label$2) + ) + (if + (i32.ne + (i32.load8_s + (local.get $2) + ) + (i32.shr_s + (i32.shl + (local.tee $1 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (block + (local.set $3 + (i32.mul + (local.get $5) + (i32.const 16843009) + ) + ) + (block $label$12 + (block $label$13 + (br_if $label$13 + (i32.le_u + (local.get $0) + (i32.const 3) + ) + ) + (loop $label$14 + (if + (i32.eqz + (i32.and + (i32.xor + (i32.and + (local.tee $4 + (i32.xor + (i32.load + (local.get $2) + ) + (local.get $3) + ) + ) + (i32.const -2139062144) + ) + (i32.const -2139062144) + ) + (i32.add + (local.get $4) + (i32.const -16843009) + ) + ) + ) + (block + (local.set $2 + (i32.add + (local.get $2) + (i32.const 4) + ) + ) + (br_if $label$14 + (i32.gt_u + (local.tee $0 + (i32.add + (local.get $0) + (i32.const -4) + ) + ) + (i32.const 3) + ) + ) + (br $label$13) + ) + ) + ) + (br $label$12) + ) + (if + (i32.eqz + (local.get $0) + ) + (block + (local.set $0 + (i32.const 0) + ) + (br $label$2) + ) + ) + ) + (loop $label$17 + (br_if $label$2 + (i32.eq + (i32.load8_s + (local.get $2) + ) + (i32.shr_s + (i32.shl + (local.get $1) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) + (local.set $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (br_if $label$17 + (local.tee $0 + (i32.add + (local.get $0) + (i32.const -1) + ) + ) + ) + (local.set $0 + (i32.const 0) + ) + ) + ) + ) + ) + (if (result i32) + (local.get $0) + (local.get $2) + (i32.const 0) + ) + ) + ) + (func $18 (; 31 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (block $label$1 (result i32) + (local.set $4 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 224) + ) + ) + (local.set $5 + (i32.add + (local.get $4) + (i32.const 136) + ) + ) + (i64.store align=4 + (local.tee $3 + (i32.add + (local.get $4) + (i32.const 80) + ) + ) + (i64.const 0) + ) + (i64.store offset=8 align=4 + (local.get $3) + (i64.const 0) + ) + (i64.store offset=16 align=4 + (local.get $3) + (i64.const 0) + ) + (i64.store offset=24 align=4 
+ (local.get $3) + (i64.const 0) + ) + (i64.store offset=32 align=4 + (local.get $3) + (i64.const 0) + ) + (i32.store + (local.tee $6 + (i32.add + (local.get $4) + (i32.const 120) + ) + ) + (i32.load + (local.get $2) + ) + ) + (if + (i32.lt_s + (call $19 + (i32.const 0) + (local.get $1) + (local.get $6) + (local.tee $2 + (local.get $4) + ) + (local.get $3) + ) + (i32.const 0) + ) + (local.set $1 + (i32.const -1) + ) + (block + (local.set $12 + (if (result i32) + (i32.gt_s + (i32.load offset=76 + (local.get $0) + ) + (i32.const -1) + ) + (call $20 + (local.get $0) + ) + (i32.const 0) + ) + ) + (local.set $7 + (i32.load + (local.get $0) + ) + ) + (if + (i32.lt_s + (i32.load8_s offset=74 + (local.get $0) + ) + (i32.const 1) + ) + (i32.store + (local.get $0) + (i32.and + (local.get $7) + (i32.const -33) + ) + ) + ) + (if + (i32.load + (local.tee $8 + (i32.add + (local.get $0) + (i32.const 48) + ) + ) + ) + (local.set $1 + (call $19 + (local.get $0) + (local.get $1) + (local.get $6) + (local.get $2) + (local.get $3) + ) + ) + (block + (local.set $10 + (i32.load + (local.tee $9 + (i32.add + (local.get $0) + (i32.const 44) + ) + ) + ) + ) + (i32.store + (local.get $9) + (local.get $5) + ) + (i32.store + (local.tee $13 + (i32.add + (local.get $0) + (i32.const 28) + ) + ) + (local.get $5) + ) + (i32.store + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + (local.get $5) + ) + (i32.store + (local.get $8) + (i32.const 80) + ) + (i32.store + (local.tee $14 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (i32.add + (local.get $5) + (i32.const 80) + ) + ) + (local.set $1 + (call $19 + (local.get $0) + (local.get $1) + (local.get $6) + (local.get $2) + (local.get $3) + ) + ) + (if + (local.get $10) + (block + (drop + (call_indirect (type $0) + (local.get $0) + (i32.const 0) + (i32.const 0) + (i32.add + (i32.and + (i32.load offset=36 + (local.get $0) + ) + (i32.const 3) + ) + (i32.const 2) + ) + ) + ) + (if + (i32.eqz + (i32.load + (local.get $11) + ) + ) + (local.set $1 + (i32.const -1) + ) + ) + (i32.store + (local.get $9) + (local.get $10) + ) + (i32.store + (local.get $8) + (i32.const 0) + ) + (i32.store + (local.get $14) + (i32.const 0) + ) + (i32.store + (local.get $13) + (i32.const 0) + ) + (i32.store + (local.get $11) + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (local.get $0) + (i32.or + (local.tee $2 + (i32.load + (local.get $0) + ) + ) + (i32.and + (local.get $7) + (i32.const 32) + ) + ) + ) + (if + (local.get $12) + (call $13 + (local.get $0) + ) + ) + (if + (i32.and + (local.get $2) + (i32.const 32) + ) + (local.set $1 + (i32.const -1) + ) + ) + ) + ) + (global.set $global$1 + (local.get $4) + ) + (local.get $1) + ) + ) + (func $19 (; 32 ;) (type $7) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (param $4 i32) (result i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (local $16 i32) + (local $17 i32) + (local $18 i32) + (local $19 i32) + (local $20 i32) + (local $21 i32) + (local $22 i32) + (local $23 i32) + (local $24 i32) + (local $25 i32) + (local $26 i32) + (local $27 i32) + (local $28 i32) + (local $29 i32) + (local $30 i32) + (local $31 i32) + (local $32 i32) + (local $33 i32) + (local $34 i32) + (local $35 i32) + (local $36 i32) + (local $37 i32) + (local $38 i32) + (local $39 i32) + (local $40 i32) + (local $41 i32) + (local $42 i32) + (local $43 i32) + (local $44 i32) + (local $45 i32) + 
(local $46 i32) + (local $47 i32) + (local $48 i32) + (local $49 i32) + (local $50 i64) + (local $51 i64) + (local $52 f64) + (local $53 f64) + (block $label$1 (result i32) + (local.set $23 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 624) + ) + ) + (local.set $20 + (i32.add + (local.get $23) + (i32.const 16) + ) + ) + (local.set $16 + (local.get $23) + ) + (local.set $36 + (i32.add + (local.get $23) + (i32.const 528) + ) + ) + (local.set $30 + (i32.ne + (local.get $0) + (i32.const 0) + ) + ) + (local.set $38 + (local.tee $21 + (i32.add + (local.tee $17 + (i32.add + (local.get $23) + (i32.const 536) + ) + ) + (i32.const 40) + ) + ) + ) + (local.set $39 + (i32.add + (local.get $17) + (i32.const 39) + ) + ) + (local.set $42 + (i32.add + (local.tee $37 + (i32.add + (local.get $23) + (i32.const 8) + ) + ) + (i32.const 4) + ) + ) + (local.set $43 + (i32.sub + (i32.const 0) + (local.tee $27 + (local.tee $19 + (i32.add + (local.get $23) + (i32.const 588) + ) + ) + ) + ) + ) + (local.set $33 + (i32.add + (local.tee $17 + (i32.add + (local.get $23) + (i32.const 576) + ) + ) + (i32.const 12) + ) + ) + (local.set $40 + (i32.add + (local.get $17) + (i32.const 11) + ) + ) + (local.set $44 + (i32.sub + (local.tee $28 + (local.get $33) + ) + (local.get $27) + ) + ) + (local.set $45 + (i32.sub + (i32.const -2) + (local.get $27) + ) + ) + (local.set $46 + (i32.add + (local.get $28) + (i32.const 2) + ) + ) + (local.set $48 + (i32.add + (local.tee $47 + (i32.add + (local.get $23) + (i32.const 24) + ) + ) + (i32.const 288) + ) + ) + (local.set $41 + (local.tee $31 + (i32.add + (local.get $19) + (i32.const 9) + ) + ) + ) + (local.set $34 + (i32.add + (local.get $19) + (i32.const 8) + ) + ) + (local.set $15 + (i32.const 0) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $17 + (i32.const 0) + ) + (block $label$2 + (block $label$3 + (loop $label$4 + (block $label$5 + (if + (i32.gt_s + (local.get $15) + (i32.const -1) + ) + (local.set $15 + (if (result i32) + (i32.gt_s + (local.get $10) + (i32.sub + (i32.const 2147483647) + (local.get $15) + ) + ) + (block (result i32) + (i32.store + (call $12) + (i32.const 75) + ) + (i32.const -1) + ) + (i32.add + (local.get $10) + (local.get $15) + ) + ) + ) + ) + (br_if $label$3 + (i32.eqz + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.get $1) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) + (local.set $11 + (local.get $1) + ) + (block $label$9 + (block $label$10 + (loop $label$11 + (block $label$12 + (block $label$13 + (block $label$14 + (block $label$15 + (br_table $label$14 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$15 $label$13 + (i32.sub + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 0) + ) + ) + ) + (local.set $5 + (local.get $11) + ) + (br $label$10) + ) + (local.set $5 + (local.get $11) + ) + (br $label$12) + ) + (local.set $5 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (br $label$11) + ) + ) + (br $label$9) + ) + (loop $label$16 + (br_if $label$9 + (i32.ne + (i32.load8_s offset=1 + (local.get $5) + ) + (i32.const 37) + ) + ) + (local.set $11 + (i32.add + 
(local.get $11) + (i32.const 1) + ) + ) + (br_if $label$16 + (i32.eq + (i32.load8_s + (local.tee $5 + (i32.add + (local.get $5) + (i32.const 2) + ) + ) + ) + (i32.const 37) + ) + ) + ) + ) + (local.set $10 + (i32.sub + (local.get $11) + (local.get $1) + ) + ) + (if + (local.get $30) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $1) + (local.get $10) + (local.get $0) + ) + ) + ) + ) + (if + (local.get $10) + (block + (local.set $1 + (local.get $5) + ) + (br $label$4) + ) + ) + (local.set $10 + (if (result i32) + (i32.lt_u + (local.tee $9 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $10 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (block (result i32) + (local.set $10 + (i32.add + (local.get $5) + (i32.const 3) + ) + ) + (if + (local.tee $12 + (i32.eq + (i32.load8_s offset=2 + (local.get $5) + ) + (i32.const 36) + ) + ) + (local.set $11 + (local.get $10) + ) + ) + (if + (local.get $12) + (local.set $17 + (i32.const 1) + ) + ) + (local.set $5 + (i32.load8_s + (local.get $11) + ) + ) + (if + (i32.eqz + (local.get $12) + ) + (local.set $9 + (i32.const -1) + ) + ) + (local.get $17) + ) + (block (result i32) + (local.set $5 + (local.get $10) + ) + (local.set $9 + (i32.const -1) + ) + (local.get $17) + ) + ) + ) + (block $label$25 + (if + (i32.lt_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -32) + ) + ) + (i32.const 32) + ) + (block + (local.set $17 + (i32.const 0) + ) + (loop $label$27 + (br_if $label$25 + (i32.eqz + (i32.and + (i32.shl + (i32.const 1) + (local.get $12) + ) + (i32.const 75913) + ) + ) + ) + (local.set $17 + (i32.or + (i32.shl + (i32.const 1) + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -32) + ) + ) + (local.get $17) + ) + ) + (br_if $label$27 + (i32.lt_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -32) + ) + ) + (i32.const 32) + ) + ) + ) + ) + (local.set $17 + (i32.const 0) + ) + ) + ) + (block $label$29 + (if + (i32.eq + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 42) + ) + (block + (local.set $11 + (block $label$31 (result i32) + (block $label$32 + (br_if $label$32 + (i32.ge_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + ) + (br_if $label$32 + (i32.ne + (i32.load8_s offset=2 + (local.get $11) + ) + (i32.const 36) + ) + ) + (i32.store + (i32.add + (local.get $4) + (i32.shl + (local.get $12) + (i32.const 2) + ) + ) + (i32.const 10) + ) + (local.set $8 + (i32.const 1) + ) + (local.set $10 + (i32.wrap_i64 + (i64.load + (i32.add + (local.get $3) + (i32.shl + (i32.add + (i32.load8_s + (local.get $7) + ) + (i32.const -48) + ) + (i32.const 3) + ) + ) + ) + ) + ) + (br $label$31 + (i32.add + (local.get $11) + (i32.const 3) + ) + ) + ) + (if + (local.get $10) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (if + (i32.eqz + (local.get $30) + ) + (block + (local.set $12 + (local.get $17) + ) + 
(local.set $17 + (i32.const 0) + ) + (local.set $11 + (local.get $7) + ) + (local.set $10 + (i32.const 0) + ) + (br $label$29) + ) + ) + (local.set $10 + (i32.load + (local.tee $11 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $11) + (i32.const 4) + ) + ) + (local.set $8 + (i32.const 0) + ) + (local.get $7) + ) + ) + (local.set $12 + (i32.or + (local.get $17) + (i32.const 8192) + ) + ) + (local.set $7 + (i32.sub + (i32.const 0) + (local.get $10) + ) + ) + (local.set $5 + (i32.load8_s + (local.get $11) + ) + ) + (if + (i32.eqz + (local.tee $6 + (i32.lt_s + (local.get $10) + (i32.const 0) + ) + ) + ) + (local.set $12 + (local.get $17) + ) + ) + (local.set $17 + (local.get $8) + ) + (if + (local.get $6) + (local.set $10 + (local.get $7) + ) + ) + ) + (if + (i32.lt_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (block + (local.set $7 + (i32.const 0) + ) + (local.set $5 + (local.get $12) + ) + (loop $label$39 + (local.set $7 + (i32.add + (i32.mul + (local.get $7) + (i32.const 10) + ) + (local.get $5) + ) + ) + (br_if $label$39 + (i32.lt_u + (local.tee $5 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $12 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + ) + ) + (if + (i32.lt_s + (local.get $7) + (i32.const 0) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + (block + (local.set $5 + (local.get $12) + ) + (local.set $12 + (local.get $17) + ) + (local.set $17 + (local.get $10) + ) + (local.set $10 + (local.get $7) + ) + ) + ) + ) + (block + (local.set $12 + (local.get $17) + ) + (local.set $17 + (local.get $10) + ) + (local.set $10 + (i32.const 0) + ) + ) + ) + ) + ) + (block $label$43 + (if + (i32.eq + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 46) + ) + (block + (if + (i32.ne + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 42) + ) + (block + (if + (i32.lt_u + (local.tee $5 + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (block + (local.set $11 + (local.get $7) + ) + (local.set $7 + (i32.const 0) + ) + ) + (block + (local.set $5 + (i32.const 0) + ) + (local.set $11 + (local.get $7) + ) + (br $label$43) + ) + ) + (loop $label$48 + (local.set $5 + (i32.add + (i32.mul + (local.get $7) + (i32.const 10) + ) + (local.get $5) + ) + ) + (br_if $label$43 + (i32.ge_u + (local.tee $8 + (i32.add + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + ) + (local.set $7 + (local.get $5) + ) + (local.set $5 + (local.get $8) + ) + (br $label$48) + ) + ) + ) + (if + (i32.lt_u + (local.tee $5 + (i32.add + (i32.load8_s + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 2) + ) + ) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (if + (i32.eq + (i32.load8_s offset=3 + (local.get $11) + ) + (i32.const 36) + ) + (block + (i32.store + (i32.add + (local.get $4) + (i32.shl + (local.get $5) + (i32.const 2) + ) + ) + (i32.const 10) + ) + (local.set $5 + (i32.wrap_i64 + 
(i64.load + (i32.add + (local.get $3) + (i32.shl + (i32.add + (i32.load8_s + (local.get $7) + ) + (i32.const -48) + ) + (i32.const 3) + ) + ) + ) + ) + ) + (local.set $11 + (i32.add + (local.get $11) + (i32.const 4) + ) + ) + (br $label$43) + ) + ) + ) + (if + (local.get $17) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (local.set $11 + (if (result i32) + (local.get $30) + (block (result i32) + (local.set $5 + (i32.load + (local.tee $11 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $11) + (i32.const 4) + ) + ) + (local.get $7) + ) + (block (result i32) + (local.set $5 + (i32.const 0) + ) + (local.get $7) + ) + ) + ) + ) + (local.set $5 + (i32.const -1) + ) + ) + ) + (local.set $7 + (local.get $11) + ) + (local.set $8 + (i32.const 0) + ) + (loop $label$55 + (if + (i32.gt_u + (local.tee $6 + (i32.add + (i32.load8_s + (local.get $7) + ) + (i32.const -65) + ) + ) + (i32.const 57) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (local.set $11 + (i32.add + (local.get $7) + (i32.const 1) + ) + ) + (if + (i32.lt_u + (i32.add + (local.tee $6 + (i32.and + (local.tee $13 + (i32.load8_s + (i32.add + (i32.add + (i32.mul + (local.get $8) + (i32.const 58) + ) + (i32.const 1177) + ) + (local.get $6) + ) + ) + ) + (i32.const 255) + ) + ) + (i32.const -1) + ) + (i32.const 8) + ) + (block + (local.set $7 + (local.get $11) + ) + (local.set $8 + (local.get $6) + ) + (br $label$55) + ) + ) + ) + (if + (i32.eqz + (i32.shr_s + (i32.shl + (local.get $13) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (local.set $14 + (i32.gt_s + (local.get $9) + (i32.const -1) + ) + ) + (block $label$59 + (block $label$60 + (if + (i32.eq + (i32.shr_s + (i32.shl + (local.get $13) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 19) + ) + (if + (local.get $14) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + (br $label$60) + ) + (block + (if + (local.get $14) + (block + (i32.store + (i32.add + (local.get $4) + (i32.shl + (local.get $9) + (i32.const 2) + ) + ) + (local.get $6) + ) + (i64.store + (local.get $16) + (i64.load + (i32.add + (local.get $3) + (i32.shl + (local.get $9) + (i32.const 3) + ) + ) + ) + ) + (br $label$60) + ) + ) + (if + (i32.eqz + (local.get $30) + ) + (block + (local.set $15 + (i32.const 0) + ) + (br $label$5) + ) + ) + (call $22 + (local.get $16) + (local.get $6) + (local.get $2) + ) + ) + ) + (br $label$59) + ) + (if + (i32.eqz + (local.get $30) + ) + (block + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + ) + ) + (local.set $9 + (i32.and + (local.tee $7 + (i32.load8_s + (local.get $7) + ) + ) + (i32.const -33) + ) + ) + (if + (i32.eqz + (i32.and + (i32.ne + (local.get $8) + (i32.const 0) + ) + (i32.eq + (i32.and + (local.get $7) + (i32.const 15) + ) + (i32.const 3) + ) + ) + ) + (local.set $9 + (local.get $7) + ) + ) + (local.set $7 + (i32.and + (local.get $12) + (i32.const -65537) + ) + ) + (if + (i32.and + (local.get $12) + (i32.const 8192) + ) + (local.set $12 + (local.get $7) + ) + ) + (block $label$70 + (block $label$71 + (block $label$72 + (block $label$73 + (block $label$74 + (block $label$75 + (block $label$76 + (block $label$77 + (block $label$78 + (block $label$79 + (block $label$80 + (block $label$81 + (block $label$82 + (block $label$83 + (block $label$84 + (block $label$85 + (block 
$label$86 + (block $label$87 + (block $label$88 + (block $label$89 + (br_table $label$78 $label$77 $label$80 $label$77 $label$78 $label$78 $label$78 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$79 $label$77 $label$77 $label$77 $label$77 $label$87 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$78 $label$77 $label$83 $label$85 $label$78 $label$78 $label$78 $label$77 $label$85 $label$77 $label$77 $label$77 $label$82 $label$89 $label$86 $label$88 $label$77 $label$77 $label$81 $label$77 $label$84 $label$77 $label$77 $label$87 $label$77 + (i32.sub + (local.get $9) + (i32.const 65) + ) + ) + ) + (block $label$90 + (block $label$91 + (block $label$92 + (block $label$93 + (block $label$94 + (block $label$95 + (block $label$96 + (block $label$97 + (br_table $label$97 $label$96 $label$95 $label$94 $label$93 $label$90 $label$92 $label$91 $label$90 + (i32.sub + (i32.shr_s + (i32.shl + (i32.and + (local.get $8) + (i32.const 255) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 0) + ) + ) + ) + (i32.store + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i64.store + (i32.load + (local.get $16) + ) + (i64.extend_i32_s + (local.get $15) + ) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store16 + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store8 + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i64.store + (i32.load + (local.get $16) + ) + (i64.extend_i32_s + (local.get $15) + ) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $12 + (i32.or + (local.get $12) + (i32.const 8) + ) + ) + (if + (i32.le_u + (local.get $5) + (i32.const 8) + ) + (local.set $5 + (i32.const 8) + ) + ) + (local.set $9 + (i32.const 120) + ) + (br $label$76) + ) + (br $label$76) + ) + (if + (i64.eq + (local.tee $50 + (i64.load + (local.get $16) + ) + ) + (i64.const 0) + ) + (local.set $7 + (local.get $21) + ) + (block + (local.set $1 + (local.get $21) + ) + (loop $label$101 + (i64.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i64.or + (i64.and + (local.get $50) + (i64.const 7) + ) + (i64.const 48) + ) + ) + (br_if $label$101 + (i64.ne + (local.tee $50 + (i64.shr_u + (local.get $50) + (i64.const 3) + ) + ) + (i64.const 0) + ) + ) + (local.set $7 + (local.get $1) + ) + ) + ) + ) + (if + (i32.and + (local.get $12) + (i32.const 8) + ) + (block + (local.set $8 + (i32.add + (local.tee $1 + (i32.sub + (local.get $38) + (local.get $7) + ) + ) + (i32.const 1) + ) + ) + (if + (i32.le_s + (local.get $5) + (local.get $1) + ) + (local.set $5 + (local.get $8) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1657) + ) + (br 
$label$71) + ) + (block + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1657) + ) + (br $label$71) + ) + ) + ) + (if + (i64.lt_s + (local.tee $50 + (i64.load + (local.get $16) + ) + ) + (i64.const 0) + ) + (block + (i64.store + (local.get $16) + (local.tee $50 + (i64.sub + (i64.const 0) + (local.get $50) + ) + ) + ) + (local.set $6 + (i32.const 1) + ) + (local.set $8 + (i32.const 1657) + ) + (br $label$75) + ) + ) + (if + (i32.and + (local.get $12) + (i32.const 2048) + ) + (block + (local.set $6 + (i32.const 1) + ) + (local.set $8 + (i32.const 1658) + ) + (br $label$75) + ) + (block + (local.set $6 + (local.tee $1 + (i32.and + (local.get $12) + (i32.const 1) + ) + ) + ) + (local.set $8 + (if (result i32) + (local.get $1) + (i32.const 1659) + (i32.const 1657) + ) + ) + (br $label$75) + ) + ) + ) + (local.set $50 + (i64.load + (local.get $16) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1657) + ) + (br $label$75) + ) + (i64.store8 + (local.get $39) + (i64.load + (local.get $16) + ) + ) + (local.set $1 + (local.get $39) + ) + (local.set $12 + (local.get $7) + ) + (local.set $7 + (i32.const 1) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1657) + ) + (local.set $5 + (local.get $21) + ) + (br $label$70) + ) + (local.set $1 + (call $24 + (i32.load + (call $12) + ) + ) + ) + (br $label$74) + ) + (if + (i32.eqz + (local.tee $1 + (i32.load + (local.get $16) + ) + ) + ) + (local.set $1 + (i32.const 1667) + ) + ) + (br $label$74) + ) + (i64.store32 + (local.get $37) + (i64.load + (local.get $16) + ) + ) + (i32.store + (local.get $42) + (i32.const 0) + ) + (i32.store + (local.get $16) + (local.get $37) + ) + (local.set $7 + (local.get $37) + ) + (local.set $6 + (i32.const -1) + ) + (br $label$73) + ) + (local.set $7 + (i32.load + (local.get $16) + ) + ) + (if + (local.get $5) + (block + (local.set $6 + (local.get $5) + ) + (br $label$73) + ) + (block + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (i32.const 0) + (local.get $12) + ) + (local.set $1 + (i32.const 0) + ) + (br $label$72) + ) + ) + ) + (local.set $52 + (f64.load + (local.get $16) + ) + ) + (i32.store + (local.get $20) + (i32.const 0) + ) + (local.set $26 + (if (result i32) + (i64.lt_s + (i64.reinterpret_f64 + (local.get $52) + ) + (i64.const 0) + ) + (block (result i32) + (local.set $24 + (i32.const 1) + ) + (local.set $52 + (f64.neg + (local.get $52) + ) + ) + (i32.const 1674) + ) + (block (result i32) + (local.set $1 + (i32.and + (local.get $12) + (i32.const 1) + ) + ) + (if (result i32) + (i32.and + (local.get $12) + (i32.const 2048) + ) + (block (result i32) + (local.set $24 + (i32.const 1) + ) + (i32.const 1677) + ) + (block (result i32) + (local.set $24 + (local.get $1) + ) + (if (result i32) + (local.get $1) + (i32.const 1680) + (i32.const 1675) + ) + ) + ) + ) + ) + ) + (block $label$119 + (if + (i64.lt_u + (i64.and + (i64.reinterpret_f64 + (local.get $52) + ) + (i64.const 9218868437227405312) + ) + (i64.const 9218868437227405312) + ) + (block + (if + (local.tee $1 + (f64.ne + (local.tee $52 + (f64.mul + (call $27 + (local.get $52) + (local.get $20) + ) + (f64.const 2) + ) + ) + (f64.const 0) + ) + ) + (i32.store + (local.get $20) + (i32.add + (i32.load + (local.get $20) + ) + (i32.const -1) + ) + ) + ) + (if + (i32.eq + (local.tee $22 + (i32.or + (local.get $9) + (i32.const 32) + ) + ) + (i32.const 97) + ) + (block + (local.set $1 + (i32.add + (local.get $26) + (i32.const 9) + ) + ) + (if + (local.tee $6 + (i32.and + (local.get $9) + (i32.const 
32) + ) + ) + (local.set $26 + (local.get $1) + ) + ) + (if + (i32.eqz + (i32.or + (i32.gt_u + (local.get $5) + (i32.const 11) + ) + (i32.eqz + (local.tee $1 + (i32.sub + (i32.const 12) + (local.get $5) + ) + ) + ) + ) + ) + (block + (local.set $53 + (f64.const 8) + ) + (loop $label$125 + (local.set $53 + (f64.mul + (local.get $53) + (f64.const 16) + ) + ) + (br_if $label$125 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + ) + ) + (local.set $52 + (if (result f64) + (i32.eq + (i32.load8_s + (local.get $26) + ) + (i32.const 45) + ) + (f64.neg + (f64.add + (local.get $53) + (f64.sub + (f64.neg + (local.get $52) + ) + (local.get $53) + ) + ) + ) + (f64.sub + (f64.add + (local.get $52) + (local.get $53) + ) + (local.get $53) + ) + ) + ) + ) + ) + (local.set $1 + (i32.sub + (i32.const 0) + (local.tee $7 + (i32.load + (local.get $20) + ) + ) + ) + ) + (if + (i32.eq + (local.tee $1 + (call $23 + (i64.extend_i32_s + (if (result i32) + (i32.lt_s + (local.get $7) + (i32.const 0) + ) + (local.get $1) + (local.get $7) + ) + ) + (local.get $33) + ) + ) + (local.get $33) + ) + (block + (i32.store8 + (local.get $40) + (i32.const 48) + ) + (local.set $1 + (local.get $40) + ) + ) + ) + (local.set $13 + (i32.or + (local.get $24) + (i32.const 2) + ) + ) + (i32.store8 + (i32.add + (local.get $1) + (i32.const -1) + ) + (i32.add + (i32.and + (i32.shr_s + (local.get $7) + (i32.const 31) + ) + (i32.const 2) + ) + (i32.const 43) + ) + ) + (i32.store8 + (local.tee $8 + (i32.add + (local.get $1) + (i32.const -2) + ) + ) + (i32.add + (local.get $9) + (i32.const 15) + ) + ) + (local.set $9 + (i32.lt_s + (local.get $5) + (i32.const 1) + ) + ) + (local.set $14 + (i32.eqz + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + ) + (local.set $1 + (local.get $19) + ) + (loop $label$131 + (i32.store8 + (local.get $1) + (i32.or + (i32.load8_u + (i32.add + (local.tee $7 + (i32.trunc_f64_s + (local.get $52) + ) + ) + (i32.const 1641) + ) + ) + (local.get $6) + ) + ) + (local.set $52 + (f64.mul + (f64.sub + (local.get $52) + (f64.convert_i32_s + (local.get $7) + ) + ) + (f64.const 16) + ) + ) + (local.set $1 + (block $label$132 (result i32) + (if (result i32) + (i32.eq + (i32.sub + (local.tee $7 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.get $27) + ) + (i32.const 1) + ) + (block (result i32) + (drop + (br_if $label$132 + (local.get $7) + (i32.and + (local.get $14) + (i32.and + (local.get $9) + (f64.eq + (local.get $52) + (f64.const 0) + ) + ) + ) + ) + ) + (i32.store8 + (local.get $7) + (i32.const 46) + ) + (i32.add + (local.get $1) + (i32.const 2) + ) + ) + (local.get $7) + ) + ) + ) + (br_if $label$131 + (f64.ne + (local.get $52) + (f64.const 0) + ) + ) + ) + (local.set $6 + (i32.sub + (i32.add + (local.get $46) + (local.get $5) + ) + (local.tee $7 + (local.get $8) + ) + ) + ) + (local.set $9 + (i32.add + (i32.sub + (local.get $44) + (local.get $7) + ) + (local.get $1) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.tee $5 + (i32.add + (if (result i32) + (i32.and + (i32.ne + (local.get $5) + (i32.const 0) + ) + (i32.lt_s + (i32.add + (local.get $45) + (local.get $1) + ) + (local.get $5) + ) + ) + (local.get $6) + (local.tee $6 + (local.get $9) + ) + ) + (local.get $13) + ) + ) + (local.get $12) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $26) + (local.get $13) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (local.get $10) + (local.get $5) + 
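+ ;; %a/%A: each hex digit is looked up in the nibble table at offset 1641 and OR'd with the case bit; the binary exponent is rendered in decimal with an explicit '+'/'-' sign.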
(i32.xor + (local.get $12) + (i32.const 65536) + ) + ) + (local.set $1 + (i32.sub + (local.get $1) + (local.get $27) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $19) + (local.get $1) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (i32.sub + (local.get $6) + (i32.add + (local.get $1) + (local.tee $1 + (i32.sub + (local.get $28) + (local.get $7) + ) + ) + ) + ) + (i32.const 0) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $8) + (local.get $1) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $5) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.ge_s + (local.get $5) + (local.get $10) + ) + (local.set $10 + (local.get $5) + ) + ) + (br $label$119) + ) + ) + (if + (local.get $1) + (block + (i32.store + (local.get $20) + (local.tee $6 + (i32.add + (i32.load + (local.get $20) + ) + (i32.const -28) + ) + ) + ) + (local.set $52 + (f64.mul + (local.get $52) + (f64.const 268435456) + ) + ) + ) + (local.set $6 + (i32.load + (local.get $20) + ) + ) + ) + (local.set $8 + (local.tee $7 + (if (result i32) + (i32.lt_s + (local.get $6) + (i32.const 0) + ) + (local.get $47) + (local.get $48) + ) + ) + ) + (loop $label$145 + (i32.store + (local.get $8) + (local.tee $1 + (i32.trunc_f64_s + (local.get $52) + ) + ) + ) + (local.set $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + (br_if $label$145 + (f64.ne + (local.tee $52 + (f64.mul + (f64.sub + (local.get $52) + (f64.convert_i32_u + (local.get $1) + ) + ) + (f64.const 1e9) + ) + ) + (f64.const 0) + ) + ) + ) + (if + (i32.gt_s + (local.get $6) + (i32.const 0) + ) + (block + (local.set $1 + (local.get $7) + ) + (loop $label$147 + (local.set $14 + (if (result i32) + (i32.gt_s + (local.get $6) + (i32.const 29) + ) + (i32.const 29) + (local.get $6) + ) + ) + (block $label$150 + (if + (i32.ge_u + (local.tee $6 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + (local.get $1) + ) + (block + (local.set $50 + (i64.extend_i32_u + (local.get $14) + ) + ) + (local.set $13 + (i32.const 0) + ) + (loop $label$152 + (i64.store32 + (local.get $6) + (i64.rem_u + (local.tee $51 + (i64.add + (i64.shl + (i64.extend_i32_u + (i32.load + (local.get $6) + ) + ) + (local.get $50) + ) + (i64.extend_i32_u + (local.get $13) + ) + ) + ) + (i64.const 1000000000) + ) + ) + (local.set $13 + (i32.wrap_i64 + (i64.div_u + (local.get $51) + (i64.const 1000000000) + ) + ) + ) + (br_if $label$152 + (i32.ge_u + (local.tee $6 + (i32.add + (local.get $6) + (i32.const -4) + ) + ) + (local.get $1) + ) + ) + ) + (br_if $label$150 + (i32.eqz + (local.get $13) + ) + ) + (i32.store + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -4) + ) + ) + (local.get $13) + ) + ) + ) + ) + (loop $label$153 + (if + (i32.gt_u + (local.get $8) + (local.get $1) + ) + (if + (i32.eqz + (i32.load + (local.tee $6 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + ) + ) + (block + (local.set $8 + (local.get $6) + ) + (br $label$153) + ) + ) + ) + ) + (i32.store + (local.get $20) + (local.tee $6 + (i32.sub + (i32.load + (local.get $20) + ) + (local.get $14) + ) + ) + ) + (br_if $label$147 + (i32.gt_s + (local.get $6) + (i32.const 0) + ) + ) + ) + ) + (local.set $1 + (local.get $7) + ) + ) + (local.set $18 + (if (result i32) + (i32.lt_s + (local.get $5) + (i32.const 0) + ) + (i32.const 6) + (local.get $5) + ) + ) + (if + 
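+ ;; an unspecified (negative) precision defaults to 6, as C requires for %f/%e/%g.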
(i32.lt_s + (local.get $6) + (i32.const 0) + ) + (block + (local.set $14 + (i32.add + (i32.div_s + (i32.add + (local.get $18) + (i32.const 25) + ) + (i32.const 9) + ) + (i32.const 1) + ) + ) + (local.set $25 + (i32.eq + (local.get $22) + (i32.const 102) + ) + ) + (local.set $5 + (local.get $8) + ) + (loop $label$160 + (if + (i32.gt_s + (local.tee $13 + (i32.sub + (i32.const 0) + (local.get $6) + ) + ) + (i32.const 9) + ) + (local.set $13 + (i32.const 9) + ) + ) + (block $label$162 + (if + (i32.lt_u + (local.get $1) + (local.get $5) + ) + (block + (local.set $29 + (i32.add + (i32.shl + (i32.const 1) + (local.get $13) + ) + (i32.const -1) + ) + ) + (local.set $35 + (i32.shr_u + (i32.const 1000000000) + (local.get $13) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (local.get $1) + ) + (loop $label$164 + (i32.store + (local.get $8) + (i32.add + (i32.shr_u + (local.tee $32 + (i32.load + (local.get $8) + ) + ) + (local.get $13) + ) + (local.get $6) + ) + ) + (local.set $6 + (i32.mul + (i32.and + (local.get $32) + (local.get $29) + ) + (local.get $35) + ) + ) + (br_if $label$164 + (i32.lt_u + (local.tee $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + (local.get $5) + ) + ) + ) + (local.set $8 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (if + (i32.eqz + (i32.load + (local.get $1) + ) + ) + (local.set $1 + (local.get $8) + ) + ) + (br_if $label$162 + (i32.eqz + (local.get $6) + ) + ) + (i32.store + (local.get $5) + (local.get $6) + ) + (local.set $5 + (i32.add + (local.get $5) + (i32.const 4) + ) + ) + ) + (block + (local.set $8 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (if + (i32.eqz + (i32.load + (local.get $1) + ) + ) + (local.set $1 + (local.get $8) + ) + ) + ) + ) + ) + (local.set $6 + (i32.add + (local.tee $8 + (if (result i32) + (local.get $25) + (local.get $7) + (local.get $1) + ) + ) + (i32.shl + (local.get $14) + (i32.const 2) + ) + ) + ) + (if + (i32.gt_s + (i32.shr_s + (i32.sub + (local.get $5) + (local.get $8) + ) + (i32.const 2) + ) + (local.get $14) + ) + (local.set $5 + (local.get $6) + ) + ) + (i32.store + (local.get $20) + (local.tee $6 + (i32.add + (i32.load + (local.get $20) + ) + (local.get $13) + ) + ) + ) + (br_if $label$160 + (i32.lt_s + (local.get $6) + (i32.const 0) + ) + ) + (local.set $13 + (local.get $5) + ) + ) + ) + (local.set $13 + (local.get $8) + ) + ) + (local.set $25 + (local.get $7) + ) + (block $label$172 + (if + (i32.lt_u + (local.get $1) + (local.get $13) + ) + (block + (local.set $5 + (i32.mul + (i32.shr_s + (i32.sub + (local.get $25) + (local.get $1) + ) + (i32.const 2) + ) + (i32.const 9) + ) + ) + (br_if $label$172 + (i32.lt_u + (local.tee $6 + (i32.load + (local.get $1) + ) + ) + (i32.const 10) + ) + ) + (local.set $8 + (i32.const 10) + ) + (loop $label$174 + (local.set $5 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$174 + (i32.ge_u + (local.get $6) + (local.tee $8 + (i32.mul + (local.get $8) + (i32.const 10) + ) + ) + ) + ) + ) + ) + (local.set $5 + (i32.const 0) + ) + ) + ) + (local.set $29 + (i32.eq + (local.get $22) + (i32.const 103) + ) + ) + (local.set $35 + (i32.ne + (local.get $18) + (i32.const 0) + ) + ) + (if + (i32.lt_s + (local.tee $8 + (i32.add + (i32.sub + (local.get $18) + (if (result i32) + (i32.ne + (local.get $22) + (i32.const 102) + ) + (local.get $5) + (i32.const 0) + ) + ) + (i32.shr_s + (i32.shl + (i32.and + (local.get $35) + (local.get $29) + ) + (i32.const 31) + ) + (i32.const 31) + ) + ) + ) + (i32.add + (i32.mul + (i32.shr_s + (i32.sub + (local.get $13) + 
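+ ;; %e/%f/%g: the scaled mantissa is kept as an array of base-1e9 limbs; the loop above shifts the limb array right by up to 9 bits at a time, carrying remainders downward while the decimal-exponent counter is adjusted.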
(local.get $25) + ) + (i32.const 2) + ) + (i32.const 9) + ) + (i32.const -9) + ) + ) + (block + (if + (i32.lt_s + (local.tee $8 + (i32.add + (i32.rem_s + (local.tee $14 + (i32.add + (local.get $8) + (i32.const 9216) + ) + ) + (i32.const 9) + ) + (i32.const 1) + ) + ) + (i32.const 9) + ) + (block + (local.set $6 + (i32.const 10) + ) + (loop $label$180 + (local.set $6 + (i32.mul + (local.get $6) + (i32.const 10) + ) + ) + (br_if $label$180 + (i32.ne + (local.tee $8 + (i32.add + (local.get $8) + (i32.const 1) + ) + ) + (i32.const 9) + ) + ) + ) + ) + (local.set $6 + (i32.const 10) + ) + ) + (local.set $14 + (i32.rem_u + (local.tee $22 + (i32.load + (local.tee $8 + (i32.add + (i32.add + (local.get $7) + (i32.const 4) + ) + (i32.shl + (i32.add + (i32.div_s + (local.get $14) + (i32.const 9) + ) + (i32.const -1024) + ) + (i32.const 2) + ) + ) + ) + ) + ) + (local.get $6) + ) + ) + (block $label$182 + (if + (i32.eqz + (i32.and + (local.tee $32 + (i32.eq + (i32.add + (local.get $8) + (i32.const 4) + ) + (local.get $13) + ) + ) + (i32.eqz + (local.get $14) + ) + ) + ) + (block + (local.set $52 + (if (result f64) + (i32.lt_u + (local.get $14) + (local.tee $49 + (i32.div_s + (local.get $6) + (i32.const 2) + ) + ) + ) + (f64.const 0.5) + (if (result f64) + (i32.and + (local.get $32) + (i32.eq + (local.get $14) + (local.get $49) + ) + ) + (f64.const 1) + (f64.const 1.5) + ) + ) + ) + (local.set $53 + (if (result f64) + (i32.and + (i32.div_u + (local.get $22) + (local.get $6) + ) + (i32.const 1) + ) + (f64.const 9007199254740994) + (f64.const 9007199254740992) + ) + ) + (block $label$190 + (if + (local.get $24) + (block + (br_if $label$190 + (i32.ne + (i32.load8_s + (local.get $26) + ) + (i32.const 45) + ) + ) + (local.set $53 + (f64.neg + (local.get $53) + ) + ) + (local.set $52 + (f64.neg + (local.get $52) + ) + ) + ) + ) + ) + (i32.store + (local.get $8) + (local.tee $14 + (i32.sub + (local.get $22) + (local.get $14) + ) + ) + ) + (br_if $label$182 + (f64.eq + (f64.add + (local.get $53) + (local.get $52) + ) + (local.get $53) + ) + ) + (i32.store + (local.get $8) + (local.tee $5 + (i32.add + (local.get $14) + (local.get $6) + ) + ) + ) + (if + (i32.gt_u + (local.get $5) + (i32.const 999999999) + ) + (loop $label$193 + (i32.store + (local.get $8) + (i32.const 0) + ) + (if + (i32.lt_u + (local.tee $8 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + (local.get $1) + ) + (i32.store + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -4) + ) + ) + (i32.const 0) + ) + ) + (i32.store + (local.get $8) + (local.tee $5 + (i32.add + (i32.load + (local.get $8) + ) + (i32.const 1) + ) + ) + ) + (br_if $label$193 + (i32.gt_u + (local.get $5) + (i32.const 999999999) + ) + ) + ) + ) + (local.set $5 + (i32.mul + (i32.shr_s + (i32.sub + (local.get $25) + (local.get $1) + ) + (i32.const 2) + ) + (i32.const 9) + ) + ) + (br_if $label$182 + (i32.lt_u + (local.tee $14 + (i32.load + (local.get $1) + ) + ) + (i32.const 10) + ) + ) + (local.set $6 + (i32.const 10) + ) + (loop $label$195 + (local.set $5 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$195 + (i32.ge_u + (local.get $14) + (local.tee $6 + (i32.mul + (local.get $6) + (i32.const 10) + ) + ) + ) + ) + ) + ) + ) + ) + (local.set $14 + (local.get $1) + ) + (local.set $6 + (local.get $5) + ) + (if + (i32.le_u + (local.get $13) + (local.tee $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + ) + (local.set $8 + (local.get $13) + ) + ) + ) + (block + (local.set $14 + (local.get $1) + ) + (local.set $6 + (local.get $5) + ) + (local.set 
$8 + (local.get $13) + ) + ) + ) + (local.set $32 + (i32.sub + (i32.const 0) + (local.get $6) + ) + ) + (loop $label$198 + (block $label$199 + (if + (i32.le_u + (local.get $8) + (local.get $14) + ) + (block + (local.set $22 + (i32.const 0) + ) + (br $label$199) + ) + ) + (if + (i32.load + (local.tee $1 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + ) + (local.set $22 + (i32.const 1) + ) + (block + (local.set $8 + (local.get $1) + ) + (br $label$198) + ) + ) + ) + ) + (block $label$203 + (if + (local.get $29) + (block + (local.set $1 + (if (result i32) + (i32.and + (i32.gt_s + (local.tee $1 + (i32.add + (i32.xor + (i32.and + (local.get $35) + (i32.const 1) + ) + (i32.const 1) + ) + (local.get $18) + ) + ) + (local.get $6) + ) + (i32.gt_s + (local.get $6) + (i32.const -5) + ) + ) + (block (result i32) + (local.set $5 + (i32.add + (local.get $9) + (i32.const -1) + ) + ) + (i32.sub + (i32.add + (local.get $1) + (i32.const -1) + ) + (local.get $6) + ) + ) + (block (result i32) + (local.set $5 + (i32.add + (local.get $9) + (i32.const -2) + ) + ) + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + ) + ) + (br_if $label$203 + (local.tee $13 + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + ) + (block $label$207 + (if + (local.get $22) + (block + (if + (i32.eqz + (local.tee $18 + (i32.load + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + ) + ) + (block + (local.set $9 + (i32.const 9) + ) + (br $label$207) + ) + ) + (if + (i32.rem_u + (local.get $18) + (i32.const 10) + ) + (block + (local.set $9 + (i32.const 0) + ) + (br $label$207) + ) + (block + (local.set $13 + (i32.const 10) + ) + (local.set $9 + (i32.const 0) + ) + ) + ) + (loop $label$212 + (local.set $9 + (i32.add + (local.get $9) + (i32.const 1) + ) + ) + (br_if $label$212 + (i32.eqz + (i32.rem_u + (local.get $18) + (local.tee $13 + (i32.mul + (local.get $13) + (i32.const 10) + ) + ) + ) + ) + ) + ) + ) + (local.set $9 + (i32.const 9) + ) + ) + ) + (local.set $18 + (i32.add + (i32.mul + (i32.shr_s + (i32.sub + (local.get $8) + (local.get $25) + ) + (i32.const 2) + ) + (i32.const 9) + ) + (i32.const -9) + ) + ) + (if + (i32.eq + (i32.or + (local.get $5) + (i32.const 32) + ) + (i32.const 102) + ) + (block + (local.set $13 + (i32.const 0) + ) + (if + (i32.ge_s + (local.get $1) + (if (result i32) + (i32.lt_s + (local.tee $9 + (i32.sub + (local.get $18) + (local.get $9) + ) + ) + (i32.const 0) + ) + (local.tee $9 + (i32.const 0) + ) + (local.get $9) + ) + ) + (local.set $1 + (local.get $9) + ) + ) + ) + (block + (local.set $13 + (i32.const 0) + ) + (if + (i32.ge_s + (local.get $1) + (if (result i32) + (i32.lt_s + (local.tee $9 + (i32.sub + (i32.add + (local.get $18) + (local.get $6) + ) + (local.get $9) + ) + ) + (i32.const 0) + ) + (local.tee $9 + (i32.const 0) + ) + (local.get $9) + ) + ) + (local.set $1 + (local.get $9) + ) + ) + ) + ) + ) + (block + (local.set $13 + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + (local.set $1 + (local.get $18) + ) + (local.set $5 + (local.get $9) + ) + ) + ) + ) + (if + (local.tee $25 + (i32.eq + (i32.or + (local.get $5) + (i32.const 32) + ) + (i32.const 102) + ) + ) + (block + (local.set $9 + (i32.const 0) + ) + (if + (i32.le_s + (local.get $6) + (i32.const 0) + ) + (local.set $6 + (i32.const 0) + ) + ) + ) + (block + (if + (i32.lt_s + (i32.sub + (local.get $28) + (local.tee $9 + (call $23 + (i64.extend_i32_s + (if (result i32) + (i32.lt_s + (local.get $6) + (i32.const 0) + ) + (local.get $32) + (local.get $6) + ) + ) + (local.get $33) + ) + ) + ) + (i32.const 2) + ) + (loop 
$label$229 + (i32.store8 + (local.tee $9 + (i32.add + (local.get $9) + (i32.const -1) + ) + ) + (i32.const 48) + ) + (br_if $label$229 + (i32.lt_s + (i32.sub + (local.get $28) + (local.get $9) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.store8 + (i32.add + (local.get $9) + (i32.const -1) + ) + (i32.add + (i32.and + (i32.shr_s + (local.get $6) + (i32.const 31) + ) + (i32.const 2) + ) + (i32.const 43) + ) + ) + (i32.store8 + (local.tee $6 + (i32.add + (local.get $9) + (i32.const -2) + ) + ) + (local.get $5) + ) + (local.set $9 + (local.get $6) + ) + (local.set $6 + (i32.sub + (local.get $28) + (local.get $6) + ) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.tee $18 + (i32.add + (i32.add + (i32.add + (i32.add + (local.get $24) + (i32.const 1) + ) + (local.get $1) + ) + (i32.ne + (local.tee $29 + (i32.or + (local.get $1) + (local.get $13) + ) + ) + (i32.const 0) + ) + ) + (local.get $6) + ) + ) + (local.get $12) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $26) + (local.get $24) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (local.get $10) + (local.get $18) + (i32.xor + (local.get $12) + (i32.const 65536) + ) + ) + (block $label$231 + (if + (local.get $25) + (block + (local.set $6 + (local.tee $9 + (if (result i32) + (i32.gt_u + (local.get $14) + (local.get $7) + ) + (local.get $7) + (local.get $14) + ) + ) + ) + (loop $label$235 + (local.set $5 + (call $23 + (i64.extend_i32_u + (i32.load + (local.get $6) + ) + ) + (local.get $31) + ) + ) + (block $label$236 + (if + (i32.eq + (local.get $6) + (local.get $9) + ) + (block + (br_if $label$236 + (i32.ne + (local.get $5) + (local.get $31) + ) + ) + (i32.store8 + (local.get $34) + (i32.const 48) + ) + (local.set $5 + (local.get $34) + ) + ) + (block + (br_if $label$236 + (i32.le_u + (local.get $5) + (local.get $19) + ) + ) + (drop + (call $39 + (local.get $19) + (i32.const 48) + (i32.sub + (local.get $5) + (local.get $27) + ) + ) + ) + (loop $label$239 + (br_if $label$239 + (i32.gt_u + (local.tee $5 + (i32.add + (local.get $5) + (i32.const -1) + ) + ) + (local.get $19) + ) + ) + ) + ) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $5) + (i32.sub + (local.get $41) + (local.get $5) + ) + (local.get $0) + ) + ) + ) + (if + (i32.le_u + (local.tee $5 + (i32.add + (local.get $6) + (i32.const 4) + ) + ) + (local.get $7) + ) + (block + (local.set $6 + (local.get $5) + ) + (br $label$235) + ) + ) + ) + (block $label$242 + (if + (local.get $29) + (block + (br_if $label$242 + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (i32.const 1709) + (i32.const 1) + (local.get $0) + ) + ) + ) + ) + ) + (if + (i32.and + (i32.gt_s + (local.get $1) + (i32.const 0) + ) + (i32.lt_u + (local.get $5) + (local.get $8) + ) + ) + (loop $label$245 + (if + (i32.gt_u + (local.tee $7 + (call $23 + (i64.extend_i32_u + (i32.load + (local.get $5) + ) + ) + (local.get $31) + ) + ) + (local.get $19) + ) + (block + (drop + (call $39 + (local.get $19) + (i32.const 48) + (i32.sub + (local.get $7) + (local.get $27) + ) + ) + ) + (loop $label$247 + (br_if $label$247 + (i32.gt_u + (local.tee $7 + (i32.add + (local.get $7) + (i32.const -1) + ) + ) + (local.get $19) + ) + ) + ) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $7) + (if (result i32) + 
(i32.gt_s + (local.get $1) + (i32.const 9) + ) + (i32.const 9) + (local.get $1) + ) + (local.get $0) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $1) + (i32.const -9) + ) + ) + (if + (i32.and + (i32.gt_s + (local.get $1) + (i32.const 9) + ) + (i32.lt_u + (local.tee $5 + (i32.add + (local.get $5) + (i32.const 4) + ) + ) + (local.get $8) + ) + ) + (block + (local.set $1 + (local.get $7) + ) + (br $label$245) + ) + (local.set $1 + (local.get $7) + ) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (i32.add + (local.get $1) + (i32.const 9) + ) + (i32.const 9) + (i32.const 0) + ) + ) + (block + (local.set $5 + (i32.add + (local.get $14) + (i32.const 4) + ) + ) + (if + (i32.eqz + (local.get $22) + ) + (local.set $8 + (local.get $5) + ) + ) + (if + (i32.gt_s + (local.get $1) + (i32.const -1) + ) + (block + (local.set $13 + (i32.eqz + (local.get $13) + ) + ) + (local.set $7 + (local.get $14) + ) + (local.set $5 + (local.get $1) + ) + (loop $label$256 + (if + (i32.eq + (local.tee $1 + (call $23 + (i64.extend_i32_u + (i32.load + (local.get $7) + ) + ) + (local.get $31) + ) + ) + (local.get $31) + ) + (block + (i32.store8 + (local.get $34) + (i32.const 48) + ) + (local.set $1 + (local.get $34) + ) + ) + ) + (block $label$258 + (if + (i32.eq + (local.get $7) + (local.get $14) + ) + (block + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $1) + (i32.const 1) + (local.get $0) + ) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (br_if $label$258 + (i32.and + (local.get $13) + (i32.lt_s + (local.get $5) + (i32.const 1) + ) + ) + ) + (br_if $label$258 + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (i32.const 1709) + (i32.const 1) + (local.get $0) + ) + ) + ) + (block + (br_if $label$258 + (i32.le_u + (local.get $1) + (local.get $19) + ) + ) + (drop + (call $39 + (local.get $19) + (i32.const 48) + (i32.add + (local.get $1) + (local.get $43) + ) + ) + ) + (loop $label$262 + (br_if $label$262 + (i32.gt_u + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (local.get $19) + ) + ) + ) + ) + ) + ) + (local.set $6 + (i32.sub + (local.get $41) + (local.get $1) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $1) + (if (result i32) + (i32.gt_s + (local.get $5) + (local.get $6) + ) + (local.get $6) + (local.get $5) + ) + (local.get $0) + ) + ) + ) + (br_if $label$256 + (i32.and + (i32.lt_u + (local.tee $7 + (i32.add + (local.get $7) + (i32.const 4) + ) + ) + (local.get $8) + ) + (i32.gt_s + (local.tee $5 + (i32.sub + (local.get $5) + (local.get $6) + ) + ) + (i32.const -1) + ) + ) + ) + (local.set $1 + (local.get $5) + ) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (i32.add + (local.get $1) + (i32.const 18) + ) + (i32.const 18) + (i32.const 0) + ) + (br_if $label$231 + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $9) + (i32.sub + (local.get $28) + (local.get $9) + ) + (local.get $0) + ) + ) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $18) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.ge_s + (local.get $18) + (local.get $10) + ) + (local.set $10 + (local.get $18) + ) + ) + ) + (block + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.tee $8 + (i32.add + (if (result i32) + (local.tee $6 + (i32.or + (f64.ne + 
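+ ;; x != x detects NaN; non-finite values emit the 3-byte strings at 1693..1705 (presumably "inf"/"INF"/"nan"/"NAN"), with case chosen by bit 32 of the conversion letter.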
(local.get $52) + (local.get $52) + ) + (i32.const 0) + ) + ) + (local.tee $24 + (i32.const 0) + ) + (local.get $24) + ) + (i32.const 3) + ) + ) + (local.get $7) + ) + (if + (i32.eqz + (i32.and + (local.tee $1 + (i32.load + (local.get $0) + ) + ) + (i32.const 32) + ) + ) + (block + (drop + (call $21 + (local.get $26) + (local.get $24) + (local.get $0) + ) + ) + (local.set $1 + (i32.load + (local.get $0) + ) + ) + ) + ) + (local.set $7 + (if (result i32) + (local.tee $5 + (i32.ne + (i32.and + (local.get $9) + (i32.const 32) + ) + (i32.const 0) + ) + ) + (i32.const 1693) + (i32.const 1697) + ) + ) + (local.set $5 + (if (result i32) + (local.get $5) + (i32.const 1701) + (i32.const 1705) + ) + ) + (if + (i32.eqz + (local.get $6) + ) + (local.set $5 + (local.get $7) + ) + ) + (if + (i32.eqz + (i32.and + (local.get $1) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $5) + (i32.const 3) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $8) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.ge_s + (local.get $8) + (local.get $10) + ) + (local.set $10 + (local.get $8) + ) + ) + ) + ) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $7 + (local.get $5) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1657) + ) + (local.set $5 + (local.get $21) + ) + (br $label$70) + ) + (local.set $7 + (i32.and + (local.get $9) + (i32.const 32) + ) + ) + (local.set $7 + (if (result i32) + (i64.eq + (local.tee $50 + (i64.load + (local.get $16) + ) + ) + (i64.const 0) + ) + (block (result i32) + (local.set $50 + (i64.const 0) + ) + (local.get $21) + ) + (block (result i32) + (local.set $1 + (local.get $21) + ) + (loop $label$280 + (i32.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i32.or + (i32.load8_u + (i32.add + (i32.and + (i32.wrap_i64 + (local.get $50) + ) + (i32.const 15) + ) + (i32.const 1641) + ) + ) + (local.get $7) + ) + ) + (br_if $label$280 + (i64.ne + (local.tee $50 + (i64.shr_u + (local.get $50) + (i64.const 4) + ) + ) + (i64.const 0) + ) + ) + ) + (local.set $50 + (i64.load + (local.get $16) + ) + ) + (local.get $1) + ) + ) + ) + (local.set $8 + (i32.add + (i32.shr_s + (local.get $9) + (i32.const 4) + ) + (i32.const 1657) + ) + ) + (if + (local.tee $1 + (i32.or + (i32.eqz + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + (i64.eq + (local.get $50) + (i64.const 0) + ) + ) + ) + (local.set $8 + (i32.const 1657) + ) + ) + (local.set $6 + (if (result i32) + (local.get $1) + (i32.const 0) + (i32.const 2) + ) + ) + (br $label$71) + ) + (local.set $7 + (call $23 + (local.get $50) + (local.get $21) + ) + ) + (br $label$71) + ) + (local.set $14 + (i32.eqz + (local.tee $13 + (call $17 + (local.get $1) + (i32.const 0) + (local.get $5) + ) + ) + ) + ) + (local.set $8 + (i32.sub + (local.get $13) + (local.get $1) + ) + ) + (local.set $9 + (i32.add + (local.get $1) + (local.get $5) + ) + ) + (local.set $12 + (local.get $7) + ) + (local.set $7 + (if (result i32) + (local.get $14) + (local.get $5) + (local.get $8) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1657) + ) + (local.set $5 + (if (result i32) + (local.get $14) + (local.get $9) + (local.get $13) + ) + ) + (br $label$70) + ) + (local.set $1 + (i32.const 0) + ) + (local.set $5 + (i32.const 0) + ) + (local.set $8 + (local.get $7) + ) + (loop $label$288 + (block $label$289 + (br_if $label$289 + (i32.eqz + (local.tee $9 + (i32.load + (local.get $8) + ) + ) + ) + ) + (br_if 
$label$289 + (i32.or + (i32.lt_s + (local.tee $5 + (call $26 + (local.get $36) + (local.get $9) + ) + ) + (i32.const 0) + ) + (i32.gt_u + (local.get $5) + (i32.sub + (local.get $6) + (local.get $1) + ) + ) + ) + ) + (local.set $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + (br_if $label$288 + (i32.gt_u + (local.get $6) + (local.tee $1 + (i32.add + (local.get $5) + (local.get $1) + ) + ) + ) + ) + ) + ) + (if + (i32.lt_s + (local.get $5) + (i32.const 0) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $1) + (local.get $12) + ) + (if + (local.get $1) + (block + (local.set $5 + (i32.const 0) + ) + (loop $label$292 + (br_if $label$72 + (i32.eqz + (local.tee $8 + (i32.load + (local.get $7) + ) + ) + ) + ) + (br_if $label$72 + (i32.gt_s + (local.tee $5 + (i32.add + (local.tee $8 + (call $26 + (local.get $36) + (local.get $8) + ) + ) + (local.get $5) + ) + ) + (local.get $1) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $36) + (local.get $8) + (local.get $0) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $7) + (i32.const 4) + ) + ) + (br_if $label$292 + (i32.lt_u + (local.get $5) + (local.get $1) + ) + ) + (br $label$72) + ) + ) + (block + (local.set $1 + (i32.const 0) + ) + (br $label$72) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $1) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.le_s + (local.get $10) + (local.get $1) + ) + (local.set $10 + (local.get $1) + ) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $1 + (i32.and + (local.get $12) + (i32.const -65537) + ) + ) + (if + (i32.gt_s + (local.get $5) + (i32.const -1) + ) + (local.set $12 + (local.get $1) + ) + ) + (local.set $5 + (if (result i32) + (i32.or + (local.get $5) + (local.tee $9 + (i64.ne + (i64.load + (local.get $16) + ) + (i64.const 0) + ) + ) + ) + (block (result i32) + (local.set $1 + (local.get $7) + ) + (if + (i32.gt_s + (local.get $5) + (local.tee $7 + (i32.add + (i32.xor + (i32.and + (local.get $9) + (i32.const 1) + ) + (i32.const 1) + ) + (i32.sub + (local.get $38) + (local.get $7) + ) + ) + ) + ) + (local.set $7 + (local.get $5) + ) + ) + (local.get $21) + ) + (block (result i32) + (local.set $1 + (local.get $21) + ) + (local.set $7 + (i32.const 0) + ) + (local.get $21) + ) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (if (result i32) + (i32.lt_s + (local.get $10) + (local.tee $5 + (i32.add + (if (result i32) + (i32.lt_s + (local.get $7) + (local.tee $9 + (i32.sub + (local.get $5) + (local.get $1) + ) + ) + ) + (local.tee $7 + (local.get $9) + ) + (local.get $7) + ) + (local.get $6) + ) + ) + ) + (local.tee $10 + (local.get $5) + ) + (local.get $10) + ) + (local.get $5) + (local.get $12) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $8) + (local.get $6) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (local.get $10) + (local.get $5) + (i32.xor + (local.get $12) + (i32.const 65536) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (local.get $7) + (local.get $9) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $1) + (local.get $9) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + 
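+ ;; trailing pad: XORing the flags with 8192 flips the left-justify bit, so $25 (a pad helper, by all appearances) emits spaces on the side opposite the one already padded.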
(local.get $5) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + ) + (br $label$2) + ) + (if + (i32.eqz + (local.get $0) + ) + (if + (local.get $17) + (block + (local.set $0 + (i32.const 1) + ) + (loop $label$308 + (if + (local.tee $1 + (i32.load + (i32.add + (local.get $4) + (i32.shl + (local.get $0) + (i32.const 2) + ) + ) + ) + ) + (block + (call $22 + (i32.add + (local.get $3) + (i32.shl + (local.get $0) + (i32.const 3) + ) + ) + (local.get $1) + (local.get $2) + ) + (br_if $label$308 + (i32.lt_s + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (i32.const 10) + ) + ) + (local.set $15 + (i32.const 1) + ) + (br $label$2) + ) + ) + ) + (loop $label$310 + (if + (i32.load + (i32.add + (local.get $4) + (i32.shl + (local.get $0) + (i32.const 2) + ) + ) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$2) + ) + ) + (br_if $label$310 + (i32.lt_s + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (i32.const 10) + ) + ) + (local.set $15 + (i32.const 1) + ) + ) + ) + (local.set $15 + (i32.const 0) + ) + ) + ) + ) + (global.set $global$1 + (local.get $23) + ) + (local.get $15) + ) + ) + (func $20 (; 33 ;) (type $1) (param $0 i32) (result i32) + (i32.const 0) + ) + (func $21 (; 34 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (block $label$1 (result i32) + (block $label$2 + (block $label$3 + (br_if $label$3 + (local.tee $3 + (i32.load + (local.tee $4 + (i32.add + (local.get $2) + (i32.const 16) + ) + ) + ) + ) + ) + (if + (call $30 + (local.get $2) + ) + (local.set $3 + (i32.const 0) + ) + (block + (local.set $3 + (i32.load + (local.get $4) + ) + ) + (br $label$3) + ) + ) + (br $label$2) + ) + (if + (i32.lt_u + (i32.sub + (local.get $3) + (local.tee $4 + (i32.load + (local.tee $5 + (i32.add + (local.get $2) + (i32.const 20) + ) + ) + ) + ) + ) + (local.get $1) + ) + (block + (local.set $3 + (call_indirect (type $0) + (local.get $2) + (local.get $0) + (local.get $1) + (i32.add + (i32.and + (i32.load offset=36 + (local.get $2) + ) + (i32.const 3) + ) + (i32.const 2) + ) + ) + ) + (br $label$2) + ) + ) + (local.set $2 + (block $label$7 (result i32) + (if (result i32) + (i32.gt_s + (i32.load8_s offset=75 + (local.get $2) + ) + (i32.const -1) + ) + (block (result i32) + (local.set $3 + (local.get $1) + ) + (loop $label$9 + (drop + (br_if $label$7 + (i32.const 0) + (i32.eqz + (local.get $3) + ) + ) + ) + (if + (i32.ne + (i32.load8_s + (i32.add + (local.get $0) + (local.tee $6 + (i32.add + (local.get $3) + (i32.const -1) + ) + ) + ) + ) + (i32.const 10) + ) + (block + (local.set $3 + (local.get $6) + ) + (br $label$9) + ) + ) + ) + (br_if $label$2 + (i32.lt_u + (call_indirect (type $0) + (local.get $2) + (local.get $0) + (local.get $3) + (i32.add + (i32.and + (i32.load offset=36 + (local.get $2) + ) + (i32.const 3) + ) + (i32.const 2) + ) + ) + (local.get $3) + ) + ) + (local.set $4 + (i32.load + (local.get $5) + ) + ) + (local.set $1 + (i32.sub + (local.get $1) + (local.get $3) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (local.get $3) + ) + ) + (local.get $3) + ) + (i32.const 0) + ) + ) + ) + (drop + (call $40 + (local.get $4) + (local.get $0) + (local.get $1) + ) + ) + (i32.store + (local.get $5) + (i32.add + (i32.load + (local.get $5) + ) + (local.get $1) + ) + ) + (local.set $3 + (i32.add + (local.get $2) + (local.get $1) + ) + ) + ) + (local.get $3) + ) + ) + (func $22 (; 35 ;) 
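+ ;; $22 pops a single va_arg: a br_table on the argument class rounds the arg pointer up to 4- or 8-byte alignment, loads an i32/i64/f64, sign- or zero-extends the 8- and 16-bit classes, and stores the result through $0.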
(type $8) (param $0 i32) (param $1 i32) (param $2 i32) + (local $3 i32) + (local $4 i64) + (local $5 f64) + (block $label$1 + (if + (i32.le_u + (local.get $1) + (i32.const 20) + ) + (block $label$3 + (block $label$4 + (block $label$5 + (block $label$6 + (block $label$7 + (block $label$8 + (block $label$9 + (block $label$10 + (block $label$11 + (block $label$12 + (block $label$13 + (br_table $label$13 $label$12 $label$11 $label$10 $label$9 $label$8 $label$7 $label$6 $label$5 $label$4 $label$3 + (i32.sub + (local.get $1) + (i32.const 9) + ) + ) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i32.store + (local.get $0) + (local.get $3) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_s + (local.get $3) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_u + (local.get $3) + ) + ) + (br $label$1) + ) + (local.set $4 + (i64.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 7) + ) + (i32.const -8) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (i64.store + (local.get $0) + (local.get $4) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_s + (i32.shr_s + (i32.shl + (i32.and + (local.get $3) + (i32.const 65535) + ) + (i32.const 16) + ) + (i32.const 16) + ) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_u + (i32.and + (local.get $3) + (i32.const 65535) + ) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_s + (i32.shr_s + (i32.shl + (i32.and + (local.get $3) + (i32.const 255) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_u + (i32.and + (local.get $3) + (i32.const 255) + ) + ) + ) + (br $label$1) + ) + (local.set $5 + (f64.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 7) + ) + (i32.const -8) + ) + ) + ) + 
) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (f64.store + (local.get $0) + (local.get $5) + ) + (br $label$1) + ) + (local.set $5 + (f64.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 7) + ) + (i32.const -8) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (f64.store + (local.get $0) + (local.get $5) + ) + ) + ) + ) + ) + (func $23 (; 36 ;) (type $9) (param $0 i64) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i64) + (block $label$1 (result i32) + (local.set $2 + (i32.wrap_i64 + (local.get $0) + ) + ) + (if + (i64.gt_u + (local.get $0) + (i64.const 4294967295) + ) + (block + (loop $label$3 + (i64.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i64.or + (i64.rem_u + (local.get $0) + (i64.const 10) + ) + (i64.const 48) + ) + ) + (local.set $4 + (i64.div_u + (local.get $0) + (i64.const 10) + ) + ) + (if + (i64.gt_u + (local.get $0) + (i64.const 42949672959) + ) + (block + (local.set $0 + (local.get $4) + ) + (br $label$3) + ) + ) + ) + (local.set $2 + (i32.wrap_i64 + (local.get $4) + ) + ) + ) + ) + (if + (local.get $2) + (loop $label$6 + (i32.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i32.or + (i32.rem_u + (local.get $2) + (i32.const 10) + ) + (i32.const 48) + ) + ) + (local.set $3 + (i32.div_u + (local.get $2) + (i32.const 10) + ) + ) + (if + (i32.ge_u + (local.get $2) + (i32.const 10) + ) + (block + (local.set $2 + (local.get $3) + ) + (br $label$6) + ) + ) + ) + ) + (local.get $1) + ) + ) + (func $24 (; 37 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (i32.const 0) + ) + (block $label$2 + (block $label$3 + (block $label$4 + (loop $label$5 + (br_if $label$4 + (i32.eq + (i32.load8_u + (i32.add + (local.get $1) + (i32.const 1711) + ) + ) + (local.get $0) + ) + ) + (br_if $label$5 + (i32.ne + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (i32.const 87) + ) + ) + (local.set $1 + (i32.const 87) + ) + (local.set $0 + (i32.const 1799) + ) + (br $label$3) + ) + ) + (if + (local.get $1) + (block + (local.set $0 + (i32.const 1799) + ) + (br $label$3) + ) + (local.set $0 + (i32.const 1799) + ) + ) + (br $label$2) + ) + (loop $label$8 + (local.set $2 + (local.get $0) + ) + (loop $label$9 + (local.set $0 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (if + (i32.load8_s + (local.get $2) + ) + (block + (local.set $2 + (local.get $0) + ) + (br $label$9) + ) + ) + ) + (br_if $label$8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + ) + ) + ) + (local.get $0) + ) + ) + (func $25 (; 38 ;) (type $10) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (param $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (block $label$1 + (local.set $7 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 256) + ) + ) + (local.set $6 + (local.get $7) + ) + (block $label$2 + (if + (i32.and + (i32.gt_s + (local.get $2) + (local.get $3) + ) + (i32.eqz + (i32.and + (local.get $4) + (i32.const 73728) + ) + ) + ) + (block + (drop + (call $39 + (local.get $6) + (local.get $1) + (if (result i32) + (i32.gt_u + (local.tee $5 + (i32.sub + (local.get $2) + (local.get $3) + ) + ) + (i32.const 256) + ) + (i32.const 256) + (local.get $5) + ) + ) + ) + (local.set $4 + (i32.eqz + (i32.and + (local.tee $1 + (i32.load + 
(local.get $0) + ) + ) + (i32.const 32) + ) + ) + ) + (if + (i32.gt_u + (local.get $5) + (i32.const 255) + ) + (block + (loop $label$7 + (if + (local.get $4) + (block + (drop + (call $21 + (local.get $6) + (i32.const 256) + (local.get $0) + ) + ) + (local.set $1 + (i32.load + (local.get $0) + ) + ) + ) + ) + (local.set $4 + (i32.eqz + (i32.and + (local.get $1) + (i32.const 32) + ) + ) + ) + (br_if $label$7 + (i32.gt_u + (local.tee $5 + (i32.add + (local.get $5) + (i32.const -256) + ) + ) + (i32.const 255) + ) + ) + ) + (br_if $label$2 + (i32.eqz + (local.get $4) + ) + ) + (local.set $5 + (i32.and + (i32.sub + (local.get $2) + (local.get $3) + ) + (i32.const 255) + ) + ) + ) + (br_if $label$2 + (i32.eqz + (local.get $4) + ) + ) + ) + (drop + (call $21 + (local.get $6) + (local.get $5) + (local.get $0) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $7) + ) + ) + ) + (func $26 (; 39 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (if (result i32) + (local.get $0) + (call $29 + (local.get $0) + (local.get $1) + (i32.const 0) + ) + (i32.const 0) + ) + ) + (func $27 (; 40 ;) (type $11) (param $0 f64) (param $1 i32) (result f64) + (call $28 + (local.get $0) + (local.get $1) + ) + ) + (func $28 (; 41 ;) (type $11) (param $0 f64) (param $1 i32) (result f64) + (local $2 i64) + (local $3 i64) + (block $label$1 (result f64) + (block $label$2 + (block $label$3 + (block $label$4 + (block $label$5 + (br_table $label$5 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$4 $label$3 + (i32.sub + (i32.shr_s + (i32.shl + (i32.and + (i32.and + (i32.wrap_i64 + (local.tee $3 + (i64.shr_u + (local.tee $2 + (i64.reinterpret_f64 + (local.get $0) + ) + ) + (i64.const 52) + ) + ) + ) + (i32.const 65535) + ) + (i32.const 2047) + ) + (i32.const 16) + ) + (i32.const 16) + ) + (i32.const 0) + ) + ) + ) + (i32.store + (local.get $1) + (if (result i32) + (f64.ne + (local.get $0) + (f64.const 0) + ) + (block (result i32) + (local.set $0 + (call $28 + (f64.mul + (local.get $0) + (f64.const 18446744073709551615) + ) + (local.get $1) + ) + ) + (i32.add + (i32.load + (local.get $1) + ) + (i32.const -64) + ) + ) + (i32.const 0) + ) + ) + (br $label$2) + ) + (br $label$2) + ) + (i32.store + (local.get $1) + (i32.add + (i32.and + (i32.wrap_i64 + (local.get $3) + ) + (i32.const 2047) + ) + (i32.const -1022) + ) + ) + (local.set $0 + (f64.reinterpret_i64 + (i64.or + (i64.and + (local.get $2) + (i64.const -9218868437227405313) + ) + (i64.const 4602678819172646912) + ) + ) + ) + ) + (local.get $0) + ) + ) + (func $29 (; 42 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (block $label$1 (result i32) + (if (result i32) + (local.get $0) + (block (result i32) + (if + (i32.lt_u + (local.get $1) + (i32.const 128) + ) + (block + (i32.store8 + (local.get $0) + (local.get $1) + ) + (br $label$1 + (i32.const 1) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (i32.const 2048) + ) + (block + (i32.store8 + (local.get 
$0) + (i32.or + (i32.shr_u + (local.get $1) + (i32.const 6) + ) + (i32.const 192) + ) + ) + (i32.store8 offset=1 + (local.get $0) + (i32.or + (i32.and + (local.get $1) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (br $label$1 + (i32.const 2) + ) + ) + ) + (if + (i32.or + (i32.lt_u + (local.get $1) + (i32.const 55296) + ) + (i32.eq + (i32.and + (local.get $1) + (i32.const -8192) + ) + (i32.const 57344) + ) + ) + (block + (i32.store8 + (local.get $0) + (i32.or + (i32.shr_u + (local.get $1) + (i32.const 12) + ) + (i32.const 224) + ) + ) + (i32.store8 offset=1 + (local.get $0) + (i32.or + (i32.and + (i32.shr_u + (local.get $1) + (i32.const 6) + ) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.store8 offset=2 + (local.get $0) + (i32.or + (i32.and + (local.get $1) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (br $label$1 + (i32.const 3) + ) + ) + ) + (if (result i32) + (i32.lt_u + (i32.add + (local.get $1) + (i32.const -65536) + ) + (i32.const 1048576) + ) + (block (result i32) + (i32.store8 + (local.get $0) + (i32.or + (i32.shr_u + (local.get $1) + (i32.const 18) + ) + (i32.const 240) + ) + ) + (i32.store8 offset=1 + (local.get $0) + (i32.or + (i32.and + (i32.shr_u + (local.get $1) + (i32.const 12) + ) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.store8 offset=2 + (local.get $0) + (i32.or + (i32.and + (i32.shr_u + (local.get $1) + (i32.const 6) + ) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.store8 offset=3 + (local.get $0) + (i32.or + (i32.and + (local.get $1) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.const 4) + ) + (block (result i32) + (i32.store + (call $12) + (i32.const 84) + ) + (i32.const -1) + ) + ) + ) + (i32.const 1) + ) + ) + ) + (func $30 (; 43 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (i32.load8_s + (local.tee $2 + (i32.add + (local.get $0) + (i32.const 74) + ) + ) + ) + ) + (i32.store8 + (local.get $2) + (i32.or + (i32.add + (local.get $1) + (i32.const 255) + ) + (local.get $1) + ) + ) + (local.tee $0 + (if (result i32) + (i32.and + (local.tee $1 + (i32.load + (local.get $0) + ) + ) + (i32.const 8) + ) + (block (result i32) + (i32.store + (local.get $0) + (i32.or + (local.get $1) + (i32.const 32) + ) + ) + (i32.const -1) + ) + (block (result i32) + (i32.store offset=8 + (local.get $0) + (i32.const 0) + ) + (i32.store offset=4 + (local.get $0) + (i32.const 0) + ) + (i32.store offset=28 + (local.get $0) + (local.tee $1 + (i32.load offset=44 + (local.get $0) + ) + ) + ) + (i32.store offset=20 + (local.get $0) + (local.get $1) + ) + (i32.store offset=16 + (local.get $0) + (i32.add + (local.get $1) + (i32.load offset=48 + (local.get $0) + ) + ) + ) + (i32.const 0) + ) + ) + ) + ) + ) + (func $31 (; 44 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (block $label$1 (result i32) + (local.set $3 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (i32.store8 + (local.tee $4 + (local.get $3) + ) + (local.tee $7 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + ) + (block $label$2 + (block $label$3 + (br_if $label$3 + (local.tee $5 + (i32.load + (local.tee $2 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + ) + (if + (call $30 + (local.get $0) + ) + (local.set $1 + (i32.const -1) + ) + (block + (local.set $5 + (i32.load + (local.get $2) + ) + ) + (br $label$3) + ) + ) + 
(br $label$2) + ) + (if + (i32.lt_u + (local.tee $6 + (i32.load + (local.tee $2 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (local.get $5) + ) + (if + (i32.ne + (local.tee $1 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (i32.load8_s offset=75 + (local.get $0) + ) + ) + (block + (i32.store + (local.get $2) + (i32.add + (local.get $6) + (i32.const 1) + ) + ) + (i32.store8 + (local.get $6) + (local.get $7) + ) + (br $label$2) + ) + ) + ) + (local.set $1 + (if (result i32) + (i32.eq + (call_indirect (type $0) + (local.get $0) + (local.get $4) + (i32.const 1) + (i32.add + (i32.and + (i32.load offset=36 + (local.get $0) + ) + (i32.const 3) + ) + (i32.const 2) + ) + ) + (i32.const 1) + ) + (i32.load8_u + (local.get $4) + ) + (i32.const -1) + ) + ) + ) + (global.set $global$1 + (local.get $3) + ) + (local.get $1) + ) + ) + (func $32 (; 45 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (block $label$1 (result i32) + (block $label$2 + (block $label$3 + (br_if $label$3 + (i32.lt_s + (i32.load offset=76 + (local.get $1) + ) + (i32.const 0) + ) + ) + (br_if $label$3 + (i32.eqz + (call $20 + (local.get $1) + ) + ) + ) + (local.set $0 + (block $label$4 (result i32) + (block $label$5 + (br_if $label$5 + (i32.eq + (i32.load8_s offset=75 + (local.get $1) + ) + (local.get $0) + ) + ) + (br_if $label$5 + (i32.ge_u + (local.tee $2 + (i32.load + (local.tee $3 + (i32.add + (local.get $1) + (i32.const 20) + ) + ) + ) + ) + (i32.load offset=16 + (local.get $1) + ) + ) + ) + (i32.store + (local.get $3) + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (i32.store8 + (local.get $2) + (local.get $0) + ) + (br $label$4 + (i32.and + (local.get $0) + (i32.const 255) + ) + ) + ) + (call $31 + (local.get $1) + (local.get $0) + ) + ) + ) + (call $13 + (local.get $1) + ) + (br $label$2) + ) + (if + (i32.ne + (i32.load8_s offset=75 + (local.get $1) + ) + (local.get $0) + ) + (if + (i32.lt_u + (local.tee $2 + (i32.load + (local.tee $3 + (i32.add + (local.get $1) + (i32.const 20) + ) + ) + ) + ) + (i32.load offset=16 + (local.get $1) + ) + ) + (block + (i32.store + (local.get $3) + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (i32.store8 + (local.get $2) + (local.get $0) + ) + (local.set $0 + (i32.and + (local.get $0) + (i32.const 255) + ) + ) + (br $label$2) + ) + ) + ) + (local.set $0 + (call $31 + (local.get $1) + (local.get $0) + ) + ) + ) + (local.get $0) + ) + ) + (func $33 (; 46 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (block $label$1 (result i32) + (local.set $2 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (i32.store + (local.tee $3 + (local.get $2) + ) + (local.get $1) + ) + (local.set $0 + (call $18 + (i32.load + (i32.const 1024) + ) + (local.get $0) + (local.get $3) + ) + ) + (global.set $global$1 + (local.get $2) + ) + (local.get $0) + ) + ) + (func $34 (; 47 ;) (type $1) (param $0 i32) (result i32) + (call $32 + (local.get $0) + (i32.load + (i32.const 1024) + ) + ) + ) + (func $35 (; 48 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (local $16 i32) + (local $17 i32) + (local $18 i32) + (local $19 i32) + (local $20 i32) + (local $21 i32) + (block $label$1 
(result i32) + (local.set $14 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (local.set $18 + (local.get $14) + ) + (block $label$2 + (if + (i32.lt_u + (local.get $0) + (i32.const 245) + ) + (block + (local.set $3 + (i32.and + (i32.add + (local.get $0) + (i32.const 11) + ) + (i32.const -8) + ) + ) + (if + (i32.and + (local.tee $0 + (i32.shr_u + (local.tee $8 + (i32.load + (i32.const 3652) + ) + ) + (local.tee $2 + (i32.shr_u + (if (result i32) + (i32.lt_u + (local.get $0) + (i32.const 11) + ) + (local.tee $3 + (i32.const 16) + ) + (local.get $3) + ) + (i32.const 3) + ) + ) + ) + ) + (i32.const 3) + ) + (block + (local.set $4 + (i32.load + (local.tee $1 + (i32.add + (local.tee $7 + (i32.load + (local.tee $3 + (i32.add + (local.tee $2 + (i32.add + (i32.shl + (i32.shl + (local.tee $5 + (i32.add + (i32.xor + (i32.and + (local.get $0) + (i32.const 1) + ) + (i32.const 1) + ) + (local.get $2) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3692) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.eq + (local.get $2) + (local.get $4) + ) + (i32.store + (i32.const 3652) + (i32.and + (local.get $8) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $5) + ) + (i32.const -1) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $4) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $4) + (i32.const 12) + ) + ) + ) + (local.get $7) + ) + (block + (i32.store + (local.get $0) + (local.get $2) + ) + (i32.store + (local.get $3) + (local.get $4) + ) + ) + (call $fimport$10) + ) + ) + ) + (i32.store offset=4 + (local.get $7) + (i32.or + (local.tee $0 + (i32.shl + (local.get $5) + (i32.const 3) + ) + ) + (i32.const 3) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $7) + (local.get $0) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (local.get $1) + ) + ) + ) + (if + (i32.gt_u + (local.get $3) + (local.tee $16 + (i32.load + (i32.const 3660) + ) + ) + ) + (block + (if + (local.get $0) + (block + (local.set $5 + (i32.and + (i32.shr_u + (local.tee $0 + (i32.add + (i32.and + (local.tee $0 + (i32.and + (i32.shl + (local.get $0) + (local.get $2) + ) + (i32.or + (local.tee $0 + (i32.shl + (i32.const 2) + (local.get $2) + ) + ) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + ) + ) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + (i32.const -1) + ) + ) + (i32.const 12) + ) + (i32.const 16) + ) + ) + (local.set $12 + (i32.load + (local.tee $5 + (i32.add + (local.tee $9 + (i32.load + (local.tee $2 + (i32.add + (local.tee $4 + (i32.add + (i32.shl + (i32.shl + (local.tee $11 + (i32.add + (i32.or + (i32.or + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $0) + (local.get $5) + ) + ) + (i32.const 5) + ) + (i32.const 8) + ) + ) + (local.get $5) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $2) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 4) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $2) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $2) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (i32.shr_u + (local.get $2) 
+ (local.get $0) + ) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3692) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.eq + (local.get $4) + (local.get $12) + ) + (i32.store + (i32.const 3652) + (local.tee $7 + (i32.and + (local.get $8) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $11) + ) + (i32.const -1) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $12) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $12) + (i32.const 12) + ) + ) + ) + (local.get $9) + ) + (block + (i32.store + (local.get $0) + (local.get $4) + ) + (i32.store + (local.get $2) + (local.get $12) + ) + (local.set $7 + (local.get $8) + ) + ) + (call $fimport$10) + ) + ) + ) + (i32.store offset=4 + (local.get $9) + (i32.or + (local.get $3) + (i32.const 3) + ) + ) + (i32.store offset=4 + (local.tee $4 + (i32.add + (local.get $9) + (local.get $3) + ) + ) + (i32.or + (local.tee $11 + (i32.sub + (i32.shl + (local.get $11) + (i32.const 3) + ) + (local.get $3) + ) + ) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $4) + (local.get $11) + ) + (local.get $11) + ) + (if + (local.get $16) + (block + (local.set $9 + (i32.load + (i32.const 3672) + ) + ) + (local.set $2 + (i32.add + (i32.shl + (i32.shl + (local.tee $0 + (i32.shr_u + (local.get $16) + (i32.const 3) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3692) + ) + ) + (if + (i32.and + (local.get $7) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.load + (local.tee $3 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + (block + (local.set $6 + (local.get $3) + ) + (local.set $1 + (local.get $0) + ) + ) + ) + (block + (i32.store + (i32.const 3652) + (i32.or + (local.get $7) + (local.get $0) + ) + ) + (local.set $6 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + (local.set $1 + (local.get $2) + ) + ) + ) + (i32.store + (local.get $6) + (local.get $9) + ) + (i32.store offset=12 + (local.get $1) + (local.get $9) + ) + (i32.store offset=8 + (local.get $9) + (local.get $1) + ) + (i32.store offset=12 + (local.get $9) + (local.get $2) + ) + ) + ) + (i32.store + (i32.const 3660) + (local.get $11) + ) + (i32.store + (i32.const 3672) + (local.get $4) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (local.get $5) + ) + ) + ) + (if + (local.tee $6 + (i32.load + (i32.const 3656) + ) + ) + (block + (local.set $2 + (i32.and + (i32.shr_u + (local.tee $0 + (i32.add + (i32.and + (local.get $6) + (i32.sub + (i32.const 0) + (local.get $6) + ) + ) + (i32.const -1) + ) + ) + (i32.const 12) + ) + (i32.const 16) + ) + ) + (local.set $9 + (i32.sub + (i32.and + (i32.load offset=4 + (local.tee $2 + (i32.load + (i32.add + (i32.shl + (i32.add + (i32.or + (i32.or + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $0) + (local.get $2) + ) + ) + (i32.const 5) + ) + (i32.const 8) + ) + ) + (local.get $2) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 4) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + 
(i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 3956) + ) + ) + ) + ) + (i32.const -8) + ) + (local.get $3) + ) + ) + (local.set $1 + (local.get $2) + ) + (loop $label$25 + (block $label$26 + (if + (i32.eqz + (local.tee $0 + (i32.load offset=16 + (local.get $1) + ) + ) + ) + (br_if $label$26 + (i32.eqz + (local.tee $0 + (i32.load offset=20 + (local.get $1) + ) + ) + ) + ) + ) + (if + (local.tee $7 + (i32.lt_u + (local.tee $1 + (i32.sub + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $3) + ) + ) + (local.get $9) + ) + ) + (local.set $9 + (local.get $1) + ) + ) + (local.set $1 + (local.get $0) + ) + (if + (local.get $7) + (local.set $2 + (local.get $0) + ) + ) + (br $label$25) + ) + ) + (if + (i32.lt_u + (local.get $2) + (local.tee $12 + (i32.load + (i32.const 3668) + ) + ) + ) + (call $fimport$10) + ) + (if + (i32.ge_u + (local.get $2) + (local.tee $13 + (i32.add + (local.get $2) + (local.get $3) + ) + ) + ) + (call $fimport$10) + ) + (local.set $15 + (i32.load offset=24 + (local.get $2) + ) + ) + (block $label$32 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $2) + ) + ) + (local.get $2) + ) + (block + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $2) + (i32.const 20) + ) + ) + ) + ) + ) + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $2) + (i32.const 16) + ) + ) + ) + ) + ) + (block + (local.set $4 + (i32.const 0) + ) + (br $label$32) + ) + ) + ) + (loop $label$36 + (if + (local.tee $7 + (i32.load + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $7) + ) + (local.set $1 + (local.get $11) + ) + (br $label$36) + ) + ) + (if + (local.tee $7 + (i32.load + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $7) + ) + (local.set $1 + (local.get $11) + ) + (br $label$36) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $12) + ) + (call $fimport$10) + (block + (i32.store + (local.get $1) + (i32.const 0) + ) + (local.set $4 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $11 + (i32.load offset=8 + (local.get $2) + ) + ) + (local.get $12) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 12) + ) + ) + ) + (local.get $2) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $2) + ) + (block + (i32.store + (local.get $7) + (local.get $0) + ) + (i32.store + (local.get $1) + (local.get $11) + ) + (local.set $4 + (local.get $0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + (block $label$46 + (if + (local.get $15) + (block + (if + (i32.eq + (local.get $2) + (i32.load + (local.tee $0 + (i32.add + (i32.shl + (local.tee $1 + (i32.load offset=28 + (local.get $2) + ) + ) + (i32.const 2) + ) + (i32.const 3956) + ) + ) + ) + ) + (block + (i32.store + (local.get $0) + (local.get $4) + ) + (if + (i32.eqz + (local.get $4) + ) + (block + (i32.store + (i32.const 3656) + (i32.and + (local.get $6) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + (br $label$46) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $15) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $15) 
+ (i32.const 16) + ) + ) + ) + (local.get $2) + ) + (i32.store + (local.get $0) + (local.get $4) + ) + (i32.store offset=20 + (local.get $15) + (local.get $4) + ) + ) + (br_if $label$46 + (i32.eqz + (local.get $4) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $4) + (local.tee $0 + (i32.load + (i32.const 3668) + ) + ) + ) + (call $fimport$10) + ) + (i32.store offset=24 + (local.get $4) + (local.get $15) + ) + (if + (local.tee $1 + (i32.load offset=16 + (local.get $2) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $0) + ) + (call $fimport$10) + (block + (i32.store offset=16 + (local.get $4) + (local.get $1) + ) + (i32.store offset=24 + (local.get $1) + (local.get $4) + ) + ) + ) + ) + (if + (local.tee $0 + (i32.load offset=20 + (local.get $2) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + (block + (i32.store offset=20 + (local.get $4) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $4) + ) + ) + ) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $9) + (i32.const 16) + ) + (block + (i32.store offset=4 + (local.get $2) + (i32.or + (local.tee $0 + (i32.add + (local.get $9) + (local.get $3) + ) + ) + (i32.const 3) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $2) + (local.get $0) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + ) + (block + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $3) + (i32.const 3) + ) + ) + (i32.store offset=4 + (local.get $13) + (i32.or + (local.get $9) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $13) + (local.get $9) + ) + (local.get $9) + ) + (if + (local.get $16) + (block + (local.set $7 + (i32.load + (i32.const 3672) + ) + ) + (local.set $3 + (i32.add + (i32.shl + (i32.shl + (local.tee $0 + (i32.shr_u + (local.get $16) + (i32.const 3) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3692) + ) + ) + (if + (i32.and + (local.get $8) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + (block + (local.set $10 + (local.get $1) + ) + (local.set $5 + (local.get $0) + ) + ) + ) + (block + (i32.store + (i32.const 3652) + (i32.or + (local.get $8) + (local.get $0) + ) + ) + (local.set $10 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (local.set $5 + (local.get $3) + ) + ) + ) + (i32.store + (local.get $10) + (local.get $7) + ) + (i32.store offset=12 + (local.get $5) + (local.get $7) + ) + (i32.store offset=8 + (local.get $7) + (local.get $5) + ) + (i32.store offset=12 + (local.get $7) + (local.get $3) + ) + ) + ) + (i32.store + (i32.const 3660) + (local.get $9) + ) + (i32.store + (i32.const 3672) + (local.get $13) + ) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + (local.set $0 + (local.get $3) + ) + ) + ) + (local.set $0 + (local.get $3) + ) + ) + ) + (if + (i32.gt_u + (local.get $0) + (i32.const -65) + ) + (local.set $0 + (i32.const -1) + ) + (block + (local.set $7 + (i32.and + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 11) + ) + ) + (i32.const -8) + ) + ) + (if + (local.tee $5 + (i32.load + (i32.const 3656) + ) + ) + (block + (local.set $17 + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $0) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $7) + 
(i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $7) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $3 + (i32.and + (i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $3) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $1) + (local.get $0) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $1) + (local.get $0) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (local.set $3 + (i32.sub + (i32.const 0) + (local.get $7) + ) + ) + (block $label$78 + (block $label$79 + (block $label$80 + (if + (local.tee $1 + (i32.load + (i32.add + (i32.shl + (local.get $17) + (i32.const 2) + ) + (i32.const 3956) + ) + ) + ) + (block + (local.set $0 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $17) + (i32.const 1) + ) + ) + ) + (local.set $4 + (i32.const 0) + ) + (local.set $10 + (i32.shl + (local.get $7) + (if (result i32) + (i32.eq + (local.get $17) + (i32.const 31) + ) + (i32.const 0) + (local.get $0) + ) + ) + ) + (local.set $0 + (i32.const 0) + ) + (loop $label$84 + (if + (i32.lt_u + (local.tee $6 + (i32.sub + (i32.and + (i32.load offset=4 + (local.get $1) + ) + (i32.const -8) + ) + (local.get $7) + ) + ) + (local.get $3) + ) + (if + (local.get $6) + (block + (local.set $3 + (local.get $6) + ) + (local.set $0 + (local.get $1) + ) + ) + (block + (local.set $3 + (i32.const 0) + ) + (local.set $0 + (local.get $1) + ) + (br $label$79) + ) + ) + ) + (local.set $1 + (if (result i32) + (i32.or + (i32.eqz + (local.tee $19 + (i32.load offset=20 + (local.get $1) + ) + ) + ) + (i32.eq + (local.get $19) + (local.tee $6 + (i32.load + (i32.add + (i32.add + (local.get $1) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $10) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + (local.get $4) + (local.get $19) + ) + ) + (local.set $10 + (i32.shl + (local.get $10) + (i32.xor + (i32.and + (local.tee $4 + (i32.eqz + (local.get $6) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (if + (local.get $4) + (block + (local.set $4 + (local.get $1) + ) + (local.set $1 + (local.get $0) + ) + (br $label$80) + ) + (block + (local.set $4 + (local.get $1) + ) + (local.set $1 + (local.get $6) + ) + (br $label$84) + ) + ) + ) + ) + (block + (local.set $4 + (i32.const 0) + ) + (local.set $1 + (i32.const 0) + ) + ) + ) + ) + (br_if $label$79 + (local.tee $0 + (if (result i32) + (i32.and + (i32.eqz + (local.get $4) + ) + (i32.eqz + (local.get $1) + ) + ) + (block (result i32) + (if + (i32.eqz + (local.tee $0 + (i32.and + (local.get $5) + (i32.or + (local.tee $0 + (i32.shl + (i32.const 2) + (local.get $17) + ) + ) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $7) + ) + (br $label$2) + ) + ) + (local.set $10 + (i32.and + (i32.shr_u + (local.tee $0 + (i32.add + (i32.and + (local.get $0) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + (i32.const -1) + ) + ) + (i32.const 12) + ) + (i32.const 16) + ) + ) + (i32.load + (i32.add + (i32.shl + (i32.add + (i32.or + (i32.or + (i32.or + (i32.or + 
(local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $0) + (local.get $10) + ) + ) + (i32.const 5) + ) + (i32.const 8) + ) + ) + (local.get $10) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 4) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 3956) + ) + ) + ) + (local.get $4) + ) + ) + ) + (local.set $4 + (local.get $1) + ) + (br $label$78) + ) + (loop $label$96 + (if + (local.tee $10 + (i32.lt_u + (local.tee $4 + (i32.sub + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $7) + ) + ) + (local.get $3) + ) + ) + (local.set $3 + (local.get $4) + ) + ) + (if + (local.get $10) + (local.set $1 + (local.get $0) + ) + ) + (if + (local.tee $4 + (i32.load offset=16 + (local.get $0) + ) + ) + (block + (local.set $0 + (local.get $4) + ) + (br $label$96) + ) + ) + (br_if $label$96 + (local.tee $0 + (i32.load offset=20 + (local.get $0) + ) + ) + ) + (local.set $4 + (local.get $1) + ) + ) + ) + (if + (local.get $4) + (if + (i32.lt_u + (local.get $3) + (i32.sub + (i32.load + (i32.const 3660) + ) + (local.get $7) + ) + ) + (block + (if + (i32.lt_u + (local.get $4) + (local.tee $12 + (i32.load + (i32.const 3668) + ) + ) + ) + (call $fimport$10) + ) + (if + (i32.ge_u + (local.get $4) + (local.tee $6 + (i32.add + (local.get $4) + (local.get $7) + ) + ) + ) + (call $fimport$10) + ) + (local.set $10 + (i32.load offset=24 + (local.get $4) + ) + ) + (block $label$104 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $4) + ) + ) + (local.get $4) + ) + (block + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $4) + (i32.const 20) + ) + ) + ) + ) + ) + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + ) + ) + ) + (block + (local.set $13 + (i32.const 0) + ) + (br $label$104) + ) + ) + ) + (loop $label$108 + (if + (local.tee $11 + (i32.load + (local.tee $9 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $11) + ) + (local.set $1 + (local.get $9) + ) + (br $label$108) + ) + ) + (if + (local.tee $11 + (i32.load + (local.tee $9 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $11) + ) + (local.set $1 + (local.get $9) + ) + (br $label$108) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $12) + ) + (call $fimport$10) + (block + (i32.store + (local.get $1) + (i32.const 0) + ) + (local.set $13 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $9 + (i32.load offset=8 + (local.get $4) + ) + ) + (local.get $12) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load + (local.tee $11 + (i32.add + (local.get $9) + (i32.const 12) + ) + ) + ) + (local.get $4) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $4) + ) + (block + (i32.store + (local.get $11) + (local.get $0) + ) + (i32.store + (local.get $1) + (local.get $9) + ) + (local.set $13 + (local.get $0) + ) + ) + 
(call $fimport$10) + ) + ) + ) + ) + (block $label$118 + (if + (local.get $10) + (block + (if + (i32.eq + (local.get $4) + (i32.load + (local.tee $0 + (i32.add + (i32.shl + (local.tee $1 + (i32.load offset=28 + (local.get $4) + ) + ) + (i32.const 2) + ) + (i32.const 3956) + ) + ) + ) + ) + (block + (i32.store + (local.get $0) + (local.get $13) + ) + (if + (i32.eqz + (local.get $13) + ) + (block + (i32.store + (i32.const 3656) + (local.tee $2 + (i32.and + (local.get $5) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + ) + (br $label$118) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $10) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $10) + (i32.const 16) + ) + ) + ) + (local.get $4) + ) + (i32.store + (local.get $0) + (local.get $13) + ) + (i32.store offset=20 + (local.get $10) + (local.get $13) + ) + ) + (if + (i32.eqz + (local.get $13) + ) + (block + (local.set $2 + (local.get $5) + ) + (br $label$118) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $13) + (local.tee $0 + (i32.load + (i32.const 3668) + ) + ) + ) + (call $fimport$10) + ) + (i32.store offset=24 + (local.get $13) + (local.get $10) + ) + (if + (local.tee $1 + (i32.load offset=16 + (local.get $4) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $0) + ) + (call $fimport$10) + (block + (i32.store offset=16 + (local.get $13) + (local.get $1) + ) + (i32.store offset=24 + (local.get $1) + (local.get $13) + ) + ) + ) + ) + (if + (local.tee $0 + (i32.load offset=20 + (local.get $4) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + (block + (i32.store offset=20 + (local.get $13) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $13) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + (block $label$136 + (if + (i32.lt_u + (local.get $3) + (i32.const 16) + ) + (block + (i32.store offset=4 + (local.get $4) + (i32.or + (local.tee $0 + (i32.add + (local.get $3) + (local.get $7) + ) + ) + (i32.const 3) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $4) + (local.get $0) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + ) + (block + (i32.store offset=4 + (local.get $4) + (i32.or + (local.get $7) + (i32.const 3) + ) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $6) + (local.get $3) + ) + (local.get $3) + ) + (local.set $0 + (i32.shr_u + (local.get $3) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $3) + (i32.const 256) + ) + (block + (local.set $3 + (i32.add + (i32.shl + (i32.shl + (local.get $0) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3692) + ) + ) + (if + (i32.and + (local.tee $1 + (i32.load + (i32.const 3652) + ) + ) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + (block + (local.set $16 + (local.get $1) + ) + (local.set $8 + (local.get $0) + ) + ) + ) + (block + (i32.store + (i32.const 3652) + (i32.or + (local.get $1) + (local.get $0) + ) + ) + (local.set $16 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (local.set $8 + (local.get $3) + ) + ) + ) + 
(i32.store + (local.get $16) + (local.get $6) + ) + (i32.store offset=12 + (local.get $8) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $8) + ) + (i32.store offset=12 + (local.get $6) + (local.get $3) + ) + (br $label$136) + ) + ) + (local.set $1 + (i32.add + (i32.shl + (local.tee $5 + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $3) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $3) + (i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $3) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $5 + (i32.and + (i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $5) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $1) + (local.get $0) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $1) + (local.get $0) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (i32.const 2) + ) + (i32.const 3956) + ) + ) + (i32.store offset=28 + (local.get $6) + (local.get $5) + ) + (i32.store offset=4 + (local.tee $0 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + (i32.const 0) + ) + (i32.store + (local.get $0) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (local.get $2) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $5) + ) + ) + ) + ) + (block + (i32.store + (i32.const 3656) + (i32.or + (local.get $2) + (local.get $0) + ) + ) + (i32.store + (local.get $1) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $1) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$136) + ) + ) + (local.set $0 + (i32.load + (local.get $1) + ) + ) + (local.set $1 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $5) + (i32.const 1) + ) + ) + ) + (local.set $5 + (i32.shl + (local.get $3) + (if (result i32) + (i32.eq + (local.get $5) + (i32.const 31) + ) + (i32.const 0) + (local.get $1) + ) + ) + ) + (block $label$151 + (block $label$152 + (block $label$153 + (loop $label$154 + (br_if $label$152 + (i32.eq + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $3) + ) + ) + (local.set $2 + (i32.shl + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$153 + (i32.eqz + (local.tee $1 + (i32.load + (local.tee $5 + (i32.add + (i32.add + (local.get $0) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $5) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $5 + (local.get $2) + ) + (local.set $0 + (local.get $1) + ) + (br $label$154) + ) + ) + (if + (i32.lt_u + (local.get $5) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + (block + (i32.store + (local.get $5) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $0) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$136) + ) + ) + (br $label$151) + ) + (if + (i32.and + (i32.ge_u + (local.tee $2 + (i32.load + (local.tee $3 + (i32.add + (local.get $0) + (i32.const 8) + ) + 
) + ) + ) + (local.tee $1 + (i32.load + (i32.const 3668) + ) + ) + ) + (i32.ge_u + (local.get $0) + (local.get $1) + ) + ) + (block + (i32.store offset=12 + (local.get $2) + (local.get $6) + ) + (i32.store + (local.get $3) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $2) + ) + (i32.store offset=12 + (local.get $6) + (local.get $0) + ) + (i32.store offset=24 + (local.get $6) + (i32.const 0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $4) + (i32.const 8) + ) + ) + ) + (local.set $0 + (local.get $7) + ) + ) + (local.set $0 + (local.get $7) + ) + ) + ) + (local.set $0 + (local.get $7) + ) + ) + ) + ) + ) + ) + (if + (i32.ge_u + (local.tee $1 + (i32.load + (i32.const 3660) + ) + ) + (local.get $0) + ) + (block + (local.set $2 + (i32.load + (i32.const 3672) + ) + ) + (if + (i32.gt_u + (local.tee $3 + (i32.sub + (local.get $1) + (local.get $0) + ) + ) + (i32.const 15) + ) + (block + (i32.store + (i32.const 3672) + (local.tee $1 + (i32.add + (local.get $2) + (local.get $0) + ) + ) + ) + (i32.store + (i32.const 3660) + (local.get $3) + ) + (i32.store offset=4 + (local.get $1) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $1) + (local.get $3) + ) + (local.get $3) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + ) + (block + (i32.store + (i32.const 3660) + (i32.const 0) + ) + (i32.store + (i32.const 3672) + (i32.const 0) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $1) + (i32.const 3) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $2) + (local.get $1) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.gt_u + (local.tee $10 + (i32.load + (i32.const 3664) + ) + ) + (local.get $0) + ) + (block + (i32.store + (i32.const 3664) + (local.tee $3 + (i32.sub + (local.get $10) + (local.get $0) + ) + ) + ) + (i32.store + (i32.const 3676) + (local.tee $1 + (i32.add + (local.tee $2 + (i32.load + (i32.const 3676) + ) + ) + (local.get $0) + ) + ) + ) + (i32.store offset=4 + (local.get $1) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.le_u + (local.tee $6 + (i32.and + (local.tee $8 + (i32.add + (local.tee $1 + (if (result i32) + (i32.load + (i32.const 4124) + ) + (i32.load + (i32.const 4132) + ) + (block (result i32) + (i32.store + (i32.const 4132) + (i32.const 4096) + ) + (i32.store + (i32.const 4128) + (i32.const 4096) + ) + (i32.store + (i32.const 4136) + (i32.const -1) + ) + (i32.store + (i32.const 4140) + (i32.const -1) + ) + (i32.store + (i32.const 4144) + (i32.const 0) + ) + (i32.store + (i32.const 4096) + (i32.const 0) + ) + (i32.store + (local.get $18) + (local.tee $1 + (i32.xor + (i32.and + (local.get $18) + (i32.const -16) + ) + (i32.const 1431655768) + ) + ) + ) + (i32.store + (i32.const 4124) + (local.get $1) + ) + (i32.const 4096) + ) + ) + ) + (local.tee $13 + (i32.add + (local.get $0) + (i32.const 47) + ) + ) + ) + ) + (local.tee $4 + (i32.sub + (i32.const 0) + (local.get $1) + ) + ) + ) + ) + (local.get $0) + ) + (block + (global.set $global$1 + (local.get 
$14) + ) + (return + (i32.const 0) + ) + ) + ) + (if + (local.tee $2 + (i32.load + (i32.const 4092) + ) + ) + (if + (i32.or + (i32.le_u + (local.tee $1 + (i32.add + (local.tee $3 + (i32.load + (i32.const 4084) + ) + ) + (local.get $6) + ) + ) + (local.get $3) + ) + (i32.gt_u + (local.get $1) + (local.get $2) + ) + ) + (block + (global.set $global$1 + (local.get $14) + ) + (return + (i32.const 0) + ) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $0) + (i32.const 48) + ) + ) + (block $label$171 + (block $label$172 + (if + (i32.eqz + (i32.and + (i32.load + (i32.const 4096) + ) + (i32.const 4) + ) + ) + (block + (block $label$174 + (block $label$175 + (block $label$176 + (br_if $label$176 + (i32.eqz + (local.tee $3 + (i32.load + (i32.const 3676) + ) + ) + ) + ) + (local.set $2 + (i32.const 4100) + ) + (loop $label$177 + (block $label$178 + (if + (i32.le_u + (local.tee $1 + (i32.load + (local.get $2) + ) + ) + (local.get $3) + ) + (br_if $label$178 + (i32.gt_u + (i32.add + (local.get $1) + (i32.load + (local.tee $5 + (i32.add + (local.get $2) + (i32.const 4) + ) + ) + ) + ) + (local.get $3) + ) + ) + ) + (br_if $label$176 + (i32.eqz + (local.tee $1 + (i32.load offset=8 + (local.get $2) + ) + ) + ) + ) + (local.set $2 + (local.get $1) + ) + (br $label$177) + ) + ) + (if + (i32.lt_u + (local.tee $3 + (i32.and + (i32.sub + (local.get $8) + (local.get $10) + ) + (local.get $4) + ) + ) + (i32.const 2147483647) + ) + (if + (i32.eq + (local.tee $1 + (call $38 + (local.get $3) + ) + ) + (i32.add + (i32.load + (local.get $2) + ) + (i32.load + (local.get $5) + ) + ) + ) + (br_if $label$172 + (i32.ne + (local.get $1) + (i32.const -1) + ) + ) + (block + (local.set $2 + (local.get $1) + ) + (local.set $1 + (local.get $3) + ) + (br $label$175) + ) + ) + ) + (br $label$174) + ) + (if + (i32.ne + (local.tee $1 + (call $38 + (i32.const 0) + ) + ) + (i32.const -1) + ) + (block + (local.set $2 + (i32.sub + (i32.and + (i32.add + (local.tee $5 + (i32.add + (local.tee $2 + (i32.load + (i32.const 4128) + ) + ) + (i32.const -1) + ) + ) + (local.tee $3 + (local.get $1) + ) + ) + (i32.sub + (i32.const 0) + (local.get $2) + ) + ) + (local.get $3) + ) + ) + (local.set $4 + (i32.add + (local.tee $3 + (i32.add + (if (result i32) + (i32.and + (local.get $5) + (local.get $3) + ) + (local.get $2) + (i32.const 0) + ) + (local.get $6) + ) + ) + (local.tee $5 + (i32.load + (i32.const 4084) + ) + ) + ) + ) + (if + (i32.and + (i32.gt_u + (local.get $3) + (local.get $0) + ) + (i32.lt_u + (local.get $3) + (i32.const 2147483647) + ) + ) + (block + (if + (local.tee $2 + (i32.load + (i32.const 4092) + ) + ) + (br_if $label$174 + (i32.or + (i32.le_u + (local.get $4) + (local.get $5) + ) + (i32.gt_u + (local.get $4) + (local.get $2) + ) + ) + ) + ) + (br_if $label$172 + (i32.eq + (local.tee $2 + (call $38 + (local.get $3) + ) + ) + (local.get $1) + ) + ) + (local.set $1 + (local.get $3) + ) + (br $label$175) + ) + ) + ) + ) + (br $label$174) + ) + (local.set $5 + (i32.sub + (i32.const 0) + (local.get $1) + ) + ) + (if + (i32.and + (i32.gt_u + (local.get $7) + (local.get $1) + ) + (i32.and + (i32.lt_u + (local.get $1) + (i32.const 2147483647) + ) + (i32.ne + (local.get $2) + (i32.const -1) + ) + ) + ) + (if + (i32.lt_u + (local.tee $3 + (i32.and + (i32.add + (i32.sub + (local.get $13) + (local.get $1) + ) + (local.tee $3 + (i32.load + (i32.const 4132) + ) + ) + ) + (i32.sub + (i32.const 0) + (local.get $3) + ) + ) + ) + (i32.const 2147483647) + ) + (if + (i32.eq + (call $38 + (local.get $3) + ) + (i32.const -1) + ) + (block + (drop + 
(call $38 + (local.get $5) + ) + ) + (br $label$174) + ) + (local.set $3 + (i32.add + (local.get $3) + (local.get $1) + ) + ) + ) + (local.set $3 + (local.get $1) + ) + ) + (local.set $3 + (local.get $1) + ) + ) + (if + (i32.ne + (local.get $2) + (i32.const -1) + ) + (block + (local.set $1 + (local.get $2) + ) + (br $label$172) + ) + ) + ) + (i32.store + (i32.const 4096) + (i32.or + (i32.load + (i32.const 4096) + ) + (i32.const 4) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $6) + (i32.const 2147483647) + ) + (if + (i32.and + (i32.lt_u + (local.tee $1 + (call $38 + (local.get $6) + ) + ) + (local.tee $3 + (call $38 + (i32.const 0) + ) + ) + ) + (i32.and + (i32.ne + (local.get $1) + (i32.const -1) + ) + (i32.ne + (local.get $3) + (i32.const -1) + ) + ) + ) + (br_if $label$172 + (i32.gt_u + (local.tee $3 + (i32.sub + (local.get $3) + (local.get $1) + ) + ) + (i32.add + (local.get $0) + (i32.const 40) + ) + ) + ) + ) + ) + (br $label$171) + ) + (i32.store + (i32.const 4084) + (local.tee $2 + (i32.add + (i32.load + (i32.const 4084) + ) + (local.get $3) + ) + ) + ) + (if + (i32.gt_u + (local.get $2) + (i32.load + (i32.const 4088) + ) + ) + (i32.store + (i32.const 4088) + (local.get $2) + ) + ) + (block $label$198 + (if + (local.tee $8 + (i32.load + (i32.const 3676) + ) + ) + (block + (local.set $2 + (i32.const 4100) + ) + (block $label$200 + (block $label$201 + (loop $label$202 + (br_if $label$201 + (i32.eq + (local.get $1) + (i32.add + (local.tee $4 + (i32.load + (local.get $2) + ) + ) + (local.tee $5 + (i32.load + (local.tee $7 + (i32.add + (local.get $2) + (i32.const 4) + ) + ) + ) + ) + ) + ) + ) + (br_if $label$202 + (local.tee $2 + (i32.load offset=8 + (local.get $2) + ) + ) + ) + ) + (br $label$200) + ) + (if + (i32.eqz + (i32.and + (i32.load offset=12 + (local.get $2) + ) + (i32.const 8) + ) + ) + (if + (i32.and + (i32.lt_u + (local.get $8) + (local.get $1) + ) + (i32.ge_u + (local.get $8) + (local.get $4) + ) + ) + (block + (i32.store + (local.get $7) + (i32.add + (local.get $5) + (local.get $3) + ) + ) + (local.set $5 + (i32.load + (i32.const 3664) + ) + ) + (local.set $1 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $2 + (i32.add + (local.get $8) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.store + (i32.const 3676) + (local.tee $2 + (i32.add + (local.get $8) + (if (result i32) + (i32.and + (local.get $2) + (i32.const 7) + ) + (local.get $1) + (local.tee $1 + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (i32.const 3664) + (local.tee $1 + (i32.add + (i32.sub + (local.get $3) + (local.get $1) + ) + (local.get $5) + ) + ) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $1) + (i32.const 1) + ) + ) + (i32.store offset=4 + (i32.add + (local.get $2) + (local.get $1) + ) + (i32.const 40) + ) + (i32.store + (i32.const 3680) + (i32.load + (i32.const 4140) + ) + ) + (br $label$198) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.tee $2 + (i32.load + (i32.const 3668) + ) + ) + ) + (block + (i32.store + (i32.const 3668) + (local.get $1) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + (local.set $10 + (i32.add + (local.get $1) + (local.get $3) + ) + ) + (local.set $5 + (i32.const 4100) + ) + (block $label$208 + (block $label$209 + (loop $label$210 + (br_if $label$209 + (i32.eq + (i32.load + (local.get $5) + ) + (local.get $10) + ) + ) + (br_if $label$210 + (local.tee $5 + (i32.load offset=8 + (local.get $5) + ) + ) + ) + (local.set $5 + (i32.const 4100) + ) + ) + (br $label$208) + ) + (if + (i32.and + (i32.load offset=12 + (local.get $5) + ) 
+ (i32.const 8) + ) + (local.set $5 + (i32.const 4100) + ) + (block + (i32.store + (local.get $5) + (local.get $1) + ) + (i32.store + (local.tee $5 + (i32.add + (local.get $5) + (i32.const 4) + ) + ) + (i32.add + (i32.load + (local.get $5) + ) + (local.get $3) + ) + ) + (local.set $7 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $4 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (local.set $3 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $5 + (i32.add + (local.get $10) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (local.set $6 + (i32.add + (local.tee $13 + (i32.add + (local.get $1) + (if (result i32) + (i32.and + (local.get $4) + (i32.const 7) + ) + (local.get $7) + (i32.const 0) + ) + ) + ) + (local.get $0) + ) + ) + (local.set $7 + (i32.sub + (i32.sub + (local.tee $4 + (i32.add + (local.get $10) + (if (result i32) + (i32.and + (local.get $5) + (i32.const 7) + ) + (local.get $3) + (i32.const 0) + ) + ) + ) + (local.get $13) + ) + (local.get $0) + ) + ) + (i32.store offset=4 + (local.get $13) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + (block $label$217 + (if + (i32.eq + (local.get $4) + (local.get $8) + ) + (block + (i32.store + (i32.const 3664) + (local.tee $0 + (i32.add + (i32.load + (i32.const 3664) + ) + (local.get $7) + ) + ) + ) + (i32.store + (i32.const 3676) + (local.get $6) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $0) + (i32.const 1) + ) + ) + ) + (block + (if + (i32.eq + (local.get $4) + (i32.load + (i32.const 3672) + ) + ) + (block + (i32.store + (i32.const 3660) + (local.tee $0 + (i32.add + (i32.load + (i32.const 3660) + ) + (local.get $7) + ) + ) + ) + (i32.store + (i32.const 3672) + (local.get $6) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $0) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $6) + (local.get $0) + ) + (local.get $0) + ) + (br $label$217) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (local.tee $0 + (if (result i32) + (i32.eq + (i32.and + (local.tee $0 + (i32.load offset=4 + (local.get $4) + ) + ) + (i32.const 3) + ) + (i32.const 1) + ) + (block (result i32) + (local.set $11 + (i32.and + (local.get $0) + (i32.const -8) + ) + ) + (local.set $1 + (i32.shr_u + (local.get $0) + (i32.const 3) + ) + ) + (block $label$222 + (if + (i32.lt_u + (local.get $0) + (i32.const 256) + ) + (block + (local.set $5 + (i32.load offset=12 + (local.get $4) + ) + ) + (block $label$224 + (if + (i32.ne + (local.tee $3 + (i32.load offset=8 + (local.get $4) + ) + ) + (local.tee $0 + (i32.add + (i32.shl + (i32.shl + (local.get $1) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3692) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $3) + (local.get $2) + ) + (call $fimport$10) + ) + (br_if $label$224 + (i32.eq + (i32.load offset=12 + (local.get $3) + ) + (local.get $4) + ) + ) + (call $fimport$10) + ) + ) + ) + (if + (i32.eq + (local.get $5) + (local.get $3) + ) + (block + (i32.store + (i32.const 3652) + (i32.and + (i32.load + (i32.const 3652) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + (br $label$222) + ) + ) + (block $label$228 + (if + (i32.eq + (local.get $5) + (local.get $0) + ) + (local.set $20 + (i32.add + (local.get $5) + (i32.const 8) + ) + ) + (block + (if + (i32.lt_u + (local.get $5) + (local.get $2) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $5) + (i32.const 8) + ) + ) + ) + (local.get $4) + ) + (block + (local.set $20 + (local.get $0) + ) + 
(br $label$228) + ) + ) + (call $fimport$10) + ) + ) + ) + (i32.store offset=12 + (local.get $3) + (local.get $5) + ) + (i32.store + (local.get $20) + (local.get $3) + ) + ) + (block + (local.set $8 + (i32.load offset=24 + (local.get $4) + ) + ) + (block $label$234 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $4) + ) + ) + (local.get $4) + ) + (block + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.tee $3 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + (i32.const 4) + ) + ) + ) + ) + ) + (if + (local.tee $0 + (i32.load + (local.get $3) + ) + ) + (local.set $1 + (local.get $3) + ) + (block + (local.set $12 + (i32.const 0) + ) + (br $label$234) + ) + ) + ) + (loop $label$239 + (if + (local.tee $3 + (i32.load + (local.tee $5 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $3) + ) + (local.set $1 + (local.get $5) + ) + (br $label$239) + ) + ) + (if + (local.tee $3 + (i32.load + (local.tee $5 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $3) + ) + (local.set $1 + (local.get $5) + ) + (br $label$239) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $2) + ) + (call $fimport$10) + (block + (i32.store + (local.get $1) + (i32.const 0) + ) + (local.set $12 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $5 + (i32.load offset=8 + (local.get $4) + ) + ) + (local.get $2) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load + (local.tee $3 + (i32.add + (local.get $5) + (i32.const 12) + ) + ) + ) + (local.get $4) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $4) + ) + (block + (i32.store + (local.get $3) + (local.get $0) + ) + (i32.store + (local.get $1) + (local.get $5) + ) + (local.set $12 + (local.get $0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + (br_if $label$222 + (i32.eqz + (local.get $8) + ) + ) + (block $label$249 + (if + (i32.eq + (local.get $4) + (i32.load + (local.tee $0 + (i32.add + (i32.shl + (local.tee $1 + (i32.load offset=28 + (local.get $4) + ) + ) + (i32.const 2) + ) + (i32.const 3956) + ) + ) + ) + ) + (block + (i32.store + (local.get $0) + (local.get $12) + ) + (br_if $label$249 + (local.get $12) + ) + (i32.store + (i32.const 3656) + (i32.and + (i32.load + (i32.const 3656) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + (br $label$222) + ) + (block + (if + (i32.lt_u + (local.get $8) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $8) + (i32.const 16) + ) + ) + ) + (local.get $4) + ) + (i32.store + (local.get $0) + (local.get $12) + ) + (i32.store offset=20 + (local.get $8) + (local.get $12) + ) + ) + (br_if $label$222 + (i32.eqz + (local.get $12) + ) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $12) + (local.tee $1 + (i32.load + (i32.const 3668) + ) + ) + ) + (call $fimport$10) + ) + (i32.store offset=24 + (local.get $12) + (local.get $8) + ) + (if + (local.tee $3 + (i32.load + (local.tee $0 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $3) + (local.get $1) + ) + (call $fimport$10) + (block + (i32.store offset=16 + (local.get $12) + (local.get $3) + ) + (i32.store offset=24 + (local.get $3) + (local.get $12) + ) + ) + ) + ) + (br_if $label$222 + (i32.eqz + (local.tee $0 + (i32.load offset=4 + (local.get $0) + ) 
+ ) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + (block + (i32.store offset=20 + (local.get $12) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $12) + ) + ) + ) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $11) + (local.get $7) + ) + ) + (i32.add + (local.get $4) + (local.get $11) + ) + ) + (local.get $4) + ) + ) + (i32.const 4) + ) + ) + (i32.and + (i32.load + (local.get $0) + ) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $7) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $6) + (local.get $7) + ) + (local.get $7) + ) + (local.set $0 + (i32.shr_u + (local.get $7) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $7) + (i32.const 256) + ) + (block + (local.set $3 + (i32.add + (i32.shl + (i32.shl + (local.get $0) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3692) + ) + ) + (block $label$263 + (if + (i32.and + (local.tee $1 + (i32.load + (i32.const 3652) + ) + ) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (block + (if + (i32.ge_u + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3668) + ) + ) + (block + (local.set $21 + (local.get $1) + ) + (local.set $9 + (local.get $0) + ) + (br $label$263) + ) + ) + (call $fimport$10) + ) + (block + (i32.store + (i32.const 3652) + (i32.or + (local.get $1) + (local.get $0) + ) + ) + (local.set $21 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (local.set $9 + (local.get $3) + ) + ) + ) + ) + (i32.store + (local.get $21) + (local.get $6) + ) + (i32.store offset=12 + (local.get $9) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $9) + ) + (i32.store offset=12 + (local.get $6) + (local.get $3) + ) + (br $label$217) + ) + ) + (local.set $3 + (i32.add + (i32.shl + (local.tee $2 + (block $label$267 (result i32) + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $7) + (i32.const 8) + ) + ) + (block (result i32) + (drop + (br_if $label$267 + (i32.const 31) + (i32.gt_u + (local.get $7) + (i32.const 16777215) + ) + ) + ) + (i32.or + (i32.and + (i32.shr_u + (local.get $7) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $3 + (i32.and + (i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $3) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $1) + (local.get $0) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $1) + (local.get $0) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + ) + (i32.const 2) + ) + (i32.const 3956) + ) + ) + (i32.store offset=28 + (local.get $6) + (local.get $2) + ) + (i32.store offset=4 + (local.tee $0 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + (i32.const 0) + ) + (i32.store + (local.get $0) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (local.tee $1 + (i32.load + (i32.const 3656) + ) + ) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $2) + ) + ) + ) + ) + (block + (i32.store + (i32.const 3656) + 
(i32.or + (local.get $1) + (local.get $0) + ) + ) + (i32.store + (local.get $3) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $3) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$217) + ) + ) + (local.set $0 + (i32.load + (local.get $3) + ) + ) + (local.set $1 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $2) + (i32.const 1) + ) + ) + ) + (local.set $2 + (i32.shl + (local.get $7) + (if (result i32) + (i32.eq + (local.get $2) + (i32.const 31) + ) + (i32.const 0) + (local.get $1) + ) + ) + ) + (block $label$273 + (block $label$274 + (block $label$275 + (loop $label$276 + (br_if $label$274 + (i32.eq + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $7) + ) + ) + (local.set $3 + (i32.shl + (local.get $2) + (i32.const 1) + ) + ) + (br_if $label$275 + (i32.eqz + (local.tee $1 + (i32.load + (local.tee $2 + (i32.add + (i32.add + (local.get $0) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $2) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $2 + (local.get $3) + ) + (local.set $0 + (local.get $1) + ) + (br $label$276) + ) + ) + (if + (i32.lt_u + (local.get $2) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + (block + (i32.store + (local.get $2) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $0) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$217) + ) + ) + (br $label$273) + ) + (if + (i32.and + (i32.ge_u + (local.tee $2 + (i32.load + (local.tee $3 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + ) + (local.tee $1 + (i32.load + (i32.const 3668) + ) + ) + ) + (i32.ge_u + (local.get $0) + (local.get $1) + ) + ) + (block + (i32.store offset=12 + (local.get $2) + (local.get $6) + ) + (i32.store + (local.get $3) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $2) + ) + (i32.store offset=12 + (local.get $6) + (local.get $0) + ) + (i32.store offset=24 + (local.get $6) + (i32.const 0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $13) + (i32.const 8) + ) + ) + ) + ) + ) + (loop $label$281 + (block $label$282 + (if + (i32.le_u + (local.tee $2 + (i32.load + (local.get $5) + ) + ) + (local.get $8) + ) + (br_if $label$282 + (i32.gt_u + (local.tee $13 + (i32.add + (local.get $2) + (i32.load offset=4 + (local.get $5) + ) + ) + ) + (local.get $8) + ) + ) + ) + (local.set $5 + (i32.load offset=8 + (local.get $5) + ) + ) + (br $label$281) + ) + ) + (local.set $2 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $5 + (i32.add + (local.tee $7 + (i32.add + (local.get $13) + (i32.const -47) + ) + ) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (local.set $10 + (i32.add + (local.tee $7 + (if (result i32) + (i32.lt_u + (local.tee $2 + (i32.add + (local.get $7) + (if (result i32) + (i32.and + (local.get $5) + (i32.const 7) + ) + (local.get $2) + (i32.const 0) + ) + ) + ) + (local.tee $12 + (i32.add + (local.get $8) + (i32.const 16) + ) + ) + ) + (local.get $8) + (local.get $2) + ) + ) + (i32.const 8) + ) + ) + (local.set $5 + (i32.add + (local.get $7) + (i32.const 24) + ) + ) + (local.set $9 + (i32.add + (local.get $3) + (i32.const -40) + ) + ) + (local.set $2 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $4 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + 
(i32.const 7) + ) + ) + (i32.store + (i32.const 3676) + (local.tee $4 + (i32.add + (local.get $1) + (if (result i32) + (i32.and + (local.get $4) + (i32.const 7) + ) + (local.get $2) + (local.tee $2 + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (i32.const 3664) + (local.tee $2 + (i32.sub + (local.get $9) + (local.get $2) + ) + ) + ) + (i32.store offset=4 + (local.get $4) + (i32.or + (local.get $2) + (i32.const 1) + ) + ) + (i32.store offset=4 + (i32.add + (local.get $4) + (local.get $2) + ) + (i32.const 40) + ) + (i32.store + (i32.const 3680) + (i32.load + (i32.const 4140) + ) + ) + (i32.store + (local.tee $2 + (i32.add + (local.get $7) + (i32.const 4) + ) + ) + (i32.const 27) + ) + (i64.store align=4 + (local.get $10) + (i64.load align=4 + (i32.const 4100) + ) + ) + (i64.store offset=8 align=4 + (local.get $10) + (i64.load align=4 + (i32.const 4108) + ) + ) + (i32.store + (i32.const 4100) + (local.get $1) + ) + (i32.store + (i32.const 4104) + (local.get $3) + ) + (i32.store + (i32.const 4112) + (i32.const 0) + ) + (i32.store + (i32.const 4108) + (local.get $10) + ) + (local.set $1 + (local.get $5) + ) + (loop $label$290 + (i32.store + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i32.const 7) + ) + (br_if $label$290 + (i32.lt_u + (i32.add + (local.get $1) + (i32.const 4) + ) + (local.get $13) + ) + ) + ) + (if + (i32.ne + (local.get $7) + (local.get $8) + ) + (block + (i32.store + (local.get $2) + (i32.and + (i32.load + (local.get $2) + ) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $8) + (i32.or + (local.tee $4 + (i32.sub + (local.get $7) + (local.get $8) + ) + ) + (i32.const 1) + ) + ) + (i32.store + (local.get $7) + (local.get $4) + ) + (local.set $1 + (i32.shr_u + (local.get $4) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $4) + (i32.const 256) + ) + (block + (local.set $2 + (i32.add + (i32.shl + (i32.shl + (local.get $1) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3692) + ) + ) + (if + (i32.and + (local.tee $3 + (i32.load + (i32.const 3652) + ) + ) + (local.tee $1 + (i32.shl + (i32.const 1) + (local.get $1) + ) + ) + ) + (if + (i32.lt_u + (local.tee $1 + (i32.load + (local.tee $3 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + (block + (local.set $15 + (local.get $3) + ) + (local.set $11 + (local.get $1) + ) + ) + ) + (block + (i32.store + (i32.const 3652) + (i32.or + (local.get $3) + (local.get $1) + ) + ) + (local.set $15 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + (local.set $11 + (local.get $2) + ) + ) + ) + (i32.store + (local.get $15) + (local.get $8) + ) + (i32.store offset=12 + (local.get $11) + (local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $11) + ) + (i32.store offset=12 + (local.get $8) + (local.get $2) + ) + (br $label$198) + ) + ) + (local.set $2 + (i32.add + (i32.shl + (local.tee $5 + (if (result i32) + (local.tee $1 + (i32.shr_u + (local.get $4) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $4) + (i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $4) + (i32.add + (local.tee $1 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $1 + (i32.and + (i32.shr_u + (i32.add + (local.tee $3 + (i32.shl + (local.get $1) + (local.tee $2 + (i32.and + (i32.shr_u + (i32.add + (local.get $1) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + 
(local.get $2) + ) + (local.tee $1 + (i32.and + (i32.shr_u + (i32.add + (local.tee $3 + (i32.shl + (local.get $3) + (local.get $1) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $3) + (local.get $1) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $1) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (i32.const 2) + ) + (i32.const 3956) + ) + ) + (i32.store offset=28 + (local.get $8) + (local.get $5) + ) + (i32.store offset=20 + (local.get $8) + (i32.const 0) + ) + (i32.store + (local.get $12) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (local.tee $3 + (i32.load + (i32.const 3656) + ) + ) + (local.tee $1 + (i32.shl + (i32.const 1) + (local.get $5) + ) + ) + ) + ) + (block + (i32.store + (i32.const 3656) + (i32.or + (local.get $3) + (local.get $1) + ) + ) + (i32.store + (local.get $2) + (local.get $8) + ) + (i32.store offset=24 + (local.get $8) + (local.get $2) + ) + (i32.store offset=12 + (local.get $8) + (local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $8) + ) + (br $label$198) + ) + ) + (local.set $1 + (i32.load + (local.get $2) + ) + ) + (local.set $3 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $5) + (i32.const 1) + ) + ) + ) + (local.set $5 + (i32.shl + (local.get $4) + (if (result i32) + (i32.eq + (local.get $5) + (i32.const 31) + ) + (i32.const 0) + (local.get $3) + ) + ) + ) + (block $label$304 + (block $label$305 + (block $label$306 + (loop $label$307 + (br_if $label$305 + (i32.eq + (i32.and + (i32.load offset=4 + (local.get $1) + ) + (i32.const -8) + ) + (local.get $4) + ) + ) + (local.set $2 + (i32.shl + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$306 + (i32.eqz + (local.tee $3 + (i32.load + (local.tee $5 + (i32.add + (i32.add + (local.get $1) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $5) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $5 + (local.get $2) + ) + (local.set $1 + (local.get $3) + ) + (br $label$307) + ) + ) + (if + (i32.lt_u + (local.get $5) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + (block + (i32.store + (local.get $5) + (local.get $8) + ) + (i32.store offset=24 + (local.get $8) + (local.get $1) + ) + (i32.store offset=12 + (local.get $8) + (local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $8) + ) + (br $label$198) + ) + ) + (br $label$304) + ) + (if + (i32.and + (i32.ge_u + (local.tee $5 + (i32.load + (local.tee $2 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + ) + (local.tee $3 + (i32.load + (i32.const 3668) + ) + ) + ) + (i32.ge_u + (local.get $1) + (local.get $3) + ) + ) + (block + (i32.store offset=12 + (local.get $5) + (local.get $8) + ) + (i32.store + (local.get $2) + (local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $5) + ) + (i32.store offset=12 + (local.get $8) + (local.get $1) + ) + (i32.store offset=24 + (local.get $8) + (i32.const 0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + ) + (block + (if + (i32.or + (i32.eqz + (local.tee $2 + (i32.load + (i32.const 3668) + ) + ) + ) + (i32.lt_u + (local.get $1) + (local.get $2) + ) + ) + (i32.store + (i32.const 3668) + (local.get $1) + ) + ) + (i32.store + (i32.const 4100) + (local.get $1) + ) + (i32.store + (i32.const 4104) + (local.get $3) + ) + (i32.store + (i32.const 4112) + (i32.const 0) + ) + (i32.store + (i32.const 3688) + (i32.load + (i32.const 4124) + ) + ) + (i32.store + (i32.const 3684) + (i32.const -1) + 
) + (local.set $2 + (i32.const 0) + ) + (loop $label$314 + (i32.store offset=12 + (local.tee $5 + (i32.add + (i32.shl + (i32.shl + (local.get $2) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3692) + ) + ) + (local.get $5) + ) + (i32.store offset=8 + (local.get $5) + (local.get $5) + ) + (br_if $label$314 + (i32.ne + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (i32.const 32) + ) + ) + ) + (local.set $5 + (i32.add + (local.get $3) + (i32.const -40) + ) + ) + (local.set $3 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $2 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.store + (i32.const 3676) + (local.tee $3 + (i32.add + (local.get $1) + (local.tee $1 + (if (result i32) + (i32.and + (local.get $2) + (i32.const 7) + ) + (local.get $3) + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (i32.const 3664) + (local.tee $1 + (i32.sub + (local.get $5) + (local.get $1) + ) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $1) + (i32.const 1) + ) + ) + (i32.store offset=4 + (i32.add + (local.get $3) + (local.get $1) + ) + (i32.const 40) + ) + (i32.store + (i32.const 3680) + (i32.load + (i32.const 4140) + ) + ) + ) + ) + ) + (if + (i32.gt_u + (local.tee $1 + (i32.load + (i32.const 3664) + ) + ) + (local.get $0) + ) + (block + (i32.store + (i32.const 3664) + (local.tee $3 + (i32.sub + (local.get $1) + (local.get $0) + ) + ) + ) + (i32.store + (i32.const 3676) + (local.tee $1 + (i32.add + (local.tee $2 + (i32.load + (i32.const 3676) + ) + ) + (local.get $0) + ) + ) + ) + (i32.store offset=4 + (local.get $1) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + ) + (i32.store + (call $12) + (i32.const 12) + ) + (global.set $global$1 + (local.get $14) + ) + (i32.const 0) + ) + ) + (func $36 (; 49 ;) (type $2) (param $0 i32) + (local $1 i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (block $label$1 + (if + (i32.eqz + (local.get $0) + ) + (return) + ) + (if + (i32.lt_u + (local.tee $1 + (i32.add + (local.get $0) + (i32.const -8) + ) + ) + (local.tee $11 + (i32.load + (i32.const 3668) + ) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (local.tee $8 + (i32.and + (local.tee $0 + (i32.load + (i32.add + (local.get $0) + (i32.const -4) + ) + ) + ) + (i32.const 3) + ) + ) + (i32.const 1) + ) + (call $fimport$10) + ) + (local.set $6 + (i32.add + (local.get $1) + (local.tee $4 + (i32.and + (local.get $0) + (i32.const -8) + ) + ) + ) + ) + (block $label$5 + (if + (i32.and + (local.get $0) + (i32.const 1) + ) + (block + (local.set $3 + (local.get $1) + ) + (local.set $2 + (local.get $4) + ) + ) + (block + (if + (i32.eqz + (local.get $8) + ) + (return) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.add + (local.get $1) + (i32.sub + (i32.const 0) + (local.tee $8 + (i32.load + (local.get $1) + ) + ) + ) + ) + ) + (local.get $11) + ) + (call $fimport$10) + ) + (local.set $1 + (i32.add + (local.get $8) + (local.get $4) + ) + ) + (if + (i32.eq + (local.get $0) + (i32.load + (i32.const 3672) + ) + ) + (block + (if + (i32.ne + (i32.and + (local.tee $3 + (i32.load + (local.tee $2 + (i32.add + (local.get $6) + (i32.const 4) + ) + ) 
+ ) + ) + (i32.const 3) + ) + (i32.const 3) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + (i32.store + (i32.const 3660) + (local.get $1) + ) + (i32.store + (local.get $2) + (i32.and + (local.get $3) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $0) + (i32.or + (local.get $1) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $0) + (local.get $1) + ) + (local.get $1) + ) + (return) + ) + ) + (local.set $10 + (i32.shr_u + (local.get $8) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $8) + (i32.const 256) + ) + (block + (local.set $3 + (i32.load offset=12 + (local.get $0) + ) + ) + (if + (i32.ne + (local.tee $4 + (i32.load offset=8 + (local.get $0) + ) + ) + (local.tee $2 + (i32.add + (i32.shl + (i32.shl + (local.get $10) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3692) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $4) + (local.get $11) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load offset=12 + (local.get $4) + ) + (local.get $0) + ) + (call $fimport$10) + ) + ) + ) + (if + (i32.eq + (local.get $3) + (local.get $4) + ) + (block + (i32.store + (i32.const 3652) + (i32.and + (i32.load + (i32.const 3652) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $10) + ) + (i32.const -1) + ) + ) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + (if + (i32.eq + (local.get $3) + (local.get $2) + ) + (local.set $5 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (block + (if + (i32.lt_u + (local.get $3) + (local.get $11) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $2 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + (local.get $0) + ) + (local.set $5 + (local.get $2) + ) + (call $fimport$10) + ) + ) + ) + (i32.store offset=12 + (local.get $4) + (local.get $3) + ) + (i32.store + (local.get $5) + (local.get $4) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + (local.set $12 + (i32.load offset=24 + (local.get $0) + ) + ) + (block $label$22 + (if + (i32.eq + (local.tee $4 + (i32.load offset=12 + (local.get $0) + ) + ) + (local.get $0) + ) + (block + (if + (local.tee $4 + (i32.load + (local.tee $8 + (i32.add + (local.tee $5 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (i32.const 4) + ) + ) + ) + ) + (local.set $5 + (local.get $8) + ) + (if + (i32.eqz + (local.tee $4 + (i32.load + (local.get $5) + ) + ) + ) + (block + (local.set $7 + (i32.const 0) + ) + (br $label$22) + ) + ) + ) + (loop $label$27 + (if + (local.tee $10 + (i32.load + (local.tee $8 + (i32.add + (local.get $4) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $4 + (local.get $10) + ) + (local.set $5 + (local.get $8) + ) + (br $label$27) + ) + ) + (if + (local.tee $10 + (i32.load + (local.tee $8 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $4 + (local.get $10) + ) + (local.set $5 + (local.get $8) + ) + (br $label$27) + ) + ) + ) + (if + (i32.lt_u + (local.get $5) + (local.get $11) + ) + (call $fimport$10) + (block + (i32.store + (local.get $5) + (i32.const 0) + ) + (local.set $7 + (local.get $4) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $5 + (i32.load offset=8 + (local.get $0) + ) + ) + (local.get $11) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load + (local.tee $8 + (i32.add + (local.get $5) + (i32.const 12) + ) + ) + ) + (local.get $0) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + 
(local.tee $10 + (i32.add + (local.get $4) + (i32.const 8) + ) + ) + ) + (local.get $0) + ) + (block + (i32.store + (local.get $8) + (local.get $4) + ) + (i32.store + (local.get $10) + (local.get $5) + ) + (local.set $7 + (local.get $4) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + (if + (local.get $12) + (block + (if + (i32.eq + (local.get $0) + (i32.load + (local.tee $5 + (i32.add + (i32.shl + (local.tee $4 + (i32.load offset=28 + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 3956) + ) + ) + ) + ) + (block + (i32.store + (local.get $5) + (local.get $7) + ) + (if + (i32.eqz + (local.get $7) + ) + (block + (i32.store + (i32.const 3656) + (i32.and + (i32.load + (i32.const 3656) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $4) + ) + (i32.const -1) + ) + ) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $12) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $4 + (i32.add + (local.get $12) + (i32.const 16) + ) + ) + ) + (local.get $0) + ) + (i32.store + (local.get $4) + (local.get $7) + ) + (i32.store offset=20 + (local.get $12) + (local.get $7) + ) + ) + (if + (i32.eqz + (local.get $7) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $7) + (local.tee $5 + (i32.load + (i32.const 3668) + ) + ) + ) + (call $fimport$10) + ) + (i32.store offset=24 + (local.get $7) + (local.get $12) + ) + (if + (local.tee $4 + (i32.load + (local.tee $8 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $4) + (local.get $5) + ) + (call $fimport$10) + (block + (i32.store offset=16 + (local.get $7) + (local.get $4) + ) + (i32.store offset=24 + (local.get $4) + (local.get $7) + ) + ) + ) + ) + (if + (local.tee $4 + (i32.load offset=4 + (local.get $8) + ) + ) + (if + (i32.lt_u + (local.get $4) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + (block + (i32.store offset=20 + (local.get $7) + (local.get $4) + ) + (i32.store offset=24 + (local.get $4) + (local.get $7) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + ) + ) + ) + (if + (i32.ge_u + (local.get $3) + (local.get $6) + ) + (call $fimport$10) + ) + (if + (i32.eqz + (i32.and + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $6) + (i32.const 4) + ) + ) + ) + ) + (i32.const 1) + ) + ) + (call $fimport$10) + ) + (if + (i32.and + (local.get $0) + (i32.const 2) + ) + (block + (i32.store + (local.get $1) + (i32.and + (local.get $0) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $2) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $3) + (local.get $2) + ) + (local.get $2) + ) + ) + (block + (if + (i32.eq + (local.get $6) + (i32.load + (i32.const 3676) + ) + ) + (block + (i32.store + (i32.const 3664) + (local.tee $0 + (i32.add + (i32.load + (i32.const 3664) + ) + (local.get $2) + ) + ) + ) + (i32.store + (i32.const 3676) + (local.get $3) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $0) + (i32.const 1) + ) + ) + (if + (i32.ne + (local.get $3) + (i32.load + (i32.const 3672) + ) + ) + (return) + ) + (i32.store + (i32.const 3672) + (i32.const 0) 
+ ) + (i32.store + (i32.const 3660) + (i32.const 0) + ) + (return) + ) + ) + (if + (i32.eq + (local.get $6) + (i32.load + (i32.const 3672) + ) + ) + (block + (i32.store + (i32.const 3660) + (local.tee $0 + (i32.add + (i32.load + (i32.const 3660) + ) + (local.get $2) + ) + ) + ) + (i32.store + (i32.const 3672) + (local.get $3) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $0) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $3) + (local.get $0) + ) + (local.get $0) + ) + (return) + ) + ) + (local.set $5 + (i32.add + (i32.and + (local.get $0) + (i32.const -8) + ) + (local.get $2) + ) + ) + (local.set $4 + (i32.shr_u + (local.get $0) + (i32.const 3) + ) + ) + (block $label$61 + (if + (i32.lt_u + (local.get $0) + (i32.const 256) + ) + (block + (local.set $2 + (i32.load offset=12 + (local.get $6) + ) + ) + (if + (i32.ne + (local.tee $1 + (i32.load offset=8 + (local.get $6) + ) + ) + (local.tee $0 + (i32.add + (i32.shl + (i32.shl + (local.get $4) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3692) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $1) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load offset=12 + (local.get $1) + ) + (local.get $6) + ) + (call $fimport$10) + ) + ) + ) + (if + (i32.eq + (local.get $2) + (local.get $1) + ) + (block + (i32.store + (i32.const 3652) + (i32.and + (i32.load + (i32.const 3652) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $4) + ) + (i32.const -1) + ) + ) + ) + (br $label$61) + ) + ) + (if + (i32.eq + (local.get $2) + (local.get $0) + ) + (local.set $14 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + (block + (if + (i32.lt_u + (local.get $2) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + (local.get $6) + ) + (local.set $14 + (local.get $0) + ) + (call $fimport$10) + ) + ) + ) + (i32.store offset=12 + (local.get $1) + (local.get $2) + ) + (i32.store + (local.get $14) + (local.get $1) + ) + ) + (block + (local.set $7 + (i32.load offset=24 + (local.get $6) + ) + ) + (block $label$73 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $6) + ) + ) + (local.get $6) + ) + (block + (if + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.tee $2 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + (i32.const 4) + ) + ) + ) + ) + (local.set $2 + (local.get $1) + ) + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.get $2) + ) + ) + ) + (block + (local.set $9 + (i32.const 0) + ) + (br $label$73) + ) + ) + ) + (loop $label$78 + (if + (local.tee $4 + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $4) + ) + (local.set $2 + (local.get $1) + ) + (br $label$78) + ) + ) + (if + (local.tee $4 + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $4) + ) + (local.set $2 + (local.get $1) + ) + (br $label$78) + ) + ) + ) + (if + (i32.lt_u + (local.get $2) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + (block + (i32.store + (local.get $2) + (i32.const 0) + ) + (local.set $9 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $2 + (i32.load offset=8 + (local.get $6) + ) + ) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load + (local.tee $1 + (i32.add + (local.get $2) + (i32.const 12) + ) + ) + ) + (local.get $6) 
+ ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $4 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $6) + ) + (block + (i32.store + (local.get $1) + (local.get $0) + ) + (i32.store + (local.get $4) + (local.get $2) + ) + (local.set $9 + (local.get $0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + (if + (local.get $7) + (block + (if + (i32.eq + (local.get $6) + (i32.load + (local.tee $2 + (i32.add + (i32.shl + (local.tee $0 + (i32.load offset=28 + (local.get $6) + ) + ) + (i32.const 2) + ) + (i32.const 3956) + ) + ) + ) + ) + (block + (i32.store + (local.get $2) + (local.get $9) + ) + (if + (i32.eqz + (local.get $9) + ) + (block + (i32.store + (i32.const 3656) + (i32.and + (i32.load + (i32.const 3656) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $0) + ) + (i32.const -1) + ) + ) + ) + (br $label$61) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $7) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $7) + (i32.const 16) + ) + ) + ) + (local.get $6) + ) + (i32.store + (local.get $0) + (local.get $9) + ) + (i32.store offset=20 + (local.get $7) + (local.get $9) + ) + ) + (br_if $label$61 + (i32.eqz + (local.get $9) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $9) + (local.tee $2 + (i32.load + (i32.const 3668) + ) + ) + ) + (call $fimport$10) + ) + (i32.store offset=24 + (local.get $9) + (local.get $7) + ) + (if + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $0) + (local.get $2) + ) + (call $fimport$10) + (block + (i32.store offset=16 + (local.get $9) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $9) + ) + ) + ) + ) + (if + (local.tee $0 + (i32.load offset=4 + (local.get $1) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + (block + (i32.store offset=20 + (local.get $9) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $9) + ) + ) + ) + ) + ) + ) + ) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $5) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $3) + (local.get $5) + ) + (local.get $5) + ) + (if + (i32.eq + (local.get $3) + (i32.load + (i32.const 3672) + ) + ) + (block + (i32.store + (i32.const 3660) + (local.get $5) + ) + (return) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + ) + (local.set $1 + (i32.shr_u + (local.get $2) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $2) + (i32.const 256) + ) + (block + (local.set $0 + (i32.add + (i32.shl + (i32.shl + (local.get $1) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3692) + ) + ) + (if + (i32.and + (local.tee $2 + (i32.load + (i32.const 3652) + ) + ) + (local.tee $1 + (i32.shl + (i32.const 1) + (local.get $1) + ) + ) + ) + (if + (i32.lt_u + (local.tee $1 + (i32.load + (local.tee $2 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + (block + (local.set $15 + (local.get $2) + ) + (local.set $13 + (local.get $1) + ) + ) + ) + (block + (i32.store + (i32.const 3652) + (i32.or + (local.get $2) + (local.get $1) + ) + ) + (local.set $15 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + (local.set $13 + (local.get $0) + ) + ) + ) + (i32.store + (local.get $15) + (local.get $3) + ) + (i32.store offset=12 + (local.get $13) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get 
$13) + ) + (i32.store offset=12 + (local.get $3) + (local.get $0) + ) + (return) + ) + ) + (local.set $0 + (i32.add + (i32.shl + (local.tee $1 + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $2) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $2) + (i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $2) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $4 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $0) + ) + (local.tee $1 + (i32.and + (i32.shr_u + (i32.add + (local.tee $0 + (i32.shl + (local.get $1) + (local.get $4) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $0) + (local.get $1) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (i32.const 2) + ) + (i32.const 3956) + ) + ) + (i32.store offset=28 + (local.get $3) + (local.get $1) + ) + (i32.store offset=20 + (local.get $3) + (i32.const 0) + ) + (i32.store offset=16 + (local.get $3) + (i32.const 0) + ) + (block $label$113 + (if + (i32.and + (local.tee $4 + (i32.load + (i32.const 3656) + ) + ) + (local.tee $5 + (i32.shl + (i32.const 1) + (local.get $1) + ) + ) + ) + (block + (local.set $0 + (i32.load + (local.get $0) + ) + ) + (local.set $4 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $1) + (i32.const 1) + ) + ) + ) + (local.set $1 + (i32.shl + (local.get $2) + (if (result i32) + (i32.eq + (local.get $1) + (i32.const 31) + ) + (i32.const 0) + (local.get $4) + ) + ) + ) + (block $label$117 + (block $label$118 + (block $label$119 + (loop $label$120 + (br_if $label$118 + (i32.eq + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $2) + ) + ) + (local.set $4 + (i32.shl + (local.get $1) + (i32.const 1) + ) + ) + (br_if $label$119 + (i32.eqz + (local.tee $5 + (i32.load + (local.tee $1 + (i32.add + (i32.add + (local.get $0) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $1) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $1 + (local.get $4) + ) + (local.set $0 + (local.get $5) + ) + (br $label$120) + ) + ) + (if + (i32.lt_u + (local.get $1) + (i32.load + (i32.const 3668) + ) + ) + (call $fimport$10) + (block + (i32.store + (local.get $1) + (local.get $3) + ) + (i32.store offset=24 + (local.get $3) + (local.get $0) + ) + (i32.store offset=12 + (local.get $3) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get $3) + ) + (br $label$113) + ) + ) + (br $label$117) + ) + (if + (i32.and + (i32.ge_u + (local.tee $2 + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + ) + (local.tee $4 + (i32.load + (i32.const 3668) + ) + ) + ) + (i32.ge_u + (local.get $0) + (local.get $4) + ) + ) + (block + (i32.store offset=12 + (local.get $2) + (local.get $3) + ) + (i32.store + (local.get $1) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get $2) + ) + (i32.store offset=12 + (local.get $3) + (local.get $0) + ) + (i32.store offset=24 + (local.get $3) + (i32.const 0) + ) + ) + (call $fimport$10) + ) + ) + ) + (block + (i32.store + (i32.const 3656) + 
(i32.or + (local.get $4) + (local.get $5) + ) + ) + (i32.store + (local.get $0) + (local.get $3) + ) + (i32.store offset=24 + (local.get $3) + (local.get $0) + ) + (i32.store offset=12 + (local.get $3) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get $3) + ) + ) + ) + ) + (i32.store + (i32.const 3684) + (local.tee $0 + (i32.add + (i32.load + (i32.const 3684) + ) + (i32.const -1) + ) + ) + ) + (if + (local.get $0) + (return) + (local.set $0 + (i32.const 4108) + ) + ) + (loop $label$128 + (local.set $0 + (i32.add + (local.tee $2 + (i32.load + (local.get $0) + ) + ) + (i32.const 8) + ) + ) + (br_if $label$128 + (local.get $2) + ) + ) + (i32.store + (i32.const 3684) + (i32.const -1) + ) + ) + ) + (func $37 (; 50 ;) (type $6) + (nop) + ) + (func $38 (; 51 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (i32.add + (local.tee $2 + (i32.load + (global.get $global$0) + ) + ) + (local.tee $0 + (i32.and + (i32.add + (local.get $0) + (i32.const 15) + ) + (i32.const -16) + ) + ) + ) + ) + (if + (i32.or + (i32.and + (i32.gt_s + (local.get $0) + (i32.const 0) + ) + (i32.lt_s + (local.get $1) + (local.get $2) + ) + ) + (i32.lt_s + (local.get $1) + (i32.const 0) + ) + ) + (block + (drop + (call $fimport$6) + ) + (call $fimport$11 + (i32.const 12) + ) + (return + (i32.const -1) + ) + ) + ) + (i32.store + (global.get $global$0) + (local.get $1) + ) + (if + (i32.gt_s + (local.get $1) + (call $fimport$5) + ) + (if + (i32.eqz + (call $fimport$4) + ) + (block + (call $fimport$11 + (i32.const 12) + ) + (i32.store + (global.get $global$0) + (local.get $2) + ) + (return + (i32.const -1) + ) + ) + ) + ) + (local.get $2) + ) + ) + (func $39 (; 52 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (block $label$1 (result i32) + (local.set $4 + (i32.add + (local.get $0) + (local.get $2) + ) + ) + (if + (i32.ge_s + (local.get $2) + (i32.const 20) + ) + (block + (local.set $1 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (if + (local.tee $3 + (i32.and + (local.get $0) + (i32.const 3) + ) + ) + (block + (local.set $3 + (i32.sub + (i32.add + (local.get $0) + (i32.const 4) + ) + (local.get $3) + ) + ) + (loop $label$4 + (if + (i32.lt_s + (local.get $0) + (local.get $3) + ) + (block + (i32.store8 + (local.get $0) + (local.get $1) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (br $label$4) + ) + ) + ) + ) + ) + (local.set $3 + (i32.or + (i32.or + (i32.or + (local.get $1) + (i32.shl + (local.get $1) + (i32.const 8) + ) + ) + (i32.shl + (local.get $1) + (i32.const 16) + ) + ) + (i32.shl + (local.get $1) + (i32.const 24) + ) + ) + ) + (local.set $5 + (i32.and + (local.get $4) + (i32.const -4) + ) + ) + (loop $label$6 + (if + (i32.lt_s + (local.get $0) + (local.get $5) + ) + (block + (i32.store + (local.get $0) + (local.get $3) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 4) + ) + ) + (br $label$6) + ) + ) + ) + ) + ) + (loop $label$8 + (if + (i32.lt_s + (local.get $0) + (local.get $4) + ) + (block + (i32.store8 + (local.get $0) + (local.get $1) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (br $label$8) + ) + ) + ) + (i32.sub + (local.get $0) + (local.get $2) + ) + ) + ) + (func $40 (; 53 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (block $label$1 (result i32) + (if + (i32.ge_s + (local.get $2) + (i32.const 4096) + ) + (return + 
(call $fimport$12 + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + ) + (local.set $3 + (local.get $0) + ) + (if + (i32.eq + (i32.and + (local.get $0) + (i32.const 3) + ) + (i32.and + (local.get $1) + (i32.const 3) + ) + ) + (block + (loop $label$4 + (if + (i32.and + (local.get $0) + (i32.const 3) + ) + (block + (if + (i32.eqz + (local.get $2) + ) + (return + (local.get $3) + ) + ) + (i32.store8 + (local.get $0) + (i32.load8_s + (local.get $1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 1) + ) + ) + (br $label$4) + ) + ) + ) + (loop $label$7 + (if + (i32.ge_s + (local.get $2) + (i32.const 4) + ) + (block + (i32.store + (local.get $0) + (i32.load + (local.get $1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 4) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 4) + ) + ) + (br $label$7) + ) + ) + ) + ) + ) + (loop $label$9 + (if + (i32.gt_s + (local.get $2) + (i32.const 0) + ) + (block + (i32.store8 + (local.get $0) + (i32.load8_s + (local.get $1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 1) + ) + ) + (br $label$9) + ) + ) + ) + (local.get $3) + ) + ) + (func $41 (; 54 ;) (type $3) (result i32) + (i32.const 0) + ) + (func $42 (; 55 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (call_indirect (type $1) + (local.get $1) + (i32.add + (i32.and + (local.get $0) + (i32.const 1) + ) + (i32.const 0) + ) + ) + ) + (func $43 (; 56 ;) (type $12) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32) + (call_indirect (type $0) + (local.get $1) + (local.get $2) + (local.get $3) + (i32.add + (i32.and + (local.get $0) + (i32.const 3) + ) + (i32.const 2) + ) + ) + ) + (func $44 (; 57 ;) (type $5) (param $0 i32) (param $1 i32) + (call_indirect (type $2) + (local.get $1) + (i32.add + (i32.and + (local.get $0) + (i32.const 1) + ) + (i32.const 6) + ) + ) + ) + (func $45 (; 58 ;) (type $1) (param $0 i32) (result i32) + (block $label$1 (result i32) + (call $fimport$3 + (i32.const 0) + ) + (i32.const 0) + ) + ) + (func $46 (; 59 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (block $label$1 (result i32) + (call $fimport$3 + (i32.const 1) + ) + (i32.const 0) + ) + ) + (func $47 (; 60 ;) (type $2) (param $0 i32) + (call $fimport$3 + (i32.const 2) + ) + ) +) + diff --git a/cranelift/wasmtests/embenchen_fasta.wat b/cranelift/wasmtests/embenchen_fasta.wat new file mode 100644 index 0000000000..54baa68e15 --- /dev/null +++ b/cranelift/wasmtests/embenchen_fasta.wat @@ -0,0 +1,16547 @@ +(module + (type $0 (func (param i32 i32 i32) (result i32))) + (type $1 (func)) + (type $2 (func (param i32) (result i32))) + (type $3 (func (param i32))) + (type $4 (func (result i32))) + (type $5 (func (param i32 i32))) + (type $6 (func (param i32 i32) (result i32))) + (type $7 (func (param i32 i32 i32 i32 i32) (result i32))) + (type $8 (func (param i32 i32 i32))) + (type $9 (func (param i64 i32) (result i32))) + (type $10 (func (param i32 i32 i32 i32 i32))) + (type $11 (func (param f64 i32) (result f64))) + (type $12 (func (param i32 i32 i32 i32) (result i32))) + (import "env" "memory" (memory $16 2048 2048)) + (data (i32.const 1024) 
"&\02\00\00a\00\00\00q=\8a>\00\00\00\00c\00\00\00\8f\c2\f5=\00\00\00\00g\00\00\00\8f\c2\f5=\00\00\00\00t\00\00\00q=\8a>\00\00\00\00B\00\00\00\n\d7\a3<\00\00\00\00D\00\00\00\n\d7\a3<\00\00\00\00H\00\00\00\n\d7\a3<\00\00\00\00K\00\00\00\n\d7\a3<\00\00\00\00M\00\00\00\n\d7\a3<\00\00\00\00N\00\00\00\n\d7\a3<\00\00\00\00R\00\00\00\n\d7\a3<\00\00\00\00S\00\00\00\n\d7\a3<\00\00\00\00V\00\00\00\n\d7\a3<\00\00\00\00W\00\00\00\n\d7\a3<\00\00\00\00Y\00\00\00\n\d7\a3<") + (data (i32.const 1220) "a\00\00\00\e9\1c\9b>\00\00\00\00c\00\00\00r\bdJ>\00\00\00\00g\00\00\00\d7IJ>\00\00\00\00t\00\00\00r_\9a>") + (data (i32.const 1280) "\04\05\00\00\05") + (data (i32.const 1296) "\01") + (data (i32.const 1320) "\01\00\00\00\02\00\00\00L\12\00\00\00\04") + (data (i32.const 1344) "\01") + (data (i32.const 1359) "\n\ff\ff\ff\ff") + (data (i32.const 1396) "*\00\00\00error: %d\n\00GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA\00\11\00\n\00\11\11\11\00\00\00\00\05\00\00\00\00\00\00\t\00\00\00\00\0b") + (data (i32.const 1731) "\11\00\0f\n\11\11\11\03\n\07\00\01\13\t\0b\0b\00\00\t\06\0b\00\00\0b\00\06\11\00\00\00\11\11\11") + (data (i32.const 1780) "\0b") + (data (i32.const 1789) "\11\00\n\n\11\11\11\00\n\00\00\02\00\t\0b\00\00\00\t\00\0b\00\00\0b") + (data (i32.const 1838) "\0c") + (data (i32.const 1850) "\0c\00\00\00\00\0c\00\00\00\00\t\0c\00\00\00\00\00\0c\00\00\0c") + (data (i32.const 1896) "\0e") + (data (i32.const 1908) "\0d\00\00\00\04\0d\00\00\00\00\t\0e\00\00\00\00\00\0e\00\00\0e") + (data (i32.const 1954) "\10") + (data (i32.const 1966) "\0f\00\00\00\00\0f\00\00\00\00\t\10\00\00\00\00\00\10\00\00\10\00\00\12\00\00\00\12\12\12") + (data (i32.const 2021) "\12\00\00\00\12\12\12\00\00\00\00\00\00\t") + (data (i32.const 2070) "\0b") + (data (i32.const 2082) "\n\00\00\00\00\n\00\00\00\00\t\0b\00\00\00\00\00\0b\00\00\0b") + (data (i32.const 2128) "\0c") + (data (i32.const 2140) "\0c\00\00\00\00\0c\00\00\00\00\t\0c\00\00\00\00\00\0c\00\00\0c\00\000123456789ABCDEF-+ 0X0x\00(null)\00-0X+0X 0X-0x+0x 0x\00inf\00INF\00nan\00NAN\00.\00T!\"\19\0d\01\02\03\11K\1c\0c\10\04\0b\1d\12\1e\'hnopqb \05\06\0f\13\14\15\1a\08\16\07($\17\18\t\n\0e\1b\1f%#\83\82}&*+<=>?CGJMXYZ[\\]^_`acdefgijklrstyz{|\00Illegal byte sequence\00Domain error\00Result not representable\00Not a tty\00Permission denied\00Operation not permitted\00No such file or directory\00No such process\00File exists\00Value too large for data type\00No space left on device\00Out of memory\00Resource busy\00Interrupted system call\00Resource temporarily unavailable\00Invalid seek\00Cross-device link\00Read-only file system\00Directory not empty\00Connection reset by peer\00Operation timed out\00Connection refused\00Host is down\00Host is unreachable\00Address in use\00Broken pipe\00I/O error\00No such device or address\00Block device required\00No such device\00Not a directory\00Is a directory\00Text file busy\00Exec format error\00Invalid argument\00Argument list too long\00Symbolic link loop\00Filename too long\00Too many open files in system\00No file descriptors available\00Bad file descriptor\00No child process\00Bad address\00File too large\00Too many links\00No locks available\00Resource deadlock would occur\00State not recoverable\00Previous owner died\00Operation canceled\00Function not implemented\00No message of desired 
type\00Identifier removed\00Device not a stream\00No data available\00Device timeout\00Out of streams resources\00Link has been severed\00Protocol error\00Bad message\00File descriptor in bad state\00Not a socket\00Destination address required\00Message too large\00Protocol wrong type for socket\00Protocol not available\00Protocol not supported\00Socket type not supported\00Not supported\00Protocol family not supported\00Address family not supported by protocol\00Address not available\00Network is down\00Network unreachable\00Connection reset by network\00Connection aborted\00No buffer space available\00Socket is connected\00Socket not connected\00Cannot send after socket shutdown\00Operation already in progress\00Operation in progress\00Stale file handle\00Remote I/O error\00Quota exceeded\00No medium found\00Wrong medium type\00No error information") + (import "env" "table" (table $timport$17 9 9 funcref)) + (elem (global.get $gimport$19) $53 $9 $54 $14 $10 $15 $55 $16 $56) + (import "env" "DYNAMICTOP_PTR" (global $gimport$0 i32)) + (import "env" "STACKTOP" (global $gimport$1 i32)) + (import "env" "STACK_MAX" (global $gimport$2 i32)) + (import "env" "memoryBase" (global $gimport$18 i32)) + (import "env" "tableBase" (global $gimport$19 i32)) + (import "env" "abort" (func $fimport$3 (param i32))) + (import "env" "enlargeMemory" (func $fimport$4 (result i32))) + (import "env" "getTotalMemory" (func $fimport$5 (result i32))) + (import "env" "abortOnCannotGrowMemory" (func $fimport$6 (result i32))) + (import "env" "_pthread_cleanup_pop" (func $fimport$7 (param i32))) + (import "env" "_abort" (func $fimport$8)) + (import "env" "_pthread_cleanup_push" (func $fimport$9 (param i32 i32))) + (import "env" "___syscall6" (func $fimport$10 (param i32 i32) (result i32))) + (import "env" "___setErrNo" (func $fimport$11 (param i32))) + (import "env" "_emscripten_memcpy_big" (func $fimport$12 (param i32 i32 i32) (result i32))) + (import "env" "___syscall54" (func $fimport$13 (param i32 i32) (result i32))) + (import "env" "___syscall140" (func $fimport$14 (param i32 i32) (result i32))) + (import "env" "___syscall146" (func $fimport$15 (param i32 i32) (result i32))) + (global $global$0 (mut i32) (global.get $gimport$0)) + (global $global$1 (mut i32) (global.get $gimport$1)) + (global $global$2 (mut i32) (global.get $gimport$2)) + (global $global$3 (mut i32) (i32.const 0)) + (global $global$4 (mut i32) (i32.const 0)) + (global $global$5 (mut i32) (i32.const 0)) + (export "_sbrk" (func $45)) + (export "_free" (func $38)) + (export "_main" (func $7)) + (export "_pthread_self" (func $48)) + (export "_memset" (func $46)) + (export "_malloc" (func $37)) + (export "_memcpy" (func $47)) + (export "___errno_location" (func $12)) + (export "runPostSets" (func $44)) + (export "stackAlloc" (func $0)) + (export "stackSave" (func $1)) + (export "stackRestore" (func $2)) + (export "establishStackSpace" (func $3)) + (export "setThrew" (func $4)) + (export "setTempRet0" (func $5)) + (export "getTempRet0" (func $6)) + (export "dynCall_ii" (func $49)) + (export "dynCall_iiii" (func $50)) + (export "dynCall_vi" (func $51)) + (export "dynCall_v" (func $52)) + (func $0 (; 13 ;) (type $2) (param $0 i32) (result i32) + (local $1 i32) + (block $label$1 (result i32) + (local.set $1 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (local.get $0) + ) + ) + (global.set $global$1 + (i32.and + (i32.add + (global.get $global$1) + (i32.const 15) + ) + (i32.const -16) + ) + ) + (local.get $1) + ) + 
) + (func $1 (; 14 ;) (type $4) (result i32) + (global.get $global$1) + ) + (func $2 (; 15 ;) (type $3) (param $0 i32) + (global.set $global$1 + (local.get $0) + ) + ) + (func $3 (; 16 ;) (type $5) (param $0 i32) (param $1 i32) + (block $label$1 + (global.set $global$1 + (local.get $0) + ) + (global.set $global$2 + (local.get $1) + ) + ) + ) + (func $4 (; 17 ;) (type $5) (param $0 i32) (param $1 i32) + (if + (i32.eqz + (global.get $global$3) + ) + (block + (global.set $global$3 + (local.get $0) + ) + (global.set $global$4 + (local.get $1) + ) + ) + ) + ) + (func $5 (; 18 ;) (type $3) (param $0 i32) + (global.set $global$5 + (local.get $0) + ) + ) + (func $6 (; 19 ;) (type $4) (result i32) + (global.get $global$5) + ) + (func $7 (; 20 ;) (type $6) (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 f32) + (local $12 f32) + (local $13 f64) + (block $label$1 (result i32) + (local.set $5 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 4256) + ) + ) + (local.set $3 + (local.get $5) + ) + (local.set $6 + (i32.add + (local.get $5) + (i32.const 2128) + ) + ) + (local.set $7 + (i32.add + (local.get $5) + (i32.const 8) + ) + ) + (block $label$2 + (block $label$3 + (br_if $label$3 + (i32.le_s + (local.get $0) + (i32.const 1) + ) + ) + (block $label$4 + (block $label$5 + (block $label$6 + (block $label$7 + (block $label$8 + (block $label$9 + (block $label$10 + (br_table $label$5 $label$10 $label$8 $label$9 $label$7 $label$6 $label$4 + (i32.sub + (local.tee $0 + (i32.load8_s + (i32.load offset=4 + (local.get $1) + ) + ) + ) + (i32.const 48) + ) + ) + ) + (local.set $4 + (i32.const 950000) + ) + (br $label$2) + ) + (br $label$3) + ) + (local.set $4 + (i32.const 9500000) + ) + (br $label$2) + ) + (local.set $4 + (i32.const 95000000) + ) + (br $label$2) + ) + (local.set $4 + (i32.const 190000000) + ) + (br $label$2) + ) + (global.set $global$1 + (local.get $5) + ) + (return + (i32.const 0) + ) + ) + (i32.store + (local.get $3) + (i32.add + (local.get $0) + (i32.const -48) + ) + ) + (drop + (call $34 + (i32.const 1400) + (local.get $3) + ) + ) + (global.set $global$1 + (local.get $5) + ) + (return + (i32.const -1) + ) + ) + (local.set $4 + (i32.const 19000000) + ) + ) + (drop + (call $47 + (local.tee $8 + (call $40 + (i32.const 347) + ) + ) + (i32.const 1411) + (i32.const 287) + ) + ) + (i64.store align=1 + (local.tee $0 + (i32.add + (local.get $8) + (i32.const 287) + ) + ) + (i64.load align=1 + (i32.const 1411) + ) + ) + (i64.store offset=8 align=1 + (local.get $0) + (i64.load align=1 + (i32.const 1419) + ) + ) + (i64.store offset=16 align=1 + (local.get $0) + (i64.load align=1 + (i32.const 1427) + ) + ) + (i64.store offset=24 align=1 + (local.get $0) + (i64.load align=1 + (i32.const 1435) + ) + ) + (i64.store offset=32 align=1 + (local.get $0) + (i64.load align=1 + (i32.const 1443) + ) + ) + (i64.store offset=40 align=1 + (local.get $0) + (i64.load align=1 + (i32.const 1451) + ) + ) + (i64.store offset=48 align=1 + (local.get $0) + (i64.load align=1 + (i32.const 1459) + ) + ) + (i32.store offset=56 align=1 + (local.get $0) + (i32.load align=1 + (i32.const 1467) + ) + ) + (local.set $0 + (i32.shl + (local.get $4) + (i32.const 1) + ) + ) + (local.set $1 + (i32.const 0) + ) + (loop $label$11 + (drop + (call $47 + (local.tee $2 + (call $40 + (i32.add + (local.tee $3 + (if (result i32) + (i32.lt_u + 
(local.get $0) + (i32.const 60) + ) + (local.get $0) + (i32.const 60) + ) + ) + (i32.const 2) + ) + ) + ) + (i32.add + (local.get $8) + (local.get $1) + ) + (local.get $3) + ) + ) + (i32.store8 + (i32.add + (local.get $2) + (local.get $3) + ) + (i32.const 0) + ) + (if + (i32.gt_s + (local.tee $10 + (call $31 + (local.get $2) + ) + ) + (local.tee $9 + (i32.load + (i32.const 1024) + ) + ) + ) + (if + (i32.gt_s + (local.get $9) + (i32.const 0) + ) + (block + (i32.store8 + (i32.add + (local.get $2) + (local.get $9) + ) + (i32.const 0) + ) + (drop + (call $35 + (local.get $2) + ) + ) + (i32.store + (i32.const 1024) + (i32.const 0) + ) + ) + ) + (block + (drop + (call $35 + (local.get $2) + ) + ) + (i32.store + (i32.const 1024) + (i32.sub + (i32.load + (i32.const 1024) + ) + (local.get $10) + ) + ) + ) + ) + (call $41 + (local.get $2) + ) + (local.set $1 + (i32.add + (local.tee $2 + (i32.add + (local.get $3) + (local.get $1) + ) + ) + (i32.const -287) + ) + ) + (if + (i32.le_u + (local.get $2) + (i32.const 287) + ) + (local.set $1 + (local.get $2) + ) + ) + (br_if $label$11 + (local.tee $0 + (i32.sub + (local.get $0) + (local.get $3) + ) + ) + ) + ) + (call $42 + (local.get $8) + ) + (if + (i32.load + (i32.const 1028) + ) + (block + (local.set $0 + (i32.const 1028) + ) + (local.set $11 + (f32.const 0) + ) + (loop $label$19 + (local.set $12 + (f32.demote_f64 + (if (result f64) + (f64.lt + (local.tee $13 + (f64.promote_f32 + (local.tee $11 + (f32.add + (local.get $11) + (f32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 4) + ) + ) + ) + ) + ) + ) + ) + (f64.const 1) + ) + (local.get $13) + (f64.const 1) + ) + ) + ) + (f32.store + (local.get $1) + (local.get $12) + ) + (i32.store offset=8 + (local.get $0) + (i32.trunc_f32_s + (f32.mul + (local.get $12) + (f32.const 512) + ) + ) + ) + (br_if $label$19 + (i32.load + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 12) + ) + ) + ) + ) + (local.set $1 + (i32.const 0) + ) + (local.set $0 + (i32.const 1028) + ) + ) + ) + (block + (local.set $1 + (i32.const 0) + ) + (local.set $0 + (i32.const 1028) + ) + ) + ) + (loop $label$23 + (loop $label$24 + (local.set $3 + (i32.add + (local.get $0) + (i32.const 12) + ) + ) + (if + (i32.and + (i32.gt_u + (local.get $1) + (local.tee $2 + (i32.load offset=8 + (local.get $0) + ) + ) + ) + (i32.ne + (local.get $2) + (i32.const 0) + ) + ) + (block + (local.set $0 + (local.get $3) + ) + (br $label$24) + ) + ) + ) + (i32.store + (i32.add + (local.get $6) + (i32.shl + (local.get $1) + (i32.const 2) + ) + ) + (local.get $0) + ) + (br_if $label$23 + (i32.ne + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (i32.const 513) + ) + ) + ) + (i32.store + (i32.add + (local.get $6) + (i32.const 2116) + ) + (i32.const 0) + ) + (local.set $0 + (i32.mul + (local.get $4) + (i32.const 3) + ) + ) + (loop $label$26 + (call $8 + (local.get $6) + (local.tee $1 + (if (result i32) + (i32.lt_u + (local.get $0) + (i32.const 60) + ) + (local.get $0) + (i32.const 60) + ) + ) + ) + (br_if $label$26 + (local.tee $0 + (i32.sub + (local.get $0) + (local.get $1) + ) + ) + ) + ) + (if + (i32.load + (i32.const 1220) + ) + (block + (local.set $0 + (i32.const 1220) + ) + (local.set $11 + (f32.const 0) + ) + (loop $label$30 + (local.set $12 + (f32.demote_f64 + (if (result f64) + (f64.lt + (local.tee $13 + (f64.promote_f32 + (local.tee $11 + (f32.add + (local.get $11) + (f32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 4) + ) + ) + ) + ) + ) + ) + ) + (f64.const 1) + ) + (local.get $13) + (f64.const 
1) + ) + ) + ) + (f32.store + (local.get $1) + (local.get $12) + ) + (i32.store offset=8 + (local.get $0) + (i32.trunc_f32_s + (f32.mul + (local.get $12) + (f32.const 512) + ) + ) + ) + (br_if $label$30 + (i32.load + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 12) + ) + ) + ) + ) + (local.set $1 + (i32.const 0) + ) + (local.set $0 + (i32.const 1220) + ) + ) + ) + (block + (local.set $1 + (i32.const 0) + ) + (local.set $0 + (i32.const 1220) + ) + ) + ) + (loop $label$34 + (loop $label$35 + (local.set $3 + (i32.add + (local.get $0) + (i32.const 12) + ) + ) + (if + (i32.and + (i32.gt_u + (local.get $1) + (local.tee $2 + (i32.load offset=8 + (local.get $0) + ) + ) + ) + (i32.ne + (local.get $2) + (i32.const 0) + ) + ) + (block + (local.set $0 + (local.get $3) + ) + (br $label$35) + ) + ) + ) + (i32.store + (i32.add + (local.get $7) + (i32.shl + (local.get $1) + (i32.const 2) + ) + ) + (local.get $0) + ) + (br_if $label$34 + (i32.ne + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (i32.const 513) + ) + ) + ) + (i32.store + (i32.add + (local.get $7) + (i32.const 2116) + ) + (i32.const 0) + ) + (local.set $0 + (i32.mul + (local.get $4) + (i32.const 5) + ) + ) + (loop $label$37 + (call $8 + (local.get $7) + (local.tee $1 + (if (result i32) + (i32.lt_u + (local.get $0) + (i32.const 60) + ) + (local.get $0) + (i32.const 60) + ) + ) + ) + (br_if $label$37 + (local.tee $0 + (i32.sub + (local.get $0) + (local.get $1) + ) + ) + ) + (local.set $0 + (i32.const 0) + ) + ) + (global.set $global$1 + (local.get $5) + ) + (local.get $0) + ) + ) + (func $8 (; 21 ;) (type $5) (param $0 i32) (param $1 i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 f32) + (block $label$1 + (if + (local.get $1) + (block + (local.set $3 + (i32.const 0) + ) + (local.set $2 + (i32.load + (i32.const 1396) + ) + ) + (loop $label$3 + (local.set $2 + (i32.load + (i32.add + (local.get $0) + (i32.shl + (i32.trunc_f32_s + (f32.mul + (local.tee $6 + (f32.div + (f32.convert_i32_u + (local.tee $4 + (i32.rem_u + (i32.add + (i32.mul + (local.get $2) + (i32.const 3877) + ) + (i32.const 29573) + ) + (i32.const 139968) + ) + ) + ) + (f32.const 139968) + ) + ) + (f32.const 512) + ) + ) + (i32.const 2) + ) + ) + ) + ) + (loop $label$4 + (local.set $5 + (i32.add + (local.get $2) + (i32.const 12) + ) + ) + (if + (f32.lt + (f32.load offset=4 + (local.get $2) + ) + (local.get $6) + ) + (block + (local.set $2 + (local.get $5) + ) + (br $label$4) + ) + ) + ) + (i32.store8 + (i32.add + (i32.add + (local.get $0) + (i32.const 2052) + ) + (local.get $3) + ) + (i32.load + (local.get $2) + ) + ) + (if + (i32.ne + (local.tee $3 + (i32.add + (local.get $3) + (i32.const 1) + ) + ) + (local.get $1) + ) + (block + (local.set $2 + (local.get $4) + ) + (br $label$3) + ) + ) + ) + (i32.store + (i32.const 1396) + (local.get $4) + ) + ) + ) + (i32.store8 + (i32.add + (i32.add + (local.get $0) + (i32.const 2052) + ) + (local.get $1) + ) + (i32.const 10) + ) + (i32.store8 + (i32.add + (i32.add + (local.get $0) + (i32.const 2052) + ) + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + (i32.store + (i32.add + (local.get $0) + (i32.const 2116) + ) + (local.get $1) + ) + (if + (i32.le_s + (local.tee $3 + (call $31 + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 2052) + ) + ) + ) + ) + (local.tee $2 + (i32.load + (i32.const 1024) + ) + ) + ) + (block + (drop + (call $35 + (local.get $1) + ) + ) + (i32.store + (i32.const 1024) + (i32.sub + (i32.load + (i32.const 
1024) + ) + (local.get $3) + ) + ) + (return) + ) + ) + (if + (i32.le_s + (local.get $2) + (i32.const 0) + ) + (return) + ) + (i32.store8 + (i32.add + (i32.add + (local.get $0) + (i32.const 2052) + ) + (local.get $2) + ) + (i32.const 0) + ) + (drop + (call $35 + (local.get $1) + ) + ) + (i32.store8 + (i32.add + (i32.add + (local.get $0) + (i32.const 2052) + ) + (i32.load + (i32.const 1024) + ) + ) + (i32.const 122) + ) + (i32.store + (i32.const 1024) + (i32.const 0) + ) + ) + ) + (func $9 (; 22 ;) (type $2) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (i32.store + (local.tee $2 + (local.get $1) + ) + (i32.load offset=60 + (local.get $0) + ) + ) + (local.set $0 + (call $11 + (call $fimport$10 + (i32.const 6) + (local.get $2) + ) + ) + ) + (global.set $global$1 + (local.get $1) + ) + (local.get $0) + ) + ) + (func $10 (; 23 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (block $label$1 (result i32) + (local.set $4 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 32) + ) + ) + (i32.store + (local.tee $3 + (local.get $4) + ) + (i32.load offset=60 + (local.get $0) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.const 0) + ) + (i32.store offset=8 + (local.get $3) + (local.get $1) + ) + (i32.store offset=12 + (local.get $3) + (local.tee $0 + (i32.add + (local.get $4) + (i32.const 20) + ) + ) + ) + (i32.store offset=16 + (local.get $3) + (local.get $2) + ) + (local.set $0 + (if (result i32) + (i32.lt_s + (call $11 + (call $fimport$14 + (i32.const 140) + (local.get $3) + ) + ) + (i32.const 0) + ) + (block (result i32) + (i32.store + (local.get $0) + (i32.const -1) + ) + (i32.const -1) + ) + (i32.load + (local.get $0) + ) + ) + ) + (global.set $global$1 + (local.get $4) + ) + (local.get $0) + ) + ) + (func $11 (; 24 ;) (type $2) (param $0 i32) (result i32) + (if (result i32) + (i32.gt_u + (local.get $0) + (i32.const -4096) + ) + (block (result i32) + (i32.store + (call $12) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + (i32.const -1) + ) + (local.get $0) + ) + ) + (func $12 (; 25 ;) (type $4) (result i32) + (i32.const 4172) + ) + (func $13 (; 26 ;) (type $3) (param $0 i32) + (nop) + ) + (func $14 (; 27 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (block $label$1 (result i32) + (local.set $4 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 80) + ) + ) + (local.set $3 + (local.get $4) + ) + (local.set $5 + (i32.add + (local.get $4) + (i32.const 12) + ) + ) + (i32.store offset=36 + (local.get $0) + (i32.const 3) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 64) + ) + ) + (block + (i32.store + (local.get $3) + (i32.load offset=60 + (local.get $0) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.const 21505) + ) + (i32.store offset=8 + (local.get $3) + (local.get $5) + ) + (if + (call $fimport$13 + (i32.const 54) + (local.get $3) + ) + (i32.store8 offset=75 + (local.get $0) + (i32.const -1) + ) + ) + ) + ) + (local.set $0 + (call $15 + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (global.set $global$1 + (local.get $4) + ) + (local.get $0) + ) + ) + (func $15 (; 28 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) 
(result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (block $label$1 (result i32) + (local.set $8 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 48) + ) + ) + (local.set $9 + (i32.add + (local.get $8) + (i32.const 16) + ) + ) + (local.set $10 + (local.get $8) + ) + (i32.store + (local.tee $3 + (i32.add + (local.get $8) + (i32.const 32) + ) + ) + (local.tee $4 + (i32.load + (local.tee $6 + (i32.add + (local.get $0) + (i32.const 28) + ) + ) + ) + ) + ) + (i32.store offset=4 + (local.get $3) + (local.tee $5 + (i32.sub + (i32.load + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + (local.get $4) + ) + ) + ) + (i32.store offset=8 + (local.get $3) + (local.get $1) + ) + (i32.store offset=12 + (local.get $3) + (local.get $2) + ) + (local.set $13 + (i32.add + (local.get $0) + (i32.const 60) + ) + ) + (local.set $14 + (i32.add + (local.get $0) + (i32.const 44) + ) + ) + (local.set $1 + (local.get $3) + ) + (local.set $4 + (i32.const 2) + ) + (local.set $12 + (i32.add + (local.get $5) + (local.get $2) + ) + ) + (block $label$2 + (block $label$3 + (block $label$4 + (loop $label$5 + (if + (i32.load + (i32.const 4128) + ) + (block + (call $fimport$9 + (i32.const 1) + (local.get $0) + ) + (i32.store + (local.get $10) + (i32.load + (local.get $13) + ) + ) + (i32.store offset=4 + (local.get $10) + (local.get $1) + ) + (i32.store offset=8 + (local.get $10) + (local.get $4) + ) + (local.set $3 + (call $11 + (call $fimport$15 + (i32.const 146) + (local.get $10) + ) + ) + ) + (call $fimport$7 + (i32.const 0) + ) + ) + (block + (i32.store + (local.get $9) + (i32.load + (local.get $13) + ) + ) + (i32.store offset=4 + (local.get $9) + (local.get $1) + ) + (i32.store offset=8 + (local.get $9) + (local.get $4) + ) + (local.set $3 + (call $11 + (call $fimport$15 + (i32.const 146) + (local.get $9) + ) + ) + ) + ) + ) + (br_if $label$4 + (i32.eq + (local.get $12) + (local.get $3) + ) + ) + (br_if $label$3 + (i32.lt_s + (local.get $3) + (i32.const 0) + ) + ) + (local.set $5 + (if (result i32) + (i32.gt_u + (local.get $3) + (local.tee $5 + (i32.load offset=4 + (local.get $1) + ) + ) + ) + (block (result i32) + (i32.store + (local.get $6) + (local.tee $7 + (i32.load + (local.get $14) + ) + ) + ) + (i32.store + (local.get $11) + (local.get $7) + ) + (local.set $7 + (i32.load offset=12 + (local.get $1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (local.set $4 + (i32.add + (local.get $4) + (i32.const -1) + ) + ) + (i32.sub + (local.get $3) + (local.get $5) + ) + ) + (if (result i32) + (i32.eq + (local.get $4) + (i32.const 2) + ) + (block (result i32) + (i32.store + (local.get $6) + (i32.add + (i32.load + (local.get $6) + ) + (local.get $3) + ) + ) + (local.set $7 + (local.get $5) + ) + (local.set $4 + (i32.const 2) + ) + (local.get $3) + ) + (block (result i32) + (local.set $7 + (local.get $5) + ) + (local.get $3) + ) + ) + ) + ) + (i32.store + (local.get $1) + (i32.add + (i32.load + (local.get $1) + ) + (local.get $5) + ) + ) + (i32.store offset=4 + (local.get $1) + (i32.sub + (local.get $7) + (local.get $5) + ) + ) + (local.set $12 + (i32.sub + (local.get $12) + (local.get $3) + ) + ) + (br $label$5) + ) + ) + (i32.store offset=16 + (local.get $0) + (i32.add + (local.tee $1 + (i32.load + (local.get $14) + ) + ) + (i32.load offset=48 + 
(local.get $0) + ) + ) + ) + (i32.store + (local.get $6) + (local.get $1) + ) + (i32.store + (local.get $11) + (local.get $1) + ) + (br $label$2) + ) + (i32.store offset=16 + (local.get $0) + (i32.const 0) + ) + (i32.store + (local.get $6) + (i32.const 0) + ) + (i32.store + (local.get $11) + (i32.const 0) + ) + (i32.store + (local.get $0) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (local.set $2 + (if (result i32) + (i32.eq + (local.get $4) + (i32.const 2) + ) + (i32.const 0) + (i32.sub + (local.get $2) + (i32.load offset=4 + (local.get $1) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $8) + ) + (local.get $2) + ) + ) + (func $16 (; 29 ;) (type $3) (param $0 i32) + (if + (i32.eqz + (i32.load offset=68 + (local.get $0) + ) + ) + (call $13 + (local.get $0) + ) + ) + ) + (func $17 (; 30 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (block $label$1 (result i32) + (local.set $5 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (block $label$2 + (block $label$3 + (block $label$4 + (if + (i32.and + (local.tee $4 + (i32.ne + (local.get $2) + (i32.const 0) + ) + ) + (i32.ne + (i32.and + (local.get $0) + (i32.const 3) + ) + (i32.const 0) + ) + ) + (block + (local.set $4 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (local.set $3 + (local.get $2) + ) + (local.set $2 + (local.get $0) + ) + (loop $label$6 + (if + (i32.eq + (i32.load8_s + (local.get $2) + ) + (i32.shr_s + (i32.shl + (local.get $4) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (block + (local.set $0 + (local.get $3) + ) + (br $label$3) + ) + ) + (br_if $label$6 + (i32.and + (local.tee $0 + (i32.ne + (local.tee $3 + (i32.add + (local.get $3) + (i32.const -1) + ) + ) + (i32.const 0) + ) + ) + (i32.ne + (i32.and + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (i32.const 3) + ) + (i32.const 0) + ) + ) + ) + (br $label$4) + ) + ) + (block + (local.set $3 + (local.get $2) + ) + (local.set $2 + (local.get $0) + ) + (local.set $0 + (local.get $4) + ) + ) + ) + ) + (if + (local.get $0) + (block + (local.set $0 + (local.get $3) + ) + (br $label$3) + ) + (local.set $0 + (i32.const 0) + ) + ) + (br $label$2) + ) + (if + (i32.ne + (i32.load8_s + (local.get $2) + ) + (i32.shr_s + (i32.shl + (local.tee $1 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (block + (local.set $3 + (i32.mul + (local.get $5) + (i32.const 16843009) + ) + ) + (block $label$12 + (block $label$13 + (br_if $label$13 + (i32.le_u + (local.get $0) + (i32.const 3) + ) + ) + (loop $label$14 + (if + (i32.eqz + (i32.and + (i32.xor + (i32.and + (local.tee $4 + (i32.xor + (i32.load + (local.get $2) + ) + (local.get $3) + ) + ) + (i32.const -2139062144) + ) + (i32.const -2139062144) + ) + (i32.add + (local.get $4) + (i32.const -16843009) + ) + ) + ) + (block + (local.set $2 + (i32.add + (local.get $2) + (i32.const 4) + ) + ) + (br_if $label$14 + (i32.gt_u + (local.tee $0 + (i32.add + (local.get $0) + (i32.const -4) + ) + ) + (i32.const 3) + ) + ) + (br $label$13) + ) + ) + ) + (br $label$12) + ) + (if + (i32.eqz + (local.get $0) + ) + (block + (local.set $0 + (i32.const 0) + ) + (br $label$2) + ) + ) + ) + (loop $label$17 + (br_if $label$2 + (i32.eq + (i32.load8_s + (local.get $2) + ) + (i32.shr_s + (i32.shl + (local.get $1) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) + (local.set $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (br_if $label$17 + (local.tee $0 + 
(i32.add + (local.get $0) + (i32.const -1) + ) + ) + ) + (local.set $0 + (i32.const 0) + ) + ) + ) + ) + ) + (if (result i32) + (local.get $0) + (local.get $2) + (i32.const 0) + ) + ) + ) + (func $18 (; 31 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (block $label$1 (result i32) + (local.set $4 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 224) + ) + ) + (local.set $5 + (i32.add + (local.get $4) + (i32.const 136) + ) + ) + (i64.store align=4 + (local.tee $3 + (i32.add + (local.get $4) + (i32.const 80) + ) + ) + (i64.const 0) + ) + (i64.store offset=8 align=4 + (local.get $3) + (i64.const 0) + ) + (i64.store offset=16 align=4 + (local.get $3) + (i64.const 0) + ) + (i64.store offset=24 align=4 + (local.get $3) + (i64.const 0) + ) + (i64.store offset=32 align=4 + (local.get $3) + (i64.const 0) + ) + (i32.store + (local.tee $6 + (i32.add + (local.get $4) + (i32.const 120) + ) + ) + (i32.load + (local.get $2) + ) + ) + (if + (i32.lt_s + (call $19 + (i32.const 0) + (local.get $1) + (local.get $6) + (local.tee $2 + (local.get $4) + ) + (local.get $3) + ) + (i32.const 0) + ) + (local.set $1 + (i32.const -1) + ) + (block + (local.set $12 + (if (result i32) + (i32.gt_s + (i32.load offset=76 + (local.get $0) + ) + (i32.const -1) + ) + (call $20 + (local.get $0) + ) + (i32.const 0) + ) + ) + (local.set $7 + (i32.load + (local.get $0) + ) + ) + (if + (i32.lt_s + (i32.load8_s offset=74 + (local.get $0) + ) + (i32.const 1) + ) + (i32.store + (local.get $0) + (i32.and + (local.get $7) + (i32.const -33) + ) + ) + ) + (if + (i32.load + (local.tee $8 + (i32.add + (local.get $0) + (i32.const 48) + ) + ) + ) + (local.set $1 + (call $19 + (local.get $0) + (local.get $1) + (local.get $6) + (local.get $2) + (local.get $3) + ) + ) + (block + (local.set $10 + (i32.load + (local.tee $9 + (i32.add + (local.get $0) + (i32.const 44) + ) + ) + ) + ) + (i32.store + (local.get $9) + (local.get $5) + ) + (i32.store + (local.tee $13 + (i32.add + (local.get $0) + (i32.const 28) + ) + ) + (local.get $5) + ) + (i32.store + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + (local.get $5) + ) + (i32.store + (local.get $8) + (i32.const 80) + ) + (i32.store + (local.tee $14 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (i32.add + (local.get $5) + (i32.const 80) + ) + ) + (local.set $1 + (call $19 + (local.get $0) + (local.get $1) + (local.get $6) + (local.get $2) + (local.get $3) + ) + ) + (if + (local.get $10) + (block + (drop + (call_indirect (type $0) + (local.get $0) + (i32.const 0) + (i32.const 0) + (i32.add + (i32.and + (i32.load offset=36 + (local.get $0) + ) + (i32.const 3) + ) + (i32.const 2) + ) + ) + ) + (if + (i32.eqz + (i32.load + (local.get $11) + ) + ) + (local.set $1 + (i32.const -1) + ) + ) + (i32.store + (local.get $9) + (local.get $10) + ) + (i32.store + (local.get $8) + (i32.const 0) + ) + (i32.store + (local.get $14) + (i32.const 0) + ) + (i32.store + (local.get $13) + (i32.const 0) + ) + (i32.store + (local.get $11) + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (local.get $0) + (i32.or + (local.tee $2 + (i32.load + (local.get $0) + ) + ) + (i32.and + (local.get $7) + (i32.const 32) + ) + ) + ) + (if + (local.get $12) + (call $13 + (local.get $0) + ) + ) + (if + (i32.and + (local.get $2) + 
(i32.const 32) + ) + (local.set $1 + (i32.const -1) + ) + ) + ) + ) + (global.set $global$1 + (local.get $4) + ) + (local.get $1) + ) + ) + (func $19 (; 32 ;) (type $7) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (param $4 i32) (result i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (local $16 i32) + (local $17 i32) + (local $18 i32) + (local $19 i32) + (local $20 i32) + (local $21 i32) + (local $22 i32) + (local $23 i32) + (local $24 i32) + (local $25 i32) + (local $26 i32) + (local $27 i32) + (local $28 i32) + (local $29 i32) + (local $30 i32) + (local $31 i32) + (local $32 i32) + (local $33 i32) + (local $34 i32) + (local $35 i32) + (local $36 i32) + (local $37 i32) + (local $38 i32) + (local $39 i32) + (local $40 i32) + (local $41 i32) + (local $42 i32) + (local $43 i32) + (local $44 i32) + (local $45 i32) + (local $46 i32) + (local $47 i32) + (local $48 i32) + (local $49 i32) + (local $50 i64) + (local $51 i64) + (local $52 f64) + (local $53 f64) + (block $label$1 (result i32) + (local.set $23 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 624) + ) + ) + (local.set $20 + (i32.add + (local.get $23) + (i32.const 16) + ) + ) + (local.set $16 + (local.get $23) + ) + (local.set $36 + (i32.add + (local.get $23) + (i32.const 528) + ) + ) + (local.set $30 + (i32.ne + (local.get $0) + (i32.const 0) + ) + ) + (local.set $38 + (local.tee $21 + (i32.add + (local.tee $17 + (i32.add + (local.get $23) + (i32.const 536) + ) + ) + (i32.const 40) + ) + ) + ) + (local.set $39 + (i32.add + (local.get $17) + (i32.const 39) + ) + ) + (local.set $42 + (i32.add + (local.tee $37 + (i32.add + (local.get $23) + (i32.const 8) + ) + ) + (i32.const 4) + ) + ) + (local.set $43 + (i32.sub + (i32.const 0) + (local.tee $27 + (local.tee $19 + (i32.add + (local.get $23) + (i32.const 588) + ) + ) + ) + ) + ) + (local.set $33 + (i32.add + (local.tee $17 + (i32.add + (local.get $23) + (i32.const 576) + ) + ) + (i32.const 12) + ) + ) + (local.set $40 + (i32.add + (local.get $17) + (i32.const 11) + ) + ) + (local.set $44 + (i32.sub + (local.tee $28 + (local.get $33) + ) + (local.get $27) + ) + ) + (local.set $45 + (i32.sub + (i32.const -2) + (local.get $27) + ) + ) + (local.set $46 + (i32.add + (local.get $28) + (i32.const 2) + ) + ) + (local.set $48 + (i32.add + (local.tee $47 + (i32.add + (local.get $23) + (i32.const 24) + ) + ) + (i32.const 288) + ) + ) + (local.set $41 + (local.tee $31 + (i32.add + (local.get $19) + (i32.const 9) + ) + ) + ) + (local.set $34 + (i32.add + (local.get $19) + (i32.const 8) + ) + ) + (local.set $15 + (i32.const 0) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $17 + (i32.const 0) + ) + (block $label$2 + (block $label$3 + (loop $label$4 + (block $label$5 + (if + (i32.gt_s + (local.get $15) + (i32.const -1) + ) + (local.set $15 + (if (result i32) + (i32.gt_s + (local.get $10) + (i32.sub + (i32.const 2147483647) + (local.get $15) + ) + ) + (block (result i32) + (i32.store + (call $12) + (i32.const 75) + ) + (i32.const -1) + ) + (i32.add + (local.get $10) + (local.get $15) + ) + ) + ) + ) + (br_if $label$3 + (i32.eqz + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.get $1) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) + (local.set $11 + (local.get $1) + ) + (block $label$9 + (block $label$10 + (loop $label$11 + (block $label$12 + (block 
$label$13 + (block $label$14 + (block $label$15 + (br_table $label$14 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$15 $label$13 + (i32.sub + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 0) + ) + ) + ) + (local.set $5 + (local.get $11) + ) + (br $label$10) + ) + (local.set $5 + (local.get $11) + ) + (br $label$12) + ) + (local.set $5 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (br $label$11) + ) + ) + (br $label$9) + ) + (loop $label$16 + (br_if $label$9 + (i32.ne + (i32.load8_s offset=1 + (local.get $5) + ) + (i32.const 37) + ) + ) + (local.set $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + (br_if $label$16 + (i32.eq + (i32.load8_s + (local.tee $5 + (i32.add + (local.get $5) + (i32.const 2) + ) + ) + ) + (i32.const 37) + ) + ) + ) + ) + (local.set $10 + (i32.sub + (local.get $11) + (local.get $1) + ) + ) + (if + (local.get $30) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $1) + (local.get $10) + (local.get $0) + ) + ) + ) + ) + (if + (local.get $10) + (block + (local.set $1 + (local.get $5) + ) + (br $label$4) + ) + ) + (local.set $10 + (if (result i32) + (i32.lt_u + (local.tee $9 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $10 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (block (result i32) + (local.set $10 + (i32.add + (local.get $5) + (i32.const 3) + ) + ) + (if + (local.tee $12 + (i32.eq + (i32.load8_s offset=2 + (local.get $5) + ) + (i32.const 36) + ) + ) + (local.set $11 + (local.get $10) + ) + ) + (if + (local.get $12) + (local.set $17 + (i32.const 1) + ) + ) + (local.set $5 + (i32.load8_s + (local.get $11) + ) + ) + (if + (i32.eqz + (local.get $12) + ) + (local.set $9 + (i32.const -1) + ) + ) + (local.get $17) + ) + (block (result i32) + (local.set $5 + (local.get $10) + ) + (local.set $9 + (i32.const -1) + ) + (local.get $17) + ) + ) + ) + (block $label$25 + (if + (i32.lt_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -32) + ) + ) + (i32.const 32) + ) + (block + (local.set $17 + (i32.const 0) + ) + (loop $label$27 + (br_if $label$25 + (i32.eqz + (i32.and + (i32.shl + (i32.const 1) + (local.get $12) + ) + (i32.const 75913) + ) + ) + ) + (local.set $17 + (i32.or + (i32.shl + (i32.const 1) + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -32) + ) + ) + (local.get $17) + ) + ) + (br_if $label$27 + (i32.lt_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -32) + ) + ) + (i32.const 32) + ) + ) + ) + ) + (local.set $17 + (i32.const 0) + ) + ) + ) + (block $label$29 + (if + (i32.eq + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 42) + ) + (block + (local.set $11 + (block $label$31 (result i32) + (block 
$label$32 + (br_if $label$32 + (i32.ge_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + ) + (br_if $label$32 + (i32.ne + (i32.load8_s offset=2 + (local.get $11) + ) + (i32.const 36) + ) + ) + (i32.store + (i32.add + (local.get $4) + (i32.shl + (local.get $12) + (i32.const 2) + ) + ) + (i32.const 10) + ) + (local.set $8 + (i32.const 1) + ) + (local.set $10 + (i32.wrap_i64 + (i64.load + (i32.add + (local.get $3) + (i32.shl + (i32.add + (i32.load8_s + (local.get $7) + ) + (i32.const -48) + ) + (i32.const 3) + ) + ) + ) + ) + ) + (br $label$31 + (i32.add + (local.get $11) + (i32.const 3) + ) + ) + ) + (if + (local.get $10) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (if + (i32.eqz + (local.get $30) + ) + (block + (local.set $12 + (local.get $17) + ) + (local.set $17 + (i32.const 0) + ) + (local.set $11 + (local.get $7) + ) + (local.set $10 + (i32.const 0) + ) + (br $label$29) + ) + ) + (local.set $10 + (i32.load + (local.tee $11 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $11) + (i32.const 4) + ) + ) + (local.set $8 + (i32.const 0) + ) + (local.get $7) + ) + ) + (local.set $12 + (i32.or + (local.get $17) + (i32.const 8192) + ) + ) + (local.set $7 + (i32.sub + (i32.const 0) + (local.get $10) + ) + ) + (local.set $5 + (i32.load8_s + (local.get $11) + ) + ) + (if + (i32.eqz + (local.tee $6 + (i32.lt_s + (local.get $10) + (i32.const 0) + ) + ) + ) + (local.set $12 + (local.get $17) + ) + ) + (local.set $17 + (local.get $8) + ) + (if + (local.get $6) + (local.set $10 + (local.get $7) + ) + ) + ) + (if + (i32.lt_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (block + (local.set $7 + (i32.const 0) + ) + (local.set $5 + (local.get $12) + ) + (loop $label$39 + (local.set $7 + (i32.add + (i32.mul + (local.get $7) + (i32.const 10) + ) + (local.get $5) + ) + ) + (br_if $label$39 + (i32.lt_u + (local.tee $5 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $12 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + ) + ) + (if + (i32.lt_s + (local.get $7) + (i32.const 0) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + (block + (local.set $5 + (local.get $12) + ) + (local.set $12 + (local.get $17) + ) + (local.set $17 + (local.get $10) + ) + (local.set $10 + (local.get $7) + ) + ) + ) + ) + (block + (local.set $12 + (local.get $17) + ) + (local.set $17 + (local.get $10) + ) + (local.set $10 + (i32.const 0) + ) + ) + ) + ) + ) + (block $label$43 + (if + (i32.eq + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 46) + ) + (block + (if + (i32.ne + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 42) + ) + (block + (if + (i32.lt_u + (local.tee $5 + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (block + (local.set $11 + (local.get $7) + 
) + (local.set $7 + (i32.const 0) + ) + ) + (block + (local.set $5 + (i32.const 0) + ) + (local.set $11 + (local.get $7) + ) + (br $label$43) + ) + ) + (loop $label$48 + (local.set $5 + (i32.add + (i32.mul + (local.get $7) + (i32.const 10) + ) + (local.get $5) + ) + ) + (br_if $label$43 + (i32.ge_u + (local.tee $8 + (i32.add + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + ) + (local.set $7 + (local.get $5) + ) + (local.set $5 + (local.get $8) + ) + (br $label$48) + ) + ) + ) + (if + (i32.lt_u + (local.tee $5 + (i32.add + (i32.load8_s + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 2) + ) + ) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (if + (i32.eq + (i32.load8_s offset=3 + (local.get $11) + ) + (i32.const 36) + ) + (block + (i32.store + (i32.add + (local.get $4) + (i32.shl + (local.get $5) + (i32.const 2) + ) + ) + (i32.const 10) + ) + (local.set $5 + (i32.wrap_i64 + (i64.load + (i32.add + (local.get $3) + (i32.shl + (i32.add + (i32.load8_s + (local.get $7) + ) + (i32.const -48) + ) + (i32.const 3) + ) + ) + ) + ) + ) + (local.set $11 + (i32.add + (local.get $11) + (i32.const 4) + ) + ) + (br $label$43) + ) + ) + ) + (if + (local.get $17) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (local.set $11 + (if (result i32) + (local.get $30) + (block (result i32) + (local.set $5 + (i32.load + (local.tee $11 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $11) + (i32.const 4) + ) + ) + (local.get $7) + ) + (block (result i32) + (local.set $5 + (i32.const 0) + ) + (local.get $7) + ) + ) + ) + ) + (local.set $5 + (i32.const -1) + ) + ) + ) + (local.set $7 + (local.get $11) + ) + (local.set $8 + (i32.const 0) + ) + (loop $label$55 + (if + (i32.gt_u + (local.tee $6 + (i32.add + (i32.load8_s + (local.get $7) + ) + (i32.const -65) + ) + ) + (i32.const 57) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (local.set $11 + (i32.add + (local.get $7) + (i32.const 1) + ) + ) + (if + (i32.lt_u + (i32.add + (local.tee $6 + (i32.and + (local.tee $13 + (i32.load8_s + (i32.add + (i32.add + (i32.mul + (local.get $8) + (i32.const 58) + ) + (i32.const 1699) + ) + (local.get $6) + ) + ) + ) + (i32.const 255) + ) + ) + (i32.const -1) + ) + (i32.const 8) + ) + (block + (local.set $7 + (local.get $11) + ) + (local.set $8 + (local.get $6) + ) + (br $label$55) + ) + ) + ) + (if + (i32.eqz + (i32.shr_s + (i32.shl + (local.get $13) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (local.set $14 + (i32.gt_s + (local.get $9) + (i32.const -1) + ) + ) + (block $label$59 + (block $label$60 + (if + (i32.eq + (i32.shr_s + (i32.shl + (local.get $13) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 19) + ) + (if + (local.get $14) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + (br $label$60) + ) + (block + (if + (local.get $14) + (block + (i32.store + (i32.add + (local.get $4) + (i32.shl + (local.get $9) + (i32.const 2) + ) + ) + (local.get $6) + ) + (i64.store + (local.get $16) + (i64.load + (i32.add + (local.get $3) + (i32.shl + (local.get $9) + (i32.const 3) + ) + ) + ) + ) + (br $label$60) + ) + ) + (if + (i32.eqz + (local.get $30) + ) + (block + (local.set $15 + (i32.const 0) + ) + (br $label$5) + ) + ) + (call $22 + (local.get $16) + (local.get $6) + 
(local.get $2) + ) + ) + ) + (br $label$59) + ) + (if + (i32.eqz + (local.get $30) + ) + (block + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + ) + ) + (local.set $9 + (i32.and + (local.tee $7 + (i32.load8_s + (local.get $7) + ) + ) + (i32.const -33) + ) + ) + (if + (i32.eqz + (i32.and + (i32.ne + (local.get $8) + (i32.const 0) + ) + (i32.eq + (i32.and + (local.get $7) + (i32.const 15) + ) + (i32.const 3) + ) + ) + ) + (local.set $9 + (local.get $7) + ) + ) + (local.set $7 + (i32.and + (local.get $12) + (i32.const -65537) + ) + ) + (if + (i32.and + (local.get $12) + (i32.const 8192) + ) + (local.set $12 + (local.get $7) + ) + ) + (block $label$70 + (block $label$71 + (block $label$72 + (block $label$73 + (block $label$74 + (block $label$75 + (block $label$76 + (block $label$77 + (block $label$78 + (block $label$79 + (block $label$80 + (block $label$81 + (block $label$82 + (block $label$83 + (block $label$84 + (block $label$85 + (block $label$86 + (block $label$87 + (block $label$88 + (block $label$89 + (br_table $label$78 $label$77 $label$80 $label$77 $label$78 $label$78 $label$78 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$79 $label$77 $label$77 $label$77 $label$77 $label$87 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$78 $label$77 $label$83 $label$85 $label$78 $label$78 $label$78 $label$77 $label$85 $label$77 $label$77 $label$77 $label$82 $label$89 $label$86 $label$88 $label$77 $label$77 $label$81 $label$77 $label$84 $label$77 $label$77 $label$87 $label$77 + (i32.sub + (local.get $9) + (i32.const 65) + ) + ) + ) + (block $label$90 + (block $label$91 + (block $label$92 + (block $label$93 + (block $label$94 + (block $label$95 + (block $label$96 + (block $label$97 + (br_table $label$97 $label$96 $label$95 $label$94 $label$93 $label$90 $label$92 $label$91 $label$90 + (i32.sub + (i32.shr_s + (i32.shl + (i32.and + (local.get $8) + (i32.const 255) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 0) + ) + ) + ) + (i32.store + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i64.store + (i32.load + (local.get $16) + ) + (i64.extend_i32_s + (local.get $15) + ) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store16 + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store8 + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i64.store + (i32.load + (local.get $16) + ) + (i64.extend_i32_s + (local.get $15) + ) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $12 + (i32.or + (local.get $12) + (i32.const 8) + ) + ) + (if + (i32.le_u + (local.get $5) + (i32.const 8) + ) + (local.set $5 + 
(i32.const 8) + ) + ) + (local.set $9 + (i32.const 120) + ) + (br $label$76) + ) + (br $label$76) + ) + (if + (i64.eq + (local.tee $50 + (i64.load + (local.get $16) + ) + ) + (i64.const 0) + ) + (local.set $7 + (local.get $21) + ) + (block + (local.set $1 + (local.get $21) + ) + (loop $label$101 + (i64.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i64.or + (i64.and + (local.get $50) + (i64.const 7) + ) + (i64.const 48) + ) + ) + (br_if $label$101 + (i64.ne + (local.tee $50 + (i64.shr_u + (local.get $50) + (i64.const 3) + ) + ) + (i64.const 0) + ) + ) + (local.set $7 + (local.get $1) + ) + ) + ) + ) + (if + (i32.and + (local.get $12) + (i32.const 8) + ) + (block + (local.set $8 + (i32.add + (local.tee $1 + (i32.sub + (local.get $38) + (local.get $7) + ) + ) + (i32.const 1) + ) + ) + (if + (i32.le_s + (local.get $5) + (local.get $1) + ) + (local.set $5 + (local.get $8) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 2179) + ) + (br $label$71) + ) + (block + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 2179) + ) + (br $label$71) + ) + ) + ) + (if + (i64.lt_s + (local.tee $50 + (i64.load + (local.get $16) + ) + ) + (i64.const 0) + ) + (block + (i64.store + (local.get $16) + (local.tee $50 + (i64.sub + (i64.const 0) + (local.get $50) + ) + ) + ) + (local.set $6 + (i32.const 1) + ) + (local.set $8 + (i32.const 2179) + ) + (br $label$75) + ) + ) + (if + (i32.and + (local.get $12) + (i32.const 2048) + ) + (block + (local.set $6 + (i32.const 1) + ) + (local.set $8 + (i32.const 2180) + ) + (br $label$75) + ) + (block + (local.set $6 + (local.tee $1 + (i32.and + (local.get $12) + (i32.const 1) + ) + ) + ) + (local.set $8 + (if (result i32) + (local.get $1) + (i32.const 2181) + (i32.const 2179) + ) + ) + (br $label$75) + ) + ) + ) + (local.set $50 + (i64.load + (local.get $16) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 2179) + ) + (br $label$75) + ) + (i64.store8 + (local.get $39) + (i64.load + (local.get $16) + ) + ) + (local.set $1 + (local.get $39) + ) + (local.set $12 + (local.get $7) + ) + (local.set $7 + (i32.const 1) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 2179) + ) + (local.set $5 + (local.get $21) + ) + (br $label$70) + ) + (local.set $1 + (call $24 + (i32.load + (call $12) + ) + ) + ) + (br $label$74) + ) + (if + (i32.eqz + (local.tee $1 + (i32.load + (local.get $16) + ) + ) + ) + (local.set $1 + (i32.const 2189) + ) + ) + (br $label$74) + ) + (i64.store32 + (local.get $37) + (i64.load + (local.get $16) + ) + ) + (i32.store + (local.get $42) + (i32.const 0) + ) + (i32.store + (local.get $16) + (local.get $37) + ) + (local.set $7 + (local.get $37) + ) + (local.set $6 + (i32.const -1) + ) + (br $label$73) + ) + (local.set $7 + (i32.load + (local.get $16) + ) + ) + (if + (local.get $5) + (block + (local.set $6 + (local.get $5) + ) + (br $label$73) + ) + (block + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (i32.const 0) + (local.get $12) + ) + (local.set $1 + (i32.const 0) + ) + (br $label$72) + ) + ) + ) + (local.set $52 + (f64.load + (local.get $16) + ) + ) + (i32.store + (local.get $20) + (i32.const 0) + ) + (local.set $26 + (if (result i32) + (i64.lt_s + (i64.reinterpret_f64 + (local.get $52) + ) + (i64.const 0) + ) + (block (result i32) + (local.set $24 + (i32.const 1) + ) + (local.set $52 + (f64.neg + (local.get $52) + ) + ) + (i32.const 2196) + ) + (block (result i32) + (local.set $1 + (i32.and + (local.get $12) + (i32.const 1) + ) + ) 
+ (if (result i32) + (i32.and + (local.get $12) + (i32.const 2048) + ) + (block (result i32) + (local.set $24 + (i32.const 1) + ) + (i32.const 2199) + ) + (block (result i32) + (local.set $24 + (local.get $1) + ) + (if (result i32) + (local.get $1) + (i32.const 2202) + (i32.const 2197) + ) + ) + ) + ) + ) + ) + (block $label$119 + (if + (i64.lt_u + (i64.and + (i64.reinterpret_f64 + (local.get $52) + ) + (i64.const 9218868437227405312) + ) + (i64.const 9218868437227405312) + ) + (block + (if + (local.tee $1 + (f64.ne + (local.tee $52 + (f64.mul + (call $27 + (local.get $52) + (local.get $20) + ) + (f64.const 2) + ) + ) + (f64.const 0) + ) + ) + (i32.store + (local.get $20) + (i32.add + (i32.load + (local.get $20) + ) + (i32.const -1) + ) + ) + ) + (if + (i32.eq + (local.tee $22 + (i32.or + (local.get $9) + (i32.const 32) + ) + ) + (i32.const 97) + ) + (block + (local.set $1 + (i32.add + (local.get $26) + (i32.const 9) + ) + ) + (if + (local.tee $6 + (i32.and + (local.get $9) + (i32.const 32) + ) + ) + (local.set $26 + (local.get $1) + ) + ) + (if + (i32.eqz + (i32.or + (i32.gt_u + (local.get $5) + (i32.const 11) + ) + (i32.eqz + (local.tee $1 + (i32.sub + (i32.const 12) + (local.get $5) + ) + ) + ) + ) + ) + (block + (local.set $53 + (f64.const 8) + ) + (loop $label$125 + (local.set $53 + (f64.mul + (local.get $53) + (f64.const 16) + ) + ) + (br_if $label$125 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + ) + ) + (local.set $52 + (if (result f64) + (i32.eq + (i32.load8_s + (local.get $26) + ) + (i32.const 45) + ) + (f64.neg + (f64.add + (local.get $53) + (f64.sub + (f64.neg + (local.get $52) + ) + (local.get $53) + ) + ) + ) + (f64.sub + (f64.add + (local.get $52) + (local.get $53) + ) + (local.get $53) + ) + ) + ) + ) + ) + (local.set $1 + (i32.sub + (i32.const 0) + (local.tee $7 + (i32.load + (local.get $20) + ) + ) + ) + ) + (if + (i32.eq + (local.tee $1 + (call $23 + (i64.extend_i32_s + (if (result i32) + (i32.lt_s + (local.get $7) + (i32.const 0) + ) + (local.get $1) + (local.get $7) + ) + ) + (local.get $33) + ) + ) + (local.get $33) + ) + (block + (i32.store8 + (local.get $40) + (i32.const 48) + ) + (local.set $1 + (local.get $40) + ) + ) + ) + (local.set $13 + (i32.or + (local.get $24) + (i32.const 2) + ) + ) + (i32.store8 + (i32.add + (local.get $1) + (i32.const -1) + ) + (i32.add + (i32.and + (i32.shr_s + (local.get $7) + (i32.const 31) + ) + (i32.const 2) + ) + (i32.const 43) + ) + ) + (i32.store8 + (local.tee $8 + (i32.add + (local.get $1) + (i32.const -2) + ) + ) + (i32.add + (local.get $9) + (i32.const 15) + ) + ) + (local.set $9 + (i32.lt_s + (local.get $5) + (i32.const 1) + ) + ) + (local.set $14 + (i32.eqz + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + ) + (local.set $1 + (local.get $19) + ) + (loop $label$131 + (i32.store8 + (local.get $1) + (i32.or + (i32.load8_u + (i32.add + (local.tee $7 + (i32.trunc_f64_s + (local.get $52) + ) + ) + (i32.const 2163) + ) + ) + (local.get $6) + ) + ) + (local.set $52 + (f64.mul + (f64.sub + (local.get $52) + (f64.convert_i32_s + (local.get $7) + ) + ) + (f64.const 16) + ) + ) + (local.set $1 + (block $label$132 (result i32) + (if (result i32) + (i32.eq + (i32.sub + (local.tee $7 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.get $27) + ) + (i32.const 1) + ) + (block (result i32) + (drop + (br_if $label$132 + (local.get $7) + (i32.and + (local.get $14) + (i32.and + (local.get $9) + (f64.eq + (local.get $52) + (f64.const 0) + ) + ) + ) + ) + ) + (i32.store8 + (local.get $7) + (i32.const 46) + ) 
+ (i32.add + (local.get $1) + (i32.const 2) + ) + ) + (local.get $7) + ) + ) + ) + (br_if $label$131 + (f64.ne + (local.get $52) + (f64.const 0) + ) + ) + ) + (local.set $6 + (i32.sub + (i32.add + (local.get $46) + (local.get $5) + ) + (local.tee $7 + (local.get $8) + ) + ) + ) + (local.set $9 + (i32.add + (i32.sub + (local.get $44) + (local.get $7) + ) + (local.get $1) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.tee $5 + (i32.add + (if (result i32) + (i32.and + (i32.ne + (local.get $5) + (i32.const 0) + ) + (i32.lt_s + (i32.add + (local.get $45) + (local.get $1) + ) + (local.get $5) + ) + ) + (local.get $6) + (local.tee $6 + (local.get $9) + ) + ) + (local.get $13) + ) + ) + (local.get $12) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $26) + (local.get $13) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (local.get $10) + (local.get $5) + (i32.xor + (local.get $12) + (i32.const 65536) + ) + ) + (local.set $1 + (i32.sub + (local.get $1) + (local.get $27) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $19) + (local.get $1) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (i32.sub + (local.get $6) + (i32.add + (local.get $1) + (local.tee $1 + (i32.sub + (local.get $28) + (local.get $7) + ) + ) + ) + ) + (i32.const 0) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $8) + (local.get $1) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $5) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.ge_s + (local.get $5) + (local.get $10) + ) + (local.set $10 + (local.get $5) + ) + ) + (br $label$119) + ) + ) + (if + (local.get $1) + (block + (i32.store + (local.get $20) + (local.tee $6 + (i32.add + (i32.load + (local.get $20) + ) + (i32.const -28) + ) + ) + ) + (local.set $52 + (f64.mul + (local.get $52) + (f64.const 268435456) + ) + ) + ) + (local.set $6 + (i32.load + (local.get $20) + ) + ) + ) + (local.set $8 + (local.tee $7 + (if (result i32) + (i32.lt_s + (local.get $6) + (i32.const 0) + ) + (local.get $47) + (local.get $48) + ) + ) + ) + (loop $label$145 + (i32.store + (local.get $8) + (local.tee $1 + (i32.trunc_f64_s + (local.get $52) + ) + ) + ) + (local.set $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + (br_if $label$145 + (f64.ne + (local.tee $52 + (f64.mul + (f64.sub + (local.get $52) + (f64.convert_i32_u + (local.get $1) + ) + ) + (f64.const 1e9) + ) + ) + (f64.const 0) + ) + ) + ) + (if + (i32.gt_s + (local.get $6) + (i32.const 0) + ) + (block + (local.set $1 + (local.get $7) + ) + (loop $label$147 + (local.set $14 + (if (result i32) + (i32.gt_s + (local.get $6) + (i32.const 29) + ) + (i32.const 29) + (local.get $6) + ) + ) + (block $label$150 + (if + (i32.ge_u + (local.tee $6 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + (local.get $1) + ) + (block + (local.set $50 + (i64.extend_i32_u + (local.get $14) + ) + ) + (local.set $13 + (i32.const 0) + ) + (loop $label$152 + (i64.store32 + (local.get $6) + (i64.rem_u + (local.tee $51 + (i64.add + (i64.shl + (i64.extend_i32_u + (i32.load + (local.get $6) + ) + ) + (local.get $50) + ) + (i64.extend_i32_u + (local.get $13) + ) + ) + ) + (i64.const 1000000000) + ) + ) + (local.set $13 + (i32.wrap_i64 + (i64.div_u + 
(local.get $51) + (i64.const 1000000000) + ) + ) + ) + (br_if $label$152 + (i32.ge_u + (local.tee $6 + (i32.add + (local.get $6) + (i32.const -4) + ) + ) + (local.get $1) + ) + ) + ) + (br_if $label$150 + (i32.eqz + (local.get $13) + ) + ) + (i32.store + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -4) + ) + ) + (local.get $13) + ) + ) + ) + ) + (loop $label$153 + (if + (i32.gt_u + (local.get $8) + (local.get $1) + ) + (if + (i32.eqz + (i32.load + (local.tee $6 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + ) + ) + (block + (local.set $8 + (local.get $6) + ) + (br $label$153) + ) + ) + ) + ) + (i32.store + (local.get $20) + (local.tee $6 + (i32.sub + (i32.load + (local.get $20) + ) + (local.get $14) + ) + ) + ) + (br_if $label$147 + (i32.gt_s + (local.get $6) + (i32.const 0) + ) + ) + ) + ) + (local.set $1 + (local.get $7) + ) + ) + (local.set $18 + (if (result i32) + (i32.lt_s + (local.get $5) + (i32.const 0) + ) + (i32.const 6) + (local.get $5) + ) + ) + (if + (i32.lt_s + (local.get $6) + (i32.const 0) + ) + (block + (local.set $14 + (i32.add + (i32.div_s + (i32.add + (local.get $18) + (i32.const 25) + ) + (i32.const 9) + ) + (i32.const 1) + ) + ) + (local.set $25 + (i32.eq + (local.get $22) + (i32.const 102) + ) + ) + (local.set $5 + (local.get $8) + ) + (loop $label$160 + (if + (i32.gt_s + (local.tee $13 + (i32.sub + (i32.const 0) + (local.get $6) + ) + ) + (i32.const 9) + ) + (local.set $13 + (i32.const 9) + ) + ) + (block $label$162 + (if + (i32.lt_u + (local.get $1) + (local.get $5) + ) + (block + (local.set $29 + (i32.add + (i32.shl + (i32.const 1) + (local.get $13) + ) + (i32.const -1) + ) + ) + (local.set $35 + (i32.shr_u + (i32.const 1000000000) + (local.get $13) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (local.get $1) + ) + (loop $label$164 + (i32.store + (local.get $8) + (i32.add + (i32.shr_u + (local.tee $32 + (i32.load + (local.get $8) + ) + ) + (local.get $13) + ) + (local.get $6) + ) + ) + (local.set $6 + (i32.mul + (i32.and + (local.get $32) + (local.get $29) + ) + (local.get $35) + ) + ) + (br_if $label$164 + (i32.lt_u + (local.tee $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + (local.get $5) + ) + ) + ) + (local.set $8 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (if + (i32.eqz + (i32.load + (local.get $1) + ) + ) + (local.set $1 + (local.get $8) + ) + ) + (br_if $label$162 + (i32.eqz + (local.get $6) + ) + ) + (i32.store + (local.get $5) + (local.get $6) + ) + (local.set $5 + (i32.add + (local.get $5) + (i32.const 4) + ) + ) + ) + (block + (local.set $8 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (if + (i32.eqz + (i32.load + (local.get $1) + ) + ) + (local.set $1 + (local.get $8) + ) + ) + ) + ) + ) + (local.set $6 + (i32.add + (local.tee $8 + (if (result i32) + (local.get $25) + (local.get $7) + (local.get $1) + ) + ) + (i32.shl + (local.get $14) + (i32.const 2) + ) + ) + ) + (if + (i32.gt_s + (i32.shr_s + (i32.sub + (local.get $5) + (local.get $8) + ) + (i32.const 2) + ) + (local.get $14) + ) + (local.set $5 + (local.get $6) + ) + ) + (i32.store + (local.get $20) + (local.tee $6 + (i32.add + (i32.load + (local.get $20) + ) + (local.get $13) + ) + ) + ) + (br_if $label$160 + (i32.lt_s + (local.get $6) + (i32.const 0) + ) + ) + (local.set $13 + (local.get $5) + ) + ) + ) + (local.set $13 + (local.get $8) + ) + ) + (local.set $25 + (local.get $7) + ) + (block $label$172 + (if + (i32.lt_u + (local.get $1) + (local.get $13) + ) + (block + (local.set $5 + (i32.mul + (i32.shr_s + (i32.sub + (local.get $25) + 
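+ ;; %e/%f/%g path: the mantissa is expanded into base-1000000000 limbs (the i64 div/rem by 1000000000 above) and then shifted by at most 29 bits per iteration with carry propagation, in the style of musl's fmt_fp big-integer scheme.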
(local.get $1) + ) + (i32.const 2) + ) + (i32.const 9) + ) + ) + (br_if $label$172 + (i32.lt_u + (local.tee $6 + (i32.load + (local.get $1) + ) + ) + (i32.const 10) + ) + ) + (local.set $8 + (i32.const 10) + ) + (loop $label$174 + (local.set $5 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$174 + (i32.ge_u + (local.get $6) + (local.tee $8 + (i32.mul + (local.get $8) + (i32.const 10) + ) + ) + ) + ) + ) + ) + (local.set $5 + (i32.const 0) + ) + ) + ) + (local.set $29 + (i32.eq + (local.get $22) + (i32.const 103) + ) + ) + (local.set $35 + (i32.ne + (local.get $18) + (i32.const 0) + ) + ) + (if + (i32.lt_s + (local.tee $8 + (i32.add + (i32.sub + (local.get $18) + (if (result i32) + (i32.ne + (local.get $22) + (i32.const 102) + ) + (local.get $5) + (i32.const 0) + ) + ) + (i32.shr_s + (i32.shl + (i32.and + (local.get $35) + (local.get $29) + ) + (i32.const 31) + ) + (i32.const 31) + ) + ) + ) + (i32.add + (i32.mul + (i32.shr_s + (i32.sub + (local.get $13) + (local.get $25) + ) + (i32.const 2) + ) + (i32.const 9) + ) + (i32.const -9) + ) + ) + (block + (if + (i32.lt_s + (local.tee $8 + (i32.add + (i32.rem_s + (local.tee $14 + (i32.add + (local.get $8) + (i32.const 9216) + ) + ) + (i32.const 9) + ) + (i32.const 1) + ) + ) + (i32.const 9) + ) + (block + (local.set $6 + (i32.const 10) + ) + (loop $label$180 + (local.set $6 + (i32.mul + (local.get $6) + (i32.const 10) + ) + ) + (br_if $label$180 + (i32.ne + (local.tee $8 + (i32.add + (local.get $8) + (i32.const 1) + ) + ) + (i32.const 9) + ) + ) + ) + ) + (local.set $6 + (i32.const 10) + ) + ) + (local.set $14 + (i32.rem_u + (local.tee $22 + (i32.load + (local.tee $8 + (i32.add + (i32.add + (local.get $7) + (i32.const 4) + ) + (i32.shl + (i32.add + (i32.div_s + (local.get $14) + (i32.const 9) + ) + (i32.const -1024) + ) + (i32.const 2) + ) + ) + ) + ) + ) + (local.get $6) + ) + ) + (block $label$182 + (if + (i32.eqz + (i32.and + (local.tee $32 + (i32.eq + (i32.add + (local.get $8) + (i32.const 4) + ) + (local.get $13) + ) + ) + (i32.eqz + (local.get $14) + ) + ) + ) + (block + (local.set $52 + (if (result f64) + (i32.lt_u + (local.get $14) + (local.tee $49 + (i32.div_s + (local.get $6) + (i32.const 2) + ) + ) + ) + (f64.const 0.5) + (if (result f64) + (i32.and + (local.get $32) + (i32.eq + (local.get $14) + (local.get $49) + ) + ) + (f64.const 1) + (f64.const 1.5) + ) + ) + ) + (local.set $53 + (if (result f64) + (i32.and + (i32.div_u + (local.get $22) + (local.get $6) + ) + (i32.const 1) + ) + (f64.const 9007199254740994) + (f64.const 9007199254740992) + ) + ) + (block $label$190 + (if + (local.get $24) + (block + (br_if $label$190 + (i32.ne + (i32.load8_s + (local.get $26) + ) + (i32.const 45) + ) + ) + (local.set $53 + (f64.neg + (local.get $53) + ) + ) + (local.set $52 + (f64.neg + (local.get $52) + ) + ) + ) + ) + ) + (i32.store + (local.get $8) + (local.tee $14 + (i32.sub + (local.get $22) + (local.get $14) + ) + ) + ) + (br_if $label$182 + (f64.eq + (f64.add + (local.get $53) + (local.get $52) + ) + (local.get $53) + ) + ) + (i32.store + (local.get $8) + (local.tee $5 + (i32.add + (local.get $14) + (local.get $6) + ) + ) + ) + (if + (i32.gt_u + (local.get $5) + (i32.const 999999999) + ) + (loop $label$193 + (i32.store + (local.get $8) + (i32.const 0) + ) + (if + (i32.lt_u + (local.tee $8 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + (local.get $1) + ) + (i32.store + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -4) + ) + ) + (i32.const 0) + ) + ) + (i32.store + (local.get $8) + (local.tee $5 + (i32.add 
+ (i32.load + (local.get $8) + ) + (i32.const 1) + ) + ) + ) + (br_if $label$193 + (i32.gt_u + (local.get $5) + (i32.const 999999999) + ) + ) + ) + ) + (local.set $5 + (i32.mul + (i32.shr_s + (i32.sub + (local.get $25) + (local.get $1) + ) + (i32.const 2) + ) + (i32.const 9) + ) + ) + (br_if $label$182 + (i32.lt_u + (local.tee $14 + (i32.load + (local.get $1) + ) + ) + (i32.const 10) + ) + ) + (local.set $6 + (i32.const 10) + ) + (loop $label$195 + (local.set $5 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$195 + (i32.ge_u + (local.get $14) + (local.tee $6 + (i32.mul + (local.get $6) + (i32.const 10) + ) + ) + ) + ) + ) + ) + ) + ) + (local.set $14 + (local.get $1) + ) + (local.set $6 + (local.get $5) + ) + (if + (i32.le_u + (local.get $13) + (local.tee $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + ) + (local.set $8 + (local.get $13) + ) + ) + ) + (block + (local.set $14 + (local.get $1) + ) + (local.set $6 + (local.get $5) + ) + (local.set $8 + (local.get $13) + ) + ) + ) + (local.set $32 + (i32.sub + (i32.const 0) + (local.get $6) + ) + ) + (loop $label$198 + (block $label$199 + (if + (i32.le_u + (local.get $8) + (local.get $14) + ) + (block + (local.set $22 + (i32.const 0) + ) + (br $label$199) + ) + ) + (if + (i32.load + (local.tee $1 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + ) + (local.set $22 + (i32.const 1) + ) + (block + (local.set $8 + (local.get $1) + ) + (br $label$198) + ) + ) + ) + ) + (block $label$203 + (if + (local.get $29) + (block + (local.set $1 + (if (result i32) + (i32.and + (i32.gt_s + (local.tee $1 + (i32.add + (i32.xor + (i32.and + (local.get $35) + (i32.const 1) + ) + (i32.const 1) + ) + (local.get $18) + ) + ) + (local.get $6) + ) + (i32.gt_s + (local.get $6) + (i32.const -5) + ) + ) + (block (result i32) + (local.set $5 + (i32.add + (local.get $9) + (i32.const -1) + ) + ) + (i32.sub + (i32.add + (local.get $1) + (i32.const -1) + ) + (local.get $6) + ) + ) + (block (result i32) + (local.set $5 + (i32.add + (local.get $9) + (i32.const -2) + ) + ) + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + ) + ) + (br_if $label$203 + (local.tee $13 + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + ) + (block $label$207 + (if + (local.get $22) + (block + (if + (i32.eqz + (local.tee $18 + (i32.load + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + ) + ) + (block + (local.set $9 + (i32.const 9) + ) + (br $label$207) + ) + ) + (if + (i32.rem_u + (local.get $18) + (i32.const 10) + ) + (block + (local.set $9 + (i32.const 0) + ) + (br $label$207) + ) + (block + (local.set $13 + (i32.const 10) + ) + (local.set $9 + (i32.const 0) + ) + ) + ) + (loop $label$212 + (local.set $9 + (i32.add + (local.get $9) + (i32.const 1) + ) + ) + (br_if $label$212 + (i32.eqz + (i32.rem_u + (local.get $18) + (local.tee $13 + (i32.mul + (local.get $13) + (i32.const 10) + ) + ) + ) + ) + ) + ) + ) + (local.set $9 + (i32.const 9) + ) + ) + ) + (local.set $18 + (i32.add + (i32.mul + (i32.shr_s + (i32.sub + (local.get $8) + (local.get $25) + ) + (i32.const 2) + ) + (i32.const 9) + ) + (i32.const -9) + ) + ) + (if + (i32.eq + (i32.or + (local.get $5) + (i32.const 32) + ) + (i32.const 102) + ) + (block + (local.set $13 + (i32.const 0) + ) + (if + (i32.ge_s + (local.get $1) + (if (result i32) + (i32.lt_s + (local.tee $9 + (i32.sub + (local.get $18) + (local.get $9) + ) + ) + (i32.const 0) + ) + (local.tee $9 + (i32.const 0) + ) + (local.get $9) + ) + ) + (local.set $1 + (local.get $9) + ) + ) + ) + (block + (local.set $13 + (i32.const 0) + ) + (if + 
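+ ;; rounding: adding 2^53 (9007199254740992) or 2^53+2 to 0.5/1/1.5 forces round-half-to-even at the cut-off digit; a limb that overflows 999999999 carries into the next limb via the loop above.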
(i32.ge_s + (local.get $1) + (if (result i32) + (i32.lt_s + (local.tee $9 + (i32.sub + (i32.add + (local.get $18) + (local.get $6) + ) + (local.get $9) + ) + ) + (i32.const 0) + ) + (local.tee $9 + (i32.const 0) + ) + (local.get $9) + ) + ) + (local.set $1 + (local.get $9) + ) + ) + ) + ) + ) + (block + (local.set $13 + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + (local.set $1 + (local.get $18) + ) + (local.set $5 + (local.get $9) + ) + ) + ) + ) + (if + (local.tee $25 + (i32.eq + (i32.or + (local.get $5) + (i32.const 32) + ) + (i32.const 102) + ) + ) + (block + (local.set $9 + (i32.const 0) + ) + (if + (i32.le_s + (local.get $6) + (i32.const 0) + ) + (local.set $6 + (i32.const 0) + ) + ) + ) + (block + (if + (i32.lt_s + (i32.sub + (local.get $28) + (local.tee $9 + (call $23 + (i64.extend_i32_s + (if (result i32) + (i32.lt_s + (local.get $6) + (i32.const 0) + ) + (local.get $32) + (local.get $6) + ) + ) + (local.get $33) + ) + ) + ) + (i32.const 2) + ) + (loop $label$229 + (i32.store8 + (local.tee $9 + (i32.add + (local.get $9) + (i32.const -1) + ) + ) + (i32.const 48) + ) + (br_if $label$229 + (i32.lt_s + (i32.sub + (local.get $28) + (local.get $9) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.store8 + (i32.add + (local.get $9) + (i32.const -1) + ) + (i32.add + (i32.and + (i32.shr_s + (local.get $6) + (i32.const 31) + ) + (i32.const 2) + ) + (i32.const 43) + ) + ) + (i32.store8 + (local.tee $6 + (i32.add + (local.get $9) + (i32.const -2) + ) + ) + (local.get $5) + ) + (local.set $9 + (local.get $6) + ) + (local.set $6 + (i32.sub + (local.get $28) + (local.get $6) + ) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.tee $18 + (i32.add + (i32.add + (i32.add + (i32.add + (local.get $24) + (i32.const 1) + ) + (local.get $1) + ) + (i32.ne + (local.tee $29 + (i32.or + (local.get $1) + (local.get $13) + ) + ) + (i32.const 0) + ) + ) + (local.get $6) + ) + ) + (local.get $12) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $26) + (local.get $24) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (local.get $10) + (local.get $18) + (i32.xor + (local.get $12) + (i32.const 65536) + ) + ) + (block $label$231 + (if + (local.get $25) + (block + (local.set $6 + (local.tee $9 + (if (result i32) + (i32.gt_u + (local.get $14) + (local.get $7) + ) + (local.get $7) + (local.get $14) + ) + ) + ) + (loop $label$235 + (local.set $5 + (call $23 + (i64.extend_i32_u + (i32.load + (local.get $6) + ) + ) + (local.get $31) + ) + ) + (block $label$236 + (if + (i32.eq + (local.get $6) + (local.get $9) + ) + (block + (br_if $label$236 + (i32.ne + (local.get $5) + (local.get $31) + ) + ) + (i32.store8 + (local.get $34) + (i32.const 48) + ) + (local.set $5 + (local.get $34) + ) + ) + (block + (br_if $label$236 + (i32.le_u + (local.get $5) + (local.get $19) + ) + ) + (drop + (call $46 + (local.get $19) + (i32.const 48) + (i32.sub + (local.get $5) + (local.get $27) + ) + ) + ) + (loop $label$239 + (br_if $label$239 + (i32.gt_u + (local.tee $5 + (i32.add + (local.get $5) + (i32.const -1) + ) + ) + (local.get $19) + ) + ) + ) + ) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $5) + (i32.sub + (local.get $41) + (local.get $5) + ) + (local.get $0) + ) + ) + ) + (if + (i32.le_u + (local.tee $5 + (i32.add + (local.get $6) + (i32.const 4) + ) + ) + (local.get $7) + ) + (block + (local.set $6 + (local.get $5) + ) 
+ (br $label$235) + ) + ) + ) + (block $label$242 + (if + (local.get $29) + (block + (br_if $label$242 + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (i32.const 2231) + (i32.const 1) + (local.get $0) + ) + ) + ) + ) + ) + (if + (i32.and + (i32.gt_s + (local.get $1) + (i32.const 0) + ) + (i32.lt_u + (local.get $5) + (local.get $8) + ) + ) + (loop $label$245 + (if + (i32.gt_u + (local.tee $7 + (call $23 + (i64.extend_i32_u + (i32.load + (local.get $5) + ) + ) + (local.get $31) + ) + ) + (local.get $19) + ) + (block + (drop + (call $46 + (local.get $19) + (i32.const 48) + (i32.sub + (local.get $7) + (local.get $27) + ) + ) + ) + (loop $label$247 + (br_if $label$247 + (i32.gt_u + (local.tee $7 + (i32.add + (local.get $7) + (i32.const -1) + ) + ) + (local.get $19) + ) + ) + ) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $7) + (if (result i32) + (i32.gt_s + (local.get $1) + (i32.const 9) + ) + (i32.const 9) + (local.get $1) + ) + (local.get $0) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $1) + (i32.const -9) + ) + ) + (if + (i32.and + (i32.gt_s + (local.get $1) + (i32.const 9) + ) + (i32.lt_u + (local.tee $5 + (i32.add + (local.get $5) + (i32.const 4) + ) + ) + (local.get $8) + ) + ) + (block + (local.set $1 + (local.get $7) + ) + (br $label$245) + ) + (local.set $1 + (local.get $7) + ) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (i32.add + (local.get $1) + (i32.const 9) + ) + (i32.const 9) + (i32.const 0) + ) + ) + (block + (local.set $5 + (i32.add + (local.get $14) + (i32.const 4) + ) + ) + (if + (i32.eqz + (local.get $22) + ) + (local.set $8 + (local.get $5) + ) + ) + (if + (i32.gt_s + (local.get $1) + (i32.const -1) + ) + (block + (local.set $13 + (i32.eqz + (local.get $13) + ) + ) + (local.set $7 + (local.get $14) + ) + (local.set $5 + (local.get $1) + ) + (loop $label$256 + (if + (i32.eq + (local.tee $1 + (call $23 + (i64.extend_i32_u + (i32.load + (local.get $7) + ) + ) + (local.get $31) + ) + ) + (local.get $31) + ) + (block + (i32.store8 + (local.get $34) + (i32.const 48) + ) + (local.set $1 + (local.get $34) + ) + ) + ) + (block $label$258 + (if + (i32.eq + (local.get $7) + (local.get $14) + ) + (block + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $1) + (i32.const 1) + (local.get $0) + ) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (br_if $label$258 + (i32.and + (local.get $13) + (i32.lt_s + (local.get $5) + (i32.const 1) + ) + ) + ) + (br_if $label$258 + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (i32.const 2231) + (i32.const 1) + (local.get $0) + ) + ) + ) + (block + (br_if $label$258 + (i32.le_u + (local.get $1) + (local.get $19) + ) + ) + (drop + (call $46 + (local.get $19) + (i32.const 48) + (i32.add + (local.get $1) + (local.get $43) + ) + ) + ) + (loop $label$262 + (br_if $label$262 + (i32.gt_u + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (local.get $19) + ) + ) + ) + ) + ) + ) + (local.set $6 + (i32.sub + (local.get $41) + (local.get $1) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $1) + (if (result i32) + (i32.gt_s + (local.get $5) + (local.get $6) + ) + (local.get $6) + (local.get $5) + ) + (local.get $0) + ) + ) + ) + (br_if $label$256 + (i32.and + (i32.lt_u + (local.tee 
$7 + (i32.add + (local.get $7) + (i32.const 4) + ) + ) + (local.get $8) + ) + (i32.gt_s + (local.tee $5 + (i32.sub + (local.get $5) + (local.get $6) + ) + ) + (i32.const -1) + ) + ) + ) + (local.set $1 + (local.get $5) + ) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (i32.add + (local.get $1) + (i32.const 18) + ) + (i32.const 18) + (i32.const 0) + ) + (br_if $label$231 + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $9) + (i32.sub + (local.get $28) + (local.get $9) + ) + (local.get $0) + ) + ) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $18) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.ge_s + (local.get $18) + (local.get $10) + ) + (local.set $10 + (local.get $18) + ) + ) + ) + (block + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.tee $8 + (i32.add + (if (result i32) + (local.tee $6 + (i32.or + (f64.ne + (local.get $52) + (local.get $52) + ) + (i32.const 0) + ) + ) + (local.tee $24 + (i32.const 0) + ) + (local.get $24) + ) + (i32.const 3) + ) + ) + (local.get $7) + ) + (if + (i32.eqz + (i32.and + (local.tee $1 + (i32.load + (local.get $0) + ) + ) + (i32.const 32) + ) + ) + (block + (drop + (call $21 + (local.get $26) + (local.get $24) + (local.get $0) + ) + ) + (local.set $1 + (i32.load + (local.get $0) + ) + ) + ) + ) + (local.set $7 + (if (result i32) + (local.tee $5 + (i32.ne + (i32.and + (local.get $9) + (i32.const 32) + ) + (i32.const 0) + ) + ) + (i32.const 2215) + (i32.const 2219) + ) + ) + (local.set $5 + (if (result i32) + (local.get $5) + (i32.const 2223) + (i32.const 2227) + ) + ) + (if + (i32.eqz + (local.get $6) + ) + (local.set $5 + (local.get $7) + ) + ) + (if + (i32.eqz + (i32.and + (local.get $1) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $5) + (i32.const 3) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $8) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.ge_s + (local.get $8) + (local.get $10) + ) + (local.set $10 + (local.get $8) + ) + ) + ) + ) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $7 + (local.get $5) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 2179) + ) + (local.set $5 + (local.get $21) + ) + (br $label$70) + ) + (local.set $7 + (i32.and + (local.get $9) + (i32.const 32) + ) + ) + (local.set $7 + (if (result i32) + (i64.eq + (local.tee $50 + (i64.load + (local.get $16) + ) + ) + (i64.const 0) + ) + (block (result i32) + (local.set $50 + (i64.const 0) + ) + (local.get $21) + ) + (block (result i32) + (local.set $1 + (local.get $21) + ) + (loop $label$280 + (i32.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i32.or + (i32.load8_u + (i32.add + (i32.and + (i32.wrap_i64 + (local.get $50) + ) + (i32.const 15) + ) + (i32.const 2163) + ) + ) + (local.get $7) + ) + ) + (br_if $label$280 + (i64.ne + (local.tee $50 + (i64.shr_u + (local.get $50) + (i64.const 4) + ) + ) + (i64.const 0) + ) + ) + ) + (local.set $50 + (i64.load + (local.get $16) + ) + ) + (local.get $1) + ) + ) + ) + (local.set $8 + (i32.add + (i32.shr_s + (local.get $9) + (i32.const 4) + ) + (i32.const 2179) + ) + ) + (if + (local.tee $1 + (i32.or + (i32.eqz + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + (i64.eq + (local.get $50) + (i64.const 0) + ) + ) + ) + (local.set $8 + (i32.const 2179) + ) + ) + (local.set $6 + (if (result i32) + (local.get $1) + 
(i32.const 0) + (i32.const 2) + ) + ) + (br $label$71) + ) + (local.set $7 + (call $23 + (local.get $50) + (local.get $21) + ) + ) + (br $label$71) + ) + (local.set $14 + (i32.eqz + (local.tee $13 + (call $17 + (local.get $1) + (i32.const 0) + (local.get $5) + ) + ) + ) + ) + (local.set $8 + (i32.sub + (local.get $13) + (local.get $1) + ) + ) + (local.set $9 + (i32.add + (local.get $1) + (local.get $5) + ) + ) + (local.set $12 + (local.get $7) + ) + (local.set $7 + (if (result i32) + (local.get $14) + (local.get $5) + (local.get $8) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 2179) + ) + (local.set $5 + (if (result i32) + (local.get $14) + (local.get $9) + (local.get $13) + ) + ) + (br $label$70) + ) + (local.set $1 + (i32.const 0) + ) + (local.set $5 + (i32.const 0) + ) + (local.set $8 + (local.get $7) + ) + (loop $label$288 + (block $label$289 + (br_if $label$289 + (i32.eqz + (local.tee $9 + (i32.load + (local.get $8) + ) + ) + ) + ) + (br_if $label$289 + (i32.or + (i32.lt_s + (local.tee $5 + (call $26 + (local.get $36) + (local.get $9) + ) + ) + (i32.const 0) + ) + (i32.gt_u + (local.get $5) + (i32.sub + (local.get $6) + (local.get $1) + ) + ) + ) + ) + (local.set $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + (br_if $label$288 + (i32.gt_u + (local.get $6) + (local.tee $1 + (i32.add + (local.get $5) + (local.get $1) + ) + ) + ) + ) + ) + ) + (if + (i32.lt_s + (local.get $5) + (i32.const 0) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $1) + (local.get $12) + ) + (if + (local.get $1) + (block + (local.set $5 + (i32.const 0) + ) + (loop $label$292 + (br_if $label$72 + (i32.eqz + (local.tee $8 + (i32.load + (local.get $7) + ) + ) + ) + ) + (br_if $label$72 + (i32.gt_s + (local.tee $5 + (i32.add + (local.tee $8 + (call $26 + (local.get $36) + (local.get $8) + ) + ) + (local.get $5) + ) + ) + (local.get $1) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $36) + (local.get $8) + (local.get $0) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $7) + (i32.const 4) + ) + ) + (br_if $label$292 + (i32.lt_u + (local.get $5) + (local.get $1) + ) + ) + (br $label$72) + ) + ) + (block + (local.set $1 + (i32.const 0) + ) + (br $label$72) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $1) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.le_s + (local.get $10) + (local.get $1) + ) + (local.set $10 + (local.get $1) + ) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $1 + (i32.and + (local.get $12) + (i32.const -65537) + ) + ) + (if + (i32.gt_s + (local.get $5) + (i32.const -1) + ) + (local.set $12 + (local.get $1) + ) + ) + (local.set $5 + (if (result i32) + (i32.or + (local.get $5) + (local.tee $9 + (i64.ne + (i64.load + (local.get $16) + ) + (i64.const 0) + ) + ) + ) + (block (result i32) + (local.set $1 + (local.get $7) + ) + (if + (i32.gt_s + (local.get $5) + (local.tee $7 + (i32.add + (i32.xor + (i32.and + (local.get $9) + (i32.const 1) + ) + (i32.const 1) + ) + (i32.sub + (local.get $38) + (local.get $7) + ) + ) + ) + ) + (local.set $7 + (local.get $5) + ) + ) + (local.get $21) + ) + (block (result i32) + (local.set $1 + (local.get $21) + ) + (local.set $7 + (i32.const 0) + ) + (local.get $21) + ) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (if (result i32) + (i32.lt_s + 
(local.get $10) + (local.tee $5 + (i32.add + (if (result i32) + (i32.lt_s + (local.get $7) + (local.tee $9 + (i32.sub + (local.get $5) + (local.get $1) + ) + ) + ) + (local.tee $7 + (local.get $9) + ) + (local.get $7) + ) + (local.get $6) + ) + ) + ) + (local.tee $10 + (local.get $5) + ) + (local.get $10) + ) + (local.get $5) + (local.get $12) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $8) + (local.get $6) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (local.get $10) + (local.get $5) + (i32.xor + (local.get $12) + (i32.const 65536) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (local.get $7) + (local.get $9) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $1) + (local.get $9) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $5) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + ) + (br $label$2) + ) + (if + (i32.eqz + (local.get $0) + ) + (if + (local.get $17) + (block + (local.set $0 + (i32.const 1) + ) + (loop $label$308 + (if + (local.tee $1 + (i32.load + (i32.add + (local.get $4) + (i32.shl + (local.get $0) + (i32.const 2) + ) + ) + ) + ) + (block + (call $22 + (i32.add + (local.get $3) + (i32.shl + (local.get $0) + (i32.const 3) + ) + ) + (local.get $1) + (local.get $2) + ) + (br_if $label$308 + (i32.lt_s + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (i32.const 10) + ) + ) + (local.set $15 + (i32.const 1) + ) + (br $label$2) + ) + ) + ) + (loop $label$310 + (if + (i32.load + (i32.add + (local.get $4) + (i32.shl + (local.get $0) + (i32.const 2) + ) + ) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$2) + ) + ) + (br_if $label$310 + (i32.lt_s + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (i32.const 10) + ) + ) + (local.set $15 + (i32.const 1) + ) + ) + ) + (local.set $15 + (i32.const 0) + ) + ) + ) + ) + (global.set $global$1 + (local.get $23) + ) + (local.get $15) + ) + ) + (func $20 (; 33 ;) (type $2) (param $0 i32) (result i32) + (i32.const 0) + ) + (func $21 (; 34 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (block $label$1 (result i32) + (block $label$2 + (block $label$3 + (br_if $label$3 + (local.tee $3 + (i32.load + (local.tee $4 + (i32.add + (local.get $2) + (i32.const 16) + ) + ) + ) + ) + ) + (if + (call $30 + (local.get $2) + ) + (local.set $3 + (i32.const 0) + ) + (block + (local.set $3 + (i32.load + (local.get $4) + ) + ) + (br $label$3) + ) + ) + (br $label$2) + ) + (if + (i32.lt_u + (i32.sub + (local.get $3) + (local.tee $4 + (i32.load + (local.tee $5 + (i32.add + (local.get $2) + (i32.const 20) + ) + ) + ) + ) + ) + (local.get $1) + ) + (block + (local.set $3 + (call_indirect (type $0) + (local.get $2) + (local.get $0) + (local.get $1) + (i32.add + (i32.and + (i32.load offset=36 + (local.get $2) + ) + (i32.const 3) + ) + (i32.const 2) + ) + ) + ) + (br $label$2) + ) + ) + (local.set $2 + (block $label$7 (result i32) + (if (result i32) + (i32.gt_s + (i32.load8_s offset=75 + (local.get $2) + ) + (i32.const -1) + ) + (block (result i32) + (local.set $3 + (local.get $1) + ) + (loop $label$9 + (drop + (br_if $label$7 + (i32.const 0) + (i32.eqz + (local.get $3) + ) + ) + ) + (if + (i32.ne + 
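+ ;; $20 always returns 0 (apparently a compiled-out lock/readiness stub); $21 is the buffered write core: for line-buffered streams (byte at offset 75 >= 0) it first flushes up to the last '\n' (byte 10) through the stream's write callback (the call_indirect via the function index at offset 36), then copies the remainder into the FILE buffer with $47 (a memcpy-style helper).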
(i32.load8_s + (i32.add + (local.get $0) + (local.tee $6 + (i32.add + (local.get $3) + (i32.const -1) + ) + ) + ) + ) + (i32.const 10) + ) + (block + (local.set $3 + (local.get $6) + ) + (br $label$9) + ) + ) + ) + (br_if $label$2 + (i32.lt_u + (call_indirect (type $0) + (local.get $2) + (local.get $0) + (local.get $3) + (i32.add + (i32.and + (i32.load offset=36 + (local.get $2) + ) + (i32.const 3) + ) + (i32.const 2) + ) + ) + (local.get $3) + ) + ) + (local.set $4 + (i32.load + (local.get $5) + ) + ) + (local.set $1 + (i32.sub + (local.get $1) + (local.get $3) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (local.get $3) + ) + ) + (local.get $3) + ) + (i32.const 0) + ) + ) + ) + (drop + (call $47 + (local.get $4) + (local.get $0) + (local.get $1) + ) + ) + (i32.store + (local.get $5) + (i32.add + (i32.load + (local.get $5) + ) + (local.get $1) + ) + ) + (local.set $3 + (i32.add + (local.get $2) + (local.get $1) + ) + ) + ) + (local.get $3) + ) + ) + (func $22 (; 35 ;) (type $8) (param $0 i32) (param $1 i32) (param $2 i32) + (local $3 i32) + (local $4 i64) + (local $5 f64) + (block $label$1 + (if + (i32.le_u + (local.get $1) + (i32.const 20) + ) + (block $label$3 + (block $label$4 + (block $label$5 + (block $label$6 + (block $label$7 + (block $label$8 + (block $label$9 + (block $label$10 + (block $label$11 + (block $label$12 + (block $label$13 + (br_table $label$13 $label$12 $label$11 $label$10 $label$9 $label$8 $label$7 $label$6 $label$5 $label$4 $label$3 + (i32.sub + (local.get $1) + (i32.const 9) + ) + ) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i32.store + (local.get $0) + (local.get $3) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_s + (local.get $3) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_u + (local.get $3) + ) + ) + (br $label$1) + ) + (local.set $4 + (i64.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 7) + ) + (i32.const -8) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (i64.store + (local.get $0) + (local.get $4) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_s + (i32.shr_s + (i32.shl + (i32.and + (local.get $3) + (i32.const 65535) + ) + (i32.const 16) + ) + (i32.const 16) + ) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_u + (i32.and + (local.get $3) 
+ (i32.const 65535) + ) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_s + (i32.shr_s + (i32.shl + (i32.and + (local.get $3) + (i32.const 255) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_u + (i32.and + (local.get $3) + (i32.const 255) + ) + ) + ) + (br $label$1) + ) + (local.set $5 + (f64.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 7) + ) + (i32.const -8) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (f64.store + (local.get $0) + (local.get $5) + ) + (br $label$1) + ) + (local.set $5 + (f64.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 7) + ) + (i32.const -8) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (f64.store + (local.get $0) + (local.get $5) + ) + ) + ) + ) + ) + (func $23 (; 36 ;) (type $9) (param $0 i64) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i64) + (block $label$1 (result i32) + (local.set $2 + (i32.wrap_i64 + (local.get $0) + ) + ) + (if + (i64.gt_u + (local.get $0) + (i64.const 4294967295) + ) + (block + (loop $label$3 + (i64.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i64.or + (i64.rem_u + (local.get $0) + (i64.const 10) + ) + (i64.const 48) + ) + ) + (local.set $4 + (i64.div_u + (local.get $0) + (i64.const 10) + ) + ) + (if + (i64.gt_u + (local.get $0) + (i64.const 42949672959) + ) + (block + (local.set $0 + (local.get $4) + ) + (br $label$3) + ) + ) + ) + (local.set $2 + (i32.wrap_i64 + (local.get $4) + ) + ) + ) + ) + (if + (local.get $2) + (loop $label$6 + (i32.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i32.or + (i32.rem_u + (local.get $2) + (i32.const 10) + ) + (i32.const 48) + ) + ) + (local.set $3 + (i32.div_u + (local.get $2) + (i32.const 10) + ) + ) + (if + (i32.ge_u + (local.get $2) + (i32.const 10) + ) + (block + (local.set $2 + (local.get $3) + ) + (br $label$6) + ) + ) + ) + ) + (local.get $1) + ) + ) + (func $24 (; 37 ;) (type $2) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (i32.const 0) + ) + (block $label$2 + (block $label$3 + (block $label$4 + (loop $label$5 + (br_if $label$4 + (i32.eq + (i32.load8_u + (i32.add + (local.get $1) + (i32.const 2233) + ) + ) + (local.get $0) + ) + ) + (br_if $label$5 + (i32.ne + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (i32.const 87) + ) + ) + (local.set $1 + (i32.const 87) + ) + (local.set $0 + (i32.const 2321) + ) + (br $label$3) + ) + ) + (if + (local.get $1) + (block + (local.set $0 + (i32.const 2321) + ) + (br $label$3) + ) + (local.set $0 + (i32.const 2321) + ) + ) + (br $label$2) + ) + (loop $label$8 + (local.set $2 + (local.get $0) + ) + (loop $label$9 + (local.set $0 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (if + (i32.load8_s + (local.get $2) + ) + (block + 
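+ ;; $23 (above) renders a u64 in decimal, storing digits backwards from the end of the buffer and returning a pointer to the first digit; $24 is a strerror-style lookup that scans the byte table at offset 2233 for the code and then skips that many NUL-terminated strings starting at offset 2321; $25 (below) pads output by writing the fill character in 256-byte chunks.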
(local.set $2 + (local.get $0) + ) + (br $label$9) + ) + ) + ) + (br_if $label$8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + ) + ) + ) + (local.get $0) + ) + ) + (func $25 (; 38 ;) (type $10) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (param $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (block $label$1 + (local.set $7 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 256) + ) + ) + (local.set $6 + (local.get $7) + ) + (block $label$2 + (if + (i32.and + (i32.gt_s + (local.get $2) + (local.get $3) + ) + (i32.eqz + (i32.and + (local.get $4) + (i32.const 73728) + ) + ) + ) + (block + (drop + (call $46 + (local.get $6) + (local.get $1) + (if (result i32) + (i32.gt_u + (local.tee $5 + (i32.sub + (local.get $2) + (local.get $3) + ) + ) + (i32.const 256) + ) + (i32.const 256) + (local.get $5) + ) + ) + ) + (local.set $4 + (i32.eqz + (i32.and + (local.tee $1 + (i32.load + (local.get $0) + ) + ) + (i32.const 32) + ) + ) + ) + (if + (i32.gt_u + (local.get $5) + (i32.const 255) + ) + (block + (loop $label$7 + (if + (local.get $4) + (block + (drop + (call $21 + (local.get $6) + (i32.const 256) + (local.get $0) + ) + ) + (local.set $1 + (i32.load + (local.get $0) + ) + ) + ) + ) + (local.set $4 + (i32.eqz + (i32.and + (local.get $1) + (i32.const 32) + ) + ) + ) + (br_if $label$7 + (i32.gt_u + (local.tee $5 + (i32.add + (local.get $5) + (i32.const -256) + ) + ) + (i32.const 255) + ) + ) + ) + (br_if $label$2 + (i32.eqz + (local.get $4) + ) + ) + (local.set $5 + (i32.and + (i32.sub + (local.get $2) + (local.get $3) + ) + (i32.const 255) + ) + ) + ) + (br_if $label$2 + (i32.eqz + (local.get $4) + ) + ) + ) + (drop + (call $21 + (local.get $6) + (local.get $5) + (local.get $0) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $7) + ) + ) + ) + (func $26 (; 39 ;) (type $6) (param $0 i32) (param $1 i32) (result i32) + (if (result i32) + (local.get $0) + (call $29 + (local.get $0) + (local.get $1) + (i32.const 0) + ) + (i32.const 0) + ) + ) + (func $27 (; 40 ;) (type $11) (param $0 f64) (param $1 i32) (result f64) + (call $28 + (local.get $0) + (local.get $1) + ) + ) + (func $28 (; 41 ;) (type $11) (param $0 f64) (param $1 i32) (result f64) + (local $2 i64) + (local $3 i64) + (block $label$1 (result f64) + (block $label$2 + (block $label$3 + (block $label$4 + (block $label$5 + (br_table $label$5 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$4 $label$3 + (i32.sub + (i32.shr_s + (i32.shl + (i32.and + (i32.and + (i32.wrap_i64 + (local.tee $3 + (i64.shr_u + (local.tee $2 + (i64.reinterpret_f64 + (local.get $0) + ) + ) + (i64.const 52) + ) + ) + ) + (i32.const 65535) + ) + (i32.const 2047) + ) + (i32.const 16) + ) + (i32.const 16) + ) + (i32.const 0) + ) + ) + ) + (i32.store + (local.get $1) + (if (result i32) + (f64.ne + (local.get $0) + (f64.const 0) + ) + (block (result i32) + (local.set $0 + (call $28 
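+ ;; $28 appears to be frexp: the 2048-entry br_table above dispatches on the 11-bit biased exponent; exponent 0 (zero/subnormal) rescales by 2^64 and recurses with a -64 correction, exponent 2047 (inf/NaN) returns the input unchanged, and the default case stores e-1022 and renormalizes the mantissa into [0.5, 1) by overwriting the exponent field with 0x3FE.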
+ (f64.mul + (local.get $0) + (f64.const 18446744073709551615) + ) + (local.get $1) + ) + ) + (i32.add + (i32.load + (local.get $1) + ) + (i32.const -64) + ) + ) + (i32.const 0) + ) + ) + (br $label$2) + ) + (br $label$2) + ) + (i32.store + (local.get $1) + (i32.add + (i32.and + (i32.wrap_i64 + (local.get $3) + ) + (i32.const 2047) + ) + (i32.const -1022) + ) + ) + (local.set $0 + (f64.reinterpret_i64 + (i64.or + (i64.and + (local.get $2) + (i64.const -9218868437227405313) + ) + (i64.const 4602678819172646912) + ) + ) + ) + ) + (local.get $0) + ) + ) + (func $29 (; 42 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (block $label$1 (result i32) + (if (result i32) + (local.get $0) + (block (result i32) + (if + (i32.lt_u + (local.get $1) + (i32.const 128) + ) + (block + (i32.store8 + (local.get $0) + (local.get $1) + ) + (br $label$1 + (i32.const 1) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (i32.const 2048) + ) + (block + (i32.store8 + (local.get $0) + (i32.or + (i32.shr_u + (local.get $1) + (i32.const 6) + ) + (i32.const 192) + ) + ) + (i32.store8 offset=1 + (local.get $0) + (i32.or + (i32.and + (local.get $1) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (br $label$1 + (i32.const 2) + ) + ) + ) + (if + (i32.or + (i32.lt_u + (local.get $1) + (i32.const 55296) + ) + (i32.eq + (i32.and + (local.get $1) + (i32.const -8192) + ) + (i32.const 57344) + ) + ) + (block + (i32.store8 + (local.get $0) + (i32.or + (i32.shr_u + (local.get $1) + (i32.const 12) + ) + (i32.const 224) + ) + ) + (i32.store8 offset=1 + (local.get $0) + (i32.or + (i32.and + (i32.shr_u + (local.get $1) + (i32.const 6) + ) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.store8 offset=2 + (local.get $0) + (i32.or + (i32.and + (local.get $1) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (br $label$1 + (i32.const 3) + ) + ) + ) + (if (result i32) + (i32.lt_u + (i32.add + (local.get $1) + (i32.const -65536) + ) + (i32.const 1048576) + ) + (block (result i32) + (i32.store8 + (local.get $0) + (i32.or + (i32.shr_u + (local.get $1) + (i32.const 18) + ) + (i32.const 240) + ) + ) + (i32.store8 offset=1 + (local.get $0) + (i32.or + (i32.and + (i32.shr_u + (local.get $1) + (i32.const 12) + ) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.store8 offset=2 + (local.get $0) + (i32.or + (i32.and + (i32.shr_u + (local.get $1) + (i32.const 6) + ) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.store8 offset=3 + (local.get $0) + (i32.or + (i32.and + (local.get $1) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.const 4) + ) + (block (result i32) + (i32.store + (call $12) + (i32.const 84) + ) + (i32.const -1) + ) + ) + ) + (i32.const 1) + ) + ) + ) + (func $30 (; 43 ;) (type $2) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (i32.load8_s + (local.tee $2 + (i32.add + (local.get $0) + (i32.const 74) + ) + ) + ) + ) + (i32.store8 + (local.get $2) + (i32.or + (i32.add + (local.get $1) + (i32.const 255) + ) + (local.get $1) + ) + ) + (local.tee $0 + (if (result i32) + (i32.and + (local.tee $1 + (i32.load + (local.get $0) + ) + ) + (i32.const 8) + ) + (block (result i32) + (i32.store + (local.get $0) + (i32.or + (local.get $1) + (i32.const 32) + ) + ) + (i32.const -1) + ) + (block (result i32) + (i32.store offset=8 + (local.get $0) + (i32.const 0) + ) + (i32.store offset=4 + (local.get $0) + (i32.const 0) + ) + (i32.store offset=28 + (local.get $0) + (local.tee $1 + (i32.load offset=44 + (local.get $0) + ) + ) + ) + (i32.store 
offset=20 + (local.get $0) + (local.get $1) + ) + (i32.store offset=16 + (local.get $0) + (i32.add + (local.get $1) + (i32.load offset=48 + (local.get $0) + ) + ) + ) + (i32.const 0) + ) + ) + ) + ) + ) + (func $31 (; 44 ;) (type $2) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (local $3 i32) + (block $label$1 (result i32) + (block $label$2 + (block $label$3 + (br_if $label$3 + (i32.eqz + (i32.and + (local.tee $2 + (local.get $0) + ) + (i32.const 3) + ) + ) + ) + (local.set $1 + (local.get $2) + ) + (loop $label$4 + (if + (i32.eqz + (i32.load8_s + (local.get $0) + ) + ) + (block + (local.set $0 + (local.get $1) + ) + (br $label$2) + ) + ) + (br_if $label$4 + (i32.and + (local.tee $1 + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 3) + ) + ) + (br $label$3) + ) + ) + (loop $label$6 + (local.set $1 + (i32.add + (local.get $0) + (i32.const 4) + ) + ) + (if + (i32.eqz + (i32.and + (i32.xor + (i32.and + (local.tee $3 + (i32.load + (local.get $0) + ) + ) + (i32.const -2139062144) + ) + (i32.const -2139062144) + ) + (i32.add + (local.get $3) + (i32.const -16843009) + ) + ) + ) + (block + (local.set $0 + (local.get $1) + ) + (br $label$6) + ) + ) + ) + (if + (i32.shr_s + (i32.shl + (i32.and + (local.get $3) + (i32.const 255) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (loop $label$9 + (br_if $label$9 + (i32.load8_s + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + ) + ) + ) + ) + ) + (i32.sub + (local.get $0) + (local.get $2) + ) + ) + ) + (func $32 (; 45 ;) (type $6) (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (block $label$1 (result i32) + (local.set $3 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (i32.store8 + (local.tee $4 + (local.get $3) + ) + (local.tee $7 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + ) + (block $label$2 + (block $label$3 + (br_if $label$3 + (local.tee $5 + (i32.load + (local.tee $2 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + ) + (if + (call $30 + (local.get $0) + ) + (local.set $1 + (i32.const -1) + ) + (block + (local.set $5 + (i32.load + (local.get $2) + ) + ) + (br $label$3) + ) + ) + (br $label$2) + ) + (if + (i32.lt_u + (local.tee $6 + (i32.load + (local.tee $2 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (local.get $5) + ) + (if + (i32.ne + (local.tee $1 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (i32.load8_s offset=75 + (local.get $0) + ) + ) + (block + (i32.store + (local.get $2) + (i32.add + (local.get $6) + (i32.const 1) + ) + ) + (i32.store8 + (local.get $6) + (local.get $7) + ) + (br $label$2) + ) + ) + ) + (local.set $1 + (if (result i32) + (i32.eq + (call_indirect (type $0) + (local.get $0) + (local.get $4) + (i32.const 1) + (i32.add + (i32.and + (i32.load offset=36 + (local.get $0) + ) + (i32.const 3) + ) + (i32.const 2) + ) + ) + (i32.const 1) + ) + (i32.load8_u + (local.get $4) + ) + (i32.const -1) + ) + ) + ) + (global.set $global$1 + (local.get $3) + ) + (local.get $1) + ) + ) + (func $33 (; 46 ;) (type $12) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32) + (local $4 i32) + (local $5 i32) + (block $label$1 (result i32) + (local.set $4 + (i32.mul + (local.get $2) + (local.get $1) + ) + ) + (if + (i32.gt_s + (i32.load offset=76 + (local.get $3) + ) + (i32.const -1) + ) + (block + (local.set $5 + (i32.eqz + (call $20 + 
(local.get $3) + ) + ) + ) + (local.set $0 + (call $21 + (local.get $0) + (local.get $4) + (local.get $3) + ) + ) + (if + (i32.eqz + (local.get $5) + ) + (call $13 + (local.get $3) + ) + ) + ) + (local.set $0 + (call $21 + (local.get $0) + (local.get $4) + (local.get $3) + ) + ) + ) + (if + (i32.ne + (local.get $0) + (local.get $4) + ) + (local.set $2 + (i32.div_u + (local.get $0) + (local.get $1) + ) + ) + ) + (local.get $2) + ) + ) + (func $34 (; 47 ;) (type $6) (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (block $label$1 (result i32) + (local.set $2 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (i32.store + (local.tee $3 + (local.get $2) + ) + (local.get $1) + ) + (local.set $0 + (call $18 + (i32.load + (i32.const 1280) + ) + (local.get $0) + (local.get $3) + ) + ) + (global.set $global$1 + (local.get $2) + ) + (local.get $0) + ) + ) + (func $35 (; 48 ;) (type $2) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (local $3 i32) + (block $label$1 (result i32) + (local.set $2 + (if (result i32) + (i32.gt_s + (i32.load offset=76 + (local.tee $1 + (i32.load + (i32.const 1280) + ) + ) + ) + (i32.const -1) + ) + (call $20 + (local.get $1) + ) + (i32.const 0) + ) + ) + (local.set $0 + (block $label$4 (result i32) + (if (result i32) + (i32.lt_s + (call $36 + (local.get $0) + (local.get $1) + ) + (i32.const 0) + ) + (i32.const 1) + (block (result i32) + (if + (i32.ne + (i32.load8_s offset=75 + (local.get $1) + ) + (i32.const 10) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.load + (local.tee $3 + (i32.add + (local.get $1) + (i32.const 20) + ) + ) + ) + ) + (i32.load offset=16 + (local.get $1) + ) + ) + (block + (i32.store + (local.get $3) + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (i32.store8 + (local.get $0) + (i32.const 10) + ) + (br $label$4 + (i32.const 0) + ) + ) + ) + ) + (i32.lt_s + (call $32 + (local.get $1) + (i32.const 10) + ) + (i32.const 0) + ) + ) + ) + ) + ) + (if + (local.get $2) + (call $13 + (local.get $1) + ) + ) + (i32.shr_s + (i32.shl + (local.get $0) + (i32.const 31) + ) + (i32.const 31) + ) + ) + ) + (func $36 (; 49 ;) (type $6) (param $0 i32) (param $1 i32) (result i32) + (i32.add + (call $33 + (local.get $0) + (call $31 + (local.get $0) + ) + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + (func $37 (; 50 ;) (type $2) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (local $16 i32) + (local $17 i32) + (local $18 i32) + (local $19 i32) + (local $20 i32) + (local $21 i32) + (block $label$1 (result i32) + (local.set $14 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (local.set $18 + (local.get $14) + ) + (block $label$2 + (if + (i32.lt_u + (local.get $0) + (i32.const 245) + ) + (block + (local.set $3 + (i32.and + (i32.add + (local.get $0) + (i32.const 11) + ) + (i32.const -8) + ) + ) + (if + (i32.and + (local.tee $0 + (i32.shr_u + (local.tee $8 + (i32.load + (i32.const 4176) + ) + ) + (local.tee $2 + (i32.shr_u + (if (result i32) + (i32.lt_u + (local.get $0) + (i32.const 11) + ) + (local.tee $3 + (i32.const 16) + ) + (local.get $3) + ) + (i32.const 3) + ) + ) + ) + ) + (i32.const 3) + ) + (block + (local.set $4 + (i32.load + 
(local.tee $1 + (i32.add + (local.tee $7 + (i32.load + (local.tee $3 + (i32.add + (local.tee $2 + (i32.add + (i32.shl + (i32.shl + (local.tee $5 + (i32.add + (i32.xor + (i32.and + (local.get $0) + (i32.const 1) + ) + (i32.const 1) + ) + (local.get $2) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 4216) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.eq + (local.get $2) + (local.get $4) + ) + (i32.store + (i32.const 4176) + (i32.and + (local.get $8) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $5) + ) + (i32.const -1) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $4) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $4) + (i32.const 12) + ) + ) + ) + (local.get $7) + ) + (block + (i32.store + (local.get $0) + (local.get $2) + ) + (i32.store + (local.get $3) + (local.get $4) + ) + ) + (call $fimport$8) + ) + ) + ) + (i32.store offset=4 + (local.get $7) + (i32.or + (local.tee $0 + (i32.shl + (local.get $5) + (i32.const 3) + ) + ) + (i32.const 3) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $7) + (local.get $0) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (local.get $1) + ) + ) + ) + (if + (i32.gt_u + (local.get $3) + (local.tee $16 + (i32.load + (i32.const 4184) + ) + ) + ) + (block + (if + (local.get $0) + (block + (local.set $5 + (i32.and + (i32.shr_u + (local.tee $0 + (i32.add + (i32.and + (local.tee $0 + (i32.and + (i32.shl + (local.get $0) + (local.get $2) + ) + (i32.or + (local.tee $0 + (i32.shl + (i32.const 2) + (local.get $2) + ) + ) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + ) + ) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + (i32.const -1) + ) + ) + (i32.const 12) + ) + (i32.const 16) + ) + ) + (local.set $12 + (i32.load + (local.tee $5 + (i32.add + (local.tee $9 + (i32.load + (local.tee $2 + (i32.add + (local.tee $4 + (i32.add + (i32.shl + (i32.shl + (local.tee $11 + (i32.add + (i32.or + (i32.or + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $0) + (local.get $5) + ) + ) + (i32.const 5) + ) + (i32.const 8) + ) + ) + (local.get $5) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $2) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 4) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $2) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $2) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (i32.shr_u + (local.get $2) + (local.get $0) + ) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 4216) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.eq + (local.get $4) + (local.get $12) + ) + (i32.store + (i32.const 4176) + (local.tee $7 + (i32.and + (local.get $8) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $11) + ) + (i32.const -1) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $12) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $12) + (i32.const 12) + ) + ) + ) + (local.get $9) + ) + (block + (i32.store + (local.get $0) + (local.get $4) + ) + (i32.store + (local.get $2) + (local.get 
$12) + ) + (local.set $7 + (local.get $8) + ) + ) + (call $fimport$8) + ) + ) + ) + (i32.store offset=4 + (local.get $9) + (i32.or + (local.get $3) + (i32.const 3) + ) + ) + (i32.store offset=4 + (local.tee $4 + (i32.add + (local.get $9) + (local.get $3) + ) + ) + (i32.or + (local.tee $11 + (i32.sub + (i32.shl + (local.get $11) + (i32.const 3) + ) + (local.get $3) + ) + ) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $4) + (local.get $11) + ) + (local.get $11) + ) + (if + (local.get $16) + (block + (local.set $9 + (i32.load + (i32.const 4196) + ) + ) + (local.set $2 + (i32.add + (i32.shl + (i32.shl + (local.tee $0 + (i32.shr_u + (local.get $16) + (i32.const 3) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 4216) + ) + ) + (if + (i32.and + (local.get $7) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.load + (local.tee $3 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + (block + (local.set $6 + (local.get $3) + ) + (local.set $1 + (local.get $0) + ) + ) + ) + (block + (i32.store + (i32.const 4176) + (i32.or + (local.get $7) + (local.get $0) + ) + ) + (local.set $6 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + (local.set $1 + (local.get $2) + ) + ) + ) + (i32.store + (local.get $6) + (local.get $9) + ) + (i32.store offset=12 + (local.get $1) + (local.get $9) + ) + (i32.store offset=8 + (local.get $9) + (local.get $1) + ) + (i32.store offset=12 + (local.get $9) + (local.get $2) + ) + ) + ) + (i32.store + (i32.const 4184) + (local.get $11) + ) + (i32.store + (i32.const 4196) + (local.get $4) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (local.get $5) + ) + ) + ) + (if + (local.tee $6 + (i32.load + (i32.const 4180) + ) + ) + (block + (local.set $2 + (i32.and + (i32.shr_u + (local.tee $0 + (i32.add + (i32.and + (local.get $6) + (i32.sub + (i32.const 0) + (local.get $6) + ) + ) + (i32.const -1) + ) + ) + (i32.const 12) + ) + (i32.const 16) + ) + ) + (local.set $9 + (i32.sub + (i32.and + (i32.load offset=4 + (local.tee $2 + (i32.load + (i32.add + (i32.shl + (i32.add + (i32.or + (i32.or + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $0) + (local.get $2) + ) + ) + (i32.const 5) + ) + (i32.const 8) + ) + ) + (local.get $2) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 4) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 4480) + ) + ) + ) + ) + (i32.const -8) + ) + (local.get $3) + ) + ) + (local.set $1 + (local.get $2) + ) + (loop $label$25 + (block $label$26 + (if + (i32.eqz + (local.tee $0 + (i32.load offset=16 + (local.get $1) + ) + ) + ) + (br_if $label$26 + (i32.eqz + (local.tee $0 + (i32.load offset=20 + (local.get $1) + ) + ) + ) + ) + ) + (if + (local.tee $7 + (i32.lt_u + (local.tee $1 + (i32.sub + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $3) + ) + ) + (local.get $9) + ) + ) + (local.set $9 + (local.get $1) + ) + ) + (local.set $1 + (local.get $0) 
+ ) + (if + (local.get $7) + (local.set $2 + (local.get $0) + ) + ) + (br $label$25) + ) + ) + (if + (i32.lt_u + (local.get $2) + (local.tee $12 + (i32.load + (i32.const 4192) + ) + ) + ) + (call $fimport$8) + ) + (if + (i32.ge_u + (local.get $2) + (local.tee $13 + (i32.add + (local.get $2) + (local.get $3) + ) + ) + ) + (call $fimport$8) + ) + (local.set $15 + (i32.load offset=24 + (local.get $2) + ) + ) + (block $label$32 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $2) + ) + ) + (local.get $2) + ) + (block + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $2) + (i32.const 20) + ) + ) + ) + ) + ) + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $2) + (i32.const 16) + ) + ) + ) + ) + ) + (block + (local.set $4 + (i32.const 0) + ) + (br $label$32) + ) + ) + ) + (loop $label$36 + (if + (local.tee $7 + (i32.load + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $7) + ) + (local.set $1 + (local.get $11) + ) + (br $label$36) + ) + ) + (if + (local.tee $7 + (i32.load + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $7) + ) + (local.set $1 + (local.get $11) + ) + (br $label$36) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $12) + ) + (call $fimport$8) + (block + (i32.store + (local.get $1) + (i32.const 0) + ) + (local.set $4 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $11 + (i32.load offset=8 + (local.get $2) + ) + ) + (local.get $12) + ) + (call $fimport$8) + ) + (if + (i32.ne + (i32.load + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 12) + ) + ) + ) + (local.get $2) + ) + (call $fimport$8) + ) + (if + (i32.eq + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $2) + ) + (block + (i32.store + (local.get $7) + (local.get $0) + ) + (i32.store + (local.get $1) + (local.get $11) + ) + (local.set $4 + (local.get $0) + ) + ) + (call $fimport$8) + ) + ) + ) + ) + (block $label$46 + (if + (local.get $15) + (block + (if + (i32.eq + (local.get $2) + (i32.load + (local.tee $0 + (i32.add + (i32.shl + (local.tee $1 + (i32.load offset=28 + (local.get $2) + ) + ) + (i32.const 2) + ) + (i32.const 4480) + ) + ) + ) + ) + (block + (i32.store + (local.get $0) + (local.get $4) + ) + (if + (i32.eqz + (local.get $4) + ) + (block + (i32.store + (i32.const 4180) + (i32.and + (local.get $6) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + (br $label$46) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $15) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $15) + (i32.const 16) + ) + ) + ) + (local.get $2) + ) + (i32.store + (local.get $0) + (local.get $4) + ) + (i32.store offset=20 + (local.get $15) + (local.get $4) + ) + ) + (br_if $label$46 + (i32.eqz + (local.get $4) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $4) + (local.tee $0 + (i32.load + (i32.const 4192) + ) + ) + ) + (call $fimport$8) + ) + (i32.store offset=24 + (local.get $4) + (local.get $15) + ) + (if + (local.tee $1 + (i32.load offset=16 + (local.get $2) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $0) + ) + (call $fimport$8) + (block + (i32.store offset=16 + (local.get $4) + (local.get $1) + ) + (i32.store offset=24 + (local.get $1) + (local.get $4) + ) + ) + ) + ) + (if + (local.tee $0 + (i32.load 
offset=20 + (local.get $2) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + (block + (i32.store offset=20 + (local.get $4) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $4) + ) + ) + ) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $9) + (i32.const 16) + ) + (block + (i32.store offset=4 + (local.get $2) + (i32.or + (local.tee $0 + (i32.add + (local.get $9) + (local.get $3) + ) + ) + (i32.const 3) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $2) + (local.get $0) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + ) + (block + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $3) + (i32.const 3) + ) + ) + (i32.store offset=4 + (local.get $13) + (i32.or + (local.get $9) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $13) + (local.get $9) + ) + (local.get $9) + ) + (if + (local.get $16) + (block + (local.set $7 + (i32.load + (i32.const 4196) + ) + ) + (local.set $3 + (i32.add + (i32.shl + (i32.shl + (local.tee $0 + (i32.shr_u + (local.get $16) + (i32.const 3) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 4216) + ) + ) + (if + (i32.and + (local.get $8) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + (block + (local.set $10 + (local.get $1) + ) + (local.set $5 + (local.get $0) + ) + ) + ) + (block + (i32.store + (i32.const 4176) + (i32.or + (local.get $8) + (local.get $0) + ) + ) + (local.set $10 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (local.set $5 + (local.get $3) + ) + ) + ) + (i32.store + (local.get $10) + (local.get $7) + ) + (i32.store offset=12 + (local.get $5) + (local.get $7) + ) + (i32.store offset=8 + (local.get $7) + (local.get $5) + ) + (i32.store offset=12 + (local.get $7) + (local.get $3) + ) + ) + ) + (i32.store + (i32.const 4184) + (local.get $9) + ) + (i32.store + (i32.const 4196) + (local.get $13) + ) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + (local.set $0 + (local.get $3) + ) + ) + ) + (local.set $0 + (local.get $3) + ) + ) + ) + (if + (i32.gt_u + (local.get $0) + (i32.const -65) + ) + (local.set $0 + (i32.const -1) + ) + (block + (local.set $7 + (i32.and + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 11) + ) + ) + (i32.const -8) + ) + ) + (if + (local.tee $5 + (i32.load + (i32.const 4180) + ) + ) + (block + (local.set $17 + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $0) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $7) + (i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $7) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $3 + (i32.and + (i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $3) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $1) + (local.get $0) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + 
(local.get $1) + (local.get $0) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (local.set $3 + (i32.sub + (i32.const 0) + (local.get $7) + ) + ) + (block $label$78 + (block $label$79 + (block $label$80 + (if + (local.tee $1 + (i32.load + (i32.add + (i32.shl + (local.get $17) + (i32.const 2) + ) + (i32.const 4480) + ) + ) + ) + (block + (local.set $0 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $17) + (i32.const 1) + ) + ) + ) + (local.set $4 + (i32.const 0) + ) + (local.set $10 + (i32.shl + (local.get $7) + (if (result i32) + (i32.eq + (local.get $17) + (i32.const 31) + ) + (i32.const 0) + (local.get $0) + ) + ) + ) + (local.set $0 + (i32.const 0) + ) + (loop $label$84 + (if + (i32.lt_u + (local.tee $6 + (i32.sub + (i32.and + (i32.load offset=4 + (local.get $1) + ) + (i32.const -8) + ) + (local.get $7) + ) + ) + (local.get $3) + ) + (if + (local.get $6) + (block + (local.set $3 + (local.get $6) + ) + (local.set $0 + (local.get $1) + ) + ) + (block + (local.set $3 + (i32.const 0) + ) + (local.set $0 + (local.get $1) + ) + (br $label$79) + ) + ) + ) + (local.set $1 + (if (result i32) + (i32.or + (i32.eqz + (local.tee $19 + (i32.load offset=20 + (local.get $1) + ) + ) + ) + (i32.eq + (local.get $19) + (local.tee $6 + (i32.load + (i32.add + (i32.add + (local.get $1) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $10) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + (local.get $4) + (local.get $19) + ) + ) + (local.set $10 + (i32.shl + (local.get $10) + (i32.xor + (i32.and + (local.tee $4 + (i32.eqz + (local.get $6) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (if + (local.get $4) + (block + (local.set $4 + (local.get $1) + ) + (local.set $1 + (local.get $0) + ) + (br $label$80) + ) + (block + (local.set $4 + (local.get $1) + ) + (local.set $1 + (local.get $6) + ) + (br $label$84) + ) + ) + ) + ) + (block + (local.set $4 + (i32.const 0) + ) + (local.set $1 + (i32.const 0) + ) + ) + ) + ) + (br_if $label$79 + (local.tee $0 + (if (result i32) + (i32.and + (i32.eqz + (local.get $4) + ) + (i32.eqz + (local.get $1) + ) + ) + (block (result i32) + (if + (i32.eqz + (local.tee $0 + (i32.and + (local.get $5) + (i32.or + (local.tee $0 + (i32.shl + (i32.const 2) + (local.get $17) + ) + ) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $7) + ) + (br $label$2) + ) + ) + (local.set $10 + (i32.and + (i32.shr_u + (local.tee $0 + (i32.add + (i32.and + (local.get $0) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + (i32.const -1) + ) + ) + (i32.const 12) + ) + (i32.const 16) + ) + ) + (i32.load + (i32.add + (i32.shl + (i32.add + (i32.or + (i32.or + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $0) + (local.get $10) + ) + ) + (i32.const 5) + ) + (i32.const 8) + ) + ) + (local.get $10) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 4) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 
4480) + ) + ) + ) + (local.get $4) + ) + ) + ) + (local.set $4 + (local.get $1) + ) + (br $label$78) + ) + (loop $label$96 + (if + (local.tee $10 + (i32.lt_u + (local.tee $4 + (i32.sub + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $7) + ) + ) + (local.get $3) + ) + ) + (local.set $3 + (local.get $4) + ) + ) + (if + (local.get $10) + (local.set $1 + (local.get $0) + ) + ) + (if + (local.tee $4 + (i32.load offset=16 + (local.get $0) + ) + ) + (block + (local.set $0 + (local.get $4) + ) + (br $label$96) + ) + ) + (br_if $label$96 + (local.tee $0 + (i32.load offset=20 + (local.get $0) + ) + ) + ) + (local.set $4 + (local.get $1) + ) + ) + ) + (if + (local.get $4) + (if + (i32.lt_u + (local.get $3) + (i32.sub + (i32.load + (i32.const 4184) + ) + (local.get $7) + ) + ) + (block + (if + (i32.lt_u + (local.get $4) + (local.tee $12 + (i32.load + (i32.const 4192) + ) + ) + ) + (call $fimport$8) + ) + (if + (i32.ge_u + (local.get $4) + (local.tee $6 + (i32.add + (local.get $4) + (local.get $7) + ) + ) + ) + (call $fimport$8) + ) + (local.set $10 + (i32.load offset=24 + (local.get $4) + ) + ) + (block $label$104 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $4) + ) + ) + (local.get $4) + ) + (block + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $4) + (i32.const 20) + ) + ) + ) + ) + ) + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + ) + ) + ) + (block + (local.set $13 + (i32.const 0) + ) + (br $label$104) + ) + ) + ) + (loop $label$108 + (if + (local.tee $11 + (i32.load + (local.tee $9 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $11) + ) + (local.set $1 + (local.get $9) + ) + (br $label$108) + ) + ) + (if + (local.tee $11 + (i32.load + (local.tee $9 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $11) + ) + (local.set $1 + (local.get $9) + ) + (br $label$108) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $12) + ) + (call $fimport$8) + (block + (i32.store + (local.get $1) + (i32.const 0) + ) + (local.set $13 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $9 + (i32.load offset=8 + (local.get $4) + ) + ) + (local.get $12) + ) + (call $fimport$8) + ) + (if + (i32.ne + (i32.load + (local.tee $11 + (i32.add + (local.get $9) + (i32.const 12) + ) + ) + ) + (local.get $4) + ) + (call $fimport$8) + ) + (if + (i32.eq + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $4) + ) + (block + (i32.store + (local.get $11) + (local.get $0) + ) + (i32.store + (local.get $1) + (local.get $9) + ) + (local.set $13 + (local.get $0) + ) + ) + (call $fimport$8) + ) + ) + ) + ) + (block $label$118 + (if + (local.get $10) + (block + (if + (i32.eq + (local.get $4) + (i32.load + (local.tee $0 + (i32.add + (i32.shl + (local.tee $1 + (i32.load offset=28 + (local.get $4) + ) + ) + (i32.const 2) + ) + (i32.const 4480) + ) + ) + ) + ) + (block + (i32.store + (local.get $0) + (local.get $13) + ) + (if + (i32.eqz + (local.get $13) + ) + (block + (i32.store + (i32.const 4180) + (local.tee $2 + (i32.and + (local.get $5) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + ) + (br $label$118) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $10) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + ) + (if + (i32.eq + (i32.load + (local.tee $0 
+ (i32.add + (local.get $10) + (i32.const 16) + ) + ) + ) + (local.get $4) + ) + (i32.store + (local.get $0) + (local.get $13) + ) + (i32.store offset=20 + (local.get $10) + (local.get $13) + ) + ) + (if + (i32.eqz + (local.get $13) + ) + (block + (local.set $2 + (local.get $5) + ) + (br $label$118) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $13) + (local.tee $0 + (i32.load + (i32.const 4192) + ) + ) + ) + (call $fimport$8) + ) + (i32.store offset=24 + (local.get $13) + (local.get $10) + ) + (if + (local.tee $1 + (i32.load offset=16 + (local.get $4) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $0) + ) + (call $fimport$8) + (block + (i32.store offset=16 + (local.get $13) + (local.get $1) + ) + (i32.store offset=24 + (local.get $1) + (local.get $13) + ) + ) + ) + ) + (if + (local.tee $0 + (i32.load offset=20 + (local.get $4) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + (block + (i32.store offset=20 + (local.get $13) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $13) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + (block $label$136 + (if + (i32.lt_u + (local.get $3) + (i32.const 16) + ) + (block + (i32.store offset=4 + (local.get $4) + (i32.or + (local.tee $0 + (i32.add + (local.get $3) + (local.get $7) + ) + ) + (i32.const 3) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $4) + (local.get $0) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + ) + (block + (i32.store offset=4 + (local.get $4) + (i32.or + (local.get $7) + (i32.const 3) + ) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $6) + (local.get $3) + ) + (local.get $3) + ) + (local.set $0 + (i32.shr_u + (local.get $3) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $3) + (i32.const 256) + ) + (block + (local.set $3 + (i32.add + (i32.shl + (i32.shl + (local.get $0) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 4216) + ) + ) + (if + (i32.and + (local.tee $1 + (i32.load + (i32.const 4176) + ) + ) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + (block + (local.set $16 + (local.get $1) + ) + (local.set $8 + (local.get $0) + ) + ) + ) + (block + (i32.store + (i32.const 4176) + (i32.or + (local.get $1) + (local.get $0) + ) + ) + (local.set $16 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (local.set $8 + (local.get $3) + ) + ) + ) + (i32.store + (local.get $16) + (local.get $6) + ) + (i32.store offset=12 + (local.get $8) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $8) + ) + (i32.store offset=12 + (local.get $6) + (local.get $3) + ) + (br $label$136) + ) + ) + (local.set $1 + (i32.add + (i32.shl + (local.tee $5 + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $3) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $3) + (i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $3) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $5 + (i32.and + 
(i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $5) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $1) + (local.get $0) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $1) + (local.get $0) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (i32.const 2) + ) + (i32.const 4480) + ) + ) + (i32.store offset=28 + (local.get $6) + (local.get $5) + ) + (i32.store offset=4 + (local.tee $0 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + (i32.const 0) + ) + (i32.store + (local.get $0) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (local.get $2) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $5) + ) + ) + ) + ) + (block + (i32.store + (i32.const 4180) + (i32.or + (local.get $2) + (local.get $0) + ) + ) + (i32.store + (local.get $1) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $1) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$136) + ) + ) + (local.set $0 + (i32.load + (local.get $1) + ) + ) + (local.set $1 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $5) + (i32.const 1) + ) + ) + ) + (local.set $5 + (i32.shl + (local.get $3) + (if (result i32) + (i32.eq + (local.get $5) + (i32.const 31) + ) + (i32.const 0) + (local.get $1) + ) + ) + ) + (block $label$151 + (block $label$152 + (block $label$153 + (loop $label$154 + (br_if $label$152 + (i32.eq + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $3) + ) + ) + (local.set $2 + (i32.shl + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$153 + (i32.eqz + (local.tee $1 + (i32.load + (local.tee $5 + (i32.add + (i32.add + (local.get $0) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $5) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $5 + (local.get $2) + ) + (local.set $0 + (local.get $1) + ) + (br $label$154) + ) + ) + (if + (i32.lt_u + (local.get $5) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + (block + (i32.store + (local.get $5) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $0) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$136) + ) + ) + (br $label$151) + ) + (if + (i32.and + (i32.ge_u + (local.tee $2 + (i32.load + (local.tee $3 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + ) + (local.tee $1 + (i32.load + (i32.const 4192) + ) + ) + ) + (i32.ge_u + (local.get $0) + (local.get $1) + ) + ) + (block + (i32.store offset=12 + (local.get $2) + (local.get $6) + ) + (i32.store + (local.get $3) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $2) + ) + (i32.store offset=12 + (local.get $6) + (local.get $0) + ) + (i32.store offset=24 + (local.get $6) + (i32.const 0) + ) + ) + (call $fimport$8) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $4) + (i32.const 8) + ) + ) + ) + (local.set $0 + (local.get $7) + ) + ) + (local.set $0 + (local.get $7) + ) + ) + ) + (local.set $0 + (local.get $7) + ) + ) + ) + ) + ) + ) + (if + (i32.ge_u + (local.tee 
$1 + (i32.load + (i32.const 4184) + ) + ) + (local.get $0) + ) + (block + (local.set $2 + (i32.load + (i32.const 4196) + ) + ) + (if + (i32.gt_u + (local.tee $3 + (i32.sub + (local.get $1) + (local.get $0) + ) + ) + (i32.const 15) + ) + (block + (i32.store + (i32.const 4196) + (local.tee $1 + (i32.add + (local.get $2) + (local.get $0) + ) + ) + ) + (i32.store + (i32.const 4184) + (local.get $3) + ) + (i32.store offset=4 + (local.get $1) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $1) + (local.get $3) + ) + (local.get $3) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + ) + (block + (i32.store + (i32.const 4184) + (i32.const 0) + ) + (i32.store + (i32.const 4196) + (i32.const 0) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $1) + (i32.const 3) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $2) + (local.get $1) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.gt_u + (local.tee $10 + (i32.load + (i32.const 4188) + ) + ) + (local.get $0) + ) + (block + (i32.store + (i32.const 4188) + (local.tee $3 + (i32.sub + (local.get $10) + (local.get $0) + ) + ) + ) + (i32.store + (i32.const 4200) + (local.tee $1 + (i32.add + (local.tee $2 + (i32.load + (i32.const 4200) + ) + ) + (local.get $0) + ) + ) + ) + (i32.store offset=4 + (local.get $1) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.le_u + (local.tee $6 + (i32.and + (local.tee $8 + (i32.add + (local.tee $1 + (if (result i32) + (i32.load + (i32.const 4648) + ) + (i32.load + (i32.const 4656) + ) + (block (result i32) + (i32.store + (i32.const 4656) + (i32.const 4096) + ) + (i32.store + (i32.const 4652) + (i32.const 4096) + ) + (i32.store + (i32.const 4660) + (i32.const -1) + ) + (i32.store + (i32.const 4664) + (i32.const -1) + ) + (i32.store + (i32.const 4668) + (i32.const 0) + ) + (i32.store + (i32.const 4620) + (i32.const 0) + ) + (i32.store + (local.get $18) + (local.tee $1 + (i32.xor + (i32.and + (local.get $18) + (i32.const -16) + ) + (i32.const 1431655768) + ) + ) + ) + (i32.store + (i32.const 4648) + (local.get $1) + ) + (i32.const 4096) + ) + ) + ) + (local.tee $13 + (i32.add + (local.get $0) + (i32.const 47) + ) + ) + ) + ) + (local.tee $4 + (i32.sub + (i32.const 0) + (local.get $1) + ) + ) + ) + ) + (local.get $0) + ) + (block + (global.set $global$1 + (local.get $14) + ) + (return + (i32.const 0) + ) + ) + ) + (if + (local.tee $2 + (i32.load + (i32.const 4616) + ) + ) + (if + (i32.or + (i32.le_u + (local.tee $1 + (i32.add + (local.tee $3 + (i32.load + (i32.const 4608) + ) + ) + (local.get $6) + ) + ) + (local.get $3) + ) + (i32.gt_u + (local.get $1) + (local.get $2) + ) + ) + (block + (global.set $global$1 + (local.get $14) + ) + (return + (i32.const 0) + ) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $0) + (i32.const 48) + ) + ) + (block $label$171 + (block $label$172 + (if + (i32.eqz + (i32.and + (i32.load + (i32.const 4620) + ) + (i32.const 4) + ) + ) + (block + (block $label$174 + (block $label$175 + (block $label$176 + (br_if $label$176 + (i32.eqz + (local.tee $3 + (i32.load + (i32.const 4200) + 
) + ) + ) + ) + (local.set $2 + (i32.const 4624) + ) + (loop $label$177 + (block $label$178 + (if + (i32.le_u + (local.tee $1 + (i32.load + (local.get $2) + ) + ) + (local.get $3) + ) + (br_if $label$178 + (i32.gt_u + (i32.add + (local.get $1) + (i32.load + (local.tee $5 + (i32.add + (local.get $2) + (i32.const 4) + ) + ) + ) + ) + (local.get $3) + ) + ) + ) + (br_if $label$176 + (i32.eqz + (local.tee $1 + (i32.load offset=8 + (local.get $2) + ) + ) + ) + ) + (local.set $2 + (local.get $1) + ) + (br $label$177) + ) + ) + (if + (i32.lt_u + (local.tee $3 + (i32.and + (i32.sub + (local.get $8) + (local.get $10) + ) + (local.get $4) + ) + ) + (i32.const 2147483647) + ) + (if + (i32.eq + (local.tee $1 + (call $45 + (local.get $3) + ) + ) + (i32.add + (i32.load + (local.get $2) + ) + (i32.load + (local.get $5) + ) + ) + ) + (br_if $label$172 + (i32.ne + (local.get $1) + (i32.const -1) + ) + ) + (block + (local.set $2 + (local.get $1) + ) + (local.set $1 + (local.get $3) + ) + (br $label$175) + ) + ) + ) + (br $label$174) + ) + (if + (i32.ne + (local.tee $1 + (call $45 + (i32.const 0) + ) + ) + (i32.const -1) + ) + (block + (local.set $2 + (i32.sub + (i32.and + (i32.add + (local.tee $5 + (i32.add + (local.tee $2 + (i32.load + (i32.const 4652) + ) + ) + (i32.const -1) + ) + ) + (local.tee $3 + (local.get $1) + ) + ) + (i32.sub + (i32.const 0) + (local.get $2) + ) + ) + (local.get $3) + ) + ) + (local.set $4 + (i32.add + (local.tee $3 + (i32.add + (if (result i32) + (i32.and + (local.get $5) + (local.get $3) + ) + (local.get $2) + (i32.const 0) + ) + (local.get $6) + ) + ) + (local.tee $5 + (i32.load + (i32.const 4608) + ) + ) + ) + ) + (if + (i32.and + (i32.gt_u + (local.get $3) + (local.get $0) + ) + (i32.lt_u + (local.get $3) + (i32.const 2147483647) + ) + ) + (block + (if + (local.tee $2 + (i32.load + (i32.const 4616) + ) + ) + (br_if $label$174 + (i32.or + (i32.le_u + (local.get $4) + (local.get $5) + ) + (i32.gt_u + (local.get $4) + (local.get $2) + ) + ) + ) + ) + (br_if $label$172 + (i32.eq + (local.tee $2 + (call $45 + (local.get $3) + ) + ) + (local.get $1) + ) + ) + (local.set $1 + (local.get $3) + ) + (br $label$175) + ) + ) + ) + ) + (br $label$174) + ) + (local.set $5 + (i32.sub + (i32.const 0) + (local.get $1) + ) + ) + (if + (i32.and + (i32.gt_u + (local.get $7) + (local.get $1) + ) + (i32.and + (i32.lt_u + (local.get $1) + (i32.const 2147483647) + ) + (i32.ne + (local.get $2) + (i32.const -1) + ) + ) + ) + (if + (i32.lt_u + (local.tee $3 + (i32.and + (i32.add + (i32.sub + (local.get $13) + (local.get $1) + ) + (local.tee $3 + (i32.load + (i32.const 4656) + ) + ) + ) + (i32.sub + (i32.const 0) + (local.get $3) + ) + ) + ) + (i32.const 2147483647) + ) + (if + (i32.eq + (call $45 + (local.get $3) + ) + (i32.const -1) + ) + (block + (drop + (call $45 + (local.get $5) + ) + ) + (br $label$174) + ) + (local.set $3 + (i32.add + (local.get $3) + (local.get $1) + ) + ) + ) + (local.set $3 + (local.get $1) + ) + ) + (local.set $3 + (local.get $1) + ) + ) + (if + (i32.ne + (local.get $2) + (i32.const -1) + ) + (block + (local.set $1 + (local.get $2) + ) + (br $label$172) + ) + ) + ) + (i32.store + (i32.const 4620) + (i32.or + (i32.load + (i32.const 4620) + ) + (i32.const 4) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $6) + (i32.const 2147483647) + ) + (if + (i32.and + (i32.lt_u + (local.tee $1 + (call $45 + (local.get $6) + ) + ) + (local.tee $3 + (call $45 + (i32.const 0) + ) + ) + ) + (i32.and + (i32.ne + (local.get $1) + (i32.const -1) + ) + (i32.ne + (local.get $3) + (i32.const -1) + ) 
+ ) + ) + (br_if $label$172 + (i32.gt_u + (local.tee $3 + (i32.sub + (local.get $3) + (local.get $1) + ) + ) + (i32.add + (local.get $0) + (i32.const 40) + ) + ) + ) + ) + ) + (br $label$171) + ) + (i32.store + (i32.const 4608) + (local.tee $2 + (i32.add + (i32.load + (i32.const 4608) + ) + (local.get $3) + ) + ) + ) + (if + (i32.gt_u + (local.get $2) + (i32.load + (i32.const 4612) + ) + ) + (i32.store + (i32.const 4612) + (local.get $2) + ) + ) + (block $label$198 + (if + (local.tee $8 + (i32.load + (i32.const 4200) + ) + ) + (block + (local.set $2 + (i32.const 4624) + ) + (block $label$200 + (block $label$201 + (loop $label$202 + (br_if $label$201 + (i32.eq + (local.get $1) + (i32.add + (local.tee $4 + (i32.load + (local.get $2) + ) + ) + (local.tee $5 + (i32.load + (local.tee $7 + (i32.add + (local.get $2) + (i32.const 4) + ) + ) + ) + ) + ) + ) + ) + (br_if $label$202 + (local.tee $2 + (i32.load offset=8 + (local.get $2) + ) + ) + ) + ) + (br $label$200) + ) + (if + (i32.eqz + (i32.and + (i32.load offset=12 + (local.get $2) + ) + (i32.const 8) + ) + ) + (if + (i32.and + (i32.lt_u + (local.get $8) + (local.get $1) + ) + (i32.ge_u + (local.get $8) + (local.get $4) + ) + ) + (block + (i32.store + (local.get $7) + (i32.add + (local.get $5) + (local.get $3) + ) + ) + (local.set $5 + (i32.load + (i32.const 4188) + ) + ) + (local.set $1 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $2 + (i32.add + (local.get $8) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.store + (i32.const 4200) + (local.tee $2 + (i32.add + (local.get $8) + (if (result i32) + (i32.and + (local.get $2) + (i32.const 7) + ) + (local.get $1) + (local.tee $1 + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (i32.const 4188) + (local.tee $1 + (i32.add + (i32.sub + (local.get $3) + (local.get $1) + ) + (local.get $5) + ) + ) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $1) + (i32.const 1) + ) + ) + (i32.store offset=4 + (i32.add + (local.get $2) + (local.get $1) + ) + (i32.const 40) + ) + (i32.store + (i32.const 4204) + (i32.load + (i32.const 4664) + ) + ) + (br $label$198) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.tee $2 + (i32.load + (i32.const 4192) + ) + ) + ) + (block + (i32.store + (i32.const 4192) + (local.get $1) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + (local.set $10 + (i32.add + (local.get $1) + (local.get $3) + ) + ) + (local.set $5 + (i32.const 4624) + ) + (block $label$208 + (block $label$209 + (loop $label$210 + (br_if $label$209 + (i32.eq + (i32.load + (local.get $5) + ) + (local.get $10) + ) + ) + (br_if $label$210 + (local.tee $5 + (i32.load offset=8 + (local.get $5) + ) + ) + ) + (local.set $5 + (i32.const 4624) + ) + ) + (br $label$208) + ) + (if + (i32.and + (i32.load offset=12 + (local.get $5) + ) + (i32.const 8) + ) + (local.set $5 + (i32.const 4624) + ) + (block + (i32.store + (local.get $5) + (local.get $1) + ) + (i32.store + (local.tee $5 + (i32.add + (local.get $5) + (i32.const 4) + ) + ) + (i32.add + (i32.load + (local.get $5) + ) + (local.get $3) + ) + ) + (local.set $7 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $4 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (local.set $3 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $5 + (i32.add + (local.get $10) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (local.set $6 + (i32.add + (local.tee $13 + (i32.add + (local.get $1) + (if (result i32) + (i32.and + (local.get $4) + (i32.const 7) + ) + (local.get $7) + (i32.const 0) + ) + ) + ) + 
(local.get $0) + ) + ) + (local.set $7 + (i32.sub + (i32.sub + (local.tee $4 + (i32.add + (local.get $10) + (if (result i32) + (i32.and + (local.get $5) + (i32.const 7) + ) + (local.get $3) + (i32.const 0) + ) + ) + ) + (local.get $13) + ) + (local.get $0) + ) + ) + (i32.store offset=4 + (local.get $13) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + (block $label$217 + (if + (i32.eq + (local.get $4) + (local.get $8) + ) + (block + (i32.store + (i32.const 4188) + (local.tee $0 + (i32.add + (i32.load + (i32.const 4188) + ) + (local.get $7) + ) + ) + ) + (i32.store + (i32.const 4200) + (local.get $6) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $0) + (i32.const 1) + ) + ) + ) + (block + (if + (i32.eq + (local.get $4) + (i32.load + (i32.const 4196) + ) + ) + (block + (i32.store + (i32.const 4184) + (local.tee $0 + (i32.add + (i32.load + (i32.const 4184) + ) + (local.get $7) + ) + ) + ) + (i32.store + (i32.const 4196) + (local.get $6) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $0) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $6) + (local.get $0) + ) + (local.get $0) + ) + (br $label$217) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (local.tee $0 + (if (result i32) + (i32.eq + (i32.and + (local.tee $0 + (i32.load offset=4 + (local.get $4) + ) + ) + (i32.const 3) + ) + (i32.const 1) + ) + (block (result i32) + (local.set $11 + (i32.and + (local.get $0) + (i32.const -8) + ) + ) + (local.set $1 + (i32.shr_u + (local.get $0) + (i32.const 3) + ) + ) + (block $label$222 + (if + (i32.lt_u + (local.get $0) + (i32.const 256) + ) + (block + (local.set $5 + (i32.load offset=12 + (local.get $4) + ) + ) + (block $label$224 + (if + (i32.ne + (local.tee $3 + (i32.load offset=8 + (local.get $4) + ) + ) + (local.tee $0 + (i32.add + (i32.shl + (i32.shl + (local.get $1) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 4216) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $3) + (local.get $2) + ) + (call $fimport$8) + ) + (br_if $label$224 + (i32.eq + (i32.load offset=12 + (local.get $3) + ) + (local.get $4) + ) + ) + (call $fimport$8) + ) + ) + ) + (if + (i32.eq + (local.get $5) + (local.get $3) + ) + (block + (i32.store + (i32.const 4176) + (i32.and + (i32.load + (i32.const 4176) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + (br $label$222) + ) + ) + (block $label$228 + (if + (i32.eq + (local.get $5) + (local.get $0) + ) + (local.set $20 + (i32.add + (local.get $5) + (i32.const 8) + ) + ) + (block + (if + (i32.lt_u + (local.get $5) + (local.get $2) + ) + (call $fimport$8) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $5) + (i32.const 8) + ) + ) + ) + (local.get $4) + ) + (block + (local.set $20 + (local.get $0) + ) + (br $label$228) + ) + ) + (call $fimport$8) + ) + ) + ) + (i32.store offset=12 + (local.get $3) + (local.get $5) + ) + (i32.store + (local.get $20) + (local.get $3) + ) + ) + (block + (local.set $8 + (i32.load offset=24 + (local.get $4) + ) + ) + (block $label$234 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $4) + ) + ) + (local.get $4) + ) + (block + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.tee $3 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + (i32.const 4) + ) + ) + ) + ) + ) + (if + (local.tee $0 + (i32.load + (local.get $3) + ) + ) + (local.set $1 + (local.get $3) + ) + (block + (local.set $12 + (i32.const 0) + ) + (br $label$234) + ) + ) + ) + (loop $label$239 + (if + (local.tee 
$3 + (i32.load + (local.tee $5 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $3) + ) + (local.set $1 + (local.get $5) + ) + (br $label$239) + ) + ) + (if + (local.tee $3 + (i32.load + (local.tee $5 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $3) + ) + (local.set $1 + (local.get $5) + ) + (br $label$239) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $2) + ) + (call $fimport$8) + (block + (i32.store + (local.get $1) + (i32.const 0) + ) + (local.set $12 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $5 + (i32.load offset=8 + (local.get $4) + ) + ) + (local.get $2) + ) + (call $fimport$8) + ) + (if + (i32.ne + (i32.load + (local.tee $3 + (i32.add + (local.get $5) + (i32.const 12) + ) + ) + ) + (local.get $4) + ) + (call $fimport$8) + ) + (if + (i32.eq + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $4) + ) + (block + (i32.store + (local.get $3) + (local.get $0) + ) + (i32.store + (local.get $1) + (local.get $5) + ) + (local.set $12 + (local.get $0) + ) + ) + (call $fimport$8) + ) + ) + ) + ) + (br_if $label$222 + (i32.eqz + (local.get $8) + ) + ) + (block $label$249 + (if + (i32.eq + (local.get $4) + (i32.load + (local.tee $0 + (i32.add + (i32.shl + (local.tee $1 + (i32.load offset=28 + (local.get $4) + ) + ) + (i32.const 2) + ) + (i32.const 4480) + ) + ) + ) + ) + (block + (i32.store + (local.get $0) + (local.get $12) + ) + (br_if $label$249 + (local.get $12) + ) + (i32.store + (i32.const 4180) + (i32.and + (i32.load + (i32.const 4180) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + (br $label$222) + ) + (block + (if + (i32.lt_u + (local.get $8) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $8) + (i32.const 16) + ) + ) + ) + (local.get $4) + ) + (i32.store + (local.get $0) + (local.get $12) + ) + (i32.store offset=20 + (local.get $8) + (local.get $12) + ) + ) + (br_if $label$222 + (i32.eqz + (local.get $12) + ) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $12) + (local.tee $1 + (i32.load + (i32.const 4192) + ) + ) + ) + (call $fimport$8) + ) + (i32.store offset=24 + (local.get $12) + (local.get $8) + ) + (if + (local.tee $3 + (i32.load + (local.tee $0 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $3) + (local.get $1) + ) + (call $fimport$8) + (block + (i32.store offset=16 + (local.get $12) + (local.get $3) + ) + (i32.store offset=24 + (local.get $3) + (local.get $12) + ) + ) + ) + ) + (br_if $label$222 + (i32.eqz + (local.tee $0 + (i32.load offset=4 + (local.get $0) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + (block + (i32.store offset=20 + (local.get $12) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $12) + ) + ) + ) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $11) + (local.get $7) + ) + ) + (i32.add + (local.get $4) + (local.get $11) + ) + ) + (local.get $4) + ) + ) + (i32.const 4) + ) + ) + (i32.and + (i32.load + (local.get $0) + ) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $7) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $6) + (local.get $7) + ) + (local.get $7) + ) + (local.set $0 + (i32.shr_u + (local.get $7) + (i32.const 3) + ) + ) + (if + (i32.lt_u + 
(local.get $7) + (i32.const 256) + ) + (block + (local.set $3 + (i32.add + (i32.shl + (i32.shl + (local.get $0) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 4216) + ) + ) + (block $label$263 + (if + (i32.and + (local.tee $1 + (i32.load + (i32.const 4176) + ) + ) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (block + (if + (i32.ge_u + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 4192) + ) + ) + (block + (local.set $21 + (local.get $1) + ) + (local.set $9 + (local.get $0) + ) + (br $label$263) + ) + ) + (call $fimport$8) + ) + (block + (i32.store + (i32.const 4176) + (i32.or + (local.get $1) + (local.get $0) + ) + ) + (local.set $21 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (local.set $9 + (local.get $3) + ) + ) + ) + ) + (i32.store + (local.get $21) + (local.get $6) + ) + (i32.store offset=12 + (local.get $9) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $9) + ) + (i32.store offset=12 + (local.get $6) + (local.get $3) + ) + (br $label$217) + ) + ) + (local.set $3 + (i32.add + (i32.shl + (local.tee $2 + (block $label$267 (result i32) + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $7) + (i32.const 8) + ) + ) + (block (result i32) + (drop + (br_if $label$267 + (i32.const 31) + (i32.gt_u + (local.get $7) + (i32.const 16777215) + ) + ) + ) + (i32.or + (i32.and + (i32.shr_u + (local.get $7) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $3 + (i32.and + (i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $3) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $1) + (local.get $0) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $1) + (local.get $0) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + ) + (i32.const 2) + ) + (i32.const 4480) + ) + ) + (i32.store offset=28 + (local.get $6) + (local.get $2) + ) + (i32.store offset=4 + (local.tee $0 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + (i32.const 0) + ) + (i32.store + (local.get $0) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (local.tee $1 + (i32.load + (i32.const 4180) + ) + ) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $2) + ) + ) + ) + ) + (block + (i32.store + (i32.const 4180) + (i32.or + (local.get $1) + (local.get $0) + ) + ) + (i32.store + (local.get $3) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $3) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$217) + ) + ) + (local.set $0 + (i32.load + (local.get $3) + ) + ) + (local.set $1 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $2) + (i32.const 1) + ) + ) + ) + (local.set $2 + (i32.shl + (local.get $7) + (if (result i32) + (i32.eq + (local.get $2) + (i32.const 31) + ) + (i32.const 0) + (local.get $1) + ) + ) + ) + (block $label$273 + (block $label$274 + (block $label$275 + (loop $label$276 + (br_if $label$274 + (i32.eq + (i32.and + (i32.load offset=4 + 
(local.get $0) + ) + (i32.const -8) + ) + (local.get $7) + ) + ) + (local.set $3 + (i32.shl + (local.get $2) + (i32.const 1) + ) + ) + (br_if $label$275 + (i32.eqz + (local.tee $1 + (i32.load + (local.tee $2 + (i32.add + (i32.add + (local.get $0) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $2) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $2 + (local.get $3) + ) + (local.set $0 + (local.get $1) + ) + (br $label$276) + ) + ) + (if + (i32.lt_u + (local.get $2) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + (block + (i32.store + (local.get $2) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $0) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$217) + ) + ) + (br $label$273) + ) + (if + (i32.and + (i32.ge_u + (local.tee $2 + (i32.load + (local.tee $3 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + ) + (local.tee $1 + (i32.load + (i32.const 4192) + ) + ) + ) + (i32.ge_u + (local.get $0) + (local.get $1) + ) + ) + (block + (i32.store offset=12 + (local.get $2) + (local.get $6) + ) + (i32.store + (local.get $3) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $2) + ) + (i32.store offset=12 + (local.get $6) + (local.get $0) + ) + (i32.store offset=24 + (local.get $6) + (i32.const 0) + ) + ) + (call $fimport$8) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $13) + (i32.const 8) + ) + ) + ) + ) + ) + (loop $label$281 + (block $label$282 + (if + (i32.le_u + (local.tee $2 + (i32.load + (local.get $5) + ) + ) + (local.get $8) + ) + (br_if $label$282 + (i32.gt_u + (local.tee $13 + (i32.add + (local.get $2) + (i32.load offset=4 + (local.get $5) + ) + ) + ) + (local.get $8) + ) + ) + ) + (local.set $5 + (i32.load offset=8 + (local.get $5) + ) + ) + (br $label$281) + ) + ) + (local.set $2 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $5 + (i32.add + (local.tee $7 + (i32.add + (local.get $13) + (i32.const -47) + ) + ) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (local.set $10 + (i32.add + (local.tee $7 + (if (result i32) + (i32.lt_u + (local.tee $2 + (i32.add + (local.get $7) + (if (result i32) + (i32.and + (local.get $5) + (i32.const 7) + ) + (local.get $2) + (i32.const 0) + ) + ) + ) + (local.tee $12 + (i32.add + (local.get $8) + (i32.const 16) + ) + ) + ) + (local.get $8) + (local.get $2) + ) + ) + (i32.const 8) + ) + ) + (local.set $5 + (i32.add + (local.get $7) + (i32.const 24) + ) + ) + (local.set $9 + (i32.add + (local.get $3) + (i32.const -40) + ) + ) + (local.set $2 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $4 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.store + (i32.const 4200) + (local.tee $4 + (i32.add + (local.get $1) + (if (result i32) + (i32.and + (local.get $4) + (i32.const 7) + ) + (local.get $2) + (local.tee $2 + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (i32.const 4188) + (local.tee $2 + (i32.sub + (local.get $9) + (local.get $2) + ) + ) + ) + (i32.store offset=4 + (local.get $4) + (i32.or + (local.get $2) + (i32.const 1) + ) + ) + (i32.store offset=4 + (i32.add + (local.get $4) + (local.get $2) + ) + (i32.const 40) + ) + (i32.store + (i32.const 4204) + (i32.load + (i32.const 4664) + ) + ) + (i32.store + (local.tee $2 + (i32.add + (local.get $7) + (i32.const 4) + ) + ) + (i32.const 27) + ) + (i64.store align=4 + (local.get $10) + (i64.load align=4 + (i32.const 
4624) + ) + ) + (i64.store offset=8 align=4 + (local.get $10) + (i64.load align=4 + (i32.const 4632) + ) + ) + (i32.store + (i32.const 4624) + (local.get $1) + ) + (i32.store + (i32.const 4628) + (local.get $3) + ) + (i32.store + (i32.const 4636) + (i32.const 0) + ) + (i32.store + (i32.const 4632) + (local.get $10) + ) + (local.set $1 + (local.get $5) + ) + (loop $label$290 + (i32.store + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i32.const 7) + ) + (br_if $label$290 + (i32.lt_u + (i32.add + (local.get $1) + (i32.const 4) + ) + (local.get $13) + ) + ) + ) + (if + (i32.ne + (local.get $7) + (local.get $8) + ) + (block + (i32.store + (local.get $2) + (i32.and + (i32.load + (local.get $2) + ) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $8) + (i32.or + (local.tee $4 + (i32.sub + (local.get $7) + (local.get $8) + ) + ) + (i32.const 1) + ) + ) + (i32.store + (local.get $7) + (local.get $4) + ) + (local.set $1 + (i32.shr_u + (local.get $4) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $4) + (i32.const 256) + ) + (block + (local.set $2 + (i32.add + (i32.shl + (i32.shl + (local.get $1) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 4216) + ) + ) + (if + (i32.and + (local.tee $3 + (i32.load + (i32.const 4176) + ) + ) + (local.tee $1 + (i32.shl + (i32.const 1) + (local.get $1) + ) + ) + ) + (if + (i32.lt_u + (local.tee $1 + (i32.load + (local.tee $3 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + (block + (local.set $15 + (local.get $3) + ) + (local.set $11 + (local.get $1) + ) + ) + ) + (block + (i32.store + (i32.const 4176) + (i32.or + (local.get $3) + (local.get $1) + ) + ) + (local.set $15 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + (local.set $11 + (local.get $2) + ) + ) + ) + (i32.store + (local.get $15) + (local.get $8) + ) + (i32.store offset=12 + (local.get $11) + (local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $11) + ) + (i32.store offset=12 + (local.get $8) + (local.get $2) + ) + (br $label$198) + ) + ) + (local.set $2 + (i32.add + (i32.shl + (local.tee $5 + (if (result i32) + (local.tee $1 + (i32.shr_u + (local.get $4) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $4) + (i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $4) + (i32.add + (local.tee $1 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $1 + (i32.and + (i32.shr_u + (i32.add + (local.tee $3 + (i32.shl + (local.get $1) + (local.tee $2 + (i32.and + (i32.shr_u + (i32.add + (local.get $1) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $2) + ) + (local.tee $1 + (i32.and + (i32.shr_u + (i32.add + (local.tee $3 + (i32.shl + (local.get $3) + (local.get $1) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $3) + (local.get $1) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $1) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (i32.const 2) + ) + (i32.const 4480) + ) + ) + (i32.store offset=28 + (local.get $8) + (local.get $5) + ) + (i32.store offset=20 + (local.get $8) + (i32.const 0) + ) + (i32.store + (local.get $12) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (local.tee $3 + (i32.load + (i32.const 4180) + ) + ) + (local.tee $1 + (i32.shl + 
(i32.const 1) + (local.get $5) + ) + ) + ) + ) + (block + (i32.store + (i32.const 4180) + (i32.or + (local.get $3) + (local.get $1) + ) + ) + (i32.store + (local.get $2) + (local.get $8) + ) + (i32.store offset=24 + (local.get $8) + (local.get $2) + ) + (i32.store offset=12 + (local.get $8) + (local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $8) + ) + (br $label$198) + ) + ) + (local.set $1 + (i32.load + (local.get $2) + ) + ) + (local.set $3 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $5) + (i32.const 1) + ) + ) + ) + (local.set $5 + (i32.shl + (local.get $4) + (if (result i32) + (i32.eq + (local.get $5) + (i32.const 31) + ) + (i32.const 0) + (local.get $3) + ) + ) + ) + (block $label$304 + (block $label$305 + (block $label$306 + (loop $label$307 + (br_if $label$305 + (i32.eq + (i32.and + (i32.load offset=4 + (local.get $1) + ) + (i32.const -8) + ) + (local.get $4) + ) + ) + (local.set $2 + (i32.shl + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$306 + (i32.eqz + (local.tee $3 + (i32.load + (local.tee $5 + (i32.add + (i32.add + (local.get $1) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $5) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $5 + (local.get $2) + ) + (local.set $1 + (local.get $3) + ) + (br $label$307) + ) + ) + (if + (i32.lt_u + (local.get $5) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + (block + (i32.store + (local.get $5) + (local.get $8) + ) + (i32.store offset=24 + (local.get $8) + (local.get $1) + ) + (i32.store offset=12 + (local.get $8) + (local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $8) + ) + (br $label$198) + ) + ) + (br $label$304) + ) + (if + (i32.and + (i32.ge_u + (local.tee $5 + (i32.load + (local.tee $2 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + ) + (local.tee $3 + (i32.load + (i32.const 4192) + ) + ) + ) + (i32.ge_u + (local.get $1) + (local.get $3) + ) + ) + (block + (i32.store offset=12 + (local.get $5) + (local.get $8) + ) + (i32.store + (local.get $2) + (local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $5) + ) + (i32.store offset=12 + (local.get $8) + (local.get $1) + ) + (i32.store offset=24 + (local.get $8) + (i32.const 0) + ) + ) + (call $fimport$8) + ) + ) + ) + ) + ) + (block + (if + (i32.or + (i32.eqz + (local.tee $2 + (i32.load + (i32.const 4192) + ) + ) + ) + (i32.lt_u + (local.get $1) + (local.get $2) + ) + ) + (i32.store + (i32.const 4192) + (local.get $1) + ) + ) + (i32.store + (i32.const 4624) + (local.get $1) + ) + (i32.store + (i32.const 4628) + (local.get $3) + ) + (i32.store + (i32.const 4636) + (i32.const 0) + ) + (i32.store + (i32.const 4212) + (i32.load + (i32.const 4648) + ) + ) + (i32.store + (i32.const 4208) + (i32.const -1) + ) + (local.set $2 + (i32.const 0) + ) + (loop $label$314 + (i32.store offset=12 + (local.tee $5 + (i32.add + (i32.shl + (i32.shl + (local.get $2) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 4216) + ) + ) + (local.get $5) + ) + (i32.store offset=8 + (local.get $5) + (local.get $5) + ) + (br_if $label$314 + (i32.ne + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (i32.const 32) + ) + ) + ) + (local.set $5 + (i32.add + (local.get $3) + (i32.const -40) + ) + ) + (local.set $3 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $2 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.store + (i32.const 4200) + (local.tee $3 + (i32.add + (local.get $1) + (local.tee $1 + (if (result i32) + (i32.and + 
(local.get $2) + (i32.const 7) + ) + (local.get $3) + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (i32.const 4188) + (local.tee $1 + (i32.sub + (local.get $5) + (local.get $1) + ) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $1) + (i32.const 1) + ) + ) + (i32.store offset=4 + (i32.add + (local.get $3) + (local.get $1) + ) + (i32.const 40) + ) + (i32.store + (i32.const 4204) + (i32.load + (i32.const 4664) + ) + ) + ) + ) + ) + (if + (i32.gt_u + (local.tee $1 + (i32.load + (i32.const 4188) + ) + ) + (local.get $0) + ) + (block + (i32.store + (i32.const 4188) + (local.tee $3 + (i32.sub + (local.get $1) + (local.get $0) + ) + ) + ) + (i32.store + (i32.const 4200) + (local.tee $1 + (i32.add + (local.tee $2 + (i32.load + (i32.const 4200) + ) + ) + (local.get $0) + ) + ) + ) + (i32.store offset=4 + (local.get $1) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + ) + (i32.store + (call $12) + (i32.const 12) + ) + (global.set $global$1 + (local.get $14) + ) + (i32.const 0) + ) + ) + (func $38 (; 51 ;) (type $3) (param $0 i32) + (local $1 i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (block $label$1 + (if + (i32.eqz + (local.get $0) + ) + (return) + ) + (if + (i32.lt_u + (local.tee $1 + (i32.add + (local.get $0) + (i32.const -8) + ) + ) + (local.tee $11 + (i32.load + (i32.const 4192) + ) + ) + ) + (call $fimport$8) + ) + (if + (i32.eq + (local.tee $8 + (i32.and + (local.tee $0 + (i32.load + (i32.add + (local.get $0) + (i32.const -4) + ) + ) + ) + (i32.const 3) + ) + ) + (i32.const 1) + ) + (call $fimport$8) + ) + (local.set $6 + (i32.add + (local.get $1) + (local.tee $4 + (i32.and + (local.get $0) + (i32.const -8) + ) + ) + ) + ) + (block $label$5 + (if + (i32.and + (local.get $0) + (i32.const 1) + ) + (block + (local.set $3 + (local.get $1) + ) + (local.set $2 + (local.get $4) + ) + ) + (block + (if + (i32.eqz + (local.get $8) + ) + (return) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.add + (local.get $1) + (i32.sub + (i32.const 0) + (local.tee $8 + (i32.load + (local.get $1) + ) + ) + ) + ) + ) + (local.get $11) + ) + (call $fimport$8) + ) + (local.set $1 + (i32.add + (local.get $8) + (local.get $4) + ) + ) + (if + (i32.eq + (local.get $0) + (i32.load + (i32.const 4196) + ) + ) + (block + (if + (i32.ne + (i32.and + (local.tee $3 + (i32.load + (local.tee $2 + (i32.add + (local.get $6) + (i32.const 4) + ) + ) + ) + ) + (i32.const 3) + ) + (i32.const 3) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + (i32.store + (i32.const 4184) + (local.get $1) + ) + (i32.store + (local.get $2) + (i32.and + (local.get $3) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $0) + (i32.or + (local.get $1) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $0) + (local.get $1) + ) + (local.get $1) + ) + (return) + ) + ) + (local.set $10 + (i32.shr_u + (local.get $8) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $8) + (i32.const 256) + ) + (block + (local.set $3 + (i32.load offset=12 + (local.get $0) + ) + ) + (if + (i32.ne + (local.tee $4 + (i32.load offset=8 + (local.get $0) + ) + ) + 
(local.tee $2 + (i32.add + (i32.shl + (i32.shl + (local.get $10) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 4216) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $4) + (local.get $11) + ) + (call $fimport$8) + ) + (if + (i32.ne + (i32.load offset=12 + (local.get $4) + ) + (local.get $0) + ) + (call $fimport$8) + ) + ) + ) + (if + (i32.eq + (local.get $3) + (local.get $4) + ) + (block + (i32.store + (i32.const 4176) + (i32.and + (i32.load + (i32.const 4176) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $10) + ) + (i32.const -1) + ) + ) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + (if + (i32.eq + (local.get $3) + (local.get $2) + ) + (local.set $5 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (block + (if + (i32.lt_u + (local.get $3) + (local.get $11) + ) + (call $fimport$8) + ) + (if + (i32.eq + (i32.load + (local.tee $2 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + (local.get $0) + ) + (local.set $5 + (local.get $2) + ) + (call $fimport$8) + ) + ) + ) + (i32.store offset=12 + (local.get $4) + (local.get $3) + ) + (i32.store + (local.get $5) + (local.get $4) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + (local.set $12 + (i32.load offset=24 + (local.get $0) + ) + ) + (block $label$22 + (if + (i32.eq + (local.tee $4 + (i32.load offset=12 + (local.get $0) + ) + ) + (local.get $0) + ) + (block + (if + (local.tee $4 + (i32.load + (local.tee $8 + (i32.add + (local.tee $5 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (i32.const 4) + ) + ) + ) + ) + (local.set $5 + (local.get $8) + ) + (if + (i32.eqz + (local.tee $4 + (i32.load + (local.get $5) + ) + ) + ) + (block + (local.set $7 + (i32.const 0) + ) + (br $label$22) + ) + ) + ) + (loop $label$27 + (if + (local.tee $10 + (i32.load + (local.tee $8 + (i32.add + (local.get $4) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $4 + (local.get $10) + ) + (local.set $5 + (local.get $8) + ) + (br $label$27) + ) + ) + (if + (local.tee $10 + (i32.load + (local.tee $8 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $4 + (local.get $10) + ) + (local.set $5 + (local.get $8) + ) + (br $label$27) + ) + ) + ) + (if + (i32.lt_u + (local.get $5) + (local.get $11) + ) + (call $fimport$8) + (block + (i32.store + (local.get $5) + (i32.const 0) + ) + (local.set $7 + (local.get $4) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $5 + (i32.load offset=8 + (local.get $0) + ) + ) + (local.get $11) + ) + (call $fimport$8) + ) + (if + (i32.ne + (i32.load + (local.tee $8 + (i32.add + (local.get $5) + (i32.const 12) + ) + ) + ) + (local.get $0) + ) + (call $fimport$8) + ) + (if + (i32.eq + (i32.load + (local.tee $10 + (i32.add + (local.get $4) + (i32.const 8) + ) + ) + ) + (local.get $0) + ) + (block + (i32.store + (local.get $8) + (local.get $4) + ) + (i32.store + (local.get $10) + (local.get $5) + ) + (local.set $7 + (local.get $4) + ) + ) + (call $fimport$8) + ) + ) + ) + ) + (if + (local.get $12) + (block + (if + (i32.eq + (local.get $0) + (i32.load + (local.tee $5 + (i32.add + (i32.shl + (local.tee $4 + (i32.load offset=28 + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 4480) + ) + ) + ) + ) + (block + (i32.store + (local.get $5) + (local.get $7) + ) + (if + (i32.eqz + (local.get $7) + ) + (block + (i32.store + (i32.const 4180) + (i32.and + (i32.load + (i32.const 4180) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $4) + ) + (i32.const -1) + 
) + ) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $12) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + ) + (if + (i32.eq + (i32.load + (local.tee $4 + (i32.add + (local.get $12) + (i32.const 16) + ) + ) + ) + (local.get $0) + ) + (i32.store + (local.get $4) + (local.get $7) + ) + (i32.store offset=20 + (local.get $12) + (local.get $7) + ) + ) + (if + (i32.eqz + (local.get $7) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $7) + (local.tee $5 + (i32.load + (i32.const 4192) + ) + ) + ) + (call $fimport$8) + ) + (i32.store offset=24 + (local.get $7) + (local.get $12) + ) + (if + (local.tee $4 + (i32.load + (local.tee $8 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $4) + (local.get $5) + ) + (call $fimport$8) + (block + (i32.store offset=16 + (local.get $7) + (local.get $4) + ) + (i32.store offset=24 + (local.get $4) + (local.get $7) + ) + ) + ) + ) + (if + (local.tee $4 + (i32.load offset=4 + (local.get $8) + ) + ) + (if + (i32.lt_u + (local.get $4) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + (block + (i32.store offset=20 + (local.get $7) + (local.get $4) + ) + (i32.store offset=24 + (local.get $4) + (local.get $7) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + ) + ) + ) + (if + (i32.ge_u + (local.get $3) + (local.get $6) + ) + (call $fimport$8) + ) + (if + (i32.eqz + (i32.and + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $6) + (i32.const 4) + ) + ) + ) + ) + (i32.const 1) + ) + ) + (call $fimport$8) + ) + (if + (i32.and + (local.get $0) + (i32.const 2) + ) + (block + (i32.store + (local.get $1) + (i32.and + (local.get $0) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $2) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $3) + (local.get $2) + ) + (local.get $2) + ) + ) + (block + (if + (i32.eq + (local.get $6) + (i32.load + (i32.const 4200) + ) + ) + (block + (i32.store + (i32.const 4188) + (local.tee $0 + (i32.add + (i32.load + (i32.const 4188) + ) + (local.get $2) + ) + ) + ) + (i32.store + (i32.const 4200) + (local.get $3) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $0) + (i32.const 1) + ) + ) + (if + (i32.ne + (local.get $3) + (i32.load + (i32.const 4196) + ) + ) + (return) + ) + (i32.store + (i32.const 4196) + (i32.const 0) + ) + (i32.store + (i32.const 4184) + (i32.const 0) + ) + (return) + ) + ) + (if + (i32.eq + (local.get $6) + (i32.load + (i32.const 4196) + ) + ) + (block + (i32.store + (i32.const 4184) + (local.tee $0 + (i32.add + (i32.load + (i32.const 4184) + ) + (local.get $2) + ) + ) + ) + (i32.store + (i32.const 4196) + (local.get $3) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $0) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $3) + (local.get $0) + ) + (local.get $0) + ) + (return) + ) + ) + (local.set $5 + (i32.add + (i32.and + (local.get $0) + (i32.const -8) + ) + (local.get $2) + ) + ) + (local.set $4 + (i32.shr_u + (local.get $0) + (i32.const 3) + ) + ) + (block $label$61 + (if + (i32.lt_u + (local.get $0) + (i32.const 256) + ) + (block + 
(local.set $2 + (i32.load offset=12 + (local.get $6) + ) + ) + (if + (i32.ne + (local.tee $1 + (i32.load offset=8 + (local.get $6) + ) + ) + (local.tee $0 + (i32.add + (i32.shl + (i32.shl + (local.get $4) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 4216) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $1) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + ) + (if + (i32.ne + (i32.load offset=12 + (local.get $1) + ) + (local.get $6) + ) + (call $fimport$8) + ) + ) + ) + (if + (i32.eq + (local.get $2) + (local.get $1) + ) + (block + (i32.store + (i32.const 4176) + (i32.and + (i32.load + (i32.const 4176) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $4) + ) + (i32.const -1) + ) + ) + ) + (br $label$61) + ) + ) + (if + (i32.eq + (local.get $2) + (local.get $0) + ) + (local.set $14 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + (block + (if + (i32.lt_u + (local.get $2) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + (local.get $6) + ) + (local.set $14 + (local.get $0) + ) + (call $fimport$8) + ) + ) + ) + (i32.store offset=12 + (local.get $1) + (local.get $2) + ) + (i32.store + (local.get $14) + (local.get $1) + ) + ) + (block + (local.set $7 + (i32.load offset=24 + (local.get $6) + ) + ) + (block $label$73 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $6) + ) + ) + (local.get $6) + ) + (block + (if + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.tee $2 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + (i32.const 4) + ) + ) + ) + ) + (local.set $2 + (local.get $1) + ) + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.get $2) + ) + ) + ) + (block + (local.set $9 + (i32.const 0) + ) + (br $label$73) + ) + ) + ) + (loop $label$78 + (if + (local.tee $4 + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $4) + ) + (local.set $2 + (local.get $1) + ) + (br $label$78) + ) + ) + (if + (local.tee $4 + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $4) + ) + (local.set $2 + (local.get $1) + ) + (br $label$78) + ) + ) + ) + (if + (i32.lt_u + (local.get $2) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + (block + (i32.store + (local.get $2) + (i32.const 0) + ) + (local.set $9 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $2 + (i32.load offset=8 + (local.get $6) + ) + ) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + ) + (if + (i32.ne + (i32.load + (local.tee $1 + (i32.add + (local.get $2) + (i32.const 12) + ) + ) + ) + (local.get $6) + ) + (call $fimport$8) + ) + (if + (i32.eq + (i32.load + (local.tee $4 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $6) + ) + (block + (i32.store + (local.get $1) + (local.get $0) + ) + (i32.store + (local.get $4) + (local.get $2) + ) + (local.set $9 + (local.get $0) + ) + ) + (call $fimport$8) + ) + ) + ) + ) + (if + (local.get $7) + (block + (if + (i32.eq + (local.get $6) + (i32.load + (local.tee $2 + (i32.add + (i32.shl + (local.tee $0 + (i32.load offset=28 + (local.get $6) + ) + ) + (i32.const 2) + ) + (i32.const 4480) + ) + ) + ) + ) + (block + (i32.store + (local.get $2) + (local.get $9) + ) + (if + (i32.eqz + (local.get $9) + ) + (block + (i32.store + (i32.const 4180) + (i32.and + (i32.load + (i32.const 4180) + ) + (i32.xor + (i32.shl + 
(i32.const 1) + (local.get $0) + ) + (i32.const -1) + ) + ) + ) + (br $label$61) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $7) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $7) + (i32.const 16) + ) + ) + ) + (local.get $6) + ) + (i32.store + (local.get $0) + (local.get $9) + ) + (i32.store offset=20 + (local.get $7) + (local.get $9) + ) + ) + (br_if $label$61 + (i32.eqz + (local.get $9) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $9) + (local.tee $2 + (i32.load + (i32.const 4192) + ) + ) + ) + (call $fimport$8) + ) + (i32.store offset=24 + (local.get $9) + (local.get $7) + ) + (if + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $0) + (local.get $2) + ) + (call $fimport$8) + (block + (i32.store offset=16 + (local.get $9) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $9) + ) + ) + ) + ) + (if + (local.tee $0 + (i32.load offset=4 + (local.get $1) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + (block + (i32.store offset=20 + (local.get $9) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $9) + ) + ) + ) + ) + ) + ) + ) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $5) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $3) + (local.get $5) + ) + (local.get $5) + ) + (if + (i32.eq + (local.get $3) + (i32.load + (i32.const 4196) + ) + ) + (block + (i32.store + (i32.const 4184) + (local.get $5) + ) + (return) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + ) + (local.set $1 + (i32.shr_u + (local.get $2) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $2) + (i32.const 256) + ) + (block + (local.set $0 + (i32.add + (i32.shl + (i32.shl + (local.get $1) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 4216) + ) + ) + (if + (i32.and + (local.tee $2 + (i32.load + (i32.const 4176) + ) + ) + (local.tee $1 + (i32.shl + (i32.const 1) + (local.get $1) + ) + ) + ) + (if + (i32.lt_u + (local.tee $1 + (i32.load + (local.tee $2 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + (block + (local.set $15 + (local.get $2) + ) + (local.set $13 + (local.get $1) + ) + ) + ) + (block + (i32.store + (i32.const 4176) + (i32.or + (local.get $2) + (local.get $1) + ) + ) + (local.set $15 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + (local.set $13 + (local.get $0) + ) + ) + ) + (i32.store + (local.get $15) + (local.get $3) + ) + (i32.store offset=12 + (local.get $13) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get $13) + ) + (i32.store offset=12 + (local.get $3) + (local.get $0) + ) + (return) + ) + ) + (local.set $0 + (i32.add + (i32.shl + (local.tee $1 + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $2) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $2) + (i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $2) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $4 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get 
$0) + ) + (local.tee $1 + (i32.and + (i32.shr_u + (i32.add + (local.tee $0 + (i32.shl + (local.get $1) + (local.get $4) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $0) + (local.get $1) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (i32.const 2) + ) + (i32.const 4480) + ) + ) + (i32.store offset=28 + (local.get $3) + (local.get $1) + ) + (i32.store offset=20 + (local.get $3) + (i32.const 0) + ) + (i32.store offset=16 + (local.get $3) + (i32.const 0) + ) + (block $label$113 + (if + (i32.and + (local.tee $4 + (i32.load + (i32.const 4180) + ) + ) + (local.tee $5 + (i32.shl + (i32.const 1) + (local.get $1) + ) + ) + ) + (block + (local.set $0 + (i32.load + (local.get $0) + ) + ) + (local.set $4 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $1) + (i32.const 1) + ) + ) + ) + (local.set $1 + (i32.shl + (local.get $2) + (if (result i32) + (i32.eq + (local.get $1) + (i32.const 31) + ) + (i32.const 0) + (local.get $4) + ) + ) + ) + (block $label$117 + (block $label$118 + (block $label$119 + (loop $label$120 + (br_if $label$118 + (i32.eq + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $2) + ) + ) + (local.set $4 + (i32.shl + (local.get $1) + (i32.const 1) + ) + ) + (br_if $label$119 + (i32.eqz + (local.tee $5 + (i32.load + (local.tee $1 + (i32.add + (i32.add + (local.get $0) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $1) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $1 + (local.get $4) + ) + (local.set $0 + (local.get $5) + ) + (br $label$120) + ) + ) + (if + (i32.lt_u + (local.get $1) + (i32.load + (i32.const 4192) + ) + ) + (call $fimport$8) + (block + (i32.store + (local.get $1) + (local.get $3) + ) + (i32.store offset=24 + (local.get $3) + (local.get $0) + ) + (i32.store offset=12 + (local.get $3) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get $3) + ) + (br $label$113) + ) + ) + (br $label$117) + ) + (if + (i32.and + (i32.ge_u + (local.tee $2 + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + ) + (local.tee $4 + (i32.load + (i32.const 4192) + ) + ) + ) + (i32.ge_u + (local.get $0) + (local.get $4) + ) + ) + (block + (i32.store offset=12 + (local.get $2) + (local.get $3) + ) + (i32.store + (local.get $1) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get $2) + ) + (i32.store offset=12 + (local.get $3) + (local.get $0) + ) + (i32.store offset=24 + (local.get $3) + (i32.const 0) + ) + ) + (call $fimport$8) + ) + ) + ) + (block + (i32.store + (i32.const 4180) + (i32.or + (local.get $4) + (local.get $5) + ) + ) + (i32.store + (local.get $0) + (local.get $3) + ) + (i32.store offset=24 + (local.get $3) + (local.get $0) + ) + (i32.store offset=12 + (local.get $3) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get $3) + ) + ) + ) + ) + (i32.store + (i32.const 4208) + (local.tee $0 + (i32.add + (i32.load + (i32.const 4208) + ) + (i32.const -1) + ) + ) + ) + (if + (local.get $0) + (return) + (local.set $0 + (i32.const 4632) + ) + ) + (loop $label$128 + (local.set $0 + (i32.add + (local.tee $2 + (i32.load + (local.get $0) + ) + ) + (i32.const 8) + ) + ) + (br_if $label$128 + (local.get $2) + ) + ) + (i32.store + (i32.const 4208) + (i32.const -1) + ) + ) + ) + (func $39 (; 52 ;) (type $2) (param $0 i32) (result i32) + (local 
$1 i32) + (block $label$1 (result i32) + (if + (i32.eqz + (local.get $0) + ) + (local.set $0 + (i32.const 1) + ) + ) + (loop $label$3 + (block $label$4 + (if + (local.tee $1 + (call $37 + (local.get $0) + ) + ) + (block + (local.set $0 + (local.get $1) + ) + (br $label$4) + ) + ) + (if + (local.tee $1 + (call $43) + ) + (block + (call_indirect (type $1) + (i32.add + (i32.and + (local.get $1) + (i32.const 0) + ) + (i32.const 8) + ) + ) + (br $label$3) + ) + (local.set $0 + (i32.const 0) + ) + ) + ) + ) + (local.get $0) + ) + ) + (func $40 (; 53 ;) (type $2) (param $0 i32) (result i32) + (call $39 + (local.get $0) + ) + ) + (func $41 (; 54 ;) (type $3) (param $0 i32) + (call $38 + (local.get $0) + ) + ) + (func $42 (; 55 ;) (type $3) (param $0 i32) + (call $41 + (local.get $0) + ) + ) + (func $43 (; 56 ;) (type $4) (result i32) + (local $0 i32) + (block $label$1 (result i32) + (i32.store + (i32.const 4672) + (i32.add + (local.tee $0 + (i32.load + (i32.const 4672) + ) + ) + (i32.const 0) + ) + ) + (local.get $0) + ) + ) + (func $44 (; 57 ;) (type $1) + (nop) + ) + (func $45 (; 58 ;) (type $2) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (i32.add + (local.tee $2 + (i32.load + (global.get $global$0) + ) + ) + (local.tee $0 + (i32.and + (i32.add + (local.get $0) + (i32.const 15) + ) + (i32.const -16) + ) + ) + ) + ) + (if + (i32.or + (i32.and + (i32.gt_s + (local.get $0) + (i32.const 0) + ) + (i32.lt_s + (local.get $1) + (local.get $2) + ) + ) + (i32.lt_s + (local.get $1) + (i32.const 0) + ) + ) + (block + (drop + (call $fimport$6) + ) + (call $fimport$11 + (i32.const 12) + ) + (return + (i32.const -1) + ) + ) + ) + (i32.store + (global.get $global$0) + (local.get $1) + ) + (if + (i32.gt_s + (local.get $1) + (call $fimport$5) + ) + (if + (i32.eqz + (call $fimport$4) + ) + (block + (call $fimport$11 + (i32.const 12) + ) + (i32.store + (global.get $global$0) + (local.get $2) + ) + (return + (i32.const -1) + ) + ) + ) + ) + (local.get $2) + ) + ) + (func $46 (; 59 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (block $label$1 (result i32) + (local.set $4 + (i32.add + (local.get $0) + (local.get $2) + ) + ) + (if + (i32.ge_s + (local.get $2) + (i32.const 20) + ) + (block + (local.set $1 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (if + (local.tee $3 + (i32.and + (local.get $0) + (i32.const 3) + ) + ) + (block + (local.set $3 + (i32.sub + (i32.add + (local.get $0) + (i32.const 4) + ) + (local.get $3) + ) + ) + (loop $label$4 + (if + (i32.lt_s + (local.get $0) + (local.get $3) + ) + (block + (i32.store8 + (local.get $0) + (local.get $1) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (br $label$4) + ) + ) + ) + ) + ) + (local.set $3 + (i32.or + (i32.or + (i32.or + (local.get $1) + (i32.shl + (local.get $1) + (i32.const 8) + ) + ) + (i32.shl + (local.get $1) + (i32.const 16) + ) + ) + (i32.shl + (local.get $1) + (i32.const 24) + ) + ) + ) + (local.set $5 + (i32.and + (local.get $4) + (i32.const -4) + ) + ) + (loop $label$6 + (if + (i32.lt_s + (local.get $0) + (local.get $5) + ) + (block + (i32.store + (local.get $0) + (local.get $3) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 4) + ) + ) + (br $label$6) + ) + ) + ) + ) + ) + (loop $label$8 + (if + (i32.lt_s + (local.get $0) + (local.get $4) + ) + (block + (i32.store8 + (local.get $0) + (local.get $1) + ) + (local.set $0 + (i32.add + (local.get $0) + 
(i32.const 1) + ) + ) + (br $label$8) + ) + ) + ) + (i32.sub + (local.get $0) + (local.get $2) + ) + ) + ) + (func $47 (; 60 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (block $label$1 (result i32) + (if + (i32.ge_s + (local.get $2) + (i32.const 4096) + ) + (return + (call $fimport$12 + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + ) + (local.set $3 + (local.get $0) + ) + (if + (i32.eq + (i32.and + (local.get $0) + (i32.const 3) + ) + (i32.and + (local.get $1) + (i32.const 3) + ) + ) + (block + (loop $label$4 + (if + (i32.and + (local.get $0) + (i32.const 3) + ) + (block + (if + (i32.eqz + (local.get $2) + ) + (return + (local.get $3) + ) + ) + (i32.store8 + (local.get $0) + (i32.load8_s + (local.get $1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 1) + ) + ) + (br $label$4) + ) + ) + ) + (loop $label$7 + (if + (i32.ge_s + (local.get $2) + (i32.const 4) + ) + (block + (i32.store + (local.get $0) + (i32.load + (local.get $1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 4) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 4) + ) + ) + (br $label$7) + ) + ) + ) + ) + ) + (loop $label$9 + (if + (i32.gt_s + (local.get $2) + (i32.const 0) + ) + (block + (i32.store8 + (local.get $0) + (i32.load8_s + (local.get $1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 1) + ) + ) + (br $label$9) + ) + ) + ) + (local.get $3) + ) + ) + (func $48 (; 61 ;) (type $4) (result i32) + (i32.const 0) + ) + (func $49 (; 62 ;) (type $6) (param $0 i32) (param $1 i32) (result i32) + (call_indirect (type $2) + (local.get $1) + (i32.add + (i32.and + (local.get $0) + (i32.const 1) + ) + (i32.const 0) + ) + ) + ) + (func $50 (; 63 ;) (type $12) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32) + (call_indirect (type $0) + (local.get $1) + (local.get $2) + (local.get $3) + (i32.add + (i32.and + (local.get $0) + (i32.const 3) + ) + (i32.const 2) + ) + ) + ) + (func $51 (; 64 ;) (type $5) (param $0 i32) (param $1 i32) + (call_indirect (type $3) + (local.get $1) + (i32.add + (i32.and + (local.get $0) + (i32.const 1) + ) + (i32.const 6) + ) + ) + ) + (func $52 (; 65 ;) (type $3) (param $0 i32) + (call_indirect (type $1) + (i32.add + (i32.and + (local.get $0) + (i32.const 0) + ) + (i32.const 8) + ) + ) + ) + (func $53 (; 66 ;) (type $2) (param $0 i32) (result i32) + (block $label$1 (result i32) + (call $fimport$3 + (i32.const 0) + ) + (i32.const 0) + ) + ) + (func $54 (; 67 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (block $label$1 (result i32) + (call $fimport$3 + (i32.const 1) + ) + (i32.const 0) + ) + ) + (func $55 (; 68 ;) (type $3) (param $0 i32) + (call $fimport$3 + (i32.const 2) + ) + ) + (func $56 (; 69 ;) (type $1) + (call $fimport$3 + (i32.const 3) + ) + ) +) + diff --git a/cranelift/wasmtests/embenchen_ifs.wat b/cranelift/wasmtests/embenchen_ifs.wat new file mode 100644 index 0000000000..e5ada5702f --- /dev/null +++ b/cranelift/wasmtests/embenchen_ifs.wat @@ -0,0 +1,15771 @@ +(module + (type $0 (func (param i32 i32 i32) (result i32))) + (type $1 (func (param i32) (result i32))) + (type $2 (func (param 
i32))) + (type $3 (func (result i32))) + (type $4 (func (param i32 i32) (result i32))) + (type $5 (func (param i32 i32))) + (type $6 (func)) + (type $7 (func (param i32 i32 i32 i32 i32) (result i32))) + (type $8 (func (param i32 i32 i32))) + (type $9 (func (param i64 i32) (result i32))) + (type $10 (func (param i32 i32 i32 i32 i32))) + (type $11 (func (param f64 i32) (result f64))) + (type $12 (func (param i32 i32 i32 i32) (result i32))) + (import "env" "memory" (memory $16 2048 2048)) + (data (i32.const 1024) "\04\04\00\00\05") + (data (i32.const 1040) "\01") + (data (i32.const 1064) "\01\00\00\00\02\00\00\00,\10\00\00\00\04") + (data (i32.const 1088) "\01") + (data (i32.const 1103) "\n\ff\ff\ff\ff") + (data (i32.const 1140) "error: %d\\n\00ok\00\11\00\n\00\11\11\11\00\00\00\00\05\00\00\00\00\00\00\t\00\00\00\00\0b") + (data (i32.const 1187) "\11\00\0f\n\11\11\11\03\n\07\00\01\13\t\0b\0b\00\00\t\06\0b\00\00\0b\00\06\11\00\00\00\11\11\11") + (data (i32.const 1236) "\0b") + (data (i32.const 1245) "\11\00\n\n\11\11\11\00\n\00\00\02\00\t\0b\00\00\00\t\00\0b\00\00\0b") + (data (i32.const 1294) "\0c") + (data (i32.const 1306) "\0c\00\00\00\00\0c\00\00\00\00\t\0c\00\00\00\00\00\0c\00\00\0c") + (data (i32.const 1352) "\0e") + (data (i32.const 1364) "\0d\00\00\00\04\0d\00\00\00\00\t\0e\00\00\00\00\00\0e\00\00\0e") + (data (i32.const 1410) "\10") + (data (i32.const 1422) "\0f\00\00\00\00\0f\00\00\00\00\t\10\00\00\00\00\00\10\00\00\10\00\00\12\00\00\00\12\12\12") + (data (i32.const 1477) "\12\00\00\00\12\12\12\00\00\00\00\00\00\t") + (data (i32.const 1526) "\0b") + (data (i32.const 1538) "\n\00\00\00\00\n\00\00\00\00\t\0b\00\00\00\00\00\0b\00\00\0b") + (data (i32.const 1584) "\0c") + (data (i32.const 1596) "\0c\00\00\00\00\0c\00\00\00\00\t\0c\00\00\00\00\00\0c\00\00\0c\00\000123456789ABCDEF-+ 0X0x\00(null)\00-0X+0X 0X-0x+0x 0x\00inf\00INF\00nan\00NAN\00.\00T!\"\19\0d\01\02\03\11K\1c\0c\10\04\0b\1d\12\1e\'hnopqb \05\06\0f\13\14\15\1a\08\16\07($\17\18\t\n\0e\1b\1f%#\83\82}&*+<=>?CGJMXYZ[\\]^_`acdefgijklrstyz{|\00Illegal byte sequence\00Domain error\00Result not representable\00Not a tty\00Permission denied\00Operation not permitted\00No such file or directory\00No such process\00File exists\00Value too large for data type\00No space left on device\00Out of memory\00Resource busy\00Interrupted system call\00Resource temporarily unavailable\00Invalid seek\00Cross-device link\00Read-only file system\00Directory not empty\00Connection reset by peer\00Operation timed out\00Connection refused\00Host is down\00Host is unreachable\00Address in use\00Broken pipe\00I/O error\00No such device or address\00Block device required\00No such device\00Not a directory\00Is a directory\00Text file busy\00Exec format error\00Invalid argument\00Argument list too long\00Symbolic link loop\00Filename too long\00Too many open files in system\00No file descriptors available\00Bad file descriptor\00No child process\00Bad address\00File too large\00Too many links\00No locks available\00Resource deadlock would occur\00State not recoverable\00Previous owner died\00Operation canceled\00Function not implemented\00No message of desired type\00Identifier removed\00Device not a stream\00No data available\00Device timeout\00Out of streams resources\00Link has been severed\00Protocol error\00Bad message\00File descriptor in bad state\00Not a socket\00Destination address required\00Message too large\00Protocol wrong type for socket\00Protocol not available\00Protocol not supported\00Socket type not supported\00Not supported\00Protocol 
family not supported\00Address family not supported by protocol\00Address not available\00Network is down\00Network unreachable\00Connection reset by network\00Connection aborted\00No buffer space available\00Socket is connected\00Socket not connected\00Cannot send after socket shutdown\00Operation already in progress\00Operation in progress\00Stale file handle\00Remote I/O error\00Quota exceeded\00No medium found\00Wrong medium type\00No error information") + (import "env" "table" (table $timport$17 8 8 funcref)) + (elem (global.get $gimport$19) $47 $9 $48 $14 $10 $15 $49 $16) + (import "env" "DYNAMICTOP_PTR" (global $gimport$0 i32)) + (import "env" "STACKTOP" (global $gimport$1 i32)) + (import "env" "STACK_MAX" (global $gimport$2 i32)) + (import "env" "memoryBase" (global $gimport$18 i32)) + (import "env" "tableBase" (global $gimport$19 i32)) + (import "env" "abort" (func $fimport$3 (param i32))) + (import "env" "enlargeMemory" (func $fimport$4 (result i32))) + (import "env" "getTotalMemory" (func $fimport$5 (result i32))) + (import "env" "abortOnCannotGrowMemory" (func $fimport$6 (result i32))) + (import "env" "_pthread_cleanup_pop" (func $fimport$7 (param i32))) + (import "env" "___syscall6" (func $fimport$8 (param i32 i32) (result i32))) + (import "env" "_pthread_cleanup_push" (func $fimport$9 (param i32 i32))) + (import "env" "_abort" (func $fimport$10)) + (import "env" "___setErrNo" (func $fimport$11 (param i32))) + (import "env" "_emscripten_memcpy_big" (func $fimport$12 (param i32 i32 i32) (result i32))) + (import "env" "___syscall54" (func $fimport$13 (param i32 i32) (result i32))) + (import "env" "___syscall140" (func $fimport$14 (param i32 i32) (result i32))) + (import "env" "___syscall146" (func $fimport$15 (param i32 i32) (result i32))) + (global $global$0 (mut i32) (global.get $gimport$0)) + (global $global$1 (mut i32) (global.get $gimport$1)) + (global $global$2 (mut i32) (global.get $gimport$2)) + (global $global$3 (mut i32) (i32.const 0)) + (global $global$4 (mut i32) (i32.const 0)) + (global $global$5 (mut i32) (i32.const 0)) + (export "_sbrk" (func $40)) + (export "_free" (func $38)) + (export "_main" (func $8)) + (export "_pthread_self" (func $43)) + (export "_memset" (func $41)) + (export "_malloc" (func $37)) + (export "_memcpy" (func $42)) + (export "___errno_location" (func $12)) + (export "runPostSets" (func $39)) + (export "stackAlloc" (func $0)) + (export "stackSave" (func $1)) + (export "stackRestore" (func $2)) + (export "establishStackSpace" (func $3)) + (export "setThrew" (func $4)) + (export "setTempRet0" (func $5)) + (export "getTempRet0" (func $6)) + (export "dynCall_ii" (func $44)) + (export "dynCall_iiii" (func $45)) + (export "dynCall_vi" (func $46)) + (func $0 (; 13 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (block $label$1 (result i32) + (local.set $1 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (local.get $0) + ) + ) + (global.set $global$1 + (i32.and + (i32.add + (global.get $global$1) + (i32.const 15) + ) + (i32.const -16) + ) + ) + (local.get $1) + ) + ) + (func $1 (; 14 ;) (type $3) (result i32) + (global.get $global$1) + ) + (func $2 (; 15 ;) (type $2) (param $0 i32) + (global.set $global$1 + (local.get $0) + ) + ) + (func $3 (; 16 ;) (type $5) (param $0 i32) (param $1 i32) + (block $label$1 + (global.set $global$1 + (local.get $0) + ) + (global.set $global$2 + (local.get $1) + ) + ) + ) + (func $4 (; 17 ;) (type $5) (param $0 i32) (param $1 i32) + (if + (i32.eqz + (global.get 
$global$3) + ) + (block + (global.set $global$3 + (local.get $0) + ) + (global.set $global$4 + (local.get $1) + ) + ) + ) + ) + (func $5 (; 18 ;) (type $2) (param $0 i32) + (global.set $global$5 + (local.get $0) + ) + ) + (func $6 (; 19 ;) (type $3) (result i32) + (global.get $global$5) + ) + (func $7 (; 20 ;) (type $3) (result i32) + (local $0 i32) + (block $label$1 (result i32) + (i32.store + (i32.const 3584) + (i32.add + (local.tee $0 + (i32.load + (i32.const 3584) + ) + ) + (i32.const 1) + ) + ) + (i32.and + (local.get $0) + (i32.const 16384) + ) + ) + ) + (func $8 (; 21 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (block $label$1 (result i32) + (local.set $4 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (local.set $2 + (local.get $4) + ) + (block $label$2 + (block $label$3 + (br_if $label$3 + (i32.le_s + (local.get $0) + (i32.const 1) + ) + ) + (block $label$4 + (block $label$5 + (block $label$6 + (block $label$7 + (block $label$8 + (block $label$9 + (block $label$10 + (br_table $label$5 $label$10 $label$8 $label$9 $label$7 $label$6 $label$4 + (i32.sub + (local.tee $0 + (i32.load8_s + (i32.load offset=4 + (local.get $1) + ) + ) + ) + (i32.const 48) + ) + ) + ) + (local.set $3 + (i32.const 75) + ) + (br $label$2) + ) + (br $label$3) + ) + (local.set $3 + (i32.const 625) + ) + (br $label$2) + ) + (local.set $3 + (i32.const 6250) + ) + (br $label$2) + ) + (local.set $3 + (i32.const 12500) + ) + (br $label$2) + ) + (global.set $global$1 + (local.get $4) + ) + (return + (i32.const 0) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $0) + (i32.const -48) + ) + ) + (drop + (call $34 + (i32.const 1140) + (local.get $2) + ) + ) + (global.set $global$1 + (local.get $4) + ) + (return + (i32.const -1) + ) + ) + (local.set $3 + (i32.const 1250) + ) + ) + (local.set $1 + (i32.const 0) + ) + (local.set $0 + (i32.const 0) + ) + (loop $label$11 + (local.set $2 + (i32.const 0) + ) + (loop $label$12 + (local.set $0 + (block $label$13 (result i32) + (block $label$14 + (br_if $label$14 + (i32.eqz + (call $7) + ) + ) + (br_if $label$14 + (i32.eqz + (call $7) + ) + ) + (br $label$13 + (i32.add + (local.get $0) + (i32.const 17) + ) + ) + ) + (i32.add + (local.get $0) + (i32.const 19) + ) + ) + ) + (block $label$15 + (block $label$16 + (br_if $label$16 + (call $7) + ) + (br_if $label$16 + (call $7) + ) + (br $label$15) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 23) + ) + ) + ) + (br_if $label$12 + (i32.lt_s + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (local.get $3) + ) + ) + ) + (br_if $label$11 + (i32.ne + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (i32.const 27000) + ) + ) + ) + (drop + (call $35 + (i32.const 1152) + ) + ) + (global.set $global$1 + (local.get $4) + ) + (local.get $0) + ) + ) + (func $9 (; 22 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (i32.store + (local.tee $2 + (local.get $1) + ) + (i32.load offset=60 + (local.get $0) + ) + ) + (local.set $0 + (call $11 + (call $fimport$8 + (i32.const 6) + (local.get $2) + ) + ) + ) + (global.set $global$1 + (local.get $1) + ) + (local.get $0) + ) + ) + (func $10 (; 23 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 
i32) + (local $4 i32) + (block $label$1 (result i32) + (local.set $4 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 32) + ) + ) + (i32.store + (local.tee $3 + (local.get $4) + ) + (i32.load offset=60 + (local.get $0) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.const 0) + ) + (i32.store offset=8 + (local.get $3) + (local.get $1) + ) + (i32.store offset=12 + (local.get $3) + (local.tee $0 + (i32.add + (local.get $4) + (i32.const 20) + ) + ) + ) + (i32.store offset=16 + (local.get $3) + (local.get $2) + ) + (local.set $0 + (if (result i32) + (i32.lt_s + (call $11 + (call $fimport$14 + (i32.const 140) + (local.get $3) + ) + ) + (i32.const 0) + ) + (block (result i32) + (i32.store + (local.get $0) + (i32.const -1) + ) + (i32.const -1) + ) + (i32.load + (local.get $0) + ) + ) + ) + (global.set $global$1 + (local.get $4) + ) + (local.get $0) + ) + ) + (func $11 (; 24 ;) (type $1) (param $0 i32) (result i32) + (if (result i32) + (i32.gt_u + (local.get $0) + (i32.const -4096) + ) + (block (result i32) + (i32.store + (call $12) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + (i32.const -1) + ) + (local.get $0) + ) + ) + (func $12 (; 25 ;) (type $3) (result i32) + (i32.const 3632) + ) + (func $13 (; 26 ;) (type $2) (param $0 i32) + (nop) + ) + (func $14 (; 27 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (block $label$1 (result i32) + (local.set $4 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 80) + ) + ) + (local.set $3 + (local.get $4) + ) + (local.set $5 + (i32.add + (local.get $4) + (i32.const 12) + ) + ) + (i32.store offset=36 + (local.get $0) + (i32.const 3) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 64) + ) + ) + (block + (i32.store + (local.get $3) + (i32.load offset=60 + (local.get $0) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.const 21505) + ) + (i32.store offset=8 + (local.get $3) + (local.get $5) + ) + (if + (call $fimport$13 + (i32.const 54) + (local.get $3) + ) + (i32.store8 offset=75 + (local.get $0) + (i32.const -1) + ) + ) + ) + ) + (local.set $0 + (call $15 + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (global.set $global$1 + (local.get $4) + ) + (local.get $0) + ) + ) + (func $15 (; 28 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (block $label$1 (result i32) + (local.set $8 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 48) + ) + ) + (local.set $9 + (i32.add + (local.get $8) + (i32.const 16) + ) + ) + (local.set $10 + (local.get $8) + ) + (i32.store + (local.tee $3 + (i32.add + (local.get $8) + (i32.const 32) + ) + ) + (local.tee $4 + (i32.load + (local.tee $6 + (i32.add + (local.get $0) + (i32.const 28) + ) + ) + ) + ) + ) + (i32.store offset=4 + (local.get $3) + (local.tee $5 + (i32.sub + (i32.load + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + (local.get $4) + ) + ) + ) + (i32.store offset=8 + (local.get $3) + (local.get $1) + ) + (i32.store offset=12 + (local.get $3) + (local.get $2) + ) + (local.set $13 + (i32.add + (local.get $0) + (i32.const 60) + ) + ) + (local.set $14 + (i32.add + (local.get $0) + 
(i32.const 44) + ) + ) + (local.set $1 + (local.get $3) + ) + (local.set $4 + (i32.const 2) + ) + (local.set $12 + (i32.add + (local.get $5) + (local.get $2) + ) + ) + (block $label$2 + (block $label$3 + (block $label$4 + (loop $label$5 + (if + (i32.load + (i32.const 3588) + ) + (block + (call $fimport$9 + (i32.const 1) + (local.get $0) + ) + (i32.store + (local.get $10) + (i32.load + (local.get $13) + ) + ) + (i32.store offset=4 + (local.get $10) + (local.get $1) + ) + (i32.store offset=8 + (local.get $10) + (local.get $4) + ) + (local.set $3 + (call $11 + (call $fimport$15 + (i32.const 146) + (local.get $10) + ) + ) + ) + (call $fimport$7 + (i32.const 0) + ) + ) + (block + (i32.store + (local.get $9) + (i32.load + (local.get $13) + ) + ) + (i32.store offset=4 + (local.get $9) + (local.get $1) + ) + (i32.store offset=8 + (local.get $9) + (local.get $4) + ) + (local.set $3 + (call $11 + (call $fimport$15 + (i32.const 146) + (local.get $9) + ) + ) + ) + ) + ) + (br_if $label$4 + (i32.eq + (local.get $12) + (local.get $3) + ) + ) + (br_if $label$3 + (i32.lt_s + (local.get $3) + (i32.const 0) + ) + ) + (local.set $5 + (if (result i32) + (i32.gt_u + (local.get $3) + (local.tee $5 + (i32.load offset=4 + (local.get $1) + ) + ) + ) + (block (result i32) + (i32.store + (local.get $6) + (local.tee $7 + (i32.load + (local.get $14) + ) + ) + ) + (i32.store + (local.get $11) + (local.get $7) + ) + (local.set $7 + (i32.load offset=12 + (local.get $1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (local.set $4 + (i32.add + (local.get $4) + (i32.const -1) + ) + ) + (i32.sub + (local.get $3) + (local.get $5) + ) + ) + (if (result i32) + (i32.eq + (local.get $4) + (i32.const 2) + ) + (block (result i32) + (i32.store + (local.get $6) + (i32.add + (i32.load + (local.get $6) + ) + (local.get $3) + ) + ) + (local.set $7 + (local.get $5) + ) + (local.set $4 + (i32.const 2) + ) + (local.get $3) + ) + (block (result i32) + (local.set $7 + (local.get $5) + ) + (local.get $3) + ) + ) + ) + ) + (i32.store + (local.get $1) + (i32.add + (i32.load + (local.get $1) + ) + (local.get $5) + ) + ) + (i32.store offset=4 + (local.get $1) + (i32.sub + (local.get $7) + (local.get $5) + ) + ) + (local.set $12 + (i32.sub + (local.get $12) + (local.get $3) + ) + ) + (br $label$5) + ) + ) + (i32.store offset=16 + (local.get $0) + (i32.add + (local.tee $1 + (i32.load + (local.get $14) + ) + ) + (i32.load offset=48 + (local.get $0) + ) + ) + ) + (i32.store + (local.get $6) + (local.get $1) + ) + (i32.store + (local.get $11) + (local.get $1) + ) + (br $label$2) + ) + (i32.store offset=16 + (local.get $0) + (i32.const 0) + ) + (i32.store + (local.get $6) + (i32.const 0) + ) + (i32.store + (local.get $11) + (i32.const 0) + ) + (i32.store + (local.get $0) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (local.set $2 + (if (result i32) + (i32.eq + (local.get $4) + (i32.const 2) + ) + (i32.const 0) + (i32.sub + (local.get $2) + (i32.load offset=4 + (local.get $1) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $8) + ) + (local.get $2) + ) + ) + (func $16 (; 29 ;) (type $2) (param $0 i32) + (if + (i32.eqz + (i32.load offset=68 + (local.get $0) + ) + ) + (call $13 + (local.get $0) + ) + ) + ) + (func $17 (; 30 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (block $label$1 (result i32) + (local.set $5 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (block $label$2 + (block $label$3 + (block 
$label$4 + (if + (i32.and + (local.tee $4 + (i32.ne + (local.get $2) + (i32.const 0) + ) + ) + (i32.ne + (i32.and + (local.get $0) + (i32.const 3) + ) + (i32.const 0) + ) + ) + (block + (local.set $4 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (local.set $3 + (local.get $2) + ) + (local.set $2 + (local.get $0) + ) + (loop $label$6 + (if + (i32.eq + (i32.load8_s + (local.get $2) + ) + (i32.shr_s + (i32.shl + (local.get $4) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (block + (local.set $0 + (local.get $3) + ) + (br $label$3) + ) + ) + (br_if $label$6 + (i32.and + (local.tee $0 + (i32.ne + (local.tee $3 + (i32.add + (local.get $3) + (i32.const -1) + ) + ) + (i32.const 0) + ) + ) + (i32.ne + (i32.and + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (i32.const 3) + ) + (i32.const 0) + ) + ) + ) + (br $label$4) + ) + ) + (block + (local.set $3 + (local.get $2) + ) + (local.set $2 + (local.get $0) + ) + (local.set $0 + (local.get $4) + ) + ) + ) + ) + (if + (local.get $0) + (block + (local.set $0 + (local.get $3) + ) + (br $label$3) + ) + (local.set $0 + (i32.const 0) + ) + ) + (br $label$2) + ) + (if + (i32.ne + (i32.load8_s + (local.get $2) + ) + (i32.shr_s + (i32.shl + (local.tee $1 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (block + (local.set $3 + (i32.mul + (local.get $5) + (i32.const 16843009) + ) + ) + (block $label$12 + (block $label$13 + (br_if $label$13 + (i32.le_u + (local.get $0) + (i32.const 3) + ) + ) + (loop $label$14 + (if + (i32.eqz + (i32.and + (i32.xor + (i32.and + (local.tee $4 + (i32.xor + (i32.load + (local.get $2) + ) + (local.get $3) + ) + ) + (i32.const -2139062144) + ) + (i32.const -2139062144) + ) + (i32.add + (local.get $4) + (i32.const -16843009) + ) + ) + ) + (block + (local.set $2 + (i32.add + (local.get $2) + (i32.const 4) + ) + ) + (br_if $label$14 + (i32.gt_u + (local.tee $0 + (i32.add + (local.get $0) + (i32.const -4) + ) + ) + (i32.const 3) + ) + ) + (br $label$13) + ) + ) + ) + (br $label$12) + ) + (if + (i32.eqz + (local.get $0) + ) + (block + (local.set $0 + (i32.const 0) + ) + (br $label$2) + ) + ) + ) + (loop $label$17 + (br_if $label$2 + (i32.eq + (i32.load8_s + (local.get $2) + ) + (i32.shr_s + (i32.shl + (local.get $1) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) + (local.set $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (br_if $label$17 + (local.tee $0 + (i32.add + (local.get $0) + (i32.const -1) + ) + ) + ) + (local.set $0 + (i32.const 0) + ) + ) + ) + ) + ) + (if (result i32) + (local.get $0) + (local.get $2) + (i32.const 0) + ) + ) + ) + (func $18 (; 31 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (block $label$1 (result i32) + (local.set $4 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 224) + ) + ) + (local.set $5 + (i32.add + (local.get $4) + (i32.const 136) + ) + ) + (i64.store align=4 + (local.tee $3 + (i32.add + (local.get $4) + (i32.const 80) + ) + ) + (i64.const 0) + ) + (i64.store offset=8 align=4 + (local.get $3) + (i64.const 0) + ) + (i64.store offset=16 align=4 + (local.get $3) + (i64.const 0) + ) + (i64.store offset=24 align=4 + (local.get $3) + (i64.const 0) + ) + (i64.store offset=32 align=4 + (local.get $3) + (i64.const 0) + ) + 
(i32.store + (local.tee $6 + (i32.add + (local.get $4) + (i32.const 120) + ) + ) + (i32.load + (local.get $2) + ) + ) + (if + (i32.lt_s + (call $19 + (i32.const 0) + (local.get $1) + (local.get $6) + (local.tee $2 + (local.get $4) + ) + (local.get $3) + ) + (i32.const 0) + ) + (local.set $1 + (i32.const -1) + ) + (block + (local.set $12 + (if (result i32) + (i32.gt_s + (i32.load offset=76 + (local.get $0) + ) + (i32.const -1) + ) + (call $20 + (local.get $0) + ) + (i32.const 0) + ) + ) + (local.set $7 + (i32.load + (local.get $0) + ) + ) + (if + (i32.lt_s + (i32.load8_s offset=74 + (local.get $0) + ) + (i32.const 1) + ) + (i32.store + (local.get $0) + (i32.and + (local.get $7) + (i32.const -33) + ) + ) + ) + (if + (i32.load + (local.tee $8 + (i32.add + (local.get $0) + (i32.const 48) + ) + ) + ) + (local.set $1 + (call $19 + (local.get $0) + (local.get $1) + (local.get $6) + (local.get $2) + (local.get $3) + ) + ) + (block + (local.set $10 + (i32.load + (local.tee $9 + (i32.add + (local.get $0) + (i32.const 44) + ) + ) + ) + ) + (i32.store + (local.get $9) + (local.get $5) + ) + (i32.store + (local.tee $13 + (i32.add + (local.get $0) + (i32.const 28) + ) + ) + (local.get $5) + ) + (i32.store + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + (local.get $5) + ) + (i32.store + (local.get $8) + (i32.const 80) + ) + (i32.store + (local.tee $14 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (i32.add + (local.get $5) + (i32.const 80) + ) + ) + (local.set $1 + (call $19 + (local.get $0) + (local.get $1) + (local.get $6) + (local.get $2) + (local.get $3) + ) + ) + (if + (local.get $10) + (block + (drop + (call_indirect (type $0) + (local.get $0) + (i32.const 0) + (i32.const 0) + (i32.add + (i32.and + (i32.load offset=36 + (local.get $0) + ) + (i32.const 3) + ) + (i32.const 2) + ) + ) + ) + (if + (i32.eqz + (i32.load + (local.get $11) + ) + ) + (local.set $1 + (i32.const -1) + ) + ) + (i32.store + (local.get $9) + (local.get $10) + ) + (i32.store + (local.get $8) + (i32.const 0) + ) + (i32.store + (local.get $14) + (i32.const 0) + ) + (i32.store + (local.get $13) + (i32.const 0) + ) + (i32.store + (local.get $11) + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (local.get $0) + (i32.or + (local.tee $2 + (i32.load + (local.get $0) + ) + ) + (i32.and + (local.get $7) + (i32.const 32) + ) + ) + ) + (if + (local.get $12) + (call $13 + (local.get $0) + ) + ) + (if + (i32.and + (local.get $2) + (i32.const 32) + ) + (local.set $1 + (i32.const -1) + ) + ) + ) + ) + (global.set $global$1 + (local.get $4) + ) + (local.get $1) + ) + ) + (func $19 (; 32 ;) (type $7) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (param $4 i32) (result i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (local $16 i32) + (local $17 i32) + (local $18 i32) + (local $19 i32) + (local $20 i32) + (local $21 i32) + (local $22 i32) + (local $23 i32) + (local $24 i32) + (local $25 i32) + (local $26 i32) + (local $27 i32) + (local $28 i32) + (local $29 i32) + (local $30 i32) + (local $31 i32) + (local $32 i32) + (local $33 i32) + (local $34 i32) + (local $35 i32) + (local $36 i32) + (local $37 i32) + (local $38 i32) + (local $39 i32) + (local $40 i32) + (local $41 i32) + (local $42 i32) + (local $43 i32) + (local $44 i32) + (local $45 i32) + (local $46 i32) + (local $47 i32) + (local $48 i32) + (local $49 i32) + (local $50 i64) + (local $51 i64) + 
(local $52 f64) + (local $53 f64) + (block $label$1 (result i32) + (local.set $23 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 624) + ) + ) + (local.set $20 + (i32.add + (local.get $23) + (i32.const 16) + ) + ) + (local.set $16 + (local.get $23) + ) + (local.set $36 + (i32.add + (local.get $23) + (i32.const 528) + ) + ) + (local.set $30 + (i32.ne + (local.get $0) + (i32.const 0) + ) + ) + (local.set $38 + (local.tee $21 + (i32.add + (local.tee $17 + (i32.add + (local.get $23) + (i32.const 536) + ) + ) + (i32.const 40) + ) + ) + ) + (local.set $39 + (i32.add + (local.get $17) + (i32.const 39) + ) + ) + (local.set $42 + (i32.add + (local.tee $37 + (i32.add + (local.get $23) + (i32.const 8) + ) + ) + (i32.const 4) + ) + ) + (local.set $43 + (i32.sub + (i32.const 0) + (local.tee $27 + (local.tee $19 + (i32.add + (local.get $23) + (i32.const 588) + ) + ) + ) + ) + ) + (local.set $33 + (i32.add + (local.tee $17 + (i32.add + (local.get $23) + (i32.const 576) + ) + ) + (i32.const 12) + ) + ) + (local.set $40 + (i32.add + (local.get $17) + (i32.const 11) + ) + ) + (local.set $44 + (i32.sub + (local.tee $28 + (local.get $33) + ) + (local.get $27) + ) + ) + (local.set $45 + (i32.sub + (i32.const -2) + (local.get $27) + ) + ) + (local.set $46 + (i32.add + (local.get $28) + (i32.const 2) + ) + ) + (local.set $48 + (i32.add + (local.tee $47 + (i32.add + (local.get $23) + (i32.const 24) + ) + ) + (i32.const 288) + ) + ) + (local.set $41 + (local.tee $31 + (i32.add + (local.get $19) + (i32.const 9) + ) + ) + ) + (local.set $34 + (i32.add + (local.get $19) + (i32.const 8) + ) + ) + (local.set $15 + (i32.const 0) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $17 + (i32.const 0) + ) + (block $label$2 + (block $label$3 + (loop $label$4 + (block $label$5 + (if + (i32.gt_s + (local.get $15) + (i32.const -1) + ) + (local.set $15 + (if (result i32) + (i32.gt_s + (local.get $10) + (i32.sub + (i32.const 2147483647) + (local.get $15) + ) + ) + (block (result i32) + (i32.store + (call $12) + (i32.const 75) + ) + (i32.const -1) + ) + (i32.add + (local.get $10) + (local.get $15) + ) + ) + ) + ) + (br_if $label$3 + (i32.eqz + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.get $1) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) + (local.set $11 + (local.get $1) + ) + (block $label$9 + (block $label$10 + (loop $label$11 + (block $label$12 + (block $label$13 + (block $label$14 + (block $label$15 + (br_table $label$14 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$15 $label$13 + (i32.sub + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 0) + ) + ) + ) + (local.set $5 + (local.get $11) + ) + (br $label$10) + ) + (local.set $5 + (local.get $11) + ) + (br $label$12) + ) + (local.set $5 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (br $label$11) + ) + ) + (br $label$9) + ) + (loop $label$16 + (br_if $label$9 + (i32.ne + (i32.load8_s offset=1 + (local.get $5) + ) + (i32.const 37) + ) + ) + (local.set $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + (br_if $label$16 + (i32.eq + (i32.load8_s + (local.tee $5 + (i32.add + 
(local.get $5) + (i32.const 2) + ) + ) + ) + (i32.const 37) + ) + ) + ) + ) + (local.set $10 + (i32.sub + (local.get $11) + (local.get $1) + ) + ) + (if + (local.get $30) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $1) + (local.get $10) + (local.get $0) + ) + ) + ) + ) + (if + (local.get $10) + (block + (local.set $1 + (local.get $5) + ) + (br $label$4) + ) + ) + (local.set $10 + (if (result i32) + (i32.lt_u + (local.tee $9 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $10 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (block (result i32) + (local.set $10 + (i32.add + (local.get $5) + (i32.const 3) + ) + ) + (if + (local.tee $12 + (i32.eq + (i32.load8_s offset=2 + (local.get $5) + ) + (i32.const 36) + ) + ) + (local.set $11 + (local.get $10) + ) + ) + (if + (local.get $12) + (local.set $17 + (i32.const 1) + ) + ) + (local.set $5 + (i32.load8_s + (local.get $11) + ) + ) + (if + (i32.eqz + (local.get $12) + ) + (local.set $9 + (i32.const -1) + ) + ) + (local.get $17) + ) + (block (result i32) + (local.set $5 + (local.get $10) + ) + (local.set $9 + (i32.const -1) + ) + (local.get $17) + ) + ) + ) + (block $label$25 + (if + (i32.lt_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -32) + ) + ) + (i32.const 32) + ) + (block + (local.set $17 + (i32.const 0) + ) + (loop $label$27 + (br_if $label$25 + (i32.eqz + (i32.and + (i32.shl + (i32.const 1) + (local.get $12) + ) + (i32.const 75913) + ) + ) + ) + (local.set $17 + (i32.or + (i32.shl + (i32.const 1) + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -32) + ) + ) + (local.get $17) + ) + ) + (br_if $label$27 + (i32.lt_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -32) + ) + ) + (i32.const 32) + ) + ) + ) + ) + (local.set $17 + (i32.const 0) + ) + ) + ) + (block $label$29 + (if + (i32.eq + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 42) + ) + (block + (local.set $11 + (block $label$31 (result i32) + (block $label$32 + (br_if $label$32 + (i32.ge_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + ) + (br_if $label$32 + (i32.ne + (i32.load8_s offset=2 + (local.get $11) + ) + (i32.const 36) + ) + ) + (i32.store + (i32.add + (local.get $4) + (i32.shl + (local.get $12) + (i32.const 2) + ) + ) + (i32.const 10) + ) + (local.set $8 + (i32.const 1) + ) + (local.set $10 + (i32.wrap_i64 + (i64.load + (i32.add + (local.get $3) + (i32.shl + (i32.add + (i32.load8_s + (local.get $7) + ) + (i32.const -48) + ) + (i32.const 3) + ) + ) + ) + ) + ) + (br $label$31 + (i32.add + (local.get $11) + (i32.const 3) + ) + ) + ) + (if + (local.get $10) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (if + (i32.eqz + (local.get $30) + ) + (block + (local.set $12 + (local.get $17) + ) + (local.set $17 + (i32.const 0) + ) + (local.set $11 + (local.get $7) + ) + (local.set $10 + (i32.const 0) + ) + 
(br $label$29) + ) + ) + (local.set $10 + (i32.load + (local.tee $11 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $11) + (i32.const 4) + ) + ) + (local.set $8 + (i32.const 0) + ) + (local.get $7) + ) + ) + (local.set $12 + (i32.or + (local.get $17) + (i32.const 8192) + ) + ) + (local.set $7 + (i32.sub + (i32.const 0) + (local.get $10) + ) + ) + (local.set $5 + (i32.load8_s + (local.get $11) + ) + ) + (if + (i32.eqz + (local.tee $6 + (i32.lt_s + (local.get $10) + (i32.const 0) + ) + ) + ) + (local.set $12 + (local.get $17) + ) + ) + (local.set $17 + (local.get $8) + ) + (if + (local.get $6) + (local.set $10 + (local.get $7) + ) + ) + ) + (if + (i32.lt_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (block + (local.set $7 + (i32.const 0) + ) + (local.set $5 + (local.get $12) + ) + (loop $label$39 + (local.set $7 + (i32.add + (i32.mul + (local.get $7) + (i32.const 10) + ) + (local.get $5) + ) + ) + (br_if $label$39 + (i32.lt_u + (local.tee $5 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $12 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + ) + ) + (if + (i32.lt_s + (local.get $7) + (i32.const 0) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + (block + (local.set $5 + (local.get $12) + ) + (local.set $12 + (local.get $17) + ) + (local.set $17 + (local.get $10) + ) + (local.set $10 + (local.get $7) + ) + ) + ) + ) + (block + (local.set $12 + (local.get $17) + ) + (local.set $17 + (local.get $10) + ) + (local.set $10 + (i32.const 0) + ) + ) + ) + ) + ) + (block $label$43 + (if + (i32.eq + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 46) + ) + (block + (if + (i32.ne + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 42) + ) + (block + (if + (i32.lt_u + (local.tee $5 + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (block + (local.set $11 + (local.get $7) + ) + (local.set $7 + (i32.const 0) + ) + ) + (block + (local.set $5 + (i32.const 0) + ) + (local.set $11 + (local.get $7) + ) + (br $label$43) + ) + ) + (loop $label$48 + (local.set $5 + (i32.add + (i32.mul + (local.get $7) + (i32.const 10) + ) + (local.get $5) + ) + ) + (br_if $label$43 + (i32.ge_u + (local.tee $8 + (i32.add + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + ) + (local.set $7 + (local.get $5) + ) + (local.set $5 + (local.get $8) + ) + (br $label$48) + ) + ) + ) + (if + (i32.lt_u + (local.tee $5 + (i32.add + (i32.load8_s + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 2) + ) + ) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (if + (i32.eq + (i32.load8_s offset=3 + (local.get $11) + ) + (i32.const 36) + ) + (block + (i32.store + (i32.add + (local.get $4) + (i32.shl + (local.get $5) + (i32.const 2) + ) + ) + (i32.const 10) + ) + (local.set $5 + (i32.wrap_i64 + (i64.load + (i32.add + (local.get $3) + (i32.shl + (i32.add + (i32.load8_s + (local.get $7) + ) + (i32.const 
-48) + ) + (i32.const 3) + ) + ) + ) + ) + ) + (local.set $11 + (i32.add + (local.get $11) + (i32.const 4) + ) + ) + (br $label$43) + ) + ) + ) + (if + (local.get $17) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (local.set $11 + (if (result i32) + (local.get $30) + (block (result i32) + (local.set $5 + (i32.load + (local.tee $11 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $11) + (i32.const 4) + ) + ) + (local.get $7) + ) + (block (result i32) + (local.set $5 + (i32.const 0) + ) + (local.get $7) + ) + ) + ) + ) + (local.set $5 + (i32.const -1) + ) + ) + ) + (local.set $7 + (local.get $11) + ) + (local.set $8 + (i32.const 0) + ) + (loop $label$55 + (if + (i32.gt_u + (local.tee $6 + (i32.add + (i32.load8_s + (local.get $7) + ) + (i32.const -65) + ) + ) + (i32.const 57) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (local.set $11 + (i32.add + (local.get $7) + (i32.const 1) + ) + ) + (if + (i32.lt_u + (i32.add + (local.tee $6 + (i32.and + (local.tee $13 + (i32.load8_s + (i32.add + (i32.add + (i32.mul + (local.get $8) + (i32.const 58) + ) + (i32.const 1155) + ) + (local.get $6) + ) + ) + ) + (i32.const 255) + ) + ) + (i32.const -1) + ) + (i32.const 8) + ) + (block + (local.set $7 + (local.get $11) + ) + (local.set $8 + (local.get $6) + ) + (br $label$55) + ) + ) + ) + (if + (i32.eqz + (i32.shr_s + (i32.shl + (local.get $13) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (local.set $14 + (i32.gt_s + (local.get $9) + (i32.const -1) + ) + ) + (block $label$59 + (block $label$60 + (if + (i32.eq + (i32.shr_s + (i32.shl + (local.get $13) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 19) + ) + (if + (local.get $14) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + (br $label$60) + ) + (block + (if + (local.get $14) + (block + (i32.store + (i32.add + (local.get $4) + (i32.shl + (local.get $9) + (i32.const 2) + ) + ) + (local.get $6) + ) + (i64.store + (local.get $16) + (i64.load + (i32.add + (local.get $3) + (i32.shl + (local.get $9) + (i32.const 3) + ) + ) + ) + ) + (br $label$60) + ) + ) + (if + (i32.eqz + (local.get $30) + ) + (block + (local.set $15 + (i32.const 0) + ) + (br $label$5) + ) + ) + (call $22 + (local.get $16) + (local.get $6) + (local.get $2) + ) + ) + ) + (br $label$59) + ) + (if + (i32.eqz + (local.get $30) + ) + (block + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + ) + ) + (local.set $9 + (i32.and + (local.tee $7 + (i32.load8_s + (local.get $7) + ) + ) + (i32.const -33) + ) + ) + (if + (i32.eqz + (i32.and + (i32.ne + (local.get $8) + (i32.const 0) + ) + (i32.eq + (i32.and + (local.get $7) + (i32.const 15) + ) + (i32.const 3) + ) + ) + ) + (local.set $9 + (local.get $7) + ) + ) + (local.set $7 + (i32.and + (local.get $12) + (i32.const -65537) + ) + ) + (if + (i32.and + (local.get $12) + (i32.const 8192) + ) + (local.set $12 + (local.get $7) + ) + ) + (block $label$70 + (block $label$71 + (block $label$72 + (block $label$73 + (block $label$74 + (block $label$75 + (block $label$76 + (block $label$77 + (block $label$78 + (block $label$79 + (block $label$80 + (block $label$81 + (block $label$82 + (block $label$83 + (block $label$84 + (block $label$85 + (block $label$86 + (block $label$87 + (block $label$88 + (block $label$89 + (br_table $label$78 $label$77 $label$80 
$label$77 $label$78 $label$78 $label$78 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$79 $label$77 $label$77 $label$77 $label$77 $label$87 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$78 $label$77 $label$83 $label$85 $label$78 $label$78 $label$78 $label$77 $label$85 $label$77 $label$77 $label$77 $label$82 $label$89 $label$86 $label$88 $label$77 $label$77 $label$81 $label$77 $label$84 $label$77 $label$77 $label$87 $label$77 + (i32.sub + (local.get $9) + (i32.const 65) + ) + ) + ) + (block $label$90 + (block $label$91 + (block $label$92 + (block $label$93 + (block $label$94 + (block $label$95 + (block $label$96 + (block $label$97 + (br_table $label$97 $label$96 $label$95 $label$94 $label$93 $label$90 $label$92 $label$91 $label$90 + (i32.sub + (i32.shr_s + (i32.shl + (i32.and + (local.get $8) + (i32.const 255) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 0) + ) + ) + ) + (i32.store + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i64.store + (i32.load + (local.get $16) + ) + (i64.extend_i32_s + (local.get $15) + ) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store16 + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store8 + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i64.store + (i32.load + (local.get $16) + ) + (i64.extend_i32_s + (local.get $15) + ) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $12 + (i32.or + (local.get $12) + (i32.const 8) + ) + ) + (if + (i32.le_u + (local.get $5) + (i32.const 8) + ) + (local.set $5 + (i32.const 8) + ) + ) + (local.set $9 + (i32.const 120) + ) + (br $label$76) + ) + (br $label$76) + ) + (if + (i64.eq + (local.tee $50 + (i64.load + (local.get $16) + ) + ) + (i64.const 0) + ) + (local.set $7 + (local.get $21) + ) + (block + (local.set $1 + (local.get $21) + ) + (loop $label$101 + (i64.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i64.or + (i64.and + (local.get $50) + (i64.const 7) + ) + (i64.const 48) + ) + ) + (br_if $label$101 + (i64.ne + (local.tee $50 + (i64.shr_u + (local.get $50) + (i64.const 3) + ) + ) + (i64.const 0) + ) + ) + (local.set $7 + (local.get $1) + ) + ) + ) + ) + (if + (i32.and + (local.get $12) + (i32.const 8) + ) + (block + (local.set $8 + (i32.add + (local.tee $1 + (i32.sub + (local.get $38) + (local.get $7) + ) + ) + (i32.const 1) + ) + ) + (if + (i32.le_s + (local.get $5) + (local.get $1) + ) + (local.set $5 + (local.get $8) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1635) + ) + (br $label$71) + ) + (block + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1635) + ) + (br 
$label$71) + ) + ) + ) + (if + (i64.lt_s + (local.tee $50 + (i64.load + (local.get $16) + ) + ) + (i64.const 0) + ) + (block + (i64.store + (local.get $16) + (local.tee $50 + (i64.sub + (i64.const 0) + (local.get $50) + ) + ) + ) + (local.set $6 + (i32.const 1) + ) + (local.set $8 + (i32.const 1635) + ) + (br $label$75) + ) + ) + (if + (i32.and + (local.get $12) + (i32.const 2048) + ) + (block + (local.set $6 + (i32.const 1) + ) + (local.set $8 + (i32.const 1636) + ) + (br $label$75) + ) + (block + (local.set $6 + (local.tee $1 + (i32.and + (local.get $12) + (i32.const 1) + ) + ) + ) + (local.set $8 + (if (result i32) + (local.get $1) + (i32.const 1637) + (i32.const 1635) + ) + ) + (br $label$75) + ) + ) + ) + (local.set $50 + (i64.load + (local.get $16) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1635) + ) + (br $label$75) + ) + (i64.store8 + (local.get $39) + (i64.load + (local.get $16) + ) + ) + (local.set $1 + (local.get $39) + ) + (local.set $12 + (local.get $7) + ) + (local.set $7 + (i32.const 1) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1635) + ) + (local.set $5 + (local.get $21) + ) + (br $label$70) + ) + (local.set $1 + (call $24 + (i32.load + (call $12) + ) + ) + ) + (br $label$74) + ) + (if + (i32.eqz + (local.tee $1 + (i32.load + (local.get $16) + ) + ) + ) + (local.set $1 + (i32.const 1645) + ) + ) + (br $label$74) + ) + (i64.store32 + (local.get $37) + (i64.load + (local.get $16) + ) + ) + (i32.store + (local.get $42) + (i32.const 0) + ) + (i32.store + (local.get $16) + (local.get $37) + ) + (local.set $7 + (local.get $37) + ) + (local.set $6 + (i32.const -1) + ) + (br $label$73) + ) + (local.set $7 + (i32.load + (local.get $16) + ) + ) + (if + (local.get $5) + (block + (local.set $6 + (local.get $5) + ) + (br $label$73) + ) + (block + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (i32.const 0) + (local.get $12) + ) + (local.set $1 + (i32.const 0) + ) + (br $label$72) + ) + ) + ) + (local.set $52 + (f64.load + (local.get $16) + ) + ) + (i32.store + (local.get $20) + (i32.const 0) + ) + (local.set $26 + (if (result i32) + (i64.lt_s + (i64.reinterpret_f64 + (local.get $52) + ) + (i64.const 0) + ) + (block (result i32) + (local.set $24 + (i32.const 1) + ) + (local.set $52 + (f64.neg + (local.get $52) + ) + ) + (i32.const 1652) + ) + (block (result i32) + (local.set $1 + (i32.and + (local.get $12) + (i32.const 1) + ) + ) + (if (result i32) + (i32.and + (local.get $12) + (i32.const 2048) + ) + (block (result i32) + (local.set $24 + (i32.const 1) + ) + (i32.const 1655) + ) + (block (result i32) + (local.set $24 + (local.get $1) + ) + (if (result i32) + (local.get $1) + (i32.const 1658) + (i32.const 1653) + ) + ) + ) + ) + ) + ) + (block $label$119 + (if + (i64.lt_u + (i64.and + (i64.reinterpret_f64 + (local.get $52) + ) + (i64.const 9218868437227405312) + ) + (i64.const 9218868437227405312) + ) + (block + (if + (local.tee $1 + (f64.ne + (local.tee $52 + (f64.mul + (call $27 + (local.get $52) + (local.get $20) + ) + (f64.const 2) + ) + ) + (f64.const 0) + ) + ) + (i32.store + (local.get $20) + (i32.add + (i32.load + (local.get $20) + ) + (i32.const -1) + ) + ) + ) + (if + (i32.eq + (local.tee $22 + (i32.or + (local.get $9) + (i32.const 32) + ) + ) + (i32.const 97) + ) + (block + (local.set $1 + (i32.add + (local.get $26) + (i32.const 9) + ) + ) + (if + (local.tee $6 + (i32.and + (local.get $9) + (i32.const 32) + ) + ) + (local.set $26 + (local.get $1) + ) + ) + (if + (i32.eqz + (i32.or + (i32.gt_u + (local.get 
$5) + (i32.const 11) + ) + (i32.eqz + (local.tee $1 + (i32.sub + (i32.const 12) + (local.get $5) + ) + ) + ) + ) + ) + (block + (local.set $53 + (f64.const 8) + ) + (loop $label$125 + (local.set $53 + (f64.mul + (local.get $53) + (f64.const 16) + ) + ) + (br_if $label$125 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + ) + ) + (local.set $52 + (if (result f64) + (i32.eq + (i32.load8_s + (local.get $26) + ) + (i32.const 45) + ) + (f64.neg + (f64.add + (local.get $53) + (f64.sub + (f64.neg + (local.get $52) + ) + (local.get $53) + ) + ) + ) + (f64.sub + (f64.add + (local.get $52) + (local.get $53) + ) + (local.get $53) + ) + ) + ) + ) + ) + (local.set $1 + (i32.sub + (i32.const 0) + (local.tee $7 + (i32.load + (local.get $20) + ) + ) + ) + ) + (if + (i32.eq + (local.tee $1 + (call $23 + (i64.extend_i32_s + (if (result i32) + (i32.lt_s + (local.get $7) + (i32.const 0) + ) + (local.get $1) + (local.get $7) + ) + ) + (local.get $33) + ) + ) + (local.get $33) + ) + (block + (i32.store8 + (local.get $40) + (i32.const 48) + ) + (local.set $1 + (local.get $40) + ) + ) + ) + (local.set $13 + (i32.or + (local.get $24) + (i32.const 2) + ) + ) + (i32.store8 + (i32.add + (local.get $1) + (i32.const -1) + ) + (i32.add + (i32.and + (i32.shr_s + (local.get $7) + (i32.const 31) + ) + (i32.const 2) + ) + (i32.const 43) + ) + ) + (i32.store8 + (local.tee $8 + (i32.add + (local.get $1) + (i32.const -2) + ) + ) + (i32.add + (local.get $9) + (i32.const 15) + ) + ) + (local.set $9 + (i32.lt_s + (local.get $5) + (i32.const 1) + ) + ) + (local.set $14 + (i32.eqz + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + ) + (local.set $1 + (local.get $19) + ) + (loop $label$131 + (i32.store8 + (local.get $1) + (i32.or + (i32.load8_u + (i32.add + (local.tee $7 + (i32.trunc_f64_s + (local.get $52) + ) + ) + (i32.const 1619) + ) + ) + (local.get $6) + ) + ) + (local.set $52 + (f64.mul + (f64.sub + (local.get $52) + (f64.convert_i32_s + (local.get $7) + ) + ) + (f64.const 16) + ) + ) + (local.set $1 + (block $label$132 (result i32) + (if (result i32) + (i32.eq + (i32.sub + (local.tee $7 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.get $27) + ) + (i32.const 1) + ) + (block (result i32) + (drop + (br_if $label$132 + (local.get $7) + (i32.and + (local.get $14) + (i32.and + (local.get $9) + (f64.eq + (local.get $52) + (f64.const 0) + ) + ) + ) + ) + ) + (i32.store8 + (local.get $7) + (i32.const 46) + ) + (i32.add + (local.get $1) + (i32.const 2) + ) + ) + (local.get $7) + ) + ) + ) + (br_if $label$131 + (f64.ne + (local.get $52) + (f64.const 0) + ) + ) + ) + (local.set $6 + (i32.sub + (i32.add + (local.get $46) + (local.get $5) + ) + (local.tee $7 + (local.get $8) + ) + ) + ) + (local.set $9 + (i32.add + (i32.sub + (local.get $44) + (local.get $7) + ) + (local.get $1) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.tee $5 + (i32.add + (if (result i32) + (i32.and + (i32.ne + (local.get $5) + (i32.const 0) + ) + (i32.lt_s + (i32.add + (local.get $45) + (local.get $1) + ) + (local.get $5) + ) + ) + (local.get $6) + (local.tee $6 + (local.get $9) + ) + ) + (local.get $13) + ) + ) + (local.get $12) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $26) + (local.get $13) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (local.get $10) + (local.get $5) + (i32.xor + (local.get $12) + (i32.const 65536) + ) + ) + (local.set $1 + (i32.sub + (local.get $1) + (local.get 
$27) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $19) + (local.get $1) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (i32.sub + (local.get $6) + (i32.add + (local.get $1) + (local.tee $1 + (i32.sub + (local.get $28) + (local.get $7) + ) + ) + ) + ) + (i32.const 0) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $8) + (local.get $1) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $5) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.ge_s + (local.get $5) + (local.get $10) + ) + (local.set $10 + (local.get $5) + ) + ) + (br $label$119) + ) + ) + (if + (local.get $1) + (block + (i32.store + (local.get $20) + (local.tee $6 + (i32.add + (i32.load + (local.get $20) + ) + (i32.const -28) + ) + ) + ) + (local.set $52 + (f64.mul + (local.get $52) + (f64.const 268435456) + ) + ) + ) + (local.set $6 + (i32.load + (local.get $20) + ) + ) + ) + (local.set $8 + (local.tee $7 + (if (result i32) + (i32.lt_s + (local.get $6) + (i32.const 0) + ) + (local.get $47) + (local.get $48) + ) + ) + ) + (loop $label$145 + (i32.store + (local.get $8) + (local.tee $1 + (i32.trunc_f64_s + (local.get $52) + ) + ) + ) + (local.set $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + (br_if $label$145 + (f64.ne + (local.tee $52 + (f64.mul + (f64.sub + (local.get $52) + (f64.convert_i32_u + (local.get $1) + ) + ) + (f64.const 1e9) + ) + ) + (f64.const 0) + ) + ) + ) + (if + (i32.gt_s + (local.get $6) + (i32.const 0) + ) + (block + (local.set $1 + (local.get $7) + ) + (loop $label$147 + (local.set $14 + (if (result i32) + (i32.gt_s + (local.get $6) + (i32.const 29) + ) + (i32.const 29) + (local.get $6) + ) + ) + (block $label$150 + (if + (i32.ge_u + (local.tee $6 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + (local.get $1) + ) + (block + (local.set $50 + (i64.extend_i32_u + (local.get $14) + ) + ) + (local.set $13 + (i32.const 0) + ) + (loop $label$152 + (i64.store32 + (local.get $6) + (i64.rem_u + (local.tee $51 + (i64.add + (i64.shl + (i64.extend_i32_u + (i32.load + (local.get $6) + ) + ) + (local.get $50) + ) + (i64.extend_i32_u + (local.get $13) + ) + ) + ) + (i64.const 1000000000) + ) + ) + (local.set $13 + (i32.wrap_i64 + (i64.div_u + (local.get $51) + (i64.const 1000000000) + ) + ) + ) + (br_if $label$152 + (i32.ge_u + (local.tee $6 + (i32.add + (local.get $6) + (i32.const -4) + ) + ) + (local.get $1) + ) + ) + ) + (br_if $label$150 + (i32.eqz + (local.get $13) + ) + ) + (i32.store + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -4) + ) + ) + (local.get $13) + ) + ) + ) + ) + (loop $label$153 + (if + (i32.gt_u + (local.get $8) + (local.get $1) + ) + (if + (i32.eqz + (i32.load + (local.tee $6 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + ) + ) + (block + (local.set $8 + (local.get $6) + ) + (br $label$153) + ) + ) + ) + ) + (i32.store + (local.get $20) + (local.tee $6 + (i32.sub + (i32.load + (local.get $20) + ) + (local.get $14) + ) + ) + ) + (br_if $label$147 + (i32.gt_s + (local.get $6) + (i32.const 0) + ) + ) + ) + ) + (local.set $1 + (local.get $7) + ) + ) + (local.set $18 + (if (result i32) + (i32.lt_s + (local.get $5) + (i32.const 0) + ) + (i32.const 6) + (local.get $5) + ) + ) + (if + (i32.lt_s + (local.get $6) + (i32.const 0) + ) + (block + (local.set $14 + (i32.add + (i32.div_s + (i32.add + (local.get 
$18) + (i32.const 25) + ) + (i32.const 9) + ) + (i32.const 1) + ) + ) + (local.set $25 + (i32.eq + (local.get $22) + (i32.const 102) + ) + ) + (local.set $5 + (local.get $8) + ) + (loop $label$160 + (if + (i32.gt_s + (local.tee $13 + (i32.sub + (i32.const 0) + (local.get $6) + ) + ) + (i32.const 9) + ) + (local.set $13 + (i32.const 9) + ) + ) + (block $label$162 + (if + (i32.lt_u + (local.get $1) + (local.get $5) + ) + (block + (local.set $29 + (i32.add + (i32.shl + (i32.const 1) + (local.get $13) + ) + (i32.const -1) + ) + ) + (local.set $35 + (i32.shr_u + (i32.const 1000000000) + (local.get $13) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (local.get $1) + ) + (loop $label$164 + (i32.store + (local.get $8) + (i32.add + (i32.shr_u + (local.tee $32 + (i32.load + (local.get $8) + ) + ) + (local.get $13) + ) + (local.get $6) + ) + ) + (local.set $6 + (i32.mul + (i32.and + (local.get $32) + (local.get $29) + ) + (local.get $35) + ) + ) + (br_if $label$164 + (i32.lt_u + (local.tee $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + (local.get $5) + ) + ) + ) + (local.set $8 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (if + (i32.eqz + (i32.load + (local.get $1) + ) + ) + (local.set $1 + (local.get $8) + ) + ) + (br_if $label$162 + (i32.eqz + (local.get $6) + ) + ) + (i32.store + (local.get $5) + (local.get $6) + ) + (local.set $5 + (i32.add + (local.get $5) + (i32.const 4) + ) + ) + ) + (block + (local.set $8 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (if + (i32.eqz + (i32.load + (local.get $1) + ) + ) + (local.set $1 + (local.get $8) + ) + ) + ) + ) + ) + (local.set $6 + (i32.add + (local.tee $8 + (if (result i32) + (local.get $25) + (local.get $7) + (local.get $1) + ) + ) + (i32.shl + (local.get $14) + (i32.const 2) + ) + ) + ) + (if + (i32.gt_s + (i32.shr_s + (i32.sub + (local.get $5) + (local.get $8) + ) + (i32.const 2) + ) + (local.get $14) + ) + (local.set $5 + (local.get $6) + ) + ) + (i32.store + (local.get $20) + (local.tee $6 + (i32.add + (i32.load + (local.get $20) + ) + (local.get $13) + ) + ) + ) + (br_if $label$160 + (i32.lt_s + (local.get $6) + (i32.const 0) + ) + ) + (local.set $13 + (local.get $5) + ) + ) + ) + (local.set $13 + (local.get $8) + ) + ) + (local.set $25 + (local.get $7) + ) + (block $label$172 + (if + (i32.lt_u + (local.get $1) + (local.get $13) + ) + (block + (local.set $5 + (i32.mul + (i32.shr_s + (i32.sub + (local.get $25) + (local.get $1) + ) + (i32.const 2) + ) + (i32.const 9) + ) + ) + (br_if $label$172 + (i32.lt_u + (local.tee $6 + (i32.load + (local.get $1) + ) + ) + (i32.const 10) + ) + ) + (local.set $8 + (i32.const 10) + ) + (loop $label$174 + (local.set $5 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$174 + (i32.ge_u + (local.get $6) + (local.tee $8 + (i32.mul + (local.get $8) + (i32.const 10) + ) + ) + ) + ) + ) + ) + (local.set $5 + (i32.const 0) + ) + ) + ) + (local.set $29 + (i32.eq + (local.get $22) + (i32.const 103) + ) + ) + (local.set $35 + (i32.ne + (local.get $18) + (i32.const 0) + ) + ) + (if + (i32.lt_s + (local.tee $8 + (i32.add + (i32.sub + (local.get $18) + (if (result i32) + (i32.ne + (local.get $22) + (i32.const 102) + ) + (local.get $5) + (i32.const 0) + ) + ) + (i32.shr_s + (i32.shl + (i32.and + (local.get $35) + (local.get $29) + ) + (i32.const 31) + ) + (i32.const 31) + ) + ) + ) + (i32.add + (i32.mul + (i32.shr_s + (i32.sub + (local.get $13) + (local.get $25) + ) + (i32.const 2) + ) + (i32.const 9) + ) + (i32.const -9) + ) + ) + (block + (if + (i32.lt_s + (local.tee 
$8 + (i32.add + (i32.rem_s + (local.tee $14 + (i32.add + (local.get $8) + (i32.const 9216) + ) + ) + (i32.const 9) + ) + (i32.const 1) + ) + ) + (i32.const 9) + ) + (block + (local.set $6 + (i32.const 10) + ) + (loop $label$180 + (local.set $6 + (i32.mul + (local.get $6) + (i32.const 10) + ) + ) + (br_if $label$180 + (i32.ne + (local.tee $8 + (i32.add + (local.get $8) + (i32.const 1) + ) + ) + (i32.const 9) + ) + ) + ) + ) + (local.set $6 + (i32.const 10) + ) + ) + (local.set $14 + (i32.rem_u + (local.tee $22 + (i32.load + (local.tee $8 + (i32.add + (i32.add + (local.get $7) + (i32.const 4) + ) + (i32.shl + (i32.add + (i32.div_s + (local.get $14) + (i32.const 9) + ) + (i32.const -1024) + ) + (i32.const 2) + ) + ) + ) + ) + ) + (local.get $6) + ) + ) + (block $label$182 + (if + (i32.eqz + (i32.and + (local.tee $32 + (i32.eq + (i32.add + (local.get $8) + (i32.const 4) + ) + (local.get $13) + ) + ) + (i32.eqz + (local.get $14) + ) + ) + ) + (block + (local.set $52 + (if (result f64) + (i32.lt_u + (local.get $14) + (local.tee $49 + (i32.div_s + (local.get $6) + (i32.const 2) + ) + ) + ) + (f64.const 0.5) + (if (result f64) + (i32.and + (local.get $32) + (i32.eq + (local.get $14) + (local.get $49) + ) + ) + (f64.const 1) + (f64.const 1.5) + ) + ) + ) + (local.set $53 + (if (result f64) + (i32.and + (i32.div_u + (local.get $22) + (local.get $6) + ) + (i32.const 1) + ) + (f64.const 9007199254740994) + (f64.const 9007199254740992) + ) + ) + (block $label$190 + (if + (local.get $24) + (block + (br_if $label$190 + (i32.ne + (i32.load8_s + (local.get $26) + ) + (i32.const 45) + ) + ) + (local.set $53 + (f64.neg + (local.get $53) + ) + ) + (local.set $52 + (f64.neg + (local.get $52) + ) + ) + ) + ) + ) + (i32.store + (local.get $8) + (local.tee $14 + (i32.sub + (local.get $22) + (local.get $14) + ) + ) + ) + (br_if $label$182 + (f64.eq + (f64.add + (local.get $53) + (local.get $52) + ) + (local.get $53) + ) + ) + (i32.store + (local.get $8) + (local.tee $5 + (i32.add + (local.get $14) + (local.get $6) + ) + ) + ) + (if + (i32.gt_u + (local.get $5) + (i32.const 999999999) + ) + (loop $label$193 + (i32.store + (local.get $8) + (i32.const 0) + ) + (if + (i32.lt_u + (local.tee $8 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + (local.get $1) + ) + (i32.store + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -4) + ) + ) + (i32.const 0) + ) + ) + (i32.store + (local.get $8) + (local.tee $5 + (i32.add + (i32.load + (local.get $8) + ) + (i32.const 1) + ) + ) + ) + (br_if $label$193 + (i32.gt_u + (local.get $5) + (i32.const 999999999) + ) + ) + ) + ) + (local.set $5 + (i32.mul + (i32.shr_s + (i32.sub + (local.get $25) + (local.get $1) + ) + (i32.const 2) + ) + (i32.const 9) + ) + ) + (br_if $label$182 + (i32.lt_u + (local.tee $14 + (i32.load + (local.get $1) + ) + ) + (i32.const 10) + ) + ) + (local.set $6 + (i32.const 10) + ) + (loop $label$195 + (local.set $5 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$195 + (i32.ge_u + (local.get $14) + (local.tee $6 + (i32.mul + (local.get $6) + (i32.const 10) + ) + ) + ) + ) + ) + ) + ) + ) + (local.set $14 + (local.get $1) + ) + (local.set $6 + (local.get $5) + ) + (if + (i32.le_u + (local.get $13) + (local.tee $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + ) + (local.set $8 + (local.get $13) + ) + ) + ) + (block + (local.set $14 + (local.get $1) + ) + (local.set $6 + (local.get $5) + ) + (local.set $8 + (local.get $13) + ) + ) + ) + (local.set $32 + (i32.sub + (i32.const 0) + (local.get $6) + ) + ) + (loop $label$198 + 
(block $label$199 + (if + (i32.le_u + (local.get $8) + (local.get $14) + ) + (block + (local.set $22 + (i32.const 0) + ) + (br $label$199) + ) + ) + (if + (i32.load + (local.tee $1 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + ) + (local.set $22 + (i32.const 1) + ) + (block + (local.set $8 + (local.get $1) + ) + (br $label$198) + ) + ) + ) + ) + (block $label$203 + (if + (local.get $29) + (block + (local.set $1 + (if (result i32) + (i32.and + (i32.gt_s + (local.tee $1 + (i32.add + (i32.xor + (i32.and + (local.get $35) + (i32.const 1) + ) + (i32.const 1) + ) + (local.get $18) + ) + ) + (local.get $6) + ) + (i32.gt_s + (local.get $6) + (i32.const -5) + ) + ) + (block (result i32) + (local.set $5 + (i32.add + (local.get $9) + (i32.const -1) + ) + ) + (i32.sub + (i32.add + (local.get $1) + (i32.const -1) + ) + (local.get $6) + ) + ) + (block (result i32) + (local.set $5 + (i32.add + (local.get $9) + (i32.const -2) + ) + ) + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + ) + ) + (br_if $label$203 + (local.tee $13 + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + ) + (block $label$207 + (if + (local.get $22) + (block + (if + (i32.eqz + (local.tee $18 + (i32.load + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + ) + ) + (block + (local.set $9 + (i32.const 9) + ) + (br $label$207) + ) + ) + (if + (i32.rem_u + (local.get $18) + (i32.const 10) + ) + (block + (local.set $9 + (i32.const 0) + ) + (br $label$207) + ) + (block + (local.set $13 + (i32.const 10) + ) + (local.set $9 + (i32.const 0) + ) + ) + ) + (loop $label$212 + (local.set $9 + (i32.add + (local.get $9) + (i32.const 1) + ) + ) + (br_if $label$212 + (i32.eqz + (i32.rem_u + (local.get $18) + (local.tee $13 + (i32.mul + (local.get $13) + (i32.const 10) + ) + ) + ) + ) + ) + ) + ) + (local.set $9 + (i32.const 9) + ) + ) + ) + (local.set $18 + (i32.add + (i32.mul + (i32.shr_s + (i32.sub + (local.get $8) + (local.get $25) + ) + (i32.const 2) + ) + (i32.const 9) + ) + (i32.const -9) + ) + ) + (if + (i32.eq + (i32.or + (local.get $5) + (i32.const 32) + ) + (i32.const 102) + ) + (block + (local.set $13 + (i32.const 0) + ) + (if + (i32.ge_s + (local.get $1) + (if (result i32) + (i32.lt_s + (local.tee $9 + (i32.sub + (local.get $18) + (local.get $9) + ) + ) + (i32.const 0) + ) + (local.tee $9 + (i32.const 0) + ) + (local.get $9) + ) + ) + (local.set $1 + (local.get $9) + ) + ) + ) + (block + (local.set $13 + (i32.const 0) + ) + (if + (i32.ge_s + (local.get $1) + (if (result i32) + (i32.lt_s + (local.tee $9 + (i32.sub + (i32.add + (local.get $18) + (local.get $6) + ) + (local.get $9) + ) + ) + (i32.const 0) + ) + (local.tee $9 + (i32.const 0) + ) + (local.get $9) + ) + ) + (local.set $1 + (local.get $9) + ) + ) + ) + ) + ) + (block + (local.set $13 + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + (local.set $1 + (local.get $18) + ) + (local.set $5 + (local.get $9) + ) + ) + ) + ) + (if + (local.tee $25 + (i32.eq + (i32.or + (local.get $5) + (i32.const 32) + ) + (i32.const 102) + ) + ) + (block + (local.set $9 + (i32.const 0) + ) + (if + (i32.le_s + (local.get $6) + (i32.const 0) + ) + (local.set $6 + (i32.const 0) + ) + ) + ) + (block + (if + (i32.lt_s + (i32.sub + (local.get $28) + (local.tee $9 + (call $23 + (i64.extend_i32_s + (if (result i32) + (i32.lt_s + (local.get $6) + (i32.const 0) + ) + (local.get $32) + (local.get $6) + ) + ) + (local.get $33) + ) + ) + ) + (i32.const 2) + ) + (loop $label$229 + (i32.store8 + (local.tee $9 + (i32.add + (local.get $9) + (i32.const -1) + ) + ) + (i32.const 48) + ) + (br_if 
$label$229 + (i32.lt_s + (i32.sub + (local.get $28) + (local.get $9) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.store8 + (i32.add + (local.get $9) + (i32.const -1) + ) + (i32.add + (i32.and + (i32.shr_s + (local.get $6) + (i32.const 31) + ) + (i32.const 2) + ) + (i32.const 43) + ) + ) + (i32.store8 + (local.tee $6 + (i32.add + (local.get $9) + (i32.const -2) + ) + ) + (local.get $5) + ) + (local.set $9 + (local.get $6) + ) + (local.set $6 + (i32.sub + (local.get $28) + (local.get $6) + ) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.tee $18 + (i32.add + (i32.add + (i32.add + (i32.add + (local.get $24) + (i32.const 1) + ) + (local.get $1) + ) + (i32.ne + (local.tee $29 + (i32.or + (local.get $1) + (local.get $13) + ) + ) + (i32.const 0) + ) + ) + (local.get $6) + ) + ) + (local.get $12) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $26) + (local.get $24) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (local.get $10) + (local.get $18) + (i32.xor + (local.get $12) + (i32.const 65536) + ) + ) + (block $label$231 + (if + (local.get $25) + (block + (local.set $6 + (local.tee $9 + (if (result i32) + (i32.gt_u + (local.get $14) + (local.get $7) + ) + (local.get $7) + (local.get $14) + ) + ) + ) + (loop $label$235 + (local.set $5 + (call $23 + (i64.extend_i32_u + (i32.load + (local.get $6) + ) + ) + (local.get $31) + ) + ) + (block $label$236 + (if + (i32.eq + (local.get $6) + (local.get $9) + ) + (block + (br_if $label$236 + (i32.ne + (local.get $5) + (local.get $31) + ) + ) + (i32.store8 + (local.get $34) + (i32.const 48) + ) + (local.set $5 + (local.get $34) + ) + ) + (block + (br_if $label$236 + (i32.le_u + (local.get $5) + (local.get $19) + ) + ) + (drop + (call $41 + (local.get $19) + (i32.const 48) + (i32.sub + (local.get $5) + (local.get $27) + ) + ) + ) + (loop $label$239 + (br_if $label$239 + (i32.gt_u + (local.tee $5 + (i32.add + (local.get $5) + (i32.const -1) + ) + ) + (local.get $19) + ) + ) + ) + ) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $5) + (i32.sub + (local.get $41) + (local.get $5) + ) + (local.get $0) + ) + ) + ) + (if + (i32.le_u + (local.tee $5 + (i32.add + (local.get $6) + (i32.const 4) + ) + ) + (local.get $7) + ) + (block + (local.set $6 + (local.get $5) + ) + (br $label$235) + ) + ) + ) + (block $label$242 + (if + (local.get $29) + (block + (br_if $label$242 + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (i32.const 1687) + (i32.const 1) + (local.get $0) + ) + ) + ) + ) + ) + (if + (i32.and + (i32.gt_s + (local.get $1) + (i32.const 0) + ) + (i32.lt_u + (local.get $5) + (local.get $8) + ) + ) + (loop $label$245 + (if + (i32.gt_u + (local.tee $7 + (call $23 + (i64.extend_i32_u + (i32.load + (local.get $5) + ) + ) + (local.get $31) + ) + ) + (local.get $19) + ) + (block + (drop + (call $41 + (local.get $19) + (i32.const 48) + (i32.sub + (local.get $7) + (local.get $27) + ) + ) + ) + (loop $label$247 + (br_if $label$247 + (i32.gt_u + (local.tee $7 + (i32.add + (local.get $7) + (i32.const -1) + ) + ) + (local.get $19) + ) + ) + ) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $7) + (if (result i32) + (i32.gt_s + (local.get $1) + (i32.const 9) + ) + (i32.const 9) + (local.get $1) + ) + (local.get $0) + ) + ) + ) + (local.set $7 
+ (i32.add + (local.get $1) + (i32.const -9) + ) + ) + (if + (i32.and + (i32.gt_s + (local.get $1) + (i32.const 9) + ) + (i32.lt_u + (local.tee $5 + (i32.add + (local.get $5) + (i32.const 4) + ) + ) + (local.get $8) + ) + ) + (block + (local.set $1 + (local.get $7) + ) + (br $label$245) + ) + (local.set $1 + (local.get $7) + ) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (i32.add + (local.get $1) + (i32.const 9) + ) + (i32.const 9) + (i32.const 0) + ) + ) + (block + (local.set $5 + (i32.add + (local.get $14) + (i32.const 4) + ) + ) + (if + (i32.eqz + (local.get $22) + ) + (local.set $8 + (local.get $5) + ) + ) + (if + (i32.gt_s + (local.get $1) + (i32.const -1) + ) + (block + (local.set $13 + (i32.eqz + (local.get $13) + ) + ) + (local.set $7 + (local.get $14) + ) + (local.set $5 + (local.get $1) + ) + (loop $label$256 + (if + (i32.eq + (local.tee $1 + (call $23 + (i64.extend_i32_u + (i32.load + (local.get $7) + ) + ) + (local.get $31) + ) + ) + (local.get $31) + ) + (block + (i32.store8 + (local.get $34) + (i32.const 48) + ) + (local.set $1 + (local.get $34) + ) + ) + ) + (block $label$258 + (if + (i32.eq + (local.get $7) + (local.get $14) + ) + (block + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $1) + (i32.const 1) + (local.get $0) + ) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (br_if $label$258 + (i32.and + (local.get $13) + (i32.lt_s + (local.get $5) + (i32.const 1) + ) + ) + ) + (br_if $label$258 + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (i32.const 1687) + (i32.const 1) + (local.get $0) + ) + ) + ) + (block + (br_if $label$258 + (i32.le_u + (local.get $1) + (local.get $19) + ) + ) + (drop + (call $41 + (local.get $19) + (i32.const 48) + (i32.add + (local.get $1) + (local.get $43) + ) + ) + ) + (loop $label$262 + (br_if $label$262 + (i32.gt_u + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (local.get $19) + ) + ) + ) + ) + ) + ) + (local.set $6 + (i32.sub + (local.get $41) + (local.get $1) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $1) + (if (result i32) + (i32.gt_s + (local.get $5) + (local.get $6) + ) + (local.get $6) + (local.get $5) + ) + (local.get $0) + ) + ) + ) + (br_if $label$256 + (i32.and + (i32.lt_u + (local.tee $7 + (i32.add + (local.get $7) + (i32.const 4) + ) + ) + (local.get $8) + ) + (i32.gt_s + (local.tee $5 + (i32.sub + (local.get $5) + (local.get $6) + ) + ) + (i32.const -1) + ) + ) + ) + (local.set $1 + (local.get $5) + ) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (i32.add + (local.get $1) + (i32.const 18) + ) + (i32.const 18) + (i32.const 0) + ) + (br_if $label$231 + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $9) + (i32.sub + (local.get $28) + (local.get $9) + ) + (local.get $0) + ) + ) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $18) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.ge_s + (local.get $18) + (local.get $10) + ) + (local.set $10 + (local.get $18) + ) + ) + ) + (block + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.tee $8 + (i32.add + (if (result i32) + (local.tee $6 + (i32.or + (f64.ne + (local.get $52) + (local.get $52) + ) + (i32.const 0) + ) + ) + (local.tee $24 + (i32.const 0) + ) + (local.get $24) + ) + 
(i32.const 3) + ) + ) + (local.get $7) + ) + (if + (i32.eqz + (i32.and + (local.tee $1 + (i32.load + (local.get $0) + ) + ) + (i32.const 32) + ) + ) + (block + (drop + (call $21 + (local.get $26) + (local.get $24) + (local.get $0) + ) + ) + (local.set $1 + (i32.load + (local.get $0) + ) + ) + ) + ) + (local.set $7 + (if (result i32) + (local.tee $5 + (i32.ne + (i32.and + (local.get $9) + (i32.const 32) + ) + (i32.const 0) + ) + ) + (i32.const 1671) + (i32.const 1675) + ) + ) + (local.set $5 + (if (result i32) + (local.get $5) + (i32.const 1679) + (i32.const 1683) + ) + ) + (if + (i32.eqz + (local.get $6) + ) + (local.set $5 + (local.get $7) + ) + ) + (if + (i32.eqz + (i32.and + (local.get $1) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $5) + (i32.const 3) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $8) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.ge_s + (local.get $8) + (local.get $10) + ) + (local.set $10 + (local.get $8) + ) + ) + ) + ) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $7 + (local.get $5) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1635) + ) + (local.set $5 + (local.get $21) + ) + (br $label$70) + ) + (local.set $7 + (i32.and + (local.get $9) + (i32.const 32) + ) + ) + (local.set $7 + (if (result i32) + (i64.eq + (local.tee $50 + (i64.load + (local.get $16) + ) + ) + (i64.const 0) + ) + (block (result i32) + (local.set $50 + (i64.const 0) + ) + (local.get $21) + ) + (block (result i32) + (local.set $1 + (local.get $21) + ) + (loop $label$280 + (i32.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i32.or + (i32.load8_u + (i32.add + (i32.and + (i32.wrap_i64 + (local.get $50) + ) + (i32.const 15) + ) + (i32.const 1619) + ) + ) + (local.get $7) + ) + ) + (br_if $label$280 + (i64.ne + (local.tee $50 + (i64.shr_u + (local.get $50) + (i64.const 4) + ) + ) + (i64.const 0) + ) + ) + ) + (local.set $50 + (i64.load + (local.get $16) + ) + ) + (local.get $1) + ) + ) + ) + (local.set $8 + (i32.add + (i32.shr_s + (local.get $9) + (i32.const 4) + ) + (i32.const 1635) + ) + ) + (if + (local.tee $1 + (i32.or + (i32.eqz + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + (i64.eq + (local.get $50) + (i64.const 0) + ) + ) + ) + (local.set $8 + (i32.const 1635) + ) + ) + (local.set $6 + (if (result i32) + (local.get $1) + (i32.const 0) + (i32.const 2) + ) + ) + (br $label$71) + ) + (local.set $7 + (call $23 + (local.get $50) + (local.get $21) + ) + ) + (br $label$71) + ) + (local.set $14 + (i32.eqz + (local.tee $13 + (call $17 + (local.get $1) + (i32.const 0) + (local.get $5) + ) + ) + ) + ) + (local.set $8 + (i32.sub + (local.get $13) + (local.get $1) + ) + ) + (local.set $9 + (i32.add + (local.get $1) + (local.get $5) + ) + ) + (local.set $12 + (local.get $7) + ) + (local.set $7 + (if (result i32) + (local.get $14) + (local.get $5) + (local.get $8) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1635) + ) + (local.set $5 + (if (result i32) + (local.get $14) + (local.get $9) + (local.get $13) + ) + ) + (br $label$70) + ) + (local.set $1 + (i32.const 0) + ) + (local.set $5 + (i32.const 0) + ) + (local.set $8 + (local.get $7) + ) + (loop $label$288 + (block $label$289 + (br_if $label$289 + (i32.eqz + (local.tee $9 + (i32.load + (local.get $8) + ) + ) + ) + ) + (br_if $label$289 + (i32.or + (i32.lt_s + (local.tee $5 + (call $26 + (local.get $36) + (local.get $9) + ) + ) + (i32.const 0) + ) 
+ (i32.gt_u + (local.get $5) + (i32.sub + (local.get $6) + (local.get $1) + ) + ) + ) + ) + (local.set $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + (br_if $label$288 + (i32.gt_u + (local.get $6) + (local.tee $1 + (i32.add + (local.get $5) + (local.get $1) + ) + ) + ) + ) + ) + ) + (if + (i32.lt_s + (local.get $5) + (i32.const 0) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $1) + (local.get $12) + ) + (if + (local.get $1) + (block + (local.set $5 + (i32.const 0) + ) + (loop $label$292 + (br_if $label$72 + (i32.eqz + (local.tee $8 + (i32.load + (local.get $7) + ) + ) + ) + ) + (br_if $label$72 + (i32.gt_s + (local.tee $5 + (i32.add + (local.tee $8 + (call $26 + (local.get $36) + (local.get $8) + ) + ) + (local.get $5) + ) + ) + (local.get $1) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $36) + (local.get $8) + (local.get $0) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $7) + (i32.const 4) + ) + ) + (br_if $label$292 + (i32.lt_u + (local.get $5) + (local.get $1) + ) + ) + (br $label$72) + ) + ) + (block + (local.set $1 + (i32.const 0) + ) + (br $label$72) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $1) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.le_s + (local.get $10) + (local.get $1) + ) + (local.set $10 + (local.get $1) + ) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $1 + (i32.and + (local.get $12) + (i32.const -65537) + ) + ) + (if + (i32.gt_s + (local.get $5) + (i32.const -1) + ) + (local.set $12 + (local.get $1) + ) + ) + (local.set $5 + (if (result i32) + (i32.or + (local.get $5) + (local.tee $9 + (i64.ne + (i64.load + (local.get $16) + ) + (i64.const 0) + ) + ) + ) + (block (result i32) + (local.set $1 + (local.get $7) + ) + (if + (i32.gt_s + (local.get $5) + (local.tee $7 + (i32.add + (i32.xor + (i32.and + (local.get $9) + (i32.const 1) + ) + (i32.const 1) + ) + (i32.sub + (local.get $38) + (local.get $7) + ) + ) + ) + ) + (local.set $7 + (local.get $5) + ) + ) + (local.get $21) + ) + (block (result i32) + (local.set $1 + (local.get $21) + ) + (local.set $7 + (i32.const 0) + ) + (local.get $21) + ) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (if (result i32) + (i32.lt_s + (local.get $10) + (local.tee $5 + (i32.add + (if (result i32) + (i32.lt_s + (local.get $7) + (local.tee $9 + (i32.sub + (local.get $5) + (local.get $1) + ) + ) + ) + (local.tee $7 + (local.get $9) + ) + (local.get $7) + ) + (local.get $6) + ) + ) + ) + (local.tee $10 + (local.get $5) + ) + (local.get $10) + ) + (local.get $5) + (local.get $12) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $8) + (local.get $6) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (local.get $10) + (local.get $5) + (i32.xor + (local.get $12) + (i32.const 65536) + ) + ) + (call $25 + (local.get $0) + (i32.const 48) + (local.get $7) + (local.get $9) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $21 + (local.get $1) + (local.get $9) + (local.get $0) + ) + ) + ) + (call $25 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $5) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (local.set $1 + (local.get $11) + ) + (br 
$label$4) + ) + ) + (br $label$2) + ) + (if + (i32.eqz + (local.get $0) + ) + (if + (local.get $17) + (block + (local.set $0 + (i32.const 1) + ) + (loop $label$308 + (if + (local.tee $1 + (i32.load + (i32.add + (local.get $4) + (i32.shl + (local.get $0) + (i32.const 2) + ) + ) + ) + ) + (block + (call $22 + (i32.add + (local.get $3) + (i32.shl + (local.get $0) + (i32.const 3) + ) + ) + (local.get $1) + (local.get $2) + ) + (br_if $label$308 + (i32.lt_s + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (i32.const 10) + ) + ) + (local.set $15 + (i32.const 1) + ) + (br $label$2) + ) + ) + ) + (loop $label$310 + (if + (i32.load + (i32.add + (local.get $4) + (i32.shl + (local.get $0) + (i32.const 2) + ) + ) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$2) + ) + ) + (br_if $label$310 + (i32.lt_s + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (i32.const 10) + ) + ) + (local.set $15 + (i32.const 1) + ) + ) + ) + (local.set $15 + (i32.const 0) + ) + ) + ) + ) + (global.set $global$1 + (local.get $23) + ) + (local.get $15) + ) + ) + (func $20 (; 33 ;) (type $1) (param $0 i32) (result i32) + (i32.const 0) + ) + (func $21 (; 34 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (block $label$1 (result i32) + (block $label$2 + (block $label$3 + (br_if $label$3 + (local.tee $3 + (i32.load + (local.tee $4 + (i32.add + (local.get $2) + (i32.const 16) + ) + ) + ) + ) + ) + (if + (call $30 + (local.get $2) + ) + (local.set $3 + (i32.const 0) + ) + (block + (local.set $3 + (i32.load + (local.get $4) + ) + ) + (br $label$3) + ) + ) + (br $label$2) + ) + (if + (i32.lt_u + (i32.sub + (local.get $3) + (local.tee $4 + (i32.load + (local.tee $5 + (i32.add + (local.get $2) + (i32.const 20) + ) + ) + ) + ) + ) + (local.get $1) + ) + (block + (local.set $3 + (call_indirect (type $0) + (local.get $2) + (local.get $0) + (local.get $1) + (i32.add + (i32.and + (i32.load offset=36 + (local.get $2) + ) + (i32.const 3) + ) + (i32.const 2) + ) + ) + ) + (br $label$2) + ) + ) + (local.set $2 + (block $label$7 (result i32) + (if (result i32) + (i32.gt_s + (i32.load8_s offset=75 + (local.get $2) + ) + (i32.const -1) + ) + (block (result i32) + (local.set $3 + (local.get $1) + ) + (loop $label$9 + (drop + (br_if $label$7 + (i32.const 0) + (i32.eqz + (local.get $3) + ) + ) + ) + (if + (i32.ne + (i32.load8_s + (i32.add + (local.get $0) + (local.tee $6 + (i32.add + (local.get $3) + (i32.const -1) + ) + ) + ) + ) + (i32.const 10) + ) + (block + (local.set $3 + (local.get $6) + ) + (br $label$9) + ) + ) + ) + (br_if $label$2 + (i32.lt_u + (call_indirect (type $0) + (local.get $2) + (local.get $0) + (local.get $3) + (i32.add + (i32.and + (i32.load offset=36 + (local.get $2) + ) + (i32.const 3) + ) + (i32.const 2) + ) + ) + (local.get $3) + ) + ) + (local.set $4 + (i32.load + (local.get $5) + ) + ) + (local.set $1 + (i32.sub + (local.get $1) + (local.get $3) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (local.get $3) + ) + ) + (local.get $3) + ) + (i32.const 0) + ) + ) + ) + (drop + (call $42 + (local.get $4) + (local.get $0) + (local.get $1) + ) + ) + (i32.store + (local.get $5) + (i32.add + (i32.load + (local.get $5) + ) + (local.get $1) + ) + ) + (local.set $3 + (i32.add + (local.get $2) + (local.get $1) + ) + ) + ) + (local.get $3) + ) + ) + (func $22 (; 35 ;) (type $8) (param $0 i32) (param $1 i32) (param $2 i32) + (local $3 i32) + (local $4 i64) + (local $5 f64) + (block 
$label$1 + (if + (i32.le_u + (local.get $1) + (i32.const 20) + ) + (block $label$3 + (block $label$4 + (block $label$5 + (block $label$6 + (block $label$7 + (block $label$8 + (block $label$9 + (block $label$10 + (block $label$11 + (block $label$12 + (block $label$13 + (br_table $label$13 $label$12 $label$11 $label$10 $label$9 $label$8 $label$7 $label$6 $label$5 $label$4 $label$3 + (i32.sub + (local.get $1) + (i32.const 9) + ) + ) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i32.store + (local.get $0) + (local.get $3) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_s + (local.get $3) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_u + (local.get $3) + ) + ) + (br $label$1) + ) + (local.set $4 + (i64.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 7) + ) + (i32.const -8) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (i64.store + (local.get $0) + (local.get $4) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_s + (i32.shr_s + (i32.shl + (i32.and + (local.get $3) + (i32.const 65535) + ) + (i32.const 16) + ) + (i32.const 16) + ) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_u + (i32.and + (local.get $3) + (i32.const 65535) + ) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_s + (i32.shr_s + (i32.shl + (i32.and + (local.get $3) + (i32.const 255) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_u + (i32.and + (local.get $3) + (i32.const 255) + ) + ) + ) + (br $label$1) + ) + (local.set $5 + (f64.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 7) + ) + (i32.const -8) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (f64.store + (local.get $0) 
+ (local.get $5) + ) + (br $label$1) + ) + (local.set $5 + (f64.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 7) + ) + (i32.const -8) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (f64.store + (local.get $0) + (local.get $5) + ) + ) + ) + ) + ) + (func $23 (; 36 ;) (type $9) (param $0 i64) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i64) + (block $label$1 (result i32) + (local.set $2 + (i32.wrap_i64 + (local.get $0) + ) + ) + (if + (i64.gt_u + (local.get $0) + (i64.const 4294967295) + ) + (block + (loop $label$3 + (i64.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i64.or + (i64.rem_u + (local.get $0) + (i64.const 10) + ) + (i64.const 48) + ) + ) + (local.set $4 + (i64.div_u + (local.get $0) + (i64.const 10) + ) + ) + (if + (i64.gt_u + (local.get $0) + (i64.const 42949672959) + ) + (block + (local.set $0 + (local.get $4) + ) + (br $label$3) + ) + ) + ) + (local.set $2 + (i32.wrap_i64 + (local.get $4) + ) + ) + ) + ) + (if + (local.get $2) + (loop $label$6 + (i32.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i32.or + (i32.rem_u + (local.get $2) + (i32.const 10) + ) + (i32.const 48) + ) + ) + (local.set $3 + (i32.div_u + (local.get $2) + (i32.const 10) + ) + ) + (if + (i32.ge_u + (local.get $2) + (i32.const 10) + ) + (block + (local.set $2 + (local.get $3) + ) + (br $label$6) + ) + ) + ) + ) + (local.get $1) + ) + ) + (func $24 (; 37 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (i32.const 0) + ) + (block $label$2 + (block $label$3 + (block $label$4 + (loop $label$5 + (br_if $label$4 + (i32.eq + (i32.load8_u + (i32.add + (local.get $1) + (i32.const 1689) + ) + ) + (local.get $0) + ) + ) + (br_if $label$5 + (i32.ne + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (i32.const 87) + ) + ) + (local.set $1 + (i32.const 87) + ) + (local.set $0 + (i32.const 1777) + ) + (br $label$3) + ) + ) + (if + (local.get $1) + (block + (local.set $0 + (i32.const 1777) + ) + (br $label$3) + ) + (local.set $0 + (i32.const 1777) + ) + ) + (br $label$2) + ) + (loop $label$8 + (local.set $2 + (local.get $0) + ) + (loop $label$9 + (local.set $0 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (if + (i32.load8_s + (local.get $2) + ) + (block + (local.set $2 + (local.get $0) + ) + (br $label$9) + ) + ) + ) + (br_if $label$8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + ) + ) + ) + (local.get $0) + ) + ) + (func $25 (; 38 ;) (type $10) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (param $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (block $label$1 + (local.set $7 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 256) + ) + ) + (local.set $6 + (local.get $7) + ) + (block $label$2 + (if + (i32.and + (i32.gt_s + (local.get $2) + (local.get $3) + ) + (i32.eqz + (i32.and + (local.get $4) + (i32.const 73728) + ) + ) + ) + (block + (drop + (call $41 + (local.get $6) + (local.get $1) + (if (result i32) + (i32.gt_u + (local.tee $5 + (i32.sub + (local.get $2) + (local.get $3) + ) + ) + (i32.const 256) + ) + (i32.const 256) + (local.get $5) + ) + ) + ) + (local.set $4 + (i32.eqz + (i32.and + (local.tee $1 + (i32.load + (local.get $0) + ) + ) + (i32.const 32) + ) + ) + ) + (if + (i32.gt_u + (local.get $5) + (i32.const 255) + ) + (block 
+ (loop $label$7 + (if + (local.get $4) + (block + (drop + (call $21 + (local.get $6) + (i32.const 256) + (local.get $0) + ) + ) + (local.set $1 + (i32.load + (local.get $0) + ) + ) + ) + ) + (local.set $4 + (i32.eqz + (i32.and + (local.get $1) + (i32.const 32) + ) + ) + ) + (br_if $label$7 + (i32.gt_u + (local.tee $5 + (i32.add + (local.get $5) + (i32.const -256) + ) + ) + (i32.const 255) + ) + ) + ) + (br_if $label$2 + (i32.eqz + (local.get $4) + ) + ) + (local.set $5 + (i32.and + (i32.sub + (local.get $2) + (local.get $3) + ) + (i32.const 255) + ) + ) + ) + (br_if $label$2 + (i32.eqz + (local.get $4) + ) + ) + ) + (drop + (call $21 + (local.get $6) + (local.get $5) + (local.get $0) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $7) + ) + ) + ) + (func $26 (; 39 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (if (result i32) + (local.get $0) + (call $29 + (local.get $0) + (local.get $1) + (i32.const 0) + ) + (i32.const 0) + ) + ) + (func $27 (; 40 ;) (type $11) (param $0 f64) (param $1 i32) (result f64) + (call $28 + (local.get $0) + (local.get $1) + ) + ) + (func $28 (; 41 ;) (type $11) (param $0 f64) (param $1 i32) (result f64) + (local $2 i64) + (local $3 i64) + (block $label$1 (result f64) + (block $label$2 + (block $label$3 + (block $label$4 + (block $label$5 + (br_table $label$5 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$4 $label$3 + (i32.sub + (i32.shr_s + (i32.shl + (i32.and + (i32.and + (i32.wrap_i64 + (local.tee $3 + (i64.shr_u + (local.tee $2 + (i64.reinterpret_f64 + (local.get $0) + ) + ) + (i64.const 52) + ) + ) + ) + (i32.const 65535) + ) + (i32.const 2047) + ) + (i32.const 16) + ) + (i32.const 16) + ) + (i32.const 0) + ) + ) + ) + (i32.store + (local.get $1) + (if (result i32) + (f64.ne + (local.get $0) + (f64.const 0) + ) + (block (result i32) + (local.set $0 + (call $28 + (f64.mul + (local.get $0) + (f64.const 18446744073709551615) + ) + (local.get $1) + ) + ) + (i32.add + (i32.load + (local.get $1) + ) + (i32.const -64) + ) + ) + (i32.const 0) + ) + ) + (br $label$2) + ) + (br $label$2) + ) + (i32.store + (local.get $1) + (i32.add + (i32.and + (i32.wrap_i64 + (local.get $3) + ) + (i32.const 2047) + ) + (i32.const -1022) + ) + ) + (local.set $0 + (f64.reinterpret_i64 + (i64.or + (i64.and + (local.get $2) + (i64.const -9218868437227405313) + ) + (i64.const 4602678819172646912) + ) + ) + ) + ) + (local.get $0) + ) + ) + (func $29 (; 42 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (block $label$1 (result i32) + (if (result i32) + (local.get $0) + (block (result i32) + (if + (i32.lt_u + (local.get $1) + (i32.const 128) + ) + (block + (i32.store8 + (local.get $0) + (local.get $1) + ) + (br $label$1 + (i32.const 1) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (i32.const 2048) + ) + (block + (i32.store8 + (local.get $0) + (i32.or + (i32.shr_u + (local.get $1) + (i32.const 6) + ) + (i32.const 192) + ) + ) + (i32.store8 offset=1 + 
(local.get $0) + (i32.or + (i32.and + (local.get $1) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (br $label$1 + (i32.const 2) + ) + ) + ) + (if + (i32.or + (i32.lt_u + (local.get $1) + (i32.const 55296) + ) + (i32.eq + (i32.and + (local.get $1) + (i32.const -8192) + ) + (i32.const 57344) + ) + ) + (block + (i32.store8 + (local.get $0) + (i32.or + (i32.shr_u + (local.get $1) + (i32.const 12) + ) + (i32.const 224) + ) + ) + (i32.store8 offset=1 + (local.get $0) + (i32.or + (i32.and + (i32.shr_u + (local.get $1) + (i32.const 6) + ) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.store8 offset=2 + (local.get $0) + (i32.or + (i32.and + (local.get $1) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (br $label$1 + (i32.const 3) + ) + ) + ) + (if (result i32) + (i32.lt_u + (i32.add + (local.get $1) + (i32.const -65536) + ) + (i32.const 1048576) + ) + (block (result i32) + (i32.store8 + (local.get $0) + (i32.or + (i32.shr_u + (local.get $1) + (i32.const 18) + ) + (i32.const 240) + ) + ) + (i32.store8 offset=1 + (local.get $0) + (i32.or + (i32.and + (i32.shr_u + (local.get $1) + (i32.const 12) + ) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.store8 offset=2 + (local.get $0) + (i32.or + (i32.and + (i32.shr_u + (local.get $1) + (i32.const 6) + ) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.store8 offset=3 + (local.get $0) + (i32.or + (i32.and + (local.get $1) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.const 4) + ) + (block (result i32) + (i32.store + (call $12) + (i32.const 84) + ) + (i32.const -1) + ) + ) + ) + (i32.const 1) + ) + ) + ) + (func $30 (; 43 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (i32.load8_s + (local.tee $2 + (i32.add + (local.get $0) + (i32.const 74) + ) + ) + ) + ) + (i32.store8 + (local.get $2) + (i32.or + (i32.add + (local.get $1) + (i32.const 255) + ) + (local.get $1) + ) + ) + (local.tee $0 + (if (result i32) + (i32.and + (local.tee $1 + (i32.load + (local.get $0) + ) + ) + (i32.const 8) + ) + (block (result i32) + (i32.store + (local.get $0) + (i32.or + (local.get $1) + (i32.const 32) + ) + ) + (i32.const -1) + ) + (block (result i32) + (i32.store offset=8 + (local.get $0) + (i32.const 0) + ) + (i32.store offset=4 + (local.get $0) + (i32.const 0) + ) + (i32.store offset=28 + (local.get $0) + (local.tee $1 + (i32.load offset=44 + (local.get $0) + ) + ) + ) + (i32.store offset=20 + (local.get $0) + (local.get $1) + ) + (i32.store offset=16 + (local.get $0) + (i32.add + (local.get $1) + (i32.load offset=48 + (local.get $0) + ) + ) + ) + (i32.const 0) + ) + ) + ) + ) + ) + (func $31 (; 44 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (local $3 i32) + (block $label$1 (result i32) + (block $label$2 + (block $label$3 + (br_if $label$3 + (i32.eqz + (i32.and + (local.tee $2 + (local.get $0) + ) + (i32.const 3) + ) + ) + ) + (local.set $1 + (local.get $2) + ) + (loop $label$4 + (if + (i32.eqz + (i32.load8_s + (local.get $0) + ) + ) + (block + (local.set $0 + (local.get $1) + ) + (br $label$2) + ) + ) + (br_if $label$4 + (i32.and + (local.tee $1 + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 3) + ) + ) + (br $label$3) + ) + ) + (loop $label$6 + (local.set $1 + (i32.add + (local.get $0) + (i32.const 4) + ) + ) + (if + (i32.eqz + (i32.and + (i32.xor + (i32.and + (local.tee $3 + (i32.load + (local.get $0) + ) + ) + (i32.const -2139062144) + ) + (i32.const -2139062144) + ) + (i32.add + (local.get $3) + 
(i32.const -16843009) + ) + ) + ) + (block + (local.set $0 + (local.get $1) + ) + (br $label$6) + ) + ) + ) + (if + (i32.shr_s + (i32.shl + (i32.and + (local.get $3) + (i32.const 255) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (loop $label$9 + (br_if $label$9 + (i32.load8_s + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + ) + ) + ) + ) + ) + (i32.sub + (local.get $0) + (local.get $2) + ) + ) + ) + (func $32 (; 45 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (block $label$1 (result i32) + (local.set $3 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (i32.store8 + (local.tee $4 + (local.get $3) + ) + (local.tee $7 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + ) + (block $label$2 + (block $label$3 + (br_if $label$3 + (local.tee $5 + (i32.load + (local.tee $2 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + ) + (if + (call $30 + (local.get $0) + ) + (local.set $1 + (i32.const -1) + ) + (block + (local.set $5 + (i32.load + (local.get $2) + ) + ) + (br $label$3) + ) + ) + (br $label$2) + ) + (if + (i32.lt_u + (local.tee $6 + (i32.load + (local.tee $2 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (local.get $5) + ) + (if + (i32.ne + (local.tee $1 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (i32.load8_s offset=75 + (local.get $0) + ) + ) + (block + (i32.store + (local.get $2) + (i32.add + (local.get $6) + (i32.const 1) + ) + ) + (i32.store8 + (local.get $6) + (local.get $7) + ) + (br $label$2) + ) + ) + ) + (local.set $1 + (if (result i32) + (i32.eq + (call_indirect (type $0) + (local.get $0) + (local.get $4) + (i32.const 1) + (i32.add + (i32.and + (i32.load offset=36 + (local.get $0) + ) + (i32.const 3) + ) + (i32.const 2) + ) + ) + (i32.const 1) + ) + (i32.load8_u + (local.get $4) + ) + (i32.const -1) + ) + ) + ) + (global.set $global$1 + (local.get $3) + ) + (local.get $1) + ) + ) + (func $33 (; 46 ;) (type $12) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32) + (local $4 i32) + (local $5 i32) + (block $label$1 (result i32) + (local.set $4 + (i32.mul + (local.get $2) + (local.get $1) + ) + ) + (if + (i32.gt_s + (i32.load offset=76 + (local.get $3) + ) + (i32.const -1) + ) + (block + (local.set $5 + (i32.eqz + (call $20 + (local.get $3) + ) + ) + ) + (local.set $0 + (call $21 + (local.get $0) + (local.get $4) + (local.get $3) + ) + ) + (if + (i32.eqz + (local.get $5) + ) + (call $13 + (local.get $3) + ) + ) + ) + (local.set $0 + (call $21 + (local.get $0) + (local.get $4) + (local.get $3) + ) + ) + ) + (if + (i32.ne + (local.get $0) + (local.get $4) + ) + (local.set $2 + (i32.div_u + (local.get $0) + (local.get $1) + ) + ) + ) + (local.get $2) + ) + ) + (func $34 (; 47 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (block $label$1 (result i32) + (local.set $2 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (i32.store + (local.tee $3 + (local.get $2) + ) + (local.get $1) + ) + (local.set $0 + (call $18 + (i32.load + (i32.const 1024) + ) + (local.get $0) + (local.get $3) + ) + ) + (global.set $global$1 + (local.get $2) + ) + (local.get $0) + ) + ) + (func $35 (; 48 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (local $3 i32) + (block $label$1 (result i32) + (local.set $2 + 
(if (result i32) + (i32.gt_s + (i32.load offset=76 + (local.tee $1 + (i32.load + (i32.const 1024) + ) + ) + ) + (i32.const -1) + ) + (call $20 + (local.get $1) + ) + (i32.const 0) + ) + ) + (local.set $0 + (block $label$4 (result i32) + (if (result i32) + (i32.lt_s + (call $36 + (local.get $0) + (local.get $1) + ) + (i32.const 0) + ) + (i32.const 1) + (block (result i32) + (if + (i32.ne + (i32.load8_s offset=75 + (local.get $1) + ) + (i32.const 10) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.load + (local.tee $3 + (i32.add + (local.get $1) + (i32.const 20) + ) + ) + ) + ) + (i32.load offset=16 + (local.get $1) + ) + ) + (block + (i32.store + (local.get $3) + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (i32.store8 + (local.get $0) + (i32.const 10) + ) + (br $label$4 + (i32.const 0) + ) + ) + ) + ) + (i32.lt_s + (call $32 + (local.get $1) + (i32.const 10) + ) + (i32.const 0) + ) + ) + ) + ) + ) + (if + (local.get $2) + (call $13 + (local.get $1) + ) + ) + (i32.shr_s + (i32.shl + (local.get $0) + (i32.const 31) + ) + (i32.const 31) + ) + ) + ) + (func $36 (; 49 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (i32.add + (call $33 + (local.get $0) + (call $31 + (local.get $0) + ) + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + (func $37 (; 50 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (local $16 i32) + (local $17 i32) + (local $18 i32) + (local $19 i32) + (local $20 i32) + (local $21 i32) + (block $label$1 (result i32) + (local.set $14 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (local.set $18 + (local.get $14) + ) + (block $label$2 + (if + (i32.lt_u + (local.get $0) + (i32.const 245) + ) + (block + (local.set $3 + (i32.and + (i32.add + (local.get $0) + (i32.const 11) + ) + (i32.const -8) + ) + ) + (if + (i32.and + (local.tee $0 + (i32.shr_u + (local.tee $8 + (i32.load + (i32.const 3636) + ) + ) + (local.tee $2 + (i32.shr_u + (if (result i32) + (i32.lt_u + (local.get $0) + (i32.const 11) + ) + (local.tee $3 + (i32.const 16) + ) + (local.get $3) + ) + (i32.const 3) + ) + ) + ) + ) + (i32.const 3) + ) + (block + (local.set $4 + (i32.load + (local.tee $1 + (i32.add + (local.tee $7 + (i32.load + (local.tee $3 + (i32.add + (local.tee $2 + (i32.add + (i32.shl + (i32.shl + (local.tee $5 + (i32.add + (i32.xor + (i32.and + (local.get $0) + (i32.const 1) + ) + (i32.const 1) + ) + (local.get $2) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3676) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.eq + (local.get $2) + (local.get $4) + ) + (i32.store + (i32.const 3636) + (i32.and + (local.get $8) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $5) + ) + (i32.const -1) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $4) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $4) + (i32.const 12) + ) + ) + ) + (local.get $7) + ) + (block + (i32.store + (local.get $0) + (local.get $2) + ) + (i32.store + (local.get $3) + (local.get $4) + ) + ) + (call $fimport$10) + ) + ) + ) + (i32.store offset=4 + (local.get $7) + (i32.or + (local.tee $0 + (i32.shl + (local.get $5) + (i32.const 3) + ) + ) + (i32.const 3) + ) + ) + 
(i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $7) + (local.get $0) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (local.get $1) + ) + ) + ) + (if + (i32.gt_u + (local.get $3) + (local.tee $16 + (i32.load + (i32.const 3644) + ) + ) + ) + (block + (if + (local.get $0) + (block + (local.set $5 + (i32.and + (i32.shr_u + (local.tee $0 + (i32.add + (i32.and + (local.tee $0 + (i32.and + (i32.shl + (local.get $0) + (local.get $2) + ) + (i32.or + (local.tee $0 + (i32.shl + (i32.const 2) + (local.get $2) + ) + ) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + ) + ) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + (i32.const -1) + ) + ) + (i32.const 12) + ) + (i32.const 16) + ) + ) + (local.set $12 + (i32.load + (local.tee $5 + (i32.add + (local.tee $9 + (i32.load + (local.tee $2 + (i32.add + (local.tee $4 + (i32.add + (i32.shl + (i32.shl + (local.tee $11 + (i32.add + (i32.or + (i32.or + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $0) + (local.get $5) + ) + ) + (i32.const 5) + ) + (i32.const 8) + ) + ) + (local.get $5) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $2) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 4) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $2) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $2) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (i32.shr_u + (local.get $2) + (local.get $0) + ) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3676) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.eq + (local.get $4) + (local.get $12) + ) + (i32.store + (i32.const 3636) + (local.tee $7 + (i32.and + (local.get $8) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $11) + ) + (i32.const -1) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $12) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $12) + (i32.const 12) + ) + ) + ) + (local.get $9) + ) + (block + (i32.store + (local.get $0) + (local.get $4) + ) + (i32.store + (local.get $2) + (local.get $12) + ) + (local.set $7 + (local.get $8) + ) + ) + (call $fimport$10) + ) + ) + ) + (i32.store offset=4 + (local.get $9) + (i32.or + (local.get $3) + (i32.const 3) + ) + ) + (i32.store offset=4 + (local.tee $4 + (i32.add + (local.get $9) + (local.get $3) + ) + ) + (i32.or + (local.tee $11 + (i32.sub + (i32.shl + (local.get $11) + (i32.const 3) + ) + (local.get $3) + ) + ) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $4) + (local.get $11) + ) + (local.get $11) + ) + (if + (local.get $16) + (block + (local.set $9 + (i32.load + (i32.const 3656) + ) + ) + (local.set $2 + (i32.add + (i32.shl + (i32.shl + (local.tee $0 + (i32.shr_u + (local.get $16) + (i32.const 3) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3676) + ) + ) + (if + (i32.and + (local.get $7) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.load + (local.tee $3 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + (block + (local.set $6 + (local.get $3) + ) + (local.set $1 
+ (local.get $0) + ) + ) + ) + (block + (i32.store + (i32.const 3636) + (i32.or + (local.get $7) + (local.get $0) + ) + ) + (local.set $6 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + (local.set $1 + (local.get $2) + ) + ) + ) + (i32.store + (local.get $6) + (local.get $9) + ) + (i32.store offset=12 + (local.get $1) + (local.get $9) + ) + (i32.store offset=8 + (local.get $9) + (local.get $1) + ) + (i32.store offset=12 + (local.get $9) + (local.get $2) + ) + ) + ) + (i32.store + (i32.const 3644) + (local.get $11) + ) + (i32.store + (i32.const 3656) + (local.get $4) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (local.get $5) + ) + ) + ) + (if + (local.tee $6 + (i32.load + (i32.const 3640) + ) + ) + (block + (local.set $2 + (i32.and + (i32.shr_u + (local.tee $0 + (i32.add + (i32.and + (local.get $6) + (i32.sub + (i32.const 0) + (local.get $6) + ) + ) + (i32.const -1) + ) + ) + (i32.const 12) + ) + (i32.const 16) + ) + ) + (local.set $9 + (i32.sub + (i32.and + (i32.load offset=4 + (local.tee $2 + (i32.load + (i32.add + (i32.shl + (i32.add + (i32.or + (i32.or + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $0) + (local.get $2) + ) + ) + (i32.const 5) + ) + (i32.const 8) + ) + ) + (local.get $2) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 4) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 3940) + ) + ) + ) + ) + (i32.const -8) + ) + (local.get $3) + ) + ) + (local.set $1 + (local.get $2) + ) + (loop $label$25 + (block $label$26 + (if + (i32.eqz + (local.tee $0 + (i32.load offset=16 + (local.get $1) + ) + ) + ) + (br_if $label$26 + (i32.eqz + (local.tee $0 + (i32.load offset=20 + (local.get $1) + ) + ) + ) + ) + ) + (if + (local.tee $7 + (i32.lt_u + (local.tee $1 + (i32.sub + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $3) + ) + ) + (local.get $9) + ) + ) + (local.set $9 + (local.get $1) + ) + ) + (local.set $1 + (local.get $0) + ) + (if + (local.get $7) + (local.set $2 + (local.get $0) + ) + ) + (br $label$25) + ) + ) + (if + (i32.lt_u + (local.get $2) + (local.tee $12 + (i32.load + (i32.const 3652) + ) + ) + ) + (call $fimport$10) + ) + (if + (i32.ge_u + (local.get $2) + (local.tee $13 + (i32.add + (local.get $2) + (local.get $3) + ) + ) + ) + (call $fimport$10) + ) + (local.set $15 + (i32.load offset=24 + (local.get $2) + ) + ) + (block $label$32 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $2) + ) + ) + (local.get $2) + ) + (block + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $2) + (i32.const 20) + ) + ) + ) + ) + ) + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $2) + (i32.const 16) + ) + ) + ) + ) + ) + (block + (local.set $4 + (i32.const 0) + ) + (br $label$32) + ) + ) + ) + (loop $label$36 + (if + (local.tee $7 + (i32.load + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $7) + ) + (local.set $1 + (local.get $11) + ) + (br $label$36) + ) + ) + 
(if + (local.tee $7 + (i32.load + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $7) + ) + (local.set $1 + (local.get $11) + ) + (br $label$36) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $12) + ) + (call $fimport$10) + (block + (i32.store + (local.get $1) + (i32.const 0) + ) + (local.set $4 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $11 + (i32.load offset=8 + (local.get $2) + ) + ) + (local.get $12) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 12) + ) + ) + ) + (local.get $2) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $2) + ) + (block + (i32.store + (local.get $7) + (local.get $0) + ) + (i32.store + (local.get $1) + (local.get $11) + ) + (local.set $4 + (local.get $0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + (block $label$46 + (if + (local.get $15) + (block + (if + (i32.eq + (local.get $2) + (i32.load + (local.tee $0 + (i32.add + (i32.shl + (local.tee $1 + (i32.load offset=28 + (local.get $2) + ) + ) + (i32.const 2) + ) + (i32.const 3940) + ) + ) + ) + ) + (block + (i32.store + (local.get $0) + (local.get $4) + ) + (if + (i32.eqz + (local.get $4) + ) + (block + (i32.store + (i32.const 3640) + (i32.and + (local.get $6) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + (br $label$46) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $15) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $15) + (i32.const 16) + ) + ) + ) + (local.get $2) + ) + (i32.store + (local.get $0) + (local.get $4) + ) + (i32.store offset=20 + (local.get $15) + (local.get $4) + ) + ) + (br_if $label$46 + (i32.eqz + (local.get $4) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $4) + (local.tee $0 + (i32.load + (i32.const 3652) + ) + ) + ) + (call $fimport$10) + ) + (i32.store offset=24 + (local.get $4) + (local.get $15) + ) + (if + (local.tee $1 + (i32.load offset=16 + (local.get $2) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $0) + ) + (call $fimport$10) + (block + (i32.store offset=16 + (local.get $4) + (local.get $1) + ) + (i32.store offset=24 + (local.get $1) + (local.get $4) + ) + ) + ) + ) + (if + (local.tee $0 + (i32.load offset=20 + (local.get $2) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + (block + (i32.store offset=20 + (local.get $4) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $4) + ) + ) + ) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $9) + (i32.const 16) + ) + (block + (i32.store offset=4 + (local.get $2) + (i32.or + (local.tee $0 + (i32.add + (local.get $9) + (local.get $3) + ) + ) + (i32.const 3) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $2) + (local.get $0) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + ) + (block + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $3) + (i32.const 3) + ) + ) + (i32.store offset=4 + (local.get $13) + (i32.or + (local.get $9) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $13) + (local.get $9) + ) + (local.get $9) + ) + (if + (local.get $16) + (block + (local.set $7 + (i32.load + (i32.const 3656) + ) + ) + (local.set $3 + (i32.add + (i32.shl + (i32.shl + 
(local.tee $0 + (i32.shr_u + (local.get $16) + (i32.const 3) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3676) + ) + ) + (if + (i32.and + (local.get $8) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + (block + (local.set $10 + (local.get $1) + ) + (local.set $5 + (local.get $0) + ) + ) + ) + (block + (i32.store + (i32.const 3636) + (i32.or + (local.get $8) + (local.get $0) + ) + ) + (local.set $10 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (local.set $5 + (local.get $3) + ) + ) + ) + (i32.store + (local.get $10) + (local.get $7) + ) + (i32.store offset=12 + (local.get $5) + (local.get $7) + ) + (i32.store offset=8 + (local.get $7) + (local.get $5) + ) + (i32.store offset=12 + (local.get $7) + (local.get $3) + ) + ) + ) + (i32.store + (i32.const 3644) + (local.get $9) + ) + (i32.store + (i32.const 3656) + (local.get $13) + ) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + (local.set $0 + (local.get $3) + ) + ) + ) + (local.set $0 + (local.get $3) + ) + ) + ) + (if + (i32.gt_u + (local.get $0) + (i32.const -65) + ) + (local.set $0 + (i32.const -1) + ) + (block + (local.set $7 + (i32.and + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 11) + ) + ) + (i32.const -8) + ) + ) + (if + (local.tee $5 + (i32.load + (i32.const 3640) + ) + ) + (block + (local.set $17 + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $0) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $7) + (i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $7) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $3 + (i32.and + (i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $3) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $1) + (local.get $0) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $1) + (local.get $0) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (local.set $3 + (i32.sub + (i32.const 0) + (local.get $7) + ) + ) + (block $label$78 + (block $label$79 + (block $label$80 + (if + (local.tee $1 + (i32.load + (i32.add + (i32.shl + (local.get $17) + (i32.const 2) + ) + (i32.const 3940) + ) + ) + ) + (block + (local.set $0 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $17) + (i32.const 1) + ) + ) + ) + (local.set $4 + (i32.const 0) + ) + (local.set $10 + (i32.shl + (local.get $7) + (if (result i32) + (i32.eq + (local.get $17) + (i32.const 31) + ) + (i32.const 0) + (local.get $0) + ) + ) + ) + (local.set $0 + (i32.const 0) + ) + (loop $label$84 + (if + (i32.lt_u + (local.tee $6 + (i32.sub + (i32.and + (i32.load offset=4 + (local.get $1) + ) + (i32.const -8) + ) + (local.get $7) + ) + ) + (local.get $3) + ) + (if + (local.get $6) + (block + (local.set $3 + (local.get $6) + ) + (local.set $0 + (local.get $1) + ) + ) + (block + 
(local.set $3 + (i32.const 0) + ) + (local.set $0 + (local.get $1) + ) + (br $label$79) + ) + ) + ) + (local.set $1 + (if (result i32) + (i32.or + (i32.eqz + (local.tee $19 + (i32.load offset=20 + (local.get $1) + ) + ) + ) + (i32.eq + (local.get $19) + (local.tee $6 + (i32.load + (i32.add + (i32.add + (local.get $1) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $10) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + (local.get $4) + (local.get $19) + ) + ) + (local.set $10 + (i32.shl + (local.get $10) + (i32.xor + (i32.and + (local.tee $4 + (i32.eqz + (local.get $6) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (if + (local.get $4) + (block + (local.set $4 + (local.get $1) + ) + (local.set $1 + (local.get $0) + ) + (br $label$80) + ) + (block + (local.set $4 + (local.get $1) + ) + (local.set $1 + (local.get $6) + ) + (br $label$84) + ) + ) + ) + ) + (block + (local.set $4 + (i32.const 0) + ) + (local.set $1 + (i32.const 0) + ) + ) + ) + ) + (br_if $label$79 + (local.tee $0 + (if (result i32) + (i32.and + (i32.eqz + (local.get $4) + ) + (i32.eqz + (local.get $1) + ) + ) + (block (result i32) + (if + (i32.eqz + (local.tee $0 + (i32.and + (local.get $5) + (i32.or + (local.tee $0 + (i32.shl + (i32.const 2) + (local.get $17) + ) + ) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $7) + ) + (br $label$2) + ) + ) + (local.set $10 + (i32.and + (i32.shr_u + (local.tee $0 + (i32.add + (i32.and + (local.get $0) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + (i32.const -1) + ) + ) + (i32.const 12) + ) + (i32.const 16) + ) + ) + (i32.load + (i32.add + (i32.shl + (i32.add + (i32.or + (i32.or + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $0) + (local.get $10) + ) + ) + (i32.const 5) + ) + (i32.const 8) + ) + ) + (local.get $10) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 4) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 3940) + ) + ) + ) + (local.get $4) + ) + ) + ) + (local.set $4 + (local.get $1) + ) + (br $label$78) + ) + (loop $label$96 + (if + (local.tee $10 + (i32.lt_u + (local.tee $4 + (i32.sub + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $7) + ) + ) + (local.get $3) + ) + ) + (local.set $3 + (local.get $4) + ) + ) + (if + (local.get $10) + (local.set $1 + (local.get $0) + ) + ) + (if + (local.tee $4 + (i32.load offset=16 + (local.get $0) + ) + ) + (block + (local.set $0 + (local.get $4) + ) + (br $label$96) + ) + ) + (br_if $label$96 + (local.tee $0 + (i32.load offset=20 + (local.get $0) + ) + ) + ) + (local.set $4 + (local.get $1) + ) + ) + ) + (if + (local.get $4) + (if + (i32.lt_u + (local.get $3) + (i32.sub + (i32.load + (i32.const 3644) + ) + (local.get $7) + ) + ) + (block + (if + (i32.lt_u + (local.get $4) + (local.tee $12 + (i32.load + (i32.const 3652) + ) + ) + ) + (call $fimport$10) + ) + (if + (i32.ge_u + (local.get $4) + (local.tee $6 + (i32.add + (local.get $4) + (local.get $7) + ) + ) + ) + (call $fimport$10) + ) + 
(local.set $10 + (i32.load offset=24 + (local.get $4) + ) + ) + (block $label$104 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $4) + ) + ) + (local.get $4) + ) + (block + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $4) + (i32.const 20) + ) + ) + ) + ) + ) + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + ) + ) + ) + (block + (local.set $13 + (i32.const 0) + ) + (br $label$104) + ) + ) + ) + (loop $label$108 + (if + (local.tee $11 + (i32.load + (local.tee $9 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $11) + ) + (local.set $1 + (local.get $9) + ) + (br $label$108) + ) + ) + (if + (local.tee $11 + (i32.load + (local.tee $9 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $11) + ) + (local.set $1 + (local.get $9) + ) + (br $label$108) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $12) + ) + (call $fimport$10) + (block + (i32.store + (local.get $1) + (i32.const 0) + ) + (local.set $13 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $9 + (i32.load offset=8 + (local.get $4) + ) + ) + (local.get $12) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load + (local.tee $11 + (i32.add + (local.get $9) + (i32.const 12) + ) + ) + ) + (local.get $4) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $4) + ) + (block + (i32.store + (local.get $11) + (local.get $0) + ) + (i32.store + (local.get $1) + (local.get $9) + ) + (local.set $13 + (local.get $0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + (block $label$118 + (if + (local.get $10) + (block + (if + (i32.eq + (local.get $4) + (i32.load + (local.tee $0 + (i32.add + (i32.shl + (local.tee $1 + (i32.load offset=28 + (local.get $4) + ) + ) + (i32.const 2) + ) + (i32.const 3940) + ) + ) + ) + ) + (block + (i32.store + (local.get $0) + (local.get $13) + ) + (if + (i32.eqz + (local.get $13) + ) + (block + (i32.store + (i32.const 3640) + (local.tee $2 + (i32.and + (local.get $5) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + ) + (br $label$118) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $10) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $10) + (i32.const 16) + ) + ) + ) + (local.get $4) + ) + (i32.store + (local.get $0) + (local.get $13) + ) + (i32.store offset=20 + (local.get $10) + (local.get $13) + ) + ) + (if + (i32.eqz + (local.get $13) + ) + (block + (local.set $2 + (local.get $5) + ) + (br $label$118) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $13) + (local.tee $0 + (i32.load + (i32.const 3652) + ) + ) + ) + (call $fimport$10) + ) + (i32.store offset=24 + (local.get $13) + (local.get $10) + ) + (if + (local.tee $1 + (i32.load offset=16 + (local.get $4) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $0) + ) + (call $fimport$10) + (block + (i32.store offset=16 + (local.get $13) + (local.get $1) + ) + (i32.store offset=24 + (local.get $1) + (local.get $13) + ) + ) + ) + ) + (if + (local.tee $0 + (i32.load offset=20 + (local.get $4) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + (block + (i32.store offset=20 + (local.get $13) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + 
(local.get $13) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + (block $label$136 + (if + (i32.lt_u + (local.get $3) + (i32.const 16) + ) + (block + (i32.store offset=4 + (local.get $4) + (i32.or + (local.tee $0 + (i32.add + (local.get $3) + (local.get $7) + ) + ) + (i32.const 3) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $4) + (local.get $0) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + ) + (block + (i32.store offset=4 + (local.get $4) + (i32.or + (local.get $7) + (i32.const 3) + ) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $6) + (local.get $3) + ) + (local.get $3) + ) + (local.set $0 + (i32.shr_u + (local.get $3) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $3) + (i32.const 256) + ) + (block + (local.set $3 + (i32.add + (i32.shl + (i32.shl + (local.get $0) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3676) + ) + ) + (if + (i32.and + (local.tee $1 + (i32.load + (i32.const 3636) + ) + ) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + (block + (local.set $16 + (local.get $1) + ) + (local.set $8 + (local.get $0) + ) + ) + ) + (block + (i32.store + (i32.const 3636) + (i32.or + (local.get $1) + (local.get $0) + ) + ) + (local.set $16 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (local.set $8 + (local.get $3) + ) + ) + ) + (i32.store + (local.get $16) + (local.get $6) + ) + (i32.store offset=12 + (local.get $8) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $8) + ) + (i32.store offset=12 + (local.get $6) + (local.get $3) + ) + (br $label$136) + ) + ) + (local.set $1 + (i32.add + (i32.shl + (local.tee $5 + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $3) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $3) + (i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $3) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $5 + (i32.and + (i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $5) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $1) + (local.get $0) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $1) + (local.get $0) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (i32.const 2) + ) + (i32.const 3940) + ) + ) + (i32.store offset=28 + (local.get $6) + (local.get $5) + ) + (i32.store offset=4 + (local.tee $0 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + (i32.const 0) + ) + (i32.store + (local.get $0) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (local.get $2) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $5) + ) + ) + ) + ) + (block + (i32.store + (i32.const 3640) + (i32.or + 
(local.get $2) + (local.get $0) + ) + ) + (i32.store + (local.get $1) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $1) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$136) + ) + ) + (local.set $0 + (i32.load + (local.get $1) + ) + ) + (local.set $1 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $5) + (i32.const 1) + ) + ) + ) + (local.set $5 + (i32.shl + (local.get $3) + (if (result i32) + (i32.eq + (local.get $5) + (i32.const 31) + ) + (i32.const 0) + (local.get $1) + ) + ) + ) + (block $label$151 + (block $label$152 + (block $label$153 + (loop $label$154 + (br_if $label$152 + (i32.eq + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $3) + ) + ) + (local.set $2 + (i32.shl + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$153 + (i32.eqz + (local.tee $1 + (i32.load + (local.tee $5 + (i32.add + (i32.add + (local.get $0) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $5) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $5 + (local.get $2) + ) + (local.set $0 + (local.get $1) + ) + (br $label$154) + ) + ) + (if + (i32.lt_u + (local.get $5) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + (block + (i32.store + (local.get $5) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $0) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$136) + ) + ) + (br $label$151) + ) + (if + (i32.and + (i32.ge_u + (local.tee $2 + (i32.load + (local.tee $3 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + ) + (local.tee $1 + (i32.load + (i32.const 3652) + ) + ) + ) + (i32.ge_u + (local.get $0) + (local.get $1) + ) + ) + (block + (i32.store offset=12 + (local.get $2) + (local.get $6) + ) + (i32.store + (local.get $3) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $2) + ) + (i32.store offset=12 + (local.get $6) + (local.get $0) + ) + (i32.store offset=24 + (local.get $6) + (i32.const 0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $4) + (i32.const 8) + ) + ) + ) + (local.set $0 + (local.get $7) + ) + ) + (local.set $0 + (local.get $7) + ) + ) + ) + (local.set $0 + (local.get $7) + ) + ) + ) + ) + ) + ) + (if + (i32.ge_u + (local.tee $1 + (i32.load + (i32.const 3644) + ) + ) + (local.get $0) + ) + (block + (local.set $2 + (i32.load + (i32.const 3656) + ) + ) + (if + (i32.gt_u + (local.tee $3 + (i32.sub + (local.get $1) + (local.get $0) + ) + ) + (i32.const 15) + ) + (block + (i32.store + (i32.const 3656) + (local.tee $1 + (i32.add + (local.get $2) + (local.get $0) + ) + ) + ) + (i32.store + (i32.const 3644) + (local.get $3) + ) + (i32.store offset=4 + (local.get $1) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $1) + (local.get $3) + ) + (local.get $3) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + ) + (block + (i32.store + (i32.const 3644) + (i32.const 0) + ) + (i32.store + (i32.const 3656) + (i32.const 0) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $1) + (i32.const 3) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $2) + (local.get $1) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + ) + ) + 
(global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.gt_u + (local.tee $10 + (i32.load + (i32.const 3648) + ) + ) + (local.get $0) + ) + (block + (i32.store + (i32.const 3648) + (local.tee $3 + (i32.sub + (local.get $10) + (local.get $0) + ) + ) + ) + (i32.store + (i32.const 3660) + (local.tee $1 + (i32.add + (local.tee $2 + (i32.load + (i32.const 3660) + ) + ) + (local.get $0) + ) + ) + ) + (i32.store offset=4 + (local.get $1) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.le_u + (local.tee $6 + (i32.and + (local.tee $8 + (i32.add + (local.tee $1 + (if (result i32) + (i32.load + (i32.const 4108) + ) + (i32.load + (i32.const 4116) + ) + (block (result i32) + (i32.store + (i32.const 4116) + (i32.const 4096) + ) + (i32.store + (i32.const 4112) + (i32.const 4096) + ) + (i32.store + (i32.const 4120) + (i32.const -1) + ) + (i32.store + (i32.const 4124) + (i32.const -1) + ) + (i32.store + (i32.const 4128) + (i32.const 0) + ) + (i32.store + (i32.const 4080) + (i32.const 0) + ) + (i32.store + (local.get $18) + (local.tee $1 + (i32.xor + (i32.and + (local.get $18) + (i32.const -16) + ) + (i32.const 1431655768) + ) + ) + ) + (i32.store + (i32.const 4108) + (local.get $1) + ) + (i32.const 4096) + ) + ) + ) + (local.tee $13 + (i32.add + (local.get $0) + (i32.const 47) + ) + ) + ) + ) + (local.tee $4 + (i32.sub + (i32.const 0) + (local.get $1) + ) + ) + ) + ) + (local.get $0) + ) + (block + (global.set $global$1 + (local.get $14) + ) + (return + (i32.const 0) + ) + ) + ) + (if + (local.tee $2 + (i32.load + (i32.const 4076) + ) + ) + (if + (i32.or + (i32.le_u + (local.tee $1 + (i32.add + (local.tee $3 + (i32.load + (i32.const 4068) + ) + ) + (local.get $6) + ) + ) + (local.get $3) + ) + (i32.gt_u + (local.get $1) + (local.get $2) + ) + ) + (block + (global.set $global$1 + (local.get $14) + ) + (return + (i32.const 0) + ) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $0) + (i32.const 48) + ) + ) + (block $label$171 + (block $label$172 + (if + (i32.eqz + (i32.and + (i32.load + (i32.const 4080) + ) + (i32.const 4) + ) + ) + (block + (block $label$174 + (block $label$175 + (block $label$176 + (br_if $label$176 + (i32.eqz + (local.tee $3 + (i32.load + (i32.const 3660) + ) + ) + ) + ) + (local.set $2 + (i32.const 4084) + ) + (loop $label$177 + (block $label$178 + (if + (i32.le_u + (local.tee $1 + (i32.load + (local.get $2) + ) + ) + (local.get $3) + ) + (br_if $label$178 + (i32.gt_u + (i32.add + (local.get $1) + (i32.load + (local.tee $5 + (i32.add + (local.get $2) + (i32.const 4) + ) + ) + ) + ) + (local.get $3) + ) + ) + ) + (br_if $label$176 + (i32.eqz + (local.tee $1 + (i32.load offset=8 + (local.get $2) + ) + ) + ) + ) + (local.set $2 + (local.get $1) + ) + (br $label$177) + ) + ) + (if + (i32.lt_u + (local.tee $3 + (i32.and + (i32.sub + (local.get $8) + (local.get $10) + ) + (local.get $4) + ) + ) + (i32.const 2147483647) + ) + (if + (i32.eq + (local.tee $1 + (call $40 + (local.get $3) + ) + ) + (i32.add + (i32.load + (local.get $2) + ) + (i32.load + (local.get $5) + ) + ) + ) + (br_if $label$172 + (i32.ne + (local.get $1) + (i32.const -1) + ) + ) + (block + (local.set $2 + (local.get $1) + ) + (local.set $1 + (local.get $3) + ) + (br $label$175) + ) + ) + ) + (br $label$174) + ) + (if + (i32.ne + 
(local.tee $1 + (call $40 + (i32.const 0) + ) + ) + (i32.const -1) + ) + (block + (local.set $2 + (i32.sub + (i32.and + (i32.add + (local.tee $5 + (i32.add + (local.tee $2 + (i32.load + (i32.const 4112) + ) + ) + (i32.const -1) + ) + ) + (local.tee $3 + (local.get $1) + ) + ) + (i32.sub + (i32.const 0) + (local.get $2) + ) + ) + (local.get $3) + ) + ) + (local.set $4 + (i32.add + (local.tee $3 + (i32.add + (if (result i32) + (i32.and + (local.get $5) + (local.get $3) + ) + (local.get $2) + (i32.const 0) + ) + (local.get $6) + ) + ) + (local.tee $5 + (i32.load + (i32.const 4068) + ) + ) + ) + ) + (if + (i32.and + (i32.gt_u + (local.get $3) + (local.get $0) + ) + (i32.lt_u + (local.get $3) + (i32.const 2147483647) + ) + ) + (block + (if + (local.tee $2 + (i32.load + (i32.const 4076) + ) + ) + (br_if $label$174 + (i32.or + (i32.le_u + (local.get $4) + (local.get $5) + ) + (i32.gt_u + (local.get $4) + (local.get $2) + ) + ) + ) + ) + (br_if $label$172 + (i32.eq + (local.tee $2 + (call $40 + (local.get $3) + ) + ) + (local.get $1) + ) + ) + (local.set $1 + (local.get $3) + ) + (br $label$175) + ) + ) + ) + ) + (br $label$174) + ) + (local.set $5 + (i32.sub + (i32.const 0) + (local.get $1) + ) + ) + (if + (i32.and + (i32.gt_u + (local.get $7) + (local.get $1) + ) + (i32.and + (i32.lt_u + (local.get $1) + (i32.const 2147483647) + ) + (i32.ne + (local.get $2) + (i32.const -1) + ) + ) + ) + (if + (i32.lt_u + (local.tee $3 + (i32.and + (i32.add + (i32.sub + (local.get $13) + (local.get $1) + ) + (local.tee $3 + (i32.load + (i32.const 4116) + ) + ) + ) + (i32.sub + (i32.const 0) + (local.get $3) + ) + ) + ) + (i32.const 2147483647) + ) + (if + (i32.eq + (call $40 + (local.get $3) + ) + (i32.const -1) + ) + (block + (drop + (call $40 + (local.get $5) + ) + ) + (br $label$174) + ) + (local.set $3 + (i32.add + (local.get $3) + (local.get $1) + ) + ) + ) + (local.set $3 + (local.get $1) + ) + ) + (local.set $3 + (local.get $1) + ) + ) + (if + (i32.ne + (local.get $2) + (i32.const -1) + ) + (block + (local.set $1 + (local.get $2) + ) + (br $label$172) + ) + ) + ) + (i32.store + (i32.const 4080) + (i32.or + (i32.load + (i32.const 4080) + ) + (i32.const 4) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $6) + (i32.const 2147483647) + ) + (if + (i32.and + (i32.lt_u + (local.tee $1 + (call $40 + (local.get $6) + ) + ) + (local.tee $3 + (call $40 + (i32.const 0) + ) + ) + ) + (i32.and + (i32.ne + (local.get $1) + (i32.const -1) + ) + (i32.ne + (local.get $3) + (i32.const -1) + ) + ) + ) + (br_if $label$172 + (i32.gt_u + (local.tee $3 + (i32.sub + (local.get $3) + (local.get $1) + ) + ) + (i32.add + (local.get $0) + (i32.const 40) + ) + ) + ) + ) + ) + (br $label$171) + ) + (i32.store + (i32.const 4068) + (local.tee $2 + (i32.add + (i32.load + (i32.const 4068) + ) + (local.get $3) + ) + ) + ) + (if + (i32.gt_u + (local.get $2) + (i32.load + (i32.const 4072) + ) + ) + (i32.store + (i32.const 4072) + (local.get $2) + ) + ) + (block $label$198 + (if + (local.tee $8 + (i32.load + (i32.const 3660) + ) + ) + (block + (local.set $2 + (i32.const 4084) + ) + (block $label$200 + (block $label$201 + (loop $label$202 + (br_if $label$201 + (i32.eq + (local.get $1) + (i32.add + (local.tee $4 + (i32.load + (local.get $2) + ) + ) + (local.tee $5 + (i32.load + (local.tee $7 + (i32.add + (local.get $2) + (i32.const 4) + ) + ) + ) + ) + ) + ) + ) + (br_if $label$202 + (local.tee $2 + (i32.load offset=8 + (local.get $2) + ) + ) + ) + ) + (br $label$200) + ) + (if + (i32.eqz + (i32.and + (i32.load offset=12 + (local.get $2) + ) + 
(i32.const 8) + ) + ) + (if + (i32.and + (i32.lt_u + (local.get $8) + (local.get $1) + ) + (i32.ge_u + (local.get $8) + (local.get $4) + ) + ) + (block + (i32.store + (local.get $7) + (i32.add + (local.get $5) + (local.get $3) + ) + ) + (local.set $5 + (i32.load + (i32.const 3648) + ) + ) + (local.set $1 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $2 + (i32.add + (local.get $8) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.store + (i32.const 3660) + (local.tee $2 + (i32.add + (local.get $8) + (if (result i32) + (i32.and + (local.get $2) + (i32.const 7) + ) + (local.get $1) + (local.tee $1 + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (i32.const 3648) + (local.tee $1 + (i32.add + (i32.sub + (local.get $3) + (local.get $1) + ) + (local.get $5) + ) + ) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $1) + (i32.const 1) + ) + ) + (i32.store offset=4 + (i32.add + (local.get $2) + (local.get $1) + ) + (i32.const 40) + ) + (i32.store + (i32.const 3664) + (i32.load + (i32.const 4124) + ) + ) + (br $label$198) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.tee $2 + (i32.load + (i32.const 3652) + ) + ) + ) + (block + (i32.store + (i32.const 3652) + (local.get $1) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + (local.set $10 + (i32.add + (local.get $1) + (local.get $3) + ) + ) + (local.set $5 + (i32.const 4084) + ) + (block $label$208 + (block $label$209 + (loop $label$210 + (br_if $label$209 + (i32.eq + (i32.load + (local.get $5) + ) + (local.get $10) + ) + ) + (br_if $label$210 + (local.tee $5 + (i32.load offset=8 + (local.get $5) + ) + ) + ) + (local.set $5 + (i32.const 4084) + ) + ) + (br $label$208) + ) + (if + (i32.and + (i32.load offset=12 + (local.get $5) + ) + (i32.const 8) + ) + (local.set $5 + (i32.const 4084) + ) + (block + (i32.store + (local.get $5) + (local.get $1) + ) + (i32.store + (local.tee $5 + (i32.add + (local.get $5) + (i32.const 4) + ) + ) + (i32.add + (i32.load + (local.get $5) + ) + (local.get $3) + ) + ) + (local.set $7 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $4 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (local.set $3 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $5 + (i32.add + (local.get $10) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (local.set $6 + (i32.add + (local.tee $13 + (i32.add + (local.get $1) + (if (result i32) + (i32.and + (local.get $4) + (i32.const 7) + ) + (local.get $7) + (i32.const 0) + ) + ) + ) + (local.get $0) + ) + ) + (local.set $7 + (i32.sub + (i32.sub + (local.tee $4 + (i32.add + (local.get $10) + (if (result i32) + (i32.and + (local.get $5) + (i32.const 7) + ) + (local.get $3) + (i32.const 0) + ) + ) + ) + (local.get $13) + ) + (local.get $0) + ) + ) + (i32.store offset=4 + (local.get $13) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + (block $label$217 + (if + (i32.eq + (local.get $4) + (local.get $8) + ) + (block + (i32.store + (i32.const 3648) + (local.tee $0 + (i32.add + (i32.load + (i32.const 3648) + ) + (local.get $7) + ) + ) + ) + (i32.store + (i32.const 3660) + (local.get $6) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $0) + (i32.const 1) + ) + ) + ) + (block + (if + (i32.eq + (local.get $4) + (i32.load + (i32.const 3656) + ) + ) + (block + (i32.store + (i32.const 3644) + (local.tee $0 + (i32.add + (i32.load + (i32.const 3644) + ) + (local.get $7) + ) + ) + ) + (i32.store + (i32.const 3656) + (local.get $6) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $0) + (i32.const 
1) + ) + ) + (i32.store + (i32.add + (local.get $6) + (local.get $0) + ) + (local.get $0) + ) + (br $label$217) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (local.tee $0 + (if (result i32) + (i32.eq + (i32.and + (local.tee $0 + (i32.load offset=4 + (local.get $4) + ) + ) + (i32.const 3) + ) + (i32.const 1) + ) + (block (result i32) + (local.set $11 + (i32.and + (local.get $0) + (i32.const -8) + ) + ) + (local.set $1 + (i32.shr_u + (local.get $0) + (i32.const 3) + ) + ) + (block $label$222 + (if + (i32.lt_u + (local.get $0) + (i32.const 256) + ) + (block + (local.set $5 + (i32.load offset=12 + (local.get $4) + ) + ) + (block $label$224 + (if + (i32.ne + (local.tee $3 + (i32.load offset=8 + (local.get $4) + ) + ) + (local.tee $0 + (i32.add + (i32.shl + (i32.shl + (local.get $1) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3676) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $3) + (local.get $2) + ) + (call $fimport$10) + ) + (br_if $label$224 + (i32.eq + (i32.load offset=12 + (local.get $3) + ) + (local.get $4) + ) + ) + (call $fimport$10) + ) + ) + ) + (if + (i32.eq + (local.get $5) + (local.get $3) + ) + (block + (i32.store + (i32.const 3636) + (i32.and + (i32.load + (i32.const 3636) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + (br $label$222) + ) + ) + (block $label$228 + (if + (i32.eq + (local.get $5) + (local.get $0) + ) + (local.set $20 + (i32.add + (local.get $5) + (i32.const 8) + ) + ) + (block + (if + (i32.lt_u + (local.get $5) + (local.get $2) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $5) + (i32.const 8) + ) + ) + ) + (local.get $4) + ) + (block + (local.set $20 + (local.get $0) + ) + (br $label$228) + ) + ) + (call $fimport$10) + ) + ) + ) + (i32.store offset=12 + (local.get $3) + (local.get $5) + ) + (i32.store + (local.get $20) + (local.get $3) + ) + ) + (block + (local.set $8 + (i32.load offset=24 + (local.get $4) + ) + ) + (block $label$234 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $4) + ) + ) + (local.get $4) + ) + (block + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.tee $3 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + (i32.const 4) + ) + ) + ) + ) + ) + (if + (local.tee $0 + (i32.load + (local.get $3) + ) + ) + (local.set $1 + (local.get $3) + ) + (block + (local.set $12 + (i32.const 0) + ) + (br $label$234) + ) + ) + ) + (loop $label$239 + (if + (local.tee $3 + (i32.load + (local.tee $5 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $3) + ) + (local.set $1 + (local.get $5) + ) + (br $label$239) + ) + ) + (if + (local.tee $3 + (i32.load + (local.tee $5 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $3) + ) + (local.set $1 + (local.get $5) + ) + (br $label$239) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $2) + ) + (call $fimport$10) + (block + (i32.store + (local.get $1) + (i32.const 0) + ) + (local.set $12 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $5 + (i32.load offset=8 + (local.get $4) + ) + ) + (local.get $2) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load + (local.tee $3 + (i32.add + (local.get $5) + (i32.const 12) + ) + ) + ) + (local.get $4) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $4) + ) + (block + (i32.store + (local.get 
$3) + (local.get $0) + ) + (i32.store + (local.get $1) + (local.get $5) + ) + (local.set $12 + (local.get $0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + (br_if $label$222 + (i32.eqz + (local.get $8) + ) + ) + (block $label$249 + (if + (i32.eq + (local.get $4) + (i32.load + (local.tee $0 + (i32.add + (i32.shl + (local.tee $1 + (i32.load offset=28 + (local.get $4) + ) + ) + (i32.const 2) + ) + (i32.const 3940) + ) + ) + ) + ) + (block + (i32.store + (local.get $0) + (local.get $12) + ) + (br_if $label$249 + (local.get $12) + ) + (i32.store + (i32.const 3640) + (i32.and + (i32.load + (i32.const 3640) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + (br $label$222) + ) + (block + (if + (i32.lt_u + (local.get $8) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $8) + (i32.const 16) + ) + ) + ) + (local.get $4) + ) + (i32.store + (local.get $0) + (local.get $12) + ) + (i32.store offset=20 + (local.get $8) + (local.get $12) + ) + ) + (br_if $label$222 + (i32.eqz + (local.get $12) + ) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $12) + (local.tee $1 + (i32.load + (i32.const 3652) + ) + ) + ) + (call $fimport$10) + ) + (i32.store offset=24 + (local.get $12) + (local.get $8) + ) + (if + (local.tee $3 + (i32.load + (local.tee $0 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $3) + (local.get $1) + ) + (call $fimport$10) + (block + (i32.store offset=16 + (local.get $12) + (local.get $3) + ) + (i32.store offset=24 + (local.get $3) + (local.get $12) + ) + ) + ) + ) + (br_if $label$222 + (i32.eqz + (local.tee $0 + (i32.load offset=4 + (local.get $0) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + (block + (i32.store offset=20 + (local.get $12) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $12) + ) + ) + ) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $11) + (local.get $7) + ) + ) + (i32.add + (local.get $4) + (local.get $11) + ) + ) + (local.get $4) + ) + ) + (i32.const 4) + ) + ) + (i32.and + (i32.load + (local.get $0) + ) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $7) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $6) + (local.get $7) + ) + (local.get $7) + ) + (local.set $0 + (i32.shr_u + (local.get $7) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $7) + (i32.const 256) + ) + (block + (local.set $3 + (i32.add + (i32.shl + (i32.shl + (local.get $0) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3676) + ) + ) + (block $label$263 + (if + (i32.and + (local.tee $1 + (i32.load + (i32.const 3636) + ) + ) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (block + (if + (i32.ge_u + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3652) + ) + ) + (block + (local.set $21 + (local.get $1) + ) + (local.set $9 + (local.get $0) + ) + (br $label$263) + ) + ) + (call $fimport$10) + ) + (block + (i32.store + (i32.const 3636) + (i32.or + (local.get $1) + (local.get $0) + ) + ) + (local.set $21 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (local.set $9 + (local.get $3) + ) + ) + ) + ) + (i32.store + (local.get $21) + (local.get $6) + ) + (i32.store offset=12 + (local.get $9) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $9) + ) + (i32.store 
offset=12 + (local.get $6) + (local.get $3) + ) + (br $label$217) + ) + ) + (local.set $3 + (i32.add + (i32.shl + (local.tee $2 + (block $label$267 (result i32) + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $7) + (i32.const 8) + ) + ) + (block (result i32) + (drop + (br_if $label$267 + (i32.const 31) + (i32.gt_u + (local.get $7) + (i32.const 16777215) + ) + ) + ) + (i32.or + (i32.and + (i32.shr_u + (local.get $7) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $3 + (i32.and + (i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $3) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $1) + (local.get $0) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $1) + (local.get $0) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + ) + (i32.const 2) + ) + (i32.const 3940) + ) + ) + (i32.store offset=28 + (local.get $6) + (local.get $2) + ) + (i32.store offset=4 + (local.tee $0 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + (i32.const 0) + ) + (i32.store + (local.get $0) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (local.tee $1 + (i32.load + (i32.const 3640) + ) + ) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $2) + ) + ) + ) + ) + (block + (i32.store + (i32.const 3640) + (i32.or + (local.get $1) + (local.get $0) + ) + ) + (i32.store + (local.get $3) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $3) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$217) + ) + ) + (local.set $0 + (i32.load + (local.get $3) + ) + ) + (local.set $1 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $2) + (i32.const 1) + ) + ) + ) + (local.set $2 + (i32.shl + (local.get $7) + (if (result i32) + (i32.eq + (local.get $2) + (i32.const 31) + ) + (i32.const 0) + (local.get $1) + ) + ) + ) + (block $label$273 + (block $label$274 + (block $label$275 + (loop $label$276 + (br_if $label$274 + (i32.eq + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $7) + ) + ) + (local.set $3 + (i32.shl + (local.get $2) + (i32.const 1) + ) + ) + (br_if $label$275 + (i32.eqz + (local.tee $1 + (i32.load + (local.tee $2 + (i32.add + (i32.add + (local.get $0) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $2) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $2 + (local.get $3) + ) + (local.set $0 + (local.get $1) + ) + (br $label$276) + ) + ) + (if + (i32.lt_u + (local.get $2) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + (block + (i32.store + (local.get $2) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $0) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$217) + ) + ) + (br $label$273) + ) + (if + (i32.and + (i32.ge_u + (local.tee $2 + (i32.load + (local.tee $3 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + ) + (local.tee $1 + (i32.load + (i32.const 3652) + ) + ) + ) + 
(i32.ge_u + (local.get $0) + (local.get $1) + ) + ) + (block + (i32.store offset=12 + (local.get $2) + (local.get $6) + ) + (i32.store + (local.get $3) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $2) + ) + (i32.store offset=12 + (local.get $6) + (local.get $0) + ) + (i32.store offset=24 + (local.get $6) + (i32.const 0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $13) + (i32.const 8) + ) + ) + ) + ) + ) + (loop $label$281 + (block $label$282 + (if + (i32.le_u + (local.tee $2 + (i32.load + (local.get $5) + ) + ) + (local.get $8) + ) + (br_if $label$282 + (i32.gt_u + (local.tee $13 + (i32.add + (local.get $2) + (i32.load offset=4 + (local.get $5) + ) + ) + ) + (local.get $8) + ) + ) + ) + (local.set $5 + (i32.load offset=8 + (local.get $5) + ) + ) + (br $label$281) + ) + ) + (local.set $2 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $5 + (i32.add + (local.tee $7 + (i32.add + (local.get $13) + (i32.const -47) + ) + ) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (local.set $10 + (i32.add + (local.tee $7 + (if (result i32) + (i32.lt_u + (local.tee $2 + (i32.add + (local.get $7) + (if (result i32) + (i32.and + (local.get $5) + (i32.const 7) + ) + (local.get $2) + (i32.const 0) + ) + ) + ) + (local.tee $12 + (i32.add + (local.get $8) + (i32.const 16) + ) + ) + ) + (local.get $8) + (local.get $2) + ) + ) + (i32.const 8) + ) + ) + (local.set $5 + (i32.add + (local.get $7) + (i32.const 24) + ) + ) + (local.set $9 + (i32.add + (local.get $3) + (i32.const -40) + ) + ) + (local.set $2 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $4 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.store + (i32.const 3660) + (local.tee $4 + (i32.add + (local.get $1) + (if (result i32) + (i32.and + (local.get $4) + (i32.const 7) + ) + (local.get $2) + (local.tee $2 + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (i32.const 3648) + (local.tee $2 + (i32.sub + (local.get $9) + (local.get $2) + ) + ) + ) + (i32.store offset=4 + (local.get $4) + (i32.or + (local.get $2) + (i32.const 1) + ) + ) + (i32.store offset=4 + (i32.add + (local.get $4) + (local.get $2) + ) + (i32.const 40) + ) + (i32.store + (i32.const 3664) + (i32.load + (i32.const 4124) + ) + ) + (i32.store + (local.tee $2 + (i32.add + (local.get $7) + (i32.const 4) + ) + ) + (i32.const 27) + ) + (i64.store align=4 + (local.get $10) + (i64.load align=4 + (i32.const 4084) + ) + ) + (i64.store offset=8 align=4 + (local.get $10) + (i64.load align=4 + (i32.const 4092) + ) + ) + (i32.store + (i32.const 4084) + (local.get $1) + ) + (i32.store + (i32.const 4088) + (local.get $3) + ) + (i32.store + (i32.const 4096) + (i32.const 0) + ) + (i32.store + (i32.const 4092) + (local.get $10) + ) + (local.set $1 + (local.get $5) + ) + (loop $label$290 + (i32.store + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i32.const 7) + ) + (br_if $label$290 + (i32.lt_u + (i32.add + (local.get $1) + (i32.const 4) + ) + (local.get $13) + ) + ) + ) + (if + (i32.ne + (local.get $7) + (local.get $8) + ) + (block + (i32.store + (local.get $2) + (i32.and + (i32.load + (local.get $2) + ) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $8) + (i32.or + (local.tee $4 + (i32.sub + (local.get $7) + (local.get $8) + ) + ) + (i32.const 1) + ) + ) + (i32.store + (local.get $7) + (local.get $4) + ) + (local.set $1 + (i32.shr_u + (local.get $4) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $4) + 
(i32.const 256) + ) + (block + (local.set $2 + (i32.add + (i32.shl + (i32.shl + (local.get $1) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3676) + ) + ) + (if + (i32.and + (local.tee $3 + (i32.load + (i32.const 3636) + ) + ) + (local.tee $1 + (i32.shl + (i32.const 1) + (local.get $1) + ) + ) + ) + (if + (i32.lt_u + (local.tee $1 + (i32.load + (local.tee $3 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + (block + (local.set $15 + (local.get $3) + ) + (local.set $11 + (local.get $1) + ) + ) + ) + (block + (i32.store + (i32.const 3636) + (i32.or + (local.get $3) + (local.get $1) + ) + ) + (local.set $15 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + (local.set $11 + (local.get $2) + ) + ) + ) + (i32.store + (local.get $15) + (local.get $8) + ) + (i32.store offset=12 + (local.get $11) + (local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $11) + ) + (i32.store offset=12 + (local.get $8) + (local.get $2) + ) + (br $label$198) + ) + ) + (local.set $2 + (i32.add + (i32.shl + (local.tee $5 + (if (result i32) + (local.tee $1 + (i32.shr_u + (local.get $4) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $4) + (i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $4) + (i32.add + (local.tee $1 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $1 + (i32.and + (i32.shr_u + (i32.add + (local.tee $3 + (i32.shl + (local.get $1) + (local.tee $2 + (i32.and + (i32.shr_u + (i32.add + (local.get $1) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $2) + ) + (local.tee $1 + (i32.and + (i32.shr_u + (i32.add + (local.tee $3 + (i32.shl + (local.get $3) + (local.get $1) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $3) + (local.get $1) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $1) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (i32.const 2) + ) + (i32.const 3940) + ) + ) + (i32.store offset=28 + (local.get $8) + (local.get $5) + ) + (i32.store offset=20 + (local.get $8) + (i32.const 0) + ) + (i32.store + (local.get $12) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (local.tee $3 + (i32.load + (i32.const 3640) + ) + ) + (local.tee $1 + (i32.shl + (i32.const 1) + (local.get $5) + ) + ) + ) + ) + (block + (i32.store + (i32.const 3640) + (i32.or + (local.get $3) + (local.get $1) + ) + ) + (i32.store + (local.get $2) + (local.get $8) + ) + (i32.store offset=24 + (local.get $8) + (local.get $2) + ) + (i32.store offset=12 + (local.get $8) + (local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $8) + ) + (br $label$198) + ) + ) + (local.set $1 + (i32.load + (local.get $2) + ) + ) + (local.set $3 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $5) + (i32.const 1) + ) + ) + ) + (local.set $5 + (i32.shl + (local.get $4) + (if (result i32) + (i32.eq + (local.get $5) + (i32.const 31) + ) + (i32.const 0) + (local.get $3) + ) + ) + ) + (block $label$304 + (block $label$305 + (block $label$306 + (loop $label$307 + (br_if $label$305 + (i32.eq + (i32.and + (i32.load offset=4 + (local.get $1) + ) + (i32.const -8) + ) + (local.get $4) + ) + ) + (local.set $2 + (i32.shl + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$306 + (i32.eqz + (local.tee $3 + (i32.load + 
(local.tee $5 + (i32.add + (i32.add + (local.get $1) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $5) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $5 + (local.get $2) + ) + (local.set $1 + (local.get $3) + ) + (br $label$307) + ) + ) + (if + (i32.lt_u + (local.get $5) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + (block + (i32.store + (local.get $5) + (local.get $8) + ) + (i32.store offset=24 + (local.get $8) + (local.get $1) + ) + (i32.store offset=12 + (local.get $8) + (local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $8) + ) + (br $label$198) + ) + ) + (br $label$304) + ) + (if + (i32.and + (i32.ge_u + (local.tee $5 + (i32.load + (local.tee $2 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + ) + (local.tee $3 + (i32.load + (i32.const 3652) + ) + ) + ) + (i32.ge_u + (local.get $1) + (local.get $3) + ) + ) + (block + (i32.store offset=12 + (local.get $5) + (local.get $8) + ) + (i32.store + (local.get $2) + (local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $5) + ) + (i32.store offset=12 + (local.get $8) + (local.get $1) + ) + (i32.store offset=24 + (local.get $8) + (i32.const 0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + ) + (block + (if + (i32.or + (i32.eqz + (local.tee $2 + (i32.load + (i32.const 3652) + ) + ) + ) + (i32.lt_u + (local.get $1) + (local.get $2) + ) + ) + (i32.store + (i32.const 3652) + (local.get $1) + ) + ) + (i32.store + (i32.const 4084) + (local.get $1) + ) + (i32.store + (i32.const 4088) + (local.get $3) + ) + (i32.store + (i32.const 4096) + (i32.const 0) + ) + (i32.store + (i32.const 3672) + (i32.load + (i32.const 4108) + ) + ) + (i32.store + (i32.const 3668) + (i32.const -1) + ) + (local.set $2 + (i32.const 0) + ) + (loop $label$314 + (i32.store offset=12 + (local.tee $5 + (i32.add + (i32.shl + (i32.shl + (local.get $2) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3676) + ) + ) + (local.get $5) + ) + (i32.store offset=8 + (local.get $5) + (local.get $5) + ) + (br_if $label$314 + (i32.ne + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (i32.const 32) + ) + ) + ) + (local.set $5 + (i32.add + (local.get $3) + (i32.const -40) + ) + ) + (local.set $3 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $2 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.store + (i32.const 3660) + (local.tee $3 + (i32.add + (local.get $1) + (local.tee $1 + (if (result i32) + (i32.and + (local.get $2) + (i32.const 7) + ) + (local.get $3) + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (i32.const 3648) + (local.tee $1 + (i32.sub + (local.get $5) + (local.get $1) + ) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $1) + (i32.const 1) + ) + ) + (i32.store offset=4 + (i32.add + (local.get $3) + (local.get $1) + ) + (i32.const 40) + ) + (i32.store + (i32.const 3664) + (i32.load + (i32.const 4124) + ) + ) + ) + ) + ) + (if + (i32.gt_u + (local.tee $1 + (i32.load + (i32.const 3648) + ) + ) + (local.get $0) + ) + (block + (i32.store + (i32.const 3648) + (local.tee $3 + (i32.sub + (local.get $1) + (local.get $0) + ) + ) + ) + (i32.store + (i32.const 3660) + (local.tee $1 + (i32.add + (local.tee $2 + (i32.load + (i32.const 3660) + ) + ) + (local.get $0) + ) + ) + ) + (i32.store offset=4 + (local.get $1) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return 
+ (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + ) + (i32.store + (call $12) + (i32.const 12) + ) + (global.set $global$1 + (local.get $14) + ) + (i32.const 0) + ) + ) + (func $38 (; 51 ;) (type $2) (param $0 i32) + (local $1 i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (block $label$1 + (if + (i32.eqz + (local.get $0) + ) + (return) + ) + (if + (i32.lt_u + (local.tee $1 + (i32.add + (local.get $0) + (i32.const -8) + ) + ) + (local.tee $11 + (i32.load + (i32.const 3652) + ) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (local.tee $8 + (i32.and + (local.tee $0 + (i32.load + (i32.add + (local.get $0) + (i32.const -4) + ) + ) + ) + (i32.const 3) + ) + ) + (i32.const 1) + ) + (call $fimport$10) + ) + (local.set $6 + (i32.add + (local.get $1) + (local.tee $4 + (i32.and + (local.get $0) + (i32.const -8) + ) + ) + ) + ) + (block $label$5 + (if + (i32.and + (local.get $0) + (i32.const 1) + ) + (block + (local.set $3 + (local.get $1) + ) + (local.set $2 + (local.get $4) + ) + ) + (block + (if + (i32.eqz + (local.get $8) + ) + (return) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.add + (local.get $1) + (i32.sub + (i32.const 0) + (local.tee $8 + (i32.load + (local.get $1) + ) + ) + ) + ) + ) + (local.get $11) + ) + (call $fimport$10) + ) + (local.set $1 + (i32.add + (local.get $8) + (local.get $4) + ) + ) + (if + (i32.eq + (local.get $0) + (i32.load + (i32.const 3656) + ) + ) + (block + (if + (i32.ne + (i32.and + (local.tee $3 + (i32.load + (local.tee $2 + (i32.add + (local.get $6) + (i32.const 4) + ) + ) + ) + ) + (i32.const 3) + ) + (i32.const 3) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + (i32.store + (i32.const 3644) + (local.get $1) + ) + (i32.store + (local.get $2) + (i32.and + (local.get $3) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $0) + (i32.or + (local.get $1) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $0) + (local.get $1) + ) + (local.get $1) + ) + (return) + ) + ) + (local.set $10 + (i32.shr_u + (local.get $8) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $8) + (i32.const 256) + ) + (block + (local.set $3 + (i32.load offset=12 + (local.get $0) + ) + ) + (if + (i32.ne + (local.tee $4 + (i32.load offset=8 + (local.get $0) + ) + ) + (local.tee $2 + (i32.add + (i32.shl + (i32.shl + (local.get $10) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3676) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $4) + (local.get $11) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load offset=12 + (local.get $4) + ) + (local.get $0) + ) + (call $fimport$10) + ) + ) + ) + (if + (i32.eq + (local.get $3) + (local.get $4) + ) + (block + (i32.store + (i32.const 3636) + (i32.and + (i32.load + (i32.const 3636) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $10) + ) + (i32.const -1) + ) + ) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + (if + (i32.eq + (local.get $3) + (local.get $2) + ) + (local.set $5 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (block + (if + (i32.lt_u + (local.get $3) + (local.get $11) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $2 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + (local.get $0) + ) + (local.set $5 + (local.get $2) + ) + (call 
$fimport$10) + ) + ) + ) + (i32.store offset=12 + (local.get $4) + (local.get $3) + ) + (i32.store + (local.get $5) + (local.get $4) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + (local.set $12 + (i32.load offset=24 + (local.get $0) + ) + ) + (block $label$22 + (if + (i32.eq + (local.tee $4 + (i32.load offset=12 + (local.get $0) + ) + ) + (local.get $0) + ) + (block + (if + (local.tee $4 + (i32.load + (local.tee $8 + (i32.add + (local.tee $5 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (i32.const 4) + ) + ) + ) + ) + (local.set $5 + (local.get $8) + ) + (if + (i32.eqz + (local.tee $4 + (i32.load + (local.get $5) + ) + ) + ) + (block + (local.set $7 + (i32.const 0) + ) + (br $label$22) + ) + ) + ) + (loop $label$27 + (if + (local.tee $10 + (i32.load + (local.tee $8 + (i32.add + (local.get $4) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $4 + (local.get $10) + ) + (local.set $5 + (local.get $8) + ) + (br $label$27) + ) + ) + (if + (local.tee $10 + (i32.load + (local.tee $8 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $4 + (local.get $10) + ) + (local.set $5 + (local.get $8) + ) + (br $label$27) + ) + ) + ) + (if + (i32.lt_u + (local.get $5) + (local.get $11) + ) + (call $fimport$10) + (block + (i32.store + (local.get $5) + (i32.const 0) + ) + (local.set $7 + (local.get $4) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $5 + (i32.load offset=8 + (local.get $0) + ) + ) + (local.get $11) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load + (local.tee $8 + (i32.add + (local.get $5) + (i32.const 12) + ) + ) + ) + (local.get $0) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $10 + (i32.add + (local.get $4) + (i32.const 8) + ) + ) + ) + (local.get $0) + ) + (block + (i32.store + (local.get $8) + (local.get $4) + ) + (i32.store + (local.get $10) + (local.get $5) + ) + (local.set $7 + (local.get $4) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + (if + (local.get $12) + (block + (if + (i32.eq + (local.get $0) + (i32.load + (local.tee $5 + (i32.add + (i32.shl + (local.tee $4 + (i32.load offset=28 + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 3940) + ) + ) + ) + ) + (block + (i32.store + (local.get $5) + (local.get $7) + ) + (if + (i32.eqz + (local.get $7) + ) + (block + (i32.store + (i32.const 3640) + (i32.and + (i32.load + (i32.const 3640) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $4) + ) + (i32.const -1) + ) + ) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $12) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $4 + (i32.add + (local.get $12) + (i32.const 16) + ) + ) + ) + (local.get $0) + ) + (i32.store + (local.get $4) + (local.get $7) + ) + (i32.store offset=20 + (local.get $12) + (local.get $7) + ) + ) + (if + (i32.eqz + (local.get $7) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $7) + (local.tee $5 + (i32.load + (i32.const 3652) + ) + ) + ) + (call $fimport$10) + ) + (i32.store offset=24 + (local.get $7) + (local.get $12) + ) + (if + (local.tee $4 + (i32.load + (local.tee $8 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $4) + (local.get $5) + ) + (call $fimport$10) + (block + (i32.store offset=16 + (local.get $7) + 
(local.get $4) + ) + (i32.store offset=24 + (local.get $4) + (local.get $7) + ) + ) + ) + ) + (if + (local.tee $4 + (i32.load offset=4 + (local.get $8) + ) + ) + (if + (i32.lt_u + (local.get $4) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + (block + (i32.store offset=20 + (local.get $7) + (local.get $4) + ) + (i32.store offset=24 + (local.get $4) + (local.get $7) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + ) + ) + ) + (if + (i32.ge_u + (local.get $3) + (local.get $6) + ) + (call $fimport$10) + ) + (if + (i32.eqz + (i32.and + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $6) + (i32.const 4) + ) + ) + ) + ) + (i32.const 1) + ) + ) + (call $fimport$10) + ) + (if + (i32.and + (local.get $0) + (i32.const 2) + ) + (block + (i32.store + (local.get $1) + (i32.and + (local.get $0) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $2) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $3) + (local.get $2) + ) + (local.get $2) + ) + ) + (block + (if + (i32.eq + (local.get $6) + (i32.load + (i32.const 3660) + ) + ) + (block + (i32.store + (i32.const 3648) + (local.tee $0 + (i32.add + (i32.load + (i32.const 3648) + ) + (local.get $2) + ) + ) + ) + (i32.store + (i32.const 3660) + (local.get $3) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $0) + (i32.const 1) + ) + ) + (if + (i32.ne + (local.get $3) + (i32.load + (i32.const 3656) + ) + ) + (return) + ) + (i32.store + (i32.const 3656) + (i32.const 0) + ) + (i32.store + (i32.const 3644) + (i32.const 0) + ) + (return) + ) + ) + (if + (i32.eq + (local.get $6) + (i32.load + (i32.const 3656) + ) + ) + (block + (i32.store + (i32.const 3644) + (local.tee $0 + (i32.add + (i32.load + (i32.const 3644) + ) + (local.get $2) + ) + ) + ) + (i32.store + (i32.const 3656) + (local.get $3) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $0) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $3) + (local.get $0) + ) + (local.get $0) + ) + (return) + ) + ) + (local.set $5 + (i32.add + (i32.and + (local.get $0) + (i32.const -8) + ) + (local.get $2) + ) + ) + (local.set $4 + (i32.shr_u + (local.get $0) + (i32.const 3) + ) + ) + (block $label$61 + (if + (i32.lt_u + (local.get $0) + (i32.const 256) + ) + (block + (local.set $2 + (i32.load offset=12 + (local.get $6) + ) + ) + (if + (i32.ne + (local.tee $1 + (i32.load offset=8 + (local.get $6) + ) + ) + (local.tee $0 + (i32.add + (i32.shl + (i32.shl + (local.get $4) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3676) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $1) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load offset=12 + (local.get $1) + ) + (local.get $6) + ) + (call $fimport$10) + ) + ) + ) + (if + (i32.eq + (local.get $2) + (local.get $1) + ) + (block + (i32.store + (i32.const 3636) + (i32.and + (i32.load + (i32.const 3636) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $4) + ) + (i32.const -1) + ) + ) + ) + (br $label$61) + ) + ) + (if + (i32.eq + (local.get $2) + (local.get $0) + ) + (local.set $14 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + (block + (if + (i32.lt_u + (local.get $2) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + 
(i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + (local.get $6) + ) + (local.set $14 + (local.get $0) + ) + (call $fimport$10) + ) + ) + ) + (i32.store offset=12 + (local.get $1) + (local.get $2) + ) + (i32.store + (local.get $14) + (local.get $1) + ) + ) + (block + (local.set $7 + (i32.load offset=24 + (local.get $6) + ) + ) + (block $label$73 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $6) + ) + ) + (local.get $6) + ) + (block + (if + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.tee $2 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + (i32.const 4) + ) + ) + ) + ) + (local.set $2 + (local.get $1) + ) + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.get $2) + ) + ) + ) + (block + (local.set $9 + (i32.const 0) + ) + (br $label$73) + ) + ) + ) + (loop $label$78 + (if + (local.tee $4 + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $4) + ) + (local.set $2 + (local.get $1) + ) + (br $label$78) + ) + ) + (if + (local.tee $4 + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $4) + ) + (local.set $2 + (local.get $1) + ) + (br $label$78) + ) + ) + ) + (if + (i32.lt_u + (local.get $2) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + (block + (i32.store + (local.get $2) + (i32.const 0) + ) + (local.set $9 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $2 + (i32.load offset=8 + (local.get $6) + ) + ) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load + (local.tee $1 + (i32.add + (local.get $2) + (i32.const 12) + ) + ) + ) + (local.get $6) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $4 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $6) + ) + (block + (i32.store + (local.get $1) + (local.get $0) + ) + (i32.store + (local.get $4) + (local.get $2) + ) + (local.set $9 + (local.get $0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + (if + (local.get $7) + (block + (if + (i32.eq + (local.get $6) + (i32.load + (local.tee $2 + (i32.add + (i32.shl + (local.tee $0 + (i32.load offset=28 + (local.get $6) + ) + ) + (i32.const 2) + ) + (i32.const 3940) + ) + ) + ) + ) + (block + (i32.store + (local.get $2) + (local.get $9) + ) + (if + (i32.eqz + (local.get $9) + ) + (block + (i32.store + (i32.const 3640) + (i32.and + (i32.load + (i32.const 3640) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $0) + ) + (i32.const -1) + ) + ) + ) + (br $label$61) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $7) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $7) + (i32.const 16) + ) + ) + ) + (local.get $6) + ) + (i32.store + (local.get $0) + (local.get $9) + ) + (i32.store offset=20 + (local.get $7) + (local.get $9) + ) + ) + (br_if $label$61 + (i32.eqz + (local.get $9) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $9) + (local.tee $2 + (i32.load + (i32.const 3652) + ) + ) + ) + (call $fimport$10) + ) + (i32.store offset=24 + (local.get $9) + (local.get $7) + ) + (if + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $0) + (local.get $2) + ) + (call $fimport$10) + (block + (i32.store offset=16 + (local.get $9) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $9) + ) + ) + ) + ) + (if + 
(local.tee $0 + (i32.load offset=4 + (local.get $1) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + (block + (i32.store offset=20 + (local.get $9) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $9) + ) + ) + ) + ) + ) + ) + ) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $5) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $3) + (local.get $5) + ) + (local.get $5) + ) + (if + (i32.eq + (local.get $3) + (i32.load + (i32.const 3656) + ) + ) + (block + (i32.store + (i32.const 3644) + (local.get $5) + ) + (return) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + ) + (local.set $1 + (i32.shr_u + (local.get $2) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $2) + (i32.const 256) + ) + (block + (local.set $0 + (i32.add + (i32.shl + (i32.shl + (local.get $1) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3676) + ) + ) + (if + (i32.and + (local.tee $2 + (i32.load + (i32.const 3636) + ) + ) + (local.tee $1 + (i32.shl + (i32.const 1) + (local.get $1) + ) + ) + ) + (if + (i32.lt_u + (local.tee $1 + (i32.load + (local.tee $2 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + (block + (local.set $15 + (local.get $2) + ) + (local.set $13 + (local.get $1) + ) + ) + ) + (block + (i32.store + (i32.const 3636) + (i32.or + (local.get $2) + (local.get $1) + ) + ) + (local.set $15 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + (local.set $13 + (local.get $0) + ) + ) + ) + (i32.store + (local.get $15) + (local.get $3) + ) + (i32.store offset=12 + (local.get $13) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get $13) + ) + (i32.store offset=12 + (local.get $3) + (local.get $0) + ) + (return) + ) + ) + (local.set $0 + (i32.add + (i32.shl + (local.tee $1 + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $2) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $2) + (i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $2) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $4 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $0) + ) + (local.tee $1 + (i32.and + (i32.shr_u + (i32.add + (local.tee $0 + (i32.shl + (local.get $1) + (local.get $4) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $0) + (local.get $1) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (i32.const 2) + ) + (i32.const 3940) + ) + ) + (i32.store offset=28 + (local.get $3) + (local.get $1) + ) + (i32.store offset=20 + (local.get $3) + (i32.const 0) + ) + (i32.store offset=16 + (local.get $3) + (i32.const 0) + ) + (block $label$113 + (if + (i32.and + (local.tee $4 + (i32.load + (i32.const 3640) + ) + ) + (local.tee $5 + (i32.shl + (i32.const 1) + (local.get $1) + ) + ) + ) + (block + (local.set $0 + (i32.load + (local.get $0) + ) + ) + (local.set $4 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $1) + (i32.const 1) + ) + ) + ) + (local.set $1 + (i32.shl + (local.get 
$2) + (if (result i32) + (i32.eq + (local.get $1) + (i32.const 31) + ) + (i32.const 0) + (local.get $4) + ) + ) + ) + (block $label$117 + (block $label$118 + (block $label$119 + (loop $label$120 + (br_if $label$118 + (i32.eq + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $2) + ) + ) + (local.set $4 + (i32.shl + (local.get $1) + (i32.const 1) + ) + ) + (br_if $label$119 + (i32.eqz + (local.tee $5 + (i32.load + (local.tee $1 + (i32.add + (i32.add + (local.get $0) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $1) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $1 + (local.get $4) + ) + (local.set $0 + (local.get $5) + ) + (br $label$120) + ) + ) + (if + (i32.lt_u + (local.get $1) + (i32.load + (i32.const 3652) + ) + ) + (call $fimport$10) + (block + (i32.store + (local.get $1) + (local.get $3) + ) + (i32.store offset=24 + (local.get $3) + (local.get $0) + ) + (i32.store offset=12 + (local.get $3) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get $3) + ) + (br $label$113) + ) + ) + (br $label$117) + ) + (if + (i32.and + (i32.ge_u + (local.tee $2 + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + ) + (local.tee $4 + (i32.load + (i32.const 3652) + ) + ) + ) + (i32.ge_u + (local.get $0) + (local.get $4) + ) + ) + (block + (i32.store offset=12 + (local.get $2) + (local.get $3) + ) + (i32.store + (local.get $1) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get $2) + ) + (i32.store offset=12 + (local.get $3) + (local.get $0) + ) + (i32.store offset=24 + (local.get $3) + (i32.const 0) + ) + ) + (call $fimport$10) + ) + ) + ) + (block + (i32.store + (i32.const 3640) + (i32.or + (local.get $4) + (local.get $5) + ) + ) + (i32.store + (local.get $0) + (local.get $3) + ) + (i32.store offset=24 + (local.get $3) + (local.get $0) + ) + (i32.store offset=12 + (local.get $3) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get $3) + ) + ) + ) + ) + (i32.store + (i32.const 3668) + (local.tee $0 + (i32.add + (i32.load + (i32.const 3668) + ) + (i32.const -1) + ) + ) + ) + (if + (local.get $0) + (return) + (local.set $0 + (i32.const 4092) + ) + ) + (loop $label$128 + (local.set $0 + (i32.add + (local.tee $2 + (i32.load + (local.get $0) + ) + ) + (i32.const 8) + ) + ) + (br_if $label$128 + (local.get $2) + ) + ) + (i32.store + (i32.const 3668) + (i32.const -1) + ) + ) + ) + (func $39 (; 52 ;) (type $6) + (nop) + ) + (func $40 (; 53 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (i32.add + (local.tee $2 + (i32.load + (global.get $global$0) + ) + ) + (local.tee $0 + (i32.and + (i32.add + (local.get $0) + (i32.const 15) + ) + (i32.const -16) + ) + ) + ) + ) + (if + (i32.or + (i32.and + (i32.gt_s + (local.get $0) + (i32.const 0) + ) + (i32.lt_s + (local.get $1) + (local.get $2) + ) + ) + (i32.lt_s + (local.get $1) + (i32.const 0) + ) + ) + (block + (drop + (call $fimport$6) + ) + (call $fimport$11 + (i32.const 12) + ) + (return + (i32.const -1) + ) + ) + ) + (i32.store + (global.get $global$0) + (local.get $1) + ) + (if + (i32.gt_s + (local.get $1) + (call $fimport$5) + ) + (if + (i32.eqz + (call $fimport$4) + ) + (block + (call $fimport$11 + (i32.const 12) + ) + (i32.store + (global.get $global$0) + (local.get $2) + ) + (return + (i32.const -1) + ) + ) + ) + ) + (local.get $2) + ) + ) + (func $41 (; 54 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) 
(result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (block $label$1 (result i32) + (local.set $4 + (i32.add + (local.get $0) + (local.get $2) + ) + ) + (if + (i32.ge_s + (local.get $2) + (i32.const 20) + ) + (block + (local.set $1 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (if + (local.tee $3 + (i32.and + (local.get $0) + (i32.const 3) + ) + ) + (block + (local.set $3 + (i32.sub + (i32.add + (local.get $0) + (i32.const 4) + ) + (local.get $3) + ) + ) + (loop $label$4 + (if + (i32.lt_s + (local.get $0) + (local.get $3) + ) + (block + (i32.store8 + (local.get $0) + (local.get $1) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (br $label$4) + ) + ) + ) + ) + ) + (local.set $3 + (i32.or + (i32.or + (i32.or + (local.get $1) + (i32.shl + (local.get $1) + (i32.const 8) + ) + ) + (i32.shl + (local.get $1) + (i32.const 16) + ) + ) + (i32.shl + (local.get $1) + (i32.const 24) + ) + ) + ) + (local.set $5 + (i32.and + (local.get $4) + (i32.const -4) + ) + ) + (loop $label$6 + (if + (i32.lt_s + (local.get $0) + (local.get $5) + ) + (block + (i32.store + (local.get $0) + (local.get $3) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 4) + ) + ) + (br $label$6) + ) + ) + ) + ) + ) + (loop $label$8 + (if + (i32.lt_s + (local.get $0) + (local.get $4) + ) + (block + (i32.store8 + (local.get $0) + (local.get $1) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (br $label$8) + ) + ) + ) + (i32.sub + (local.get $0) + (local.get $2) + ) + ) + ) + (func $42 (; 55 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (block $label$1 (result i32) + (if + (i32.ge_s + (local.get $2) + (i32.const 4096) + ) + (return + (call $fimport$12 + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + ) + (local.set $3 + (local.get $0) + ) + (if + (i32.eq + (i32.and + (local.get $0) + (i32.const 3) + ) + (i32.and + (local.get $1) + (i32.const 3) + ) + ) + (block + (loop $label$4 + (if + (i32.and + (local.get $0) + (i32.const 3) + ) + (block + (if + (i32.eqz + (local.get $2) + ) + (return + (local.get $3) + ) + ) + (i32.store8 + (local.get $0) + (i32.load8_s + (local.get $1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 1) + ) + ) + (br $label$4) + ) + ) + ) + (loop $label$7 + (if + (i32.ge_s + (local.get $2) + (i32.const 4) + ) + (block + (i32.store + (local.get $0) + (i32.load + (local.get $1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 4) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 4) + ) + ) + (br $label$7) + ) + ) + ) + ) + ) + (loop $label$9 + (if + (i32.gt_s + (local.get $2) + (i32.const 0) + ) + (block + (i32.store8 + (local.get $0) + (i32.load8_s + (local.get $1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 1) + ) + ) + (br $label$9) + ) + ) + ) + (local.get $3) + ) + ) + (func $43 (; 56 ;) (type $3) (result i32) + (i32.const 0) + ) + (func $44 (; 57 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (call_indirect (type $1) + (local.get $1) + (i32.add + (i32.and + (local.get $0) + (i32.const 1) + ) + (i32.const 0) + ) + ) + ) + (func $45 (; 58 ;) (type $12) (param $0 
i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32) + (call_indirect (type $0) + (local.get $1) + (local.get $2) + (local.get $3) + (i32.add + (i32.and + (local.get $0) + (i32.const 3) + ) + (i32.const 2) + ) + ) + ) + (func $46 (; 59 ;) (type $5) (param $0 i32) (param $1 i32) + (call_indirect (type $2) + (local.get $1) + (i32.add + (i32.and + (local.get $0) + (i32.const 1) + ) + (i32.const 6) + ) + ) + ) + (func $47 (; 60 ;) (type $1) (param $0 i32) (result i32) + (block $label$1 (result i32) + (call $fimport$3 + (i32.const 0) + ) + (i32.const 0) + ) + ) + (func $48 (; 61 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (block $label$1 (result i32) + (call $fimport$3 + (i32.const 1) + ) + (i32.const 0) + ) + ) + (func $49 (; 62 ;) (type $2) (param $0 i32) + (call $fimport$3 + (i32.const 2) + ) + ) +) + diff --git a/cranelift/wasmtests/embenchen_primes.wat b/cranelift/wasmtests/embenchen_primes.wat new file mode 100644 index 0000000000..4f4603198c --- /dev/null +++ b/cranelift/wasmtests/embenchen_primes.wat @@ -0,0 +1,15334 @@ +(module + (type $0 (func (param i32 i32 i32) (result i32))) + (type $1 (func (param i32) (result i32))) + (type $2 (func (param i32))) + (type $3 (func (result i32))) + (type $4 (func (param i32 i32) (result i32))) + (type $5 (func (param i32 i32))) + (type $6 (func)) + (type $7 (func (param i32 i32 i32 i32 i32) (result i32))) + (type $8 (func (param i32 i32 i32))) + (type $9 (func (param i64 i32) (result i32))) + (type $10 (func (param i32 i32 i32 i32 i32))) + (type $11 (func (param f64 i32) (result f64))) + (type $12 (func (param i32 i32 i32 i32) (result i32))) + (import "env" "memory" (memory $16 2048 2048)) + (data (i32.const 1024) "\04\04\00\00\05") + (data (i32.const 1040) "\01") + (data (i32.const 1064) "\01\00\00\00\02\00\00\004\10\00\00\00\04") + (data (i32.const 1088) "\01") + (data (i32.const 1103) "\n\ff\ff\ff\ff") + (data (i32.const 1140) "error: %d\\n\00lastprime: %d.\n\00\11\00\n\00\11\11\11\00\00\00\00\05\00\00\00\00\00\00\t\00\00\00\00\0b") + (data (i32.const 1200) "\11\00\0f\n\11\11\11\03\n\07\00\01\13\t\0b\0b\00\00\t\06\0b\00\00\0b\00\06\11\00\00\00\11\11\11") + (data (i32.const 1249) "\0b") + (data (i32.const 1258) "\11\00\n\n\11\11\11\00\n\00\00\02\00\t\0b\00\00\00\t\00\0b\00\00\0b") + (data (i32.const 1307) "\0c") + (data (i32.const 1319) "\0c\00\00\00\00\0c\00\00\00\00\t\0c\00\00\00\00\00\0c\00\00\0c") + (data (i32.const 1365) "\0e") + (data (i32.const 1377) "\0d\00\00\00\04\0d\00\00\00\00\t\0e\00\00\00\00\00\0e\00\00\0e") + (data (i32.const 1423) "\10") + (data (i32.const 1435) "\0f\00\00\00\00\0f\00\00\00\00\t\10\00\00\00\00\00\10\00\00\10\00\00\12\00\00\00\12\12\12") + (data (i32.const 1490) "\12\00\00\00\12\12\12\00\00\00\00\00\00\t") + (data (i32.const 1539) "\0b") + (data (i32.const 1551) "\n\00\00\00\00\n\00\00\00\00\t\0b\00\00\00\00\00\0b\00\00\0b") + (data (i32.const 1597) "\0c") + (data (i32.const 1609) "\0c\00\00\00\00\0c\00\00\00\00\t\0c\00\00\00\00\00\0c\00\00\0c\00\000123456789ABCDEF-+ 0X0x\00(null)\00-0X+0X 0X-0x+0x 0x\00inf\00INF\00nan\00NAN\00.\00T!\"\19\0d\01\02\03\11K\1c\0c\10\04\0b\1d\12\1e\'hnopqb \05\06\0f\13\14\15\1a\08\16\07($\17\18\t\n\0e\1b\1f%#\83\82}&*+<=>?CGJMXYZ[\\]^_`acdefgijklrstyz{|\00Illegal byte sequence\00Domain error\00Result not representable\00Not a tty\00Permission denied\00Operation not permitted\00No such file or directory\00No such process\00File exists\00Value too large for data type\00No space left on device\00Out of memory\00Resource busy\00Interrupted system 
call\00Resource temporarily unavailable\00Invalid seek\00Cross-device link\00Read-only file system\00Directory not empty\00Connection reset by peer\00Operation timed out\00Connection refused\00Host is down\00Host is unreachable\00Address in use\00Broken pipe\00I/O error\00No such device or address\00Block device required\00No such device\00Not a directory\00Is a directory\00Text file busy\00Exec format error\00Invalid argument\00Argument list too long\00Symbolic link loop\00Filename too long\00Too many open files in system\00No file descriptors available\00Bad file descriptor\00No child process\00Bad address\00File too large\00Too many links\00No locks available\00Resource deadlock would occur\00State not recoverable\00Previous owner died\00Operation canceled\00Function not implemented\00No message of desired type\00Identifier removed\00Device not a stream\00No data available\00Device timeout\00Out of streams resources\00Link has been severed\00Protocol error\00Bad message\00File descriptor in bad state\00Not a socket\00Destination address required\00Message too large\00Protocol wrong type for socket\00Protocol not available\00Protocol not supported\00Socket type not supported\00Not supported\00Protocol family not supported\00Address family not supported by protocol\00Address not available\00Network is down\00Network unreachable\00Connection reset by network\00Connection aborted\00No buffer space available\00Socket is connected\00Socket not connected\00Cannot send after socket shutdown\00Operation already in progress\00Operation in progress\00Stale file handle\00Remote I/O error\00Quota exceeded\00No medium found\00Wrong medium type\00No error information") + (import "env" "table" (table $timport$17 8 8 funcref)) + (elem (global.get $gimport$19) $41 $8 $42 $13 $9 $14 $43 $15) + (import "env" "DYNAMICTOP_PTR" (global $gimport$0 i32)) + (import "env" "STACKTOP" (global $gimport$1 i32)) + (import "env" "STACK_MAX" (global $gimport$2 i32)) + (import "env" "memoryBase" (global $gimport$18 i32)) + (import "env" "tableBase" (global $gimport$19 i32)) + (import "env" "abort" (func $fimport$3 (param i32))) + (import "env" "enlargeMemory" (func $fimport$4 (result i32))) + (import "env" "getTotalMemory" (func $fimport$5 (result i32))) + (import "env" "abortOnCannotGrowMemory" (func $fimport$6 (result i32))) + (import "env" "_pthread_cleanup_pop" (func $fimport$7 (param i32))) + (import "env" "___syscall6" (func $fimport$8 (param i32 i32) (result i32))) + (import "env" "_pthread_cleanup_push" (func $fimport$9 (param i32 i32))) + (import "env" "_abort" (func $fimport$10)) + (import "env" "___setErrNo" (func $fimport$11 (param i32))) + (import "env" "_emscripten_memcpy_big" (func $fimport$12 (param i32 i32 i32) (result i32))) + (import "env" "___syscall54" (func $fimport$13 (param i32 i32) (result i32))) + (import "env" "___syscall140" (func $fimport$14 (param i32 i32) (result i32))) + (import "env" "___syscall146" (func $fimport$15 (param i32 i32) (result i32))) + (global $global$0 (mut i32) (global.get $gimport$0)) + (global $global$1 (mut i32) (global.get $gimport$1)) + (global $global$2 (mut i32) (global.get $gimport$2)) + (global $global$3 (mut i32) (i32.const 0)) + (global $global$4 (mut i32) (i32.const 0)) + (global $global$5 (mut i32) (i32.const 0)) + (export "_sbrk" (func $34)) + (export "_free" (func $32)) + (export "_main" (func $7)) + (export "_pthread_self" (func $37)) + (export "_memset" (func $35)) + (export "_malloc" (func $31)) + (export "_memcpy" (func $36)) + (export 
"___errno_location" (func $11)) + (export "runPostSets" (func $33)) + (export "stackAlloc" (func $0)) + (export "stackSave" (func $1)) + (export "stackRestore" (func $2)) + (export "establishStackSpace" (func $3)) + (export "setThrew" (func $4)) + (export "setTempRet0" (func $5)) + (export "getTempRet0" (func $6)) + (export "dynCall_ii" (func $38)) + (export "dynCall_iiii" (func $39)) + (export "dynCall_vi" (func $40)) + (func $0 (; 13 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (block $label$1 (result i32) + (local.set $1 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (local.get $0) + ) + ) + (global.set $global$1 + (i32.and + (i32.add + (global.get $global$1) + (i32.const 15) + ) + (i32.const -16) + ) + ) + (local.get $1) + ) + ) + (func $1 (; 14 ;) (type $3) (result i32) + (global.get $global$1) + ) + (func $2 (; 15 ;) (type $2) (param $0 i32) + (global.set $global$1 + (local.get $0) + ) + ) + (func $3 (; 16 ;) (type $5) (param $0 i32) (param $1 i32) + (block $label$1 + (global.set $global$1 + (local.get $0) + ) + (global.set $global$2 + (local.get $1) + ) + ) + ) + (func $4 (; 17 ;) (type $5) (param $0 i32) (param $1 i32) + (if + (i32.eqz + (global.get $global$3) + ) + (block + (global.set $global$3 + (local.get $0) + ) + (global.set $global$4 + (local.get $1) + ) + ) + ) + ) + (func $5 (; 18 ;) (type $2) (param $0 i32) + (global.set $global$5 + (local.get $0) + ) + ) + (func $6 (; 19 ;) (type $3) (result i32) + (global.get $global$5) + ) + (func $7 (; 20 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 f32) + (block $label$1 (result i32) + (local.set $3 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (local.set $5 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (local.set $2 + (local.get $3) + ) + (block $label$2 + (block $label$3 + (br_if $label$3 + (i32.le_s + (local.get $0) + (i32.const 1) + ) + ) + (block $label$4 + (block $label$5 + (block $label$6 + (block $label$7 + (block $label$8 + (block $label$9 + (block $label$10 + (br_table $label$5 $label$10 $label$8 $label$9 $label$7 $label$6 $label$4 + (i32.sub + (local.tee $0 + (i32.load8_s + (i32.load offset=4 + (local.get $1) + ) + ) + ) + (i32.const 48) + ) + ) + ) + (local.set $4 + (i32.const 33000) + ) + (br $label$2) + ) + (br $label$3) + ) + (local.set $4 + (i32.const 130000) + ) + (br $label$2) + ) + (local.set $4 + (i32.const 610000) + ) + (br $label$2) + ) + (local.set $4 + (i32.const 1010000) + ) + (br $label$2) + ) + (global.set $global$1 + (local.get $3) + ) + (return + (i32.const 0) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $0) + (i32.const -48) + ) + ) + (drop + (call $30 + (i32.const 1140) + (local.get $2) + ) + ) + (global.set $global$1 + (local.get $3) + ) + (return + (i32.const -1) + ) + ) + (local.set $4 + (i32.const 220000) + ) + ) + (local.set $1 + (i32.const 2) + ) + (local.set $0 + (i32.const 0) + ) + (loop $label$11 + (block $label$12 + (block $label$13 + (br_if $label$13 + (i32.eqz + (f32.gt + (local.tee $6 + (f32.sqrt + (f32.convert_i32_s + (local.get $1) + ) + ) + ) + (f32.const 2) + ) + ) + ) + (local.set $2 + (i32.const 2) + ) + (loop $label$14 + (br_if $label$12 + (i32.eqz + (i32.rem_s + (local.get $1) + (local.get $2) + ) + ) + ) + (br_if $label$14 + (f32.lt + (f32.convert_i32_s + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + ) + (local.get $6) 
+ ) + ) + (br $label$13) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + ) + (local.set $2 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (if + (i32.lt_s + (local.get $0) + (local.get $4) + ) + (block + (local.set $1 + (local.get $2) + ) + (br $label$11) + ) + ) + ) + (i32.store + (local.get $5) + (local.get $1) + ) + (drop + (call $30 + (i32.const 1152) + (local.get $5) + ) + ) + (global.set $global$1 + (local.get $3) + ) + (i32.const 0) + ) + ) + (func $8 (; 21 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (i32.store + (local.tee $2 + (local.get $1) + ) + (i32.load offset=60 + (local.get $0) + ) + ) + (local.set $0 + (call $10 + (call $fimport$8 + (i32.const 6) + (local.get $2) + ) + ) + ) + (global.set $global$1 + (local.get $1) + ) + (local.get $0) + ) + ) + (func $9 (; 22 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (block $label$1 (result i32) + (local.set $4 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 32) + ) + ) + (i32.store + (local.tee $3 + (local.get $4) + ) + (i32.load offset=60 + (local.get $0) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.const 0) + ) + (i32.store offset=8 + (local.get $3) + (local.get $1) + ) + (i32.store offset=12 + (local.get $3) + (local.tee $0 + (i32.add + (local.get $4) + (i32.const 20) + ) + ) + ) + (i32.store offset=16 + (local.get $3) + (local.get $2) + ) + (local.set $0 + (if (result i32) + (i32.lt_s + (call $10 + (call $fimport$14 + (i32.const 140) + (local.get $3) + ) + ) + (i32.const 0) + ) + (block (result i32) + (i32.store + (local.get $0) + (i32.const -1) + ) + (i32.const -1) + ) + (i32.load + (local.get $0) + ) + ) + ) + (global.set $global$1 + (local.get $4) + ) + (local.get $0) + ) + ) + (func $10 (; 23 ;) (type $1) (param $0 i32) (result i32) + (if (result i32) + (i32.gt_u + (local.get $0) + (i32.const -4096) + ) + (block (result i32) + (i32.store + (call $11) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + (i32.const -1) + ) + (local.get $0) + ) + ) + (func $11 (; 24 ;) (type $3) (result i32) + (i32.const 3640) + ) + (func $12 (; 25 ;) (type $2) (param $0 i32) + (nop) + ) + (func $13 (; 26 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (block $label$1 (result i32) + (local.set $4 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 80) + ) + ) + (local.set $3 + (local.get $4) + ) + (local.set $5 + (i32.add + (local.get $4) + (i32.const 12) + ) + ) + (i32.store offset=36 + (local.get $0) + (i32.const 3) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 64) + ) + ) + (block + (i32.store + (local.get $3) + (i32.load offset=60 + (local.get $0) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.const 21505) + ) + (i32.store offset=8 + (local.get $3) + (local.get $5) + ) + (if + (call $fimport$13 + (i32.const 54) + (local.get $3) + ) + (i32.store8 offset=75 + (local.get $0) + (i32.const -1) + ) + ) + ) + ) + (local.set $0 + (call $14 + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + (global.set $global$1 + (local.get $4) + ) + (local.get $0) + ) + ) + (func $14 (; 27 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) 
(result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (block $label$1 (result i32) + (local.set $8 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 48) + ) + ) + (local.set $9 + (i32.add + (local.get $8) + (i32.const 16) + ) + ) + (local.set $10 + (local.get $8) + ) + (i32.store + (local.tee $3 + (i32.add + (local.get $8) + (i32.const 32) + ) + ) + (local.tee $4 + (i32.load + (local.tee $6 + (i32.add + (local.get $0) + (i32.const 28) + ) + ) + ) + ) + ) + (i32.store offset=4 + (local.get $3) + (local.tee $5 + (i32.sub + (i32.load + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + (local.get $4) + ) + ) + ) + (i32.store offset=8 + (local.get $3) + (local.get $1) + ) + (i32.store offset=12 + (local.get $3) + (local.get $2) + ) + (local.set $13 + (i32.add + (local.get $0) + (i32.const 60) + ) + ) + (local.set $14 + (i32.add + (local.get $0) + (i32.const 44) + ) + ) + (local.set $1 + (local.get $3) + ) + (local.set $4 + (i32.const 2) + ) + (local.set $12 + (i32.add + (local.get $5) + (local.get $2) + ) + ) + (block $label$2 + (block $label$3 + (block $label$4 + (loop $label$5 + (if + (i32.load + (i32.const 3596) + ) + (block + (call $fimport$9 + (i32.const 1) + (local.get $0) + ) + (i32.store + (local.get $10) + (i32.load + (local.get $13) + ) + ) + (i32.store offset=4 + (local.get $10) + (local.get $1) + ) + (i32.store offset=8 + (local.get $10) + (local.get $4) + ) + (local.set $3 + (call $10 + (call $fimport$15 + (i32.const 146) + (local.get $10) + ) + ) + ) + (call $fimport$7 + (i32.const 0) + ) + ) + (block + (i32.store + (local.get $9) + (i32.load + (local.get $13) + ) + ) + (i32.store offset=4 + (local.get $9) + (local.get $1) + ) + (i32.store offset=8 + (local.get $9) + (local.get $4) + ) + (local.set $3 + (call $10 + (call $fimport$15 + (i32.const 146) + (local.get $9) + ) + ) + ) + ) + ) + (br_if $label$4 + (i32.eq + (local.get $12) + (local.get $3) + ) + ) + (br_if $label$3 + (i32.lt_s + (local.get $3) + (i32.const 0) + ) + ) + (local.set $5 + (if (result i32) + (i32.gt_u + (local.get $3) + (local.tee $5 + (i32.load offset=4 + (local.get $1) + ) + ) + ) + (block (result i32) + (i32.store + (local.get $6) + (local.tee $7 + (i32.load + (local.get $14) + ) + ) + ) + (i32.store + (local.get $11) + (local.get $7) + ) + (local.set $7 + (i32.load offset=12 + (local.get $1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (local.set $4 + (i32.add + (local.get $4) + (i32.const -1) + ) + ) + (i32.sub + (local.get $3) + (local.get $5) + ) + ) + (if (result i32) + (i32.eq + (local.get $4) + (i32.const 2) + ) + (block (result i32) + (i32.store + (local.get $6) + (i32.add + (i32.load + (local.get $6) + ) + (local.get $3) + ) + ) + (local.set $7 + (local.get $5) + ) + (local.set $4 + (i32.const 2) + ) + (local.get $3) + ) + (block (result i32) + (local.set $7 + (local.get $5) + ) + (local.get $3) + ) + ) + ) + ) + (i32.store + (local.get $1) + (i32.add + (i32.load + (local.get $1) + ) + (local.get $5) + ) + ) + (i32.store offset=4 + (local.get $1) + (i32.sub + (local.get $7) + (local.get $5) + ) + ) + (local.set $12 + (i32.sub + (local.get $12) + (local.get $3) + ) + ) + (br $label$5) + ) + ) + (i32.store offset=16 + (local.get $0) + (i32.add + (local.tee $1 + (i32.load + (local.get $14) + ) + ) + (i32.load offset=48 + 
(local.get $0) + ) + ) + ) + (i32.store + (local.get $6) + (local.get $1) + ) + (i32.store + (local.get $11) + (local.get $1) + ) + (br $label$2) + ) + (i32.store offset=16 + (local.get $0) + (i32.const 0) + ) + (i32.store + (local.get $6) + (i32.const 0) + ) + (i32.store + (local.get $11) + (i32.const 0) + ) + (i32.store + (local.get $0) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (local.set $2 + (if (result i32) + (i32.eq + (local.get $4) + (i32.const 2) + ) + (i32.const 0) + (i32.sub + (local.get $2) + (i32.load offset=4 + (local.get $1) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $8) + ) + (local.get $2) + ) + ) + (func $15 (; 28 ;) (type $2) (param $0 i32) + (if + (i32.eqz + (i32.load offset=68 + (local.get $0) + ) + ) + (call $12 + (local.get $0) + ) + ) + ) + (func $16 (; 29 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (block $label$1 (result i32) + (local.set $5 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (block $label$2 + (block $label$3 + (block $label$4 + (if + (i32.and + (local.tee $4 + (i32.ne + (local.get $2) + (i32.const 0) + ) + ) + (i32.ne + (i32.and + (local.get $0) + (i32.const 3) + ) + (i32.const 0) + ) + ) + (block + (local.set $4 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (local.set $3 + (local.get $2) + ) + (local.set $2 + (local.get $0) + ) + (loop $label$6 + (if + (i32.eq + (i32.load8_s + (local.get $2) + ) + (i32.shr_s + (i32.shl + (local.get $4) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (block + (local.set $0 + (local.get $3) + ) + (br $label$3) + ) + ) + (br_if $label$6 + (i32.and + (local.tee $0 + (i32.ne + (local.tee $3 + (i32.add + (local.get $3) + (i32.const -1) + ) + ) + (i32.const 0) + ) + ) + (i32.ne + (i32.and + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (i32.const 3) + ) + (i32.const 0) + ) + ) + ) + (br $label$4) + ) + ) + (block + (local.set $3 + (local.get $2) + ) + (local.set $2 + (local.get $0) + ) + (local.set $0 + (local.get $4) + ) + ) + ) + ) + (if + (local.get $0) + (block + (local.set $0 + (local.get $3) + ) + (br $label$3) + ) + (local.set $0 + (i32.const 0) + ) + ) + (br $label$2) + ) + (if + (i32.ne + (i32.load8_s + (local.get $2) + ) + (i32.shr_s + (i32.shl + (local.tee $1 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (block + (local.set $3 + (i32.mul + (local.get $5) + (i32.const 16843009) + ) + ) + (block $label$12 + (block $label$13 + (br_if $label$13 + (i32.le_u + (local.get $0) + (i32.const 3) + ) + ) + (loop $label$14 + (if + (i32.eqz + (i32.and + (i32.xor + (i32.and + (local.tee $4 + (i32.xor + (i32.load + (local.get $2) + ) + (local.get $3) + ) + ) + (i32.const -2139062144) + ) + (i32.const -2139062144) + ) + (i32.add + (local.get $4) + (i32.const -16843009) + ) + ) + ) + (block + (local.set $2 + (i32.add + (local.get $2) + (i32.const 4) + ) + ) + (br_if $label$14 + (i32.gt_u + (local.tee $0 + (i32.add + (local.get $0) + (i32.const -4) + ) + ) + (i32.const 3) + ) + ) + (br $label$13) + ) + ) + ) + (br $label$12) + ) + (if + (i32.eqz + (local.get $0) + ) + (block + (local.set $0 + (i32.const 0) + ) + (br $label$2) + ) + ) + ) + (loop $label$17 + (br_if $label$2 + (i32.eq + (i32.load8_s + (local.get $2) + ) + (i32.shr_s + (i32.shl + (local.get $1) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) + (local.set $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (br_if $label$17 + (local.tee $0 + 
(i32.add + (local.get $0) + (i32.const -1) + ) + ) + ) + (local.set $0 + (i32.const 0) + ) + ) + ) + ) + ) + (if (result i32) + (local.get $0) + (local.get $2) + (i32.const 0) + ) + ) + ) + (func $17 (; 30 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (block $label$1 (result i32) + (local.set $4 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 224) + ) + ) + (local.set $5 + (i32.add + (local.get $4) + (i32.const 136) + ) + ) + (i64.store align=4 + (local.tee $3 + (i32.add + (local.get $4) + (i32.const 80) + ) + ) + (i64.const 0) + ) + (i64.store offset=8 align=4 + (local.get $3) + (i64.const 0) + ) + (i64.store offset=16 align=4 + (local.get $3) + (i64.const 0) + ) + (i64.store offset=24 align=4 + (local.get $3) + (i64.const 0) + ) + (i64.store offset=32 align=4 + (local.get $3) + (i64.const 0) + ) + (i32.store + (local.tee $6 + (i32.add + (local.get $4) + (i32.const 120) + ) + ) + (i32.load + (local.get $2) + ) + ) + (if + (i32.lt_s + (call $18 + (i32.const 0) + (local.get $1) + (local.get $6) + (local.tee $2 + (local.get $4) + ) + (local.get $3) + ) + (i32.const 0) + ) + (local.set $1 + (i32.const -1) + ) + (block + (local.set $12 + (if (result i32) + (i32.gt_s + (i32.load offset=76 + (local.get $0) + ) + (i32.const -1) + ) + (call $19 + (local.get $0) + ) + (i32.const 0) + ) + ) + (local.set $7 + (i32.load + (local.get $0) + ) + ) + (if + (i32.lt_s + (i32.load8_s offset=74 + (local.get $0) + ) + (i32.const 1) + ) + (i32.store + (local.get $0) + (i32.and + (local.get $7) + (i32.const -33) + ) + ) + ) + (if + (i32.load + (local.tee $8 + (i32.add + (local.get $0) + (i32.const 48) + ) + ) + ) + (local.set $1 + (call $18 + (local.get $0) + (local.get $1) + (local.get $6) + (local.get $2) + (local.get $3) + ) + ) + (block + (local.set $10 + (i32.load + (local.tee $9 + (i32.add + (local.get $0) + (i32.const 44) + ) + ) + ) + ) + (i32.store + (local.get $9) + (local.get $5) + ) + (i32.store + (local.tee $13 + (i32.add + (local.get $0) + (i32.const 28) + ) + ) + (local.get $5) + ) + (i32.store + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + (local.get $5) + ) + (i32.store + (local.get $8) + (i32.const 80) + ) + (i32.store + (local.tee $14 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + (i32.add + (local.get $5) + (i32.const 80) + ) + ) + (local.set $1 + (call $18 + (local.get $0) + (local.get $1) + (local.get $6) + (local.get $2) + (local.get $3) + ) + ) + (if + (local.get $10) + (block + (drop + (call_indirect (type $0) + (local.get $0) + (i32.const 0) + (i32.const 0) + (i32.add + (i32.and + (i32.load offset=36 + (local.get $0) + ) + (i32.const 3) + ) + (i32.const 2) + ) + ) + ) + (if + (i32.eqz + (i32.load + (local.get $11) + ) + ) + (local.set $1 + (i32.const -1) + ) + ) + (i32.store + (local.get $9) + (local.get $10) + ) + (i32.store + (local.get $8) + (i32.const 0) + ) + (i32.store + (local.get $14) + (i32.const 0) + ) + (i32.store + (local.get $13) + (i32.const 0) + ) + (i32.store + (local.get $11) + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (local.get $0) + (i32.or + (local.tee $2 + (i32.load + (local.get $0) + ) + ) + (i32.and + (local.get $7) + (i32.const 32) + ) + ) + ) + (if + (local.get $12) + (call $12 + (local.get $0) + ) + ) + (if + (i32.and + (local.get $2) + 
(i32.const 32) + ) + (local.set $1 + (i32.const -1) + ) + ) + ) + ) + (global.set $global$1 + (local.get $4) + ) + (local.get $1) + ) + ) + (func $18 (; 31 ;) (type $7) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (param $4 i32) (result i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (local $16 i32) + (local $17 i32) + (local $18 i32) + (local $19 i32) + (local $20 i32) + (local $21 i32) + (local $22 i32) + (local $23 i32) + (local $24 i32) + (local $25 i32) + (local $26 i32) + (local $27 i32) + (local $28 i32) + (local $29 i32) + (local $30 i32) + (local $31 i32) + (local $32 i32) + (local $33 i32) + (local $34 i32) + (local $35 i32) + (local $36 i32) + (local $37 i32) + (local $38 i32) + (local $39 i32) + (local $40 i32) + (local $41 i32) + (local $42 i32) + (local $43 i32) + (local $44 i32) + (local $45 i32) + (local $46 i32) + (local $47 i32) + (local $48 i32) + (local $49 i32) + (local $50 i64) + (local $51 i64) + (local $52 f64) + (local $53 f64) + (block $label$1 (result i32) + (local.set $23 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 624) + ) + ) + (local.set $20 + (i32.add + (local.get $23) + (i32.const 16) + ) + ) + (local.set $16 + (local.get $23) + ) + (local.set $36 + (i32.add + (local.get $23) + (i32.const 528) + ) + ) + (local.set $30 + (i32.ne + (local.get $0) + (i32.const 0) + ) + ) + (local.set $38 + (local.tee $21 + (i32.add + (local.tee $17 + (i32.add + (local.get $23) + (i32.const 536) + ) + ) + (i32.const 40) + ) + ) + ) + (local.set $39 + (i32.add + (local.get $17) + (i32.const 39) + ) + ) + (local.set $42 + (i32.add + (local.tee $37 + (i32.add + (local.get $23) + (i32.const 8) + ) + ) + (i32.const 4) + ) + ) + (local.set $43 + (i32.sub + (i32.const 0) + (local.tee $27 + (local.tee $19 + (i32.add + (local.get $23) + (i32.const 588) + ) + ) + ) + ) + ) + (local.set $33 + (i32.add + (local.tee $17 + (i32.add + (local.get $23) + (i32.const 576) + ) + ) + (i32.const 12) + ) + ) + (local.set $40 + (i32.add + (local.get $17) + (i32.const 11) + ) + ) + (local.set $44 + (i32.sub + (local.tee $28 + (local.get $33) + ) + (local.get $27) + ) + ) + (local.set $45 + (i32.sub + (i32.const -2) + (local.get $27) + ) + ) + (local.set $46 + (i32.add + (local.get $28) + (i32.const 2) + ) + ) + (local.set $48 + (i32.add + (local.tee $47 + (i32.add + (local.get $23) + (i32.const 24) + ) + ) + (i32.const 288) + ) + ) + (local.set $41 + (local.tee $31 + (i32.add + (local.get $19) + (i32.const 9) + ) + ) + ) + (local.set $34 + (i32.add + (local.get $19) + (i32.const 8) + ) + ) + (local.set $15 + (i32.const 0) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $17 + (i32.const 0) + ) + (block $label$2 + (block $label$3 + (loop $label$4 + (block $label$5 + (if + (i32.gt_s + (local.get $15) + (i32.const -1) + ) + (local.set $15 + (if (result i32) + (i32.gt_s + (local.get $10) + (i32.sub + (i32.const 2147483647) + (local.get $15) + ) + ) + (block (result i32) + (i32.store + (call $11) + (i32.const 75) + ) + (i32.const -1) + ) + (i32.add + (local.get $10) + (local.get $15) + ) + ) + ) + ) + (br_if $label$3 + (i32.eqz + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.get $1) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) + (local.set $11 + (local.get $1) + ) + (block $label$9 + (block $label$10 + (loop $label$11 + (block $label$12 + (block 
$label$13 + (block $label$14 + (block $label$15 + (br_table $label$14 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$13 $label$15 $label$13 + (i32.sub + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 0) + ) + ) + ) + (local.set $5 + (local.get $11) + ) + (br $label$10) + ) + (local.set $5 + (local.get $11) + ) + (br $label$12) + ) + (local.set $5 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (br $label$11) + ) + ) + (br $label$9) + ) + (loop $label$16 + (br_if $label$9 + (i32.ne + (i32.load8_s offset=1 + (local.get $5) + ) + (i32.const 37) + ) + ) + (local.set $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + (br_if $label$16 + (i32.eq + (i32.load8_s + (local.tee $5 + (i32.add + (local.get $5) + (i32.const 2) + ) + ) + ) + (i32.const 37) + ) + ) + ) + ) + (local.set $10 + (i32.sub + (local.get $11) + (local.get $1) + ) + ) + (if + (local.get $30) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $20 + (local.get $1) + (local.get $10) + (local.get $0) + ) + ) + ) + ) + (if + (local.get $10) + (block + (local.set $1 + (local.get $5) + ) + (br $label$4) + ) + ) + (local.set $10 + (if (result i32) + (i32.lt_u + (local.tee $9 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $10 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (block (result i32) + (local.set $10 + (i32.add + (local.get $5) + (i32.const 3) + ) + ) + (if + (local.tee $12 + (i32.eq + (i32.load8_s offset=2 + (local.get $5) + ) + (i32.const 36) + ) + ) + (local.set $11 + (local.get $10) + ) + ) + (if + (local.get $12) + (local.set $17 + (i32.const 1) + ) + ) + (local.set $5 + (i32.load8_s + (local.get $11) + ) + ) + (if + (i32.eqz + (local.get $12) + ) + (local.set $9 + (i32.const -1) + ) + ) + (local.get $17) + ) + (block (result i32) + (local.set $5 + (local.get $10) + ) + (local.set $9 + (i32.const -1) + ) + (local.get $17) + ) + ) + ) + (block $label$25 + (if + (i32.lt_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -32) + ) + ) + (i32.const 32) + ) + (block + (local.set $17 + (i32.const 0) + ) + (loop $label$27 + (br_if $label$25 + (i32.eqz + (i32.and + (i32.shl + (i32.const 1) + (local.get $12) + ) + (i32.const 75913) + ) + ) + ) + (local.set $17 + (i32.or + (i32.shl + (i32.const 1) + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -32) + ) + ) + (local.get $17) + ) + ) + (br_if $label$27 + (i32.lt_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -32) + ) + ) + (i32.const 32) + ) + ) + ) + ) + (local.set $17 + (i32.const 0) + ) + ) + ) + (block $label$29 + (if + (i32.eq + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 42) + ) + (block + (local.set $11 + (block $label$31 (result i32) + (block 
$label$32 + (br_if $label$32 + (i32.ge_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + ) + (br_if $label$32 + (i32.ne + (i32.load8_s offset=2 + (local.get $11) + ) + (i32.const 36) + ) + ) + (i32.store + (i32.add + (local.get $4) + (i32.shl + (local.get $12) + (i32.const 2) + ) + ) + (i32.const 10) + ) + (local.set $8 + (i32.const 1) + ) + (local.set $10 + (i32.wrap_i64 + (i64.load + (i32.add + (local.get $3) + (i32.shl + (i32.add + (i32.load8_s + (local.get $7) + ) + (i32.const -48) + ) + (i32.const 3) + ) + ) + ) + ) + ) + (br $label$31 + (i32.add + (local.get $11) + (i32.const 3) + ) + ) + ) + (if + (local.get $10) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (if + (i32.eqz + (local.get $30) + ) + (block + (local.set $12 + (local.get $17) + ) + (local.set $17 + (i32.const 0) + ) + (local.set $11 + (local.get $7) + ) + (local.set $10 + (i32.const 0) + ) + (br $label$29) + ) + ) + (local.set $10 + (i32.load + (local.tee $11 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $11) + (i32.const 4) + ) + ) + (local.set $8 + (i32.const 0) + ) + (local.get $7) + ) + ) + (local.set $12 + (i32.or + (local.get $17) + (i32.const 8192) + ) + ) + (local.set $7 + (i32.sub + (i32.const 0) + (local.get $10) + ) + ) + (local.set $5 + (i32.load8_s + (local.get $11) + ) + ) + (if + (i32.eqz + (local.tee $6 + (i32.lt_s + (local.get $10) + (i32.const 0) + ) + ) + ) + (local.set $12 + (local.get $17) + ) + ) + (local.set $17 + (local.get $8) + ) + (if + (local.get $6) + (local.set $10 + (local.get $7) + ) + ) + ) + (if + (i32.lt_u + (local.tee $12 + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (block + (local.set $7 + (i32.const 0) + ) + (local.set $5 + (local.get $12) + ) + (loop $label$39 + (local.set $7 + (i32.add + (i32.mul + (local.get $7) + (i32.const 10) + ) + (local.get $5) + ) + ) + (br_if $label$39 + (i32.lt_u + (local.tee $5 + (i32.add + (i32.shr_s + (i32.shl + (local.tee $12 + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + ) + ) + (if + (i32.lt_s + (local.get $7) + (i32.const 0) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + (block + (local.set $5 + (local.get $12) + ) + (local.set $12 + (local.get $17) + ) + (local.set $17 + (local.get $10) + ) + (local.set $10 + (local.get $7) + ) + ) + ) + ) + (block + (local.set $12 + (local.get $17) + ) + (local.set $17 + (local.get $10) + ) + (local.set $10 + (i32.const 0) + ) + ) + ) + ) + ) + (block $label$43 + (if + (i32.eq + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 46) + ) + (block + (if + (i32.ne + (i32.shr_s + (i32.shl + (local.tee $5 + (i32.load8_s + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 42) + ) + (block + (if + (i32.lt_u + (local.tee $5 + (i32.add + (i32.shr_s + (i32.shl + (local.get $5) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (block + (local.set $11 + (local.get $7) + 
) + (local.set $7 + (i32.const 0) + ) + ) + (block + (local.set $5 + (i32.const 0) + ) + (local.set $11 + (local.get $7) + ) + (br $label$43) + ) + ) + (loop $label$48 + (local.set $5 + (i32.add + (i32.mul + (local.get $7) + (i32.const 10) + ) + (local.get $5) + ) + ) + (br_if $label$43 + (i32.ge_u + (local.tee $8 + (i32.add + (i32.load8_s + (local.tee $11 + (i32.add + (local.get $11) + (i32.const 1) + ) + ) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + ) + (local.set $7 + (local.get $5) + ) + (local.set $5 + (local.get $8) + ) + (br $label$48) + ) + ) + ) + (if + (i32.lt_u + (local.tee $5 + (i32.add + (i32.load8_s + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 2) + ) + ) + ) + (i32.const -48) + ) + ) + (i32.const 10) + ) + (if + (i32.eq + (i32.load8_s offset=3 + (local.get $11) + ) + (i32.const 36) + ) + (block + (i32.store + (i32.add + (local.get $4) + (i32.shl + (local.get $5) + (i32.const 2) + ) + ) + (i32.const 10) + ) + (local.set $5 + (i32.wrap_i64 + (i64.load + (i32.add + (local.get $3) + (i32.shl + (i32.add + (i32.load8_s + (local.get $7) + ) + (i32.const -48) + ) + (i32.const 3) + ) + ) + ) + ) + ) + (local.set $11 + (i32.add + (local.get $11) + (i32.const 4) + ) + ) + (br $label$43) + ) + ) + ) + (if + (local.get $17) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (local.set $11 + (if (result i32) + (local.get $30) + (block (result i32) + (local.set $5 + (i32.load + (local.tee $11 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $11) + (i32.const 4) + ) + ) + (local.get $7) + ) + (block (result i32) + (local.set $5 + (i32.const 0) + ) + (local.get $7) + ) + ) + ) + ) + (local.set $5 + (i32.const -1) + ) + ) + ) + (local.set $7 + (local.get $11) + ) + (local.set $8 + (i32.const 0) + ) + (loop $label$55 + (if + (i32.gt_u + (local.tee $6 + (i32.add + (i32.load8_s + (local.get $7) + ) + (i32.const -65) + ) + ) + (i32.const 57) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (local.set $11 + (i32.add + (local.get $7) + (i32.const 1) + ) + ) + (if + (i32.lt_u + (i32.add + (local.tee $6 + (i32.and + (local.tee $13 + (i32.load8_s + (i32.add + (i32.add + (i32.mul + (local.get $8) + (i32.const 58) + ) + (i32.const 1168) + ) + (local.get $6) + ) + ) + ) + (i32.const 255) + ) + ) + (i32.const -1) + ) + (i32.const 8) + ) + (block + (local.set $7 + (local.get $11) + ) + (local.set $8 + (local.get $6) + ) + (br $label$55) + ) + ) + ) + (if + (i32.eqz + (i32.shr_s + (i32.shl + (local.get $13) + (i32.const 24) + ) + (i32.const 24) + ) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (local.set $14 + (i32.gt_s + (local.get $9) + (i32.const -1) + ) + ) + (block $label$59 + (block $label$60 + (if + (i32.eq + (i32.shr_s + (i32.shl + (local.get $13) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 19) + ) + (if + (local.get $14) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + (br $label$60) + ) + (block + (if + (local.get $14) + (block + (i32.store + (i32.add + (local.get $4) + (i32.shl + (local.get $9) + (i32.const 2) + ) + ) + (local.get $6) + ) + (i64.store + (local.get $16) + (i64.load + (i32.add + (local.get $3) + (i32.shl + (local.get $9) + (i32.const 3) + ) + ) + ) + ) + (br $label$60) + ) + ) + (if + (i32.eqz + (local.get $30) + ) + (block + (local.set $15 + (i32.const 0) + ) + (br $label$5) + ) + ) + (call $21 + (local.get $16) + (local.get $6) + 
(local.get $2) + ) + ) + ) + (br $label$59) + ) + (if + (i32.eqz + (local.get $30) + ) + (block + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + ) + ) + (local.set $9 + (i32.and + (local.tee $7 + (i32.load8_s + (local.get $7) + ) + ) + (i32.const -33) + ) + ) + (if + (i32.eqz + (i32.and + (i32.ne + (local.get $8) + (i32.const 0) + ) + (i32.eq + (i32.and + (local.get $7) + (i32.const 15) + ) + (i32.const 3) + ) + ) + ) + (local.set $9 + (local.get $7) + ) + ) + (local.set $7 + (i32.and + (local.get $12) + (i32.const -65537) + ) + ) + (if + (i32.and + (local.get $12) + (i32.const 8192) + ) + (local.set $12 + (local.get $7) + ) + ) + (block $label$70 + (block $label$71 + (block $label$72 + (block $label$73 + (block $label$74 + (block $label$75 + (block $label$76 + (block $label$77 + (block $label$78 + (block $label$79 + (block $label$80 + (block $label$81 + (block $label$82 + (block $label$83 + (block $label$84 + (block $label$85 + (block $label$86 + (block $label$87 + (block $label$88 + (block $label$89 + (br_table $label$78 $label$77 $label$80 $label$77 $label$78 $label$78 $label$78 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$79 $label$77 $label$77 $label$77 $label$77 $label$87 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$77 $label$78 $label$77 $label$83 $label$85 $label$78 $label$78 $label$78 $label$77 $label$85 $label$77 $label$77 $label$77 $label$82 $label$89 $label$86 $label$88 $label$77 $label$77 $label$81 $label$77 $label$84 $label$77 $label$77 $label$87 $label$77 + (i32.sub + (local.get $9) + (i32.const 65) + ) + ) + ) + (block $label$90 + (block $label$91 + (block $label$92 + (block $label$93 + (block $label$94 + (block $label$95 + (block $label$96 + (block $label$97 + (br_table $label$97 $label$96 $label$95 $label$94 $label$93 $label$90 $label$92 $label$91 $label$90 + (i32.sub + (i32.shr_s + (i32.shl + (i32.and + (local.get $8) + (i32.const 255) + ) + (i32.const 24) + ) + (i32.const 24) + ) + (i32.const 0) + ) + ) + ) + (i32.store + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i64.store + (i32.load + (local.get $16) + ) + (i64.extend_i32_s + (local.get $15) + ) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store16 + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store8 + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i32.store + (i32.load + (local.get $16) + ) + (local.get $15) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (i64.store + (i32.load + (local.get $16) + ) + (i64.extend_i32_s + (local.get $15) + ) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $10 + (i32.const 0) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $12 + (i32.or + (local.get $12) + (i32.const 8) + ) + ) + (if + (i32.le_u + (local.get $5) + (i32.const 8) + ) + (local.set $5 + 
(i32.const 8) + ) + ) + (local.set $9 + (i32.const 120) + ) + (br $label$76) + ) + (br $label$76) + ) + (if + (i64.eq + (local.tee $50 + (i64.load + (local.get $16) + ) + ) + (i64.const 0) + ) + (local.set $7 + (local.get $21) + ) + (block + (local.set $1 + (local.get $21) + ) + (loop $label$101 + (i64.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i64.or + (i64.and + (local.get $50) + (i64.const 7) + ) + (i64.const 48) + ) + ) + (br_if $label$101 + (i64.ne + (local.tee $50 + (i64.shr_u + (local.get $50) + (i64.const 3) + ) + ) + (i64.const 0) + ) + ) + (local.set $7 + (local.get $1) + ) + ) + ) + ) + (if + (i32.and + (local.get $12) + (i32.const 8) + ) + (block + (local.set $8 + (i32.add + (local.tee $1 + (i32.sub + (local.get $38) + (local.get $7) + ) + ) + (i32.const 1) + ) + ) + (if + (i32.le_s + (local.get $5) + (local.get $1) + ) + (local.set $5 + (local.get $8) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1648) + ) + (br $label$71) + ) + (block + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1648) + ) + (br $label$71) + ) + ) + ) + (if + (i64.lt_s + (local.tee $50 + (i64.load + (local.get $16) + ) + ) + (i64.const 0) + ) + (block + (i64.store + (local.get $16) + (local.tee $50 + (i64.sub + (i64.const 0) + (local.get $50) + ) + ) + ) + (local.set $6 + (i32.const 1) + ) + (local.set $8 + (i32.const 1648) + ) + (br $label$75) + ) + ) + (if + (i32.and + (local.get $12) + (i32.const 2048) + ) + (block + (local.set $6 + (i32.const 1) + ) + (local.set $8 + (i32.const 1649) + ) + (br $label$75) + ) + (block + (local.set $6 + (local.tee $1 + (i32.and + (local.get $12) + (i32.const 1) + ) + ) + ) + (local.set $8 + (if (result i32) + (local.get $1) + (i32.const 1650) + (i32.const 1648) + ) + ) + (br $label$75) + ) + ) + ) + (local.set $50 + (i64.load + (local.get $16) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1648) + ) + (br $label$75) + ) + (i64.store8 + (local.get $39) + (i64.load + (local.get $16) + ) + ) + (local.set $1 + (local.get $39) + ) + (local.set $12 + (local.get $7) + ) + (local.set $7 + (i32.const 1) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1648) + ) + (local.set $5 + (local.get $21) + ) + (br $label$70) + ) + (local.set $1 + (call $23 + (i32.load + (call $11) + ) + ) + ) + (br $label$74) + ) + (if + (i32.eqz + (local.tee $1 + (i32.load + (local.get $16) + ) + ) + ) + (local.set $1 + (i32.const 1658) + ) + ) + (br $label$74) + ) + (i64.store32 + (local.get $37) + (i64.load + (local.get $16) + ) + ) + (i32.store + (local.get $42) + (i32.const 0) + ) + (i32.store + (local.get $16) + (local.get $37) + ) + (local.set $7 + (local.get $37) + ) + (local.set $6 + (i32.const -1) + ) + (br $label$73) + ) + (local.set $7 + (i32.load + (local.get $16) + ) + ) + (if + (local.get $5) + (block + (local.set $6 + (local.get $5) + ) + (br $label$73) + ) + (block + (call $24 + (local.get $0) + (i32.const 32) + (local.get $10) + (i32.const 0) + (local.get $12) + ) + (local.set $1 + (i32.const 0) + ) + (br $label$72) + ) + ) + ) + (local.set $52 + (f64.load + (local.get $16) + ) + ) + (i32.store + (local.get $20) + (i32.const 0) + ) + (local.set $26 + (if (result i32) + (i64.lt_s + (i64.reinterpret_f64 + (local.get $52) + ) + (i64.const 0) + ) + (block (result i32) + (local.set $24 + (i32.const 1) + ) + (local.set $52 + (f64.neg + (local.get $52) + ) + ) + (i32.const 1665) + ) + (block (result i32) + (local.set $1 + (i32.and + (local.get $12) + (i32.const 1) + ) + ) 
+ (if (result i32) + (i32.and + (local.get $12) + (i32.const 2048) + ) + (block (result i32) + (local.set $24 + (i32.const 1) + ) + (i32.const 1668) + ) + (block (result i32) + (local.set $24 + (local.get $1) + ) + (if (result i32) + (local.get $1) + (i32.const 1671) + (i32.const 1666) + ) + ) + ) + ) + ) + ) + (block $label$119 + (if + (i64.lt_u + (i64.and + (i64.reinterpret_f64 + (local.get $52) + ) + (i64.const 9218868437227405312) + ) + (i64.const 9218868437227405312) + ) + (block + (if + (local.tee $1 + (f64.ne + (local.tee $52 + (f64.mul + (call $26 + (local.get $52) + (local.get $20) + ) + (f64.const 2) + ) + ) + (f64.const 0) + ) + ) + (i32.store + (local.get $20) + (i32.add + (i32.load + (local.get $20) + ) + (i32.const -1) + ) + ) + ) + (if + (i32.eq + (local.tee $22 + (i32.or + (local.get $9) + (i32.const 32) + ) + ) + (i32.const 97) + ) + (block + (local.set $1 + (i32.add + (local.get $26) + (i32.const 9) + ) + ) + (if + (local.tee $6 + (i32.and + (local.get $9) + (i32.const 32) + ) + ) + (local.set $26 + (local.get $1) + ) + ) + (if + (i32.eqz + (i32.or + (i32.gt_u + (local.get $5) + (i32.const 11) + ) + (i32.eqz + (local.tee $1 + (i32.sub + (i32.const 12) + (local.get $5) + ) + ) + ) + ) + ) + (block + (local.set $53 + (f64.const 8) + ) + (loop $label$125 + (local.set $53 + (f64.mul + (local.get $53) + (f64.const 16) + ) + ) + (br_if $label$125 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + ) + ) + (local.set $52 + (if (result f64) + (i32.eq + (i32.load8_s + (local.get $26) + ) + (i32.const 45) + ) + (f64.neg + (f64.add + (local.get $53) + (f64.sub + (f64.neg + (local.get $52) + ) + (local.get $53) + ) + ) + ) + (f64.sub + (f64.add + (local.get $52) + (local.get $53) + ) + (local.get $53) + ) + ) + ) + ) + ) + (local.set $1 + (i32.sub + (i32.const 0) + (local.tee $7 + (i32.load + (local.get $20) + ) + ) + ) + ) + (if + (i32.eq + (local.tee $1 + (call $22 + (i64.extend_i32_s + (if (result i32) + (i32.lt_s + (local.get $7) + (i32.const 0) + ) + (local.get $1) + (local.get $7) + ) + ) + (local.get $33) + ) + ) + (local.get $33) + ) + (block + (i32.store8 + (local.get $40) + (i32.const 48) + ) + (local.set $1 + (local.get $40) + ) + ) + ) + (local.set $13 + (i32.or + (local.get $24) + (i32.const 2) + ) + ) + (i32.store8 + (i32.add + (local.get $1) + (i32.const -1) + ) + (i32.add + (i32.and + (i32.shr_s + (local.get $7) + (i32.const 31) + ) + (i32.const 2) + ) + (i32.const 43) + ) + ) + (i32.store8 + (local.tee $8 + (i32.add + (local.get $1) + (i32.const -2) + ) + ) + (i32.add + (local.get $9) + (i32.const 15) + ) + ) + (local.set $9 + (i32.lt_s + (local.get $5) + (i32.const 1) + ) + ) + (local.set $14 + (i32.eqz + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + ) + (local.set $1 + (local.get $19) + ) + (loop $label$131 + (i32.store8 + (local.get $1) + (i32.or + (i32.load8_u + (i32.add + (local.tee $7 + (i32.trunc_f64_s + (local.get $52) + ) + ) + (i32.const 1632) + ) + ) + (local.get $6) + ) + ) + (local.set $52 + (f64.mul + (f64.sub + (local.get $52) + (f64.convert_i32_s + (local.get $7) + ) + ) + (f64.const 16) + ) + ) + (local.set $1 + (block $label$132 (result i32) + (if (result i32) + (i32.eq + (i32.sub + (local.tee $7 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.get $27) + ) + (i32.const 1) + ) + (block (result i32) + (drop + (br_if $label$132 + (local.get $7) + (i32.and + (local.get $14) + (i32.and + (local.get $9) + (f64.eq + (local.get $52) + (f64.const 0) + ) + ) + ) + ) + ) + (i32.store8 + (local.get $7) + (i32.const 46) + ) 
+ (i32.add + (local.get $1) + (i32.const 2) + ) + ) + (local.get $7) + ) + ) + ) + (br_if $label$131 + (f64.ne + (local.get $52) + (f64.const 0) + ) + ) + ) + (local.set $6 + (i32.sub + (i32.add + (local.get $46) + (local.get $5) + ) + (local.tee $7 + (local.get $8) + ) + ) + ) + (local.set $9 + (i32.add + (i32.sub + (local.get $44) + (local.get $7) + ) + (local.get $1) + ) + ) + (call $24 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.tee $5 + (i32.add + (if (result i32) + (i32.and + (i32.ne + (local.get $5) + (i32.const 0) + ) + (i32.lt_s + (i32.add + (local.get $45) + (local.get $1) + ) + (local.get $5) + ) + ) + (local.get $6) + (local.tee $6 + (local.get $9) + ) + ) + (local.get $13) + ) + ) + (local.get $12) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $20 + (local.get $26) + (local.get $13) + (local.get $0) + ) + ) + ) + (call $24 + (local.get $0) + (i32.const 48) + (local.get $10) + (local.get $5) + (i32.xor + (local.get $12) + (i32.const 65536) + ) + ) + (local.set $1 + (i32.sub + (local.get $1) + (local.get $27) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $20 + (local.get $19) + (local.get $1) + (local.get $0) + ) + ) + ) + (call $24 + (local.get $0) + (i32.const 48) + (i32.sub + (local.get $6) + (i32.add + (local.get $1) + (local.tee $1 + (i32.sub + (local.get $28) + (local.get $7) + ) + ) + ) + ) + (i32.const 0) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $20 + (local.get $8) + (local.get $1) + (local.get $0) + ) + ) + ) + (call $24 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $5) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.ge_s + (local.get $5) + (local.get $10) + ) + (local.set $10 + (local.get $5) + ) + ) + (br $label$119) + ) + ) + (if + (local.get $1) + (block + (i32.store + (local.get $20) + (local.tee $6 + (i32.add + (i32.load + (local.get $20) + ) + (i32.const -28) + ) + ) + ) + (local.set $52 + (f64.mul + (local.get $52) + (f64.const 268435456) + ) + ) + ) + (local.set $6 + (i32.load + (local.get $20) + ) + ) + ) + (local.set $8 + (local.tee $7 + (if (result i32) + (i32.lt_s + (local.get $6) + (i32.const 0) + ) + (local.get $47) + (local.get $48) + ) + ) + ) + (loop $label$145 + (i32.store + (local.get $8) + (local.tee $1 + (i32.trunc_f64_s + (local.get $52) + ) + ) + ) + (local.set $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + (br_if $label$145 + (f64.ne + (local.tee $52 + (f64.mul + (f64.sub + (local.get $52) + (f64.convert_i32_u + (local.get $1) + ) + ) + (f64.const 1e9) + ) + ) + (f64.const 0) + ) + ) + ) + (if + (i32.gt_s + (local.get $6) + (i32.const 0) + ) + (block + (local.set $1 + (local.get $7) + ) + (loop $label$147 + (local.set $14 + (if (result i32) + (i32.gt_s + (local.get $6) + (i32.const 29) + ) + (i32.const 29) + (local.get $6) + ) + ) + (block $label$150 + (if + (i32.ge_u + (local.tee $6 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + (local.get $1) + ) + (block + (local.set $50 + (i64.extend_i32_u + (local.get $14) + ) + ) + (local.set $13 + (i32.const 0) + ) + (loop $label$152 + (i64.store32 + (local.get $6) + (i64.rem_u + (local.tee $51 + (i64.add + (i64.shl + (i64.extend_i32_u + (i32.load + (local.get $6) + ) + ) + (local.get $50) + ) + (i64.extend_i32_u + (local.get $13) + ) + ) + ) + (i64.const 1000000000) + ) + ) + (local.set $13 + (i32.wrap_i64 + (i64.div_u + 
(local.get $51) + (i64.const 1000000000) + ) + ) + ) + (br_if $label$152 + (i32.ge_u + (local.tee $6 + (i32.add + (local.get $6) + (i32.const -4) + ) + ) + (local.get $1) + ) + ) + ) + (br_if $label$150 + (i32.eqz + (local.get $13) + ) + ) + (i32.store + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -4) + ) + ) + (local.get $13) + ) + ) + ) + ) + (loop $label$153 + (if + (i32.gt_u + (local.get $8) + (local.get $1) + ) + (if + (i32.eqz + (i32.load + (local.tee $6 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + ) + ) + (block + (local.set $8 + (local.get $6) + ) + (br $label$153) + ) + ) + ) + ) + (i32.store + (local.get $20) + (local.tee $6 + (i32.sub + (i32.load + (local.get $20) + ) + (local.get $14) + ) + ) + ) + (br_if $label$147 + (i32.gt_s + (local.get $6) + (i32.const 0) + ) + ) + ) + ) + (local.set $1 + (local.get $7) + ) + ) + (local.set $18 + (if (result i32) + (i32.lt_s + (local.get $5) + (i32.const 0) + ) + (i32.const 6) + (local.get $5) + ) + ) + (if + (i32.lt_s + (local.get $6) + (i32.const 0) + ) + (block + (local.set $14 + (i32.add + (i32.div_s + (i32.add + (local.get $18) + (i32.const 25) + ) + (i32.const 9) + ) + (i32.const 1) + ) + ) + (local.set $25 + (i32.eq + (local.get $22) + (i32.const 102) + ) + ) + (local.set $5 + (local.get $8) + ) + (loop $label$160 + (if + (i32.gt_s + (local.tee $13 + (i32.sub + (i32.const 0) + (local.get $6) + ) + ) + (i32.const 9) + ) + (local.set $13 + (i32.const 9) + ) + ) + (block $label$162 + (if + (i32.lt_u + (local.get $1) + (local.get $5) + ) + (block + (local.set $29 + (i32.add + (i32.shl + (i32.const 1) + (local.get $13) + ) + (i32.const -1) + ) + ) + (local.set $35 + (i32.shr_u + (i32.const 1000000000) + (local.get $13) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (local.get $1) + ) + (loop $label$164 + (i32.store + (local.get $8) + (i32.add + (i32.shr_u + (local.tee $32 + (i32.load + (local.get $8) + ) + ) + (local.get $13) + ) + (local.get $6) + ) + ) + (local.set $6 + (i32.mul + (i32.and + (local.get $32) + (local.get $29) + ) + (local.get $35) + ) + ) + (br_if $label$164 + (i32.lt_u + (local.tee $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + (local.get $5) + ) + ) + ) + (local.set $8 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (if + (i32.eqz + (i32.load + (local.get $1) + ) + ) + (local.set $1 + (local.get $8) + ) + ) + (br_if $label$162 + (i32.eqz + (local.get $6) + ) + ) + (i32.store + (local.get $5) + (local.get $6) + ) + (local.set $5 + (i32.add + (local.get $5) + (i32.const 4) + ) + ) + ) + (block + (local.set $8 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (if + (i32.eqz + (i32.load + (local.get $1) + ) + ) + (local.set $1 + (local.get $8) + ) + ) + ) + ) + ) + (local.set $6 + (i32.add + (local.tee $8 + (if (result i32) + (local.get $25) + (local.get $7) + (local.get $1) + ) + ) + (i32.shl + (local.get $14) + (i32.const 2) + ) + ) + ) + (if + (i32.gt_s + (i32.shr_s + (i32.sub + (local.get $5) + (local.get $8) + ) + (i32.const 2) + ) + (local.get $14) + ) + (local.set $5 + (local.get $6) + ) + ) + (i32.store + (local.get $20) + (local.tee $6 + (i32.add + (i32.load + (local.get $20) + ) + (local.get $13) + ) + ) + ) + (br_if $label$160 + (i32.lt_s + (local.get $6) + (i32.const 0) + ) + ) + (local.set $13 + (local.get $5) + ) + ) + ) + (local.set $13 + (local.get $8) + ) + ) + (local.set $25 + (local.get $7) + ) + (block $label$172 + (if + (i32.lt_u + (local.get $1) + (local.get $13) + ) + (block + (local.set $5 + (i32.mul + (i32.shr_s + (i32.sub + (local.get $25) + 
(local.get $1) + ) + (i32.const 2) + ) + (i32.const 9) + ) + ) + (br_if $label$172 + (i32.lt_u + (local.tee $6 + (i32.load + (local.get $1) + ) + ) + (i32.const 10) + ) + ) + (local.set $8 + (i32.const 10) + ) + (loop $label$174 + (local.set $5 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$174 + (i32.ge_u + (local.get $6) + (local.tee $8 + (i32.mul + (local.get $8) + (i32.const 10) + ) + ) + ) + ) + ) + ) + (local.set $5 + (i32.const 0) + ) + ) + ) + (local.set $29 + (i32.eq + (local.get $22) + (i32.const 103) + ) + ) + (local.set $35 + (i32.ne + (local.get $18) + (i32.const 0) + ) + ) + (if + (i32.lt_s + (local.tee $8 + (i32.add + (i32.sub + (local.get $18) + (if (result i32) + (i32.ne + (local.get $22) + (i32.const 102) + ) + (local.get $5) + (i32.const 0) + ) + ) + (i32.shr_s + (i32.shl + (i32.and + (local.get $35) + (local.get $29) + ) + (i32.const 31) + ) + (i32.const 31) + ) + ) + ) + (i32.add + (i32.mul + (i32.shr_s + (i32.sub + (local.get $13) + (local.get $25) + ) + (i32.const 2) + ) + (i32.const 9) + ) + (i32.const -9) + ) + ) + (block + (if + (i32.lt_s + (local.tee $8 + (i32.add + (i32.rem_s + (local.tee $14 + (i32.add + (local.get $8) + (i32.const 9216) + ) + ) + (i32.const 9) + ) + (i32.const 1) + ) + ) + (i32.const 9) + ) + (block + (local.set $6 + (i32.const 10) + ) + (loop $label$180 + (local.set $6 + (i32.mul + (local.get $6) + (i32.const 10) + ) + ) + (br_if $label$180 + (i32.ne + (local.tee $8 + (i32.add + (local.get $8) + (i32.const 1) + ) + ) + (i32.const 9) + ) + ) + ) + ) + (local.set $6 + (i32.const 10) + ) + ) + (local.set $14 + (i32.rem_u + (local.tee $22 + (i32.load + (local.tee $8 + (i32.add + (i32.add + (local.get $7) + (i32.const 4) + ) + (i32.shl + (i32.add + (i32.div_s + (local.get $14) + (i32.const 9) + ) + (i32.const -1024) + ) + (i32.const 2) + ) + ) + ) + ) + ) + (local.get $6) + ) + ) + (block $label$182 + (if + (i32.eqz + (i32.and + (local.tee $32 + (i32.eq + (i32.add + (local.get $8) + (i32.const 4) + ) + (local.get $13) + ) + ) + (i32.eqz + (local.get $14) + ) + ) + ) + (block + (local.set $52 + (if (result f64) + (i32.lt_u + (local.get $14) + (local.tee $49 + (i32.div_s + (local.get $6) + (i32.const 2) + ) + ) + ) + (f64.const 0.5) + (if (result f64) + (i32.and + (local.get $32) + (i32.eq + (local.get $14) + (local.get $49) + ) + ) + (f64.const 1) + (f64.const 1.5) + ) + ) + ) + (local.set $53 + (if (result f64) + (i32.and + (i32.div_u + (local.get $22) + (local.get $6) + ) + (i32.const 1) + ) + (f64.const 9007199254740994) + (f64.const 9007199254740992) + ) + ) + (block $label$190 + (if + (local.get $24) + (block + (br_if $label$190 + (i32.ne + (i32.load8_s + (local.get $26) + ) + (i32.const 45) + ) + ) + (local.set $53 + (f64.neg + (local.get $53) + ) + ) + (local.set $52 + (f64.neg + (local.get $52) + ) + ) + ) + ) + ) + (i32.store + (local.get $8) + (local.tee $14 + (i32.sub + (local.get $22) + (local.get $14) + ) + ) + ) + (br_if $label$182 + (f64.eq + (f64.add + (local.get $53) + (local.get $52) + ) + (local.get $53) + ) + ) + (i32.store + (local.get $8) + (local.tee $5 + (i32.add + (local.get $14) + (local.get $6) + ) + ) + ) + (if + (i32.gt_u + (local.get $5) + (i32.const 999999999) + ) + (loop $label$193 + (i32.store + (local.get $8) + (i32.const 0) + ) + (if + (i32.lt_u + (local.tee $8 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + (local.get $1) + ) + (i32.store + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -4) + ) + ) + (i32.const 0) + ) + ) + (i32.store + (local.get $8) + (local.tee $5 + (i32.add 
+ (i32.load + (local.get $8) + ) + (i32.const 1) + ) + ) + ) + (br_if $label$193 + (i32.gt_u + (local.get $5) + (i32.const 999999999) + ) + ) + ) + ) + (local.set $5 + (i32.mul + (i32.shr_s + (i32.sub + (local.get $25) + (local.get $1) + ) + (i32.const 2) + ) + (i32.const 9) + ) + ) + (br_if $label$182 + (i32.lt_u + (local.tee $14 + (i32.load + (local.get $1) + ) + ) + (i32.const 10) + ) + ) + (local.set $6 + (i32.const 10) + ) + (loop $label$195 + (local.set $5 + (i32.add + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$195 + (i32.ge_u + (local.get $14) + (local.tee $6 + (i32.mul + (local.get $6) + (i32.const 10) + ) + ) + ) + ) + ) + ) + ) + ) + (local.set $14 + (local.get $1) + ) + (local.set $6 + (local.get $5) + ) + (if + (i32.le_u + (local.get $13) + (local.tee $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + ) + (local.set $8 + (local.get $13) + ) + ) + ) + (block + (local.set $14 + (local.get $1) + ) + (local.set $6 + (local.get $5) + ) + (local.set $8 + (local.get $13) + ) + ) + ) + (local.set $32 + (i32.sub + (i32.const 0) + (local.get $6) + ) + ) + (loop $label$198 + (block $label$199 + (if + (i32.le_u + (local.get $8) + (local.get $14) + ) + (block + (local.set $22 + (i32.const 0) + ) + (br $label$199) + ) + ) + (if + (i32.load + (local.tee $1 + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + ) + (local.set $22 + (i32.const 1) + ) + (block + (local.set $8 + (local.get $1) + ) + (br $label$198) + ) + ) + ) + ) + (block $label$203 + (if + (local.get $29) + (block + (local.set $1 + (if (result i32) + (i32.and + (i32.gt_s + (local.tee $1 + (i32.add + (i32.xor + (i32.and + (local.get $35) + (i32.const 1) + ) + (i32.const 1) + ) + (local.get $18) + ) + ) + (local.get $6) + ) + (i32.gt_s + (local.get $6) + (i32.const -5) + ) + ) + (block (result i32) + (local.set $5 + (i32.add + (local.get $9) + (i32.const -1) + ) + ) + (i32.sub + (i32.add + (local.get $1) + (i32.const -1) + ) + (local.get $6) + ) + ) + (block (result i32) + (local.set $5 + (i32.add + (local.get $9) + (i32.const -2) + ) + ) + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + ) + ) + (br_if $label$203 + (local.tee $13 + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + ) + (block $label$207 + (if + (local.get $22) + (block + (if + (i32.eqz + (local.tee $18 + (i32.load + (i32.add + (local.get $8) + (i32.const -4) + ) + ) + ) + ) + (block + (local.set $9 + (i32.const 9) + ) + (br $label$207) + ) + ) + (if + (i32.rem_u + (local.get $18) + (i32.const 10) + ) + (block + (local.set $9 + (i32.const 0) + ) + (br $label$207) + ) + (block + (local.set $13 + (i32.const 10) + ) + (local.set $9 + (i32.const 0) + ) + ) + ) + (loop $label$212 + (local.set $9 + (i32.add + (local.get $9) + (i32.const 1) + ) + ) + (br_if $label$212 + (i32.eqz + (i32.rem_u + (local.get $18) + (local.tee $13 + (i32.mul + (local.get $13) + (i32.const 10) + ) + ) + ) + ) + ) + ) + ) + (local.set $9 + (i32.const 9) + ) + ) + ) + (local.set $18 + (i32.add + (i32.mul + (i32.shr_s + (i32.sub + (local.get $8) + (local.get $25) + ) + (i32.const 2) + ) + (i32.const 9) + ) + (i32.const -9) + ) + ) + (if + (i32.eq + (i32.or + (local.get $5) + (i32.const 32) + ) + (i32.const 102) + ) + (block + (local.set $13 + (i32.const 0) + ) + (if + (i32.ge_s + (local.get $1) + (if (result i32) + (i32.lt_s + (local.tee $9 + (i32.sub + (local.get $18) + (local.get $9) + ) + ) + (i32.const 0) + ) + (local.tee $9 + (i32.const 0) + ) + (local.get $9) + ) + ) + (local.set $1 + (local.get $9) + ) + ) + ) + (block + (local.set $13 + (i32.const 0) + ) + (if + 
(i32.ge_s + (local.get $1) + (if (result i32) + (i32.lt_s + (local.tee $9 + (i32.sub + (i32.add + (local.get $18) + (local.get $6) + ) + (local.get $9) + ) + ) + (i32.const 0) + ) + (local.tee $9 + (i32.const 0) + ) + (local.get $9) + ) + ) + (local.set $1 + (local.get $9) + ) + ) + ) + ) + ) + (block + (local.set $13 + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + (local.set $1 + (local.get $18) + ) + (local.set $5 + (local.get $9) + ) + ) + ) + ) + (if + (local.tee $25 + (i32.eq + (i32.or + (local.get $5) + (i32.const 32) + ) + (i32.const 102) + ) + ) + (block + (local.set $9 + (i32.const 0) + ) + (if + (i32.le_s + (local.get $6) + (i32.const 0) + ) + (local.set $6 + (i32.const 0) + ) + ) + ) + (block + (if + (i32.lt_s + (i32.sub + (local.get $28) + (local.tee $9 + (call $22 + (i64.extend_i32_s + (if (result i32) + (i32.lt_s + (local.get $6) + (i32.const 0) + ) + (local.get $32) + (local.get $6) + ) + ) + (local.get $33) + ) + ) + ) + (i32.const 2) + ) + (loop $label$229 + (i32.store8 + (local.tee $9 + (i32.add + (local.get $9) + (i32.const -1) + ) + ) + (i32.const 48) + ) + (br_if $label$229 + (i32.lt_s + (i32.sub + (local.get $28) + (local.get $9) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.store8 + (i32.add + (local.get $9) + (i32.const -1) + ) + (i32.add + (i32.and + (i32.shr_s + (local.get $6) + (i32.const 31) + ) + (i32.const 2) + ) + (i32.const 43) + ) + ) + (i32.store8 + (local.tee $6 + (i32.add + (local.get $9) + (i32.const -2) + ) + ) + (local.get $5) + ) + (local.set $9 + (local.get $6) + ) + (local.set $6 + (i32.sub + (local.get $28) + (local.get $6) + ) + ) + ) + ) + (call $24 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.tee $18 + (i32.add + (i32.add + (i32.add + (i32.add + (local.get $24) + (i32.const 1) + ) + (local.get $1) + ) + (i32.ne + (local.tee $29 + (i32.or + (local.get $1) + (local.get $13) + ) + ) + (i32.const 0) + ) + ) + (local.get $6) + ) + ) + (local.get $12) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $20 + (local.get $26) + (local.get $24) + (local.get $0) + ) + ) + ) + (call $24 + (local.get $0) + (i32.const 48) + (local.get $10) + (local.get $18) + (i32.xor + (local.get $12) + (i32.const 65536) + ) + ) + (block $label$231 + (if + (local.get $25) + (block + (local.set $6 + (local.tee $9 + (if (result i32) + (i32.gt_u + (local.get $14) + (local.get $7) + ) + (local.get $7) + (local.get $14) + ) + ) + ) + (loop $label$235 + (local.set $5 + (call $22 + (i64.extend_i32_u + (i32.load + (local.get $6) + ) + ) + (local.get $31) + ) + ) + (block $label$236 + (if + (i32.eq + (local.get $6) + (local.get $9) + ) + (block + (br_if $label$236 + (i32.ne + (local.get $5) + (local.get $31) + ) + ) + (i32.store8 + (local.get $34) + (i32.const 48) + ) + (local.set $5 + (local.get $34) + ) + ) + (block + (br_if $label$236 + (i32.le_u + (local.get $5) + (local.get $19) + ) + ) + (drop + (call $35 + (local.get $19) + (i32.const 48) + (i32.sub + (local.get $5) + (local.get $27) + ) + ) + ) + (loop $label$239 + (br_if $label$239 + (i32.gt_u + (local.tee $5 + (i32.add + (local.get $5) + (i32.const -1) + ) + ) + (local.get $19) + ) + ) + ) + ) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $20 + (local.get $5) + (i32.sub + (local.get $41) + (local.get $5) + ) + (local.get $0) + ) + ) + ) + (if + (i32.le_u + (local.tee $5 + (i32.add + (local.get $6) + (i32.const 4) + ) + ) + (local.get $7) + ) + (block + (local.set $6 + (local.get $5) + ) 
+ (br $label$235) + ) + ) + ) + (block $label$242 + (if + (local.get $29) + (block + (br_if $label$242 + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $20 + (i32.const 1700) + (i32.const 1) + (local.get $0) + ) + ) + ) + ) + ) + (if + (i32.and + (i32.gt_s + (local.get $1) + (i32.const 0) + ) + (i32.lt_u + (local.get $5) + (local.get $8) + ) + ) + (loop $label$245 + (if + (i32.gt_u + (local.tee $7 + (call $22 + (i64.extend_i32_u + (i32.load + (local.get $5) + ) + ) + (local.get $31) + ) + ) + (local.get $19) + ) + (block + (drop + (call $35 + (local.get $19) + (i32.const 48) + (i32.sub + (local.get $7) + (local.get $27) + ) + ) + ) + (loop $label$247 + (br_if $label$247 + (i32.gt_u + (local.tee $7 + (i32.add + (local.get $7) + (i32.const -1) + ) + ) + (local.get $19) + ) + ) + ) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $20 + (local.get $7) + (if (result i32) + (i32.gt_s + (local.get $1) + (i32.const 9) + ) + (i32.const 9) + (local.get $1) + ) + (local.get $0) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $1) + (i32.const -9) + ) + ) + (if + (i32.and + (i32.gt_s + (local.get $1) + (i32.const 9) + ) + (i32.lt_u + (local.tee $5 + (i32.add + (local.get $5) + (i32.const 4) + ) + ) + (local.get $8) + ) + ) + (block + (local.set $1 + (local.get $7) + ) + (br $label$245) + ) + (local.set $1 + (local.get $7) + ) + ) + ) + ) + (call $24 + (local.get $0) + (i32.const 48) + (i32.add + (local.get $1) + (i32.const 9) + ) + (i32.const 9) + (i32.const 0) + ) + ) + (block + (local.set $5 + (i32.add + (local.get $14) + (i32.const 4) + ) + ) + (if + (i32.eqz + (local.get $22) + ) + (local.set $8 + (local.get $5) + ) + ) + (if + (i32.gt_s + (local.get $1) + (i32.const -1) + ) + (block + (local.set $13 + (i32.eqz + (local.get $13) + ) + ) + (local.set $7 + (local.get $14) + ) + (local.set $5 + (local.get $1) + ) + (loop $label$256 + (if + (i32.eq + (local.tee $1 + (call $22 + (i64.extend_i32_u + (i32.load + (local.get $7) + ) + ) + (local.get $31) + ) + ) + (local.get $31) + ) + (block + (i32.store8 + (local.get $34) + (i32.const 48) + ) + (local.set $1 + (local.get $34) + ) + ) + ) + (block $label$258 + (if + (i32.eq + (local.get $7) + (local.get $14) + ) + (block + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $20 + (local.get $1) + (i32.const 1) + (local.get $0) + ) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (br_if $label$258 + (i32.and + (local.get $13) + (i32.lt_s + (local.get $5) + (i32.const 1) + ) + ) + ) + (br_if $label$258 + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $20 + (i32.const 1700) + (i32.const 1) + (local.get $0) + ) + ) + ) + (block + (br_if $label$258 + (i32.le_u + (local.get $1) + (local.get $19) + ) + ) + (drop + (call $35 + (local.get $19) + (i32.const 48) + (i32.add + (local.get $1) + (local.get $43) + ) + ) + ) + (loop $label$262 + (br_if $label$262 + (i32.gt_u + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (local.get $19) + ) + ) + ) + ) + ) + ) + (local.set $6 + (i32.sub + (local.get $41) + (local.get $1) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $20 + (local.get $1) + (if (result i32) + (i32.gt_s + (local.get $5) + (local.get $6) + ) + (local.get $6) + (local.get $5) + ) + (local.get $0) + ) + ) + ) + (br_if $label$256 + (i32.and + (i32.lt_u + (local.tee 
$7 + (i32.add + (local.get $7) + (i32.const 4) + ) + ) + (local.get $8) + ) + (i32.gt_s + (local.tee $5 + (i32.sub + (local.get $5) + (local.get $6) + ) + ) + (i32.const -1) + ) + ) + ) + (local.set $1 + (local.get $5) + ) + ) + ) + ) + (call $24 + (local.get $0) + (i32.const 48) + (i32.add + (local.get $1) + (i32.const 18) + ) + (i32.const 18) + (i32.const 0) + ) + (br_if $label$231 + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $20 + (local.get $9) + (i32.sub + (local.get $28) + (local.get $9) + ) + (local.get $0) + ) + ) + ) + ) + ) + (call $24 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $18) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.ge_s + (local.get $18) + (local.get $10) + ) + (local.set $10 + (local.get $18) + ) + ) + ) + (block + (call $24 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.tee $8 + (i32.add + (if (result i32) + (local.tee $6 + (i32.or + (f64.ne + (local.get $52) + (local.get $52) + ) + (i32.const 0) + ) + ) + (local.tee $24 + (i32.const 0) + ) + (local.get $24) + ) + (i32.const 3) + ) + ) + (local.get $7) + ) + (if + (i32.eqz + (i32.and + (local.tee $1 + (i32.load + (local.get $0) + ) + ) + (i32.const 32) + ) + ) + (block + (drop + (call $20 + (local.get $26) + (local.get $24) + (local.get $0) + ) + ) + (local.set $1 + (i32.load + (local.get $0) + ) + ) + ) + ) + (local.set $7 + (if (result i32) + (local.tee $5 + (i32.ne + (i32.and + (local.get $9) + (i32.const 32) + ) + (i32.const 0) + ) + ) + (i32.const 1684) + (i32.const 1688) + ) + ) + (local.set $5 + (if (result i32) + (local.get $5) + (i32.const 1692) + (i32.const 1696) + ) + ) + (if + (i32.eqz + (local.get $6) + ) + (local.set $5 + (local.get $7) + ) + ) + (if + (i32.eqz + (i32.and + (local.get $1) + (i32.const 32) + ) + ) + (drop + (call $20 + (local.get $5) + (i32.const 3) + (local.get $0) + ) + ) + ) + (call $24 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $8) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.ge_s + (local.get $8) + (local.get $10) + ) + (local.set $10 + (local.get $8) + ) + ) + ) + ) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $7 + (local.get $5) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1648) + ) + (local.set $5 + (local.get $21) + ) + (br $label$70) + ) + (local.set $7 + (i32.and + (local.get $9) + (i32.const 32) + ) + ) + (local.set $7 + (if (result i32) + (i64.eq + (local.tee $50 + (i64.load + (local.get $16) + ) + ) + (i64.const 0) + ) + (block (result i32) + (local.set $50 + (i64.const 0) + ) + (local.get $21) + ) + (block (result i32) + (local.set $1 + (local.get $21) + ) + (loop $label$280 + (i32.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i32.or + (i32.load8_u + (i32.add + (i32.and + (i32.wrap_i64 + (local.get $50) + ) + (i32.const 15) + ) + (i32.const 1632) + ) + ) + (local.get $7) + ) + ) + (br_if $label$280 + (i64.ne + (local.tee $50 + (i64.shr_u + (local.get $50) + (i64.const 4) + ) + ) + (i64.const 0) + ) + ) + ) + (local.set $50 + (i64.load + (local.get $16) + ) + ) + (local.get $1) + ) + ) + ) + (local.set $8 + (i32.add + (i32.shr_s + (local.get $9) + (i32.const 4) + ) + (i32.const 1648) + ) + ) + (if + (local.tee $1 + (i32.or + (i32.eqz + (i32.and + (local.get $12) + (i32.const 8) + ) + ) + (i64.eq + (local.get $50) + (i64.const 0) + ) + ) + ) + (local.set $8 + (i32.const 1648) + ) + ) + (local.set $6 + (if (result i32) + (local.get $1) + 
(i32.const 0) + (i32.const 2) + ) + ) + (br $label$71) + ) + (local.set $7 + (call $22 + (local.get $50) + (local.get $21) + ) + ) + (br $label$71) + ) + (local.set $14 + (i32.eqz + (local.tee $13 + (call $16 + (local.get $1) + (i32.const 0) + (local.get $5) + ) + ) + ) + ) + (local.set $8 + (i32.sub + (local.get $13) + (local.get $1) + ) + ) + (local.set $9 + (i32.add + (local.get $1) + (local.get $5) + ) + ) + (local.set $12 + (local.get $7) + ) + (local.set $7 + (if (result i32) + (local.get $14) + (local.get $5) + (local.get $8) + ) + ) + (local.set $6 + (i32.const 0) + ) + (local.set $8 + (i32.const 1648) + ) + (local.set $5 + (if (result i32) + (local.get $14) + (local.get $9) + (local.get $13) + ) + ) + (br $label$70) + ) + (local.set $1 + (i32.const 0) + ) + (local.set $5 + (i32.const 0) + ) + (local.set $8 + (local.get $7) + ) + (loop $label$288 + (block $label$289 + (br_if $label$289 + (i32.eqz + (local.tee $9 + (i32.load + (local.get $8) + ) + ) + ) + ) + (br_if $label$289 + (i32.or + (i32.lt_s + (local.tee $5 + (call $25 + (local.get $36) + (local.get $9) + ) + ) + (i32.const 0) + ) + (i32.gt_u + (local.get $5) + (i32.sub + (local.get $6) + (local.get $1) + ) + ) + ) + ) + (local.set $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + (br_if $label$288 + (i32.gt_u + (local.get $6) + (local.tee $1 + (i32.add + (local.get $5) + (local.get $1) + ) + ) + ) + ) + ) + ) + (if + (i32.lt_s + (local.get $5) + (i32.const 0) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$5) + ) + ) + (call $24 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $1) + (local.get $12) + ) + (if + (local.get $1) + (block + (local.set $5 + (i32.const 0) + ) + (loop $label$292 + (br_if $label$72 + (i32.eqz + (local.tee $8 + (i32.load + (local.get $7) + ) + ) + ) + ) + (br_if $label$72 + (i32.gt_s + (local.tee $5 + (i32.add + (local.tee $8 + (call $25 + (local.get $36) + (local.get $8) + ) + ) + (local.get $5) + ) + ) + (local.get $1) + ) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $20 + (local.get $36) + (local.get $8) + (local.get $0) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $7) + (i32.const 4) + ) + ) + (br_if $label$292 + (i32.lt_u + (local.get $5) + (local.get $1) + ) + ) + (br $label$72) + ) + ) + (block + (local.set $1 + (i32.const 0) + ) + (br $label$72) + ) + ) + ) + (call $24 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $1) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (if + (i32.le_s + (local.get $10) + (local.get $1) + ) + (local.set $10 + (local.get $1) + ) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + (local.set $1 + (i32.and + (local.get $12) + (i32.const -65537) + ) + ) + (if + (i32.gt_s + (local.get $5) + (i32.const -1) + ) + (local.set $12 + (local.get $1) + ) + ) + (local.set $5 + (if (result i32) + (i32.or + (local.get $5) + (local.tee $9 + (i64.ne + (i64.load + (local.get $16) + ) + (i64.const 0) + ) + ) + ) + (block (result i32) + (local.set $1 + (local.get $7) + ) + (if + (i32.gt_s + (local.get $5) + (local.tee $7 + (i32.add + (i32.xor + (i32.and + (local.get $9) + (i32.const 1) + ) + (i32.const 1) + ) + (i32.sub + (local.get $38) + (local.get $7) + ) + ) + ) + ) + (local.set $7 + (local.get $5) + ) + ) + (local.get $21) + ) + (block (result i32) + (local.set $1 + (local.get $21) + ) + (local.set $7 + (i32.const 0) + ) + (local.get $21) + ) + ) + ) + ) + (call $24 + (local.get $0) + (i32.const 32) + (if (result i32) + (i32.lt_s + 
(local.get $10) + (local.tee $5 + (i32.add + (if (result i32) + (i32.lt_s + (local.get $7) + (local.tee $9 + (i32.sub + (local.get $5) + (local.get $1) + ) + ) + ) + (local.tee $7 + (local.get $9) + ) + (local.get $7) + ) + (local.get $6) + ) + ) + ) + (local.tee $10 + (local.get $5) + ) + (local.get $10) + ) + (local.get $5) + (local.get $12) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $20 + (local.get $8) + (local.get $6) + (local.get $0) + ) + ) + ) + (call $24 + (local.get $0) + (i32.const 48) + (local.get $10) + (local.get $5) + (i32.xor + (local.get $12) + (i32.const 65536) + ) + ) + (call $24 + (local.get $0) + (i32.const 48) + (local.get $7) + (local.get $9) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (i32.load + (local.get $0) + ) + (i32.const 32) + ) + ) + (drop + (call $20 + (local.get $1) + (local.get $9) + (local.get $0) + ) + ) + ) + (call $24 + (local.get $0) + (i32.const 32) + (local.get $10) + (local.get $5) + (i32.xor + (local.get $12) + (i32.const 8192) + ) + ) + (local.set $1 + (local.get $11) + ) + (br $label$4) + ) + ) + (br $label$2) + ) + (if + (i32.eqz + (local.get $0) + ) + (if + (local.get $17) + (block + (local.set $0 + (i32.const 1) + ) + (loop $label$308 + (if + (local.tee $1 + (i32.load + (i32.add + (local.get $4) + (i32.shl + (local.get $0) + (i32.const 2) + ) + ) + ) + ) + (block + (call $21 + (i32.add + (local.get $3) + (i32.shl + (local.get $0) + (i32.const 3) + ) + ) + (local.get $1) + (local.get $2) + ) + (br_if $label$308 + (i32.lt_s + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (i32.const 10) + ) + ) + (local.set $15 + (i32.const 1) + ) + (br $label$2) + ) + ) + ) + (loop $label$310 + (if + (i32.load + (i32.add + (local.get $4) + (i32.shl + (local.get $0) + (i32.const 2) + ) + ) + ) + (block + (local.set $15 + (i32.const -1) + ) + (br $label$2) + ) + ) + (br_if $label$310 + (i32.lt_s + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (i32.const 10) + ) + ) + (local.set $15 + (i32.const 1) + ) + ) + ) + (local.set $15 + (i32.const 0) + ) + ) + ) + ) + (global.set $global$1 + (local.get $23) + ) + (local.get $15) + ) + ) + (func $19 (; 32 ;) (type $1) (param $0 i32) (result i32) + (i32.const 0) + ) + (func $20 (; 33 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (block $label$1 (result i32) + (block $label$2 + (block $label$3 + (br_if $label$3 + (local.tee $3 + (i32.load + (local.tee $4 + (i32.add + (local.get $2) + (i32.const 16) + ) + ) + ) + ) + ) + (if + (call $29 + (local.get $2) + ) + (local.set $3 + (i32.const 0) + ) + (block + (local.set $3 + (i32.load + (local.get $4) + ) + ) + (br $label$3) + ) + ) + (br $label$2) + ) + (if + (i32.lt_u + (i32.sub + (local.get $3) + (local.tee $4 + (i32.load + (local.tee $5 + (i32.add + (local.get $2) + (i32.const 20) + ) + ) + ) + ) + ) + (local.get $1) + ) + (block + (local.set $3 + (call_indirect (type $0) + (local.get $2) + (local.get $0) + (local.get $1) + (i32.add + (i32.and + (i32.load offset=36 + (local.get $2) + ) + (i32.const 3) + ) + (i32.const 2) + ) + ) + ) + (br $label$2) + ) + ) + (local.set $2 + (block $label$7 (result i32) + (if (result i32) + (i32.gt_s + (i32.load8_s offset=75 + (local.get $2) + ) + (i32.const -1) + ) + (block (result i32) + (local.set $3 + (local.get $1) + ) + (loop $label$9 + (drop + (br_if $label$7 + (i32.const 0) + (i32.eqz + (local.get $3) + ) + ) + ) + (if + (i32.ne + 
(i32.load8_s + (i32.add + (local.get $0) + (local.tee $6 + (i32.add + (local.get $3) + (i32.const -1) + ) + ) + ) + ) + (i32.const 10) + ) + (block + (local.set $3 + (local.get $6) + ) + (br $label$9) + ) + ) + ) + (br_if $label$2 + (i32.lt_u + (call_indirect (type $0) + (local.get $2) + (local.get $0) + (local.get $3) + (i32.add + (i32.and + (i32.load offset=36 + (local.get $2) + ) + (i32.const 3) + ) + (i32.const 2) + ) + ) + (local.get $3) + ) + ) + (local.set $4 + (i32.load + (local.get $5) + ) + ) + (local.set $1 + (i32.sub + (local.get $1) + (local.get $3) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (local.get $3) + ) + ) + (local.get $3) + ) + (i32.const 0) + ) + ) + ) + (drop + (call $36 + (local.get $4) + (local.get $0) + (local.get $1) + ) + ) + (i32.store + (local.get $5) + (i32.add + (i32.load + (local.get $5) + ) + (local.get $1) + ) + ) + (local.set $3 + (i32.add + (local.get $2) + (local.get $1) + ) + ) + ) + (local.get $3) + ) + ) + (func $21 (; 34 ;) (type $8) (param $0 i32) (param $1 i32) (param $2 i32) + (local $3 i32) + (local $4 i64) + (local $5 f64) + (block $label$1 + (if + (i32.le_u + (local.get $1) + (i32.const 20) + ) + (block $label$3 + (block $label$4 + (block $label$5 + (block $label$6 + (block $label$7 + (block $label$8 + (block $label$9 + (block $label$10 + (block $label$11 + (block $label$12 + (block $label$13 + (br_table $label$13 $label$12 $label$11 $label$10 $label$9 $label$8 $label$7 $label$6 $label$5 $label$4 $label$3 + (i32.sub + (local.get $1) + (i32.const 9) + ) + ) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i32.store + (local.get $0) + (local.get $3) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_s + (local.get $3) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_u + (local.get $3) + ) + ) + (br $label$1) + ) + (local.set $4 + (i64.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 7) + ) + (i32.const -8) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (i64.store + (local.get $0) + (local.get $4) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_s + (i32.shr_s + (i32.shl + (i32.and + (local.get $3) + (i32.const 65535) + ) + (i32.const 16) + ) + (i32.const 16) + ) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_u + (i32.and + (local.get $3) 
+ (i32.const 65535) + ) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_s + (i32.shr_s + (i32.shl + (i32.and + (local.get $3) + (i32.const 255) + ) + (i32.const 24) + ) + (i32.const 24) + ) + ) + ) + (br $label$1) + ) + (local.set $3 + (i32.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 3) + ) + (i32.const -4) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i64.store + (local.get $0) + (i64.extend_i32_u + (i32.and + (local.get $3) + (i32.const 255) + ) + ) + ) + (br $label$1) + ) + (local.set $5 + (f64.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 7) + ) + (i32.const -8) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (f64.store + (local.get $0) + (local.get $5) + ) + (br $label$1) + ) + (local.set $5 + (f64.load + (local.tee $1 + (i32.and + (i32.add + (i32.load + (local.get $2) + ) + (i32.const 7) + ) + (i32.const -8) + ) + ) + ) + ) + (i32.store + (local.get $2) + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + (f64.store + (local.get $0) + (local.get $5) + ) + ) + ) + ) + ) + (func $22 (; 35 ;) (type $9) (param $0 i64) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i64) + (block $label$1 (result i32) + (local.set $2 + (i32.wrap_i64 + (local.get $0) + ) + ) + (if + (i64.gt_u + (local.get $0) + (i64.const 4294967295) + ) + (block + (loop $label$3 + (i64.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i64.or + (i64.rem_u + (local.get $0) + (i64.const 10) + ) + (i64.const 48) + ) + ) + (local.set $4 + (i64.div_u + (local.get $0) + (i64.const 10) + ) + ) + (if + (i64.gt_u + (local.get $0) + (i64.const 42949672959) + ) + (block + (local.set $0 + (local.get $4) + ) + (br $label$3) + ) + ) + ) + (local.set $2 + (i32.wrap_i64 + (local.get $4) + ) + ) + ) + ) + (if + (local.get $2) + (loop $label$6 + (i32.store8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + (i32.or + (i32.rem_u + (local.get $2) + (i32.const 10) + ) + (i32.const 48) + ) + ) + (local.set $3 + (i32.div_u + (local.get $2) + (i32.const 10) + ) + ) + (if + (i32.ge_u + (local.get $2) + (i32.const 10) + ) + (block + (local.set $2 + (local.get $3) + ) + (br $label$6) + ) + ) + ) + ) + (local.get $1) + ) + ) + (func $23 (; 36 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (i32.const 0) + ) + (block $label$2 + (block $label$3 + (block $label$4 + (loop $label$5 + (br_if $label$4 + (i32.eq + (i32.load8_u + (i32.add + (local.get $1) + (i32.const 1702) + ) + ) + (local.get $0) + ) + ) + (br_if $label$5 + (i32.ne + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (i32.const 87) + ) + ) + (local.set $1 + (i32.const 87) + ) + (local.set $0 + (i32.const 1790) + ) + (br $label$3) + ) + ) + (if + (local.get $1) + (block + (local.set $0 + (i32.const 1790) + ) + (br $label$3) + ) + (local.set $0 + (i32.const 1790) + ) + ) + (br $label$2) + ) + (loop $label$8 + (local.set $2 + (local.get $0) + ) + (loop $label$9 + (local.set $0 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (if + (i32.load8_s + (local.get $2) + ) + (block + 
(local.set $2 + (local.get $0) + ) + (br $label$9) + ) + ) + ) + (br_if $label$8 + (local.tee $1 + (i32.add + (local.get $1) + (i32.const -1) + ) + ) + ) + ) + ) + (local.get $0) + ) + ) + (func $24 (; 37 ;) (type $10) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (param $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (block $label$1 + (local.set $7 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 256) + ) + ) + (local.set $6 + (local.get $7) + ) + (block $label$2 + (if + (i32.and + (i32.gt_s + (local.get $2) + (local.get $3) + ) + (i32.eqz + (i32.and + (local.get $4) + (i32.const 73728) + ) + ) + ) + (block + (drop + (call $35 + (local.get $6) + (local.get $1) + (if (result i32) + (i32.gt_u + (local.tee $5 + (i32.sub + (local.get $2) + (local.get $3) + ) + ) + (i32.const 256) + ) + (i32.const 256) + (local.get $5) + ) + ) + ) + (local.set $4 + (i32.eqz + (i32.and + (local.tee $1 + (i32.load + (local.get $0) + ) + ) + (i32.const 32) + ) + ) + ) + (if + (i32.gt_u + (local.get $5) + (i32.const 255) + ) + (block + (loop $label$7 + (if + (local.get $4) + (block + (drop + (call $20 + (local.get $6) + (i32.const 256) + (local.get $0) + ) + ) + (local.set $1 + (i32.load + (local.get $0) + ) + ) + ) + ) + (local.set $4 + (i32.eqz + (i32.and + (local.get $1) + (i32.const 32) + ) + ) + ) + (br_if $label$7 + (i32.gt_u + (local.tee $5 + (i32.add + (local.get $5) + (i32.const -256) + ) + ) + (i32.const 255) + ) + ) + ) + (br_if $label$2 + (i32.eqz + (local.get $4) + ) + ) + (local.set $5 + (i32.and + (i32.sub + (local.get $2) + (local.get $3) + ) + (i32.const 255) + ) + ) + ) + (br_if $label$2 + (i32.eqz + (local.get $4) + ) + ) + ) + (drop + (call $20 + (local.get $6) + (local.get $5) + (local.get $0) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $7) + ) + ) + ) + (func $25 (; 38 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (if (result i32) + (local.get $0) + (call $28 + (local.get $0) + (local.get $1) + (i32.const 0) + ) + (i32.const 0) + ) + ) + (func $26 (; 39 ;) (type $11) (param $0 f64) (param $1 i32) (result f64) + (call $27 + (local.get $0) + (local.get $1) + ) + ) + (func $27 (; 40 ;) (type $11) (param $0 f64) (param $1 i32) (result f64) + (local $2 i64) + (local $3 i64) + (block $label$1 (result f64) + (block $label$2 + (block $label$3 + (block $label$4 + (block $label$5 + (br_table $label$5 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 
$label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$3 $label$4 $label$3 + (i32.sub + (i32.shr_s + (i32.shl + (i32.and + (i32.and + (i32.wrap_i64 + (local.tee $3 + (i64.shr_u + (local.tee $2 + (i64.reinterpret_f64 + (local.get $0) + ) + ) + (i64.const 52) + ) + ) + ) + (i32.const 65535) + ) + (i32.const 2047) + ) + (i32.const 16) + ) + (i32.const 16) + ) + (i32.const 0) + ) + ) + ) + (i32.store + (local.get $1) + (if (result i32) + (f64.ne + (local.get $0) + (f64.const 0) + ) + (block (result i32) + (local.set $0 + (call $27 
+ (f64.mul + (local.get $0) + (f64.const 18446744073709551615) + ) + (local.get $1) + ) + ) + (i32.add + (i32.load + (local.get $1) + ) + (i32.const -64) + ) + ) + (i32.const 0) + ) + ) + (br $label$2) + ) + (br $label$2) + ) + (i32.store + (local.get $1) + (i32.add + (i32.and + (i32.wrap_i64 + (local.get $3) + ) + (i32.const 2047) + ) + (i32.const -1022) + ) + ) + (local.set $0 + (f64.reinterpret_i64 + (i64.or + (i64.and + (local.get $2) + (i64.const -9218868437227405313) + ) + (i64.const 4602678819172646912) + ) + ) + ) + ) + (local.get $0) + ) + ) + (func $28 (; 41 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (block $label$1 (result i32) + (if (result i32) + (local.get $0) + (block (result i32) + (if + (i32.lt_u + (local.get $1) + (i32.const 128) + ) + (block + (i32.store8 + (local.get $0) + (local.get $1) + ) + (br $label$1 + (i32.const 1) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (i32.const 2048) + ) + (block + (i32.store8 + (local.get $0) + (i32.or + (i32.shr_u + (local.get $1) + (i32.const 6) + ) + (i32.const 192) + ) + ) + (i32.store8 offset=1 + (local.get $0) + (i32.or + (i32.and + (local.get $1) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (br $label$1 + (i32.const 2) + ) + ) + ) + (if + (i32.or + (i32.lt_u + (local.get $1) + (i32.const 55296) + ) + (i32.eq + (i32.and + (local.get $1) + (i32.const -8192) + ) + (i32.const 57344) + ) + ) + (block + (i32.store8 + (local.get $0) + (i32.or + (i32.shr_u + (local.get $1) + (i32.const 12) + ) + (i32.const 224) + ) + ) + (i32.store8 offset=1 + (local.get $0) + (i32.or + (i32.and + (i32.shr_u + (local.get $1) + (i32.const 6) + ) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.store8 offset=2 + (local.get $0) + (i32.or + (i32.and + (local.get $1) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (br $label$1 + (i32.const 3) + ) + ) + ) + (if (result i32) + (i32.lt_u + (i32.add + (local.get $1) + (i32.const -65536) + ) + (i32.const 1048576) + ) + (block (result i32) + (i32.store8 + (local.get $0) + (i32.or + (i32.shr_u + (local.get $1) + (i32.const 18) + ) + (i32.const 240) + ) + ) + (i32.store8 offset=1 + (local.get $0) + (i32.or + (i32.and + (i32.shr_u + (local.get $1) + (i32.const 12) + ) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.store8 offset=2 + (local.get $0) + (i32.or + (i32.and + (i32.shr_u + (local.get $1) + (i32.const 6) + ) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.store8 offset=3 + (local.get $0) + (i32.or + (i32.and + (local.get $1) + (i32.const 63) + ) + (i32.const 128) + ) + ) + (i32.const 4) + ) + (block (result i32) + (i32.store + (call $11) + (i32.const 84) + ) + (i32.const -1) + ) + ) + ) + (i32.const 1) + ) + ) + ) + (func $29 (; 42 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (i32.load8_s + (local.tee $2 + (i32.add + (local.get $0) + (i32.const 74) + ) + ) + ) + ) + (i32.store8 + (local.get $2) + (i32.or + (i32.add + (local.get $1) + (i32.const 255) + ) + (local.get $1) + ) + ) + (local.tee $0 + (if (result i32) + (i32.and + (local.tee $1 + (i32.load + (local.get $0) + ) + ) + (i32.const 8) + ) + (block (result i32) + (i32.store + (local.get $0) + (i32.or + (local.get $1) + (i32.const 32) + ) + ) + (i32.const -1) + ) + (block (result i32) + (i32.store offset=8 + (local.get $0) + (i32.const 0) + ) + (i32.store offset=4 + (local.get $0) + (i32.const 0) + ) + (i32.store offset=28 + (local.get $0) + (local.tee $1 + (i32.load offset=44 + (local.get $0) + ) + ) + ) + (i32.store 
offset=20 + (local.get $0) + (local.get $1) + ) + (i32.store offset=16 + (local.get $0) + (i32.add + (local.get $1) + (i32.load offset=48 + (local.get $0) + ) + ) + ) + (i32.const 0) + ) + ) + ) + ) + ) + (func $30 (; 43 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (block $label$1 (result i32) + (local.set $2 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (i32.store + (local.tee $3 + (local.get $2) + ) + (local.get $1) + ) + (local.set $0 + (call $17 + (i32.load + (i32.const 1024) + ) + (local.get $0) + (local.get $3) + ) + ) + (global.set $global$1 + (local.get $2) + ) + (local.get $0) + ) + ) + (func $31 (; 44 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (local $16 i32) + (local $17 i32) + (local $18 i32) + (local $19 i32) + (local $20 i32) + (local $21 i32) + (block $label$1 (result i32) + (local.set $14 + (global.get $global$1) + ) + (global.set $global$1 + (i32.add + (global.get $global$1) + (i32.const 16) + ) + ) + (local.set $18 + (local.get $14) + ) + (block $label$2 + (if + (i32.lt_u + (local.get $0) + (i32.const 245) + ) + (block + (local.set $3 + (i32.and + (i32.add + (local.get $0) + (i32.const 11) + ) + (i32.const -8) + ) + ) + (if + (i32.and + (local.tee $0 + (i32.shr_u + (local.tee $8 + (i32.load + (i32.const 3644) + ) + ) + (local.tee $2 + (i32.shr_u + (if (result i32) + (i32.lt_u + (local.get $0) + (i32.const 11) + ) + (local.tee $3 + (i32.const 16) + ) + (local.get $3) + ) + (i32.const 3) + ) + ) + ) + ) + (i32.const 3) + ) + (block + (local.set $4 + (i32.load + (local.tee $1 + (i32.add + (local.tee $7 + (i32.load + (local.tee $3 + (i32.add + (local.tee $2 + (i32.add + (i32.shl + (i32.shl + (local.tee $5 + (i32.add + (i32.xor + (i32.and + (local.get $0) + (i32.const 1) + ) + (i32.const 1) + ) + (local.get $2) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3684) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.eq + (local.get $2) + (local.get $4) + ) + (i32.store + (i32.const 3644) + (i32.and + (local.get $8) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $5) + ) + (i32.const -1) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $4) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $4) + (i32.const 12) + ) + ) + ) + (local.get $7) + ) + (block + (i32.store + (local.get $0) + (local.get $2) + ) + (i32.store + (local.get $3) + (local.get $4) + ) + ) + (call $fimport$10) + ) + ) + ) + (i32.store offset=4 + (local.get $7) + (i32.or + (local.tee $0 + (i32.shl + (local.get $5) + (i32.const 3) + ) + ) + (i32.const 3) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $7) + (local.get $0) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (local.get $1) + ) + ) + ) + (if + (i32.gt_u + (local.get $3) + (local.tee $16 + (i32.load + (i32.const 3652) + ) + ) + ) + (block + (if + (local.get $0) + (block + (local.set $5 + (i32.and + (i32.shr_u + (local.tee $0 + (i32.add + (i32.and + (local.tee $0 + (i32.and + (i32.shl + (local.get $0) + (local.get $2) + ) + 
(i32.or + (local.tee $0 + (i32.shl + (i32.const 2) + (local.get $2) + ) + ) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + ) + ) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + (i32.const -1) + ) + ) + (i32.const 12) + ) + (i32.const 16) + ) + ) + (local.set $12 + (i32.load + (local.tee $5 + (i32.add + (local.tee $9 + (i32.load + (local.tee $2 + (i32.add + (local.tee $4 + (i32.add + (i32.shl + (i32.shl + (local.tee $11 + (i32.add + (i32.or + (i32.or + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $0) + (local.get $5) + ) + ) + (i32.const 5) + ) + (i32.const 8) + ) + ) + (local.get $5) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $2) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 4) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $2) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $2 + (i32.shr_u + (local.get $2) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (i32.shr_u + (local.get $2) + (local.get $0) + ) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3684) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.eq + (local.get $4) + (local.get $12) + ) + (i32.store + (i32.const 3644) + (local.tee $7 + (i32.and + (local.get $8) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $11) + ) + (i32.const -1) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $12) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $12) + (i32.const 12) + ) + ) + ) + (local.get $9) + ) + (block + (i32.store + (local.get $0) + (local.get $4) + ) + (i32.store + (local.get $2) + (local.get $12) + ) + (local.set $7 + (local.get $8) + ) + ) + (call $fimport$10) + ) + ) + ) + (i32.store offset=4 + (local.get $9) + (i32.or + (local.get $3) + (i32.const 3) + ) + ) + (i32.store offset=4 + (local.tee $4 + (i32.add + (local.get $9) + (local.get $3) + ) + ) + (i32.or + (local.tee $11 + (i32.sub + (i32.shl + (local.get $11) + (i32.const 3) + ) + (local.get $3) + ) + ) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $4) + (local.get $11) + ) + (local.get $11) + ) + (if + (local.get $16) + (block + (local.set $9 + (i32.load + (i32.const 3664) + ) + ) + (local.set $2 + (i32.add + (i32.shl + (i32.shl + (local.tee $0 + (i32.shr_u + (local.get $16) + (i32.const 3) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3684) + ) + ) + (if + (i32.and + (local.get $7) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.load + (local.tee $3 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + (block + (local.set $6 + (local.get $3) + ) + (local.set $1 + (local.get $0) + ) + ) + ) + (block + (i32.store + (i32.const 3644) + (i32.or + (local.get $7) + (local.get $0) + ) + ) + (local.set $6 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + (local.set $1 + (local.get $2) + ) + ) + ) + (i32.store + (local.get $6) + (local.get $9) + ) + (i32.store offset=12 + (local.get $1) + (local.get $9) + ) + (i32.store offset=8 + (local.get $9) + (local.get $1) + ) + (i32.store offset=12 + (local.get $9) + (local.get $2) + ) + ) + ) + (i32.store + (i32.const 3652) + (local.get $11) + ) + (i32.store + (i32.const 
3664) + (local.get $4) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (local.get $5) + ) + ) + ) + (if + (local.tee $6 + (i32.load + (i32.const 3648) + ) + ) + (block + (local.set $2 + (i32.and + (i32.shr_u + (local.tee $0 + (i32.add + (i32.and + (local.get $6) + (i32.sub + (i32.const 0) + (local.get $6) + ) + ) + (i32.const -1) + ) + ) + (i32.const 12) + ) + (i32.const 16) + ) + ) + (local.set $9 + (i32.sub + (i32.and + (i32.load offset=4 + (local.tee $2 + (i32.load + (i32.add + (i32.shl + (i32.add + (i32.or + (i32.or + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $0) + (local.get $2) + ) + ) + (i32.const 5) + ) + (i32.const 8) + ) + ) + (local.get $2) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 4) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $1 + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (i32.shr_u + (local.get $1) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 3948) + ) + ) + ) + ) + (i32.const -8) + ) + (local.get $3) + ) + ) + (local.set $1 + (local.get $2) + ) + (loop $label$25 + (block $label$26 + (if + (i32.eqz + (local.tee $0 + (i32.load offset=16 + (local.get $1) + ) + ) + ) + (br_if $label$26 + (i32.eqz + (local.tee $0 + (i32.load offset=20 + (local.get $1) + ) + ) + ) + ) + ) + (if + (local.tee $7 + (i32.lt_u + (local.tee $1 + (i32.sub + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $3) + ) + ) + (local.get $9) + ) + ) + (local.set $9 + (local.get $1) + ) + ) + (local.set $1 + (local.get $0) + ) + (if + (local.get $7) + (local.set $2 + (local.get $0) + ) + ) + (br $label$25) + ) + ) + (if + (i32.lt_u + (local.get $2) + (local.tee $12 + (i32.load + (i32.const 3660) + ) + ) + ) + (call $fimport$10) + ) + (if + (i32.ge_u + (local.get $2) + (local.tee $13 + (i32.add + (local.get $2) + (local.get $3) + ) + ) + ) + (call $fimport$10) + ) + (local.set $15 + (i32.load offset=24 + (local.get $2) + ) + ) + (block $label$32 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $2) + ) + ) + (local.get $2) + ) + (block + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $2) + (i32.const 20) + ) + ) + ) + ) + ) + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $2) + (i32.const 16) + ) + ) + ) + ) + ) + (block + (local.set $4 + (i32.const 0) + ) + (br $label$32) + ) + ) + ) + (loop $label$36 + (if + (local.tee $7 + (i32.load + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $7) + ) + (local.set $1 + (local.get $11) + ) + (br $label$36) + ) + ) + (if + (local.tee $7 + (i32.load + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $7) + ) + (local.set $1 + (local.get $11) + ) + (br $label$36) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $12) + ) + (call $fimport$10) + (block + (i32.store + (local.get $1) + (i32.const 0) + ) + (local.set $4 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $11 + (i32.load offset=8 + (local.get $2) + ) + ) + (local.get $12) + ) + (call $fimport$10) + ) + (if + 
(i32.ne + (i32.load + (local.tee $7 + (i32.add + (local.get $11) + (i32.const 12) + ) + ) + ) + (local.get $2) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $2) + ) + (block + (i32.store + (local.get $7) + (local.get $0) + ) + (i32.store + (local.get $1) + (local.get $11) + ) + (local.set $4 + (local.get $0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + (block $label$46 + (if + (local.get $15) + (block + (if + (i32.eq + (local.get $2) + (i32.load + (local.tee $0 + (i32.add + (i32.shl + (local.tee $1 + (i32.load offset=28 + (local.get $2) + ) + ) + (i32.const 2) + ) + (i32.const 3948) + ) + ) + ) + ) + (block + (i32.store + (local.get $0) + (local.get $4) + ) + (if + (i32.eqz + (local.get $4) + ) + (block + (i32.store + (i32.const 3648) + (i32.and + (local.get $6) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + (br $label$46) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $15) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $15) + (i32.const 16) + ) + ) + ) + (local.get $2) + ) + (i32.store + (local.get $0) + (local.get $4) + ) + (i32.store offset=20 + (local.get $15) + (local.get $4) + ) + ) + (br_if $label$46 + (i32.eqz + (local.get $4) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $4) + (local.tee $0 + (i32.load + (i32.const 3660) + ) + ) + ) + (call $fimport$10) + ) + (i32.store offset=24 + (local.get $4) + (local.get $15) + ) + (if + (local.tee $1 + (i32.load offset=16 + (local.get $2) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $0) + ) + (call $fimport$10) + (block + (i32.store offset=16 + (local.get $4) + (local.get $1) + ) + (i32.store offset=24 + (local.get $1) + (local.get $4) + ) + ) + ) + ) + (if + (local.tee $0 + (i32.load offset=20 + (local.get $2) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + (block + (i32.store offset=20 + (local.get $4) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $4) + ) + ) + ) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $9) + (i32.const 16) + ) + (block + (i32.store offset=4 + (local.get $2) + (i32.or + (local.tee $0 + (i32.add + (local.get $9) + (local.get $3) + ) + ) + (i32.const 3) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $2) + (local.get $0) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + ) + (block + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $3) + (i32.const 3) + ) + ) + (i32.store offset=4 + (local.get $13) + (i32.or + (local.get $9) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $13) + (local.get $9) + ) + (local.get $9) + ) + (if + (local.get $16) + (block + (local.set $7 + (i32.load + (i32.const 3664) + ) + ) + (local.set $3 + (i32.add + (i32.shl + (i32.shl + (local.tee $0 + (i32.shr_u + (local.get $16) + (i32.const 3) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3684) + ) + ) + (if + (i32.and + (local.get $8) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + (block + (local.set $10 + (local.get $1) + ) + (local.set $5 + (local.get $0) + ) + ) + ) + (block + (i32.store + (i32.const 3644) + 
(i32.or + (local.get $8) + (local.get $0) + ) + ) + (local.set $10 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (local.set $5 + (local.get $3) + ) + ) + ) + (i32.store + (local.get $10) + (local.get $7) + ) + (i32.store offset=12 + (local.get $5) + (local.get $7) + ) + (i32.store offset=8 + (local.get $7) + (local.get $5) + ) + (i32.store offset=12 + (local.get $7) + (local.get $3) + ) + ) + ) + (i32.store + (i32.const 3652) + (local.get $9) + ) + (i32.store + (i32.const 3664) + (local.get $13) + ) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + (local.set $0 + (local.get $3) + ) + ) + ) + (local.set $0 + (local.get $3) + ) + ) + ) + (if + (i32.gt_u + (local.get $0) + (i32.const -65) + ) + (local.set $0 + (i32.const -1) + ) + (block + (local.set $7 + (i32.and + (local.tee $0 + (i32.add + (local.get $0) + (i32.const 11) + ) + ) + (i32.const -8) + ) + ) + (if + (local.tee $5 + (i32.load + (i32.const 3648) + ) + ) + (block + (local.set $17 + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $0) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $7) + (i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $7) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $3 + (i32.and + (i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $3) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $1) + (local.get $0) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $1) + (local.get $0) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (local.set $3 + (i32.sub + (i32.const 0) + (local.get $7) + ) + ) + (block $label$78 + (block $label$79 + (block $label$80 + (if + (local.tee $1 + (i32.load + (i32.add + (i32.shl + (local.get $17) + (i32.const 2) + ) + (i32.const 3948) + ) + ) + ) + (block + (local.set $0 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $17) + (i32.const 1) + ) + ) + ) + (local.set $4 + (i32.const 0) + ) + (local.set $10 + (i32.shl + (local.get $7) + (if (result i32) + (i32.eq + (local.get $17) + (i32.const 31) + ) + (i32.const 0) + (local.get $0) + ) + ) + ) + (local.set $0 + (i32.const 0) + ) + (loop $label$84 + (if + (i32.lt_u + (local.tee $6 + (i32.sub + (i32.and + (i32.load offset=4 + (local.get $1) + ) + (i32.const -8) + ) + (local.get $7) + ) + ) + (local.get $3) + ) + (if + (local.get $6) + (block + (local.set $3 + (local.get $6) + ) + (local.set $0 + (local.get $1) + ) + ) + (block + (local.set $3 + (i32.const 0) + ) + (local.set $0 + (local.get $1) + ) + (br $label$79) + ) + ) + ) + (local.set $1 + (if (result i32) + (i32.or + (i32.eqz + (local.tee $19 + (i32.load offset=20 + (local.get $1) + ) + ) + ) + (i32.eq + (local.get $19) + (local.tee $6 + (i32.load + (i32.add + (i32.add + (local.get $1) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $10) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + (local.get $4) + (local.get $19) + ) + ) + (local.set $10 + (i32.shl + (local.get $10) + (i32.xor + (i32.and + 
(local.tee $4 + (i32.eqz + (local.get $6) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (if + (local.get $4) + (block + (local.set $4 + (local.get $1) + ) + (local.set $1 + (local.get $0) + ) + (br $label$80) + ) + (block + (local.set $4 + (local.get $1) + ) + (local.set $1 + (local.get $6) + ) + (br $label$84) + ) + ) + ) + ) + (block + (local.set $4 + (i32.const 0) + ) + (local.set $1 + (i32.const 0) + ) + ) + ) + ) + (br_if $label$79 + (local.tee $0 + (if (result i32) + (i32.and + (i32.eqz + (local.get $4) + ) + (i32.eqz + (local.get $1) + ) + ) + (block (result i32) + (if + (i32.eqz + (local.tee $0 + (i32.and + (local.get $5) + (i32.or + (local.tee $0 + (i32.shl + (i32.const 2) + (local.get $17) + ) + ) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $7) + ) + (br $label$2) + ) + ) + (local.set $10 + (i32.and + (i32.shr_u + (local.tee $0 + (i32.add + (i32.and + (local.get $0) + (i32.sub + (i32.const 0) + (local.get $0) + ) + ) + (i32.const -1) + ) + ) + (i32.const 12) + ) + (i32.const 16) + ) + ) + (i32.load + (i32.add + (i32.shl + (i32.add + (i32.or + (i32.or + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $0) + (local.get $10) + ) + ) + (i32.const 5) + ) + (i32.const 8) + ) + ) + (local.get $10) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 4) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 2) + ) + ) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (local.tee $4 + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 1) + ) + (i32.const 1) + ) + ) + ) + (i32.shr_u + (local.get $4) + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 3948) + ) + ) + ) + (local.get $4) + ) + ) + ) + (local.set $4 + (local.get $1) + ) + (br $label$78) + ) + (loop $label$96 + (if + (local.tee $10 + (i32.lt_u + (local.tee $4 + (i32.sub + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $7) + ) + ) + (local.get $3) + ) + ) + (local.set $3 + (local.get $4) + ) + ) + (if + (local.get $10) + (local.set $1 + (local.get $0) + ) + ) + (if + (local.tee $4 + (i32.load offset=16 + (local.get $0) + ) + ) + (block + (local.set $0 + (local.get $4) + ) + (br $label$96) + ) + ) + (br_if $label$96 + (local.tee $0 + (i32.load offset=20 + (local.get $0) + ) + ) + ) + (local.set $4 + (local.get $1) + ) + ) + ) + (if + (local.get $4) + (if + (i32.lt_u + (local.get $3) + (i32.sub + (i32.load + (i32.const 3652) + ) + (local.get $7) + ) + ) + (block + (if + (i32.lt_u + (local.get $4) + (local.tee $12 + (i32.load + (i32.const 3660) + ) + ) + ) + (call $fimport$10) + ) + (if + (i32.ge_u + (local.get $4) + (local.tee $6 + (i32.add + (local.get $4) + (local.get $7) + ) + ) + ) + (call $fimport$10) + ) + (local.set $10 + (i32.load offset=24 + (local.get $4) + ) + ) + (block $label$104 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $4) + ) + ) + (local.get $4) + ) + (block + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $4) + (i32.const 20) + ) + ) + ) + ) + ) + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + ) + ) + ) + (block + (local.set $13 + (i32.const 0) + ) + (br $label$104) + ) + ) + ) + (loop $label$108 + (if + (local.tee $11 + 
(i32.load + (local.tee $9 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $11) + ) + (local.set $1 + (local.get $9) + ) + (br $label$108) + ) + ) + (if + (local.tee $11 + (i32.load + (local.tee $9 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $11) + ) + (local.set $1 + (local.get $9) + ) + (br $label$108) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $12) + ) + (call $fimport$10) + (block + (i32.store + (local.get $1) + (i32.const 0) + ) + (local.set $13 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $9 + (i32.load offset=8 + (local.get $4) + ) + ) + (local.get $12) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load + (local.tee $11 + (i32.add + (local.get $9) + (i32.const 12) + ) + ) + ) + (local.get $4) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $4) + ) + (block + (i32.store + (local.get $11) + (local.get $0) + ) + (i32.store + (local.get $1) + (local.get $9) + ) + (local.set $13 + (local.get $0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + (block $label$118 + (if + (local.get $10) + (block + (if + (i32.eq + (local.get $4) + (i32.load + (local.tee $0 + (i32.add + (i32.shl + (local.tee $1 + (i32.load offset=28 + (local.get $4) + ) + ) + (i32.const 2) + ) + (i32.const 3948) + ) + ) + ) + ) + (block + (i32.store + (local.get $0) + (local.get $13) + ) + (if + (i32.eqz + (local.get $13) + ) + (block + (i32.store + (i32.const 3648) + (local.tee $2 + (i32.and + (local.get $5) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + ) + (br $label$118) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $10) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $10) + (i32.const 16) + ) + ) + ) + (local.get $4) + ) + (i32.store + (local.get $0) + (local.get $13) + ) + (i32.store offset=20 + (local.get $10) + (local.get $13) + ) + ) + (if + (i32.eqz + (local.get $13) + ) + (block + (local.set $2 + (local.get $5) + ) + (br $label$118) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $13) + (local.tee $0 + (i32.load + (i32.const 3660) + ) + ) + ) + (call $fimport$10) + ) + (i32.store offset=24 + (local.get $13) + (local.get $10) + ) + (if + (local.tee $1 + (i32.load offset=16 + (local.get $4) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $0) + ) + (call $fimport$10) + (block + (i32.store offset=16 + (local.get $13) + (local.get $1) + ) + (i32.store offset=24 + (local.get $1) + (local.get $13) + ) + ) + ) + ) + (if + (local.tee $0 + (i32.load offset=20 + (local.get $4) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + (block + (i32.store offset=20 + (local.get $13) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $13) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + (block $label$136 + (if + (i32.lt_u + (local.get $3) + (i32.const 16) + ) + (block + (i32.store offset=4 + (local.get $4) + (i32.or + (local.tee $0 + (i32.add + (local.get $3) + (local.get $7) + ) + ) + (i32.const 3) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $4) + (local.get $0) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + ) 
+ (block + (i32.store offset=4 + (local.get $4) + (i32.or + (local.get $7) + (i32.const 3) + ) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $6) + (local.get $3) + ) + (local.get $3) + ) + (local.set $0 + (i32.shr_u + (local.get $3) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $3) + (i32.const 256) + ) + (block + (local.set $3 + (i32.add + (i32.shl + (i32.shl + (local.get $0) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3684) + ) + ) + (if + (i32.and + (local.tee $1 + (i32.load + (i32.const 3644) + ) + ) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + (block + (local.set $16 + (local.get $1) + ) + (local.set $8 + (local.get $0) + ) + ) + ) + (block + (i32.store + (i32.const 3644) + (i32.or + (local.get $1) + (local.get $0) + ) + ) + (local.set $16 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (local.set $8 + (local.get $3) + ) + ) + ) + (i32.store + (local.get $16) + (local.get $6) + ) + (i32.store offset=12 + (local.get $8) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $8) + ) + (i32.store offset=12 + (local.get $6) + (local.get $3) + ) + (br $label$136) + ) + ) + (local.set $1 + (i32.add + (i32.shl + (local.tee $5 + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $3) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $3) + (i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $3) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $5 + (i32.and + (i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $5) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $1) + (local.get $0) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $1) + (local.get $0) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (i32.const 2) + ) + (i32.const 3948) + ) + ) + (i32.store offset=28 + (local.get $6) + (local.get $5) + ) + (i32.store offset=4 + (local.tee $0 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + (i32.const 0) + ) + (i32.store + (local.get $0) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (local.get $2) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $5) + ) + ) + ) + ) + (block + (i32.store + (i32.const 3648) + (i32.or + (local.get $2) + (local.get $0) + ) + ) + (i32.store + (local.get $1) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $1) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$136) + ) + ) + (local.set $0 + (i32.load + (local.get $1) + ) + ) + (local.set $1 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $5) + (i32.const 1) + ) + ) + ) + (local.set $5 + (i32.shl + (local.get $3) + (if (result i32) + (i32.eq + (local.get $5) + (i32.const 31) + ) + 
(i32.const 0) + (local.get $1) + ) + ) + ) + (block $label$151 + (block $label$152 + (block $label$153 + (loop $label$154 + (br_if $label$152 + (i32.eq + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $3) + ) + ) + (local.set $2 + (i32.shl + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$153 + (i32.eqz + (local.tee $1 + (i32.load + (local.tee $5 + (i32.add + (i32.add + (local.get $0) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $5) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $5 + (local.get $2) + ) + (local.set $0 + (local.get $1) + ) + (br $label$154) + ) + ) + (if + (i32.lt_u + (local.get $5) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + (block + (i32.store + (local.get $5) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $0) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$136) + ) + ) + (br $label$151) + ) + (if + (i32.and + (i32.ge_u + (local.tee $2 + (i32.load + (local.tee $3 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + ) + (local.tee $1 + (i32.load + (i32.const 3660) + ) + ) + ) + (i32.ge_u + (local.get $0) + (local.get $1) + ) + ) + (block + (i32.store offset=12 + (local.get $2) + (local.get $6) + ) + (i32.store + (local.get $3) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $2) + ) + (i32.store offset=12 + (local.get $6) + (local.get $0) + ) + (i32.store offset=24 + (local.get $6) + (i32.const 0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $4) + (i32.const 8) + ) + ) + ) + (local.set $0 + (local.get $7) + ) + ) + (local.set $0 + (local.get $7) + ) + ) + ) + (local.set $0 + (local.get $7) + ) + ) + ) + ) + ) + ) + (if + (i32.ge_u + (local.tee $1 + (i32.load + (i32.const 3652) + ) + ) + (local.get $0) + ) + (block + (local.set $2 + (i32.load + (i32.const 3664) + ) + ) + (if + (i32.gt_u + (local.tee $3 + (i32.sub + (local.get $1) + (local.get $0) + ) + ) + (i32.const 15) + ) + (block + (i32.store + (i32.const 3664) + (local.tee $1 + (i32.add + (local.get $2) + (local.get $0) + ) + ) + ) + (i32.store + (i32.const 3652) + (local.get $3) + ) + (i32.store offset=4 + (local.get $1) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $1) + (local.get $3) + ) + (local.get $3) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + ) + (block + (i32.store + (i32.const 3652) + (i32.const 0) + ) + (i32.store + (i32.const 3664) + (i32.const 0) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $1) + (i32.const 3) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (i32.add + (local.get $2) + (local.get $1) + ) + (i32.const 4) + ) + ) + (i32.or + (i32.load + (local.get $0) + ) + (i32.const 1) + ) + ) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.gt_u + (local.tee $10 + (i32.load + (i32.const 3656) + ) + ) + (local.get $0) + ) + (block + (i32.store + (i32.const 3656) + (local.tee $3 + (i32.sub + (local.get $10) + (local.get $0) + ) + ) + ) + (i32.store + (i32.const 3668) + (local.tee $1 + (i32.add + (local.tee $2 + (i32.load + (i32.const 3668) + ) + ) + (local.get $0) + ) + ) + ) + (i32.store offset=4 + (local.get $1) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + 
(i32.store offset=4 + (local.get $2) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (if + (i32.le_u + (local.tee $6 + (i32.and + (local.tee $8 + (i32.add + (local.tee $1 + (if (result i32) + (i32.load + (i32.const 4116) + ) + (i32.load + (i32.const 4124) + ) + (block (result i32) + (i32.store + (i32.const 4124) + (i32.const 4096) + ) + (i32.store + (i32.const 4120) + (i32.const 4096) + ) + (i32.store + (i32.const 4128) + (i32.const -1) + ) + (i32.store + (i32.const 4132) + (i32.const -1) + ) + (i32.store + (i32.const 4136) + (i32.const 0) + ) + (i32.store + (i32.const 4088) + (i32.const 0) + ) + (i32.store + (local.get $18) + (local.tee $1 + (i32.xor + (i32.and + (local.get $18) + (i32.const -16) + ) + (i32.const 1431655768) + ) + ) + ) + (i32.store + (i32.const 4116) + (local.get $1) + ) + (i32.const 4096) + ) + ) + ) + (local.tee $13 + (i32.add + (local.get $0) + (i32.const 47) + ) + ) + ) + ) + (local.tee $4 + (i32.sub + (i32.const 0) + (local.get $1) + ) + ) + ) + ) + (local.get $0) + ) + (block + (global.set $global$1 + (local.get $14) + ) + (return + (i32.const 0) + ) + ) + ) + (if + (local.tee $2 + (i32.load + (i32.const 4084) + ) + ) + (if + (i32.or + (i32.le_u + (local.tee $1 + (i32.add + (local.tee $3 + (i32.load + (i32.const 4076) + ) + ) + (local.get $6) + ) + ) + (local.get $3) + ) + (i32.gt_u + (local.get $1) + (local.get $2) + ) + ) + (block + (global.set $global$1 + (local.get $14) + ) + (return + (i32.const 0) + ) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $0) + (i32.const 48) + ) + ) + (block $label$171 + (block $label$172 + (if + (i32.eqz + (i32.and + (i32.load + (i32.const 4088) + ) + (i32.const 4) + ) + ) + (block + (block $label$174 + (block $label$175 + (block $label$176 + (br_if $label$176 + (i32.eqz + (local.tee $3 + (i32.load + (i32.const 3668) + ) + ) + ) + ) + (local.set $2 + (i32.const 4092) + ) + (loop $label$177 + (block $label$178 + (if + (i32.le_u + (local.tee $1 + (i32.load + (local.get $2) + ) + ) + (local.get $3) + ) + (br_if $label$178 + (i32.gt_u + (i32.add + (local.get $1) + (i32.load + (local.tee $5 + (i32.add + (local.get $2) + (i32.const 4) + ) + ) + ) + ) + (local.get $3) + ) + ) + ) + (br_if $label$176 + (i32.eqz + (local.tee $1 + (i32.load offset=8 + (local.get $2) + ) + ) + ) + ) + (local.set $2 + (local.get $1) + ) + (br $label$177) + ) + ) + (if + (i32.lt_u + (local.tee $3 + (i32.and + (i32.sub + (local.get $8) + (local.get $10) + ) + (local.get $4) + ) + ) + (i32.const 2147483647) + ) + (if + (i32.eq + (local.tee $1 + (call $34 + (local.get $3) + ) + ) + (i32.add + (i32.load + (local.get $2) + ) + (i32.load + (local.get $5) + ) + ) + ) + (br_if $label$172 + (i32.ne + (local.get $1) + (i32.const -1) + ) + ) + (block + (local.set $2 + (local.get $1) + ) + (local.set $1 + (local.get $3) + ) + (br $label$175) + ) + ) + ) + (br $label$174) + ) + (if + (i32.ne + (local.tee $1 + (call $34 + (i32.const 0) + ) + ) + (i32.const -1) + ) + (block + (local.set $2 + (i32.sub + (i32.and + (i32.add + (local.tee $5 + (i32.add + (local.tee $2 + (i32.load + (i32.const 4120) + ) + ) + (i32.const -1) + ) + ) + (local.tee $3 + (local.get $1) + ) + ) + (i32.sub + (i32.const 0) + (local.get $2) + ) + ) + (local.get $3) + ) + ) + (local.set $4 + (i32.add + (local.tee $3 + (i32.add + (if (result i32) + (i32.and + (local.get $5) + (local.get $3) + ) + (local.get $2) + (i32.const 0) + ) + (local.get $6) + ) + ) + (local.tee $5 + (i32.load + 
(i32.const 4076) + ) + ) + ) + ) + (if + (i32.and + (i32.gt_u + (local.get $3) + (local.get $0) + ) + (i32.lt_u + (local.get $3) + (i32.const 2147483647) + ) + ) + (block + (if + (local.tee $2 + (i32.load + (i32.const 4084) + ) + ) + (br_if $label$174 + (i32.or + (i32.le_u + (local.get $4) + (local.get $5) + ) + (i32.gt_u + (local.get $4) + (local.get $2) + ) + ) + ) + ) + (br_if $label$172 + (i32.eq + (local.tee $2 + (call $34 + (local.get $3) + ) + ) + (local.get $1) + ) + ) + (local.set $1 + (local.get $3) + ) + (br $label$175) + ) + ) + ) + ) + (br $label$174) + ) + (local.set $5 + (i32.sub + (i32.const 0) + (local.get $1) + ) + ) + (if + (i32.and + (i32.gt_u + (local.get $7) + (local.get $1) + ) + (i32.and + (i32.lt_u + (local.get $1) + (i32.const 2147483647) + ) + (i32.ne + (local.get $2) + (i32.const -1) + ) + ) + ) + (if + (i32.lt_u + (local.tee $3 + (i32.and + (i32.add + (i32.sub + (local.get $13) + (local.get $1) + ) + (local.tee $3 + (i32.load + (i32.const 4124) + ) + ) + ) + (i32.sub + (i32.const 0) + (local.get $3) + ) + ) + ) + (i32.const 2147483647) + ) + (if + (i32.eq + (call $34 + (local.get $3) + ) + (i32.const -1) + ) + (block + (drop + (call $34 + (local.get $5) + ) + ) + (br $label$174) + ) + (local.set $3 + (i32.add + (local.get $3) + (local.get $1) + ) + ) + ) + (local.set $3 + (local.get $1) + ) + ) + (local.set $3 + (local.get $1) + ) + ) + (if + (i32.ne + (local.get $2) + (i32.const -1) + ) + (block + (local.set $1 + (local.get $2) + ) + (br $label$172) + ) + ) + ) + (i32.store + (i32.const 4088) + (i32.or + (i32.load + (i32.const 4088) + ) + (i32.const 4) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $6) + (i32.const 2147483647) + ) + (if + (i32.and + (i32.lt_u + (local.tee $1 + (call $34 + (local.get $6) + ) + ) + (local.tee $3 + (call $34 + (i32.const 0) + ) + ) + ) + (i32.and + (i32.ne + (local.get $1) + (i32.const -1) + ) + (i32.ne + (local.get $3) + (i32.const -1) + ) + ) + ) + (br_if $label$172 + (i32.gt_u + (local.tee $3 + (i32.sub + (local.get $3) + (local.get $1) + ) + ) + (i32.add + (local.get $0) + (i32.const 40) + ) + ) + ) + ) + ) + (br $label$171) + ) + (i32.store + (i32.const 4076) + (local.tee $2 + (i32.add + (i32.load + (i32.const 4076) + ) + (local.get $3) + ) + ) + ) + (if + (i32.gt_u + (local.get $2) + (i32.load + (i32.const 4080) + ) + ) + (i32.store + (i32.const 4080) + (local.get $2) + ) + ) + (block $label$198 + (if + (local.tee $8 + (i32.load + (i32.const 3668) + ) + ) + (block + (local.set $2 + (i32.const 4092) + ) + (block $label$200 + (block $label$201 + (loop $label$202 + (br_if $label$201 + (i32.eq + (local.get $1) + (i32.add + (local.tee $4 + (i32.load + (local.get $2) + ) + ) + (local.tee $5 + (i32.load + (local.tee $7 + (i32.add + (local.get $2) + (i32.const 4) + ) + ) + ) + ) + ) + ) + ) + (br_if $label$202 + (local.tee $2 + (i32.load offset=8 + (local.get $2) + ) + ) + ) + ) + (br $label$200) + ) + (if + (i32.eqz + (i32.and + (i32.load offset=12 + (local.get $2) + ) + (i32.const 8) + ) + ) + (if + (i32.and + (i32.lt_u + (local.get $8) + (local.get $1) + ) + (i32.ge_u + (local.get $8) + (local.get $4) + ) + ) + (block + (i32.store + (local.get $7) + (i32.add + (local.get $5) + (local.get $3) + ) + ) + (local.set $5 + (i32.load + (i32.const 3656) + ) + ) + (local.set $1 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $2 + (i32.add + (local.get $8) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.store + (i32.const 3668) + (local.tee $2 + (i32.add + (local.get $8) + (if (result i32) + (i32.and + (local.get $2) + 
(i32.const 7) + ) + (local.get $1) + (local.tee $1 + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (i32.const 3656) + (local.tee $1 + (i32.add + (i32.sub + (local.get $3) + (local.get $1) + ) + (local.get $5) + ) + ) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $1) + (i32.const 1) + ) + ) + (i32.store offset=4 + (i32.add + (local.get $2) + (local.get $1) + ) + (i32.const 40) + ) + (i32.store + (i32.const 3672) + (i32.load + (i32.const 4132) + ) + ) + (br $label$198) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.tee $2 + (i32.load + (i32.const 3660) + ) + ) + ) + (block + (i32.store + (i32.const 3660) + (local.get $1) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + (local.set $10 + (i32.add + (local.get $1) + (local.get $3) + ) + ) + (local.set $5 + (i32.const 4092) + ) + (block $label$208 + (block $label$209 + (loop $label$210 + (br_if $label$209 + (i32.eq + (i32.load + (local.get $5) + ) + (local.get $10) + ) + ) + (br_if $label$210 + (local.tee $5 + (i32.load offset=8 + (local.get $5) + ) + ) + ) + (local.set $5 + (i32.const 4092) + ) + ) + (br $label$208) + ) + (if + (i32.and + (i32.load offset=12 + (local.get $5) + ) + (i32.const 8) + ) + (local.set $5 + (i32.const 4092) + ) + (block + (i32.store + (local.get $5) + (local.get $1) + ) + (i32.store + (local.tee $5 + (i32.add + (local.get $5) + (i32.const 4) + ) + ) + (i32.add + (i32.load + (local.get $5) + ) + (local.get $3) + ) + ) + (local.set $7 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $4 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (local.set $3 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $5 + (i32.add + (local.get $10) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (local.set $6 + (i32.add + (local.tee $13 + (i32.add + (local.get $1) + (if (result i32) + (i32.and + (local.get $4) + (i32.const 7) + ) + (local.get $7) + (i32.const 0) + ) + ) + ) + (local.get $0) + ) + ) + (local.set $7 + (i32.sub + (i32.sub + (local.tee $4 + (i32.add + (local.get $10) + (if (result i32) + (i32.and + (local.get $5) + (i32.const 7) + ) + (local.get $3) + (i32.const 0) + ) + ) + ) + (local.get $13) + ) + (local.get $0) + ) + ) + (i32.store offset=4 + (local.get $13) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + (block $label$217 + (if + (i32.eq + (local.get $4) + (local.get $8) + ) + (block + (i32.store + (i32.const 3656) + (local.tee $0 + (i32.add + (i32.load + (i32.const 3656) + ) + (local.get $7) + ) + ) + ) + (i32.store + (i32.const 3668) + (local.get $6) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $0) + (i32.const 1) + ) + ) + ) + (block + (if + (i32.eq + (local.get $4) + (i32.load + (i32.const 3664) + ) + ) + (block + (i32.store + (i32.const 3652) + (local.tee $0 + (i32.add + (i32.load + (i32.const 3652) + ) + (local.get $7) + ) + ) + ) + (i32.store + (i32.const 3664) + (local.get $6) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $0) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $6) + (local.get $0) + ) + (local.get $0) + ) + (br $label$217) + ) + ) + (i32.store + (local.tee $0 + (i32.add + (local.tee $0 + (if (result i32) + (i32.eq + (i32.and + (local.tee $0 + (i32.load offset=4 + (local.get $4) + ) + ) + (i32.const 3) + ) + (i32.const 1) + ) + (block (result i32) + (local.set $11 + (i32.and + (local.get $0) + (i32.const -8) + ) + ) + (local.set $1 + (i32.shr_u + (local.get $0) + (i32.const 3) + ) + ) + (block $label$222 + (if + (i32.lt_u + (local.get $0) + (i32.const 256) + ) + 
(block + (local.set $5 + (i32.load offset=12 + (local.get $4) + ) + ) + (block $label$224 + (if + (i32.ne + (local.tee $3 + (i32.load offset=8 + (local.get $4) + ) + ) + (local.tee $0 + (i32.add + (i32.shl + (i32.shl + (local.get $1) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3684) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $3) + (local.get $2) + ) + (call $fimport$10) + ) + (br_if $label$224 + (i32.eq + (i32.load offset=12 + (local.get $3) + ) + (local.get $4) + ) + ) + (call $fimport$10) + ) + ) + ) + (if + (i32.eq + (local.get $5) + (local.get $3) + ) + (block + (i32.store + (i32.const 3644) + (i32.and + (i32.load + (i32.const 3644) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + (br $label$222) + ) + ) + (block $label$228 + (if + (i32.eq + (local.get $5) + (local.get $0) + ) + (local.set $20 + (i32.add + (local.get $5) + (i32.const 8) + ) + ) + (block + (if + (i32.lt_u + (local.get $5) + (local.get $2) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $5) + (i32.const 8) + ) + ) + ) + (local.get $4) + ) + (block + (local.set $20 + (local.get $0) + ) + (br $label$228) + ) + ) + (call $fimport$10) + ) + ) + ) + (i32.store offset=12 + (local.get $3) + (local.get $5) + ) + (i32.store + (local.get $20) + (local.get $3) + ) + ) + (block + (local.set $8 + (i32.load offset=24 + (local.get $4) + ) + ) + (block $label$234 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $4) + ) + ) + (local.get $4) + ) + (block + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.tee $3 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + (i32.const 4) + ) + ) + ) + ) + ) + (if + (local.tee $0 + (i32.load + (local.get $3) + ) + ) + (local.set $1 + (local.get $3) + ) + (block + (local.set $12 + (i32.const 0) + ) + (br $label$234) + ) + ) + ) + (loop $label$239 + (if + (local.tee $3 + (i32.load + (local.tee $5 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $3) + ) + (local.set $1 + (local.get $5) + ) + (br $label$239) + ) + ) + (if + (local.tee $3 + (i32.load + (local.tee $5 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $3) + ) + (local.set $1 + (local.get $5) + ) + (br $label$239) + ) + ) + ) + (if + (i32.lt_u + (local.get $1) + (local.get $2) + ) + (call $fimport$10) + (block + (i32.store + (local.get $1) + (i32.const 0) + ) + (local.set $12 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $5 + (i32.load offset=8 + (local.get $4) + ) + ) + (local.get $2) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load + (local.tee $3 + (i32.add + (local.get $5) + (i32.const 12) + ) + ) + ) + (local.get $4) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $4) + ) + (block + (i32.store + (local.get $3) + (local.get $0) + ) + (i32.store + (local.get $1) + (local.get $5) + ) + (local.set $12 + (local.get $0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + (br_if $label$222 + (i32.eqz + (local.get $8) + ) + ) + (block $label$249 + (if + (i32.eq + (local.get $4) + (i32.load + (local.tee $0 + (i32.add + (i32.shl + (local.tee $1 + (i32.load offset=28 + (local.get $4) + ) + ) + (i32.const 2) + ) + (i32.const 3948) + ) + ) + ) + ) + (block + (i32.store + (local.get $0) + (local.get $12) + ) + (br_if $label$249 + (local.get $12) + ) + (i32.store + 
(i32.const 3648) + (i32.and + (i32.load + (i32.const 3648) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $1) + ) + (i32.const -1) + ) + ) + ) + (br $label$222) + ) + (block + (if + (i32.lt_u + (local.get $8) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $8) + (i32.const 16) + ) + ) + ) + (local.get $4) + ) + (i32.store + (local.get $0) + (local.get $12) + ) + (i32.store offset=20 + (local.get $8) + (local.get $12) + ) + ) + (br_if $label$222 + (i32.eqz + (local.get $12) + ) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $12) + (local.tee $1 + (i32.load + (i32.const 3660) + ) + ) + ) + (call $fimport$10) + ) + (i32.store offset=24 + (local.get $12) + (local.get $8) + ) + (if + (local.tee $3 + (i32.load + (local.tee $0 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $3) + (local.get $1) + ) + (call $fimport$10) + (block + (i32.store offset=16 + (local.get $12) + (local.get $3) + ) + (i32.store offset=24 + (local.get $3) + (local.get $12) + ) + ) + ) + ) + (br_if $label$222 + (i32.eqz + (local.tee $0 + (i32.load offset=4 + (local.get $0) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + (block + (i32.store offset=20 + (local.get $12) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $12) + ) + ) + ) + ) + ) + ) + (local.set $7 + (i32.add + (local.get $11) + (local.get $7) + ) + ) + (i32.add + (local.get $4) + (local.get $11) + ) + ) + (local.get $4) + ) + ) + (i32.const 4) + ) + ) + (i32.and + (i32.load + (local.get $0) + ) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $6) + (i32.or + (local.get $7) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $6) + (local.get $7) + ) + (local.get $7) + ) + (local.set $0 + (i32.shr_u + (local.get $7) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $7) + (i32.const 256) + ) + (block + (local.set $3 + (i32.add + (i32.shl + (i32.shl + (local.get $0) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3684) + ) + ) + (block $label$263 + (if + (i32.and + (local.tee $1 + (i32.load + (i32.const 3644) + ) + ) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $0) + ) + ) + ) + (block + (if + (i32.ge_u + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3660) + ) + ) + (block + (local.set $21 + (local.get $1) + ) + (local.set $9 + (local.get $0) + ) + (br $label$263) + ) + ) + (call $fimport$10) + ) + (block + (i32.store + (i32.const 3644) + (i32.or + (local.get $1) + (local.get $0) + ) + ) + (local.set $21 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (local.set $9 + (local.get $3) + ) + ) + ) + ) + (i32.store + (local.get $21) + (local.get $6) + ) + (i32.store offset=12 + (local.get $9) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $9) + ) + (i32.store offset=12 + (local.get $6) + (local.get $3) + ) + (br $label$217) + ) + ) + (local.set $3 + (i32.add + (i32.shl + (local.tee $2 + (block $label$267 (result i32) + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $7) + (i32.const 8) + ) + ) + (block (result i32) + (drop + (br_if $label$267 + (i32.const 31) + (i32.gt_u + (local.get $7) + (i32.const 16777215) + ) + ) + ) + (i32.or + (i32.and + (i32.shr_u + (local.get $7) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $0 + (i32.and + 
(i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $3 + (i32.and + (i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $3) + ) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $1) + (local.get $0) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $1) + (local.get $0) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + ) + (i32.const 2) + ) + (i32.const 3948) + ) + ) + (i32.store offset=28 + (local.get $6) + (local.get $2) + ) + (i32.store offset=4 + (local.tee $0 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + (i32.const 0) + ) + (i32.store + (local.get $0) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (local.tee $1 + (i32.load + (i32.const 3648) + ) + ) + (local.tee $0 + (i32.shl + (i32.const 1) + (local.get $2) + ) + ) + ) + ) + (block + (i32.store + (i32.const 3648) + (i32.or + (local.get $1) + (local.get $0) + ) + ) + (i32.store + (local.get $3) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $3) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$217) + ) + ) + (local.set $0 + (i32.load + (local.get $3) + ) + ) + (local.set $1 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $2) + (i32.const 1) + ) + ) + ) + (local.set $2 + (i32.shl + (local.get $7) + (if (result i32) + (i32.eq + (local.get $2) + (i32.const 31) + ) + (i32.const 0) + (local.get $1) + ) + ) + ) + (block $label$273 + (block $label$274 + (block $label$275 + (loop $label$276 + (br_if $label$274 + (i32.eq + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $7) + ) + ) + (local.set $3 + (i32.shl + (local.get $2) + (i32.const 1) + ) + ) + (br_if $label$275 + (i32.eqz + (local.tee $1 + (i32.load + (local.tee $2 + (i32.add + (i32.add + (local.get $0) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $2) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $2 + (local.get $3) + ) + (local.set $0 + (local.get $1) + ) + (br $label$276) + ) + ) + (if + (i32.lt_u + (local.get $2) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + (block + (i32.store + (local.get $2) + (local.get $6) + ) + (i32.store offset=24 + (local.get $6) + (local.get $0) + ) + (i32.store offset=12 + (local.get $6) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $6) + ) + (br $label$217) + ) + ) + (br $label$273) + ) + (if + (i32.and + (i32.ge_u + (local.tee $2 + (i32.load + (local.tee $3 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + ) + (local.tee $1 + (i32.load + (i32.const 3660) + ) + ) + ) + (i32.ge_u + (local.get $0) + (local.get $1) + ) + ) + (block + (i32.store offset=12 + (local.get $2) + (local.get $6) + ) + (i32.store + (local.get $3) + (local.get $6) + ) + (i32.store offset=8 + (local.get $6) + (local.get $2) + ) + (i32.store offset=12 + (local.get $6) + (local.get $0) + ) + (i32.store offset=24 + (local.get $6) + (i32.const 0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $13) + (i32.const 8) + ) + ) + ) + ) + ) + (loop $label$281 + 
(block $label$282 + (if + (i32.le_u + (local.tee $2 + (i32.load + (local.get $5) + ) + ) + (local.get $8) + ) + (br_if $label$282 + (i32.gt_u + (local.tee $13 + (i32.add + (local.get $2) + (i32.load offset=4 + (local.get $5) + ) + ) + ) + (local.get $8) + ) + ) + ) + (local.set $5 + (i32.load offset=8 + (local.get $5) + ) + ) + (br $label$281) + ) + ) + (local.set $2 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $5 + (i32.add + (local.tee $7 + (i32.add + (local.get $13) + (i32.const -47) + ) + ) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (local.set $10 + (i32.add + (local.tee $7 + (if (result i32) + (i32.lt_u + (local.tee $2 + (i32.add + (local.get $7) + (if (result i32) + (i32.and + (local.get $5) + (i32.const 7) + ) + (local.get $2) + (i32.const 0) + ) + ) + ) + (local.tee $12 + (i32.add + (local.get $8) + (i32.const 16) + ) + ) + ) + (local.get $8) + (local.get $2) + ) + ) + (i32.const 8) + ) + ) + (local.set $5 + (i32.add + (local.get $7) + (i32.const 24) + ) + ) + (local.set $9 + (i32.add + (local.get $3) + (i32.const -40) + ) + ) + (local.set $2 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $4 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.store + (i32.const 3668) + (local.tee $4 + (i32.add + (local.get $1) + (if (result i32) + (i32.and + (local.get $4) + (i32.const 7) + ) + (local.get $2) + (local.tee $2 + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (i32.const 3656) + (local.tee $2 + (i32.sub + (local.get $9) + (local.get $2) + ) + ) + ) + (i32.store offset=4 + (local.get $4) + (i32.or + (local.get $2) + (i32.const 1) + ) + ) + (i32.store offset=4 + (i32.add + (local.get $4) + (local.get $2) + ) + (i32.const 40) + ) + (i32.store + (i32.const 3672) + (i32.load + (i32.const 4132) + ) + ) + (i32.store + (local.tee $2 + (i32.add + (local.get $7) + (i32.const 4) + ) + ) + (i32.const 27) + ) + (i64.store align=4 + (local.get $10) + (i64.load align=4 + (i32.const 4092) + ) + ) + (i64.store offset=8 align=4 + (local.get $10) + (i64.load align=4 + (i32.const 4100) + ) + ) + (i32.store + (i32.const 4092) + (local.get $1) + ) + (i32.store + (i32.const 4096) + (local.get $3) + ) + (i32.store + (i32.const 4104) + (i32.const 0) + ) + (i32.store + (i32.const 4100) + (local.get $10) + ) + (local.set $1 + (local.get $5) + ) + (loop $label$290 + (i32.store + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (i32.const 7) + ) + (br_if $label$290 + (i32.lt_u + (i32.add + (local.get $1) + (i32.const 4) + ) + (local.get $13) + ) + ) + ) + (if + (i32.ne + (local.get $7) + (local.get $8) + ) + (block + (i32.store + (local.get $2) + (i32.and + (i32.load + (local.get $2) + ) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $8) + (i32.or + (local.tee $4 + (i32.sub + (local.get $7) + (local.get $8) + ) + ) + (i32.const 1) + ) + ) + (i32.store + (local.get $7) + (local.get $4) + ) + (local.set $1 + (i32.shr_u + (local.get $4) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $4) + (i32.const 256) + ) + (block + (local.set $2 + (i32.add + (i32.shl + (i32.shl + (local.get $1) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3684) + ) + ) + (if + (i32.and + (local.tee $3 + (i32.load + (i32.const 3644) + ) + ) + (local.tee $1 + (i32.shl + (i32.const 1) + (local.get $1) + ) + ) + ) + (if + (i32.lt_u + (local.tee $1 + (i32.load + (local.tee $3 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + (block + (local.set $15 + (local.get $3) + ) + 
(local.set $11 + (local.get $1) + ) + ) + ) + (block + (i32.store + (i32.const 3644) + (i32.or + (local.get $3) + (local.get $1) + ) + ) + (local.set $15 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + (local.set $11 + (local.get $2) + ) + ) + ) + (i32.store + (local.get $15) + (local.get $8) + ) + (i32.store offset=12 + (local.get $11) + (local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $11) + ) + (i32.store offset=12 + (local.get $8) + (local.get $2) + ) + (br $label$198) + ) + ) + (local.set $2 + (i32.add + (i32.shl + (local.tee $5 + (if (result i32) + (local.tee $1 + (i32.shr_u + (local.get $4) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $4) + (i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $4) + (i32.add + (local.tee $1 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $1 + (i32.and + (i32.shr_u + (i32.add + (local.tee $3 + (i32.shl + (local.get $1) + (local.tee $2 + (i32.and + (i32.shr_u + (i32.add + (local.get $1) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $2) + ) + (local.tee $1 + (i32.and + (i32.shr_u + (i32.add + (local.tee $3 + (i32.shl + (local.get $3) + (local.get $1) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $3) + (local.get $1) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $1) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (i32.const 2) + ) + (i32.const 3948) + ) + ) + (i32.store offset=28 + (local.get $8) + (local.get $5) + ) + (i32.store offset=20 + (local.get $8) + (i32.const 0) + ) + (i32.store + (local.get $12) + (i32.const 0) + ) + (if + (i32.eqz + (i32.and + (local.tee $3 + (i32.load + (i32.const 3648) + ) + ) + (local.tee $1 + (i32.shl + (i32.const 1) + (local.get $5) + ) + ) + ) + ) + (block + (i32.store + (i32.const 3648) + (i32.or + (local.get $3) + (local.get $1) + ) + ) + (i32.store + (local.get $2) + (local.get $8) + ) + (i32.store offset=24 + (local.get $8) + (local.get $2) + ) + (i32.store offset=12 + (local.get $8) + (local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $8) + ) + (br $label$198) + ) + ) + (local.set $1 + (i32.load + (local.get $2) + ) + ) + (local.set $3 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $5) + (i32.const 1) + ) + ) + ) + (local.set $5 + (i32.shl + (local.get $4) + (if (result i32) + (i32.eq + (local.get $5) + (i32.const 31) + ) + (i32.const 0) + (local.get $3) + ) + ) + ) + (block $label$304 + (block $label$305 + (block $label$306 + (loop $label$307 + (br_if $label$305 + (i32.eq + (i32.and + (i32.load offset=4 + (local.get $1) + ) + (i32.const -8) + ) + (local.get $4) + ) + ) + (local.set $2 + (i32.shl + (local.get $5) + (i32.const 1) + ) + ) + (br_if $label$306 + (i32.eqz + (local.tee $3 + (i32.load + (local.tee $5 + (i32.add + (i32.add + (local.get $1) + (i32.const 16) + ) + (i32.shl + (i32.shr_u + (local.get $5) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $5 + (local.get $2) + ) + (local.set $1 + (local.get $3) + ) + (br $label$307) + ) + ) + (if + (i32.lt_u + (local.get $5) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + (block + (i32.store + (local.get $5) + (local.get $8) + ) + (i32.store offset=24 + (local.get $8) + (local.get $1) + ) + (i32.store offset=12 + (local.get $8) + 
(local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $8) + ) + (br $label$198) + ) + ) + (br $label$304) + ) + (if + (i32.and + (i32.ge_u + (local.tee $5 + (i32.load + (local.tee $2 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + ) + (local.tee $3 + (i32.load + (i32.const 3660) + ) + ) + ) + (i32.ge_u + (local.get $1) + (local.get $3) + ) + ) + (block + (i32.store offset=12 + (local.get $5) + (local.get $8) + ) + (i32.store + (local.get $2) + (local.get $8) + ) + (i32.store offset=8 + (local.get $8) + (local.get $5) + ) + (i32.store offset=12 + (local.get $8) + (local.get $1) + ) + (i32.store offset=24 + (local.get $8) + (i32.const 0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + ) + (block + (if + (i32.or + (i32.eqz + (local.tee $2 + (i32.load + (i32.const 3660) + ) + ) + ) + (i32.lt_u + (local.get $1) + (local.get $2) + ) + ) + (i32.store + (i32.const 3660) + (local.get $1) + ) + ) + (i32.store + (i32.const 4092) + (local.get $1) + ) + (i32.store + (i32.const 4096) + (local.get $3) + ) + (i32.store + (i32.const 4104) + (i32.const 0) + ) + (i32.store + (i32.const 3680) + (i32.load + (i32.const 4116) + ) + ) + (i32.store + (i32.const 3676) + (i32.const -1) + ) + (local.set $2 + (i32.const 0) + ) + (loop $label$314 + (i32.store offset=12 + (local.tee $5 + (i32.add + (i32.shl + (i32.shl + (local.get $2) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3684) + ) + ) + (local.get $5) + ) + (i32.store offset=8 + (local.get $5) + (local.get $5) + ) + (br_if $label$314 + (i32.ne + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (i32.const 32) + ) + ) + ) + (local.set $5 + (i32.add + (local.get $3) + (i32.const -40) + ) + ) + (local.set $3 + (i32.and + (i32.sub + (i32.const 0) + (local.tee $2 + (i32.add + (local.get $1) + (i32.const 8) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.store + (i32.const 3668) + (local.tee $3 + (i32.add + (local.get $1) + (local.tee $1 + (if (result i32) + (i32.and + (local.get $2) + (i32.const 7) + ) + (local.get $3) + (i32.const 0) + ) + ) + ) + ) + ) + (i32.store + (i32.const 3656) + (local.tee $1 + (i32.sub + (local.get $5) + (local.get $1) + ) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $1) + (i32.const 1) + ) + ) + (i32.store offset=4 + (i32.add + (local.get $3) + (local.get $1) + ) + (i32.const 40) + ) + (i32.store + (i32.const 3672) + (i32.load + (i32.const 4132) + ) + ) + ) + ) + ) + (if + (i32.gt_u + (local.tee $1 + (i32.load + (i32.const 3656) + ) + ) + (local.get $0) + ) + (block + (i32.store + (i32.const 3656) + (local.tee $3 + (i32.sub + (local.get $1) + (local.get $0) + ) + ) + ) + (i32.store + (i32.const 3668) + (local.tee $1 + (i32.add + (local.tee $2 + (i32.load + (i32.const 3668) + ) + ) + (local.get $0) + ) + ) + ) + (i32.store offset=4 + (local.get $1) + (i32.or + (local.get $3) + (i32.const 1) + ) + ) + (i32.store offset=4 + (local.get $2) + (i32.or + (local.get $0) + (i32.const 3) + ) + ) + (global.set $global$1 + (local.get $14) + ) + (return + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + ) + ) + (i32.store + (call $11) + (i32.const 12) + ) + (global.set $global$1 + (local.get $14) + ) + (i32.const 0) + ) + ) + (func $32 (; 45 ;) (type $2) (param $0 i32) + (local $1 i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (block $label$1 + (if + (i32.eqz + (local.get 
$0) + ) + (return) + ) + (if + (i32.lt_u + (local.tee $1 + (i32.add + (local.get $0) + (i32.const -8) + ) + ) + (local.tee $11 + (i32.load + (i32.const 3660) + ) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (local.tee $8 + (i32.and + (local.tee $0 + (i32.load + (i32.add + (local.get $0) + (i32.const -4) + ) + ) + ) + (i32.const 3) + ) + ) + (i32.const 1) + ) + (call $fimport$10) + ) + (local.set $6 + (i32.add + (local.get $1) + (local.tee $4 + (i32.and + (local.get $0) + (i32.const -8) + ) + ) + ) + ) + (block $label$5 + (if + (i32.and + (local.get $0) + (i32.const 1) + ) + (block + (local.set $3 + (local.get $1) + ) + (local.set $2 + (local.get $4) + ) + ) + (block + (if + (i32.eqz + (local.get $8) + ) + (return) + ) + (if + (i32.lt_u + (local.tee $0 + (i32.add + (local.get $1) + (i32.sub + (i32.const 0) + (local.tee $8 + (i32.load + (local.get $1) + ) + ) + ) + ) + ) + (local.get $11) + ) + (call $fimport$10) + ) + (local.set $1 + (i32.add + (local.get $8) + (local.get $4) + ) + ) + (if + (i32.eq + (local.get $0) + (i32.load + (i32.const 3664) + ) + ) + (block + (if + (i32.ne + (i32.and + (local.tee $3 + (i32.load + (local.tee $2 + (i32.add + (local.get $6) + (i32.const 4) + ) + ) + ) + ) + (i32.const 3) + ) + (i32.const 3) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + (i32.store + (i32.const 3652) + (local.get $1) + ) + (i32.store + (local.get $2) + (i32.and + (local.get $3) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $0) + (i32.or + (local.get $1) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $0) + (local.get $1) + ) + (local.get $1) + ) + (return) + ) + ) + (local.set $10 + (i32.shr_u + (local.get $8) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $8) + (i32.const 256) + ) + (block + (local.set $3 + (i32.load offset=12 + (local.get $0) + ) + ) + (if + (i32.ne + (local.tee $4 + (i32.load offset=8 + (local.get $0) + ) + ) + (local.tee $2 + (i32.add + (i32.shl + (i32.shl + (local.get $10) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3684) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $4) + (local.get $11) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load offset=12 + (local.get $4) + ) + (local.get $0) + ) + (call $fimport$10) + ) + ) + ) + (if + (i32.eq + (local.get $3) + (local.get $4) + ) + (block + (i32.store + (i32.const 3644) + (i32.and + (i32.load + (i32.const 3644) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $10) + ) + (i32.const -1) + ) + ) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + (if + (i32.eq + (local.get $3) + (local.get $2) + ) + (local.set $5 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + (block + (if + (i32.lt_u + (local.get $3) + (local.get $11) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $2 + (i32.add + (local.get $3) + (i32.const 8) + ) + ) + ) + (local.get $0) + ) + (local.set $5 + (local.get $2) + ) + (call $fimport$10) + ) + ) + ) + (i32.store offset=12 + (local.get $4) + (local.get $3) + ) + (i32.store + (local.get $5) + (local.get $4) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + (local.set $12 + (i32.load offset=24 + (local.get $0) + ) + ) + (block $label$22 + (if + (i32.eq + (local.tee $4 + (i32.load offset=12 + (local.get $0) + ) + ) + (local.get $0) + ) + (block + (if + (local.tee $4 + (i32.load + (local.tee $8 + (i32.add + (local.tee $5 + (i32.add + (local.get $0) + 
(i32.const 16) + ) + ) + (i32.const 4) + ) + ) + ) + ) + (local.set $5 + (local.get $8) + ) + (if + (i32.eqz + (local.tee $4 + (i32.load + (local.get $5) + ) + ) + ) + (block + (local.set $7 + (i32.const 0) + ) + (br $label$22) + ) + ) + ) + (loop $label$27 + (if + (local.tee $10 + (i32.load + (local.tee $8 + (i32.add + (local.get $4) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $4 + (local.get $10) + ) + (local.set $5 + (local.get $8) + ) + (br $label$27) + ) + ) + (if + (local.tee $10 + (i32.load + (local.tee $8 + (i32.add + (local.get $4) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $4 + (local.get $10) + ) + (local.set $5 + (local.get $8) + ) + (br $label$27) + ) + ) + ) + (if + (i32.lt_u + (local.get $5) + (local.get $11) + ) + (call $fimport$10) + (block + (i32.store + (local.get $5) + (i32.const 0) + ) + (local.set $7 + (local.get $4) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $5 + (i32.load offset=8 + (local.get $0) + ) + ) + (local.get $11) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load + (local.tee $8 + (i32.add + (local.get $5) + (i32.const 12) + ) + ) + ) + (local.get $0) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $10 + (i32.add + (local.get $4) + (i32.const 8) + ) + ) + ) + (local.get $0) + ) + (block + (i32.store + (local.get $8) + (local.get $4) + ) + (i32.store + (local.get $10) + (local.get $5) + ) + (local.set $7 + (local.get $4) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + (if + (local.get $12) + (block + (if + (i32.eq + (local.get $0) + (i32.load + (local.tee $5 + (i32.add + (i32.shl + (local.tee $4 + (i32.load offset=28 + (local.get $0) + ) + ) + (i32.const 2) + ) + (i32.const 3948) + ) + ) + ) + ) + (block + (i32.store + (local.get $5) + (local.get $7) + ) + (if + (i32.eqz + (local.get $7) + ) + (block + (i32.store + (i32.const 3648) + (i32.and + (i32.load + (i32.const 3648) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $4) + ) + (i32.const -1) + ) + ) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $12) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $4 + (i32.add + (local.get $12) + (i32.const 16) + ) + ) + ) + (local.get $0) + ) + (i32.store + (local.get $4) + (local.get $7) + ) + (i32.store offset=20 + (local.get $12) + (local.get $7) + ) + ) + (if + (i32.eqz + (local.get $7) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + (br $label$5) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $7) + (local.tee $5 + (i32.load + (i32.const 3660) + ) + ) + ) + (call $fimport$10) + ) + (i32.store offset=24 + (local.get $7) + (local.get $12) + ) + (if + (local.tee $4 + (i32.load + (local.tee $8 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $4) + (local.get $5) + ) + (call $fimport$10) + (block + (i32.store offset=16 + (local.get $7) + (local.get $4) + ) + (i32.store offset=24 + (local.get $4) + (local.get $7) + ) + ) + ) + ) + (if + (local.tee $4 + (i32.load offset=4 + (local.get $8) + ) + ) + (if + (i32.lt_u + (local.get $4) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + (block + (i32.store offset=20 + (local.get $7) + (local.get $4) + ) + (i32.store offset=24 + (local.get $4) + (local.get $7) + ) + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + 
(local.get $1) + ) + ) + ) + ) + (block + (local.set $3 + (local.get $0) + ) + (local.set $2 + (local.get $1) + ) + ) + ) + ) + ) + ) + (if + (i32.ge_u + (local.get $3) + (local.get $6) + ) + (call $fimport$10) + ) + (if + (i32.eqz + (i32.and + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $6) + (i32.const 4) + ) + ) + ) + ) + (i32.const 1) + ) + ) + (call $fimport$10) + ) + (if + (i32.and + (local.get $0) + (i32.const 2) + ) + (block + (i32.store + (local.get $1) + (i32.and + (local.get $0) + (i32.const -2) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $2) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $3) + (local.get $2) + ) + (local.get $2) + ) + ) + (block + (if + (i32.eq + (local.get $6) + (i32.load + (i32.const 3668) + ) + ) + (block + (i32.store + (i32.const 3656) + (local.tee $0 + (i32.add + (i32.load + (i32.const 3656) + ) + (local.get $2) + ) + ) + ) + (i32.store + (i32.const 3668) + (local.get $3) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $0) + (i32.const 1) + ) + ) + (if + (i32.ne + (local.get $3) + (i32.load + (i32.const 3664) + ) + ) + (return) + ) + (i32.store + (i32.const 3664) + (i32.const 0) + ) + (i32.store + (i32.const 3652) + (i32.const 0) + ) + (return) + ) + ) + (if + (i32.eq + (local.get $6) + (i32.load + (i32.const 3664) + ) + ) + (block + (i32.store + (i32.const 3652) + (local.tee $0 + (i32.add + (i32.load + (i32.const 3652) + ) + (local.get $2) + ) + ) + ) + (i32.store + (i32.const 3664) + (local.get $3) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $0) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $3) + (local.get $0) + ) + (local.get $0) + ) + (return) + ) + ) + (local.set $5 + (i32.add + (i32.and + (local.get $0) + (i32.const -8) + ) + (local.get $2) + ) + ) + (local.set $4 + (i32.shr_u + (local.get $0) + (i32.const 3) + ) + ) + (block $label$61 + (if + (i32.lt_u + (local.get $0) + (i32.const 256) + ) + (block + (local.set $2 + (i32.load offset=12 + (local.get $6) + ) + ) + (if + (i32.ne + (local.tee $1 + (i32.load offset=8 + (local.get $6) + ) + ) + (local.tee $0 + (i32.add + (i32.shl + (i32.shl + (local.get $4) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3684) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $1) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load offset=12 + (local.get $1) + ) + (local.get $6) + ) + (call $fimport$10) + ) + ) + ) + (if + (i32.eq + (local.get $2) + (local.get $1) + ) + (block + (i32.store + (i32.const 3644) + (i32.and + (i32.load + (i32.const 3644) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $4) + ) + (i32.const -1) + ) + ) + ) + (br $label$61) + ) + ) + (if + (i32.eq + (local.get $2) + (local.get $0) + ) + (local.set $14 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + (block + (if + (i32.lt_u + (local.get $2) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $2) + (i32.const 8) + ) + ) + ) + (local.get $6) + ) + (local.set $14 + (local.get $0) + ) + (call $fimport$10) + ) + ) + ) + (i32.store offset=12 + (local.get $1) + (local.get $2) + ) + (i32.store + (local.get $14) + (local.get $1) + ) + ) + (block + (local.set $7 + (i32.load offset=24 + (local.get $6) + ) + ) + (block $label$73 + (if + (i32.eq + (local.tee $0 + (i32.load offset=12 + (local.get $6) + ) + ) + (local.get $6) + ) + (block + (if + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + 
(local.tee $2 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + (i32.const 4) + ) + ) + ) + ) + (local.set $2 + (local.get $1) + ) + (if + (i32.eqz + (local.tee $0 + (i32.load + (local.get $2) + ) + ) + ) + (block + (local.set $9 + (i32.const 0) + ) + (br $label$73) + ) + ) + ) + (loop $label$78 + (if + (local.tee $4 + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $4) + ) + (local.set $2 + (local.get $1) + ) + (br $label$78) + ) + ) + (if + (local.tee $4 + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 16) + ) + ) + ) + ) + (block + (local.set $0 + (local.get $4) + ) + (local.set $2 + (local.get $1) + ) + (br $label$78) + ) + ) + ) + (if + (i32.lt_u + (local.get $2) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + (block + (i32.store + (local.get $2) + (i32.const 0) + ) + (local.set $9 + (local.get $0) + ) + ) + ) + ) + (block + (if + (i32.lt_u + (local.tee $2 + (i32.load offset=8 + (local.get $6) + ) + ) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + ) + (if + (i32.ne + (i32.load + (local.tee $1 + (i32.add + (local.get $2) + (i32.const 12) + ) + ) + ) + (local.get $6) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $4 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + (local.get $6) + ) + (block + (i32.store + (local.get $1) + (local.get $0) + ) + (i32.store + (local.get $4) + (local.get $2) + ) + (local.set $9 + (local.get $0) + ) + ) + (call $fimport$10) + ) + ) + ) + ) + (if + (local.get $7) + (block + (if + (i32.eq + (local.get $6) + (i32.load + (local.tee $2 + (i32.add + (i32.shl + (local.tee $0 + (i32.load offset=28 + (local.get $6) + ) + ) + (i32.const 2) + ) + (i32.const 3948) + ) + ) + ) + ) + (block + (i32.store + (local.get $2) + (local.get $9) + ) + (if + (i32.eqz + (local.get $9) + ) + (block + (i32.store + (i32.const 3648) + (i32.and + (i32.load + (i32.const 3648) + ) + (i32.xor + (i32.shl + (i32.const 1) + (local.get $0) + ) + (i32.const -1) + ) + ) + ) + (br $label$61) + ) + ) + ) + (block + (if + (i32.lt_u + (local.get $7) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + ) + (if + (i32.eq + (i32.load + (local.tee $0 + (i32.add + (local.get $7) + (i32.const 16) + ) + ) + ) + (local.get $6) + ) + (i32.store + (local.get $0) + (local.get $9) + ) + (i32.store offset=20 + (local.get $7) + (local.get $9) + ) + ) + (br_if $label$61 + (i32.eqz + (local.get $9) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $9) + (local.tee $2 + (i32.load + (i32.const 3660) + ) + ) + ) + (call $fimport$10) + ) + (i32.store offset=24 + (local.get $9) + (local.get $7) + ) + (if + (local.tee $0 + (i32.load + (local.tee $1 + (i32.add + (local.get $6) + (i32.const 16) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $0) + (local.get $2) + ) + (call $fimport$10) + (block + (i32.store offset=16 + (local.get $9) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $9) + ) + ) + ) + ) + (if + (local.tee $0 + (i32.load offset=4 + (local.get $1) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + (block + (i32.store offset=20 + (local.get $9) + (local.get $0) + ) + (i32.store offset=24 + (local.get $0) + (local.get $9) + ) + ) + ) + ) + ) + ) + ) + ) + ) + (i32.store offset=4 + (local.get $3) + (i32.or + (local.get $5) + (i32.const 1) + ) + ) + (i32.store + (i32.add + (local.get $3) + (local.get $5) + ) + (local.get $5) + ) + (if + (i32.eq + (local.get $3) + 
(i32.load + (i32.const 3664) + ) + ) + (block + (i32.store + (i32.const 3652) + (local.get $5) + ) + (return) + ) + (local.set $2 + (local.get $5) + ) + ) + ) + ) + (local.set $1 + (i32.shr_u + (local.get $2) + (i32.const 3) + ) + ) + (if + (i32.lt_u + (local.get $2) + (i32.const 256) + ) + (block + (local.set $0 + (i32.add + (i32.shl + (i32.shl + (local.get $1) + (i32.const 1) + ) + (i32.const 2) + ) + (i32.const 3684) + ) + ) + (if + (i32.and + (local.tee $2 + (i32.load + (i32.const 3644) + ) + ) + (local.tee $1 + (i32.shl + (i32.const 1) + (local.get $1) + ) + ) + ) + (if + (i32.lt_u + (local.tee $1 + (i32.load + (local.tee $2 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + ) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + (block + (local.set $15 + (local.get $2) + ) + (local.set $13 + (local.get $1) + ) + ) + ) + (block + (i32.store + (i32.const 3644) + (i32.or + (local.get $2) + (local.get $1) + ) + ) + (local.set $15 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + (local.set $13 + (local.get $0) + ) + ) + ) + (i32.store + (local.get $15) + (local.get $3) + ) + (i32.store offset=12 + (local.get $13) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get $13) + ) + (i32.store offset=12 + (local.get $3) + (local.get $0) + ) + (return) + ) + ) + (local.set $0 + (i32.add + (i32.shl + (local.tee $1 + (if (result i32) + (local.tee $0 + (i32.shr_u + (local.get $2) + (i32.const 8) + ) + ) + (if (result i32) + (i32.gt_u + (local.get $2) + (i32.const 16777215) + ) + (i32.const 31) + (i32.or + (i32.and + (i32.shr_u + (local.get $2) + (i32.add + (local.tee $0 + (i32.add + (i32.sub + (i32.const 14) + (i32.or + (i32.or + (local.tee $4 + (i32.and + (i32.shr_u + (i32.add + (local.tee $1 + (i32.shl + (local.get $0) + (local.tee $0 + (i32.and + (i32.shr_u + (i32.add + (local.get $0) + (i32.const 1048320) + ) + (i32.const 16) + ) + (i32.const 8) + ) + ) + ) + ) + (i32.const 520192) + ) + (i32.const 16) + ) + (i32.const 4) + ) + ) + (local.get $0) + ) + (local.tee $1 + (i32.and + (i32.shr_u + (i32.add + (local.tee $0 + (i32.shl + (local.get $1) + (local.get $4) + ) + ) + (i32.const 245760) + ) + (i32.const 16) + ) + (i32.const 2) + ) + ) + ) + ) + (i32.shr_u + (i32.shl + (local.get $0) + (local.get $1) + ) + (i32.const 15) + ) + ) + ) + (i32.const 7) + ) + ) + (i32.const 1) + ) + (i32.shl + (local.get $0) + (i32.const 1) + ) + ) + ) + (i32.const 0) + ) + ) + (i32.const 2) + ) + (i32.const 3948) + ) + ) + (i32.store offset=28 + (local.get $3) + (local.get $1) + ) + (i32.store offset=20 + (local.get $3) + (i32.const 0) + ) + (i32.store offset=16 + (local.get $3) + (i32.const 0) + ) + (block $label$113 + (if + (i32.and + (local.tee $4 + (i32.load + (i32.const 3648) + ) + ) + (local.tee $5 + (i32.shl + (i32.const 1) + (local.get $1) + ) + ) + ) + (block + (local.set $0 + (i32.load + (local.get $0) + ) + ) + (local.set $4 + (i32.sub + (i32.const 25) + (i32.shr_u + (local.get $1) + (i32.const 1) + ) + ) + ) + (local.set $1 + (i32.shl + (local.get $2) + (if (result i32) + (i32.eq + (local.get $1) + (i32.const 31) + ) + (i32.const 0) + (local.get $4) + ) + ) + ) + (block $label$117 + (block $label$118 + (block $label$119 + (loop $label$120 + (br_if $label$118 + (i32.eq + (i32.and + (i32.load offset=4 + (local.get $0) + ) + (i32.const -8) + ) + (local.get $2) + ) + ) + (local.set $4 + (i32.shl + (local.get $1) + (i32.const 1) + ) + ) + (br_if $label$119 + (i32.eqz + (local.tee $5 + (i32.load + (local.tee $1 + (i32.add + (i32.add + (local.get $0) + (i32.const 16) + ) 
+ (i32.shl + (i32.shr_u + (local.get $1) + (i32.const 31) + ) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (local.set $1 + (local.get $4) + ) + (local.set $0 + (local.get $5) + ) + (br $label$120) + ) + ) + (if + (i32.lt_u + (local.get $1) + (i32.load + (i32.const 3660) + ) + ) + (call $fimport$10) + (block + (i32.store + (local.get $1) + (local.get $3) + ) + (i32.store offset=24 + (local.get $3) + (local.get $0) + ) + (i32.store offset=12 + (local.get $3) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get $3) + ) + (br $label$113) + ) + ) + (br $label$117) + ) + (if + (i32.and + (i32.ge_u + (local.tee $2 + (i32.load + (local.tee $1 + (i32.add + (local.get $0) + (i32.const 8) + ) + ) + ) + ) + (local.tee $4 + (i32.load + (i32.const 3660) + ) + ) + ) + (i32.ge_u + (local.get $0) + (local.get $4) + ) + ) + (block + (i32.store offset=12 + (local.get $2) + (local.get $3) + ) + (i32.store + (local.get $1) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get $2) + ) + (i32.store offset=12 + (local.get $3) + (local.get $0) + ) + (i32.store offset=24 + (local.get $3) + (i32.const 0) + ) + ) + (call $fimport$10) + ) + ) + ) + (block + (i32.store + (i32.const 3648) + (i32.or + (local.get $4) + (local.get $5) + ) + ) + (i32.store + (local.get $0) + (local.get $3) + ) + (i32.store offset=24 + (local.get $3) + (local.get $0) + ) + (i32.store offset=12 + (local.get $3) + (local.get $3) + ) + (i32.store offset=8 + (local.get $3) + (local.get $3) + ) + ) + ) + ) + (i32.store + (i32.const 3676) + (local.tee $0 + (i32.add + (i32.load + (i32.const 3676) + ) + (i32.const -1) + ) + ) + ) + (if + (local.get $0) + (return) + (local.set $0 + (i32.const 4100) + ) + ) + (loop $label$128 + (local.set $0 + (i32.add + (local.tee $2 + (i32.load + (local.get $0) + ) + ) + (i32.const 8) + ) + ) + (br_if $label$128 + (local.get $2) + ) + ) + (i32.store + (i32.const 3676) + (i32.const -1) + ) + ) + ) + (func $33 (; 46 ;) (type $6) + (nop) + ) + (func $34 (; 47 ;) (type $1) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (block $label$1 (result i32) + (local.set $1 + (i32.add + (local.tee $2 + (i32.load + (global.get $global$0) + ) + ) + (local.tee $0 + (i32.and + (i32.add + (local.get $0) + (i32.const 15) + ) + (i32.const -16) + ) + ) + ) + ) + (if + (i32.or + (i32.and + (i32.gt_s + (local.get $0) + (i32.const 0) + ) + (i32.lt_s + (local.get $1) + (local.get $2) + ) + ) + (i32.lt_s + (local.get $1) + (i32.const 0) + ) + ) + (block + (drop + (call $fimport$6) + ) + (call $fimport$11 + (i32.const 12) + ) + (return + (i32.const -1) + ) + ) + ) + (i32.store + (global.get $global$0) + (local.get $1) + ) + (if + (i32.gt_s + (local.get $1) + (call $fimport$5) + ) + (if + (i32.eqz + (call $fimport$4) + ) + (block + (call $fimport$11 + (i32.const 12) + ) + (i32.store + (global.get $global$0) + (local.get $2) + ) + (return + (i32.const -1) + ) + ) + ) + ) + (local.get $2) + ) + ) + (func $35 (; 48 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (block $label$1 (result i32) + (local.set $4 + (i32.add + (local.get $0) + (local.get $2) + ) + ) + (if + (i32.ge_s + (local.get $2) + (i32.const 20) + ) + (block + (local.set $1 + (i32.and + (local.get $1) + (i32.const 255) + ) + ) + (if + (local.tee $3 + (i32.and + (local.get $0) + (i32.const 3) + ) + ) + (block + (local.set $3 + (i32.sub + (i32.add + (local.get $0) + (i32.const 4) + ) + (local.get $3) + ) + ) + (loop $label$4 + (if + (i32.lt_s + (local.get 
$0) + (local.get $3) + ) + (block + (i32.store8 + (local.get $0) + (local.get $1) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (br $label$4) + ) + ) + ) + ) + ) + (local.set $3 + (i32.or + (i32.or + (i32.or + (local.get $1) + (i32.shl + (local.get $1) + (i32.const 8) + ) + ) + (i32.shl + (local.get $1) + (i32.const 16) + ) + ) + (i32.shl + (local.get $1) + (i32.const 24) + ) + ) + ) + (local.set $5 + (i32.and + (local.get $4) + (i32.const -4) + ) + ) + (loop $label$6 + (if + (i32.lt_s + (local.get $0) + (local.get $5) + ) + (block + (i32.store + (local.get $0) + (local.get $3) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 4) + ) + ) + (br $label$6) + ) + ) + ) + ) + ) + (loop $label$8 + (if + (i32.lt_s + (local.get $0) + (local.get $4) + ) + (block + (i32.store8 + (local.get $0) + (local.get $1) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (br $label$8) + ) + ) + ) + (i32.sub + (local.get $0) + (local.get $2) + ) + ) + ) + (func $36 (; 49 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (local $3 i32) + (block $label$1 (result i32) + (if + (i32.ge_s + (local.get $2) + (i32.const 4096) + ) + (return + (call $fimport$12 + (local.get $0) + (local.get $1) + (local.get $2) + ) + ) + ) + (local.set $3 + (local.get $0) + ) + (if + (i32.eq + (i32.and + (local.get $0) + (i32.const 3) + ) + (i32.and + (local.get $1) + (i32.const 3) + ) + ) + (block + (loop $label$4 + (if + (i32.and + (local.get $0) + (i32.const 3) + ) + (block + (if + (i32.eqz + (local.get $2) + ) + (return + (local.get $3) + ) + ) + (i32.store8 + (local.get $0) + (i32.load8_s + (local.get $1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 1) + ) + ) + (br $label$4) + ) + ) + ) + (loop $label$7 + (if + (i32.ge_s + (local.get $2) + (i32.const 4) + ) + (block + (i32.store + (local.get $0) + (i32.load + (local.get $1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 4) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 4) + ) + ) + (br $label$7) + ) + ) + ) + ) + ) + (loop $label$9 + (if + (i32.gt_s + (local.get $2) + (i32.const 0) + ) + (block + (i32.store8 + (local.get $0) + (i32.load8_s + (local.get $1) + ) + ) + (local.set $0 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (local.set $1 + (i32.add + (local.get $1) + (i32.const 1) + ) + ) + (local.set $2 + (i32.sub + (local.get $2) + (i32.const 1) + ) + ) + (br $label$9) + ) + ) + ) + (local.get $3) + ) + ) + (func $37 (; 50 ;) (type $3) (result i32) + (i32.const 0) + ) + (func $38 (; 51 ;) (type $4) (param $0 i32) (param $1 i32) (result i32) + (call_indirect (type $1) + (local.get $1) + (i32.add + (i32.and + (local.get $0) + (i32.const 1) + ) + (i32.const 0) + ) + ) + ) + (func $39 (; 52 ;) (type $12) (param $0 i32) (param $1 i32) (param $2 i32) (param $3 i32) (result i32) + (call_indirect (type $0) + (local.get $1) + (local.get $2) + (local.get $3) + (i32.add + (i32.and + (local.get $0) + (i32.const 3) + ) + (i32.const 2) + ) + ) + ) + (func $40 (; 53 ;) (type $5) (param $0 i32) (param $1 i32) + (call_indirect (type $2) + (local.get $1) + (i32.add + (i32.and + (local.get $0) + (i32.const 1) + ) + (i32.const 6) + ) + ) + ) + (func $41 (; 54 ;) (type $1) (param $0 i32) (result i32) + (block $label$1 (result i32) + (call $fimport$3 + 
(i32.const 0) + ) + (i32.const 0) + ) + ) + (func $42 (; 55 ;) (type $0) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (block $label$1 (result i32) + (call $fimport$3 + (i32.const 1) + ) + (i32.const 0) + ) + ) + (func $43 (; 56 ;) (type $2) (param $0 i32) + (call $fimport$3 + (i32.const 2) + ) + ) +) + diff --git a/cranelift/wasmtests/fac-multi-value.wat b/cranelift/wasmtests/fac-multi-value.wat new file mode 100644 index 0000000000..7a4d5c0fab --- /dev/null +++ b/cranelift/wasmtests/fac-multi-value.wat @@ -0,0 +1,19 @@ +(module + ;; Iterative factorial without locals. + (func $pick0 (param i64) (result i64 i64) + (get_local 0) (get_local 0) + ) + (func $pick1 (param i64 i64) (result i64 i64 i64) + (get_local 0) (get_local 1) (get_local 0) + ) + (func (export "fac-ssa") (param i64) (result i64) + (i64.const 1) (get_local 0) + (loop $l (param i64 i64) (result i64) + (call $pick1) (call $pick1) (i64.mul) + (call $pick1) (i64.const 1) (i64.sub) + (call $pick0) (i64.const 0) (i64.gt_u) + (br_if $l) + (drop) (return) + ) + ) +) diff --git a/cranelift/wasmtests/fibonacci.wat b/cranelift/wasmtests/fibonacci.wat new file mode 100644 index 0000000000..1788a467ca --- /dev/null +++ b/cranelift/wasmtests/fibonacci.wat @@ -0,0 +1,22 @@ +(module + (memory 1) + (func $main (local i32 i32 i32 i32) + (set_local 0 (i32.const 0)) + (set_local 1 (i32.const 1)) + (set_local 2 (i32.const 1)) + (set_local 3 (i32.const 0)) + (block + (loop + (br_if 1 (i32.gt_s (get_local 0) (i32.const 5))) + (set_local 3 (get_local 2)) + (set_local 2 (i32.add (get_local 2) (get_local 1))) + (set_local 1 (get_local 3)) + (set_local 0 (i32.add (get_local 0) (i32.const 1))) + (br 0) + ) + ) + (i32.store (i32.const 0) (get_local 2)) + ) + (start $main) + (data (i32.const 0) "0000") +) diff --git a/cranelift/wasmtests/globals.wat b/cranelift/wasmtests/globals.wat new file mode 100644 index 0000000000..646e5f0f45 --- /dev/null +++ b/cranelift/wasmtests/globals.wat @@ -0,0 +1,8 @@ +(module + (global $x (mut i32) (i32.const 4)) + (memory 1) + (func $main (local i32) + (i32.store (i32.const 0) (get_global $x)) + ) + (start $main) +) diff --git a/cranelift/wasmtests/icall-simd.wat b/cranelift/wasmtests/icall-simd.wat new file mode 100644 index 0000000000..d656b265b9 --- /dev/null +++ b/cranelift/wasmtests/icall-simd.wat @@ -0,0 +1,7 @@ +(module + (type $ft (func (param v128) (result v128))) + (func $foo (export "foo") (param i32) (param v128) (result v128) + (call_indirect (type $ft) (local.get 1) (local.get 0)) + ) + (table (;0;) 23 23 anyfunc) +) diff --git a/cranelift/wasmtests/icall.wat b/cranelift/wasmtests/icall.wat new file mode 100644 index 0000000000..76f28f47a9 --- /dev/null +++ b/cranelift/wasmtests/icall.wat @@ -0,0 +1,7 @@ +(module + (type $ft (func (param f32) (result i32))) + (func $foo (export "foo") (param i32 f32) (result i32) + (call_indirect (type $ft) (get_local 1) (get_local 0)) + ) + (table (;0;) 23 23 anyfunc) +) diff --git a/cranelift/wasmtests/if-reachability-translation-0.wat b/cranelift/wasmtests/if-reachability-translation-0.wat new file mode 100644 index 0000000000..54145a9d4e --- /dev/null +++ b/cranelift/wasmtests/if-reachability-translation-0.wat @@ -0,0 +1,12 @@ +;; An unreachable `if` means that the consequent, alternative, and following +;; block are also unreachable. 
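+;; Everything from `unreachable` through the trailing `i32.const 0` is statically dead code.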
+ +(module + (func (param i32) (result i32) + unreachable + if ;; label = @2 + nop + else + nop + end + i32.const 0)) diff --git a/cranelift/wasmtests/if-reachability-translation-1.wat b/cranelift/wasmtests/if-reachability-translation-1.wat new file mode 100644 index 0000000000..6e1e6121f4 --- /dev/null +++ b/cranelift/wasmtests/if-reachability-translation-1.wat @@ -0,0 +1,12 @@ +;; Reachable `if` head and reachable consequent and alternative means that the +;; following block is also reachable. + +(module + (func (param i32) (result i32) + local.get 0 + if ;; label = @2 + nop + else + nop + end + i32.const 0)) diff --git a/cranelift/wasmtests/if-reachability-translation-2.wat b/cranelift/wasmtests/if-reachability-translation-2.wat new file mode 100644 index 0000000000..4bbaf99820 --- /dev/null +++ b/cranelift/wasmtests/if-reachability-translation-2.wat @@ -0,0 +1,12 @@ +;; Reachable `if` head and unreachable consequent and reachable alternative +;; means that the following block is also reachable. + +(module + (func (param i32) (result i32) + local.get 0 + if + unreachable + else + nop + end + i32.const 0)) diff --git a/cranelift/wasmtests/if-reachability-translation-3.wat b/cranelift/wasmtests/if-reachability-translation-3.wat new file mode 100644 index 0000000000..72251cba16 --- /dev/null +++ b/cranelift/wasmtests/if-reachability-translation-3.wat @@ -0,0 +1,12 @@ +;; Reachable `if` head and consequent and unreachable alternative means that the +;; following block is also reachable. + +(module + (func (param i32) (result i32) + local.get 0 + if + nop + else + unreachable + end + i32.const 0)) diff --git a/cranelift/wasmtests/if-reachability-translation-4.wat b/cranelift/wasmtests/if-reachability-translation-4.wat new file mode 100644 index 0000000000..b8a4069430 --- /dev/null +++ b/cranelift/wasmtests/if-reachability-translation-4.wat @@ -0,0 +1,12 @@ +;; Reachable `if` head and unreachable consequent and alternative means that the +;; following block is unreachable. + +(module + (func (param i32) (result i32) + local.get 0 + if + unreachable + else + unreachable + end + i32.const 0)) diff --git a/cranelift/wasmtests/if-reachability-translation-5.wat b/cranelift/wasmtests/if-reachability-translation-5.wat new file mode 100644 index 0000000000..7b1f665e05 --- /dev/null +++ b/cranelift/wasmtests/if-reachability-translation-5.wat @@ -0,0 +1,14 @@ +;; Reachable `if` head and unreachable consequent and alternative, but with a +;; branch out of the consequent, means that the following block is reachable. + +(module + (func (param i32 i32) (result i32) + local.get 0 + if + local.get 1 + br_if 0 + unreachable + else + unreachable + end + i32.const 0)) diff --git a/cranelift/wasmtests/if-reachability-translation-6.wat b/cranelift/wasmtests/if-reachability-translation-6.wat new file mode 100644 index 0000000000..d9da824f14 --- /dev/null +++ b/cranelift/wasmtests/if-reachability-translation-6.wat @@ -0,0 +1,14 @@ +;; Reachable `if` head and unreachable consequent and alternative, but with a +;; branch out of the alternative, means that the following block is reachable. 
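+;; The `br_if 0` targets the `if`'s own label, so a taken branch jumps to its `end`.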
+ +(module + (func (param i32 i32) (result i32) + local.get 0 + if + unreachable + else + local.get 1 + br_if 0 + unreachable + end + i32.const 0)) diff --git a/cranelift/wasmtests/issue-1306-name-section-with-u32-max-function-index.wasm b/cranelift/wasmtests/issue-1306-name-section-with-u32-max-function-index.wasm new file mode 100644 index 0000000000..79d8dc3388 Binary files /dev/null and b/cranelift/wasmtests/issue-1306-name-section-with-u32-max-function-index.wasm differ diff --git a/cranelift/wasmtests/memory.wat b/cranelift/wasmtests/memory.wat new file mode 100644 index 0000000000..0c81bad174 --- /dev/null +++ b/cranelift/wasmtests/memory.wat @@ -0,0 +1,11 @@ +(module + (memory 1) + (func $main (local i32) + (i32.store (i32.const 0) (i32.const 0x0)) + (if (i32.load (i32.const 0)) + (then (i32.store (i32.const 0) (i32.const 0xa))) + (else (i32.store (i32.const 0) (i32.const 0xb)))) + ) + (start $main) + (data (i32.const 0) "0000") +) diff --git a/cranelift/wasmtests/multi-0.wat b/cranelift/wasmtests/multi-0.wat new file mode 100644 index 0000000000..d1cc24c596 --- /dev/null +++ b/cranelift/wasmtests/multi-0.wat @@ -0,0 +1,3 @@ +(module + (func (export "i64.dup") (param i64) (result i64 i64) + (get_local 0) (get_local 0))) diff --git a/cranelift/wasmtests/multi-1.wat b/cranelift/wasmtests/multi-1.wat new file mode 100644 index 0000000000..a814647419 --- /dev/null +++ b/cranelift/wasmtests/multi-1.wat @@ -0,0 +1,6 @@ +(module + (func (export "multiBlock") (param i64 i32) (result i32 i64 f64) + (local.get 1) + (local.get 0) + (block (param i32 i64) (result i32 i64 f64) + (f64.const 1234.5)))) diff --git a/cranelift/wasmtests/multi-10.wat b/cranelift/wasmtests/multi-10.wat new file mode 100644 index 0000000000..01fbf42941 --- /dev/null +++ b/cranelift/wasmtests/multi-10.wat @@ -0,0 +1,10 @@ +(module + (func (export "f") (param i64 i32) (result i64 i64) + (local.get 0) + (local.get 1) + ;; If with else. Fewer params than results. + (if (param i64) (result i64 i64) + (then + (i64.const -1)) + (else + (i64.const -2))))) diff --git a/cranelift/wasmtests/multi-11.wat b/cranelift/wasmtests/multi-11.wat new file mode 100644 index 0000000000..1ae75889bc --- /dev/null +++ b/cranelift/wasmtests/multi-11.wat @@ -0,0 +1,7 @@ +(module + (func (export "multiLoop") (param i64) (result i64 i64) + (local.get 0) + ;; Fewer params than results. + (loop (param i64) (result i64 i64) + i64.const 42 + return))) diff --git a/cranelift/wasmtests/multi-12.wat b/cranelift/wasmtests/multi-12.wat new file mode 100644 index 0000000000..9a3e4f7fb5 --- /dev/null +++ b/cranelift/wasmtests/multi-12.wat @@ -0,0 +1,9 @@ +(module + (func (export "multiLoop") (param i64 i64 i64) (result i64 i64) + (local.get 2) + (local.get 1) + (local.get 0) + ;; More params than results. 
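+ ;; The body drops one i64; `return` yields the remaining two as the results.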
+ (loop (param i64 i64 i64) (result i64 i64) + drop + return))) diff --git a/cranelift/wasmtests/multi-13.wat b/cranelift/wasmtests/multi-13.wat new file mode 100644 index 0000000000..4f4846300e --- /dev/null +++ b/cranelift/wasmtests/multi-13.wat @@ -0,0 +1,10 @@ +(module + (func (export "as-if-then") (param i32 i32) (result i32) + (block (result i32) + (if (result i32) (local.get 0) + (then (br 1 (i32.const 3))) + (else (local.get 1)) + ) + ) + ) +) diff --git a/cranelift/wasmtests/multi-14.wat b/cranelift/wasmtests/multi-14.wat new file mode 100644 index 0000000000..26d0cb596a --- /dev/null +++ b/cranelift/wasmtests/multi-14.wat @@ -0,0 +1,10 @@ +(module + (func (export "as-if-else") (param i32 i32) (result i32) + (block (result i32) + (if (result i32) (local.get 0) + (then (local.get 1)) + (else (br 1 (i32.const 4))) + ) + ) + ) +) diff --git a/cranelift/wasmtests/multi-15.wat b/cranelift/wasmtests/multi-15.wat new file mode 100644 index 0000000000..2f017bd6aa --- /dev/null +++ b/cranelift/wasmtests/multi-15.wat @@ -0,0 +1,22 @@ +(module + (func (export "large-sig") + (param i32 i64 f32 f32 i32 f64 f32 i32 i32 i32 f32 f64 f64 f64 i32 i32 f32) + (result f64 f32 i32 i32 i32 i64 f32 i32 i32 f32 f64 f64 i32 f32 i32 f64) + (local.get 5) + (local.get 2) + (local.get 0) + (local.get 8) + (local.get 7) + (local.get 1) + (local.get 3) + (local.get 9) + (local.get 4) + (local.get 6) + (local.get 13) + (local.get 11) + (local.get 15) + (local.get 16) + (local.get 14) + (local.get 12) + ) +) diff --git a/cranelift/wasmtests/multi-16.wat b/cranelift/wasmtests/multi-16.wat new file mode 100644 index 0000000000..1e60aa8cc8 --- /dev/null +++ b/cranelift/wasmtests/multi-16.wat @@ -0,0 +1,9 @@ +(module + (func (export "param") (param i32) (result i32) + (i32.const 1) + (if (param i32) (result i32) (local.get 0) + (then (i32.const 2) (i32.add)) + (else (i32.const -2) (i32.add)) + ) + ) +) diff --git a/cranelift/wasmtests/multi-2.wat b/cranelift/wasmtests/multi-2.wat new file mode 100644 index 0000000000..6f2a7378b1 --- /dev/null +++ b/cranelift/wasmtests/multi-2.wat @@ -0,0 +1,6 @@ +(module + (func (export "multiLoop") (param i64 i64) (result i64 i64) + (local.get 1) + (local.get 0) + (loop (param i64 i64) (result i64 i64) + return))) diff --git a/cranelift/wasmtests/multi-3.wat b/cranelift/wasmtests/multi-3.wat new file mode 100644 index 0000000000..d58071f9c7 --- /dev/null +++ b/cranelift/wasmtests/multi-3.wat @@ -0,0 +1,13 @@ +(module + (func (export "multiIf") (param i32 i64 i64) (result i64 i64) + (local.get 2) + (local.get 1) + (local.get 0) + (if (param i64 i64) (result i64 i64) + (then return) + ;; Hits the code path for an `else` after a block that ends unreachable. + (else + (drop) + (drop) + (i64.const 0) + (i64.const 0))))) diff --git a/cranelift/wasmtests/multi-4.wat b/cranelift/wasmtests/multi-4.wat new file mode 100644 index 0000000000..9c028531d3 --- /dev/null +++ b/cranelift/wasmtests/multi-4.wat @@ -0,0 +1,13 @@ +(module + (func (export "multiIf2") (param i32 i64 i64) (result i64 i64) + (local.get 2) + (local.get 1) + (local.get 0) + (if (param i64 i64) (result i64 i64) + (then + i64.add + i64.const 1) + ;; Hits the code path for an `else` after a block that does not end unreachable. 
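+ ;; Both arms consume the two i64 params (add or subtract) and then push a constant.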
+ (else + i64.sub + i64.const 2)))) diff --git a/cranelift/wasmtests/multi-5.wat b/cranelift/wasmtests/multi-5.wat new file mode 100644 index 0000000000..92944770f9 --- /dev/null +++ b/cranelift/wasmtests/multi-5.wat @@ -0,0 +1,11 @@ +(module + (func (export "foo") + i32.const 1 + i64.const 2 + ;; More params than results. + (block (param i32 i64) (result i32) + drop + ) + drop + ) +) diff --git a/cranelift/wasmtests/multi-6.wat b/cranelift/wasmtests/multi-6.wat new file mode 100644 index 0000000000..c1135a1187 --- /dev/null +++ b/cranelift/wasmtests/multi-6.wat @@ -0,0 +1,11 @@ +(module + (func (export "foo") + i32.const 1 + ;; Fewer params than results. + (block (param i32) (result i32 i64) + i64.const 2 + ) + drop + drop + ) +) diff --git a/cranelift/wasmtests/multi-7.wat b/cranelift/wasmtests/multi-7.wat new file mode 100644 index 0000000000..c4545ba26c --- /dev/null +++ b/cranelift/wasmtests/multi-7.wat @@ -0,0 +1,9 @@ +(module + (func (export "f") (param i64 i32) (result i64) + (local.get 0) + (local.get 1) + ;; If with no else. Same number of params and results. + (if (param i64) (result i64) + (then + (drop) + (i64.const -1))))) diff --git a/cranelift/wasmtests/multi-8.wat b/cranelift/wasmtests/multi-8.wat new file mode 100644 index 0000000000..1bc23f9f5d --- /dev/null +++ b/cranelift/wasmtests/multi-8.wat @@ -0,0 +1,12 @@ +(module + (func (export "f") (param i64 i32) (result i64) + (local.get 0) + (local.get 1) + ;; If with else. Same number of params and results. + (if (param i64) (result i64) + (then + (drop) + (i64.const -1)) + (else + (drop) + (i64.const -2))))) diff --git a/cranelift/wasmtests/multi-9.wat b/cranelift/wasmtests/multi-9.wat new file mode 100644 index 0000000000..d0cecf71b2 --- /dev/null +++ b/cranelift/wasmtests/multi-9.wat @@ -0,0 +1,15 @@ +(module + (func (export "f") (param i64 i32) (result i64) + (local.get 0) + (local.get 1) + (local.get 1) + ;; If with else. More params than results. 
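+ ;; Each arm drops both params (an i64 and an i32) and pushes a single i64 result.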
+ (if (param i64 i32) (result i64) + (then + (drop) + (drop) + (i64.const -1)) + (else + (drop) + (drop) + (i64.const -2))))) diff --git a/cranelift/wasmtests/nullref.wat b/cranelift/wasmtests/nullref.wat new file mode 100644 index 0000000000..86714a2b6e --- /dev/null +++ b/cranelift/wasmtests/nullref.wat @@ -0,0 +1,11 @@ +(module + (func (result nullref) + ref.null + ) + + (func (result nullref) + (block (result nullref) + ref.null + ) + ) +) diff --git a/cranelift/wasmtests/passive-data.wat b/cranelift/wasmtests/passive-data.wat new file mode 100644 index 0000000000..9316cd0f59 --- /dev/null +++ b/cranelift/wasmtests/passive-data.wat @@ -0,0 +1,11 @@ +(module + (data $passive "this is a passive data segment") + + (func (export "init") (param i32 i32 i32) + local.get 0 ;; dst + local.get 1 ;; src + local.get 2 ;; cnt + memory.init $passive) + + (func (export "drop") + data.drop $passive)) diff --git a/cranelift/wasmtests/ref-func-0.wat b/cranelift/wasmtests/ref-func-0.wat new file mode 100644 index 0000000000..5a6a6a3b68 --- /dev/null +++ b/cranelift/wasmtests/ref-func-0.wat @@ -0,0 +1,12 @@ +(module + (func $imported (import "env" "f") (param i32) (result i32)) + (func $local (result anyref anyref funcref funcref) + global.get 0 + global.get 1 + global.get 2 + global.get 3) + + (global (export "anyref-imported") anyref (ref.func $imported)) + (global (export "anyref-local") anyref (ref.func $local)) + (global (export "funcref-imported") funcref (ref.func $imported)) + (global (export "funcref-local") funcref (ref.func $local))) diff --git a/cranelift/wasmtests/rust_fannkuch.wat b/cranelift/wasmtests/rust_fannkuch.wat new file mode 100644 index 0000000000..cd47f661c8 --- /dev/null +++ b/cranelift/wasmtests/rust_fannkuch.wat @@ -0,0 +1,2511 @@ +(module + (type $0 (func (param i32 i32 i32) (result i32))) + (type $1 (func (param i32 i32) (result i32))) + (type $2 (func (param i32))) + (type $3 (func (param i32) (result i32))) + (type $4 (func (param i32 i32))) + (type $5 (func (param i64 i32) (result i32))) + (type $6 (func (param i32) (result i64))) + (type $7 (func)) + (type $8 (func (param i32 i32))) + (type $9 (func (param i32 i32 i32) (result i32))) + (memory $0 17) + (data (i32.const 1048576) "src/lib.rs\00\00\00\00\00\00attempt to divide by zero\00\00\00\00\00\00\00attempt to divide with overflow\00index out of bounds: the len is but the index is 00010203040506070809101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899called `Option::unwrap()` on a `None` valuesrc/libcore/option.rssrc/lib.rs") + (data (i32.const 1048982) "\10\00\n\00\00\00%\00\00\00\1d\00\00\00\10\00\10\00\19\00\00\00\00\00\10\00\n\00\00\00&\00\00\00\15\00\00\000\00\10\00\1f\00\00\00\00\00\10\00\n\00\00\00&\00\00\00\15\00\00\00\00\00\10\00\n\00\00\00.\00\00\00\15\00\00\00\00\00\10\00\n\00\00\000\00\00\00\15\00\00\00\00\00\10\00\n\00\00\00-\00\00\00\11\00\00\00\00\00\10\00\n\00\00\00E\00\00\00\17\00\00\00\00\00\10\00\n\00\00\00q\00\00\00\"\00\00\00\00\00\10\00\n\00\00\00s\00\00\00\11\00\00\00P\00\10\00 \00\00\00p\00\10\00\12\00\00\00\02\00\00\00\00\00\00\00\01\00\00\00\03\00\00\00J\01\10\00+\00\00\00u\01\10\00\15\00\00\00Y\01\00\00\15\00\00\00\8a\01\10\00\n\00\00\00\08\00\00\00\t\00\00\00\8a\01\10\00\n\00\00\00\n\00\00\00\14") + (table $0 4 4 funcref) + (elem (i32.const 1) $4 $7 $8) + (global $global$0 (mut i32) (i32.const 1048576)) + (global $global$1 i32 (i32.const 1049244)) + (global 
$global$2 i32 (i32.const 1049244)) + (export "memory" (memory $0)) + (export "__heap_base" (global $global$1)) + (export "__data_end" (global $global$2)) + (export "run_fannkuch" (func $10)) + (func $0 (; 0 ;) (type $7) + (local $0 i32) + (local $1 i32) + (local.set $0 + (i32.const 1) + ) + (block $label$1 + (block $label$2 + (block $label$3 + (if + (i32.eq + (i32.load + (i32.const 1049232) + ) + (i32.const 1) + ) + (block + (i32.store + (i32.const 1049236) + (local.tee $0 + (i32.add + (i32.load + (i32.const 1049236) + ) + (i32.const 1) + ) + ) + ) + (br_if $label$3 + (i32.lt_u + (local.get $0) + (i32.const 3) + ) + ) + (br $label$2) + ) + ) + (i64.store + (i32.const 1049232) + (i64.const 4294967297) + ) + ) + (br_if $label$2 + (i32.le_s + (local.tee $1 + (i32.load + (i32.const 1049240) + ) + ) + (i32.const -1) + ) + ) + (i32.store + (i32.const 1049240) + (local.get $1) + ) + (br_if $label$1 + (i32.lt_u + (local.get $0) + (i32.const 2) + ) + ) + ) + (unreachable) + ) + (unreachable) + ) + (func $1 (; 1 ;) (type $2) (param $0 i32) + (local $1 i32) + (global.set $global$0 + (local.tee $1 + (i32.sub + (global.get $global$0) + (i32.const 16) + ) + ) + ) + (if + (i32.eqz + (i32.load offset=8 + (local.get $0) + ) + ) + (block + (call $2 + (i32.const 1049172) + ) + (unreachable) + ) + ) + (i64.store offset=8 + (local.get $1) + (i64.load align=4 + (i32.add + (local.get $0) + (i32.const 20) + ) + ) + ) + (i64.store + (local.get $1) + (i64.load offset=12 align=4 + (local.get $0) + ) + ) + (call $0) + (unreachable) + ) + (func $2 (; 2 ;) (type $2) (param $0 i32) + (local $1 i32) + (local $2 i64) + (local $3 i64) + (local $4 i64) + (global.set $global$0 + (local.tee $1 + (i32.sub + (global.get $global$0) + (i32.const 48) + ) + ) + ) + (local.set $2 + (i64.load offset=8 align=4 + (local.get $0) + ) + ) + (local.set $3 + (i64.load offset=16 align=4 + (local.get $0) + ) + ) + (local.set $4 + (i64.load align=4 + (local.get $0) + ) + ) + (i32.store + (i32.add + (local.get $1) + (i32.const 20) + ) + (i32.const 0) + ) + (i64.store offset=24 + (local.get $1) + (local.get $4) + ) + (i32.store offset=16 + (local.get $1) + (i32.const 1048656) + ) + (i64.store offset=4 align=4 + (local.get $1) + (i64.const 1) + ) + (i32.store + (local.get $1) + (i32.add + (local.get $1) + (i32.const 24) + ) + ) + (i64.store offset=40 + (local.get $1) + (local.get $3) + ) + (i64.store offset=32 + (local.get $1) + (local.get $2) + ) + (call $5 + (local.get $1) + (i32.add + (local.get $1) + (i32.const 32) + ) + ) + (unreachable) + ) + (func $3 (; 3 ;) (type $8) (param $0 i32) (param $1 i32) + (local $2 i32) + (global.set $global$0 + (local.tee $2 + (i32.sub + (global.get $global$0) + (i32.const 48) + ) + ) + ) + (i32.store offset=4 + (local.get $2) + (i32.const 16) + ) + (i32.store + (local.get $2) + (local.get $1) + ) + (i32.store + (i32.add + (local.get $2) + (i32.const 44) + ) + (i32.const 1) + ) + (i32.store + (i32.add + (local.get $2) + (i32.const 28) + ) + (i32.const 2) + ) + (i32.store offset=36 + (local.get $2) + (i32.const 1) + ) + (i64.store offset=12 align=4 + (local.get $2) + (i64.const 2) + ) + (i32.store offset=8 + (local.get $2) + (i32.const 1049140) + ) + (i32.store offset=40 + (local.get $2) + (local.get $2) + ) + (i32.store offset=32 + (local.get $2) + (i32.add + (local.get $2) + (i32.const 4) + ) + ) + (i32.store offset=24 + (local.get $2) + (i32.add + (local.get $2) + (i32.const 32) + ) + ) + (call $5 + (i32.add + (local.get $2) + (i32.const 8) + ) + (local.get $0) + ) + (unreachable) + ) + (func $4 (; 4 ;) (type 
$1) (param $0 i32) (param $1 i32) (result i32) + (call $6 + (i64.load32_u + (local.get $0) + ) + (local.get $1) + ) + ) + (func $5 (; 5 ;) (type $4) (param $0 i32) (param $1 i32) + (local $2 i32) + (local $3 i64) + (global.set $global$0 + (local.tee $2 + (i32.sub + (global.get $global$0) + (i32.const 32) + ) + ) + ) + (local.set $3 + (i64.load align=4 + (local.get $1) + ) + ) + (i64.store align=4 + (i32.add + (local.get $2) + (i32.const 20) + ) + (i64.load offset=8 align=4 + (local.get $1) + ) + ) + (i64.store offset=12 align=4 + (local.get $2) + (local.get $3) + ) + (i32.store offset=8 + (local.get $2) + (local.get $0) + ) + (i32.store offset=4 + (local.get $2) + (i32.const 1049156) + ) + (i32.store + (local.get $2) + (i32.const 1048656) + ) + (call $1 + (local.get $2) + ) + (unreachable) + ) + (func $6 (; 6 ;) (type $5) (param $0 i64) (param $1 i32) (result i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i64) + (local $14 i32) + (local $15 i32) + (global.set $global$0 + (local.tee $6 + (i32.sub + (global.get $global$0) + (i32.const 48) + ) + ) + ) + (local.set $2 + (i32.const 39) + ) + (block $label$1 + (block $label$2 + (if + (i64.ge_u + (local.get $0) + (i64.const 10000) + ) + (block + (loop $label$4 + (i32.store16 align=1 + (i32.add + (local.tee $3 + (i32.add + (i32.add + (local.get $6) + (i32.const 9) + ) + (local.get $2) + ) + ) + (i32.const -4) + ) + (i32.load16_u align=1 + (i32.add + (i32.shl + (local.tee $5 + (i32.div_u + (local.tee $4 + (i32.wrap_i64 + (i64.add + (local.get $0) + (i64.mul + (local.tee $13 + (i64.div_u + (local.get $0) + (i64.const 10000) + ) + ) + (i64.const -10000) + ) + ) + ) + ) + (i32.const 100) + ) + ) + (i32.const 1) + ) + (i32.const 1048706) + ) + ) + ) + (i32.store16 align=1 + (i32.add + (local.get $3) + (i32.const -2) + ) + (i32.load16_u align=1 + (i32.add + (i32.shl + (i32.add + (i32.mul + (local.get $5) + (i32.const -100) + ) + (local.get $4) + ) + (i32.const 1) + ) + (i32.const 1048706) + ) + ) + ) + (local.set $2 + (i32.add + (local.get $2) + (i32.const -4) + ) + ) + (br_if $label$4 + (block (result i32) + (local.set $14 + (i64.gt_u + (local.get $0) + (i64.const 99999999) + ) + ) + (local.set $0 + (local.get $13) + ) + (local.get $14) + ) + ) + ) + (br_if $label$1 + (i32.le_s + (local.tee $3 + (i32.wrap_i64 + (local.get $13) + ) + ) + (i32.const 99) + ) + ) + (br $label$2) + ) + ) + (br_if $label$1 + (i32.le_s + (local.tee $3 + (i32.wrap_i64 + (local.tee $13 + (local.get $0) + ) + ) + ) + (i32.const 99) + ) + ) + ) + (i32.store16 align=1 + (i32.add + (local.tee $2 + (i32.add + (local.get $2) + (i32.const -2) + ) + ) + (i32.add + (local.get $6) + (i32.const 9) + ) + ) + (i32.load16_u align=1 + (i32.add + (i32.shl + (i32.and + (i32.add + (i32.mul + (local.tee $3 + (i32.div_u + (i32.and + (local.tee $4 + (i32.wrap_i64 + (local.get $13) + ) + ) + (i32.const 65535) + ) + (i32.const 100) + ) + ) + (i32.const -100) + ) + (local.get $4) + ) + (i32.const 65535) + ) + (i32.const 1) + ) + (i32.const 1048706) + ) + ) + ) + ) + (block $label$5 + (if + (i32.le_s + (local.get $3) + (i32.const 9) + ) + (block + (i32.store8 + (i32.add + (local.tee $2 + (i32.add + (local.get $2) + (i32.const -1) + ) + ) + (i32.add + (local.get $6) + (i32.const 9) + ) + ) + (i32.add + (local.get $3) + (i32.const 48) + ) + ) + (br $label$5) + ) + ) + (i32.store16 align=1 + (i32.add + (local.tee $2 + (i32.add + (local.get $2) + 
(i32.const -2) + ) + ) + (i32.add + (local.get $6) + (i32.const 9) + ) + ) + (i32.load16_u align=1 + (i32.add + (i32.shl + (local.get $3) + (i32.const 1) + ) + (i32.const 1048706) + ) + ) + ) + ) + (local.set $7 + (i32.sub + (i32.const 39) + (local.get $2) + ) + ) + (local.set $3 + (i32.const 1) + ) + (local.set $8 + (select + (i32.const 43) + (i32.const 1114112) + (local.tee $11 + (i32.and + (local.tee $4 + (i32.load + (local.get $1) + ) + ) + (i32.const 1) + ) + ) + ) + ) + (local.set $9 + (i32.and + (i32.shr_s + (i32.shl + (local.get $4) + (i32.const 29) + ) + (i32.const 31) + ) + (i32.const 1048656) + ) + ) + (local.set $10 + (i32.add + (i32.add + (local.get $6) + (i32.const 9) + ) + (local.get $2) + ) + ) + (block $label$7 + (block $label$8 + (block $label$9 + (block $label$10 + (block $label$11 + (block $label$12 + (block $label$13 + (block $label$14 + (local.set $3 + (block $label$15 (result i32) + (block $label$16 + (block $label$17 + (block $label$18 + (block $label$19 + (if + (i32.eq + (i32.load offset=8 + (local.get $1) + ) + (i32.const 1) + ) + (block + (br_if $label$19 + (i32.le_u + (local.tee $5 + (i32.load + (i32.add + (local.get $1) + (i32.const 12) + ) + ) + ) + (local.tee $2 + (i32.add + (local.get $7) + (local.get $11) + ) + ) + ) + ) + (br_if $label$18 + (i32.and + (local.get $4) + (i32.const 8) + ) + ) + (local.set $4 + (i32.sub + (local.get $5) + (local.get $2) + ) + ) + (br_if $label$17 + (i32.eqz + (i32.and + (local.tee $3 + (select + (i32.const 1) + (local.tee $3 + (i32.load8_u offset=48 + (local.get $1) + ) + ) + (i32.eq + (local.get $3) + (i32.const 3) + ) + ) + ) + (i32.const 3) + ) + ) + ) + (br_if $label$16 + (i32.eq + (local.get $3) + (i32.const 2) + ) + ) + (local.set $5 + (i32.const 0) + ) + (br $label$15 + (local.get $4) + ) + ) + ) + (br_if $label$9 + (call $9 + (local.get $1) + (local.get $8) + (local.get $9) + ) + ) + (br $label$8) + ) + (br_if $label$9 + (call $9 + (local.get $1) + (local.get $8) + (local.get $9) + ) + ) + (br $label$8) + ) + (i32.store8 offset=48 + (local.get $1) + (i32.const 1) + ) + (i32.store offset=4 + (local.get $1) + (i32.const 48) + ) + (br_if $label$9 + (call $9 + (local.get $1) + (local.get $8) + (local.get $9) + ) + ) + (local.set $3 + (i32.sub + (local.get $5) + (local.get $2) + ) + ) + (br_if $label$14 + (i32.eqz + (i32.and + (local.tee $4 + (select + (i32.const 1) + (local.tee $4 + (i32.load8_u + (i32.add + (local.get $1) + (i32.const 48) + ) + ) + ) + (i32.eq + (local.get $4) + (i32.const 3) + ) + ) + ) + (i32.const 3) + ) + ) + ) + (br_if $label$13 + (i32.eq + (local.get $4) + (i32.const 2) + ) + ) + (local.set $4 + (i32.const 0) + ) + (br $label$12) + ) + (local.set $5 + (local.get $4) + ) + (br $label$15 + (i32.const 0) + ) + ) + (local.set $5 + (i32.shr_u + (i32.add + (local.get $4) + (i32.const 1) + ) + (i32.const 1) + ) + ) + (i32.shr_u + (local.get $4) + (i32.const 1) + ) + ) + ) + (local.set $2 + (i32.const -1) + ) + (local.set $4 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (local.set $11 + (i32.add + (local.get $1) + (i32.const 24) + ) + ) + (local.set $12 + (i32.add + (local.get $1) + (i32.const 28) + ) + ) + (block $label$21 + (loop $label$22 + (br_if $label$21 + (i32.ge_u + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (local.get $3) + ) + ) + (br_if $label$22 + (i32.eqz + (call_indirect (type $1) + (i32.load + (local.get $11) + ) + (i32.load + (local.get $4) + ) + (i32.load offset=16 + (i32.load + (local.get $12) + ) + ) + ) + ) + ) + ) + (br $label$7) + ) + (local.set $4 + 
(i32.load + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + ) + (local.set $3 + (i32.const 1) + ) + (br_if $label$9 + (call $9 + (local.get $1) + (local.get $8) + (local.get $9) + ) + ) + (br_if $label$9 + (call_indirect (type $0) + (i32.load + (local.tee $2 + (i32.add + (local.get $1) + (i32.const 24) + ) + ) + ) + (local.get $10) + (local.get $7) + (i32.load offset=12 + (i32.load + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 28) + ) + ) + ) + ) + ) + ) + (local.set $7 + (i32.load + (local.get $2) + ) + ) + (local.set $2 + (i32.const -1) + ) + (local.set $1 + (i32.add + (i32.load + (local.get $1) + ) + (i32.const 16) + ) + ) + (loop $label$23 + (br_if $label$11 + (i32.ge_u + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (local.get $5) + ) + ) + (br_if $label$23 + (i32.eqz + (call_indirect (type $1) + (local.get $7) + (local.get $4) + (i32.load + (local.get $1) + ) + ) + ) + ) + ) + (br $label$9) + ) + (local.set $4 + (local.get $3) + ) + (local.set $3 + (i32.const 0) + ) + (br $label$12) + ) + (local.set $4 + (i32.shr_u + (i32.add + (local.get $3) + (i32.const 1) + ) + (i32.const 1) + ) + ) + (local.set $3 + (i32.shr_u + (local.get $3) + (i32.const 1) + ) + ) + ) + (local.set $2 + (i32.const -1) + ) + (local.set $5 + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + (local.set $8 + (i32.add + (local.get $1) + (i32.const 24) + ) + ) + (local.set $9 + (i32.add + (local.get $1) + (i32.const 28) + ) + ) + (block $label$24 + (loop $label$25 + (br_if $label$24 + (i32.ge_u + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (local.get $3) + ) + ) + (br_if $label$25 + (i32.eqz + (call_indirect (type $1) + (i32.load + (local.get $8) + ) + (i32.load + (local.get $5) + ) + (i32.load offset=16 + (i32.load + (local.get $9) + ) + ) + ) + ) + ) + ) + (br $label$7) + ) + (local.set $5 + (i32.load + (i32.add + (local.get $1) + (i32.const 4) + ) + ) + ) + (local.set $3 + (i32.const 1) + ) + (br_if $label$9 + (call_indirect (type $0) + (i32.load + (local.tee $2 + (i32.add + (local.get $1) + (i32.const 24) + ) + ) + ) + (local.get $10) + (local.get $7) + (i32.load offset=12 + (i32.load + (local.tee $1 + (i32.add + (local.get $1) + (i32.const 28) + ) + ) + ) + ) + ) + ) + (local.set $7 + (i32.load + (local.get $2) + ) + ) + (local.set $2 + (i32.const -1) + ) + (local.set $1 + (i32.add + (i32.load + (local.get $1) + ) + (i32.const 16) + ) + ) + (loop $label$26 + (br_if $label$10 + (i32.ge_u + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (local.get $4) + ) + ) + (br_if $label$26 + (i32.eqz + (call_indirect (type $1) + (local.get $7) + (local.get $5) + (i32.load + (local.get $1) + ) + ) + ) + ) + ) + (br $label$9) + ) + (global.set $global$0 + (i32.add + (local.get $6) + (i32.const 48) + ) + ) + (return + (i32.const 0) + ) + ) + (local.set $3 + (i32.const 0) + ) + ) + (global.set $global$0 + (i32.add + (local.get $6) + (i32.const 48) + ) + ) + (return + (local.get $3) + ) + ) + (return + (block (result i32) + (local.set $15 + (call_indirect (type $0) + (i32.load offset=24 + (local.get $1) + ) + (local.get $10) + (local.get $7) + (i32.load offset=12 + (i32.load + (i32.add + (local.get $1) + (i32.const 28) + ) + ) + ) + ) + ) + (global.set $global$0 + (i32.add + (local.get $6) + (i32.const 48) + ) + ) + (local.get $15) + ) + ) + ) + (global.set $global$0 + (i32.add + (local.get $6) + (i32.const 48) + ) + ) + (i32.const 1) + ) + (func $7 (; 7 ;) (type $2) (param $0 i32) + (nop) + ) + (func $8 (; 8 ;) (type $6) (param $0 i32) (result 
i64) + (i64.const -2357177763932378009) + ) + (func $9 (; 9 ;) (type $9) (param $0 i32) (param $1 i32) (param $2 i32) (result i32) + (block $label$1 + (return + (block $label$2 (result i32) + (if + (i32.ne + (local.get $1) + (i32.const 1114112) + ) + (drop + (br_if $label$2 + (i32.const 1) + (call_indirect (type $1) + (i32.load offset=24 + (local.get $0) + ) + (local.get $1) + (i32.load offset=16 + (i32.load + (i32.add + (local.get $0) + (i32.const 28) + ) + ) + ) + ) + ) + ) + ) + (br_if $label$1 + (i32.eqz + (local.get $2) + ) + ) + (call_indirect (type $0) + (i32.load offset=24 + (local.get $0) + ) + (local.get $2) + (i32.const 0) + (i32.load offset=12 + (i32.load + (i32.add + (local.get $0) + (i32.const 28) + ) + ) + ) + ) + ) + ) + ) + (i32.const 0) + ) + (func $10 (; 10 ;) (type $3) (param $0 i32) (result i32) + (local $1 i32) + (local $2 i32) + (local $3 i32) + (local $4 i32) + (local $5 i32) + (local $6 i32) + (local $7 i32) + (local $8 i32) + (local $9 i32) + (local $10 i32) + (local $11 i32) + (local $12 i32) + (local $13 i32) + (local $14 i32) + (local $15 i32) + (local $16 i32) + (local $17 i32) + (local $18 i32) + (local $19 i32) + (local $20 i32) + (local $21 i32) + (local $22 i32) + (local $23 i32) + (local $24 i32) + (local $25 i32) + (local $26 i32) + (local $27 i32) + (local $28 i32) + (local $29 i32) + (local $30 i32) + (local $31 i32) + (local $32 i32) + (local $33 i32) + (local $34 i32) + (local $35 i32) + (local $36 i32) + (local $37 i32) + (local $38 i32) + (local $39 i32) + (local $40 i32) + (local $41 i32) + (local $42 i32) + (local $43 i32) + (local $44 i32) + (local $45 i32) + (local $46 i32) + (global.set $global$0 + (local.tee $1 + (i32.sub + (global.get $global$0) + (i32.const 256) + ) + ) + ) + (i64.store offset=56 align=4 + (local.get $1) + (i64.const 4294967297) + ) + (i64.store offset=48 align=4 + (local.get $1) + (i64.const 4294967297) + ) + (i64.store offset=40 align=4 + (local.get $1) + (i64.const 4294967297) + ) + (i64.store offset=32 align=4 + (local.get $1) + (i64.const 4294967297) + ) + (i64.store offset=24 align=4 + (local.get $1) + (i64.const 4294967297) + ) + (i64.store offset=16 align=4 + (local.get $1) + (i64.const 4294967297) + ) + (i64.store offset=8 align=4 + (local.get $1) + (i64.const 4294967297) + ) + (i64.store align=4 + (local.get $1) + (i64.const 4294967297) + ) + (block $label$1 + (if + (i32.ge_u + (local.tee $11 + (i32.add + (local.get $0) + (i32.const 1) + ) + ) + (i32.const 2) + ) + (block + (local.set $3 + (local.get $1) + ) + (local.set $2 + (i32.const 1) + ) + (loop $label$3 + (br_if $label$1 + (i32.ge_u + (local.get $2) + (i32.const 16) + ) + ) + (i32.store + (local.tee $4 + (i32.add + (local.get $3) + (i32.const 4) + ) + ) + (i32.mul + (i32.load + (local.get $3) + ) + (local.get $2) + ) + ) + (local.set $3 + (local.get $4) + ) + (local.set $2 + (local.tee $4 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + ) + (br_if $label$3 + (i32.lt_u + (local.get $4) + (local.get $11) + ) + ) + ) + ) + ) + (if + (i32.lt_u + (local.get $0) + (i32.const 16) + ) + (block + (local.set $20 + (i32.const 1) + ) + (local.set $21 + (local.tee $9 + (i32.load + (i32.add + (local.get $1) + (i32.shl + (local.get $0) + (i32.const 2) + ) + ) + ) + ) + ) + (if + (i32.ge_u + (local.get $9) + (i32.const 24) + ) + (local.set $20 + (select + (i32.const 24) + (i32.const 25) + (i32.eq + (local.get $9) + (i32.mul + (local.tee $21 + (i32.div_u + (local.get $9) + (i32.const 24) + ) + ) + (i32.const 24) + ) + ) + ) + ) + ) + (local.set $40 + (i32.sub + 
(i32.const 0) + (local.get $0) + ) + ) + (local.set $12 + (i32.add + (local.get $1) + (i32.const 196) + ) + ) + (local.set $41 + (i32.add + (local.get $1) + (i32.const 132) + ) + ) + (local.set $42 + (i32.add + (local.get $1) + (i32.const 124) + ) + ) + (local.set $11 + (i32.add + (local.get $1) + (i32.const 68) + ) + ) + (local.set $43 + (i32.lt_u + (local.get $0) + (i32.const 2) + ) + ) + (loop $label$6 + (i64.store + (i32.add + (local.get $1) + (i32.const 120) + ) + (i64.const 0) + ) + (i64.store + (i32.add + (local.get $1) + (i32.const 112) + ) + (i64.const 0) + ) + (i64.store + (i32.add + (local.get $1) + (i32.const 104) + ) + (i64.const 0) + ) + (i64.store + (i32.add + (local.get $1) + (i32.const 96) + ) + (i64.const 0) + ) + (i64.store + (i32.add + (local.get $1) + (i32.const 88) + ) + (i64.const 0) + ) + (i64.store + (i32.add + (local.get $1) + (i32.const 80) + ) + (i64.const 0) + ) + (i64.store + (i32.add + (local.get $1) + (i32.const 72) + ) + (i64.const 0) + ) + (i64.store offset=64 + (local.get $1) + (i64.const 0) + ) + (i64.store + (local.tee $26 + (i32.add + (local.get $1) + (i32.const 184) + ) + ) + (i64.const 0) + ) + (i64.store + (local.tee $27 + (i32.add + (local.get $1) + (i32.const 176) + ) + ) + (i64.const 0) + ) + (i64.store + (local.tee $28 + (i32.add + (local.get $1) + (i32.const 168) + ) + ) + (i64.const 0) + ) + (i64.store + (local.tee $29 + (i32.add + (local.get $1) + (i32.const 160) + ) + ) + (i64.const 0) + ) + (i64.store + (local.tee $30 + (i32.add + (local.get $1) + (i32.const 152) + ) + ) + (i64.const 0) + ) + (i64.store + (local.tee $31 + (i32.add + (local.get $1) + (i32.const 144) + ) + ) + (i64.const 0) + ) + (i64.store + (local.tee $32 + (i32.add + (local.get $1) + (i32.const 136) + ) + ) + (i64.const 0) + ) + (i64.store offset=128 + (local.get $1) + (i64.const 0) + ) + (i64.store align=4 + (local.tee $33 + (i32.add + (local.get $1) + (i32.const 248) + ) + ) + (i64.const 64424509454) + ) + (i64.store align=4 + (local.tee $34 + (i32.add + (local.get $1) + (i32.const 240) + ) + ) + (i64.const 55834574860) + ) + (i64.store align=4 + (local.tee $35 + (i32.add + (local.get $1) + (i32.const 232) + ) + ) + (i64.const 47244640266) + ) + (i64.store align=4 + (local.tee $36 + (i32.add + (local.get $1) + (i32.const 224) + ) + ) + (i64.const 38654705672) + ) + (i64.store align=4 + (local.tee $37 + (i32.add + (local.get $1) + (i32.const 216) + ) + ) + (i64.const 30064771078) + ) + (i64.store align=4 + (local.tee $38 + (i32.add + (local.get $1) + (i32.const 208) + ) + ) + (i64.const 21474836484) + ) + (i64.store align=4 + (local.tee $39 + (i32.add + (local.get $1) + (i32.const 200) + ) + ) + (i64.const 12884901890) + ) + (i64.store offset=192 align=4 + (local.get $1) + (i64.const 4294967296) + ) + (local.set $7 + (i32.mul + (local.get $13) + (local.get $21) + ) + ) + (local.set $2 + (block $label$7 (result i32) + (block $label$8 + (if + (i32.eqz + (local.get $43) + ) + (block + (local.set $23 + (local.get $40) + ) + (local.set $14 + (local.get $7) + ) + (local.set $15 + (local.get $0) + ) + (local.set $5 + (i32.const 0) + ) + (br $label$8) + ) + ) + (br $label$7 + (i32.const 0) + ) + ) + (i32.const 1) + ) + ) + (loop $label$10 + (block $label$11 + (block $label$12 + (local.set $2 + (block $label$13 (result i32) + (block $label$14 + (block $label$15 + (block $label$16 + (block $label$17 + (block $label$18 + (block $label$19 + (if + (i32.eqz + (local.get $2) + ) + (block + (local.set $13 + (i32.add + (local.get $13) + (i32.const 1) + ) + ) + (local.set $44 + (i32.add + 
(select + (local.get $9) + (local.tee $3 + (i32.add + (local.get $7) + (local.get $21) + ) + ) + (i32.gt_u + (local.get $3) + (local.get $9) + ) + ) + (i32.const -1) + ) + ) + (local.set $24 + (i32.const 0) + ) + (br_if $label$19 + (i32.ge_s + (local.tee $6 + (i32.load offset=192 + (local.get $1) + ) + ) + (i32.const 1) + ) + ) + (br $label$18) + ) + ) + (block $label$21 + (block $label$22 + (block $label$23 + (block $label$24 + (block $label$25 + (block $label$26 + (block $label$27 + (block $label$28 + (block $label$29 + (block $label$30 + (block $label$31 + (br_table $label$31 $label$30 $label$29 + (local.get $5) + ) + ) + (br_if $label$24 + (i32.ge_u + (local.tee $4 + (i32.add + (local.get $15) + (i32.const -1) + ) + ) + (i32.const 16) + ) + ) + (br_if $label$23 + (i32.eqz + (local.tee $3 + (i32.load + (i32.add + (local.get $1) + (local.tee $2 + (i32.shl + (local.get $4) + (i32.const 2) + ) + ) + ) + ) + ) + ) + ) + (if + (i32.eq + (local.get $14) + (i32.const -2147483648) + ) + (br_if $label$22 + (i32.eq + (local.get $3) + (i32.const -1) + ) + ) + ) + (i32.store + (i32.add + (i32.sub + (local.get $1) + (i32.const -64) + ) + (local.get $2) + ) + (local.tee $16 + (i32.div_s + (local.get $14) + (local.get $3) + ) + ) + ) + (i64.store + (local.get $32) + (i64.load align=4 + (local.get $39) + ) + ) + (i64.store + (local.get $31) + (i64.load align=4 + (local.get $38) + ) + ) + (i64.store + (local.get $30) + (i64.load align=4 + (local.get $37) + ) + ) + (i64.store + (local.get $29) + (i64.load align=4 + (local.get $36) + ) + ) + (i64.store + (local.get $28) + (i64.load align=4 + (local.get $35) + ) + ) + (i64.store + (local.get $27) + (i64.load align=4 + (local.get $34) + ) + ) + (i64.store + (local.get $26) + (i64.load align=4 + (local.get $33) + ) + ) + (i64.store offset=128 + (local.get $1) + (i64.load offset=192 align=4 + (local.get $1) + ) + ) + (local.set $45 + (i32.add + (local.get $16) + (local.get $23) + ) + ) + (local.set $14 + (i32.sub + (local.get $14) + (i32.mul + (local.get $3) + (local.get $16) + ) + ) + ) + (local.set $2 + (i32.const 0) + ) + (local.set $8 + (i32.add + (local.get $1) + (i32.const 192) + ) + ) + (loop $label$33 + (block $label$34 + (if + (i32.gt_u + (local.tee $3 + (i32.add + (local.get $2) + (local.get $16) + ) + ) + (local.get $4) + ) + (block + (br_if $label$27 + (i32.gt_u + (local.tee $46 + (i32.add + (local.get $2) + (local.get $45) + ) + ) + (i32.const 15) + ) + ) + (local.set $3 + (i32.sub + (local.get $3) + (local.get $15) + ) + ) + (br_if $label$34 + (i32.le_u + (local.get $2) + (i32.const 15) + ) + ) + (br $label$28) + ) + ) + (br_if $label$26 + (i32.ge_u + (local.get $3) + (i32.const 16) + ) + ) + (br_if $label$28 + (i32.gt_u + (local.get $2) + (i32.const 15) + ) + ) + ) + (i32.store + (local.get $8) + (i32.load + (i32.add + (i32.add + (local.get $1) + (i32.const 128) + ) + (i32.shl + (local.get $3) + (i32.const 2) + ) + ) + ) + ) + (local.set $8 + (i32.add + (local.get $8) + (i32.const 4) + ) + ) + (br_if $label$33 + (i32.lt_u + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (local.get $15) + ) + ) + ) + (local.set $23 + (i32.add + (local.get $23) + (i32.const 1) + ) + ) + (br_if $label$21 + (i32.gt_u + (local.tee $15 + (local.get $4) + ) + (i32.const 1) + ) + ) + (local.set $2 + (i32.const 0) + ) + (br $label$10) + ) + (i64.store + (local.get $26) + (i64.load align=4 + (local.get $33) + ) + ) + (i64.store + (local.get $27) + (i64.load align=4 + (local.get $34) + ) + ) + (i64.store + (local.get $28) + (i64.load align=4 + 
(local.get $35) + ) + ) + (i64.store + (local.get $29) + (i64.load align=4 + (local.get $36) + ) + ) + (i64.store + (local.get $30) + (i64.load align=4 + (local.get $37) + ) + ) + (i64.store + (local.get $31) + (i64.load align=4 + (local.get $38) + ) + ) + (i64.store + (local.get $32) + (i64.load align=4 + (local.get $39) + ) + ) + (i64.store offset=128 + (local.get $1) + (i64.load offset=192 align=4 + (local.get $1) + ) + ) + (br_if $label$25 + (i32.gt_u + (local.get $6) + (i32.const 15) + ) + ) + (local.set $17 + (i32.const 1) + ) + (local.set $10 + (local.get $6) + ) + (br $label$13 + (i32.const 0) + ) + ) + (if + (i32.lt_u + (local.get $7) + (local.get $44) + ) + (block + (local.set $25 + (i32.load + (local.get $12) + ) + ) + (i32.store + (local.get $12) + (local.get $6) + ) + (i32.store offset=192 + (local.get $1) + (local.get $25) + ) + (local.set $18 + (local.get $11) + ) + (br_if $label$11 + (i32.lt_s + (local.tee $2 + (i32.load offset=68 + (local.get $1) + ) + ) + (i32.const 1) + ) + ) + (local.set $19 + (i32.const 1) + ) + (br $label$14) + ) + ) + (local.set $22 + (i32.add + (local.get $22) + (local.get $24) + ) + ) + (br_if $label$6 + (i32.lt_u + (local.get $13) + (local.get $20) + ) + ) + (global.set $global$0 + (i32.add + (local.get $1) + (i32.const 256) + ) + ) + (return + (local.get $22) + ) + ) + (call $3 + (i32.const 1049076) + (local.get $2) + ) + (unreachable) + ) + (call $3 + (i32.const 1049060) + (local.get $46) + ) + (unreachable) + ) + (call $3 + (i32.const 1049044) + (i32.add + (local.get $2) + (local.get $16) + ) + ) + (unreachable) + ) + (local.set $10 + (local.get $6) + ) + (br $label$12) + ) + (call $3 + (i32.const 1048980) + (local.get $4) + ) + (unreachable) + ) + (call $2 + (i32.const 1048996) + ) + (unreachable) + ) + (call $2 + (i32.const 1049020) + ) + (unreachable) + ) + (local.set $5 + (i32.const 0) + ) + (br $label$17) + ) + (local.set $5 + (i32.const 1) + ) + (br $label$16) + ) + (local.set $5 + (i32.const 2) + ) + (br $label$15) + ) + (local.set $2 + (i32.const 1) + ) + (br $label$10) + ) + (local.set $2 + (i32.const 1) + ) + (br $label$10) + ) + (local.set $2 + (i32.const 1) + ) + (br $label$10) + ) + (i32.const 1) + ) + ) + (loop $label$37 + (block $label$38 + (block $label$39 + (if + (i32.eqz + (local.get $2) + ) + (block + (if + (local.tee $10 + (i32.load + (local.tee $5 + (i32.add + (local.tee $4 + (i32.shl + (local.tee $3 + (local.get $10) + ) + (i32.const 2) + ) + ) + (i32.add + (local.get $1) + (i32.const 128) + ) + ) + ) + ) + ) + (block + (i32.store + (local.get $5) + (local.get $3) + ) + (block $label$42 + (br_if $label$42 + (i32.lt_u + (local.get $3) + (i32.const 3) + ) + ) + (br_if $label$42 + (i32.eqz + (local.tee $8 + (i32.shr_u + (i32.add + (local.get $3) + (i32.const -1) + ) + (i32.const 1) + ) + ) + ) + ) + (local.set $2 + (i32.add + (local.get $4) + (local.get $42) + ) + ) + (local.set $3 + (local.get $41) + ) + (loop $label$43 + (local.set $4 + (i32.load + (local.get $3) + ) + ) + (i32.store + (local.get $3) + (i32.load + (local.get $2) + ) + ) + (i32.store + (local.get $2) + (local.get $4) + ) + (local.set $3 + (i32.add + (local.get $3) + (i32.const 4) + ) + ) + (local.set $2 + (i32.add + (local.get $2) + (i32.const -4) + ) + ) + (br_if $label$43 + (local.tee $8 + (i32.add + (local.get $8) + (i32.const -1) + ) + ) + ) + ) + ) + (local.set $17 + (i32.add + (local.get $17) + (i32.const 1) + ) + ) + (br_if $label$38 + (i32.lt_u + (local.get $10) + (i32.const 16) + ) + ) + (br $label$12) + ) + ) + (local.set $24 + (i32.add + (select + 
(i32.sub + (i32.const 0) + (local.get $17) + ) + (local.get $17) + (i32.and + (local.get $7) + (i32.const 1) + ) + ) + (local.get $24) + ) + ) + (local.set $5 + (i32.const 2) + ) + (br $label$39) + ) + ) + (local.set $2 + (i32.const 0) + ) + (i32.store + (local.get $18) + (i32.const 0) + ) + (i32.store offset=192 + (local.get $1) + (local.tee $4 + (local.get $6) + ) + ) + (local.set $5 + (i32.add + (local.get $19) + (i32.const 1) + ) + ) + (local.set $3 + (local.get $12) + ) + (block $label$44 + (block $label$45 + (loop $label$46 + (br_if $label$45 + (i32.ge_u + (i32.add + (local.get $2) + (i32.const 2) + ) + (i32.const 16) + ) + ) + (i32.store + (local.get $3) + (i32.load + (local.tee $3 + (i32.add + (local.get $3) + (i32.const 4) + ) + ) + ) + ) + (br_if $label$46 + (i32.lt_u + (local.tee $2 + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (local.get $19) + ) + ) + ) + (br_if $label$44 + (i32.ge_u + (local.get $5) + (i32.const 16) + ) + ) + (i32.store + (i32.add + (local.tee $3 + (i32.shl + (local.get $5) + (i32.const 2) + ) + ) + (i32.add + (local.get $1) + (i32.const 192) + ) + ) + (local.get $25) + ) + (br_if $label$11 + (i32.le_s + (local.tee $2 + (i32.load + (local.tee $18 + (i32.add + (i32.sub + (local.get $1) + (i32.const -64) + ) + (local.get $3) + ) + ) + ) + ) + (local.get $19) + ) + ) + (local.set $6 + (i32.load + (local.get $12) + ) + ) + (local.set $19 + (local.get $5) + ) + (local.set $25 + (local.get $4) + ) + (local.set $2 + (i32.const 1) + ) + (br $label$37) + ) + (call $3 + (i32.const 1049108) + (i32.add + (local.get $2) + (i32.const 2) + ) + ) + (unreachable) + ) + (call $3 + (i32.const 1049124) + (local.get $5) + ) + (unreachable) + ) + (local.set $2 + (i32.const 1) + ) + (br $label$10) + ) + (local.set $2 + (i32.const 0) + ) + (br $label$37) + ) + ) + (call $3 + (i32.const 1049092) + (local.get $10) + ) + (unreachable) + ) + (local.set $7 + (i32.add + (local.get $7) + (i32.const 1) + ) + ) + (i32.store + (local.get $18) + (i32.add + (local.get $2) + (i32.const 1) + ) + ) + (block $label$47 + (block $label$48 + (if + (i32.ge_s + (local.tee $6 + (i32.load offset=192 + (local.get $1) + ) + ) + (i32.const 1) + ) + (block + (local.set $5 + (i32.const 1) + ) + (br $label$48) + ) + ) + (local.set $5 + (i32.const 2) + ) + (br $label$47) + ) + (local.set $2 + (i32.const 1) + ) + (br $label$10) + ) + (local.set $2 + (i32.const 1) + ) + (br $label$10) + ) + ) + ) + ) + (call $3 + (i32.const 1049212) + (local.get $0) + ) + (unreachable) + ) + (call $3 + (i32.const 1049196) + (local.get $2) + ) + (unreachable) + ) +) + diff --git a/cranelift/wasmtests/select.wat b/cranelift/wasmtests/select.wat new file mode 100644 index 0000000000..45ef241833 --- /dev/null +++ b/cranelift/wasmtests/select.wat @@ -0,0 +1,19 @@ +(module + (func $untyped-select (result i32) + i32.const 42 + i32.const 24 + i32.const 1 + select) + + (func $typed-select-1 (result anyref) + ref.null + ref.null + i32.const 1 + select (result anyref)) + + (func $typed-select-2 (param anyref) (result anyref) + ref.null + local.get 0 + i32.const 1 + select (result anyref)) +) diff --git a/cranelift/wasmtests/simd.wat b/cranelift/wasmtests/simd.wat new file mode 100644 index 0000000000..99b7d5c10d --- /dev/null +++ b/cranelift/wasmtests/simd.wat @@ -0,0 +1,23 @@ +(module + (func $test_splat (result i32) + i32.const 42 + i32x4.splat + i32x4.extract_lane 0 + ) + + (func $test_insert_lane (result i32) + v128.const i64x2 0 0 + i32.const 99 + i32x4.replace_lane 1 + i32x4.extract_lane 1 + ) + + (func $test_const (result i32) 
+ v128.const i32x4 1 2 3 4 + i32x4.extract_lane 3 + ) + + (export "test_splat" (func $test_splat)) + (export "test_insert_lane" (func $test_insert_lane)) + (export "test_const" (func $test_const)) +) diff --git a/cranelift/wasmtests/table-copy.wat b/cranelift/wasmtests/table-copy.wat new file mode 100644 index 0000000000..dd9e1611e6 --- /dev/null +++ b/cranelift/wasmtests/table-copy.wat @@ -0,0 +1,22 @@ +(module $n + (table $t (import "m" "t") 6 funcref) + + (func $i (param i32 i32 i32 i32 i32 i32) (result i32) (local.get 3)) + (func $j (param i32 i32 i32 i32 i32 i32) (result i32) (local.get 4)) + (func $k (param i32 i32 i32 i32 i32 i32) (result i32) (local.get 5)) + + (table $u (export "u") funcref (elem $i $j $k $i $j $k)) + + (func (export "copy_to_t_from_u") (param i32 i32 i32 i32) (result i32) + local.get 0 + local.get 1 + local.get 2 + local.get 3 + table.copy $t $u) + + (func (export "copy_to_u_from_t") (param i32 i32 i32 i32) (result i32) + local.get 0 + local.get 1 + local.get 2 + local.get 3 + table.copy $u $t)) diff --git a/cranelift/wasmtests/unreachable_code.wat b/cranelift/wasmtests/unreachable_code.wat new file mode 100644 index 0000000000..38c1a315ce --- /dev/null +++ b/cranelift/wasmtests/unreachable_code.wat @@ -0,0 +1,77 @@ +(module + (type (;0;) (func (param i32 i64 f64) (result f64))) + (type (;1;) (func)) + (type (;2;) (func (result f32))) + (type (;3;) (func (result f64))) + (type (;4;) (func (param f64 f64) (result f64))) + (type (;5;) (func (result i32))) + (func (result i32) + block (result i32) + unreachable + end + block + end + i32.clz + ) + (func (result i32) + loop (result i32) + unreachable + end + block + end + i32.clz + ) + (func (;0;) (type 5) (result i32) + nop + block (result i32) ;; label = @1 + block ;; label = @2 + block ;; label = @3 + nop + block ;; label = @4 + i32.const 1 + if ;; label = @5 + nop + block ;; label = @6 + nop + nop + loop (result i32) ;; label = @7 + nop + block (result i32) ;; label = @8 + nop + nop + block (result i32) ;; label = @9 + nop + unreachable + end + end + end + block (result i32) ;; label = @7 + block ;; label = @8 + nop + end + i32.const 0 + end + br_if 5 (;@1;) + drop + end + else + nop + end + nop + end + end + end + unreachable + end) + (func + block (result i32) + block (result i32) + i32.const 1 + br 1 + end + end + drop + ) + (table (;0;) 16 anyfunc) + (elem (i32.const 0)) +) diff --git a/cranelift/wasmtests/use_fallthrough_return.wat b/cranelift/wasmtests/use_fallthrough_return.wat new file mode 100644 index 0000000000..44eab2b3f6 --- /dev/null +++ b/cranelift/wasmtests/use_fallthrough_return.wat @@ -0,0 +1,10 @@ +(module + (memory 1) + (func $main (param i32) + (if + (get_local 0) + (then (return)) + (else (unreachable)) + ) + ) +) diff --git a/crates/environ/Cargo.toml b/crates/environ/Cargo.toml index 66b2c38470..d90f9cd3cf 100644 --- a/crates/environ/Cargo.toml +++ b/crates/environ/Cargo.toml @@ -13,9 +13,9 @@ edition = "2018" [dependencies] anyhow = "1.0" -cranelift-codegen = { version = "0.59.0", features = ["enable-serde"] } -cranelift-entity = { version = "0.59.0", features = ["enable-serde"] } -cranelift-wasm = { version = "0.59.0", features = ["enable-serde"] } +cranelift-codegen = { path = "../../cranelift/codegen", version = "0.59.0", features = ["enable-serde"] } +cranelift-entity = { path = "../../cranelift/entity", version = "0.59.0", features = ["enable-serde"] } +cranelift-wasm = { path = "../../cranelift/wasm", version = "0.59.0", features = ["enable-serde"] } wasmparser = "0.51.2" lightbeam = 
{ path = "../lightbeam", optional = true, version = "0.12.0" } indexmap = "1.0.2" @@ -44,7 +44,7 @@ tempfile = "3" target-lexicon = { version = "0.10.0", default-features = false } pretty_env_logger = "0.3.0" rand = { version = "0.7.0", default-features = false, features = ["small_rng"] } -cranelift-codegen = { version = "0.59.0", features = ["enable-serde", "all-arch"] } +cranelift-codegen = { path = "../../cranelift/codegen", version = "0.59.0", features = ["enable-serde", "all-arch"] } filetime = "0.2.7" lazy_static = "1.3.0" diff --git a/crates/jit/Cargo.toml b/crates/jit/Cargo.toml index f7c50e66f2..83c2373de9 100644 --- a/crates/jit/Cargo.toml +++ b/crates/jit/Cargo.toml @@ -11,11 +11,11 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-codegen = { version = "0.59.0", features = ["enable-serde"] } -cranelift-entity = { version = "0.59.0", features = ["enable-serde"] } -cranelift-wasm = { version = "0.59.0", features = ["enable-serde"] } -cranelift-native = "0.59.0" -cranelift-frontend = "0.59.0" +cranelift-codegen = { path = "../../cranelift/codegen", version = "0.59.0", features = ["enable-serde"] } +cranelift-entity = { path = "../../cranelift/entity", version = "0.59.0", features = ["enable-serde"] } +cranelift-wasm = { path = "../../cranelift/wasm", version = "0.59.0", features = ["enable-serde"] } +cranelift-native = { path = "../../cranelift/native", version = "0.59.0" } +cranelift-frontend = { path = "../../cranelift/frontend", version = "0.59.0" } wasmtime-environ = { path = "../environ", version = "0.12.0" } wasmtime-runtime = { path = "../runtime", version = "0.12.0" } wasmtime-debug = { path = "../debug", version = "0.12.0" } diff --git a/crates/lightbeam/Cargo.toml b/crates/lightbeam/Cargo.toml index f51889b9b2..e6cc26c8a3 100644 --- a/crates/lightbeam/Cargo.toml +++ b/crates/lightbeam/Cargo.toml @@ -19,7 +19,7 @@ memoffset = "0.5.3" itertools = "0.8.2" capstone = "0.6.0" thiserror = "1.0.9" -cranelift-codegen = "0.59.0" +cranelift-codegen = { path = "../../cranelift/codegen", version = "0.59.0" } multi_mut = "0.1" either = "1.5" typemap = "0.3" diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 2eb5b2b35b..ab60ce0bd7 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -34,6 +34,8 @@ - [Testing](./contributing-testing.md) - [Fuzzing](./contributing-fuzzing.md) - [CI](./contributing-ci.md) + - [Coding guidelines](./contributing-coding-guidelines.md) + - [Development process](./contributing-development-process.md) - [Release Process](./contributing-release-process.md) - [Governance](./contributing-governance.md) - [Code of Conduct](./contributing-coc.md) diff --git a/docs/contributing-coding-guidelines.md b/docs/contributing-coding-guidelines.md new file mode 100644 index 0000000000..0fe5133df4 --- /dev/null +++ b/docs/contributing-coding-guidelines.md @@ -0,0 +1,33 @@ +# Coding guidelines + +For the most part, Wasmtime and Cranelift follow common Rust conventions and +[pull request] (PR) workflows, though we do have a few additional things to +be aware of. + +[pull request]: https://help.github.com/articles/about-pull-requests/ + +### rustfmt + +All PRs must be formatted according to rustfmt, and this is checked in the +continuous integration tests. You can format code locally with: + +```sh +$ cargo fmt +``` + +at the root of the repository. You can find [more information about rustfmt +online](https://github.com/rust-lang/rustfmt) too, such as how to configure +your editor. 
+
+### Rustc version support
+
+Wasmtime supports the current stable version of Rust.
+
+Cranelift supports stable Rust, and follows the [Rust Update Policy for
+Firefox].
+
+Some of the developer scripts depend on nightly Rust, for example to run
+clippy and other tools; however, we avoid depending on nightly for the main
+build.
+
+[Rust Update Policy for Firefox]: https://wiki.mozilla.org/Rust_Update_Policy_for_Firefox#Schedule
diff --git a/docs/contributing-development-process.md b/docs/contributing-development-process.md
new file mode 100644
index 0000000000..78781a4b7d
--- /dev/null
+++ b/docs/contributing-development-process.md
@@ -0,0 +1,66 @@
+# Development Process
+
+We use [issues] for asking questions ([open one here][newissue]!) and tracking
+bugs and unimplemented features, and [pull requests] (PRs) for tracking and
+reviewing code submissions.
+
+### Before submitting a PR
+
+Consider opening an issue first to talk about the change. PRs without
+corresponding issues are appropriate for fairly narrow technical matters, not
+for fixes to user-facing bugs or for feature implementations, especially when
+those features might have multiple implementation strategies that could
+usefully be discussed.
+
+Our issue templates might help you through the process.
+
+### When submitting PRs
+
+ - Please fill in the pull request template as appropriate. It usually speeds
+   up the review process and helps reviewers understand the changes the PR
+   brings.
+
+ - Write clear commit messages that start with a one-line summary of the
+   change (and if it's difficult to summarize in one line, consider
+   splitting the change into multiple PRs), optionally followed by
+   additional context. Good things to mention include which areas of the
+   code are affected, which features are affected, and anything that
+   reviewers might want to pay special attention to; see the example
+   message at the end of this page.
+
+ - If there is code which needs explanation, prefer to put the explanation in
+   a comment in the code, or in documentation, rather than in the commit
+   message.
+
+ - For pull requests that fix existing issues, use [issue keywords]. Note that
+   not all pull requests need to have accompanying issues.
+
+ - Assign the review to somebody from the [Core Team], either by picking from
+   the reviewers GitHub suggests, or by choosing somebody else if you have a
+   specific person in mind.
+
+ - When updating your pull request, please make sure to re-request review if
+   the request has been cancelled.
+
+### Focused commits or squashing
+
+We generally squash sequences of incremental-development commits together into
+focused, logical commits. Developers may do this themselves before submitting a
+PR or during the PR process, or Core Team members may do it when merging a PR.
+Ideally, the continuous-integration tests should pass at each logical commit.
+
+### Review and merge
+
+Anyone may submit a pull request, and anyone may comment on or review others'
+pull requests. However, one review from somebody in the [Core Team] is required
+before the Core Team merges it.
+
+Even Core Team members should create PRs for every change, including minor work
+items (version bumps, removing warnings, etc.): this helps keep track of what
+has happened in the repository. Very minor changes may be merged without a
+review, although having one is always preferred.
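+
+For example, a commit message following these guidelines might look like this
+(the change described is purely hypothetical):
+
+```
+cranelift: Deduplicate constant-pool entries
+
+Affects only constant emission in the codegen crate; no change to the set
+of generated instructions. Reviewers may want to pay special attention to
+how float constants are compared.
+```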
+
+[issues]: https://guides.github.com/features/issues/
+[pull requests]: https://help.github.com/articles/about-pull-requests/
+[issue keywords]: https://help.github.com/articles/closing-issues-using-keywords/
+[Core Team]: https://github.com/orgs/bytecodealliance/people/
+[newissue]: https://github.com/bytecodealliance/wasmtime/issues/new
diff --git a/docs/contributing.md b/docs/contributing.md
index 72c9a67762..427b0c7b78 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -1,17 +1,31 @@
 # Contributing
 
-We're excited to work on Wasmtime together with you! This guide should help you
-get up and running with Wasmtime development. But first, make sure you've read
-the [Code of Conduct](./contributing-coc.html)!
+We're excited to work on Wasmtime and/or Cranelift together with you! This guide
+should help you get up and running with Wasmtime and Cranelift development. But
+first, make sure you've read the [Code of Conduct](./contributing-coc.html)!
+
+Wasmtime and Cranelift are very ambitious projects with many goals, and while
+we're confident we can achieve some of them, we see many opportunities for
+people to get involved and help us achieve even more.
 
 ## Join Our Chat
 
-We chat about Wasmtime development on Zulip — [join
-us!](https://bytecodealliance.zulipchat.com/#narrow/stream/217126-wasmtime)
+We chat about Wasmtime and Cranelift development on Zulip — [join
+us!](https://bytecodealliance.zulipchat.com/). You can also join specific
+streams:
 
-If you're having trouble building Wasmtime, aren't sure why a test is failing,
-or have any other questions, feel free to ask here. You can also [open an
-issue](https://github.com/bytecodealliance/wasmtime/issues/new)!
+* [#wasmtime](https://bytecodealliance.zulipchat.com/#narrow/stream/217126-wasmtime)
+* [#cranelift](https://bytecodealliance.zulipchat.com/#narrow/stream/217117-cranelift)
+
+If you're having trouble building Wasmtime or Cranelift, aren't sure why a test
+is failing, or have any other questions, feel free to ask on Zulip. Not
+everything we hope to do with these projects is reflected in the code or
+documentation yet, so if you see things that seem missing or that don't make
+sense, or even that just don't work the way you expect them to, we're also
+interested to hear about that!
+
+As always, you're more than welcome to [open an
+issue](https://github.com/bytecodealliance/wasmtime/issues/new) too!
 
 ## Finding Something to Hack On
 
@@ -29,3 +43,28 @@
 
 If you're unsure if an issue is a good fit for you or not, feel free to ask in
 a comment on the issue, or in chat.
+
+### Mentoring
+
+We're happy to mentor people, whether you're learning Rust, learning about
+compiler backends, learning about machine code, learning about wasm, learning
+about how Cranelift does things, or all of these at once.
+
+We categorize issues in the issue tracker using a tag scheme inspired by
+[Rust's issue tags]. For example, the [E-easy] tag marks good beginner issues,
+and [E-rust] marks issues which likely require some familiarity with Rust,
+though not necessarily Cranelift-specific or even compiler-specific
+experience. [E-compiler-easy] marks issues good for beginners who have
+some familiarity with compilers, or are interested in gaining some :-).
+
+See also the [full list of labels].
+
+Also, we encourage people to just look around and find things they're
+interested in. This is a good time to get involved, as there aren't a lot of
+things set in stone yet.
+
+[Rust's issue tags]: https://github.com/rust-lang/rust/blob/master/CONTRIBUTING.md#issue-triage
+[E-easy]: https://github.com/bytecodealliance/cranelift/labels/E-easy
+[E-rust]: https://github.com/bytecodealliance/cranelift/labels/E-rust
+[E-compiler-easy]: https://github.com/bytecodealliance/cranelift/labels/E-compiler-easy
+[full list of labels]: https://github.com/bytecodealliance/cranelift/labels
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index ec70d6c695..6a24baa0d8 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -10,9 +10,13 @@ cargo-fuzz = true
 
 [dependencies]
 arbitrary = "0.2.0"
-wasmtime-fuzzing = { path = "../crates/fuzzing" }
-wasmtime = { path = "../crates/api" }
+cranelift-codegen = { path = "../cranelift/codegen" }
+cranelift-reader = { path = "../cranelift/reader" }
+cranelift-wasm = { path = "../cranelift/wasm" }
 libfuzzer-sys = "0.2.1"
+target-lexicon = "0.10"
+wasmtime = { path = "../crates/api" }
+wasmtime-fuzzing = { path = "../crates/fuzzing" }
 
 [[bin]]
 name = "compile"
@@ -43,3 +47,15 @@ name = "differential"
 path = "fuzz_targets/differential.rs"
 test = false
 doc = false
+
+[[bin]]
+name = "translate_module"
+path = "fuzz_targets/translate_module.rs"
+test = false
+doc = false
+
+[[bin]]
+name = "reader_parse_test"
+path = "fuzz_targets/reader_parse_test.rs"
+test = false
+doc = false
diff --git a/fuzz/fuzz_targets/reader_parse_test.rs b/fuzz/fuzz_targets/reader_parse_test.rs
new file mode 100644
index 0000000000..9770606f13
--- /dev/null
+++ b/fuzz/fuzz_targets/reader_parse_test.rs
@@ -0,0 +1,12 @@
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+
+use std::str;
+
+fuzz_target!(|data: &[u8]| {
+    if let Ok(s) = str::from_utf8(data) {
+        let options = cranelift_reader::ParseOptions::default();
+        let _ = cranelift_reader::parse_test(s, options);
+    }
+});
diff --git a/fuzz/fuzz_targets/translate_module.rs b/fuzz/fuzz_targets/translate_module.rs
new file mode 100644
index 0000000000..2965c63c4c
--- /dev/null
+++ b/fuzz/fuzz_targets/translate_module.rs
@@ -0,0 +1,17 @@
+#![no_main]
+
+use cranelift_codegen::{isa, settings};
+use cranelift_wasm::{translate_module, DummyEnvironment, ReturnMode};
+use libfuzzer_sys::fuzz_target;
+use std::str::FromStr;
+use target_lexicon::triple;
+use wasmtime_fuzzing::generators;
+
+fuzz_target!(|data: generators::WasmOptTtf| {
+    let flags = settings::Flags::new(settings::builder());
+    let triple = triple!("x86_64");
+    let isa = isa::lookup(triple).unwrap().finish(flags);
+    let mut dummy_environ =
+        DummyEnvironment::new(isa.frontend_config(), ReturnMode::NormalReturns, false);
+    translate_module(&data.wasm, &mut dummy_environ).unwrap();
+});
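+
+// Note: to exercise a fuzz target locally, the standard cargo-fuzz invocation
+// applies (assuming cargo-fuzz is installed, e.g. via `cargo install cargo-fuzz`):
+//
+//     cargo fuzz run translate_module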