diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index bf9e849931..c38b7cc1a3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -131,7 +131,6 @@ jobs: # Check some feature combinations of the `wasmtime` crate - run: cargo check -p wasmtime --no-default-features - run: cargo check -p wasmtime --no-default-features --features wat - - run: cargo check -p wasmtime --no-default-features --features lightbeam - run: cargo check -p wasmtime --no-default-features --features jitdump - run: cargo check -p wasmtime --no-default-features --features vtune - run: cargo check -p wasmtime --no-default-features --features cache @@ -236,6 +235,12 @@ jobs: qemu_target: aarch64-linux-user # FIXME(#3183) shouldn't be necessary to specify this qemu_flags: -cpu max,pauth=off + - os: ubuntu-latest + target: s390x-unknown-linux-gnu + gcc_package: gcc-s390x-linux-gnu + gcc: s390x-linux-gnu-gcc + qemu: qemu-s390x -L /usr/s390x-linux-gnu + qemu_target: s390x-linux-user steps: - uses: actions/checkout@v2 with: @@ -301,7 +306,7 @@ jobs: RUST_BACKTRACE: 1 if: matrix.target == '' - # Build and test all features except for lightbeam + # Build and test all features - run: ./ci/run-tests.sh --locked env: RUST_BACKTRACE: 1 @@ -323,38 +328,6 @@ jobs: env: RUST_BACKTRACE: 1 - # Build and test lightbeam. Note that - # Lightbeam tests fail right now, but we don't want to block on that. - - run: cargo build --package lightbeam - if: matrix.target != 'aarch64-unknown-linux-gnu' - - run: cargo test --package lightbeam - if: matrix.target != 'aarch64-unknown-linux-gnu' - continue-on-error: true - env: - RUST_BACKTRACE: 1 - - # Perform all tests (debug mode) for `wasmtime` with the old x86 backend. 
- test_x86: - name: Test old x86 backend - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - uses: ./.github/actions/install-rust - with: - toolchain: stable - - # Install wasm32 targets in order to build various tests throughout the - # repo. - - run: rustup target add wasm32-wasi - - run: rustup target add wasm32-unknown-unknown - - # Run the old x86 backend CI (we will eventually remove this). - - run: ./ci/run-tests.sh --features old-x86-backend --locked - env: - RUST_BACKTRACE: 1 - # Build and test the wasi-nn module. test_wasi_nn: name: Test wasi-nn module @@ -434,6 +407,11 @@ jobs: target: aarch64-unknown-linux-gnu gcc_package: gcc-aarch64-linux-gnu gcc: aarch64-linux-gnu-gcc + - build: s390x-linux + os: ubuntu-latest + target: s390x-unknown-linux-gnu + gcc_package: gcc-s390x-linux-gnu + gcc: s390x-linux-gnu-gcc steps: - uses: actions/checkout@v2 with: diff --git a/Cargo.lock b/Cargo.lock index 5e791a4ced..7fab29c6be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,29 +19,30 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "aead" -version = "0.3.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fc95d1bdb8e6666b2b217308eeeb09f2d6728d104be3e31916cc74d15420331" +checksum = "0b613b8e1e3cf911a086f53f03bf286f52fd7a7258e4fa606f0ef220d39d8877" dependencies = [ "generic-array", ] [[package]] name = "aes" -version = "0.6.0" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "884391ef1066acaa41e766ba8f596341b96e93ce34f9a43e7d24bf0a0eaf0561" +checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8" dependencies = [ - "aes-soft", - "aesni", + "cfg-if 1.0.0", "cipher", + "cpufeatures 0.2.1", + "opaque-debug", ] [[package]] name = "aes-gcm" -version = "0.8.0" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5278b5fabbb9bd46e24aa69b2fdea62c99088e0a950a9be40e3e0101298f88da" +checksum = "df5f85a83a7d8b0442b6aa7b504b8212c1733da07b98aae43d4bc21b2cb3cdf6" dependencies = [ "aead", "aes", @@ -51,26 +52,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "aes-soft" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be14c7498ea50828a38d0e24a765ed2effe92a705885b57d029cd67d45744072" -dependencies = [ - "cipher", - "opaque-debug", -] - -[[package]] -name = "aesni" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2e11f5e94c2f7d386164cc2aa1f97823fed6f259e486940a71c174dd01b0ce" -dependencies = [ - "cipher", - "opaque-debug", -] - [[package]] name = "ahash" version = "0.4.7" @@ -125,12 +106,6 @@ dependencies = [ "derive_arbitrary", ] -[[package]] -name = "arrayvec" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" - [[package]] name = "async-trait" version = "0.1.50" @@ -186,6 +161,12 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "base64ct" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40a96587c05c810ddbb79e2674d519cff1379517e7b91d166dff7a7cc0e9af6e" + [[package]] name = "bincode" version = "1.3.3" @@ -235,17 +216,6 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" -[[package]] -name = "bitvec" -version = "0.18.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98fcd36dda4e17b7d7abc64cb549bf0201f4ab71e00700c798ca7e62ed3761fa" -dependencies = [ - "funty", - "radium", - "wyz", -] - [[package]] name = "block-buffer" version = "0.9.0" @@ -427,19 
+397,21 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chacha20" -version = "0.6.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed8738f14471a99f0e316c327e68fc82a3611cc2895fcb604b89eedaf8f39d95" +checksum = "01b72a433d0cf2aef113ba70f62634c56fddb0f244e6377185c56a7cadbd8f91" dependencies = [ + "cfg-if 1.0.0", "cipher", + "cpufeatures 0.2.1", "zeroize", ] [[package]] name = "chacha20poly1305" -version = "0.7.1" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af1fc18e6d90c40164bf6c317476f2a98f04661e310e79830366b7e914c58a8e" +checksum = "3b84ed6d1d5f7aa9bdde921a5090e0ca4d934d250ea3b402a5fab3a994e28a2a" dependencies = [ "aead", "chacha20", @@ -457,15 +429,14 @@ dependencies = [ "libc", "num-integer", "num-traits", - "time", "winapi", ] [[package]] name = "cipher" -version = "0.2.5" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12f8e7987cbd042a63249497f41aed09f8e65add917ea6566effbc56578d6801" +checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7" dependencies = [ "generic-array", ] @@ -522,15 +493,9 @@ dependencies = [ [[package]] name = "const-oid" -version = "0.4.5" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f6b64db6932c7e49332728e3a6bd82c6b7e16016607d20923b537c3bc4c0d5f" - -[[package]] -name = "convert_case" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" +checksum = "fdab415d6744056100f40250a66bc430c1a46f7a02e20bc11c94c79a0f0464df" [[package]] name = "cpp_demangle" @@ -552,10 +517,13 @@ dependencies = [ ] [[package]] -name = "cpuid-bool" -version = "0.2.0" +name = "cpufeatures" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"dcb25d077389e53838a8158c8e99174c5a9d902dee4904320db714f3c653ffba" +checksum = "95059428f66df56b63431fdb4e1947ed2190586af5c5a8a8b71122bdf5a7f469" +dependencies = [ + "libc", +] [[package]] name = "cranelift" @@ -708,7 +676,7 @@ name = "cranelift-native" version = "0.77.0" dependencies = [ "cranelift-codegen", - "rsix", + "libc", "target-lexicon", ] @@ -900,10 +868,22 @@ dependencies = [ ] [[package]] -name = "crypto-mac" -version = "0.10.0" +name = "crypto-bigint" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4857fd85a0c34b3c3297875b747c1e02e06b6a0ea32dd892d8192b9ce0813ea6" +checksum = "d12477e115c0d570c12a2dfd859f80b55b60ddb5075df210d3af06d133a69f45" +dependencies = [ + "generic-array", + "rand_core 0.6.3", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto-mac" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1d1a86f49236c215f271d40892d5fc950490551400b02ef360692c29815c714" dependencies = [ "generic-array", "subtle", @@ -933,9 +913,9 @@ dependencies = [ [[package]] name = "ctr" -version = "0.6.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb4a30d54f7443bf3d6191dcd486aca19e67cb3c49fa7a06a319966346707e7f" +checksum = "049bb91fb4aaf0e3c7efa6cd5ef877dbbbd15b39dad06d9948de4ec8a75761ea" dependencies = [ "cipher", ] @@ -961,11 +941,12 @@ dependencies = [ [[package]] name = "der" -version = "0.1.0" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51f59c66c30bb7445c8320a5f9233e437e3572368099f25532a59054328899b4" +checksum = "28e98c534e9c8a0483aa01d6f6913bc063de254311bd267c9cf535e9b70e15b2" dependencies = [ "const-oid", + "crypto-bigint", ] [[package]] @@ -990,29 +971,6 @@ dependencies = [ "syn", ] -[[package]] -name = "derive_more" -version = "0.99.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5cc7b9cef1e351660e5443924e4f43ab25fbbed3e9a5f052df3677deb4d6b320" -dependencies = [ - "convert_case", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "derive_utils" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "532b4c15dccee12c7044f1fcad956e98410860b22231e44a3b827464797ca7bf" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "digest" version = "0.9.0" @@ -1059,38 +1017,13 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" -[[package]] -name = "dynasm" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdc2d9a5e44da60059bd38db2d05cbb478619541b8c79890547861ec1e3194f0" -dependencies = [ - "bitflags", - "byteorder", - "lazy_static", - "proc-macro-error", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "dynasmrt" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42276e3f205fe63887cca255aa9a65a63fb72764c30b9a6252a7c7e46994f689" -dependencies = [ - "byteorder", - "dynasm", - "memmap2", -] - [[package]] name = "ecdsa" -version = "0.10.2" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41fbdb4ff710acb4db8ca29f93b897529ea6d6a45626d5183b47e012aa6ae7e4" +checksum = "43ee23aa5b4f68c7a092b5c3beb25f50c406adc75e2363634f242f28ab255372" dependencies = [ + "der", "elliptic-curve", "hmac", "signature", @@ -1127,18 +1060,16 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" [[package]] name = "elliptic-curve" -version = "0.8.5" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2db227e61a43a34915680bdda462ec0e212095518020a88a1f91acd16092c39" +checksum = "beca177dcb8eb540133e7680baff45e7cc4d93bf22002676cec549f82343721b" dependencies = [ - 
"bitvec", - "digest", + "crypto-bigint", "ff", - "funty", "generic-array", "group", "pkcs8", - "rand_core 0.5.1", + "rand_core 0.6.3", "subtle", "zeroize", ] @@ -1216,12 +1147,11 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" [[package]] name = "ff" -version = "0.8.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01646e077d4ebda82b73f1bca002ea1e91561a77df2431a9e79729bcc31950ef" +checksum = "d0f40b2dcd8bc322217a5f6559ae5f9e9d1de202a2ecee2e9eafcbece7562a4f" dependencies = [ - "bitvec", - "rand_core 0.5.1", + "rand_core 0.6.3", "subtle", ] @@ -1290,12 +1220,6 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e398fae362f4124bbe630d99519fb2d68a03e2e3a23b441028cdcdc4f4895687" -[[package]] -name = "funty" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" - [[package]] name = "gcc" version = "0.3.55" @@ -1336,9 +1260,9 @@ dependencies = [ [[package]] name = "ghash" -version = "0.3.1" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97304e4cd182c3846f7575ced3890c53012ce534ad9114046b0a9e00bb30a375" +checksum = "1583cc1656d7839fd3732b80cf4f38850336cdb9b8ded1cd399ca62958de3c99" dependencies = [ "opaque-debug", "polyval", @@ -1363,12 +1287,12 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] name = "group" -version = "0.8.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc11f9f5fbf1943b48ae7c2bf6846e7d827a512d1be4f23af708f5ca5d01dde1" +checksum = "1c363a5301b8f153d80747126a04b3c82073b9fe3130571a9d170cacdeaf7912" dependencies = [ "ff", - "rand_core 0.5.1", + "rand_core 0.6.3", "subtle", ] @@ -1407,9 +1331,9 @@ dependencies = [ [[package]] name = "hkdf" -version = "0.10.0" +version = "0.11.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51ab2f639c231793c5f6114bdb9bbe50a7dbbfcd7c7c6bd8475dec2d991e964f" +checksum = "01706d578d5c281058480e673ae4086a9f4710d8df1ad80a5b03e39ece5f886b" dependencies = [ "digest", "hmac", @@ -1417,9 +1341,9 @@ dependencies = [ [[package]] name = "hmac" -version = "0.10.1" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1441c6b1e930e2817404b5046f1f989899143a12bf92de603b69f4e0aee1e15" +checksum = "2a2a2320eb7ec0ebe8da8f744d7812d9fc4cb4d09344ac01898dbcb6a20ae69b" dependencies = [ "crypto-mac", "digest", @@ -1494,17 +1418,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47be2f14c678be2fdcab04ab1171db51b2762ce6f0a8ee87c8dd4a04ed216135" -[[package]] -name = "iter-enum" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f947f0d9df7e69c4df60a950c0a83741455bb9ebd8fd9b5a87994dda4dbb005" -dependencies = [ - "derive_utils", - "quote", - "syn", -] - [[package]] name = "itertools" version = "0.9.0" @@ -1558,9 +1471,9 @@ dependencies = [ [[package]] name = "k256" -version = "0.7.3" +version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4476a0808212a9e81ce802eb1a0cfc60e73aea296553bacc0fac7e1268bc572a" +checksum = "903ae2481bcdfdb7b68e0a9baa4b7c9aff600b9ae2e8e5bb5833b8c91ab851ea" dependencies = [ "cfg-if 1.0.0", "ecdsa", @@ -1591,9 +1504,9 @@ checksum = "3576a87f2ba00f6f106fdfcd16db1d698d648a26ad8e0573cad8537c3c362d2a" [[package]] name = "libc" -version = "0.2.99" +version = "0.2.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7f823d141fe0a24df1e23b4af4e3c7ba9e5966ec514ea068c93024aa7deb765" +checksum = "dd8f7255a17a627354f321ef0055d63b898c6fb27eff628af4d1b66b7331edf6" [[package]] name = "libfuzzer-sys" @@ -1622,30 +1535,6 @@ version = "0.2.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "c7d73b3f436185384286bd8098d17ec07c9a7d2388a6599f824d8502b529702a" -[[package]] -name = "lightbeam" -version = "0.30.0" -dependencies = [ - "anyhow", - "arrayvec", - "capstone", - "cranelift-codegen", - "derive_more", - "dynasm", - "dynasmrt", - "iter-enum", - "itertools 0.10.0", - "lazy_static", - "memoffset", - "more-asserts", - "quickcheck", - "smallvec", - "thiserror", - "typemap", - "wasmparser", - "wat", -] - [[package]] name = "linux-raw-sys" version = "0.0.24" @@ -1805,22 +1694,11 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-bigint" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e0d047c1062aa51e256408c560894e5251f08925980e53cf1aa5bd00eec6512" -dependencies = [ - "autocfg 1.0.1", - "num-integer", - "num-traits", -] - [[package]] name = "num-bigint-dig" -version = "0.6.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d51546d704f52ef14b3c962b5776e53d5b862e5790e40a350d366c209bd7f7a" +checksum = "4547ee5541c18742396ae2c895d0717d0f886d8823b8399cdaf7b07d63ad0480" dependencies = [ "autocfg 0.1.7", "byteorder", @@ -1829,7 +1707,7 @@ dependencies = [ "num-integer", "num-iter", "num-traits", - "rand 0.7.3", + "rand 0.8.3", "serde", "smallvec", "zeroize", @@ -1863,7 +1741,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c000134b5dbf44adc5cb772486d335293351644b801551abe8f75c84cfa4aef" dependencies = [ "autocfg 1.0.1", - "num-bigint 0.2.6", + "num-bigint", "num-integer", "num-traits", ] @@ -1875,6 +1753,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" dependencies = [ "autocfg 1.0.1", + "libm", ] [[package]] @@ -1991,9 +1870,9 @@ dependencies = [ [[package]] name = "p256" -version = "0.7.3" +version = "0.9.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8adcc06fe90ec8fb2d2ad46746d2cbd639b158d4240364aa832da7e263dbee91" +checksum = "d053368e1bae4c8a672953397bd1bd7183dde1c72b0b7612a15719173148d186" dependencies = [ "ecdsa", "elliptic-curve", @@ -2148,14 +2027,12 @@ name = "peepmatic-traits" version = "0.77.0" [[package]] -name = "pem" -version = "0.8.3" +name = "pem-rfc7468" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd56cbd21fea48d0c440b41cd69c589faacade08c992d9a54e471b79d0fd13eb" +checksum = "e71fb2d401a15271d52aade6d9410fb4ead603a86da5503f92e872e1df790265" dependencies = [ - "base64", - "once_cell", - "regex", + "base64ct", ] [[package]] @@ -2165,13 +2042,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc0e1f259c92177c30a4c9d177246edd0a3568b25756a977d0632cf8fa37e905" [[package]] -name = "pkcs8" -version = "0.3.3" +name = "pkcs1" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4839a901843f3942576e65857f0ebf2e190ef7024d3c62a94099ba3f819ad1d" +checksum = "116bee8279d783c0cf370efa1a94632f2108e5ef0bb32df31f051647810a4e2c" dependencies = [ "der", - "subtle-encoding", + "pem-rfc7468", + "zeroize", +] + +[[package]] +name = "pkcs8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee3ef9b64d26bad0536099c816c6734379e45bbd5f14798def6809e5cc350447" +dependencies = [ + "der", + "pem-rfc7468", + "pkcs1", + "spki", "zeroize", ] @@ -2205,21 +2095,23 @@ dependencies = [ [[package]] name = "poly1305" -version = "0.6.2" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b7456bc1ad2d4cf82b3a016be4c2ac48daf11bf990c1603ebd447fe6f30fca8" +checksum = "048aeb476be11a4b6ca432ca569e375810de9294ae78f4774e78ea98a9246ede" dependencies = [ - "cpuid-bool", + "cpufeatures 0.2.1", + "opaque-debug", "universal-hash", ] [[package]] name = "polyval" 
-version = "0.4.5" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eebcc4aa140b9abd2bc40d9c3f7ccec842679cd79045ac3a7ac698c1a064b7cd" +checksum = "8419d2b623c7c0896ff2d5d96e2cb4ede590fed28fcc34934f4c33c036e620a1" dependencies = [ - "cpuid-bool", + "cfg-if 1.0.0", + "cpufeatures 0.2.1", "opaque-debug", "universal-hash", ] @@ -2232,31 +2124,43 @@ checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" [[package]] name = "pqcrypto" -version = "0.12.2" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d3874384bf37d988b83f806d632e2f7fca69a8cd0338efaa64e8e7664573052" +checksum = "9da39bd0587bff4189521766c34f3203263926f7527906578a96d22a81a700d5" dependencies = [ "pqcrypto-kyber", "pqcrypto-traits", ] [[package]] -name = "pqcrypto-kyber" -version = "0.6.7" +name = "pqcrypto-internals" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33550a5b6e0844d1b2363f67e15e4ca64586bb4fb2363a83af762e6c2d092bff" +checksum = "c5397335b92875d36fb30f91557c3769517c9cfbc212568a5b8ceafd304eca84" +dependencies = [ + "cc", + "getrandom 0.2.3", + "libc", +] + +[[package]] +name = "pqcrypto-kyber" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a2b5431714840eb2c0ccc167d3d17940e66f48da9ab96c51ea1d4e0aa46d6a1" dependencies = [ "cc", "glob", "libc", + "pqcrypto-internals", "pqcrypto-traits", ] [[package]] name = "pqcrypto-traits" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4e1563eff60a9ae869cacee0a33fa5c4ba27861fec6e3e23de95eb0ae805e4b" +checksum = "97e91cb6af081c6daad5fa705f8adb0634c027662052cb3174bdf2957bf07e25" [[package]] name = "pretty_env_logger" @@ -2342,17 +2246,6 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" -[[package]] -name = "quickcheck" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" -dependencies = [ - "env_logger 0.8.3", - "log", - "rand 0.8.3", -] - [[package]] name = "quote" version = "1.0.9" @@ -2362,12 +2255,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "radium" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "def50a86306165861203e7f84ecffbbdfdea79f0e51039b33de1e952358c47ac" - [[package]] name = "rand" version = "0.7.3" @@ -2389,7 +2276,7 @@ checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" dependencies = [ "libc", "rand_chacha 0.3.0", - "rand_core 0.6.2", + "rand_core 0.6.3", "rand_hc 0.3.0", ] @@ -2410,7 +2297,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" dependencies = [ "ppv-lite86", - "rand_core 0.6.2", + "rand_core 0.6.3", ] [[package]] @@ -2424,9 +2311,9 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" dependencies = [ "getrandom 0.2.3", ] @@ -2446,7 +2333,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" dependencies = [ - "rand_core 0.6.2", + "rand_core 0.6.3", ] [[package]] @@ -2455,7 +2342,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" dependencies = [ - "rand_core 0.6.2", + "rand_core 
0.6.3", ] [[package]] @@ -2569,9 +2456,9 @@ dependencies = [ [[package]] name = "rsa" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3648b669b10afeab18972c105e284a7b953a669b0be3514c27f9b17acab2f9cd" +checksum = "e05c2603e2823634ab331437001b411b9ed11660fbc4066f3908c84a9439260d" dependencies = [ "byteorder", "digest", @@ -2580,27 +2467,14 @@ dependencies = [ "num-integer", "num-iter", "num-traits", - "pem", - "rand 0.7.3", - "sha2", - "simple_asn1 0.4.1", + "pkcs1", + "pkcs8", + "rand 0.8.3", + "serde", "subtle", - "thiserror", "zeroize", ] -[[package]] -name = "rsa-export" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fce6de48e7cae950d65a62e67da01c1bb44576f0c3ea3e5749088cf0205a7263" -dependencies = [ - "num-bigint-dig", - "pem", - "rsa", - "simple_asn1 0.5.3", -] - [[package]] name = "rsix" version = "0.23.2" @@ -2757,7 +2631,7 @@ checksum = "b362ae5752fd2137731f9fa25fd4d9058af34666ca1966fb969119cc35719f12" dependencies = [ "block-buffer", "cfg-if 1.0.0", - "cpufeatures", + "cpufeatures 0.1.4", "digest", "opaque-debug", ] @@ -2800,35 +2674,12 @@ dependencies = [ [[package]] name = "signature" -version = "1.2.2" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29f060a7d147e33490ec10da418795238fd7545bba241504d6b31a409f2e6210" +checksum = "c19772be3c4dd2ceaacf03cb41d5885f2a02c4d8804884918e3a258480803335" dependencies = [ "digest", - "rand_core 0.5.1", -] - -[[package]] -name = "simple_asn1" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692ca13de57ce0613a363c8c2f1de925adebc81b04c923ac60c5488bb44abe4b" -dependencies = [ - "chrono", - "num-bigint 0.2.6", - "num-traits", -] - -[[package]] -name = "simple_asn1" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"dc31e6cf34ad4321d3a2b8f934949b429e314519f753a77962f16c664dca8e13" -dependencies = [ - "chrono", - "num-bigint 0.4.0", - "num-traits", - "thiserror", + "rand_core 0.6.3", ] [[package]] @@ -2852,6 +2703,15 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +[[package]] +name = "spki" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c01a0c15da1b0b0e1494112e7af814a678fec9bd157881b49beac661e9b6f32" +dependencies = [ + "der", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -2900,15 +2760,6 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e81da0851ada1f3e9d4312c704aa4f8806f0f9d69faaf8df2f3464b4a9437c2" -[[package]] -name = "subtle-encoding" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dcb1ed7b8330c5eed5441052651dd7a12c75e2ed88f2ec024ae1fa3a5e59945" -dependencies = [ - "zeroize", -] - [[package]] name = "syn" version = "1.0.72" @@ -3045,16 +2896,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "time" -version = "0.1.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "tinytemplate" version = "1.2.1" @@ -3178,21 +3019,6 @@ dependencies = [ "tracing-serde", ] -[[package]] -name = "traitobject" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd1f82c56340fdf16f2a953d7bda4f8fdffba13d93b00844c25572110b26079" - -[[package]] -name = "typemap" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "653be63c80a3296da5551e1bfd2cca35227e13cdd08c6668903ae2f4f77aa1f6" -dependencies = [ - "unsafe-any", -] - [[package]] name = "typenum" version = 
"1.13.0" @@ -3227,15 +3053,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "unsafe-any" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f30360d7979f5e9c6e6cea48af192ea8fab4afb3cf72597154b8f08935bc9c7f" -dependencies = [ - "traitobject", -] - [[package]] name = "unsafe-io" version = "0.9.1" @@ -3371,7 +3188,7 @@ dependencies = [ [[package]] name = "wasi-crypto" -version = "0.1.4" +version = "0.1.5" dependencies = [ "aes-gcm", "anyhow", @@ -3388,8 +3205,8 @@ dependencies = [ "parking_lot", "pqcrypto", "rand_core 0.5.1", + "rand_core 0.6.3", "rsa", - "rsa-export", "serde", "sha2", "subtle", @@ -3797,20 +3614,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "wasmtime-lightbeam" -version = "0.30.0" -dependencies = [ - "anyhow", - "cranelift-codegen", - "gimli", - "lightbeam", - "object", - "target-lexicon", - "wasmparser", - "wasmtime-environ", -] - [[package]] name = "wasmtime-runtime" version = "0.30.0" @@ -4047,12 +3850,6 @@ dependencies = [ "wast 35.0.2", ] -[[package]] -name = "wyz" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85e60b0d1b5f99db2556934e21937020776a5d31520bf169e851ac44e6420214" - [[package]] name = "xoodyak" version = "0.7.2" @@ -4085,18 +3882,18 @@ dependencies = [ [[package]] name = "zeroize" -version = "1.3.0" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4756f7db3f7b5574938c3eb1c117038b8e07f95ee6718c0efad4ac21508f1efd" +checksum = "bf68b08513768deaa790264a7fac27a58cbf2705cfcdc9448362229217d7e970" dependencies = [ "zeroize_derive", ] [[package]] name = "zeroize_derive" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2c1e130bebaeab2f23886bf9acbaca14b092408c452543c857f66399cd6dab1" +checksum = "bdff2024a851a322b08f179173ae2ba620445aef1e838f0c196820eade4ae0c7" dependencies = [ "proc-macro2", "quote", diff --git 
a/Cargo.toml b/Cargo.toml index bb7806a973..d69e3ac062 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -75,7 +75,6 @@ members = [ "cranelift", "crates/bench-api", "crates/c-api", - "crates/lightbeam/wasmtime", "crates/misc/run-examples", "examples/fib-debug/wasm", "examples/wasi/wasm", @@ -83,13 +82,12 @@ members = [ "fuzz", ] exclude = [ - 'crates/wasi-common/WASI/tools/witx-cli', + 'crates/wasi-common/WASI/tools/witx-cli', 'docs/rust_wasi_markdown_parser' ] [features] default = ["jitdump", "wasmtime/wat", "wasmtime/parallel-compilation", "wasi-nn"] -lightbeam = ["wasmtime/lightbeam"] jitdump = ["wasmtime/jitdump"] vtune = ["wasmtime/vtune"] wasi-crypto = ["wasmtime-wasi-crypto"] @@ -102,9 +100,6 @@ posix-signals-on-macos = ["wasmtime/posix-signals-on-macos"] # backend is the default now. experimental_x64 = [] -# Use the old x86 backend. -old-x86-backend = ["wasmtime/old-x86-backend"] - [badges] maintenance = { status = "actively-developed" } diff --git a/README.md b/README.md index 302883cd26..f5151c2acd 100644 --- a/README.md +++ b/README.md @@ -70,8 +70,8 @@ Hello, world! quickly generate high-quality machine code at runtime. * **Configurable**. Whether you need to precompile your wasm ahead of time, - generate code blazingly fast with Lightbeam, or interpret it at runtime, - Wasmtime has you covered for all your wasm-executing needs. + or interpret it at runtime, Wasmtime has you covered for all your + wasm-executing needs. * **WASI**. Wasmtime supports a rich set of APIs for interacting with the host environment through the [WASI standard](https://wasi.dev). diff --git a/RELEASES.md b/RELEASES.md index 68bc93de1c..501391e458 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -4,6 +4,43 @@ ## Unreleased +### Added + +* New `Func::new_unchecked` and `Func::call_unchecked` APIs have been added with + accompanying functions in the C API to improve the performance of calls into + wasm and the host in the C API. 
+ [#3350](https://github.com/bytecodealliance/wasmtime/pull/3350) + +* Release binaries are now available for the s390x-unknown-linux-gnu + architecture. + [#3372](https://github.com/bytecodealliance/wasmtime/pull/3372) + +### Changed + +* The `Func::call` method now takes a slice to write the results into rather + than returning a boxed slice. + [#3319](https://github.com/bytecodealliance/wasmtime/pull/3319) + +* Trampolines are now covered when jitdump profiling is enabled. + [#3344](https://github.com/bytecodealliance/wasmtime/pull/3344) + +### Fixed + +* Debugging with GDB has been fixed on Windows. + [#3373](https://github.com/bytecodealliance/wasmtime/pull/3373) + +### Removed + +* The Lightbeam backend has been removed, as per [RFC 14]. + [#3390](https://github.com/bytecodealliance/wasmtime/pull/3390) + +[RFC 14]: https://github.com/bytecodealliance/rfcs/pull/14 + +* Cranelift's old x86 backend has been removed, as per [RFC 12]. + [#3009](https://github.com/bytecodealliance/wasmtime/pull/3009) + +[RFC 12]: https://github.com/bytecodealliance/rfcs/pull/12 + ## 0.30.0 Released 2021-09-17. diff --git a/build.rs b/build.rs index b3dc1fd8ea..d5f21e032f 100644 --- a/build.rs +++ b/build.rs @@ -17,11 +17,7 @@ fn main() -> anyhow::Result<()> { ); let mut out = String::new(); - for strategy in &[ - "Cranelift", - #[cfg(feature = "lightbeam")] - "Lightbeam", - ] { + for strategy in &["Cranelift"] { writeln!(out, "#[cfg(test)]")?; writeln!(out, "#[allow(non_snake_case)]")?; writeln!(out, "mod {} {{", strategy)?; @@ -185,24 +181,12 @@ fn write_testsuite_tests( /// Ignore tests that aren't supported yet.
fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { match strategy { - #[cfg(feature = "lightbeam")] - "Lightbeam" => match (testsuite, testname) { - ("simd", _) => return true, - ("multi_value", _) => return true, - ("reference_types", _) => return true, - ("bulk_memory_operations", _) => return true, - _ => (), - }, "Cranelift" => match (testsuite, testname) { - // Skip all reference types tests on the old backend. The modern - // implementation of reference types uses atomic instructions - // for reference counts on `externref`, but the old backend does not - // implement atomic instructions. - ("reference_types", _) if cfg!(feature = "old-x86-backend") => return true, // No simd support yet for s390x. ("simd", _) if platform_is_s390x() => return true, - // No memory64 support yet for s390x. - ("memory64", _) if platform_is_s390x() => return true, + ("memory64", "simd") if platform_is_s390x() => return true, + // No full atomics support yet for s390x. + ("memory64", "threads") if platform_is_s390x() => return true, _ => {} }, _ => panic!("unrecognized strategy"), diff --git a/ci/run-tests.sh b/ci/run-tests.sh index 15b2cabc03..7bd661ffe5 100755 --- a/ci/run-tests.sh +++ b/ci/run-tests.sh @@ -3,7 +3,6 @@ cargo test \ --features "test-programs/test_programs" \ --workspace \ - --exclude '*lightbeam*' \ --exclude 'wasmtime-wasi-*' \ --exclude 'peepmatic*' \ --exclude wasi-crypto \ diff --git a/ci/setup_centos6_python3.sh b/ci/setup_centos6_python3.sh deleted file mode 100644 index 35ab465469..0000000000 --- a/ci/setup_centos6_python3.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -set -e - -VERSION=${1:-3.7.3} - -# Python 3.6 stands in our way -- nuking it -yum erase -y rh-python36 -rm -rf /opt/rh/rh-python36 - -yum install -y gcc bzip2-devel libffi-devel zlib-devel - -cd /usr/src/ - -# pip3.7 needs new openssl -curl -O -L https://github.com/openssl/openssl/archive/OpenSSL_1_1_1c.tar.gz -tar -zxvf OpenSSL_1_1_1c.tar.gz -cd openssl-OpenSSL_1_1_1c 
-./Configure shared zlib linux-x86_64 -make -sj4 -make install -cd .. -rm -rf openssl-OpenSSL_1_1_1c - -# Fixing libssl.so.1.1: cannot open shared object file -echo "/usr/local/lib64" >> /etc/ld.so.conf && ldconfig - -curl -O -L https://www.python.org/ftp/python/${VERSION}/Python-${VERSION}.tgz -tar xzf Python-${VERSION}.tgz -cd Python-${VERSION} -./configure -make -sj4 -make install -cd .. -rm -rf Python-${VERSION} diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 5bc6b2c9ca..984d696373 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -63,7 +63,6 @@ unwind = ["gimli"] # If no ISA targets are explicitly enabled, the ISA target for the host machine is enabled. x86 = [] arm64 = [] -riscv = [] s390x = [] arm32 = [] # Work-in-progress codegen backend for ARM. @@ -71,14 +70,10 @@ arm32 = [] # Work-in-progress codegen backend for ARM. # backend is the default now. experimental_x64 = [] -# Make the old x86 backend the default. -old-x86-backend = [] - # Option to enable all architectures. all-arch = [ "x86", "arm64", - "riscv", "s390x" ] diff --git a/cranelift/codegen/benches/x64-evex-encoding.rs b/cranelift/codegen/benches/x64-evex-encoding.rs index e5c1434181..550e88a0d9 100644 --- a/cranelift/codegen/benches/x64-evex-encoding.rs +++ b/cranelift/codegen/benches/x64-evex-encoding.rs @@ -5,12 +5,9 @@ #[cfg(feature = "x86")] mod x86 { use cranelift_codegen::isa::x64::encoding::{ - evex::{EvexContext, EvexInstruction, EvexMasking, EvexVectorLength, Register}, - rex::OpcodeMap, - rex::{encode_modrm, LegacyPrefixes}, - ByteSink, + evex::{EvexInstruction, EvexVectorLength, Register}, + rex::{LegacyPrefixes, OpcodeMap}, }; - use cranelift_codegen_shared::isa::x86::EncodingBits; use criterion::{criterion_group, Criterion}; // Define the benchmarks. 
@@ -34,26 +31,6 @@ mod x86 { .encode(&mut sink); }); }); - - group.bench_function("encode_evex (function pattern)", |b| { - let mut sink = vec![]; - let bits = EncodingBits::new(&[0x66, 0x0f, 0x38, 0x1f], 0, 1); - let vvvvv = Register::from(0); - b.iter(|| { - sink.clear(); - encode_evex( - bits, - rax, - vvvvv, - rdx, - EvexContext::Other { - length: EvexVectorLength::V128, - }, - EvexMasking::default(), - &mut sink, - ); - }) - }); } criterion_group!(benches, x64_evex_encoding_benchmarks); @@ -65,66 +42,6 @@ mod x86 { benches(); Criterion::default().configure_from_args().final_summary(); } - - /// From the legacy x86 backend: a mechanism for encoding an EVEX - /// instruction, including the prefixes, the instruction opcode, and the - /// ModRM byte. This EVEX encoding function only encodes the `reg` (operand - /// 1), `vvvv` (operand 2), `rm` (operand 3) form; other forms are possible - /// (see section 2.6.2, Intel Software Development Manual, volume 2A), - /// requiring refactoring of this function or separate functions for each - /// form (e.g. as for the REX prefix). - #[inline(always)] - pub fn encode_evex( - enc: EncodingBits, - reg: Register, - vvvvv: Register, - rm: Register, - context: EvexContext, - masking: EvexMasking, - sink: &mut CS, - ) { - let reg: u8 = reg.into(); - let rm: u8 = rm.into(); - let vvvvv: u8 = vvvvv.into(); - - // EVEX prefix. - sink.put1(0x62); - - debug_assert!(enc.mm() < 0b100); - let mut p0 = enc.mm() & 0b11; - p0 |= evex2(rm, reg) << 4; // bits 3:2 are always unset - sink.put1(p0); - - let mut p1 = enc.pp() | 0b100; // bit 2 is always set - p1 |= (!(vvvvv) & 0b1111) << 3; - p1 |= (enc.rex_w() & 0b1) << 7; - sink.put1(p1); - - let mut p2 = masking.aaa_bits(); - p2 |= (!(vvvvv >> 4) & 0b1) << 3; - p2 |= context.bits() << 4; - p2 |= masking.z_bit() << 7; - sink.put1(p2); - - // Opcode. - sink.put1(enc.opcode_byte()); - - // ModR/M byte. 
- sink.put1(encode_modrm(3, reg & 7, rm & 7)) - } - - /// From the legacy x86 backend: encode the RXBR' bits of the EVEX P0 byte. - /// For an explanation of these bits, see section 2.6.1 in the Intel - /// Software Development Manual, volume 2A. These bits can be used by - /// different addressing modes (see section 2.6.2), requiring different - /// `vex*` functions than this one. - fn evex2(rm: u8, reg: u8) -> u8 { - let b = !(rm >> 3) & 1; - let x = !(rm >> 4) & 1; - let r = !(reg >> 3) & 1; - let r_ = !(reg >> 4) & 1; - 0x00 | r_ | (b << 1) | (x << 2) | (r << 3) - } } fn main() { diff --git a/cranelift/codegen/build.rs b/cranelift/codegen/build.rs index dd6cfc999c..10def102be 100644 --- a/cranelift/codegen/build.rs +++ b/cranelift/codegen/build.rs @@ -27,15 +27,6 @@ fn main() { let out_dir = env::var("OUT_DIR").expect("The OUT_DIR environment variable must be set"); let target_triple = env::var("TARGET").expect("The TARGET environment variable must be set"); - let new_backend_isas = if env::var("CARGO_FEATURE_X64").is_ok() { - // The x64 (new backend for x86_64) is a bit particular: it only requires generating - // the shared meta code; the only ISA-specific code is for settings. - vec![meta::isa::Isa::X86] - } else { - Vec::new() - }; - - // Configure isa targets using the old backend. let isa_targets = meta::isa::Isa::all() .iter() .cloned() @@ -45,7 +36,7 @@ fn main() { }) .collect::>(); - let old_backend_isas = if new_backend_isas.is_empty() && isa_targets.is_empty() { + let isas = if isa_targets.is_empty() { // Try to match native target. 
let target_name = target_triple.split('-').next().unwrap(); let isa = meta::isa_from_arch(&target_name).expect("error when identifying target"); @@ -65,23 +56,14 @@ fn main() { crate_dir.join("build.rs").to_str().unwrap() ); - if let Err(err) = meta::generate(&old_backend_isas, &new_backend_isas, &out_dir) { + if let Err(err) = meta::generate(&isas, &out_dir) { eprintln!("Error: {}", err); process::exit(1); } if env::var("CRANELIFT_VERBOSE").is_ok() { - for isa in &old_backend_isas { - println!( - "cargo:warning=Includes old-backend support for {} ISA", - isa.to_string() - ); - } - for isa in &new_backend_isas { - println!( - "cargo:warning=Includes new-backend support for {} ISA", - isa.to_string() - ); + for isa in &isas { + println!("cargo:warning=Includes support for {} ISA", isa.to_string()); } println!( "cargo:warning=Build step took {:?}.", diff --git a/cranelift/codegen/meta/src/cdsl/ast.rs b/cranelift/codegen/meta/src/cdsl/ast.rs deleted file mode 100644 index 6bfd1721ae..0000000000 --- a/cranelift/codegen/meta/src/cdsl/ast.rs +++ /dev/null @@ -1,755 +0,0 @@ -use crate::cdsl::instructions::{InstSpec, Instruction, InstructionPredicate}; -use crate::cdsl::operands::{OperandKind, OperandKindFields}; -use crate::cdsl::types::ValueType; -use crate::cdsl::typevar::{TypeSetBuilder, TypeVar}; - -use cranelift_entity::{entity_impl, PrimaryMap, SparseMap, SparseMapValue}; - -use std::fmt; -use std::iter::IntoIterator; - -pub(crate) enum Expr { - Var(VarIndex), - Literal(Literal), -} - -impl Expr { - pub fn maybe_literal(&self) -> Option<&Literal> { - match &self { - Expr::Literal(lit) => Some(lit), - _ => None, - } - } - - pub fn maybe_var(&self) -> Option { - if let Expr::Var(var) = &self { - Some(*var) - } else { - None - } - } - - pub fn unwrap_var(&self) -> VarIndex { - self.maybe_var() - .expect("tried to unwrap a non-Var content in Expr::unwrap_var") - } - - pub fn to_rust_code(&self, var_pool: &VarPool) -> String { - match self { - Expr::Var(var_index) => 
var_pool.get(*var_index).to_rust_code(), - Expr::Literal(literal) => literal.to_rust_code(), - } - } -} - -/// An AST definition associates a set of variables with the values produced by an expression. -pub(crate) struct Def { - pub apply: Apply, - pub defined_vars: Vec, -} - -impl Def { - pub fn to_comment_string(&self, var_pool: &VarPool) -> String { - let results = self - .defined_vars - .iter() - .map(|&x| var_pool.get(x).name.as_str()) - .collect::>(); - - let results = if results.len() == 1 { - results[0].to_string() - } else { - format!("({})", results.join(", ")) - }; - - format!("{} := {}", results, self.apply.to_comment_string(var_pool)) - } -} - -pub(crate) struct DefPool { - pool: PrimaryMap, -} - -impl DefPool { - pub fn new() -> Self { - Self { - pool: PrimaryMap::new(), - } - } - pub fn get(&self, index: DefIndex) -> &Def { - self.pool.get(index).unwrap() - } - pub fn next_index(&self) -> DefIndex { - self.pool.next_key() - } - pub fn create_inst(&mut self, apply: Apply, defined_vars: Vec) -> DefIndex { - self.pool.push(Def { - apply, - defined_vars, - }) - } -} - -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct DefIndex(u32); -entity_impl!(DefIndex); - -/// A definition which would lead to generate a block creation. -#[derive(Clone)] -pub(crate) struct Block { - /// Instruction index after which the block entry is set. - pub location: DefIndex, - /// Variable holding the new created block. 
- pub name: VarIndex, -} - -pub(crate) struct BlockPool { - pool: SparseMap, -} - -impl SparseMapValue for Block { - fn key(&self) -> DefIndex { - self.location - } -} - -impl BlockPool { - pub fn new() -> Self { - Self { - pool: SparseMap::new(), - } - } - pub fn get(&self, index: DefIndex) -> Option<&Block> { - self.pool.get(index) - } - pub fn create_block(&mut self, name: VarIndex, location: DefIndex) { - if self.pool.contains_key(location) { - panic!("Attempt to insert 2 blocks after the same instruction") - } - self.pool.insert(Block { location, name }); - } - pub fn is_empty(&self) -> bool { - self.pool.is_empty() - } -} - -// Implement IntoIterator such that we can iterate over blocks which are in the block pool. -impl<'a> IntoIterator for &'a BlockPool { - type Item = <&'a SparseMap as IntoIterator>::Item; - type IntoIter = <&'a SparseMap as IntoIterator>::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.pool.into_iter() - } -} - -#[derive(Clone, Debug)] -pub(crate) enum Literal { - /// A value of an enumerated immediate operand. - /// - /// Some immediate operand kinds like `intcc` and `floatcc` have an enumerated range of values - /// corresponding to a Rust enum type. An `Enumerator` object is an AST leaf node representing one - /// of the values. - Enumerator { - rust_type: &'static str, - value: &'static str, - }, - - /// A bitwise value of an immediate operand, used for bitwise exact floating point constants. - Bits { rust_type: &'static str, value: u64 }, - - /// A value of an integer immediate operand. - Int(i64), - - /// A empty list of variable set of arguments. 
- EmptyVarArgs, -} - -impl Literal { - pub fn enumerator_for(kind: &OperandKind, value: &'static str) -> Self { - let value = match &kind.fields { - OperandKindFields::ImmEnum(values) => values.get(value).unwrap_or_else(|| { - panic!( - "nonexistent value '{}' in enumeration '{}'", - value, kind.rust_type - ) - }), - _ => panic!("enumerator is for enum values"), - }; - Literal::Enumerator { - rust_type: kind.rust_type, - value, - } - } - - pub fn bits(kind: &OperandKind, bits: u64) -> Self { - match kind.fields { - OperandKindFields::ImmValue => {} - _ => panic!("bits_of is for immediate scalar types"), - } - Literal::Bits { - rust_type: kind.rust_type, - value: bits, - } - } - - pub fn constant(kind: &OperandKind, value: i64) -> Self { - match kind.fields { - OperandKindFields::ImmValue => {} - _ => panic!("constant is for immediate scalar types"), - } - Literal::Int(value) - } - - pub fn empty_vararg() -> Self { - Literal::EmptyVarArgs - } - - pub fn to_rust_code(&self) -> String { - match self { - Literal::Enumerator { rust_type, value } => format!("{}::{}", rust_type, value), - Literal::Bits { rust_type, value } => format!("{}::with_bits({:#x})", rust_type, value), - Literal::Int(val) => val.to_string(), - Literal::EmptyVarArgs => "&[]".into(), - } - } -} - -#[derive(Clone, Copy, Debug)] -pub(crate) enum PatternPosition { - Source, - Destination, -} - -/// A free variable. -/// -/// When variables are used in `XForms` with source and destination patterns, they are classified -/// as follows: -/// -/// Input values: Uses in the source pattern with no preceding def. These may appear as inputs in -/// the destination pattern too, but no new inputs can be introduced. -/// -/// Output values: Variables that are defined in both the source and destination pattern. These -/// values may have uses outside the source pattern, and the destination pattern must compute the -/// same value. 
-/// -/// Intermediate values: Values that are defined in the source pattern, but not in the destination -/// pattern. These may have uses outside the source pattern, so the defining instruction can't be -/// deleted immediately. -/// -/// Temporary values are defined only in the destination pattern. -pub(crate) struct Var { - pub name: String, - - /// The `Def` defining this variable in a source pattern. - pub src_def: Option, - - /// The `Def` defining this variable in a destination pattern. - pub dst_def: Option, - - /// TypeVar representing the type of this variable. - type_var: Option, - - /// Is this the original type variable, or has it be redefined with set_typevar? - is_original_type_var: bool, -} - -impl Var { - fn new(name: String) -> Self { - Self { - name, - src_def: None, - dst_def: None, - type_var: None, - is_original_type_var: false, - } - } - - /// Is this an input value to the src pattern? - pub fn is_input(&self) -> bool { - self.src_def.is_none() && self.dst_def.is_none() - } - - /// Is this an output value, defined in both src and dst patterns? - pub fn is_output(&self) -> bool { - self.src_def.is_some() && self.dst_def.is_some() - } - - /// Is this an intermediate value, defined only in the src pattern? - pub fn is_intermediate(&self) -> bool { - self.src_def.is_some() && self.dst_def.is_none() - } - - /// Is this a temp value, defined only in the dst pattern? - pub fn is_temp(&self) -> bool { - self.src_def.is_none() && self.dst_def.is_some() - } - - /// Get the def of this variable according to the position. 
- pub fn get_def(&self, position: PatternPosition) -> Option { - match position { - PatternPosition::Source => self.src_def, - PatternPosition::Destination => self.dst_def, - } - } - - pub fn set_def(&mut self, position: PatternPosition, def: DefIndex) { - assert!( - self.get_def(position).is_none(), - "redefinition of variable {}", - self.name - ); - match position { - PatternPosition::Source => { - self.src_def = Some(def); - } - PatternPosition::Destination => { - self.dst_def = Some(def); - } - } - } - - /// Get the type variable representing the type of this variable. - pub fn get_or_create_typevar(&mut self) -> TypeVar { - match &self.type_var { - Some(tv) => tv.clone(), - None => { - // Create a new type var in which we allow all types. - let tv = TypeVar::new( - format!("typeof_{}", self.name), - format!("Type of the pattern variable {:?}", self), - TypeSetBuilder::all(), - ); - self.type_var = Some(tv.clone()); - self.is_original_type_var = true; - tv - } - } - } - pub fn get_typevar(&self) -> Option { - self.type_var.clone() - } - pub fn set_typevar(&mut self, tv: TypeVar) { - self.is_original_type_var = if let Some(previous_tv) = &self.type_var { - *previous_tv == tv - } else { - false - }; - self.type_var = Some(tv); - } - - /// Check if this variable has a free type variable. If not, the type of this variable is - /// computed from the type of another variable. 
- pub fn has_free_typevar(&self) -> bool { - match &self.type_var { - Some(tv) => tv.base.is_none() && self.is_original_type_var, - None => false, - } - } - - pub fn to_rust_code(&self) -> String { - self.name.clone() - } - fn rust_type(&self) -> String { - self.type_var.as_ref().unwrap().to_rust_code() - } -} - -impl fmt::Debug for Var { - fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { - fmt.write_fmt(format_args!( - "Var({}{}{})", - self.name, - if self.src_def.is_some() { ", src" } else { "" }, - if self.dst_def.is_some() { ", dst" } else { "" } - )) - } -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct VarIndex(u32); -entity_impl!(VarIndex); - -pub(crate) struct VarPool { - pool: PrimaryMap, -} - -impl VarPool { - pub fn new() -> Self { - Self { - pool: PrimaryMap::new(), - } - } - pub fn get(&self, index: VarIndex) -> &Var { - self.pool.get(index).unwrap() - } - pub fn get_mut(&mut self, index: VarIndex) -> &mut Var { - self.pool.get_mut(index).unwrap() - } - pub fn create(&mut self, name: impl Into) -> VarIndex { - self.pool.push(Var::new(name.into())) - } -} - -/// Contains constants created in the AST that must be inserted into the true [ConstantPool] when -/// the legalizer code is generated. The constant data is named in the order it is inserted; -/// inserting data using [insert] will avoid duplicates. -/// -/// [ConstantPool]: ../../../cranelift_codegen/ir/constant/struct.ConstantPool.html -/// [insert]: ConstPool::insert -pub(crate) struct ConstPool { - pool: Vec>, -} - -impl ConstPool { - /// Create an empty constant pool. - pub fn new() -> Self { - Self { pool: vec![] } - } - - /// Create a name for a constant from its position in the pool. - fn create_name(position: usize) -> String { - format!("const{}", position) - } - - /// Insert constant data into the pool, returning the name of the variable used to reference it. 
- /// This method will search for data that matches the new data and return the existing constant - /// name to avoid duplicates. - pub fn insert(&mut self, data: Vec) -> String { - let possible_position = self.pool.iter().position(|d| d == &data); - let position = if let Some(found_position) = possible_position { - found_position - } else { - let new_position = self.pool.len(); - self.pool.push(data); - new_position - }; - ConstPool::create_name(position) - } - - /// Iterate over the name/value pairs in the pool. - pub fn iter(&self) -> impl Iterator)> { - self.pool - .iter() - .enumerate() - .map(|(i, v)| (ConstPool::create_name(i), v)) - } -} - -/// Apply an instruction to arguments. -/// -/// An `Apply` AST expression is created by using function call syntax on instructions. This -/// applies to both bound and unbound polymorphic instructions. -pub(crate) struct Apply { - pub inst: Instruction, - pub args: Vec, - pub value_types: Vec, -} - -impl Apply { - pub fn new(target: InstSpec, args: Vec) -> Self { - let (inst, value_types) = match target { - InstSpec::Inst(inst) => (inst, Vec::new()), - InstSpec::Bound(bound_inst) => (bound_inst.inst, bound_inst.value_types), - }; - - // Apply should only operate on concrete value types, not "any". - let value_types = value_types - .into_iter() - .map(|vt| vt.expect("shouldn't be Any")) - .collect(); - - // Basic check on number of arguments. - assert!( - inst.operands_in.len() == args.len(), - "incorrect number of arguments in instruction {}", - inst.name - ); - - // Check that the kinds of Literals arguments match the expected operand. - for &imm_index in &inst.imm_opnums { - let arg = &args[imm_index]; - if let Some(literal) = arg.maybe_literal() { - let op = &inst.operands_in[imm_index]; - match &op.kind.fields { - OperandKindFields::ImmEnum(values) => { - if let Literal::Enumerator { value, .. 
} = literal { - assert!( - values.iter().any(|(_key, v)| v == value), - "Nonexistent enum value '{}' passed to field of kind '{}' -- \ - did you use the right enum?", - value, - op.kind.rust_type - ); - } else { - panic!( - "Passed non-enum field value {:?} to field of kind {}", - literal, op.kind.rust_type - ); - } - } - OperandKindFields::ImmValue => match &literal { - Literal::Enumerator { value, .. } => panic!( - "Expected immediate value in immediate field of kind '{}', \ - obtained enum value '{}'", - op.kind.rust_type, value - ), - Literal::Bits { .. } | Literal::Int(_) | Literal::EmptyVarArgs => {} - }, - _ => { - panic!( - "Literal passed to non-literal field of kind {}", - op.kind.rust_type - ); - } - } - } - } - - Self { - inst, - args, - value_types, - } - } - - fn to_comment_string(&self, var_pool: &VarPool) -> String { - let args = self - .args - .iter() - .map(|arg| arg.to_rust_code(var_pool)) - .collect::>() - .join(", "); - - let mut inst_and_bound_types = vec![self.inst.name.to_string()]; - inst_and_bound_types.extend(self.value_types.iter().map(|vt| vt.to_string())); - let inst_name = inst_and_bound_types.join("."); - - format!("{}({})", inst_name, args) - } - - pub fn inst_predicate(&self, var_pool: &VarPool) -> InstructionPredicate { - let mut pred = InstructionPredicate::new(); - for (format_field, &op_num) in self - .inst - .format - .imm_fields - .iter() - .zip(self.inst.imm_opnums.iter()) - { - let arg = &self.args[op_num]; - if arg.maybe_var().is_some() { - // Ignore free variables for now. - continue; - } - pred = pred.and(InstructionPredicate::new_is_field_equal_ast( - &*self.inst.format, - format_field, - arg.to_rust_code(var_pool), - )); - } - - // Add checks for any bound secondary type variables. We can't check the controlling type - // variable this way since it may not appear as the type of an operand. 
- if self.value_types.len() > 1 { - let poly = self - .inst - .polymorphic_info - .as_ref() - .expect("must have polymorphic info if it has bounded types"); - for (bound_type, type_var) in - self.value_types[1..].iter().zip(poly.other_typevars.iter()) - { - pred = pred.and(InstructionPredicate::new_typevar_check( - &self.inst, type_var, bound_type, - )); - } - } - - pred - } - - /// Same as `inst_predicate()`, but also check the controlling type variable. - pub fn inst_predicate_with_ctrl_typevar(&self, var_pool: &VarPool) -> InstructionPredicate { - let mut pred = self.inst_predicate(var_pool); - - if !self.value_types.is_empty() { - let bound_type = &self.value_types[0]; - let poly = self.inst.polymorphic_info.as_ref().unwrap(); - let type_check = if poly.use_typevar_operand { - InstructionPredicate::new_typevar_check(&self.inst, &poly.ctrl_typevar, bound_type) - } else { - InstructionPredicate::new_ctrl_typevar_check(&bound_type) - }; - pred = pred.and(type_check); - } - - pred - } - - pub fn rust_builder(&self, defined_vars: &[VarIndex], var_pool: &VarPool) -> String { - let mut args = self - .args - .iter() - .map(|expr| expr.to_rust_code(var_pool)) - .collect::>() - .join(", "); - - // Do we need to pass an explicit type argument? - if let Some(poly) = &self.inst.polymorphic_info { - if !poly.use_typevar_operand { - args = format!("{}, {}", var_pool.get(defined_vars[0]).rust_type(), args); - } - } - - format!("{}({})", self.inst.snake_name(), args) - } -} - -// Simple helpers for legalize actions construction. 
- -pub(crate) enum DummyExpr { - Var(DummyVar), - Literal(Literal), - Constant(DummyConstant), - Apply(InstSpec, Vec), - Block(DummyVar), -} - -#[derive(Clone)] -pub(crate) struct DummyVar { - pub name: String, -} - -impl Into for DummyVar { - fn into(self) -> DummyExpr { - DummyExpr::Var(self) - } -} -impl Into for Literal { - fn into(self) -> DummyExpr { - DummyExpr::Literal(self) - } -} - -#[derive(Clone)] -pub(crate) struct DummyConstant(pub(crate) Vec); - -pub(crate) fn constant(data: Vec) -> DummyConstant { - DummyConstant(data) -} - -impl Into for DummyConstant { - fn into(self) -> DummyExpr { - DummyExpr::Constant(self) - } -} - -pub(crate) fn var(name: &str) -> DummyVar { - DummyVar { - name: name.to_owned(), - } -} - -pub(crate) struct DummyDef { - pub expr: DummyExpr, - pub defined_vars: Vec, -} - -pub(crate) struct ExprBuilder { - expr: DummyExpr, -} - -impl ExprBuilder { - pub fn apply(inst: InstSpec, args: Vec) -> Self { - let expr = DummyExpr::Apply(inst, args); - Self { expr } - } - - pub fn assign_to(self, defined_vars: Vec) -> DummyDef { - DummyDef { - expr: self.expr, - defined_vars, - } - } - - pub fn block(name: DummyVar) -> Self { - let expr = DummyExpr::Block(name); - Self { expr } - } -} - -macro_rules! def_rhs { - // inst(a, b, c) - ($inst:ident($($src:expr),*)) => { - ExprBuilder::apply($inst.into(), vec![$($src.clone().into()),*]) - }; - - // inst.type(a, b, c) - ($inst:ident.$type:ident($($src:expr),*)) => { - ExprBuilder::apply($inst.bind($type).into(), vec![$($src.clone().into()),*]) - }; -} - -// Helper macro to define legalization recipes. -macro_rules! def { - // x = ... - ($dest:ident = $($tt:tt)*) => { - def_rhs!($($tt)*).assign_to(vec![$dest.clone()]) - }; - - // (x, y, ...) = ... - (($($dest:ident),*) = $($tt:tt)*) => { - def_rhs!($($tt)*).assign_to(vec![$($dest.clone()),*]) - }; - - // An instruction with no results. 
- ($($tt:tt)*) => { - def_rhs!($($tt)*).assign_to(Vec::new()) - } -} - -// Helper macro to define legalization recipes. -macro_rules! block { - // a basic block definition, splitting the current block in 2. - ($block: ident) => { - ExprBuilder::block($block).assign_to(Vec::new()) - }; -} - -#[cfg(test)] -mod tests { - use crate::cdsl::ast::ConstPool; - - #[test] - fn const_pool_returns_var_names() { - let mut c = ConstPool::new(); - assert_eq!(c.insert([0, 1, 2].to_vec()), "const0"); - assert_eq!(c.insert([1, 2, 3].to_vec()), "const1"); - } - - #[test] - fn const_pool_avoids_duplicates() { - let data = [0, 1, 2].to_vec(); - let mut c = ConstPool::new(); - assert_eq!(c.pool.len(), 0); - - assert_eq!(c.insert(data.clone()), "const0"); - assert_eq!(c.pool.len(), 1); - - assert_eq!(c.insert(data), "const0"); - assert_eq!(c.pool.len(), 1); - } - - #[test] - fn const_pool_iterates() { - let mut c = ConstPool::new(); - c.insert([0, 1, 2].to_vec()); - c.insert([3, 4, 5].to_vec()); - - let mut iter = c.iter(); - assert_eq!(iter.next(), Some(("const0".to_owned(), &vec![0, 1, 2]))); - assert_eq!(iter.next(), Some(("const1".to_owned(), &vec![3, 4, 5]))); - assert_eq!(iter.next(), None); - } -} diff --git a/cranelift/codegen/meta/src/cdsl/cpu_modes.rs b/cranelift/codegen/meta/src/cdsl/cpu_modes.rs deleted file mode 100644 index 7d119b00ce..0000000000 --- a/cranelift/codegen/meta/src/cdsl/cpu_modes.rs +++ /dev/null @@ -1,88 +0,0 @@ -use std::collections::{hash_map, HashMap, HashSet}; -use std::iter::FromIterator; - -use crate::cdsl::encodings::Encoding; -use crate::cdsl::types::{LaneType, ValueType}; -use crate::cdsl::xform::{TransformGroup, TransformGroupIndex}; - -pub(crate) struct CpuMode { - pub name: &'static str, - default_legalize: Option, - monomorphic_legalize: Option, - typed_legalize: HashMap, - pub encodings: Vec, -} - -impl CpuMode { - pub fn new(name: &'static str) -> Self { - Self { - name, - default_legalize: None, - monomorphic_legalize: None, - typed_legalize: 
HashMap::new(), - encodings: Vec::new(), - } - } - - pub fn set_encodings(&mut self, encodings: Vec) { - assert!(self.encodings.is_empty(), "clobbering encodings"); - self.encodings = encodings; - } - - pub fn legalize_monomorphic(&mut self, group: &TransformGroup) { - assert!(self.monomorphic_legalize.is_none()); - self.monomorphic_legalize = Some(group.id); - } - pub fn legalize_default(&mut self, group: &TransformGroup) { - assert!(self.default_legalize.is_none()); - self.default_legalize = Some(group.id); - } - pub fn legalize_value_type(&mut self, lane_type: impl Into, group: &TransformGroup) { - assert!(self - .typed_legalize - .insert(lane_type.into(), group.id) - .is_none()); - } - pub fn legalize_type(&mut self, lane_type: impl Into, group: &TransformGroup) { - assert!(self - .typed_legalize - .insert(lane_type.into().into(), group.id) - .is_none()); - } - - pub fn get_default_legalize_code(&self) -> TransformGroupIndex { - self.default_legalize - .expect("a finished CpuMode must have a default legalize code") - } - pub fn get_legalize_code_for(&self, typ: &Option) -> TransformGroupIndex { - match typ { - Some(typ) => self - .typed_legalize - .get(typ) - .copied() - .unwrap_or_else(|| self.get_default_legalize_code()), - None => self - .monomorphic_legalize - .unwrap_or_else(|| self.get_default_legalize_code()), - } - } - pub fn get_legalized_types(&self) -> hash_map::Keys { - self.typed_legalize.keys() - } - - /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the directly - /// reachable set of TransformGroup this TargetIsa uses. 
- pub fn direct_transform_groups(&self) -> Vec { - let mut set = HashSet::new(); - if let Some(i) = &self.default_legalize { - set.insert(*i); - } - if let Some(i) = &self.monomorphic_legalize { - set.insert(*i); - } - set.extend(self.typed_legalize.values().cloned()); - let mut ret = Vec::from_iter(set); - ret.sort(); - ret - } -} diff --git a/cranelift/codegen/meta/src/cdsl/encodings.rs b/cranelift/codegen/meta/src/cdsl/encodings.rs deleted file mode 100644 index 4d11beb206..0000000000 --- a/cranelift/codegen/meta/src/cdsl/encodings.rs +++ /dev/null @@ -1,178 +0,0 @@ -use crate::cdsl::instructions::{ - InstSpec, Instruction, InstructionPredicate, InstructionPredicateNode, - InstructionPredicateNumber, InstructionPredicateRegistry, ValueTypeOrAny, -}; -use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes}; -use crate::cdsl::settings::SettingPredicateNumber; -use crate::cdsl::types::ValueType; -use std::rc::Rc; -use std::string::ToString; - -/// Encoding for a concrete instruction. -/// -/// An `Encoding` object ties an instruction opcode with concrete type variables together with an -/// encoding recipe and encoding encbits. -/// -/// The concrete instruction can be in three different forms: -/// -/// 1. A naked opcode: `trap` for non-polymorphic instructions. -/// 2. With bound type variables: `iadd.i32` for polymorphic instructions. -/// 3. With operands providing constraints: `icmp.i32(intcc.eq, x, y)`. -/// -/// If the instruction is polymorphic, all type variables must be provided. -pub(crate) struct EncodingContent { - /// The `Instruction` or `BoundInstruction` being encoded. - inst: InstSpec, - - /// The `EncodingRecipe` to use. - pub recipe: EncodingRecipeNumber, - - /// Additional encoding bits to be interpreted by `recipe`. - pub encbits: u16, - - /// An instruction predicate that must be true to allow selecting this encoding. - pub inst_predicate: Option, - - /// An ISA predicate that must be true to allow selecting this encoding. 
- pub isa_predicate: Option, - - /// The value type this encoding has been bound to, for encodings of polymorphic instructions. - pub bound_type: Option, -} - -impl EncodingContent { - pub fn inst(&self) -> &Instruction { - self.inst.inst() - } - pub fn to_rust_comment(&self, recipes: &Recipes) -> String { - format!("[{}#{:02x}]", recipes[self.recipe].name, self.encbits) - } -} - -pub(crate) type Encoding = Rc; - -pub(crate) struct EncodingBuilder { - inst: InstSpec, - recipe: EncodingRecipeNumber, - encbits: u16, - inst_predicate: Option, - isa_predicate: Option, - bound_type: Option, -} - -impl EncodingBuilder { - pub fn new(inst: InstSpec, recipe: EncodingRecipeNumber, encbits: u16) -> Self { - let (inst_predicate, bound_type) = match &inst { - InstSpec::Bound(inst) => { - let other_typevars = &inst.inst.polymorphic_info.as_ref().unwrap().other_typevars; - - assert_eq!( - inst.value_types.len(), - other_typevars.len() + 1, - "partially bound polymorphic instruction" - ); - - // Add secondary type variables to the instruction predicate. 
- let value_types = &inst.value_types; - let mut inst_predicate: Option = None; - for (typevar, value_type) in other_typevars.iter().zip(value_types.iter().skip(1)) { - let value_type = match value_type { - ValueTypeOrAny::Any => continue, - ValueTypeOrAny::ValueType(vt) => vt, - }; - let type_predicate = - InstructionPredicate::new_typevar_check(&inst.inst, typevar, value_type); - inst_predicate = Some(type_predicate.into()); - } - - // Add immediate value predicates - for (immediate_value, immediate_operand) in inst - .immediate_values - .iter() - .zip(inst.inst.operands_in.iter().filter(|o| o.is_immediate())) - { - let immediate_predicate = InstructionPredicate::new_is_field_equal( - &inst.inst.format, - immediate_operand.kind.rust_field_name, - immediate_value.to_string(), - ); - inst_predicate = if let Some(type_predicate) = inst_predicate { - Some(type_predicate.and(immediate_predicate)) - } else { - Some(immediate_predicate.into()) - } - } - - let ctrl_type = value_types[0] - .clone() - .expect("Controlling type shouldn't be Any"); - (inst_predicate, Some(ctrl_type)) - } - - InstSpec::Inst(inst) => { - assert!( - inst.polymorphic_info.is_none(), - "unbound polymorphic instruction" - ); - (None, None) - } - }; - - Self { - inst, - recipe, - encbits, - inst_predicate, - isa_predicate: None, - bound_type, - } - } - - pub fn inst_predicate(mut self, inst_predicate: InstructionPredicateNode) -> Self { - let inst_predicate = Some(match self.inst_predicate { - Some(node) => node.and(inst_predicate), - None => inst_predicate.into(), - }); - self.inst_predicate = inst_predicate; - self - } - - pub fn isa_predicate(mut self, isa_predicate: SettingPredicateNumber) -> Self { - assert!(self.isa_predicate.is_none()); - self.isa_predicate = Some(isa_predicate); - self - } - - pub fn build( - self, - recipes: &Recipes, - inst_pred_reg: &mut InstructionPredicateRegistry, - ) -> Encoding { - let inst_predicate = self.inst_predicate.map(|pred| inst_pred_reg.insert(pred)); - - 
let inst = self.inst.inst(); - assert!( - Rc::ptr_eq(&inst.format, &recipes[self.recipe].format), - "Inst {} and recipe {} must have the same format!", - inst.name, - recipes[self.recipe].name - ); - - assert_eq!( - inst.is_branch && !inst.is_indirect_branch, - recipes[self.recipe].branch_range.is_some(), - "Inst {}'s is_branch contradicts recipe {} branch_range!", - inst.name, - recipes[self.recipe].name - ); - - Rc::new(EncodingContent { - inst: self.inst, - recipe: self.recipe, - encbits: self.encbits, - inst_predicate, - isa_predicate: self.isa_predicate, - bound_type: self.bound_type, - }) - } -} diff --git a/cranelift/codegen/meta/src/cdsl/formats.rs b/cranelift/codegen/meta/src/cdsl/formats.rs index e713a8bccb..d445a4ed7e 100644 --- a/cranelift/codegen/meta/src/cdsl/formats.rs +++ b/cranelift/codegen/meta/src/cdsl/formats.rs @@ -70,18 +70,6 @@ impl fmt::Display for InstructionFormat { } impl InstructionFormat { - pub fn imm_by_name(&self, name: &'static str) -> &FormatField { - self.imm_fields - .iter() - .find(|&field| field.member == name) - .unwrap_or_else(|| { - panic!( - "unexpected immediate field named {} in instruction format {}", - name, self.name - ) - }) - } - /// Returns a tuple that uniquely identifies the structure. 
pub fn structure(&self) -> FormatStructure { FormatStructure { diff --git a/cranelift/codegen/meta/src/cdsl/instructions.rs b/cranelift/codegen/meta/src/cdsl/instructions.rs index 489217033a..9db2bfba81 100644 --- a/cranelift/codegen/meta/src/cdsl/instructions.rs +++ b/cranelift/codegen/meta/src/cdsl/instructions.rs @@ -1,21 +1,14 @@ -use cranelift_codegen_shared::condcodes::IntCC; use cranelift_entity::{entity_impl, PrimaryMap}; -use std::collections::HashMap; use std::fmt; -use std::fmt::{Display, Error, Formatter}; use std::rc::Rc; use crate::cdsl::camel_case; -use crate::cdsl::formats::{FormatField, InstructionFormat}; +use crate::cdsl::formats::InstructionFormat; use crate::cdsl::operands::Operand; use crate::cdsl::type_inference::Constraint; -use crate::cdsl::types::{LaneType, ReferenceType, ValueType, VectorType}; use crate::cdsl::typevar::TypeVar; -use crate::shared::formats::Formats; -use crate::shared::types::{Bool, Float, Int, Reference}; - #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub(crate) struct OpcodeNumber(u32); entity_impl!(OpcodeNumber); @@ -24,61 +17,24 @@ pub(crate) type AllInstructions = PrimaryMap; pub(crate) struct InstructionGroupBuilder<'all_inst> { all_instructions: &'all_inst mut AllInstructions, - own_instructions: Vec, } impl<'all_inst> InstructionGroupBuilder<'all_inst> { pub fn new(all_instructions: &'all_inst mut AllInstructions) -> Self { - Self { - all_instructions, - own_instructions: Vec::new(), - } + Self { all_instructions } } pub fn push(&mut self, builder: InstructionBuilder) { let opcode_number = OpcodeNumber(self.all_instructions.next_key().as_u32()); let inst = builder.build(opcode_number); - // Note this clone is cheap, since Instruction is a Rc<> wrapper for InstructionContent. 
- self.own_instructions.push(inst.clone()); self.all_instructions.push(inst); } - - pub fn build(self) -> InstructionGroup { - InstructionGroup { - instructions: self.own_instructions, - } - } -} - -/// Every instruction must belong to exactly one instruction group. A given -/// target architecture can support instructions from multiple groups, and it -/// does not necessarily support all instructions in a group. -pub(crate) struct InstructionGroup { - instructions: Vec, -} - -impl InstructionGroup { - pub fn by_name(&self, name: &'static str) -> &Instruction { - self.instructions - .iter() - .find(|inst| inst.name == name) - .unwrap_or_else(|| panic!("instruction with name '{}' does not exist", name)) - } -} - -/// Instructions can have parameters bound to them to specialize them for more specific encodings -/// (e.g. the encoding for adding two float types may be different than that of adding two -/// integer types) -pub(crate) trait Bindable { - /// Bind a parameter to an instruction - fn bind(&self, parameter: impl Into) -> BoundInstruction; } #[derive(Debug)] pub(crate) struct PolymorphicInfo { pub use_typevar_operand: bool, pub ctrl_typevar: TypeVar, - pub other_typevars: Vec, } #[derive(Debug)] @@ -95,8 +51,6 @@ pub(crate) struct InstructionContent { pub operands_in: Vec, /// Output operands. The output operands must be SSA values or `variable_args`. pub operands_out: Vec, - /// Instruction-specific TypeConstraints. - pub constraints: Vec, /// Instruction format, automatically derived from the input operands. 
pub format: Rc, @@ -146,27 +100,10 @@ impl InstructionContent { &self.name } } - - pub fn all_typevars(&self) -> Vec<&TypeVar> { - match &self.polymorphic_info { - Some(poly) => { - let mut result = vec![&poly.ctrl_typevar]; - result.extend(&poly.other_typevars); - result - } - None => Vec::new(), - } - } } pub(crate) type Instruction = Rc; -impl Bindable for Instruction { - fn bind(&self, parameter: impl Into) -> BoundInstruction { - BoundInstruction::new(self).bind(parameter) - } -} - impl fmt::Display for InstructionContent { fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { if !self.operands_out.is_empty() { @@ -317,11 +254,6 @@ impl InstructionBuilder { self } - pub fn clobbers_all_regs(mut self, val: bool) -> Self { - self.clobbers_all_regs = val; - self - } - fn build(self, opcode_number: OpcodeNumber) -> Instruction { let operands_in = self.operands_in.unwrap_or_else(Vec::new); let operands_out = self.operands_out.unwrap_or_else(Vec::new); @@ -361,7 +293,6 @@ impl InstructionBuilder { doc: self.doc, operands_in, operands_out, - constraints: self.constraints.unwrap_or_else(Vec::new), format: self.format, polymorphic_info, value_opnums, @@ -383,181 +314,6 @@ impl InstructionBuilder { } } -/// A thin wrapper like Option, but with more precise semantics. 
-#[derive(Clone)] -pub(crate) enum ValueTypeOrAny { - ValueType(ValueType), - Any, -} - -impl ValueTypeOrAny { - pub fn expect(self, msg: &str) -> ValueType { - match self { - ValueTypeOrAny::ValueType(vt) => vt, - ValueTypeOrAny::Any => panic!("Unexpected Any: {}", msg), - } - } -} - -/// The number of bits in the vector -type VectorBitWidth = u64; - -/// An parameter used for binding instructions to specific types or values -pub(crate) enum BindParameter { - Any, - Lane(LaneType), - Vector(LaneType, VectorBitWidth), - Reference(ReferenceType), - Immediate(Immediate), -} - -/// Constructor for more easily building vector parameters from any lane type -pub(crate) fn vector(parameter: impl Into, vector_size: VectorBitWidth) -> BindParameter { - BindParameter::Vector(parameter.into(), vector_size) -} - -impl From for BindParameter { - fn from(ty: Int) -> Self { - BindParameter::Lane(ty.into()) - } -} - -impl From for BindParameter { - fn from(ty: Bool) -> Self { - BindParameter::Lane(ty.into()) - } -} - -impl From for BindParameter { - fn from(ty: Float) -> Self { - BindParameter::Lane(ty.into()) - } -} - -impl From for BindParameter { - fn from(ty: LaneType) -> Self { - BindParameter::Lane(ty) - } -} - -impl From for BindParameter { - fn from(ty: Reference) -> Self { - BindParameter::Reference(ty.into()) - } -} - -impl From for BindParameter { - fn from(imm: Immediate) -> Self { - BindParameter::Immediate(imm) - } -} - -#[derive(Clone)] -pub(crate) enum Immediate { - // When needed, this enum should be expanded to include other immediate types (e.g. u8, u128). 
- IntCC(IntCC), -} - -impl Display for Immediate { - fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { - match self { - Immediate::IntCC(x) => write!(f, "IntCC::{:?}", x), - } - } -} - -#[derive(Clone)] -pub(crate) struct BoundInstruction { - pub inst: Instruction, - pub value_types: Vec, - pub immediate_values: Vec, -} - -impl BoundInstruction { - /// Construct a new bound instruction (with nothing bound yet) from an instruction - fn new(inst: &Instruction) -> Self { - BoundInstruction { - inst: inst.clone(), - value_types: vec![], - immediate_values: vec![], - } - } - - /// Verify that the bindings for a BoundInstruction are correct. - fn verify_bindings(&self) -> Result<(), String> { - // Verify that binding types to the instruction does not violate the polymorphic rules. - if !self.value_types.is_empty() { - match &self.inst.polymorphic_info { - Some(poly) => { - if self.value_types.len() > 1 + poly.other_typevars.len() { - return Err(format!( - "trying to bind too many types for {}", - self.inst.name - )); - } - } - None => { - return Err(format!( - "trying to bind a type for {} which is not a polymorphic instruction", - self.inst.name - )); - } - } - } - - // Verify that only the right number of immediates are bound. 
- let immediate_count = self - .inst - .operands_in - .iter() - .filter(|o| o.is_immediate_or_entityref()) - .count(); - if self.immediate_values.len() > immediate_count { - return Err(format!( - "trying to bind too many immediates ({}) to instruction {} which only expects {} \ - immediates", - self.immediate_values.len(), - self.inst.name, - immediate_count - )); - } - - Ok(()) - } -} - -impl Bindable for BoundInstruction { - fn bind(&self, parameter: impl Into) -> BoundInstruction { - let mut modified = self.clone(); - match parameter.into() { - BindParameter::Any => modified.value_types.push(ValueTypeOrAny::Any), - BindParameter::Lane(lane_type) => modified - .value_types - .push(ValueTypeOrAny::ValueType(lane_type.into())), - BindParameter::Vector(lane_type, vector_size_in_bits) => { - let num_lanes = vector_size_in_bits / lane_type.lane_bits(); - assert!( - num_lanes >= 2, - "Minimum lane number for bind_vector is 2, found {}.", - num_lanes, - ); - let vector_type = ValueType::Vector(VectorType::new(lane_type, num_lanes)); - modified - .value_types - .push(ValueTypeOrAny::ValueType(vector_type)); - } - BindParameter::Reference(reference_type) => { - modified - .value_types - .push(ValueTypeOrAny::ValueType(reference_type.into())); - } - BindParameter::Immediate(immediate) => modified.immediate_values.push(immediate), - } - modified.verify_bindings().unwrap(); - modified - } -} - /// Checks that the input operands actually match the given format. 
fn verify_format(inst_name: &str, operands_in: &[Operand], format: &InstructionFormat) { // A format is defined by: @@ -644,11 +400,10 @@ fn verify_polymorphic( || tv.singleton_type().is_some() { match is_ctrl_typevar_candidate(tv, &operands_in, &operands_out) { - Ok(other_typevars) => { + Ok(_other_typevars) => { return Some(PolymorphicInfo { use_typevar_operand: true, ctrl_typevar: tv.clone(), - other_typevars, }); } Err(error_message) => { @@ -679,12 +434,11 @@ fn verify_polymorphic( // At this point, if the next unwrap() fails, it means the output type couldn't be used as a // controlling type variable either; panicking is the right behavior. - let other_typevars = is_ctrl_typevar_candidate(tv, &operands_in, &operands_out).unwrap(); + is_ctrl_typevar_candidate(tv, &operands_in, &operands_out).unwrap(); Some(PolymorphicInfo { use_typevar_operand: false, ctrl_typevar: tv.clone(), - other_typevars, }) } @@ -762,634 +516,3 @@ fn is_ctrl_typevar_candidate( Ok(other_typevars) } - -#[derive(Clone, Hash, PartialEq, Eq)] -pub(crate) enum FormatPredicateKind { - /// Is the field member equal to the expected value (stored here)? - IsEqual(String), - - /// Is the immediate instruction format field representable as an n-bit two's complement - /// integer? (with width: first member, scale: second member). - /// The predicate is true if the field is in the range: `-2^(width-1) -- 2^(width-1)-1` and a - /// multiple of `2^scale`. - IsSignedInt(usize, usize), - - /// Is the immediate instruction format field representable as an n-bit unsigned integer? (with - /// width: first member, scale: second member). - /// The predicate is true if the field is in the range: `0 -- 2^width - 1` and a multiple of - /// `2^scale`. - IsUnsignedInt(usize, usize), - - /// Is the immediate format field member an integer equal to zero? - IsZeroInt, - /// Is the immediate format field member equal to zero? 
(float32 version) - IsZero32BitFloat, - - /// Is the immediate format field member equal to zero? (float64 version) - IsZero64BitFloat, - - /// Is the immediate format field member equal zero in all lanes? - IsAllZeroes, - - /// Does the immediate format field member have ones in all bits of all lanes? - IsAllOnes, - - /// Has the value list (in member_name) the size specified in parameter? - LengthEquals(usize), - - /// Is the referenced function colocated? - IsColocatedFunc, - - /// Is the referenced data object colocated? - IsColocatedData, -} - -#[derive(Clone, Hash, PartialEq, Eq)] -pub(crate) struct FormatPredicateNode { - format_name: &'static str, - member_name: &'static str, - kind: FormatPredicateKind, -} - -impl FormatPredicateNode { - fn new( - format: &InstructionFormat, - field_name: &'static str, - kind: FormatPredicateKind, - ) -> Self { - let member_name = format.imm_by_name(field_name).member; - Self { - format_name: format.name, - member_name, - kind, - } - } - - fn new_raw( - format: &InstructionFormat, - member_name: &'static str, - kind: FormatPredicateKind, - ) -> Self { - Self { - format_name: format.name, - member_name, - kind, - } - } - - fn destructuring_member_name(&self) -> &'static str { - match &self.kind { - FormatPredicateKind::LengthEquals(_) => { - // Length operates on the argument value list. 
- assert!(self.member_name == "args"); - "ref args" - } - _ => self.member_name, - } - } - - fn rust_predicate(&self) -> String { - match &self.kind { - FormatPredicateKind::IsEqual(arg) => { - format!("predicates::is_equal({}, {})", self.member_name, arg) - } - FormatPredicateKind::IsSignedInt(width, scale) => format!( - "predicates::is_signed_int({}, {}, {})", - self.member_name, width, scale - ), - FormatPredicateKind::IsUnsignedInt(width, scale) => format!( - "predicates::is_unsigned_int({}, {}, {})", - self.member_name, width, scale - ), - FormatPredicateKind::IsZeroInt => { - format!("predicates::is_zero_int({})", self.member_name) - } - FormatPredicateKind::IsZero32BitFloat => { - format!("predicates::is_zero_32_bit_float({})", self.member_name) - } - FormatPredicateKind::IsZero64BitFloat => { - format!("predicates::is_zero_64_bit_float({})", self.member_name) - } - FormatPredicateKind::IsAllZeroes => format!( - "predicates::is_all_zeroes(func.dfg.constants.get({}))", - self.member_name - ), - FormatPredicateKind::IsAllOnes => format!( - "predicates::is_all_ones(func.dfg.constants.get({}))", - self.member_name - ), - FormatPredicateKind::LengthEquals(num) => format!( - "predicates::has_length_of({}, {}, func)", - self.member_name, num - ), - FormatPredicateKind::IsColocatedFunc => { - format!("predicates::is_colocated_func({}, func)", self.member_name,) - } - FormatPredicateKind::IsColocatedData => { - format!("predicates::is_colocated_data({}, func)", self.member_name) - } - } - } -} - -#[derive(Clone, Hash, PartialEq, Eq)] -pub(crate) enum TypePredicateNode { - /// Is the value argument (at the index designated by the first member) the same type as the - /// type name (second member)? - TypeVarCheck(usize, String), - - /// Is the controlling type variable the same type as the one designated by the type name - /// (only member)? 
- CtrlTypeVarCheck(String), -} - -impl TypePredicateNode { - fn rust_predicate(&self, func_str: &str) -> String { - match self { - TypePredicateNode::TypeVarCheck(index, value_type_name) => format!( - "{}.dfg.value_type(args[{}]) == {}", - func_str, index, value_type_name - ), - TypePredicateNode::CtrlTypeVarCheck(value_type_name) => { - format!("{}.dfg.ctrl_typevar(inst) == {}", func_str, value_type_name) - } - } - } -} - -/// A basic node in an instruction predicate: either an atom, or an AND of two conditions. -#[derive(Clone, Hash, PartialEq, Eq)] -pub(crate) enum InstructionPredicateNode { - FormatPredicate(FormatPredicateNode), - - TypePredicate(TypePredicateNode), - - /// An AND-combination of two or more other predicates. - And(Vec), - - /// An OR-combination of two or more other predicates. - Or(Vec), -} - -impl InstructionPredicateNode { - fn rust_predicate(&self, func_str: &str) -> String { - match self { - InstructionPredicateNode::FormatPredicate(node) => node.rust_predicate(), - InstructionPredicateNode::TypePredicate(node) => node.rust_predicate(func_str), - InstructionPredicateNode::And(nodes) => nodes - .iter() - .map(|x| x.rust_predicate(func_str)) - .collect::>() - .join(" && "), - InstructionPredicateNode::Or(nodes) => nodes - .iter() - .map(|x| x.rust_predicate(func_str)) - .collect::>() - .join(" || "), - } - } - - pub fn format_destructuring_member_name(&self) -> &str { - match self { - InstructionPredicateNode::FormatPredicate(format_pred) => { - format_pred.destructuring_member_name() - } - _ => panic!("Only for leaf format predicates"), - } - } - - pub fn format_name(&self) -> &str { - match self { - InstructionPredicateNode::FormatPredicate(format_pred) => format_pred.format_name, - _ => panic!("Only for leaf format predicates"), - } - } - - pub fn is_type_predicate(&self) -> bool { - match self { - InstructionPredicateNode::FormatPredicate(_) - | InstructionPredicateNode::And(_) - | InstructionPredicateNode::Or(_) => false, - 
InstructionPredicateNode::TypePredicate(_) => true, - } - } - - fn collect_leaves(&self) -> Vec<&InstructionPredicateNode> { - let mut ret = Vec::new(); - match self { - InstructionPredicateNode::And(nodes) | InstructionPredicateNode::Or(nodes) => { - for node in nodes { - ret.extend(node.collect_leaves()); - } - } - _ => ret.push(self), - } - ret - } -} - -#[derive(Clone, Hash, PartialEq, Eq)] -pub(crate) struct InstructionPredicate { - node: Option, -} - -impl Into for InstructionPredicateNode { - fn into(self) -> InstructionPredicate { - InstructionPredicate { node: Some(self) } - } -} - -impl InstructionPredicate { - pub fn new() -> Self { - Self { node: None } - } - - pub fn unwrap(self) -> InstructionPredicateNode { - self.node.unwrap() - } - - pub fn new_typevar_check( - inst: &Instruction, - type_var: &TypeVar, - value_type: &ValueType, - ) -> InstructionPredicateNode { - let index = inst - .value_opnums - .iter() - .enumerate() - .find(|(_, &op_num)| inst.operands_in[op_num].type_var().unwrap() == type_var) - .unwrap() - .0; - InstructionPredicateNode::TypePredicate(TypePredicateNode::TypeVarCheck( - index, - value_type.rust_name(), - )) - } - - pub fn new_ctrl_typevar_check(value_type: &ValueType) -> InstructionPredicateNode { - InstructionPredicateNode::TypePredicate(TypePredicateNode::CtrlTypeVarCheck( - value_type.rust_name(), - )) - } - - pub fn new_is_field_equal( - format: &InstructionFormat, - field_name: &'static str, - imm_value: String, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsEqual(imm_value), - )) - } - - /// Used only for the AST module, which directly passes in the format field. 
- pub fn new_is_field_equal_ast( - format: &InstructionFormat, - field: &FormatField, - imm_value: String, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new_raw( - format, - field.member, - FormatPredicateKind::IsEqual(imm_value), - )) - } - - pub fn new_is_signed_int( - format: &InstructionFormat, - field_name: &'static str, - width: usize, - scale: usize, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsSignedInt(width, scale), - )) - } - - pub fn new_is_unsigned_int( - format: &InstructionFormat, - field_name: &'static str, - width: usize, - scale: usize, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsUnsignedInt(width, scale), - )) - } - - pub fn new_is_zero_int( - format: &InstructionFormat, - field_name: &'static str, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsZeroInt, - )) - } - - pub fn new_is_zero_32bit_float( - format: &InstructionFormat, - field_name: &'static str, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsZero32BitFloat, - )) - } - - pub fn new_is_zero_64bit_float( - format: &InstructionFormat, - field_name: &'static str, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsZero64BitFloat, - )) - } - - pub fn new_is_all_zeroes( - format: &InstructionFormat, - field_name: &'static str, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsAllZeroes, - )) - } - - pub fn 
new_is_all_ones( - format: &InstructionFormat, - field_name: &'static str, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsAllOnes, - )) - } - - pub fn new_length_equals(format: &InstructionFormat, size: usize) -> InstructionPredicateNode { - assert!( - format.has_value_list, - "the format must be variadic in number of arguments" - ); - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new_raw( - format, - "args", - FormatPredicateKind::LengthEquals(size), - )) - } - - pub fn new_is_colocated_func( - format: &InstructionFormat, - field_name: &'static str, - ) -> InstructionPredicateNode { - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - format, - field_name, - FormatPredicateKind::IsColocatedFunc, - )) - } - - pub fn new_is_colocated_data(formats: &Formats) -> InstructionPredicateNode { - let format = &formats.unary_global_value; - InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( - &*format, - "global_value", - FormatPredicateKind::IsColocatedData, - )) - } - - pub fn and(mut self, new_node: InstructionPredicateNode) -> Self { - let node = self.node; - let mut and_nodes = match node { - Some(node) => match node { - InstructionPredicateNode::And(nodes) => nodes, - InstructionPredicateNode::Or(_) => { - panic!("Can't mix and/or without implementing operator precedence!") - } - _ => vec![node], - }, - _ => Vec::new(), - }; - and_nodes.push(new_node); - self.node = Some(InstructionPredicateNode::And(and_nodes)); - self - } - - pub fn or(mut self, new_node: InstructionPredicateNode) -> Self { - let node = self.node; - let mut or_nodes = match node { - Some(node) => match node { - InstructionPredicateNode::Or(nodes) => nodes, - InstructionPredicateNode::And(_) => { - panic!("Can't mix and/or without implementing operator precedence!") - } - _ => vec![node], - }, - _ => Vec::new(), - }; - or_nodes.push(new_node); - 
self.node = Some(InstructionPredicateNode::Or(or_nodes)); - self - } - - pub fn rust_predicate(&self, func_str: &str) -> Option { - self.node.as_ref().map(|root| root.rust_predicate(func_str)) - } - - /// Returns the type predicate if this is one, or None otherwise. - pub fn type_predicate(&self, func_str: &str) -> Option { - let node = self.node.as_ref().unwrap(); - if node.is_type_predicate() { - Some(node.rust_predicate(func_str)) - } else { - None - } - } - - /// Returns references to all the nodes that are leaves in the condition (i.e. by flattening - /// AND/OR). - pub fn collect_leaves(&self) -> Vec<&InstructionPredicateNode> { - self.node.as_ref().unwrap().collect_leaves() - } -} - -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct InstructionPredicateNumber(u32); -entity_impl!(InstructionPredicateNumber); - -pub(crate) type InstructionPredicateMap = - PrimaryMap; - -/// A registry of predicates to help deduplicating them, during Encodings construction. When the -/// construction process is over, it needs to be extracted with `extract` and associated to the -/// TargetIsa. -pub(crate) struct InstructionPredicateRegistry { - /// Maps a predicate number to its actual predicate. - map: InstructionPredicateMap, - - /// Inverse map: maps a predicate to its predicate number. This is used before inserting a - /// predicate, to check whether it already exists. 
- inverted_map: HashMap, -} - -impl InstructionPredicateRegistry { - pub fn new() -> Self { - Self { - map: PrimaryMap::new(), - inverted_map: HashMap::new(), - } - } - pub fn insert(&mut self, predicate: InstructionPredicate) -> InstructionPredicateNumber { - match self.inverted_map.get(&predicate) { - Some(&found) => found, - None => { - let key = self.map.push(predicate.clone()); - self.inverted_map.insert(predicate, key); - key - } - } - } - pub fn extract(self) -> InstructionPredicateMap { - self.map - } -} - -/// An instruction specification, containing an instruction that has bound types or not. -pub(crate) enum InstSpec { - Inst(Instruction), - Bound(BoundInstruction), -} - -impl InstSpec { - pub fn inst(&self) -> &Instruction { - match &self { - InstSpec::Inst(inst) => inst, - InstSpec::Bound(bound_inst) => &bound_inst.inst, - } - } -} - -impl Bindable for InstSpec { - fn bind(&self, parameter: impl Into) -> BoundInstruction { - match self { - InstSpec::Inst(inst) => inst.bind(parameter.into()), - InstSpec::Bound(inst) => inst.bind(parameter.into()), - } - } -} - -impl Into for &Instruction { - fn into(self) -> InstSpec { - InstSpec::Inst(self.clone()) - } -} - -impl Into for BoundInstruction { - fn into(self) -> InstSpec { - InstSpec::Bound(self) - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::cdsl::formats::InstructionFormatBuilder; - use crate::cdsl::operands::{OperandKind, OperandKindFields}; - use crate::cdsl::typevar::TypeSetBuilder; - use crate::shared::types::Int::{I32, I64}; - - fn field_to_operand(index: usize, field: OperandKindFields) -> Operand { - // Pretend the index string is &'static. - let name = Box::leak(index.to_string().into_boxed_str()); - // Format's name / rust_type don't matter here. 
- let kind = OperandKind::new(name, name, field); - let operand = Operand::new(name, kind); - operand - } - - fn field_to_operands(types: Vec) -> Vec { - types - .iter() - .enumerate() - .map(|(i, f)| field_to_operand(i, f.clone())) - .collect() - } - - fn build_fake_instruction( - inputs: Vec, - outputs: Vec, - ) -> Instruction { - // Setup a format from the input operands. - let mut format = InstructionFormatBuilder::new("fake"); - for (i, f) in inputs.iter().enumerate() { - match f { - OperandKindFields::TypeVar(_) => format = format.value(), - OperandKindFields::ImmValue => { - format = format.imm(&field_to_operand(i, f.clone()).kind) - } - _ => {} - }; - } - let format = format.build(); - - // Create the fake instruction. - InstructionBuilder::new("fake", "A fake instruction for testing.", &format) - .operands_in(field_to_operands(inputs).iter().collect()) - .operands_out(field_to_operands(outputs).iter().collect()) - .build(OpcodeNumber(42)) - } - - #[test] - fn ensure_bound_instructions_can_bind_lane_types() { - let type1 = TypeSetBuilder::new().ints(8..64).build(); - let in1 = OperandKindFields::TypeVar(TypeVar::new("a", "...", type1)); - let inst = build_fake_instruction(vec![in1], vec![]); - inst.bind(LaneType::Int(I32)); - } - - #[test] - fn ensure_bound_instructions_can_bind_immediates() { - let inst = build_fake_instruction(vec![OperandKindFields::ImmValue], vec![]); - let bound_inst = inst.bind(Immediate::IntCC(IntCC::Equal)); - assert!(bound_inst.verify_bindings().is_ok()); - } - - #[test] - #[should_panic] - fn ensure_instructions_fail_to_bind() { - let inst = build_fake_instruction(vec![], vec![]); - inst.bind(BindParameter::Lane(LaneType::Int(I32))); - // Trying to bind to an instruction with no inputs should fail. 
- } - - #[test] - #[should_panic] - fn ensure_bound_instructions_fail_to_bind_too_many_types() { - let type1 = TypeSetBuilder::new().ints(8..64).build(); - let in1 = OperandKindFields::TypeVar(TypeVar::new("a", "...", type1)); - let inst = build_fake_instruction(vec![in1], vec![]); - inst.bind(LaneType::Int(I32)).bind(LaneType::Int(I64)); - } - - #[test] - #[should_panic] - fn ensure_instructions_fail_to_bind_too_many_immediates() { - let inst = build_fake_instruction(vec![OperandKindFields::ImmValue], vec![]); - inst.bind(BindParameter::Immediate(Immediate::IntCC(IntCC::Equal))) - .bind(BindParameter::Immediate(Immediate::IntCC(IntCC::Equal))); - // Trying to bind too many immediates to an instruction should fail; note that the immediate - // values are nonsensical but irrelevant to the purpose of this test. - } -} diff --git a/cranelift/codegen/meta/src/cdsl/isa.rs b/cranelift/codegen/meta/src/cdsl/isa.rs index f6ac9f2493..b595ffa99f 100644 --- a/cranelift/codegen/meta/src/cdsl/isa.rs +++ b/cranelift/codegen/meta/src/cdsl/isa.rs @@ -1,96 +1,12 @@ -use std::collections::HashSet; -use std::iter::FromIterator; - -use crate::cdsl::cpu_modes::CpuMode; -use crate::cdsl::instructions::InstructionPredicateMap; -use crate::cdsl::recipes::Recipes; -use crate::cdsl::regs::IsaRegs; use crate::cdsl::settings::SettingGroup; -use crate::cdsl::xform::{TransformGroupIndex, TransformGroups}; pub(crate) struct TargetIsa { pub name: &'static str, pub settings: SettingGroup, - pub regs: IsaRegs, - pub recipes: Recipes, - pub cpu_modes: Vec, - pub encodings_predicates: InstructionPredicateMap, - - /// TransformGroupIndex are global to all the ISAs, while we want to have indices into the - /// local array of transform groups that are directly used. We use this map to get this - /// information. 
- pub local_transform_groups: Vec, } impl TargetIsa { - pub fn new( - name: &'static str, - settings: SettingGroup, - regs: IsaRegs, - recipes: Recipes, - cpu_modes: Vec, - encodings_predicates: InstructionPredicateMap, - ) -> Self { - // Compute the local TransformGroup index. - let mut local_transform_groups = Vec::new(); - for cpu_mode in &cpu_modes { - let transform_groups = cpu_mode.direct_transform_groups(); - for group_index in transform_groups { - // find() is fine here: the number of transform group is < 5 as of June 2019. - if local_transform_groups - .iter() - .find(|&val| group_index == *val) - .is_none() - { - local_transform_groups.push(group_index); - } - } - } - - Self { - name, - settings, - regs, - recipes, - cpu_modes, - encodings_predicates, - local_transform_groups, - } - } - - /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the - /// transitive set of TransformGroup this TargetIsa uses. - pub fn transitive_transform_groups( - &self, - all_groups: &TransformGroups, - ) -> Vec { - let mut set = HashSet::new(); - - for &root in self.local_transform_groups.iter() { - set.insert(root); - let mut base = root; - // Follow the chain of chain_with. - while let Some(chain_with) = &all_groups.get(base).chain_with { - set.insert(*chain_with); - base = *chain_with; - } - } - - let mut vec = Vec::from_iter(set); - vec.sort(); - vec - } - - /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the directly - /// reachable set of TransformGroup this TargetIsa uses. 
- pub fn direct_transform_groups(&self) -> &Vec { - &self.local_transform_groups - } - - pub fn translate_group_index(&self, group_index: TransformGroupIndex) -> usize { - self.local_transform_groups - .iter() - .position(|&val| val == group_index) - .expect("TransformGroup unused by this TargetIsa!") + pub fn new(name: &'static str, settings: SettingGroup) -> Self { + Self { name, settings } } } diff --git a/cranelift/codegen/meta/src/cdsl/mod.rs b/cranelift/codegen/meta/src/cdsl/mod.rs index 698b64dff3..bf7acbbeb4 100644 --- a/cranelift/codegen/meta/src/cdsl/mod.rs +++ b/cranelift/codegen/meta/src/cdsl/mod.rs @@ -3,21 +3,14 @@ //! This module defines the classes that are used to define Cranelift //! instructions and other entities. -#[macro_use] -pub mod ast; -pub mod cpu_modes; -pub mod encodings; pub mod formats; pub mod instructions; pub mod isa; pub mod operands; -pub mod recipes; -pub mod regs; pub mod settings; pub mod type_inference; pub mod types; pub mod typevar; -pub mod xform; /// A macro that converts boolean settings into predicates to look more natural. #[macro_export] diff --git a/cranelift/codegen/meta/src/cdsl/recipes.rs b/cranelift/codegen/meta/src/cdsl/recipes.rs deleted file mode 100644 index e03b951f4d..0000000000 --- a/cranelift/codegen/meta/src/cdsl/recipes.rs +++ /dev/null @@ -1,297 +0,0 @@ -use std::rc::Rc; - -use cranelift_entity::{entity_impl, PrimaryMap}; - -use crate::cdsl::formats::InstructionFormat; -use crate::cdsl::instructions::InstructionPredicate; -use crate::cdsl::regs::RegClassIndex; -use crate::cdsl::settings::SettingPredicateNumber; - -/// A specific register in a register class. -/// -/// A register is identified by the top-level register class it belongs to and -/// its first register unit. -/// -/// Specific registers are used to describe constraints on instructions where -/// some operands must use a fixed register. 
-/// -/// Register instances can be created with the constructor, or accessed as -/// attributes on the register class: `GPR.rcx`. -#[derive(Copy, Clone, Hash, PartialEq, Eq)] -pub(crate) struct Register { - pub regclass: RegClassIndex, - pub unit: u8, -} - -impl Register { - pub fn new(regclass: RegClassIndex, unit: u8) -> Self { - Self { regclass, unit } - } -} - -/// An operand that must be in a stack slot. -/// -/// A `Stack` object can be used to indicate an operand constraint for a value -/// operand that must live in a stack slot. -#[derive(Copy, Clone, Hash, PartialEq)] -pub(crate) struct Stack { - pub regclass: RegClassIndex, -} - -impl Stack { - pub fn new(regclass: RegClassIndex) -> Self { - Self { regclass } - } - pub fn stack_base_mask(self) -> &'static str { - // TODO: Make this configurable instead of just using the SP. - "StackBaseMask(1)" - } -} - -#[derive(Clone, Hash, PartialEq)] -pub(crate) struct BranchRange { - pub inst_size: u64, - pub range: u64, -} - -#[derive(Copy, Clone, Hash, PartialEq)] -pub(crate) enum OperandConstraint { - RegClass(RegClassIndex), - FixedReg(Register), - TiedInput(usize), - Stack(Stack), -} - -impl Into for RegClassIndex { - fn into(self) -> OperandConstraint { - OperandConstraint::RegClass(self) - } -} - -impl Into for Register { - fn into(self) -> OperandConstraint { - OperandConstraint::FixedReg(self) - } -} - -impl Into for usize { - fn into(self) -> OperandConstraint { - OperandConstraint::TiedInput(self) - } -} - -impl Into for Stack { - fn into(self) -> OperandConstraint { - OperandConstraint::Stack(self) - } -} - -/// A recipe for encoding instructions with a given format. -/// -/// Many different instructions can be encoded by the same recipe, but they -/// must all have the same instruction format. -/// -/// The `operands_in` and `operands_out` arguments are tuples specifying the register -/// allocation constraints for the value operands and results respectively. 
The -/// possible constraints for an operand are: -/// -/// - A `RegClass` specifying the set of allowed registers. -/// - A `Register` specifying a fixed-register operand. -/// - An integer indicating that this result is tied to a value operand, so -/// they must use the same register. -/// - A `Stack` specifying a value in a stack slot. -/// -/// The `branch_range` argument must be provided for recipes that can encode -/// branch instructions. It is an `(origin, bits)` tuple describing the exact -/// range that can be encoded in a branch instruction. -#[derive(Clone)] -pub(crate) struct EncodingRecipe { - /// Short mnemonic name for this recipe. - pub name: String, - - /// Associated instruction format. - pub format: Rc, - - /// Base number of bytes in the binary encoded instruction. - pub base_size: u64, - - /// Tuple of register constraints for value operands. - pub operands_in: Vec, - - /// Tuple of register constraints for results. - pub operands_out: Vec, - - /// Function name to use when computing actual size. - pub compute_size: &'static str, - - /// `(origin, bits)` range for branches. - pub branch_range: Option, - - /// This instruction clobbers `iflags` and `fflags`; true by default. - pub clobbers_flags: bool, - - /// Instruction predicate. - pub inst_predicate: Option, - - /// ISA predicate. - pub isa_predicate: Option, - - /// Rust code for binary emission. - pub emit: Option, -} - -// Implement PartialEq ourselves: take all the fields into account but the name. 
-impl PartialEq for EncodingRecipe { - fn eq(&self, other: &Self) -> bool { - Rc::ptr_eq(&self.format, &other.format) - && self.base_size == other.base_size - && self.operands_in == other.operands_in - && self.operands_out == other.operands_out - && self.compute_size == other.compute_size - && self.branch_range == other.branch_range - && self.clobbers_flags == other.clobbers_flags - && self.inst_predicate == other.inst_predicate - && self.isa_predicate == other.isa_predicate - && self.emit == other.emit - } -} - -// To allow using it in a hashmap. -impl Eq for EncodingRecipe {} - -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct EncodingRecipeNumber(u32); -entity_impl!(EncodingRecipeNumber); - -pub(crate) type Recipes = PrimaryMap; - -#[derive(Clone)] -pub(crate) struct EncodingRecipeBuilder { - pub name: String, - format: Rc, - pub base_size: u64, - pub operands_in: Option>, - pub operands_out: Option>, - pub compute_size: Option<&'static str>, - pub branch_range: Option, - pub emit: Option, - clobbers_flags: Option, - inst_predicate: Option, - isa_predicate: Option, -} - -impl EncodingRecipeBuilder { - pub fn new(name: impl Into, format: &Rc, base_size: u64) -> Self { - Self { - name: name.into(), - format: format.clone(), - base_size, - operands_in: None, - operands_out: None, - compute_size: None, - branch_range: None, - emit: None, - clobbers_flags: None, - inst_predicate: None, - isa_predicate: None, - } - } - - // Setters. 
- pub fn operands_in(mut self, constraints: Vec>) -> Self { - assert!(self.operands_in.is_none()); - self.operands_in = Some( - constraints - .into_iter() - .map(|constr| constr.into()) - .collect(), - ); - self - } - pub fn operands_out(mut self, constraints: Vec>) -> Self { - assert!(self.operands_out.is_none()); - self.operands_out = Some( - constraints - .into_iter() - .map(|constr| constr.into()) - .collect(), - ); - self - } - pub fn clobbers_flags(mut self, flag: bool) -> Self { - assert!(self.clobbers_flags.is_none()); - self.clobbers_flags = Some(flag); - self - } - pub fn emit(mut self, code: impl Into) -> Self { - assert!(self.emit.is_none()); - self.emit = Some(code.into()); - self - } - pub fn branch_range(mut self, range: (u64, u64)) -> Self { - assert!(self.branch_range.is_none()); - self.branch_range = Some(BranchRange { - inst_size: range.0, - range: range.1, - }); - self - } - pub fn isa_predicate(mut self, pred: SettingPredicateNumber) -> Self { - assert!(self.isa_predicate.is_none()); - self.isa_predicate = Some(pred); - self - } - pub fn inst_predicate(mut self, inst_predicate: impl Into) -> Self { - assert!(self.inst_predicate.is_none()); - self.inst_predicate = Some(inst_predicate.into()); - self - } - pub fn compute_size(mut self, compute_size: &'static str) -> Self { - assert!(self.compute_size.is_none()); - self.compute_size = Some(compute_size); - self - } - - pub fn build(self) -> EncodingRecipe { - let operands_in = self.operands_in.unwrap_or_default(); - let operands_out = self.operands_out.unwrap_or_default(); - - // The number of input constraints must match the number of format input operands. - if !self.format.has_value_list { - assert!( - operands_in.len() == self.format.num_value_operands, - "missing operand constraints for recipe {} (format {})", - self.name, - self.format.name - ); - } - - // Ensure tied inputs actually refer to existing inputs. 
- for constraint in operands_in.iter().chain(operands_out.iter()) { - if let OperandConstraint::TiedInput(n) = *constraint { - assert!(n < operands_in.len()); - } - } - - let compute_size = match self.compute_size { - Some(compute_size) => compute_size, - None => "base_size", - }; - - let clobbers_flags = self.clobbers_flags.unwrap_or(true); - - EncodingRecipe { - name: self.name, - format: self.format, - base_size: self.base_size, - operands_in, - operands_out, - compute_size, - branch_range: self.branch_range, - clobbers_flags, - inst_predicate: self.inst_predicate, - isa_predicate: self.isa_predicate, - emit: self.emit, - } - } -} diff --git a/cranelift/codegen/meta/src/cdsl/regs.rs b/cranelift/codegen/meta/src/cdsl/regs.rs deleted file mode 100644 index 864826ee43..0000000000 --- a/cranelift/codegen/meta/src/cdsl/regs.rs +++ /dev/null @@ -1,412 +0,0 @@ -use cranelift_codegen_shared::constants; -use cranelift_entity::{entity_impl, EntityRef, PrimaryMap}; - -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct RegBankIndex(u32); -entity_impl!(RegBankIndex); - -pub(crate) struct RegBank { - pub name: &'static str, - pub first_unit: u8, - pub units: u8, - pub names: Vec<&'static str>, - pub prefix: &'static str, - pub pressure_tracking: bool, - pub pinned_reg: Option, - pub toprcs: Vec, - pub classes: Vec, -} - -impl RegBank { - pub fn new( - name: &'static str, - first_unit: u8, - units: u8, - names: Vec<&'static str>, - prefix: &'static str, - pressure_tracking: bool, - pinned_reg: Option, - ) -> Self { - RegBank { - name, - first_unit, - units, - names, - prefix, - pressure_tracking, - pinned_reg, - toprcs: Vec::new(), - classes: Vec::new(), - } - } - - fn unit_by_name(&self, name: &'static str) -> u8 { - let unit = if let Some(found) = self.names.iter().position(|®_name| reg_name == name) { - found - } else { - // Try to match without the bank prefix. 
- assert!(name.starts_with(self.prefix)); - let name_without_prefix = &name[self.prefix.len()..]; - if let Some(found) = self - .names - .iter() - .position(|®_name| reg_name == name_without_prefix) - { - found - } else { - // Ultimate try: try to parse a number and use this in the array, eg r15 on x86. - if let Ok(as_num) = name_without_prefix.parse::() { - assert!( - as_num < self.units, - "trying to get {}, but bank only has {} registers!", - name, - self.units - ); - as_num as usize - } else { - panic!("invalid register name {}", name); - } - } - }; - self.first_unit + (unit as u8) - } -} - -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] -pub(crate) struct RegClassIndex(u32); -entity_impl!(RegClassIndex); - -pub(crate) struct RegClass { - pub name: &'static str, - pub index: RegClassIndex, - pub width: u8, - pub bank: RegBankIndex, - pub toprc: RegClassIndex, - pub count: u8, - pub start: u8, - pub subclasses: Vec, -} - -impl RegClass { - pub fn new( - name: &'static str, - index: RegClassIndex, - width: u8, - bank: RegBankIndex, - toprc: RegClassIndex, - count: u8, - start: u8, - ) -> Self { - Self { - name, - index, - width, - bank, - toprc, - count, - start, - subclasses: Vec::new(), - } - } - - /// Compute a bit-mask of subclasses, including self. - pub fn subclass_mask(&self) -> u64 { - let mut m = 1 << self.index.index(); - for rc in self.subclasses.iter() { - m |= 1 << rc.index(); - } - m - } - - /// Compute a bit-mask of the register units allocated by this register class. 
- pub fn mask(&self, bank_first_unit: u8) -> Vec { - let mut u = (self.start + bank_first_unit) as usize; - let mut out_mask = vec![0, 0, 0]; - for _ in 0..self.count { - out_mask[u / 32] |= 1 << (u % 32); - u += self.width as usize; - } - out_mask - } -} - -pub(crate) enum RegClassProto { - TopLevel(RegBankIndex), - SubClass(RegClassIndex), -} - -pub(crate) struct RegClassBuilder { - pub name: &'static str, - pub width: u8, - pub count: u8, - pub start: u8, - pub proto: RegClassProto, -} - -impl RegClassBuilder { - pub fn new_toplevel(name: &'static str, bank: RegBankIndex) -> Self { - Self { - name, - width: 1, - count: 0, - start: 0, - proto: RegClassProto::TopLevel(bank), - } - } - pub fn subclass_of( - name: &'static str, - parent_index: RegClassIndex, - start: u8, - stop: u8, - ) -> Self { - assert!(stop >= start); - Self { - name, - width: 0, - count: stop - start, - start, - proto: RegClassProto::SubClass(parent_index), - } - } - pub fn count(mut self, count: u8) -> Self { - self.count = count; - self - } - pub fn width(mut self, width: u8) -> Self { - match self.proto { - RegClassProto::TopLevel(_) => self.width = width, - RegClassProto::SubClass(_) => panic!("Subclasses inherit their parent's width."), - } - self - } -} - -pub(crate) struct RegBankBuilder { - pub name: &'static str, - pub units: u8, - pub names: Vec<&'static str>, - pub prefix: &'static str, - pub pressure_tracking: Option, - pub pinned_reg: Option, -} - -impl RegBankBuilder { - pub fn new(name: &'static str, prefix: &'static str) -> Self { - Self { - name, - units: 0, - names: vec![], - prefix, - pressure_tracking: None, - pinned_reg: None, - } - } - pub fn units(mut self, units: u8) -> Self { - self.units = units; - self - } - pub fn names(mut self, names: Vec<&'static str>) -> Self { - self.names = names; - self - } - pub fn track_pressure(mut self, track: bool) -> Self { - self.pressure_tracking = Some(track); - self - } - pub fn pinned_reg(mut self, unit: u16) -> Self { - 
assert!(unit < u16::from(self.units)); - self.pinned_reg = Some(unit); - self - } -} - -pub(crate) struct IsaRegsBuilder { - pub banks: PrimaryMap, - pub classes: PrimaryMap, -} - -impl IsaRegsBuilder { - pub fn new() -> Self { - Self { - banks: PrimaryMap::new(), - classes: PrimaryMap::new(), - } - } - - pub fn add_bank(&mut self, builder: RegBankBuilder) -> RegBankIndex { - let first_unit = if self.banks.is_empty() { - 0 - } else { - let last = &self.banks.last().unwrap(); - let first_available_unit = (last.first_unit + last.units) as i8; - let units = builder.units; - let align = if units.is_power_of_two() { - units - } else { - units.next_power_of_two() - } as i8; - (first_available_unit + align - 1) & -align - } as u8; - - self.banks.push(RegBank::new( - builder.name, - first_unit, - builder.units, - builder.names, - builder.prefix, - builder - .pressure_tracking - .expect("Pressure tracking must be explicitly set"), - builder.pinned_reg, - )) - } - - pub fn add_class(&mut self, builder: RegClassBuilder) -> RegClassIndex { - let class_index = self.classes.next_key(); - - // Finish delayed construction of RegClass. 
- let (bank, toprc, start, width) = match builder.proto { - RegClassProto::TopLevel(bank_index) => { - self.banks - .get_mut(bank_index) - .unwrap() - .toprcs - .push(class_index); - (bank_index, class_index, builder.start, builder.width) - } - RegClassProto::SubClass(parent_class_index) => { - assert!(builder.width == 0); - let (bank, toprc, start, width) = { - let parent = self.classes.get(parent_class_index).unwrap(); - (parent.bank, parent.toprc, parent.start, parent.width) - }; - for reg_class in self.classes.values_mut() { - if reg_class.toprc == toprc { - reg_class.subclasses.push(class_index); - } - } - let subclass_start = start + builder.start * width; - (bank, toprc, subclass_start, width) - } - }; - - let reg_bank_units = self.banks.get(bank).unwrap().units; - assert!(start < reg_bank_units); - - let count = if builder.count != 0 { - builder.count - } else { - reg_bank_units / width - }; - - let reg_class = RegClass::new(builder.name, class_index, width, bank, toprc, count, start); - self.classes.push(reg_class); - - let reg_bank = self.banks.get_mut(bank).unwrap(); - reg_bank.classes.push(class_index); - - class_index - } - - /// Checks that the set of register classes satisfies: - /// - /// 1. Closed under intersection: The intersection of any two register - /// classes in the set is either empty or identical to a member of the - /// set. - /// 2. There are no identical classes under different names. - /// 3. Classes are sorted topologically such that all subclasses have a - /// higher index that the superclass. 
- pub fn build(self) -> IsaRegs { - for reg_bank in self.banks.values() { - for i1 in reg_bank.classes.iter() { - for i2 in reg_bank.classes.iter() { - if i1 >= i2 { - continue; - } - - let rc1 = self.classes.get(*i1).unwrap(); - let rc2 = self.classes.get(*i2).unwrap(); - - let rc1_mask = rc1.mask(0); - let rc2_mask = rc2.mask(0); - - assert!( - rc1.width != rc2.width || rc1_mask != rc2_mask, - "no duplicates" - ); - if rc1.width != rc2.width { - continue; - } - - let mut intersect = Vec::new(); - for (a, b) in rc1_mask.iter().zip(rc2_mask.iter()) { - intersect.push(a & b); - } - if intersect == vec![0; intersect.len()] { - continue; - } - - // Classes must be topologically ordered, so the intersection can't be the - // superclass. - assert!(intersect != rc1_mask); - - // If the intersection is the second one, then it must be a subclass. - if intersect == rc2_mask { - assert!(self - .classes - .get(*i1) - .unwrap() - .subclasses - .iter() - .any(|x| *x == *i2)); - } - } - } - } - - assert!( - self.classes.len() <= constants::MAX_NUM_REG_CLASSES, - "Too many register classes" - ); - - let num_toplevel = self - .classes - .values() - .filter(|x| x.toprc == x.index && self.banks.get(x.bank).unwrap().pressure_tracking) - .count(); - - assert!( - num_toplevel <= constants::MAX_TRACKED_TOP_RCS, - "Too many top-level register classes" - ); - - IsaRegs::new(self.banks, self.classes) - } -} - -pub(crate) struct IsaRegs { - pub banks: PrimaryMap, - pub classes: PrimaryMap, -} - -impl IsaRegs { - fn new( - banks: PrimaryMap, - classes: PrimaryMap, - ) -> Self { - Self { banks, classes } - } - - pub fn class_by_name(&self, name: &str) -> RegClassIndex { - self.classes - .values() - .find(|&class| class.name == name) - .unwrap_or_else(|| panic!("register class {} not found", name)) - .index - } - - pub fn regunit_by_name(&self, class_index: RegClassIndex, name: &'static str) -> u8 { - let bank_index = self.classes.get(class_index).unwrap().bank; - 
self.banks.get(bank_index).unwrap().unit_by_name(name) - } -} diff --git a/cranelift/codegen/meta/src/cdsl/settings.rs b/cranelift/codegen/meta/src/cdsl/settings.rs index 52c51d54a8..c4e76b760f 100644 --- a/cranelift/codegen/meta/src/cdsl/settings.rs +++ b/cranelift/codegen/meta/src/cdsl/settings.rs @@ -150,14 +150,6 @@ impl SettingGroup { } panic!("Should have found bool setting by name."); } - - pub fn predicate_by_name(&self, name: &'static str) -> SettingPredicateNumber { - self.predicates - .iter() - .find(|pred| pred.name == name) - .unwrap_or_else(|| panic!("unknown predicate {}", name)) - .number - } } /// This is the basic information needed to track the specific parts of a setting when building diff --git a/cranelift/codegen/meta/src/cdsl/type_inference.rs b/cranelift/codegen/meta/src/cdsl/type_inference.rs index e17c305f9c..76fc1284f2 100644 --- a/cranelift/codegen/meta/src/cdsl/type_inference.rs +++ b/cranelift/codegen/meta/src/cdsl/type_inference.rs @@ -1,8 +1,4 @@ -use crate::cdsl::ast::{Def, DefIndex, DefPool, Var, VarIndex, VarPool}; -use crate::cdsl::typevar::{DerivedFunc, TypeSet, TypeVar}; - -use std::collections::{HashMap, HashSet}; -use std::iter::FromIterator; +use crate::cdsl::typevar::TypeVar; #[derive(Debug, Hash, PartialEq, Eq)] pub(crate) enum Constraint { @@ -11,651 +7,4 @@ pub(crate) enum Constraint { /// 1) They have the same number of lanes /// 2) In a lane tv1 has at least as many bits as tv2. WiderOrEq(TypeVar, TypeVar), - - /// Constraint specifying that two derived type vars must have the same runtime type. - Eq(TypeVar, TypeVar), - - /// Constraint specifying that a type var must belong to some typeset. 
- InTypeset(TypeVar, TypeSet), -} - -impl Constraint { - fn translate_with TypeVar>(&self, func: F) -> Constraint { - match self { - Constraint::WiderOrEq(lhs, rhs) => { - let lhs = func(&lhs); - let rhs = func(&rhs); - Constraint::WiderOrEq(lhs, rhs) - } - Constraint::Eq(lhs, rhs) => { - let lhs = func(&lhs); - let rhs = func(&rhs); - Constraint::Eq(lhs, rhs) - } - Constraint::InTypeset(tv, ts) => { - let tv = func(&tv); - Constraint::InTypeset(tv, ts.clone()) - } - } - } - - /// Creates a new constraint by replacing type vars by their hashmap equivalent. - fn translate_with_map( - &self, - original_to_own_typevar: &HashMap<&TypeVar, TypeVar>, - ) -> Constraint { - self.translate_with(|tv| substitute(original_to_own_typevar, tv)) - } - - /// Creates a new constraint by replacing type vars by their canonical equivalent. - fn translate_with_env(&self, type_env: &TypeEnvironment) -> Constraint { - self.translate_with(|tv| type_env.get_equivalent(tv)) - } - - fn is_trivial(&self) -> bool { - match self { - Constraint::WiderOrEq(lhs, rhs) => { - // Trivially true. - if lhs == rhs { - return true; - } - - let ts1 = lhs.get_typeset(); - let ts2 = rhs.get_typeset(); - - // Trivially true. - if ts1.is_wider_or_equal(&ts2) { - return true; - } - - // Trivially false. - if ts1.is_narrower(&ts2) { - return true; - } - - // Trivially false. - if (&ts1.lanes & &ts2.lanes).is_empty() { - return true; - } - - self.is_concrete() - } - Constraint::Eq(lhs, rhs) => lhs == rhs || self.is_concrete(), - Constraint::InTypeset(_, _) => { - // The way InTypeset are made, they would always be trivial if we were applying the - // same logic as the Python code did, so ignore this. - self.is_concrete() - } - } - } - - /// Returns true iff all the referenced type vars are singletons. 
- fn is_concrete(&self) -> bool { - match self { - Constraint::WiderOrEq(lhs, rhs) => { - lhs.singleton_type().is_some() && rhs.singleton_type().is_some() - } - Constraint::Eq(lhs, rhs) => { - lhs.singleton_type().is_some() && rhs.singleton_type().is_some() - } - Constraint::InTypeset(tv, _) => tv.singleton_type().is_some(), - } - } - - fn typevar_args(&self) -> Vec<&TypeVar> { - match self { - Constraint::WiderOrEq(lhs, rhs) => vec![lhs, rhs], - Constraint::Eq(lhs, rhs) => vec![lhs, rhs], - Constraint::InTypeset(tv, _) => vec![tv], - } - } -} - -#[derive(Clone, Copy)] -enum TypeEnvRank { - Singleton = 5, - Input = 4, - Intermediate = 3, - Output = 2, - Temp = 1, - Internal = 0, -} - -/// Class encapsulating the necessary bookkeeping for type inference. -pub(crate) struct TypeEnvironment { - vars: HashSet, - ranks: HashMap, - equivalency_map: HashMap, - pub constraints: Vec, -} - -impl TypeEnvironment { - fn new() -> Self { - TypeEnvironment { - vars: HashSet::new(), - ranks: HashMap::new(), - equivalency_map: HashMap::new(), - constraints: Vec::new(), - } - } - - fn register(&mut self, var_index: VarIndex, var: &mut Var) { - self.vars.insert(var_index); - let rank = if var.is_input() { - TypeEnvRank::Input - } else if var.is_intermediate() { - TypeEnvRank::Intermediate - } else if var.is_output() { - TypeEnvRank::Output - } else { - assert!(var.is_temp()); - TypeEnvRank::Temp - }; - self.ranks.insert(var.get_or_create_typevar(), rank); - } - - fn add_constraint(&mut self, constraint: Constraint) { - if self.constraints.iter().any(|item| *item == constraint) { - return; - } - - // Check extra conditions for InTypeset constraints. 
- if let Constraint::InTypeset(tv, _) = &constraint { - assert!( - tv.base.is_none(), - "type variable is {:?}, while expecting none", - tv - ); - assert!( - tv.name.starts_with("typeof_"), - "Name \"{}\" should start with \"typeof_\"", - tv.name - ); - } - - self.constraints.push(constraint); - } - - /// Returns the canonical representative of the equivalency class of the given argument, or - /// duplicates it if it's not there yet. - pub fn get_equivalent(&self, tv: &TypeVar) -> TypeVar { - let mut tv = tv; - while let Some(found) = self.equivalency_map.get(tv) { - tv = found; - } - match &tv.base { - Some(parent) => self - .get_equivalent(&parent.type_var) - .derived(parent.derived_func), - None => tv.clone(), - } - } - - /// Get the rank of tv in the partial order: - /// - TVs directly associated with a Var get their rank from the Var (see register()). - /// - Internally generated non-derived TVs implicitly get the lowest rank (0). - /// - Derived variables get their rank from their free typevar. - /// - Singletons have the highest rank. - /// - TVs associated with vars in a source pattern have a higher rank than TVs associated with - /// temporary vars. - fn rank(&self, tv: &TypeVar) -> u8 { - let actual_tv = match tv.base { - Some(_) => tv.free_typevar(), - None => Some(tv.clone()), - }; - - let rank = match actual_tv { - Some(actual_tv) => match self.ranks.get(&actual_tv) { - Some(rank) => Some(*rank), - None => { - assert!( - !actual_tv.name.starts_with("typeof_"), - "variable {} should be explicitly ranked", - actual_tv.name - ); - None - } - }, - None => None, - }; - - let rank = match rank { - Some(rank) => rank, - None => { - if tv.singleton_type().is_some() { - TypeEnvRank::Singleton - } else { - TypeEnvRank::Internal - } - } - }; - - rank as u8 - } - - /// Record the fact that the free tv1 is part of the same equivalence class as tv2. The - /// canonical representative of the merged class is tv2's canonical representative. 
- fn record_equivalent(&mut self, tv1: TypeVar, tv2: TypeVar) { - assert!(tv1.base.is_none()); - assert!(self.get_equivalent(&tv1) == tv1); - if let Some(tv2_base) = &tv2.base { - // Ensure there are no cycles. - assert!(self.get_equivalent(&tv2_base.type_var) != tv1); - } - self.equivalency_map.insert(tv1, tv2); - } - - /// Get the free typevars in the current type environment. - pub fn free_typevars(&self, var_pool: &mut VarPool) -> Vec { - let mut typevars = Vec::new(); - typevars.extend(self.equivalency_map.keys().cloned()); - typevars.extend( - self.vars - .iter() - .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()), - ); - - let set: HashSet = HashSet::from_iter( - typevars - .iter() - .map(|tv| self.get_equivalent(tv).free_typevar()) - .filter(|opt_tv| { - // Filter out singleton types. - opt_tv.is_some() - }) - .map(|tv| tv.unwrap()), - ); - Vec::from_iter(set) - } - - /// Normalize by collapsing any roots that don't correspond to a concrete type var AND have a - /// single type var derived from them or equivalent to them. - /// - /// e.g. if we have a root of the tree that looks like: - /// - /// typeof_a typeof_b - /// \\ / - /// typeof_x - /// | - /// half_width(1) - /// | - /// 1 - /// - /// we want to collapse the linear path between 1 and typeof_x. The resulting graph is: - /// - /// typeof_a typeof_b - /// \\ / - /// typeof_x - fn normalize(&mut self, var_pool: &mut VarPool) { - let source_tvs: HashSet = HashSet::from_iter( - self.vars - .iter() - .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()), - ); - - let mut children: HashMap> = HashMap::new(); - - // Insert all the parents found by the derivation relationship. - for type_var in self.equivalency_map.values() { - if type_var.base.is_none() { - continue; - } - - let parent_tv = type_var.free_typevar(); - if parent_tv.is_none() { - // Ignore this type variable, it's a singleton. 
- continue; - } - let parent_tv = parent_tv.unwrap(); - - children - .entry(parent_tv) - .or_insert_with(HashSet::new) - .insert(type_var.clone()); - } - - // Insert all the explicit equivalency links. - for (equivalent_tv, canon_tv) in self.equivalency_map.iter() { - children - .entry(canon_tv.clone()) - .or_insert_with(HashSet::new) - .insert(equivalent_tv.clone()); - } - - // Remove links that are straight paths up to typevar of variables. - for free_root in self.free_typevars(var_pool) { - let mut root = &free_root; - while !source_tvs.contains(&root) - && children.contains_key(&root) - && children.get(&root).unwrap().len() == 1 - { - let child = children.get(&root).unwrap().iter().next().unwrap(); - assert_eq!(self.equivalency_map[child], root.clone()); - self.equivalency_map.remove(child); - root = child; - } - } - } - - /// Extract a clean type environment from self, that only mentions type vars associated with - /// real variables. - fn extract(self, var_pool: &mut VarPool) -> TypeEnvironment { - let vars_tv: HashSet = HashSet::from_iter( - self.vars - .iter() - .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()), - ); - - let mut new_equivalency_map: HashMap = HashMap::new(); - for tv in &vars_tv { - let canon_tv = self.get_equivalent(tv); - if *tv != canon_tv { - new_equivalency_map.insert(tv.clone(), canon_tv.clone()); - } - - // Sanity check: the translated type map should only refer to real variables. - assert!(vars_tv.contains(tv)); - let canon_free_tv = canon_tv.free_typevar(); - assert!(canon_free_tv.is_none() || vars_tv.contains(&canon_free_tv.unwrap())); - } - - let mut new_constraints: HashSet = HashSet::new(); - for constraint in &self.constraints { - let constraint = constraint.translate_with_env(&self); - if constraint.is_trivial() || new_constraints.contains(&constraint) { - continue; - } - - // Sanity check: translated constraints should refer only to real variables. 
- for arg in constraint.typevar_args() { - let arg_free_tv = arg.free_typevar(); - assert!(arg_free_tv.is_none() || vars_tv.contains(&arg_free_tv.unwrap())); - } - - new_constraints.insert(constraint); - } - - TypeEnvironment { - vars: self.vars, - ranks: self.ranks, - equivalency_map: new_equivalency_map, - constraints: Vec::from_iter(new_constraints), - } - } -} - -/// Replaces an external type variable according to the following rules: -/// - if a local copy is present in the map, return it. -/// - or if it's derived, create a local derived one that recursively substitutes the parent. -/// - or return itself. -fn substitute(map: &HashMap<&TypeVar, TypeVar>, external_type_var: &TypeVar) -> TypeVar { - match map.get(&external_type_var) { - Some(own_type_var) => own_type_var.clone(), - None => match &external_type_var.base { - Some(parent) => { - let parent_substitute = substitute(map, &parent.type_var); - TypeVar::derived(&parent_substitute, parent.derived_func) - } - None => external_type_var.clone(), - }, - } -} - -/// Normalize a (potentially derived) typevar using the following rules: -/// -/// - vector and width derived functions commute -/// {HALF,DOUBLE}VECTOR({HALF,DOUBLE}WIDTH(base)) -> -/// {HALF,DOUBLE}WIDTH({HALF,DOUBLE}VECTOR(base)) -/// -/// - half/double pairs collapse -/// {HALF,DOUBLE}WIDTH({DOUBLE,HALF}WIDTH(base)) -> base -/// {HALF,DOUBLE}VECTOR({DOUBLE,HALF}VECTOR(base)) -> base -fn canonicalize_derivations(tv: TypeVar) -> TypeVar { - let base = match &tv.base { - Some(base) => base, - None => return tv, - }; - - let derived_func = base.derived_func; - - if let Some(base_base) = &base.type_var.base { - let base_base_tv = &base_base.type_var; - match (derived_func, base_base.derived_func) { - (DerivedFunc::HalfWidth, DerivedFunc::DoubleWidth) - | (DerivedFunc::DoubleWidth, DerivedFunc::HalfWidth) - | (DerivedFunc::HalfVector, DerivedFunc::DoubleVector) - | (DerivedFunc::DoubleVector, DerivedFunc::HalfVector) => { - // Cancelling bijective 
transformations. This doesn't hide any overflow issues - // since derived type sets are checked upon derivaion, and base typesets are only - // allowed to shrink. - return canonicalize_derivations(base_base_tv.clone()); - } - (DerivedFunc::HalfWidth, DerivedFunc::HalfVector) - | (DerivedFunc::HalfWidth, DerivedFunc::DoubleVector) - | (DerivedFunc::DoubleWidth, DerivedFunc::DoubleVector) - | (DerivedFunc::DoubleWidth, DerivedFunc::HalfVector) => { - // Arbitrarily put WIDTH derivations before VECTOR derivations, since they commute. - return canonicalize_derivations( - base_base_tv - .derived(derived_func) - .derived(base_base.derived_func), - ); - } - _ => {} - }; - } - - canonicalize_derivations(base.type_var.clone()).derived(derived_func) -} - -/// Given typevars tv1 and tv2 (which could be derived from one another), constrain their typesets -/// to be the same. When one is derived from the other, repeat the constrain process until -/// a fixed point is reached. -fn constrain_fixpoint(tv1: &TypeVar, tv2: &TypeVar) { - loop { - let old_tv1_ts = tv1.get_typeset().clone(); - tv2.constrain_types(tv1.clone()); - if tv1.get_typeset() == old_tv1_ts { - break; - } - } - - let old_tv2_ts = tv2.get_typeset(); - tv1.constrain_types(tv2.clone()); - // The above loop should ensure that all reference cycles have been handled. - assert!(old_tv2_ts == tv2.get_typeset()); -} - -/// Unify tv1 and tv2 in the given type environment. tv1 must have a rank greater or equal to tv2's -/// one, modulo commutations. -fn unify(tv1: &TypeVar, tv2: &TypeVar, type_env: &mut TypeEnvironment) -> Result<(), String> { - let tv1 = canonicalize_derivations(type_env.get_equivalent(tv1)); - let tv2 = canonicalize_derivations(type_env.get_equivalent(tv2)); - - if tv1 == tv2 { - // Already unified. 
- return Ok(()); - } - - if type_env.rank(&tv2) < type_env.rank(&tv1) { - // Make sure tv1 always has the smallest rank, since real variables have the higher rank - // and we want them to be the canonical representatives of their equivalency classes. - return unify(&tv2, &tv1, type_env); - } - - constrain_fixpoint(&tv1, &tv2); - - if tv1.get_typeset().size() == 0 || tv2.get_typeset().size() == 0 { - return Err(format!( - "Error: empty type created when unifying {} and {}", - tv1.name, tv2.name - )); - } - - let base = match &tv1.base { - Some(base) => base, - None => { - type_env.record_equivalent(tv1, tv2); - return Ok(()); - } - }; - - if let Some(inverse) = base.derived_func.inverse() { - return unify(&base.type_var, &tv2.derived(inverse), type_env); - } - - type_env.add_constraint(Constraint::Eq(tv1, tv2)); - Ok(()) -} - -/// Perform type inference on one Def in the current type environment and return an updated type -/// environment or error. -/// -/// At a high level this works by creating fresh copies of each formal type var in the Def's -/// instruction's signature, and unifying the formal typevar with the corresponding actual typevar. 
-fn infer_definition( - def: &Def, - var_pool: &mut VarPool, - type_env: TypeEnvironment, - last_type_index: &mut usize, -) -> Result { - let apply = &def.apply; - let inst = &apply.inst; - - let mut type_env = type_env; - let free_formal_tvs = inst.all_typevars(); - - let mut original_to_own_typevar: HashMap<&TypeVar, TypeVar> = HashMap::new(); - for &tv in &free_formal_tvs { - assert!(original_to_own_typevar - .insert( - tv, - TypeVar::copy_from(tv, format!("own_{}", last_type_index)) - ) - .is_none()); - *last_type_index += 1; - } - - // Update the mapping with any explicity bound type vars: - for (i, value_type) in apply.value_types.iter().enumerate() { - let singleton = TypeVar::new_singleton(value_type.clone()); - assert!(original_to_own_typevar - .insert(free_formal_tvs[i], singleton) - .is_some()); - } - - // Get fresh copies for each typevar in the signature (both free and derived). - let mut formal_tvs = Vec::new(); - formal_tvs.extend(inst.value_results.iter().map(|&i| { - substitute( - &original_to_own_typevar, - inst.operands_out[i].type_var().unwrap(), - ) - })); - formal_tvs.extend(inst.value_opnums.iter().map(|&i| { - substitute( - &original_to_own_typevar, - inst.operands_in[i].type_var().unwrap(), - ) - })); - - // Get the list of actual vars. - let mut actual_vars = Vec::new(); - actual_vars.extend(inst.value_results.iter().map(|&i| def.defined_vars[i])); - actual_vars.extend( - inst.value_opnums - .iter() - .map(|&i| apply.args[i].unwrap_var()), - ); - - // Get the list of the actual TypeVars. - let mut actual_tvs = Vec::new(); - for var_index in actual_vars { - let var = var_pool.get_mut(var_index); - type_env.register(var_index, var); - actual_tvs.push(var.get_or_create_typevar()); - } - - // Make sure we start unifying with the control type variable first, by putting it at the - // front of both vectors. 
- if let Some(poly) = &inst.polymorphic_info { - let own_ctrl_tv = &original_to_own_typevar[&poly.ctrl_typevar]; - let ctrl_index = formal_tvs.iter().position(|tv| tv == own_ctrl_tv).unwrap(); - if ctrl_index != 0 { - formal_tvs.swap(0, ctrl_index); - actual_tvs.swap(0, ctrl_index); - } - } - - // Unify each actual type variable with the corresponding formal type variable. - for (actual_tv, formal_tv) in actual_tvs.iter().zip(&formal_tvs) { - if let Err(msg) = unify(actual_tv, formal_tv, &mut type_env) { - return Err(format!( - "fail ti on {} <: {}: {}", - actual_tv.name, formal_tv.name, msg - )); - } - } - - // Add any instruction specific constraints. - for constraint in &inst.constraints { - type_env.add_constraint(constraint.translate_with_map(&original_to_own_typevar)); - } - - Ok(type_env) -} - -/// Perform type inference on an transformation. Return an updated type environment or error. -pub(crate) fn infer_transform( - src: DefIndex, - dst: &[DefIndex], - def_pool: &DefPool, - var_pool: &mut VarPool, -) -> Result { - let mut type_env = TypeEnvironment::new(); - let mut last_type_index = 0; - - // Execute type inference on the source pattern. - type_env = infer_definition(def_pool.get(src), var_pool, type_env, &mut last_type_index) - .map_err(|err| format!("In src pattern: {}", err))?; - - // Collect the type sets once after applying the source patterm; we'll compare the typesets - // after we've also considered the destination pattern, and will emit supplementary InTypeset - // checks if they don't match. - let src_typesets = type_env - .vars - .iter() - .map(|&var_index| { - let var = var_pool.get_mut(var_index); - let tv = type_env.get_equivalent(&var.get_or_create_typevar()); - (var_index, tv.get_typeset()) - }) - .collect::>(); - - // Execute type inference on the destination pattern. 
- for (i, &def_index) in dst.iter().enumerate() { - let def = def_pool.get(def_index); - type_env = infer_definition(def, var_pool, type_env, &mut last_type_index) - .map_err(|err| format!("line {}: {}", i, err))?; - } - - for (var_index, src_typeset) in src_typesets { - let var = var_pool.get(var_index); - if !var.has_free_typevar() { - continue; - } - let tv = type_env.get_equivalent(&var.get_typevar().unwrap()); - let new_typeset = tv.get_typeset(); - assert!( - new_typeset.is_subset(&src_typeset), - "type sets can only get narrower" - ); - if new_typeset != src_typeset { - type_env.add_constraint(Constraint::InTypeset(tv.clone(), new_typeset.clone())); - } - } - - type_env.normalize(var_pool); - - Ok(type_env.extract(var_pool)) } diff --git a/cranelift/codegen/meta/src/cdsl/types.rs b/cranelift/codegen/meta/src/cdsl/types.rs index 7e03c873db..7d74d66724 100644 --- a/cranelift/codegen/meta/src/cdsl/types.rs +++ b/cranelift/codegen/meta/src/cdsl/types.rs @@ -71,12 +71,12 @@ impl ValueType { } /// Find the unique number associated with this type. 
- pub fn number(&self) -> Option { + pub fn number(&self) -> u8 { match *self { - ValueType::Lane(l) => Some(l.number()), - ValueType::Reference(r) => Some(r.number()), - ValueType::Special(s) => Some(s.number()), - ValueType::Vector(ref v) => Some(v.number()), + ValueType::Lane(l) => l.number(), + ValueType::Reference(r) => r.number(), + ValueType::Special(s) => s.number(), + ValueType::Vector(ref v) => v.number(), } } @@ -237,20 +237,6 @@ impl LaneType { ValueType::Vector(VectorType::new(self, lanes.into())) } } - - pub fn is_float(self) -> bool { - match self { - LaneType::Float(_) => true, - _ => false, - } - } - - pub fn is_int(self) -> bool { - match self { - LaneType::Int(_) => true, - _ => false, - } - } } impl fmt::Display for LaneType { @@ -407,8 +393,6 @@ impl fmt::Debug for VectorType { #[derive(Clone, Copy, PartialEq, Eq, Hash)] pub(crate) enum SpecialType { Flag(shared_types::Flag), - // FIXME remove once the old style backends are removed. - StructArgument, } impl SpecialType { @@ -423,9 +407,6 @@ impl SpecialType { "CPU flags representing the result of a floating point comparison. 
These flags can be tested with a :type:`floatcc` condition code.", ), - SpecialType::StructArgument => { - String::from("After legalization sarg_t arguments will get this type.") - } } } @@ -433,7 +414,6 @@ impl SpecialType { pub fn lane_bits(self) -> u64 { match self { SpecialType::Flag(_) => 0, - SpecialType::StructArgument => 0, } } @@ -442,7 +422,6 @@ impl SpecialType { match self { SpecialType::Flag(shared_types::Flag::IFlags) => 1, SpecialType::Flag(shared_types::Flag::FFlags) => 2, - SpecialType::StructArgument => 3, } } } @@ -452,7 +431,6 @@ impl fmt::Display for SpecialType { match *self { SpecialType::Flag(shared_types::Flag::IFlags) => write!(f, "iflags"), SpecialType::Flag(shared_types::Flag::FFlags) => write!(f, "fflags"), - SpecialType::StructArgument => write!(f, "sarg_t"), } } } @@ -464,7 +442,6 @@ impl fmt::Debug for SpecialType { "{}", match *self { SpecialType::Flag(_) => format!("FlagsType({})", self), - SpecialType::StructArgument => format!("StructArgument"), } ) } @@ -478,14 +455,12 @@ impl From for SpecialType { pub(crate) struct SpecialTypeIterator { flag_iter: shared_types::FlagIterator, - done: bool, } impl SpecialTypeIterator { fn new() -> Self { Self { flag_iter: shared_types::FlagIterator::new(), - done: false, } } } @@ -493,16 +468,7 @@ impl SpecialTypeIterator { impl Iterator for SpecialTypeIterator { type Item = SpecialType; fn next(&mut self) -> Option { - if let Some(f) = self.flag_iter.next() { - Some(SpecialType::from(f)) - } else { - if !self.done { - self.done = true; - Some(SpecialType::StructArgument) - } else { - None - } - } + self.flag_iter.next().map(SpecialType::from) } } diff --git a/cranelift/codegen/meta/src/cdsl/typevar.rs b/cranelift/codegen/meta/src/cdsl/typevar.rs index c1027bf847..eea3e2724c 100644 --- a/cranelift/codegen/meta/src/cdsl/typevar.rs +++ b/cranelift/codegen/meta/src/cdsl/typevar.rs @@ -1,5 +1,5 @@ use std::cell::RefCell; -use std::collections::{BTreeSet, HashSet}; +use std::collections::BTreeSet; 
use std::fmt; use std::hash; use std::iter::FromIterator; @@ -269,52 +269,6 @@ impl TypeVar { pub fn merge_lanes(&self) -> TypeVar { self.derived(DerivedFunc::MergeLanes) } - - /// Constrain the range of types this variable can assume to a subset of those in the typeset - /// ts. - /// May mutate itself if it's not derived, or its parent if it is. - pub fn constrain_types_by_ts(&self, type_set: TypeSet) { - match &self.base { - Some(base) => { - base.type_var - .constrain_types_by_ts(type_set.preimage(base.derived_func)); - } - None => { - self.content - .borrow_mut() - .type_set - .inplace_intersect_with(&type_set); - } - } - } - - /// Constrain the range of types this variable can assume to a subset of those `other` can - /// assume. - /// May mutate itself if it's not derived, or its parent if it is. - pub fn constrain_types(&self, other: TypeVar) { - if self == &other { - return; - } - self.constrain_types_by_ts(other.get_typeset()); - } - - /// Get a Rust expression that computes the type of this type variable. - pub fn to_rust_code(&self) -> String { - match &self.base { - Some(base) => format!( - "{}.{}().unwrap()", - base.type_var.to_rust_code(), - base.derived_func.name() - ), - None => { - if let Some(singleton) = self.singleton_type() { - singleton.rust_name() - } else { - self.name.clone() - } - } - } - } } impl Into for &TypeVar { @@ -392,19 +346,6 @@ impl DerivedFunc { DerivedFunc::MergeLanes => "merge_lanes", } } - - /// Returns the inverse function of this one, if it is a bijection. 
- pub fn inverse(self) -> Option { - match self { - DerivedFunc::HalfWidth => Some(DerivedFunc::DoubleWidth), - DerivedFunc::DoubleWidth => Some(DerivedFunc::HalfWidth), - DerivedFunc::HalfVector => Some(DerivedFunc::DoubleVector), - DerivedFunc::DoubleVector => Some(DerivedFunc::HalfVector), - DerivedFunc::MergeLanes => Some(DerivedFunc::SplitLanes), - DerivedFunc::SplitLanes => Some(DerivedFunc::MergeLanes), - _ => None, - } - } } #[derive(Debug, Hash)] @@ -594,94 +535,6 @@ impl TypeSet { assert_eq!(types.len(), 1); types.remove(0) } - - /// Return the inverse image of self across the derived function func. - fn preimage(&self, func: DerivedFunc) -> TypeSet { - if self.size() == 0 { - // The inverse of the empty set is itself. - return self.clone(); - } - - match func { - DerivedFunc::LaneOf => { - let mut copy = self.clone(); - copy.lanes = - NumSet::from_iter((0..=MAX_LANES.trailing_zeros()).map(|i| u16::pow(2, i))); - copy - } - DerivedFunc::AsBool => { - let mut copy = self.clone(); - if self.bools.contains(&1) { - copy.ints = NumSet::from_iter(vec![8, 16, 32, 64, 128]); - copy.floats = NumSet::from_iter(vec![32, 64]); - } else { - copy.ints = &self.bools - &NumSet::from_iter(vec![1]); - copy.floats = &self.bools & &NumSet::from_iter(vec![32, 64]); - // If b1 is not in our typeset, than lanes=1 cannot be in the pre-image, as - // as_bool() of scalars is always b1. 
- copy.lanes = &self.lanes - &NumSet::from_iter(vec![1]); - } - copy - } - DerivedFunc::HalfWidth => self.double_width(), - DerivedFunc::DoubleWidth => self.half_width(), - DerivedFunc::HalfVector => self.double_vector(), - DerivedFunc::DoubleVector => self.half_vector(), - DerivedFunc::SplitLanes => self.double_width().half_vector(), - DerivedFunc::MergeLanes => self.half_width().double_vector(), - } - } - - pub fn inplace_intersect_with(&mut self, other: &TypeSet) { - self.lanes = &self.lanes & &other.lanes; - self.ints = &self.ints & &other.ints; - self.floats = &self.floats & &other.floats; - self.bools = &self.bools & &other.bools; - self.refs = &self.refs & &other.refs; - - let mut new_specials = Vec::new(); - for spec in &self.specials { - if let Some(spec) = other.specials.iter().find(|&other_spec| other_spec == spec) { - new_specials.push(*spec); - } - } - self.specials = new_specials; - } - - pub fn is_subset(&self, other: &TypeSet) -> bool { - self.lanes.is_subset(&other.lanes) - && self.ints.is_subset(&other.ints) - && self.floats.is_subset(&other.floats) - && self.bools.is_subset(&other.bools) - && self.refs.is_subset(&other.refs) - && { - let specials: HashSet = HashSet::from_iter(self.specials.clone()); - let other_specials = HashSet::from_iter(other.specials.clone()); - specials.is_subset(&other_specials) - } - } - - pub fn is_wider_or_equal(&self, other: &TypeSet) -> bool { - set_wider_or_equal(&self.ints, &other.ints) - && set_wider_or_equal(&self.floats, &other.floats) - && set_wider_or_equal(&self.bools, &other.bools) - && set_wider_or_equal(&self.refs, &other.refs) - } - - pub fn is_narrower(&self, other: &TypeSet) -> bool { - set_narrower(&self.ints, &other.ints) - && set_narrower(&self.floats, &other.floats) - && set_narrower(&self.bools, &other.bools) - && set_narrower(&self.refs, &other.refs) - } -} - -fn set_wider_or_equal(s1: &NumSet, s2: &NumSet) -> bool { - !s1.is_empty() && !s2.is_empty() && s1.iter().min() >= s2.iter().max() -} - -fn 
set_narrower(s1: &NumSet, s2: &NumSet) -> bool { - !s1.is_empty() && !s2.is_empty() && s1.iter().min() < s2.iter().max() } impl fmt::Debug for TypeSet { @@ -806,18 +659,6 @@ impl TypeSetBuilder { self.specials, ) } - - pub fn all() -> TypeSet { - TypeSetBuilder::new() - .ints(Interval::All) - .floats(Interval::All) - .bools(Interval::All) - .refs(Interval::All) - .simd_lanes(Interval::All) - .specials(ValueType::all_special_types().collect()) - .includes_scalars(true) - .build() - } } #[derive(PartialEq)] @@ -1054,136 +895,6 @@ fn test_forward_images() { ); } -#[test] -fn test_backward_images() { - let empty_set = TypeSetBuilder::new().build(); - - // LaneOf. - assert_eq!( - TypeSetBuilder::new() - .simd_lanes(1..1) - .ints(8..8) - .floats(32..32) - .build() - .preimage(DerivedFunc::LaneOf), - TypeSetBuilder::new() - .simd_lanes(Interval::All) - .ints(8..8) - .floats(32..32) - .build() - ); - assert_eq!(empty_set.preimage(DerivedFunc::LaneOf), empty_set); - - // AsBool. - assert_eq!( - TypeSetBuilder::new() - .simd_lanes(1..4) - .bools(1..128) - .build() - .preimage(DerivedFunc::AsBool), - TypeSetBuilder::new() - .simd_lanes(1..4) - .ints(Interval::All) - .bools(Interval::All) - .floats(Interval::All) - .build() - ); - - // Double vector. - assert_eq!( - TypeSetBuilder::new() - .simd_lanes(1..1) - .ints(8..8) - .build() - .preimage(DerivedFunc::DoubleVector) - .size(), - 0 - ); - assert_eq!( - TypeSetBuilder::new() - .simd_lanes(1..16) - .ints(8..16) - .floats(32..32) - .build() - .preimage(DerivedFunc::DoubleVector), - TypeSetBuilder::new() - .simd_lanes(1..8) - .ints(8..16) - .floats(32..32) - .build(), - ); - - // Half vector. 
- assert_eq!( - TypeSetBuilder::new() - .simd_lanes(256..256) - .ints(8..8) - .build() - .preimage(DerivedFunc::HalfVector) - .size(), - 0 - ); - assert_eq!( - TypeSetBuilder::new() - .simd_lanes(64..128) - .bools(1..32) - .build() - .preimage(DerivedFunc::HalfVector), - TypeSetBuilder::new() - .simd_lanes(128..256) - .bools(1..32) - .build(), - ); - - // Half width. - assert_eq!( - TypeSetBuilder::new() - .ints(128..128) - .floats(64..64) - .bools(128..128) - .build() - .preimage(DerivedFunc::HalfWidth) - .size(), - 0 - ); - assert_eq!( - TypeSetBuilder::new() - .simd_lanes(64..256) - .bools(1..64) - .build() - .preimage(DerivedFunc::HalfWidth), - TypeSetBuilder::new() - .simd_lanes(64..256) - .bools(16..128) - .build(), - ); - - // Double width. - assert_eq!( - TypeSetBuilder::new() - .ints(8..8) - .floats(32..32) - .bools(1..8) - .build() - .preimage(DerivedFunc::DoubleWidth) - .size(), - 0 - ); - assert_eq!( - TypeSetBuilder::new() - .simd_lanes(1..16) - .ints(8..16) - .floats(32..64) - .build() - .preimage(DerivedFunc::DoubleWidth), - TypeSetBuilder::new() - .simd_lanes(1..16) - .ints(8..8) - .floats(32..32) - .build() - ); -} - #[test] #[should_panic] fn test_typeset_singleton_panic_nonsingleton_types() { diff --git a/cranelift/codegen/meta/src/cdsl/xform.rs b/cranelift/codegen/meta/src/cdsl/xform.rs deleted file mode 100644 index 95b7af867c..0000000000 --- a/cranelift/codegen/meta/src/cdsl/xform.rs +++ /dev/null @@ -1,484 +0,0 @@ -use crate::cdsl::ast::{ - Apply, BlockPool, ConstPool, DefIndex, DefPool, DummyDef, DummyExpr, Expr, PatternPosition, - VarIndex, VarPool, -}; -use crate::cdsl::instructions::Instruction; -use crate::cdsl::type_inference::{infer_transform, TypeEnvironment}; -use crate::cdsl::typevar::TypeVar; - -use cranelift_entity::{entity_impl, PrimaryMap}; - -use std::collections::{HashMap, HashSet}; -use std::iter::FromIterator; - -/// An instruction transformation consists of a source and destination pattern. 
-/// -/// Patterns are expressed in *register transfer language* as tuples of Def or Expr nodes. A -/// pattern may optionally have a sequence of TypeConstraints, that additionally limit the set of -/// cases when it applies. -/// -/// The source pattern can contain only a single instruction. -pub(crate) struct Transform { - pub src: DefIndex, - pub dst: Vec, - pub var_pool: VarPool, - pub def_pool: DefPool, - pub block_pool: BlockPool, - pub const_pool: ConstPool, - pub type_env: TypeEnvironment, -} - -type SymbolTable = HashMap; - -impl Transform { - fn new(src: DummyDef, dst: Vec) -> Self { - let mut var_pool = VarPool::new(); - let mut def_pool = DefPool::new(); - let mut block_pool = BlockPool::new(); - let mut const_pool = ConstPool::new(); - - let mut input_vars: Vec = Vec::new(); - let mut defined_vars: Vec = Vec::new(); - - // Maps variable names to our own Var copies. - let mut symbol_table: SymbolTable = SymbolTable::new(); - - // Rewrite variables in src and dst using our own copies. - let src = rewrite_def_list( - PatternPosition::Source, - vec![src], - &mut symbol_table, - &mut input_vars, - &mut defined_vars, - &mut var_pool, - &mut def_pool, - &mut block_pool, - &mut const_pool, - )[0]; - - let num_src_inputs = input_vars.len(); - - let dst = rewrite_def_list( - PatternPosition::Destination, - dst, - &mut symbol_table, - &mut input_vars, - &mut defined_vars, - &mut var_pool, - &mut def_pool, - &mut block_pool, - &mut const_pool, - ); - - // Sanity checks. - for &var_index in &input_vars { - assert!( - var_pool.get(var_index).is_input(), - "'{:?}' used as both input and def", - var_pool.get(var_index) - ); - } - assert!( - input_vars.len() == num_src_inputs, - "extra input vars in dst pattern: {:?}", - input_vars - .iter() - .map(|&i| var_pool.get(i)) - .skip(num_src_inputs) - .collect::>() - ); - - // Perform type inference and cleanup. 
- let type_env = infer_transform(src, &dst, &def_pool, &mut var_pool).unwrap(); - - // Sanity check: the set of inferred free type variables should be a subset of the type - // variables corresponding to Vars appearing in the source pattern. - { - let free_typevars: HashSet = - HashSet::from_iter(type_env.free_typevars(&mut var_pool)); - let src_tvs = HashSet::from_iter( - input_vars - .clone() - .iter() - .chain( - defined_vars - .iter() - .filter(|&&var_index| !var_pool.get(var_index).is_temp()), - ) - .map(|&var_index| var_pool.get(var_index).get_typevar()) - .filter(|maybe_var| maybe_var.is_some()) - .map(|var| var.unwrap()), - ); - if !free_typevars.is_subset(&src_tvs) { - let missing_tvs = (&free_typevars - &src_tvs) - .iter() - .map(|tv| tv.name.clone()) - .collect::>() - .join(", "); - panic!("Some free vars don't appear in src: {}", missing_tvs); - } - } - - for &var_index in input_vars.iter().chain(defined_vars.iter()) { - let var = var_pool.get_mut(var_index); - let canon_tv = type_env.get_equivalent(&var.get_or_create_typevar()); - var.set_typevar(canon_tv); - } - - Self { - src, - dst, - var_pool, - def_pool, - block_pool, - const_pool, - type_env, - } - } - - fn verify_legalize(&self) { - let def = self.def_pool.get(self.src); - for &var_index in def.defined_vars.iter() { - let defined_var = self.var_pool.get(var_index); - assert!( - defined_var.is_output(), - "{:?} not defined in the destination pattern", - defined_var - ); - } - } -} - -/// Inserts, if not present, a name in the `symbol_table`. Then returns its index in the variable -/// pool `var_pool`. If the variable was not present in the symbol table, then add it to the list of -/// `defined_vars`. -fn var_index( - name: &str, - symbol_table: &mut SymbolTable, - defined_vars: &mut Vec, - var_pool: &mut VarPool, -) -> VarIndex { - let name = name.to_string(); - match symbol_table.get(&name) { - Some(&existing_var) => existing_var, - None => { - // Materialize the variable. 
- let new_var = var_pool.create(name.clone()); - symbol_table.insert(name, new_var); - defined_vars.push(new_var); - new_var - } - } -} - -/// Given a list of symbols defined in a Def, rewrite them to local symbols. Yield the new locals. -fn rewrite_defined_vars( - position: PatternPosition, - dummy_def: &DummyDef, - def_index: DefIndex, - symbol_table: &mut SymbolTable, - defined_vars: &mut Vec, - var_pool: &mut VarPool, -) -> Vec { - let mut new_defined_vars = Vec::new(); - for var in &dummy_def.defined_vars { - let own_var = var_index(&var.name, symbol_table, defined_vars, var_pool); - var_pool.get_mut(own_var).set_def(position, def_index); - new_defined_vars.push(own_var); - } - new_defined_vars -} - -/// Find all uses of variables in `expr` and replace them with our own local symbols. -fn rewrite_expr( - position: PatternPosition, - dummy_expr: DummyExpr, - symbol_table: &mut SymbolTable, - input_vars: &mut Vec, - var_pool: &mut VarPool, - const_pool: &mut ConstPool, -) -> Apply { - let (apply_target, dummy_args) = if let DummyExpr::Apply(apply_target, dummy_args) = dummy_expr - { - (apply_target, dummy_args) - } else { - panic!("we only rewrite apply expressions"); - }; - - assert_eq!( - apply_target.inst().operands_in.len(), - dummy_args.len(), - "number of arguments in instruction {} is incorrect\nexpected: {:?}", - apply_target.inst().name, - apply_target - .inst() - .operands_in - .iter() - .map(|operand| format!("{}: {}", operand.name, operand.kind.rust_type)) - .collect::>(), - ); - - let mut args = Vec::new(); - for (i, arg) in dummy_args.into_iter().enumerate() { - match arg { - DummyExpr::Var(var) => { - let own_var = var_index(&var.name, symbol_table, input_vars, var_pool); - let var = var_pool.get(own_var); - assert!( - var.is_input() || var.get_def(position).is_some(), - "{:?} used as both input and def", - var - ); - args.push(Expr::Var(own_var)); - } - DummyExpr::Literal(literal) => { - assert!(!apply_target.inst().operands_in[i].is_value()); - 
args.push(Expr::Literal(literal)); - } - DummyExpr::Constant(constant) => { - let const_name = const_pool.insert(constant.0); - // Here we abuse var_index by passing an empty, immediately-dropped vector to - // `defined_vars`; the reason for this is that unlike the `Var` case above, - // constants will create a variable that is not an input variable (it is tracked - // instead by ConstPool). - let const_var = var_index(&const_name, symbol_table, &mut vec![], var_pool); - args.push(Expr::Var(const_var)); - } - DummyExpr::Apply(..) => { - panic!("Recursive apply is not allowed."); - } - DummyExpr::Block(_block) => { - panic!("Blocks are not valid arguments."); - } - } - } - - Apply::new(apply_target, args) -} - -#[allow(clippy::too_many_arguments)] -fn rewrite_def_list( - position: PatternPosition, - dummy_defs: Vec, - symbol_table: &mut SymbolTable, - input_vars: &mut Vec, - defined_vars: &mut Vec, - var_pool: &mut VarPool, - def_pool: &mut DefPool, - block_pool: &mut BlockPool, - const_pool: &mut ConstPool, -) -> Vec { - let mut new_defs = Vec::new(); - // Register variable names of new blocks first as a block name can be used to jump forward. Thus - // the name has to be registered first to avoid misinterpreting it as an input-var. - for dummy_def in dummy_defs.iter() { - if let DummyExpr::Block(ref var) = dummy_def.expr { - var_index(&var.name, symbol_table, defined_vars, var_pool); - } - } - - // Iterate over the definitions and blocks, to map variables names to inputs or outputs. 
- for dummy_def in dummy_defs { - let def_index = def_pool.next_index(); - - let new_defined_vars = rewrite_defined_vars( - position, - &dummy_def, - def_index, - symbol_table, - defined_vars, - var_pool, - ); - if let DummyExpr::Block(var) = dummy_def.expr { - let var_index = *symbol_table - .get(&var.name) - .or_else(|| { - panic!( - "Block {} was not registered during the first visit", - var.name - ) - }) - .unwrap(); - var_pool.get_mut(var_index).set_def(position, def_index); - block_pool.create_block(var_index, def_index); - } else { - let new_apply = rewrite_expr( - position, - dummy_def.expr, - symbol_table, - input_vars, - var_pool, - const_pool, - ); - - assert!( - def_pool.next_index() == def_index, - "shouldn't have created new defs in the meanwhile" - ); - assert_eq!( - new_apply.inst.value_results.len(), - new_defined_vars.len(), - "number of Var results in instruction is incorrect" - ); - - new_defs.push(def_pool.create_inst(new_apply, new_defined_vars)); - } - } - new_defs -} - -/// A group of related transformations. -pub(crate) struct TransformGroup { - pub name: &'static str, - pub doc: &'static str, - pub chain_with: Option, - pub isa_name: Option<&'static str>, - pub id: TransformGroupIndex, - - /// Maps Instruction camel_case names to custom legalization functions names. - pub custom_legalizes: HashMap, - pub transforms: Vec, -} - -impl TransformGroup { - pub fn rust_name(&self) -> String { - match self.isa_name { - Some(_) => { - // This is a function in the same module as the LEGALIZE_ACTIONS table referring to - // it. 
- self.name.to_string() - } - None => format!("crate::legalizer::{}", self.name), - } - } -} - -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct TransformGroupIndex(u32); -entity_impl!(TransformGroupIndex); - -pub(crate) struct TransformGroupBuilder { - name: &'static str, - doc: &'static str, - chain_with: Option, - isa_name: Option<&'static str>, - pub custom_legalizes: HashMap, - pub transforms: Vec, -} - -impl TransformGroupBuilder { - pub fn new(name: &'static str, doc: &'static str) -> Self { - Self { - name, - doc, - chain_with: None, - isa_name: None, - custom_legalizes: HashMap::new(), - transforms: Vec::new(), - } - } - - pub fn chain_with(mut self, next_id: TransformGroupIndex) -> Self { - assert!(self.chain_with.is_none()); - self.chain_with = Some(next_id); - self - } - - pub fn isa(mut self, isa_name: &'static str) -> Self { - assert!(self.isa_name.is_none()); - self.isa_name = Some(isa_name); - self - } - - /// Add a custom legalization action for `inst`. - /// - /// The `func_name` parameter is the fully qualified name of a Rust function which takes the - /// same arguments as the `isa::Legalize` actions. - /// - /// The custom function will be called to legalize `inst` and any return value is ignored. - pub fn custom_legalize(&mut self, inst: &Instruction, func_name: &'static str) { - assert!( - self.custom_legalizes - .insert(inst.camel_name.clone(), func_name) - .is_none(), - "custom legalization action for {} inserted twice", - inst.name - ); - } - - /// Add a legalization pattern to this group. 
- pub fn legalize(&mut self, src: DummyDef, dst: Vec) { - let transform = Transform::new(src, dst); - transform.verify_legalize(); - self.transforms.push(transform); - } - - pub fn build_and_add_to(self, owner: &mut TransformGroups) -> TransformGroupIndex { - let next_id = owner.next_key(); - owner.add(TransformGroup { - name: self.name, - doc: self.doc, - isa_name: self.isa_name, - id: next_id, - chain_with: self.chain_with, - custom_legalizes: self.custom_legalizes, - transforms: self.transforms, - }) - } -} - -pub(crate) struct TransformGroups { - groups: PrimaryMap, -} - -impl TransformGroups { - pub fn new() -> Self { - Self { - groups: PrimaryMap::new(), - } - } - pub fn add(&mut self, new_group: TransformGroup) -> TransformGroupIndex { - for group in self.groups.values() { - assert!( - group.name != new_group.name, - "trying to insert {} for the second time", - new_group.name - ); - } - self.groups.push(new_group) - } - pub fn get(&self, id: TransformGroupIndex) -> &TransformGroup { - &self.groups[id] - } - fn next_key(&self) -> TransformGroupIndex { - self.groups.next_key() - } - pub fn by_name(&self, name: &'static str) -> &TransformGroup { - for group in self.groups.values() { - if group.name == name { - return group; - } - } - panic!("transform group with name {} not found", name); - } -} - -#[test] -#[should_panic] -fn test_double_custom_legalization() { - use crate::cdsl::formats::InstructionFormatBuilder; - use crate::cdsl::instructions::{AllInstructions, InstructionBuilder, InstructionGroupBuilder}; - - let nullary = InstructionFormatBuilder::new("nullary").build(); - - let mut dummy_all = AllInstructions::new(); - let mut inst_group = InstructionGroupBuilder::new(&mut dummy_all); - inst_group.push(InstructionBuilder::new("dummy", "doc", &nullary)); - - let inst_group = inst_group.build(); - let dummy_inst = inst_group.by_name("dummy"); - - let mut transform_group = TransformGroupBuilder::new("test", "doc"); - 
transform_group.custom_legalize(&dummy_inst, "custom 1"); - transform_group.custom_legalize(&dummy_inst, "custom 2"); -} diff --git a/cranelift/codegen/meta/src/gen_binemit.rs b/cranelift/codegen/meta/src/gen_binemit.rs deleted file mode 100644 index f67aa9b5a9..0000000000 --- a/cranelift/codegen/meta/src/gen_binemit.rs +++ /dev/null @@ -1,224 +0,0 @@ -//! Generate binary emission code for each ISA. - -use cranelift_entity::EntityRef; - -use crate::error; -use crate::srcgen::Formatter; - -use crate::cdsl::recipes::{EncodingRecipe, OperandConstraint, Recipes}; - -/// Generate code to handle a single recipe. -/// -/// - Unpack the instruction data, knowing the format. -/// - Determine register locations for operands with register constraints. -/// - Determine stack slot locations for operands with stack constraints. -/// - Call hand-written code for the actual emission. -fn gen_recipe(recipe: &EncodingRecipe, fmt: &mut Formatter) { - let inst_format = &recipe.format; - let num_value_ops = inst_format.num_value_operands; - - // TODO: Set want_args to true for only MultiAry instructions instead of all formats with value - // list. - let want_args = inst_format.has_value_list - || recipe.operands_in.iter().any(|c| match c { - OperandConstraint::RegClass(_) | OperandConstraint::Stack(_) => true, - OperandConstraint::FixedReg(_) | OperandConstraint::TiedInput(_) => false, - }); - assert!(!want_args || num_value_ops > 0 || inst_format.has_value_list); - - let want_outs = recipe.operands_out.iter().any(|c| match c { - OperandConstraint::RegClass(_) | OperandConstraint::Stack(_) => true, - OperandConstraint::FixedReg(_) | OperandConstraint::TiedInput(_) => false, - }); - - let is_regmove = ["RegMove", "RegSpill", "RegFill"].contains(&inst_format.name); - - // Unpack the instruction data. 
- fmtln!(fmt, "if let InstructionData::{} {{", inst_format.name); - fmt.indent(|fmt| { - fmt.line("opcode,"); - for f in &inst_format.imm_fields { - fmtln!(fmt, "{},", f.member); - } - if want_args { - if inst_format.has_value_list || num_value_ops > 1 { - fmt.line("ref args,"); - } else { - fmt.line("arg,"); - } - } - fmt.line(".."); - - fmt.outdented_line("} = *inst_data {"); - - // Pass recipe arguments in this order: inputs, imm_fields, outputs. - let mut args = String::new(); - - if want_args && !is_regmove { - if inst_format.has_value_list { - fmt.line("let args = args.as_slice(&func.dfg.value_lists);"); - } else if num_value_ops == 1 { - fmt.line("let args = [arg];"); - } - args += &unwrap_values(&recipe.operands_in, "in", "args", fmt); - } - - for f in &inst_format.imm_fields { - args += &format!(", {}", f.member); - } - - // Unwrap interesting output arguments. - if want_outs { - if recipe.operands_out.len() == 1 { - fmt.line("let results = [func.dfg.first_result(inst)];") - } else { - fmt.line("let results = func.dfg.inst_results(inst);"); - } - args += &unwrap_values(&recipe.operands_out, "out", "results", fmt); - } - - // Optimization: Only update the register diversion tracker for regmove instructions. - if is_regmove { - fmt.line("divert.apply(inst_data);") - } - - match &recipe.emit { - Some(emit) => { - fmt.multi_line(emit); - fmt.line("return;"); - } - None => { - fmtln!( - fmt, - "return recipe_{}(func, inst, sink, bits{});", - recipe.name.to_lowercase(), - args - ); - } - } - }); - fmt.line("}"); -} - -/// Emit code that unwraps values living in registers or stack slots. -/// -/// :param args: Input or output constraints. -/// :param prefix: Prefix to be used for the generated local variables. -/// :param values: Name of slice containing the values to be unwrapped. 
-/// :returns: Comma separated list of the generated variables -fn unwrap_values( - args: &[OperandConstraint], - prefix: &str, - values_slice: &str, - fmt: &mut Formatter, -) -> String { - let mut varlist = String::new(); - for (i, cst) in args.iter().enumerate() { - match cst { - OperandConstraint::RegClass(_reg_class) => { - let v = format!("{}_reg{}", prefix, i); - varlist += &format!(", {}", v); - fmtln!( - fmt, - "let {} = divert.reg({}[{}], &func.locations);", - v, - values_slice, - i - ); - } - OperandConstraint::Stack(stack) => { - let v = format!("{}_stk{}", prefix, i); - varlist += &format!(", {}", v); - fmtln!(fmt, "let {} = StackRef::masked(", v); - fmt.indent(|fmt| { - fmtln!( - fmt, - "divert.stack({}[{}], &func.locations),", - values_slice, - i - ); - fmt.line(format!("{},", stack.stack_base_mask())); - fmt.line("&func.stack_slots,"); - }); - fmt.line(").unwrap();"); - } - _ => {} - } - } - varlist -} - -fn gen_isa(isa_name: &str, recipes: &Recipes, fmt: &mut Formatter) { - fmt.doc_comment(format!( - "Emit binary machine code for `inst` for the {} ISA.", - isa_name - )); - - if recipes.is_empty() { - fmt.line("pub fn emit_inst("); - fmt.indent(|fmt| { - fmt.line("func: &Function,"); - fmt.line("inst: Inst,"); - fmt.line("_divert: &mut RegDiversions,"); - fmt.line("_sink: &mut CS,"); - fmt.line("_isa: &dyn TargetIsa,"); - }); - fmt.line(") {"); - fmt.indent(|fmt| { - // No encoding recipes: Emit a stub. 
- fmt.line("bad_encoding(func, inst)"); - }); - fmt.line("}"); - return; - } - - fmt.line("#[allow(unused_variables, unreachable_code)]"); - fmt.line("pub fn emit_inst("); - fmt.indent(|fmt| { - fmt.line("func: &Function,"); - fmt.line("inst: Inst,"); - fmt.line("divert: &mut RegDiversions,"); - fmt.line("sink: &mut CS,"); - fmt.line("isa: &dyn TargetIsa,") - }); - - fmt.line(") {"); - fmt.indent(|fmt| { - fmt.line("let encoding = func.encodings[inst];"); - fmt.line("let bits = encoding.bits();"); - fmt.line("let inst_data = &func.dfg[inst];"); - fmt.line("match encoding.recipe() {"); - fmt.indent(|fmt| { - for (i, recipe) in recipes.iter() { - fmt.comment(format!("Recipe {}", recipe.name)); - fmtln!(fmt, "{} => {{", i.index()); - fmt.indent(|fmt| { - gen_recipe(recipe, fmt); - }); - fmt.line("}"); - } - fmt.line("_ => {},"); - }); - fmt.line("}"); - - // Allow for unencoded ghost instructions. The verifier will check details. - fmt.line("if encoding.is_legal() {"); - fmt.indent(|fmt| { - fmt.line("bad_encoding(func, inst);"); - }); - fmt.line("}"); - }); - fmt.line("}"); -} - -pub(crate) fn generate( - isa_name: &str, - recipes: &Recipes, - binemit_filename: &str, - out_dir: &str, -) -> Result<(), error::Error> { - let mut fmt = Formatter::new(); - gen_isa(isa_name, recipes, &mut fmt); - fmt.update_file(binemit_filename, out_dir)?; - Ok(()) -} diff --git a/cranelift/codegen/meta/src/gen_encodings.rs b/cranelift/codegen/meta/src/gen_encodings.rs deleted file mode 100644 index d7bb289bd2..0000000000 --- a/cranelift/codegen/meta/src/gen_encodings.rs +++ /dev/null @@ -1,1139 +0,0 @@ -//! Generate sources for instruction encoding. -//! -//! The tables and functions generated here support the `TargetISA::encode()` function which -//! determines if a given instruction is legal, and if so, its `Encoding` data which consists of a -//! *recipe* and some *encoding* bits. -//! -//! The `encode` function doesn't actually generate the binary machine bits. Each recipe has a -//! 
corresponding hand-written function to do that after registers are allocated. -//! -//! This is the information available to us: -//! -//! - The instruction to be encoded as an `InstructionData` reference. -//! - The controlling type variable. -//! - The data-flow graph giving us access to the types of all values involved. This is needed for -//! testing any secondary type variables. -//! - A `PredicateView` reference for the ISA-specific settings for evaluating ISA predicates. -//! - The currently active CPU mode is determined by the ISA. -//! -//! ## Level 1 table lookup -//! -//! The CPU mode provides the first table. The key is the instruction's controlling type variable. -//! If the instruction is not polymorphic, use `INVALID` for the type variable. The table values -//! are level 2 tables. -//! -//! ## Level 2 table lookup -//! -//! The level 2 table is keyed by the instruction's opcode. The table values are *encoding lists*. -//! -//! The two-level table lookup allows the level 2 tables to be much smaller with good locality. -//! Code in any given function usually only uses a few different types, so many of the level 2 -//! tables will be cold. -//! -//! ## Encoding lists -//! -//! An encoding list is a non-empty sequence of list entries. Each entry has one of these forms: -//! -//! 1. Recipe + bits. Use this encoding if the recipe predicate is satisfied. -//! 2. Recipe + bits, final entry. Use this encoding if the recipe predicate is satisfied. -//! Otherwise, stop with the default legalization code. -//! 3. Stop with legalization code. -//! 4. Predicate + skip count. Test predicate and skip N entries if it is false. -//! 5. Predicate + stop. Test predicate and stop with the default legalization code if it is false. -//! -//! The instruction predicate is also used to distinguish between polymorphic instructions with -//! different types for secondary type variables. 
- -use std::collections::btree_map; -use std::collections::{BTreeMap, HashMap, HashSet}; -use std::convert::TryFrom; -use std::iter::FromIterator; - -use cranelift_codegen_shared::constant_hash::generate_table; -use cranelift_entity::EntityRef; - -use crate::error; -use crate::srcgen::Formatter; - -use crate::cdsl::cpu_modes::CpuMode; -use crate::cdsl::encodings::Encoding; -use crate::cdsl::instructions::{Instruction, InstructionPredicate, InstructionPredicateNumber}; -use crate::cdsl::isa::TargetIsa; -use crate::cdsl::recipes::{EncodingRecipe, OperandConstraint, Recipes, Register}; -use crate::cdsl::regs::IsaRegs; -use crate::cdsl::settings::SettingPredicateNumber; -use crate::cdsl::types::ValueType; -use crate::cdsl::xform::TransformGroupIndex; - -use crate::shared::Definitions as SharedDefinitions; - -use crate::default_map::MapWithDefault; -use crate::unique_table::UniqueSeqTable; - -/// Emit code for matching an instruction predicate against an `InstructionData` reference called -/// `inst`. -/// -/// The generated code is an `if let` pattern match that falls through if the instruction has an -/// unexpected format. This should lead to a panic. 
-fn emit_instp(instp: &InstructionPredicate, has_func: bool, fmt: &mut Formatter) { - if let Some(type_predicate) = instp.type_predicate("func") { - fmt.line("let args = inst.arguments(&func.dfg.value_lists);"); - fmt.line(type_predicate); - return; - } - - let leaves = instp.collect_leaves(); - - let mut has_type_check = false; - let mut format_name = None; - let mut field_names = HashSet::new(); - - for leaf in leaves { - if leaf.is_type_predicate() { - has_type_check = true; - } else { - field_names.insert(leaf.format_destructuring_member_name()); - let leaf_format_name = leaf.format_name(); - match format_name { - None => format_name = Some(leaf_format_name), - Some(previous_format_name) => { - assert!( - previous_format_name == leaf_format_name, - "Format predicate can only operate on a single InstructionFormat; trying to use both {} and {}", previous_format_name, leaf_format_name - ); - } - } - } - } - - let mut fields = Vec::from_iter(field_names); - fields.sort(); - let fields = fields.join(", "); - - let format_name = format_name.expect("There should be a format name!"); - - fmtln!( - fmt, - "if let crate::ir::InstructionData::{} {{ {}, .. }} = *inst {{", - format_name, - fields - ); - fmt.indent(|fmt| { - if has_type_check { - // We could implement this. - assert!(has_func, "recipe predicates can't check type variables."); - fmt.line("let args = inst.arguments(&func.dfg.value_lists);"); - } else if has_func { - // Silence dead argument. - fmt.line("let _ = func;"); - } - fmtln!(fmt, "return {};", instp.rust_predicate("func").unwrap()); - }); - fmtln!(fmt, "}"); - - fmt.line("unreachable!();"); -} - -/// Emit private functions for checking recipe predicates as well as a static `RECIPE_PREDICATES` -/// array indexed by recipe number. -/// -/// A recipe predicate is a combination of an ISA predicate and an instruction predicate. Many -/// recipes have identical predicates. 
-fn emit_recipe_predicates(isa: &TargetIsa, fmt: &mut Formatter) { - let mut predicate_names = HashMap::new(); - - fmt.comment(format!("{} recipe predicates.", isa.name)); - for recipe in isa.recipes.values() { - let (isap, instp) = match (&recipe.isa_predicate, &recipe.inst_predicate) { - (None, None) => continue, - (isap, instp) if predicate_names.contains_key(&(isap, instp)) => continue, - (isap, instp) => (isap, instp), - }; - - let func_name = format!("recipe_predicate_{}", recipe.name.to_lowercase()); - predicate_names.insert((isap, instp), func_name.clone()); - - // Generate the predicate function. - fmtln!( - fmt, - "fn {}({}: crate::settings::PredicateView, {}: &ir::InstructionData) -> bool {{", - func_name, - if isap.is_some() { "isap" } else { "_" }, - if instp.is_some() { "inst" } else { "_" } - ); - fmt.indent(|fmt| { - match (isap, instp) { - (Some(isap), None) => { - fmtln!(fmt, "isap.test({})", isap); - } - (None, Some(instp)) => { - emit_instp(instp, /* has func */ false, fmt); - } - (Some(isap), Some(instp)) => { - fmtln!(fmt, "isap.test({}) &&", isap); - emit_instp(instp, /* has func */ false, fmt); - } - _ => panic!("skipped above"), - } - }); - fmtln!(fmt, "}"); - } - fmt.empty_line(); - - // Generate the static table. - fmt.doc_comment(format!( - r#"{} recipe predicate table. - - One entry per recipe, set to Some only when the recipe is guarded by a predicate."#, - isa.name - )); - fmtln!( - fmt, - "pub static RECIPE_PREDICATES: [RecipePredicate; {}] = [", - isa.recipes.len() - ); - fmt.indent(|fmt| { - for recipe in isa.recipes.values() { - match (&recipe.isa_predicate, &recipe.inst_predicate) { - (None, None) => fmt.line("None,"), - key => fmtln!(fmt, "Some({}),", predicate_names.get(&key).unwrap()), - } - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); -} - -/// Emit private functions for matching instruction predicates as well as a static -/// `INST_PREDICATES` array indexed by predicate number. 
-fn emit_inst_predicates(isa: &TargetIsa, fmt: &mut Formatter) { - fmt.comment(format!("{} instruction predicates.", isa.name)); - for (id, instp) in isa.encodings_predicates.iter() { - fmtln!(fmt, "fn inst_predicate_{}(func: &crate::ir::Function, inst: &crate::ir::InstructionData) -> bool {{", id.index()); - fmt.indent(|fmt| { - emit_instp(instp, /* has func */ true, fmt); - }); - fmtln!(fmt, "}"); - } - fmt.empty_line(); - - // Generate the static table. - fmt.doc_comment(format!( - r#"{} instruction predicate table. - - One entry per instruction predicate, so the encoding bytecode can embed indexes into this - table."#, - isa.name - )); - fmtln!( - fmt, - "pub static INST_PREDICATES: [InstPredicate; {}] = [", - isa.encodings_predicates.len() - ); - fmt.indent(|fmt| { - for id in isa.encodings_predicates.keys() { - fmtln!(fmt, "inst_predicate_{},", id.index()); - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); -} - -/// Emit a table of encoding recipe names keyed by recipe number. -/// -/// This is used for pretty-printing encodings. -fn emit_recipe_names(isa: &TargetIsa, fmt: &mut Formatter) { - fmt.doc_comment(format!( - r#"{} recipe names, using the same recipe index spaces as the one specified by the - corresponding binemit file."#, - isa.name - )); - fmtln!( - fmt, - "static RECIPE_NAMES: [&str; {}] = [", - isa.recipes.len() - ); - fmt.indent(|fmt| { - for recipe in isa.recipes.values() { - fmtln!(fmt, r#""{}","#, recipe.name); - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); -} - -/// Returns a set of all the registers involved in fixed register constraints. -fn get_fixed_registers(operands_in: &[OperandConstraint]) -> HashSet { - HashSet::from_iter( - operands_in - .iter() - .map(|constraint| { - if let OperandConstraint::FixedReg(reg) = &constraint { - Some(*reg) - } else { - None - } - }) - .filter(|opt| opt.is_some()) - .map(|opt| opt.unwrap()), - ) -} - -/// Emit a struct field initializer for an array of operand constraints. 
-/// -/// Note "fixed_registers" must refer to the other kind of operands (i.e. if we're operating on -/// inputs, fixed_registers must contain the fixed output registers). -fn emit_operand_constraints( - registers: &IsaRegs, - recipe: &EncodingRecipe, - constraints: &[OperandConstraint], - field_name: &'static str, - tied_operands: &HashMap, - fixed_registers: &HashSet, - fmt: &mut Formatter, -) { - if constraints.is_empty() { - fmtln!(fmt, "{}: &[],", field_name); - return; - } - - fmtln!(fmt, "{}: &[", field_name); - fmt.indent(|fmt| { - for (n, constraint) in constraints.iter().enumerate() { - fmt.line("OperandConstraint {"); - fmt.indent(|fmt| { - match constraint { - OperandConstraint::RegClass(reg_class) => { - if let Some(tied_input) = tied_operands.get(&n) { - fmtln!(fmt, "kind: ConstraintKind::Tied({}),", tied_input); - } else { - fmt.line("kind: ConstraintKind::Reg,"); - } - fmtln!( - fmt, - "regclass: &{}_DATA,", - registers.classes[*reg_class].name - ); - } - OperandConstraint::FixedReg(reg) => { - assert!(!tied_operands.contains_key(&n), "can't tie fixed registers"); - let constraint_kind = if fixed_registers.contains(®) { - "FixedTied" - } else { - "FixedReg" - }; - fmtln!( - fmt, - "kind: ConstraintKind::{}({}),", - constraint_kind, - reg.unit - ); - fmtln!( - fmt, - "regclass: &{}_DATA,", - registers.classes[reg.regclass].name - ); - } - OperandConstraint::TiedInput(tied_input) => { - // This is a tied output constraint. It should never happen - // for input constraints. 
- assert!( - tied_input == tied_operands.get(&n).unwrap(), - "invalid tied constraint" - ); - fmtln!(fmt, "kind: ConstraintKind::Tied({}),", tied_input); - - let tied_class = if let OperandConstraint::RegClass(tied_class) = - recipe.operands_in[*tied_input] - { - tied_class - } else { - panic!("tied constraints relate only to register inputs"); - }; - - fmtln!( - fmt, - "regclass: &{}_DATA,", - registers.classes[tied_class].name - ); - } - OperandConstraint::Stack(stack) => { - assert!(!tied_operands.contains_key(&n), "can't tie stack operand"); - fmt.line("kind: ConstraintKind::Stack,"); - fmtln!( - fmt, - "regclass: &{}_DATA,", - registers.classes[stack.regclass].name - ); - } - } - }); - fmt.line("},"); - } - }); - fmtln!(fmt, "],"); -} - -/// Emit a table of encoding recipe operand constraints keyed by recipe number. -/// -/// These are used by the register allocator to pick registers that can be properly encoded. -fn emit_recipe_constraints(isa: &TargetIsa, fmt: &mut Formatter) { - fmt.doc_comment(format!( - r#"{} recipe constraints list, using the same recipe index spaces as the one - specified by the corresponding binemit file. These constraints are used by register - allocation to select the right location to use for input and output values."#, - isa.name - )); - fmtln!( - fmt, - "static RECIPE_CONSTRAINTS: [RecipeConstraints; {}] = [", - isa.recipes.len() - ); - fmt.indent(|fmt| { - for recipe in isa.recipes.values() { - // Compute a mapping of tied operands in both directions (input tied to outputs and - // conversely). - let mut tied_in_to_out = HashMap::new(); - let mut tied_out_to_in = HashMap::new(); - for (out_index, constraint) in recipe.operands_out.iter().enumerate() { - if let OperandConstraint::TiedInput(in_index) = &constraint { - tied_in_to_out.insert(*in_index, out_index); - tied_out_to_in.insert(out_index, *in_index); - } - } - - // Find the sets of registers involved in fixed register constraints. 
- let fixed_inputs = get_fixed_registers(&recipe.operands_in); - let fixed_outputs = get_fixed_registers(&recipe.operands_out); - - fmt.comment(format!("Constraints for recipe {}:", recipe.name)); - fmt.line("RecipeConstraints {"); - fmt.indent(|fmt| { - emit_operand_constraints( - &isa.regs, - recipe, - &recipe.operands_in, - "ins", - &tied_in_to_out, - &fixed_outputs, - fmt, - ); - emit_operand_constraints( - &isa.regs, - recipe, - &recipe.operands_out, - "outs", - &tied_out_to_in, - &fixed_inputs, - fmt, - ); - fmtln!( - fmt, - "fixed_ins: {},", - if !fixed_inputs.is_empty() { - "true" - } else { - "false" - } - ); - fmtln!( - fmt, - "fixed_outs: {},", - if !fixed_outputs.is_empty() { - "true" - } else { - "false" - } - ); - fmtln!( - fmt, - "tied_ops: {},", - if !tied_in_to_out.is_empty() { - "true" - } else { - "false" - } - ); - fmtln!( - fmt, - "clobbers_flags: {},", - if recipe.clobbers_flags { - "true" - } else { - "false" - } - ); - }); - fmt.line("},"); - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); -} - -/// Emit a table of encoding recipe code size information. -fn emit_recipe_sizing(isa: &TargetIsa, fmt: &mut Formatter) { - fmt.doc_comment(format!( - r#"{} recipe sizing descriptors, using the same recipe index spaces as the one - specified by the corresponding binemit file. 
These are used to compute the final size of an - instruction, as well as to compute the range of branches."#, - isa.name - )); - fmtln!( - fmt, - "static RECIPE_SIZING: [RecipeSizing; {}] = [", - isa.recipes.len() - ); - fmt.indent(|fmt| { - for recipe in isa.recipes.values() { - fmt.comment(format!("Code size information for recipe {}:", recipe.name)); - fmt.line("RecipeSizing {"); - fmt.indent(|fmt| { - fmtln!(fmt, "base_size: {},", recipe.base_size); - fmtln!(fmt, "compute_size: {},", recipe.compute_size); - if let Some(range) = &recipe.branch_range { - fmtln!( - fmt, - "branch_range: Some(BranchRange {{ origin: {}, bits: {} }}),", - range.inst_size, - range.range - ); - } else { - fmt.line("branch_range: None,"); - } - }); - fmt.line("},"); - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); -} - -/// Level 1 table mapping types to `Level2` objects. -struct Level1Table<'cpu_mode> { - cpu_mode: &'cpu_mode CpuMode, - legalize_code: TransformGroupIndex, - - table_map: HashMap, usize>, - table_vec: Vec, -} - -impl<'cpu_mode> Level1Table<'cpu_mode> { - fn new(cpu_mode: &'cpu_mode CpuMode) -> Self { - Self { - cpu_mode, - legalize_code: cpu_mode.get_default_legalize_code(), - table_map: HashMap::new(), - table_vec: Vec::new(), - } - } - - /// Returns the level2 table for the given type; None means monomorphic, in this context. 
- fn l2table_for(&mut self, typ: Option) -> &mut Level2Table { - let cpu_mode = &self.cpu_mode; - let index = match self.table_map.get(&typ) { - Some(&index) => index, - None => { - let legalize_code = cpu_mode.get_legalize_code_for(&typ); - let table = Level2Table::new(typ.clone(), legalize_code); - let index = self.table_vec.len(); - self.table_map.insert(typ, index); - self.table_vec.push(table); - index - } - }; - self.table_vec.get_mut(index).unwrap() - } - - fn l2tables(&mut self) -> Vec<&mut Level2Table> { - self.table_vec - .iter_mut() - .filter(|table| !table.is_empty()) - .collect::>() - } -} - -struct Level2HashTableEntry { - inst_name: String, - offset: usize, -} - -/// Level 2 table mapping instruction opcodes to `EncList` objects. -/// -/// A level 2 table can be completely empty if it only holds a custom legalization action for `ty`. -struct Level2Table { - typ: Option, - legalize_code: TransformGroupIndex, - inst_to_encodings: BTreeMap, - hash_table_offset: Option, - hash_table_len: Option, -} - -impl Level2Table { - fn new(typ: Option, legalize_code: TransformGroupIndex) -> Self { - Self { - typ, - legalize_code, - inst_to_encodings: BTreeMap::new(), - hash_table_offset: None, - hash_table_len: None, - } - } - - fn enclist_for(&mut self, inst: &Instruction) -> &mut EncodingList { - let copied_typ = self.typ.clone(); - self.inst_to_encodings - .entry(inst.name.clone()) - .or_insert_with(|| EncodingList::new(inst, copied_typ)) - } - - fn enclists(&mut self) -> btree_map::ValuesMut<'_, String, EncodingList> { - self.inst_to_encodings.values_mut() - } - - fn is_empty(&self) -> bool { - self.inst_to_encodings.is_empty() - } - - fn layout_hashtable( - &mut self, - level2_hashtables: &mut Vec>, - level2_doc: &mut HashMap>, - ) { - let hash_table = generate_table( - self.inst_to_encodings.values(), - self.inst_to_encodings.len(), - // TODO the Python code wanted opcode numbers to start from 1. 
- |enc_list| enc_list.inst.opcode_number.index() + 1, - ); - - let hash_table_offset = level2_hashtables.len(); - let hash_table_len = hash_table.len(); - - assert!(self.hash_table_offset.is_none()); - assert!(self.hash_table_len.is_none()); - self.hash_table_offset = Some(hash_table_offset); - self.hash_table_len = Some(hash_table_len); - - level2_hashtables.extend(hash_table.iter().map(|opt_enc_list| { - opt_enc_list.map(|enc_list| Level2HashTableEntry { - inst_name: enc_list.inst.camel_name.clone(), - offset: enc_list.offset.unwrap(), - }) - })); - - let typ_comment = match &self.typ { - Some(ty) => ty.to_string(), - None => "typeless".into(), - }; - - level2_doc.get_or_default(hash_table_offset).push(format!( - "{:06x}: {}, {} entries", - hash_table_offset, typ_comment, hash_table_len - )); - } -} - -/// The u16 values in an encoding list entry are interpreted as follows: -/// -/// NR = len(all_recipes) -/// -/// entry < 2*NR -/// Try Encoding(entry/2, next_entry) if the recipe predicate is satisfied. -/// If bit 0 is set, stop with the default legalization code. -/// If bit 0 is clear, keep going down the list. -/// entry < PRED_START -/// Stop with legalization code `entry - 2*NR`. -/// -/// Remaining entries are interpreted as (skip, pred) pairs, where: -/// -/// skip = (entry - PRED_START) >> PRED_BITS -/// pred = (entry - PRED_START) & PRED_MASK -/// -/// If the predicate is satisfied, keep going. Otherwise skip over the next -/// `skip` entries. If skip == 0, stop with the default legalization code. -/// -/// The `pred` predicate number is interpreted as an instruction predicate if it -/// is in range, otherwise an ISA predicate. - -/// Encoding lists are represented as u16 arrays. -const CODE_BITS: usize = 16; - -/// Beginning of the predicate code words. -const PRED_START: u16 = 0x1000; - -/// Number of bits used to hold a predicate number (instruction + ISA predicates). -const PRED_BITS: usize = 12; - -/// Mask for extracting the predicate number. 
-const PRED_MASK: usize = (1 << PRED_BITS) - 1; - -/// Encoder for the list format above. -struct Encoder { - num_instruction_predicates: usize, - - /// u16 encoding list words. - words: Vec, - - /// Documentation comments: Index into `words` + comment. - docs: Vec<(usize, String)>, -} - -impl Encoder { - fn new(num_instruction_predicates: usize) -> Self { - Self { - num_instruction_predicates, - words: Vec::new(), - docs: Vec::new(), - } - } - - /// Add a recipe+bits entry to the list. - fn recipe(&mut self, recipes: &Recipes, enc: &Encoding, is_final: bool) { - let code = (2 * enc.recipe.index() + if is_final { 1 } else { 0 }) as u16; - assert!(code < PRED_START); - - let doc = format!( - "--> {}{}", - enc.to_rust_comment(recipes), - if is_final { " and stop" } else { "" } - ); - self.docs.push((self.words.len(), doc)); - - self.words.push(code); - self.words.push(enc.encbits); - } - - /// Add a predicate entry. - fn pred(&mut self, pred_comment: String, skip: usize, n: usize) { - assert!(n <= PRED_MASK); - let entry = (PRED_START as usize) + (n | (skip << PRED_BITS)); - assert!(entry < (1 << CODE_BITS)); - let entry = entry as u16; - - let doc = if skip == 0 { - "stop".to_string() - } else { - format!("skip {}", skip) - }; - let doc = format!("{} unless {}", doc, pred_comment); - - self.docs.push((self.words.len(), doc)); - self.words.push(entry); - } - - /// Add an instruction predicate entry. - fn inst_predicate(&mut self, pred: InstructionPredicateNumber, skip: usize) { - let number = pred.index(); - let pred_comment = format!("inst_predicate_{}", number); - self.pred(pred_comment, skip, number); - } - - /// Add an ISA predicate entry. - fn isa_predicate(&mut self, pred: SettingPredicateNumber, skip: usize) { - // ISA predicates follow the instruction predicates. 
- let n = self.num_instruction_predicates + (pred as usize); - let pred_comment = format!("PredicateView({})", pred); - self.pred(pred_comment, skip, n); - } -} - -/// List of instructions for encoding a given type + opcode pair. -/// -/// An encoding list contains a sequence of predicates and encoding recipes, all encoded as u16 -/// values. -struct EncodingList { - inst: Instruction, - typ: Option, - encodings: Vec, - offset: Option, -} - -impl EncodingList { - fn new(inst: &Instruction, typ: Option) -> Self { - Self { - inst: inst.clone(), - typ, - encodings: Default::default(), - offset: None, - } - } - - /// Encode this list as a sequence of u16 numbers. - /// - /// Adds the sequence to `enc_lists` and records the returned offset as - /// `self.offset`. - /// - /// Adds comment lines to `enc_lists_doc` keyed by enc_lists offsets. - fn encode( - &mut self, - isa: &TargetIsa, - cpu_mode: &CpuMode, - enc_lists: &mut UniqueSeqTable, - enc_lists_doc: &mut HashMap>, - ) { - assert!(!self.encodings.is_empty()); - - let mut encoder = Encoder::new(isa.encodings_predicates.len()); - - let mut index = 0; - while index < self.encodings.len() { - let encoding = &self.encodings[index]; - - // Try to see how many encodings are following and have the same ISA predicate and - // instruction predicate, so as to reduce the number of tests carried out by the - // encoding list interpreter.. - // - // Encodings with similar tests are hereby called a group. The group includes the - // current encoding we're looking at. 
- let (isa_predicate, inst_predicate) = - (&encoding.isa_predicate, &encoding.inst_predicate); - - let group_size = { - let mut group_size = 1; - while index + group_size < self.encodings.len() { - let next_encoding = &self.encodings[index + group_size]; - if &next_encoding.inst_predicate != inst_predicate - || &next_encoding.isa_predicate != isa_predicate - { - break; - } - group_size += 1; - } - group_size - }; - - let is_last_group = index + group_size == self.encodings.len(); - - // The number of entries to skip when a predicate isn't satisfied is the size of both - // predicates + the size of the group, minus one (for this predicate). Each recipe - // entry has a size of two u16 (recipe index + bits). - let mut skip = if is_last_group { - 0 - } else { - let isap_size = match isa_predicate { - Some(_) => 1, - None => 0, - }; - let instp_size = match inst_predicate { - Some(_) => 1, - None => 0, - }; - isap_size + instp_size + group_size * 2 - 1 - }; - - if let Some(pred) = isa_predicate { - encoder.isa_predicate(*pred, skip); - if !is_last_group { - skip -= 1; - } - } - - if let Some(pred) = inst_predicate { - encoder.inst_predicate(*pred, skip); - // No need to update skip, it's dead after this point. - } - - for i in 0..group_size { - let encoding = &self.encodings[index + i]; - let is_last_encoding = index + i == self.encodings.len() - 1; - encoder.recipe(&isa.recipes, encoding, is_last_encoding); - } - - index += group_size; - } - - assert!(self.offset.is_none()); - let offset = enc_lists.add(&encoder.words); - self.offset = Some(offset); - - // Doc comments. 
- let recipe_typ_mode_name = format!( - "{}{} ({})", - self.inst.name, - if let Some(typ) = &self.typ { - format!(".{}", typ.to_string()) - } else { - "".into() - }, - cpu_mode.name - ); - - enc_lists_doc - .get_or_default(offset) - .push(format!("{:06x}: {}", offset, recipe_typ_mode_name)); - for (pos, doc) in encoder.docs { - enc_lists_doc.get_or_default(offset + pos).push(doc); - } - enc_lists_doc - .get_or_default(offset + encoder.words.len()) - .insert(0, format!("end of {}", recipe_typ_mode_name)); - } -} - -fn make_tables(cpu_mode: &CpuMode) -> Level1Table { - let mut table = Level1Table::new(cpu_mode); - - for encoding in &cpu_mode.encodings { - table - .l2table_for(encoding.bound_type.clone()) - .enclist_for(encoding.inst()) - .encodings - .push(encoding.clone()); - } - - // Ensure there are level 1 table entries for all types with a custom legalize action. - for value_type in cpu_mode.get_legalized_types() { - table.l2table_for(Some(value_type.clone())); - } - // ... and also for monomorphic instructions. - table.l2table_for(None); - - table -} - -/// Compute encodings and doc comments for encoding lists in `level1`. -fn encode_enclists( - isa: &TargetIsa, - cpu_mode: &CpuMode, - level1: &mut Level1Table, - enc_lists: &mut UniqueSeqTable, - enc_lists_doc: &mut HashMap>, -) { - for level2 in level1.l2tables() { - for enclist in level2.enclists() { - enclist.encode(isa, cpu_mode, enc_lists, enc_lists_doc); - } - } -} - -fn encode_level2_hashtables<'a>( - level1: &'a mut Level1Table, - level2_hashtables: &mut Vec>, - level2_doc: &mut HashMap>, -) { - for level2 in level1.l2tables() { - level2.layout_hashtable(level2_hashtables, level2_doc); - } -} - -fn emit_encoding_tables(defs: &SharedDefinitions, isa: &TargetIsa, fmt: &mut Formatter) { - // Level 1 tables, one per CPU mode. - let mut level1_tables: HashMap<&'static str, Level1Table> = HashMap::new(); - - // Single table containing all the level2 hash tables. 
- let mut level2_hashtables = Vec::new(); - let mut level2_doc: HashMap> = HashMap::new(); - - // Tables for encoding lists with comments. - let mut enc_lists = UniqueSeqTable::new(); - let mut enc_lists_doc = HashMap::new(); - - for cpu_mode in &isa.cpu_modes { - level2_doc - .get_or_default(level2_hashtables.len()) - .push(cpu_mode.name.into()); - - let mut level1 = make_tables(cpu_mode); - - encode_enclists( - isa, - cpu_mode, - &mut level1, - &mut enc_lists, - &mut enc_lists_doc, - ); - encode_level2_hashtables(&mut level1, &mut level2_hashtables, &mut level2_doc); - - level1_tables.insert(cpu_mode.name, level1); - } - - // Compute an appropriate Rust integer type to use for offsets into a table of the given length. - let offset_type = |length: usize| { - if length <= 0x10000 { - "u16" - } else { - assert!(u32::try_from(length).is_ok(), "table too big!"); - "u32" - } - }; - - let level1_offset_type = offset_type(level2_hashtables.len()); - let level2_offset_type = offset_type(enc_lists.len()); - - // Emit encoding lists. - fmt.doc_comment( - format!(r#"{} encoding lists. - - This contains the entire encodings bytecode for every single instruction; the encodings - interpreter knows where to start from thanks to the initial lookup in the level 1 and level 2 - table entries below."#, isa.name) - ); - fmtln!(fmt, "pub static ENCLISTS: [u16; {}] = [", enc_lists.len()); - fmt.indent(|fmt| { - let mut line = Vec::new(); - for (index, entry) in enc_lists.iter().enumerate() { - if let Some(comments) = enc_lists_doc.get(&index) { - if !line.is_empty() { - fmtln!(fmt, "{},", line.join(", ")); - line.clear(); - } - for comment in comments { - fmt.comment(comment); - } - } - line.push(format!("{:#06x}", entry)); - } - if !line.is_empty() { - fmtln!(fmt, "{},", line.join(", ")); - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); - - // Emit the full concatenation of level 2 hash tables. - fmt.doc_comment(format!( - r#"{} level 2 hash tables. 
- - This hash table, keyed by instruction opcode, contains all the starting offsets for the - encodings interpreter, for all the CPU modes. It is jumped to after a lookup on the - instruction's controlling type in the level 1 hash table."#, - isa.name - )); - fmtln!( - fmt, - "pub static LEVEL2: [Level2Entry<{}>; {}] = [", - level2_offset_type, - level2_hashtables.len() - ); - fmt.indent(|fmt| { - for (offset, entry) in level2_hashtables.iter().enumerate() { - if let Some(comments) = level2_doc.get(&offset) { - for comment in comments { - fmt.comment(comment); - } - } - if let Some(entry) = entry { - fmtln!( - fmt, - "Level2Entry {{ opcode: Some(crate::ir::Opcode::{}), offset: {:#08x} }},", - entry.inst_name, - entry.offset - ); - } else { - fmt.line("Level2Entry { opcode: None, offset: 0 },"); - } - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); - - // Emit a level 1 hash table for each CPU mode. - for cpu_mode in &isa.cpu_modes { - let level1 = &level1_tables.get(cpu_mode.name).unwrap(); - let hash_table = generate_table( - level1.table_vec.iter(), - level1.table_vec.len(), - |level2_table| { - if let Some(typ) = &level2_table.typ { - typ.number().expect("type without a number") as usize - } else { - 0 - } - }, - ); - - fmt.doc_comment(format!( - r#"{} level 1 hash table for the CPU mode {}. - - This hash table, keyed by instruction controlling type, contains all the level 2 - hash-tables offsets for the given CPU mode, as well as a legalization identifier indicating - which legalization scheme to apply when the instruction doesn't have any valid encoding for - this CPU mode. - "#, - isa.name, cpu_mode.name - )); - fmtln!( - fmt, - "pub static LEVEL1_{}: [Level1Entry<{}>; {}] = [", - cpu_mode.name.to_uppercase(), - level1_offset_type, - hash_table.len() - ); - fmt.indent(|fmt| { - for opt_level2 in hash_table { - let level2 = match opt_level2 { - None => { - // Empty hash table entry. Include the default legalization action. 
- fmtln!(fmt, "Level1Entry {{ ty: ir::types::INVALID, log2len: !0, offset: 0, legalize: {} }},", - isa.translate_group_index(level1.legalize_code)); - continue; - } - Some(level2) => level2, - }; - - let legalize_comment = defs.transform_groups.get(level2.legalize_code).name; - let legalize_code = isa.translate_group_index(level2.legalize_code); - - let typ_name = if let Some(typ) = &level2.typ { - typ.rust_name() - } else { - "ir::types::INVALID".into() - }; - - if level2.is_empty() { - // Empty level 2 table: Only a specialized legalization action, no actual - // table. - // Set an offset that is out of bounds, but make sure it doesn't overflow its - // type when adding `1< 0, "Level2 hash table was too small."); - fmtln!(fmt, "Level1Entry {{ ty: {}, log2len: {}, offset: {:#08x}, legalize: {} }}, // {}", - typ_name, l2l, level2.hash_table_offset.unwrap(), legalize_code, legalize_comment); - } - }); - fmtln!(fmt, "];"); - fmt.empty_line(); - } -} - -fn gen_isa(defs: &SharedDefinitions, isa: &TargetIsa, fmt: &mut Formatter) { - // Make the `RECIPE_PREDICATES` table. - emit_recipe_predicates(isa, fmt); - - // Make the `INST_PREDICATES` table. - emit_inst_predicates(isa, fmt); - - emit_encoding_tables(defs, isa, fmt); - - emit_recipe_names(isa, fmt); - emit_recipe_constraints(isa, fmt); - emit_recipe_sizing(isa, fmt); - - // Finally, tie it all together in an `EncInfo`. 
- fmt.line("pub static INFO: isa::EncInfo = isa::EncInfo {"); - fmt.indent(|fmt| { - fmt.line("constraints: &RECIPE_CONSTRAINTS,"); - fmt.line("sizing: &RECIPE_SIZING,"); - fmt.line("names: &RECIPE_NAMES,"); - }); - fmt.line("};"); -} - -pub(crate) fn generate( - defs: &SharedDefinitions, - isa: &TargetIsa, - filename: &str, - out_dir: &str, -) -> Result<(), error::Error> { - let mut fmt = Formatter::new(); - gen_isa(defs, isa, &mut fmt); - fmt.update_file(filename, out_dir)?; - Ok(()) -} diff --git a/cranelift/codegen/meta/src/gen_legalizer.rs b/cranelift/codegen/meta/src/gen_legalizer.rs deleted file mode 100644 index 7b56b8db48..0000000000 --- a/cranelift/codegen/meta/src/gen_legalizer.rs +++ /dev/null @@ -1,734 +0,0 @@ -//! Generate transformations to legalize instructions without encodings. -use crate::cdsl::ast::{Def, DefPool, Expr, VarPool}; -use crate::cdsl::isa::TargetIsa; -use crate::cdsl::operands::Operand; -use crate::cdsl::type_inference::Constraint; -use crate::cdsl::typevar::{TypeSet, TypeVar}; -use crate::cdsl::xform::{Transform, TransformGroup, TransformGroups}; - -use crate::error; -use crate::gen_inst::gen_typesets_table; -use crate::srcgen::Formatter; -use crate::unique_table::UniqueTable; - -use std::collections::{HashMap, HashSet}; -use std::iter::FromIterator; - -/// Given a `Def` node, emit code that extracts all the instruction fields from -/// `pos.func.dfg[iref]`. -/// -/// Create local variables named after the `Var` instances in `node`. -/// -/// Also create a local variable named `predicate` with the value of the evaluated instruction -/// predicate, or `true` if the node has no predicate. 
-fn unwrap_inst(transform: &Transform, fmt: &mut Formatter) -> bool { - let var_pool = &transform.var_pool; - let def_pool = &transform.def_pool; - - let def = def_pool.get(transform.src); - let apply = &def.apply; - let inst = &apply.inst; - let iform = &inst.format; - - fmt.comment(format!( - "Unwrap fields from instruction format {}", - def.to_comment_string(&transform.var_pool) - )); - - // Extract the Var arguments. - let arg_names = apply - .args - .iter() - .enumerate() - .filter(|(arg_num, _)| { - // Variable args are specially handled after extracting args. - !inst.operands_in[*arg_num].is_varargs() - }) - .map(|(arg_num, arg)| match &arg { - Expr::Var(var_index) => var_pool.get(*var_index).name.as_ref(), - Expr::Literal(_) => { - let n = inst.imm_opnums.iter().position(|&i| i == arg_num).unwrap(); - iform.imm_fields[n].member - } - }) - .collect::>() - .join(", "); - - // May we need "args" in the values consumed by predicates? - let emit_args = iform.num_value_operands >= 1 || iform.has_value_list; - - // We need a tuple: - // - if there's at least one value operand, then we emit a variable for the value, and the - // value list as args. - // - otherwise, if there's the count of immediate operands added to the presence of a value list exceeds one. - let need_tuple = if iform.num_value_operands >= 1 { - true - } else { - let mut imm_and_varargs = inst - .operands_in - .iter() - .filter(|op| op.is_immediate_or_entityref()) - .count(); - if iform.has_value_list { - imm_and_varargs += 1; - } - imm_and_varargs > 1 - }; - - let maybe_args = if emit_args { ", args" } else { "" }; - let defined_values = format!("{}{}", arg_names, maybe_args); - - let tuple_or_value = if need_tuple { - format!("({})", defined_values) - } else { - defined_values - }; - - fmtln!( - fmt, - "let {} = if let ir::InstructionData::{} {{", - tuple_or_value, - iform.name - ); - - fmt.indent(|fmt| { - // Fields are encoded directly. 
- for field in &iform.imm_fields { - fmtln!(fmt, "{},", field.member); - } - - if iform.has_value_list || iform.num_value_operands > 1 { - fmt.line("ref args,"); - } else if iform.num_value_operands == 1 { - fmt.line("arg,"); - } - - fmt.line(".."); - fmt.outdented_line("} = pos.func.dfg[inst] {"); - - if iform.has_value_list { - fmt.line("let args = args.as_slice(&pos.func.dfg.value_lists);"); - } else if iform.num_value_operands == 1 { - fmt.line("let args = [arg];") - } - - // Generate the values for the tuple. - let emit_one_value = - |fmt: &mut Formatter, needs_comma: bool, op_num: usize, op: &Operand| { - let comma = if needs_comma { "," } else { "" }; - if op.is_immediate_or_entityref() { - let n = inst.imm_opnums.iter().position(|&i| i == op_num).unwrap(); - fmtln!(fmt, "{}{}", iform.imm_fields[n].member, comma); - } else if op.is_value() { - let n = inst.value_opnums.iter().position(|&i| i == op_num).unwrap(); - fmtln!(fmt, "pos.func.dfg.resolve_aliases(args[{}]),", n); - } else { - // This is a value list argument or a varargs. - assert!(iform.has_value_list || op.is_varargs()); - } - }; - - if need_tuple { - fmt.line("("); - fmt.indent(|fmt| { - for (op_num, op) in inst.operands_in.iter().enumerate() { - let needs_comma = emit_args || op_num + 1 < inst.operands_in.len(); - emit_one_value(fmt, needs_comma, op_num, op); - } - if emit_args { - fmt.line("args"); - } - }); - fmt.line(")"); - } else { - // Only one of these can be true at the same time, otherwise we'd need a tuple. 
- emit_one_value(fmt, false, 0, &inst.operands_in[0]); - if emit_args { - fmt.line("args"); - } - } - - fmt.outdented_line("} else {"); - fmt.line(r#"unreachable!("bad instruction format")"#); - }); - fmtln!(fmt, "};"); - fmt.empty_line(); - - assert_eq!(inst.operands_in.len(), apply.args.len()); - for (i, op) in inst.operands_in.iter().enumerate() { - if op.is_varargs() { - let name = &var_pool - .get(apply.args[i].maybe_var().expect("vararg without name")) - .name; - let n = inst - .imm_opnums - .iter() - .chain(inst.value_opnums.iter()) - .max() - .copied() - .unwrap_or(0); - fmtln!(fmt, "let {} = &Vec::from(&args[{}..]);", name, n); - } - } - - for &op_num in &inst.value_opnums { - let arg = &apply.args[op_num]; - if let Some(var_index) = arg.maybe_var() { - let var = var_pool.get(var_index); - if var.has_free_typevar() { - fmtln!( - fmt, - "let typeof_{} = pos.func.dfg.value_type({});", - var.name, - var.name - ); - } - } - } - - // If the definition creates results, detach the values and place them in locals. - let mut replace_inst = false; - if !def.defined_vars.is_empty() { - if def.defined_vars - == def_pool - .get(var_pool.get(def.defined_vars[0]).dst_def.unwrap()) - .defined_vars - { - // Special case: The instruction replacing node defines the exact same values. - fmt.comment(format!( - "Results handled by {}.", - def_pool - .get(var_pool.get(def.defined_vars[0]).dst_def.unwrap()) - .to_comment_string(var_pool) - )); - - fmt.line("let r = pos.func.dfg.inst_results(inst);"); - for (i, &var_index) in def.defined_vars.iter().enumerate() { - let var = var_pool.get(var_index); - fmtln!(fmt, "let {} = &r[{}];", var.name, i); - fmtln!( - fmt, - "let typeof_{} = pos.func.dfg.value_type(*{});", - var.name, - var.name - ); - } - - replace_inst = true; - } else { - // Boring case: Detach the result values, capture them in locals. 
- for &var_index in &def.defined_vars { - fmtln!(fmt, "let {};", var_pool.get(var_index).name); - } - - fmt.line("{"); - fmt.indent(|fmt| { - fmt.line("let r = pos.func.dfg.inst_results(inst);"); - for i in 0..def.defined_vars.len() { - let var = var_pool.get(def.defined_vars[i]); - fmtln!(fmt, "{} = r[{}];", var.name, i); - } - }); - fmt.line("}"); - - for &var_index in &def.defined_vars { - let var = var_pool.get(var_index); - if var.has_free_typevar() { - fmtln!( - fmt, - "let typeof_{} = pos.func.dfg.value_type({});", - var.name, - var.name - ); - } - } - } - } - replace_inst -} - -fn build_derived_expr(tv: &TypeVar) -> String { - let base = match &tv.base { - Some(base) => base, - None => { - assert!(tv.name.starts_with("typeof_")); - return format!("Some({})", tv.name); - } - }; - let base_expr = build_derived_expr(&base.type_var); - format!( - "{}.map(|t: crate::ir::Type| t.{}())", - base_expr, - base.derived_func.name() - ) -} - -/// Emit rust code for the given check. -/// -/// The emitted code is a statement redefining the `predicate` variable like this: -/// let predicate = predicate && ... 
-fn emit_runtime_typecheck<'a>( - constraint: &'a Constraint, - type_sets: &mut UniqueTable<'a, TypeSet>, - fmt: &mut Formatter, -) { - match constraint { - Constraint::InTypeset(tv, ts) => { - let ts_index = type_sets.add(&ts); - fmt.comment(format!( - "{} must belong to {:?}", - tv.name, - type_sets.get(ts_index) - )); - fmtln!( - fmt, - "let predicate = predicate && TYPE_SETS[{}].contains({});", - ts_index, - tv.name - ); - } - Constraint::Eq(tv1, tv2) => { - fmtln!( - fmt, - "let predicate = predicate && match ({}, {}) {{", - build_derived_expr(tv1), - build_derived_expr(tv2) - ); - fmt.indent(|fmt| { - fmt.line("(Some(a), Some(b)) => a == b,"); - fmt.comment("On overflow, constraint doesn\'t apply"); - fmt.line("_ => false,"); - }); - fmtln!(fmt, "};"); - } - Constraint::WiderOrEq(tv1, tv2) => { - fmtln!( - fmt, - "let predicate = predicate && match ({}, {}) {{", - build_derived_expr(tv1), - build_derived_expr(tv2) - ); - fmt.indent(|fmt| { - fmt.line("(Some(a), Some(b)) => a.wider_or_equal(b),"); - fmt.comment("On overflow, constraint doesn\'t apply"); - fmt.line("_ => false,"); - }); - fmtln!(fmt, "};"); - } - } -} - -/// Determine if `node` represents one of the value splitting instructions: `isplit` or `vsplit. -/// These instructions are lowered specially by the `legalize::split` module. -fn is_value_split(def: &Def) -> bool { - let name = &def.apply.inst.name; - name == "isplit" || name == "vsplit" -} - -fn emit_dst_inst(def: &Def, def_pool: &DefPool, var_pool: &VarPool, fmt: &mut Formatter) { - let defined_vars = { - let vars = def - .defined_vars - .iter() - .map(|&var_index| var_pool.get(var_index).name.as_ref()) - .collect::>(); - if vars.len() == 1 { - vars[0].to_string() - } else { - format!("({})", vars.join(", ")) - } - }; - - if is_value_split(def) { - // Split instructions are not emitted with the builder, but by calling special functions in - // the `legalizer::split` module. These functions will eliminate concat-split patterns. 
- fmt.line("let curpos = pos.position();"); - fmt.line("let srcloc = pos.srcloc();"); - fmtln!( - fmt, - "let {} = split::{}(pos.func, cfg, curpos, srcloc, {});", - defined_vars, - def.apply.inst.snake_name(), - def.apply.args[0].to_rust_code(var_pool) - ); - return; - } - - if def.defined_vars.is_empty() { - // This node doesn't define any values, so just insert the new instruction. - fmtln!( - fmt, - "pos.ins().{};", - def.apply.rust_builder(&def.defined_vars, var_pool) - ); - return; - } - - if let Some(src_def0) = var_pool.get(def.defined_vars[0]).src_def { - if def.defined_vars == def_pool.get(src_def0).defined_vars { - // The replacement instruction defines the exact same values as the source pattern. - // Unwrapping would have left the results intact. Replace the whole instruction. - fmtln!( - fmt, - "let {} = pos.func.dfg.replace(inst).{};", - defined_vars, - def.apply.rust_builder(&def.defined_vars, var_pool) - ); - - // We need to bump the cursor so following instructions are inserted *after* the - // replaced instruction. - fmt.line("if pos.current_inst() == Some(inst) {"); - fmt.indent(|fmt| { - fmt.line("pos.next_inst();"); - }); - fmt.line("}"); - return; - } - } - - // Insert a new instruction. - let mut builder = format!("let {} = pos.ins()", defined_vars); - - if def.defined_vars.len() == 1 && var_pool.get(def.defined_vars[0]).is_output() { - // Reuse the single source result value. - builder = format!( - "{}.with_result({})", - builder, - var_pool.get(def.defined_vars[0]).to_rust_code() - ); - } else if def - .defined_vars - .iter() - .any(|&var_index| var_pool.get(var_index).is_output()) - { - // There are more than one output values that can be reused. 
- let array = def - .defined_vars - .iter() - .map(|&var_index| { - let var = var_pool.get(var_index); - if var.is_output() { - format!("Some({})", var.name) - } else { - "None".into() - } - }) - .collect::>() - .join(", "); - builder = format!("{}.with_results([{}])", builder, array); - } - - fmtln!( - fmt, - "{}.{};", - builder, - def.apply.rust_builder(&def.defined_vars, var_pool) - ); -} - -/// Emit code for `transform`, assuming that the opcode of transform's root instruction -/// has already been matched. -/// -/// `inst: Inst` is the variable to be replaced. It is pointed to by `pos: Cursor`. -/// `dfg: DataFlowGraph` is available and mutable. -fn gen_transform<'a>( - replace_inst: bool, - transform: &'a Transform, - type_sets: &mut UniqueTable<'a, TypeSet>, - fmt: &mut Formatter, -) { - // Evaluate the instruction predicate if any. - let apply = &transform.def_pool.get(transform.src).apply; - - let inst_predicate = apply - .inst_predicate_with_ctrl_typevar(&transform.var_pool) - .rust_predicate("pos.func"); - - let has_extra_constraints = !transform.type_env.constraints.is_empty(); - if has_extra_constraints { - // Extra constraints rely on the predicate being a variable that we can rebind as we add - // more constraint predicates. - if let Some(pred) = &inst_predicate { - fmt.multi_line(&format!("let predicate = {};", pred)); - } else { - fmt.line("let predicate = true;"); - } - } - - // Emit any runtime checks; these will rebind `predicate` emitted right above. - for constraint in &transform.type_env.constraints { - emit_runtime_typecheck(constraint, type_sets, fmt); - } - - let do_expand = |fmt: &mut Formatter| { - // Emit any constants that must be created before use. - for (name, value) in transform.const_pool.iter() { - fmtln!( - fmt, - "let {} = pos.func.dfg.constants.insert(vec!{:?}.into());", - name, - value - ); - } - - // If we are adding some blocks, we need to recall the original block, such that we can - // recompute it. 
- if !transform.block_pool.is_empty() { - fmt.line("let orig_block = pos.current_block().unwrap();"); - } - - // If we're going to delete `inst`, we need to detach its results first so they can be - // reattached during pattern expansion. - if !replace_inst { - fmt.line("pos.func.dfg.clear_results(inst);"); - } - - // Emit new block creation. - for block in &transform.block_pool { - let var = transform.var_pool.get(block.name); - fmtln!(fmt, "let {} = pos.func.dfg.make_block();", var.name); - } - - // Emit the destination pattern. - for &def_index in &transform.dst { - if let Some(block) = transform.block_pool.get(def_index) { - let var = transform.var_pool.get(block.name); - fmtln!(fmt, "pos.insert_block({});", var.name); - } - emit_dst_inst( - transform.def_pool.get(def_index), - &transform.def_pool, - &transform.var_pool, - fmt, - ); - } - - // Insert a new block after the last instruction, if needed. - let def_next_index = transform.def_pool.next_index(); - if let Some(block) = transform.block_pool.get(def_next_index) { - let var = transform.var_pool.get(block.name); - fmtln!(fmt, "pos.insert_block({});", var.name); - } - - // Delete the original instruction if we didn't have an opportunity to replace it. - if !replace_inst { - fmt.line("let removed = pos.remove_inst();"); - fmt.line("debug_assert_eq!(removed, inst);"); - } - - if transform.block_pool.is_empty() { - if transform.def_pool.get(transform.src).apply.inst.is_branch { - // A branch might have been legalized into multiple branches, so we need to recompute - // the cfg. - fmt.line("cfg.recompute_block(pos.func, pos.current_block().unwrap());"); - } - } else { - // Update CFG for the new blocks. - fmt.line("cfg.recompute_block(pos.func, orig_block);"); - for block in &transform.block_pool { - let var = transform.var_pool.get(block.name); - fmtln!(fmt, "cfg.recompute_block(pos.func, {});", var.name); - } - } - - fmt.line("return true;"); - }; - - // Guard the actual expansion by `predicate`. 
- if has_extra_constraints { - fmt.line("if predicate {"); - fmt.indent(|fmt| { - do_expand(fmt); - }); - fmt.line("}"); - } else if let Some(pred) = &inst_predicate { - fmt.multi_line(&format!("if {} {{", pred)); - fmt.indent(|fmt| { - do_expand(fmt); - }); - fmt.line("}"); - } else { - // Unconditional transform (there was no predicate), just emit it. - do_expand(fmt); - } -} - -fn gen_transform_group<'a>( - group: &'a TransformGroup, - transform_groups: &TransformGroups, - type_sets: &mut UniqueTable<'a, TypeSet>, - fmt: &mut Formatter, -) { - fmt.doc_comment(group.doc); - fmt.line("#[allow(unused_variables,unused_assignments,unused_imports,non_snake_case)]"); - - // Function arguments. - fmtln!(fmt, "pub fn {}(", group.name); - fmt.indent(|fmt| { - fmt.line("inst: crate::ir::Inst,"); - fmt.line("func: &mut crate::ir::Function,"); - fmt.line("cfg: &mut crate::flowgraph::ControlFlowGraph,"); - fmt.line("isa: &dyn crate::isa::TargetIsa,"); - }); - fmtln!(fmt, ") -> bool {"); - - // Function body. - fmt.indent(|fmt| { - fmt.line("use crate::ir::InstBuilder;"); - fmt.line("use crate::cursor::{Cursor, FuncCursor};"); - fmt.line("let mut pos = FuncCursor::new(func).at_inst(inst);"); - fmt.line("pos.use_srcloc(inst);"); - - // Group the transforms by opcode so we can generate a big switch. - // Preserve ordering. 
- let mut inst_to_transforms = HashMap::new(); - for transform in &group.transforms { - let def_index = transform.src; - let inst = &transform.def_pool.get(def_index).apply.inst; - inst_to_transforms - .entry(inst.camel_name.clone()) - .or_insert_with(Vec::new) - .push(transform); - } - - let mut sorted_inst_names = Vec::from_iter(inst_to_transforms.keys()); - sorted_inst_names.sort(); - - fmt.line("{"); - fmt.indent(|fmt| { - fmt.line("match pos.func.dfg[inst].opcode() {"); - fmt.indent(|fmt| { - for camel_name in sorted_inst_names { - fmtln!(fmt, "ir::Opcode::{} => {{", camel_name); - fmt.indent(|fmt| { - let transforms = inst_to_transforms.get(camel_name).unwrap(); - - // Unwrap the source instruction, create local variables for the input variables. - let replace_inst = unwrap_inst(&transforms[0], fmt); - fmt.empty_line(); - - for (i, transform) in transforms.iter().enumerate() { - if i > 0 { - fmt.empty_line(); - } - gen_transform(replace_inst, transform, type_sets, fmt); - } - }); - fmtln!(fmt, "}"); - fmt.empty_line(); - } - - // Emit the custom transforms. The Rust compiler will complain about any overlap with - // the normal transforms. - let mut sorted_custom_legalizes = Vec::from_iter(&group.custom_legalizes); - sorted_custom_legalizes.sort(); - for (inst_camel_name, func_name) in sorted_custom_legalizes { - fmtln!(fmt, "ir::Opcode::{} => {{", inst_camel_name); - fmt.indent(|fmt| { - fmtln!(fmt, "{}(inst, func, cfg, isa);", func_name); - fmt.line("return true;"); - }); - fmtln!(fmt, "}"); - fmt.empty_line(); - } - - // We'll assume there are uncovered opcodes. - fmt.line("_ => {},"); - }); - fmt.line("}"); - }); - fmt.line("}"); - - // If we fall through, nothing was expanded; call the chain if any. 
- match &group.chain_with { - Some(group_id) => fmtln!( - fmt, - "{}(inst, func, cfg, isa)", - transform_groups.get(*group_id).rust_name() - ), - None => fmt.line("false"), - }; - }); - fmtln!(fmt, "}"); - fmt.empty_line(); -} - -/// Generate legalization functions for `isa` and add any shared `TransformGroup`s -/// encountered to `shared_groups`. -/// -/// Generate `TYPE_SETS` and `LEGALIZE_ACTIONS` tables. -fn gen_isa( - isa: &TargetIsa, - transform_groups: &TransformGroups, - shared_group_names: &mut HashSet<&'static str>, - fmt: &mut Formatter, -) { - let mut type_sets = UniqueTable::new(); - for group_index in isa.transitive_transform_groups(transform_groups) { - let group = transform_groups.get(group_index); - match group.isa_name { - Some(isa_name) => { - assert!( - isa_name == isa.name, - "ISA-specific legalizations must be used by the same ISA" - ); - gen_transform_group(group, transform_groups, &mut type_sets, fmt); - } - None => { - shared_group_names.insert(group.name); - } - } - } - - gen_typesets_table(&type_sets, fmt); - - let direct_groups = isa.direct_transform_groups(); - fmtln!( - fmt, - "pub static LEGALIZE_ACTIONS: [isa::Legalize; {}] = [", - direct_groups.len() - ); - fmt.indent(|fmt| { - for &group_index in direct_groups { - fmtln!(fmt, "{},", transform_groups.get(group_index).rust_name()); - } - }); - fmtln!(fmt, "];"); -} - -/// Generate the legalizer files. -pub(crate) fn generate( - isas: &[TargetIsa], - transform_groups: &TransformGroups, - extra_legalization_groups: &[&'static str], - filename_prefix: &str, - out_dir: &str, -) -> Result<(), error::Error> { - let mut shared_group_names = HashSet::new(); - - for isa in isas { - let mut fmt = Formatter::new(); - gen_isa(isa, transform_groups, &mut shared_group_names, &mut fmt); - fmt.update_file(format!("{}-{}.rs", filename_prefix, isa.name), out_dir)?; - } - - // Add extra legalization groups that were explicitly requested. 
- for group in extra_legalization_groups { - shared_group_names.insert(group); - } - - // Generate shared legalize groups. - let mut fmt = Formatter::new(); - // Generate shared legalize groups. - let mut type_sets = UniqueTable::new(); - let mut sorted_shared_group_names = Vec::from_iter(shared_group_names); - sorted_shared_group_names.sort(); - for group_name in &sorted_shared_group_names { - let group = transform_groups.by_name(group_name); - gen_transform_group(group, transform_groups, &mut type_sets, &mut fmt); - } - gen_typesets_table(&type_sets, &mut fmt); - fmt.update_file(format!("{}r.rs", filename_prefix), out_dir)?; - - Ok(()) -} diff --git a/cranelift/codegen/meta/src/gen_registers.rs b/cranelift/codegen/meta/src/gen_registers.rs deleted file mode 100644 index bd5ac95ae0..0000000000 --- a/cranelift/codegen/meta/src/gen_registers.rs +++ /dev/null @@ -1,148 +0,0 @@ -//! Generate the ISA-specific registers. -use crate::cdsl::isa::TargetIsa; -use crate::cdsl::regs::{RegBank, RegClass}; -use crate::error; -use crate::srcgen::Formatter; -use cranelift_entity::EntityRef; - -fn gen_regbank(fmt: &mut Formatter, reg_bank: &RegBank) { - let names = if !reg_bank.names.is_empty() { - format!(r#""{}""#, reg_bank.names.join(r#"", ""#)) - } else { - "".to_string() - }; - fmtln!(fmt, "RegBank {"); - fmt.indent(|fmt| { - fmtln!(fmt, r#"name: "{}","#, reg_bank.name); - fmtln!(fmt, "first_unit: {},", reg_bank.first_unit); - fmtln!(fmt, "units: {},", reg_bank.units); - fmtln!(fmt, "names: &[{}],", names); - fmtln!(fmt, r#"prefix: "{}","#, reg_bank.prefix); - fmtln!(fmt, "first_toprc: {},", reg_bank.toprcs[0].index()); - fmtln!(fmt, "num_toprcs: {},", reg_bank.toprcs.len()); - fmtln!( - fmt, - "pressure_tracking: {},", - if reg_bank.pressure_tracking { - "true" - } else { - "false" - } - ); - }); - fmtln!(fmt, "},"); -} - -fn gen_regclass(isa: &TargetIsa, reg_class: &RegClass, fmt: &mut Formatter) { - let reg_bank = isa.regs.banks.get(reg_class.bank).unwrap(); - - let mask: 
Vec = reg_class - .mask(reg_bank.first_unit) - .iter() - .map(|x| format!("0x{:08x}", x)) - .collect(); - let mask = mask.join(", "); - - fmtln!( - fmt, - "pub static {}_DATA: RegClassData = RegClassData {{", - reg_class.name - ); - fmt.indent(|fmt| { - fmtln!(fmt, r#"name: "{}","#, reg_class.name); - fmtln!(fmt, "index: {},", reg_class.index.index()); - fmtln!(fmt, "width: {},", reg_class.width); - fmtln!(fmt, "bank: {},", reg_class.bank.index()); - fmtln!(fmt, "toprc: {},", reg_class.toprc.index()); - fmtln!(fmt, "first: {},", reg_bank.first_unit + reg_class.start); - fmtln!(fmt, "subclasses: {:#x},", reg_class.subclass_mask()); - fmtln!(fmt, "mask: [{}],", mask); - fmtln!( - fmt, - "pinned_reg: {:?},", - reg_bank - .pinned_reg - .map(|index| index + reg_bank.first_unit as u16 + reg_class.start as u16) - ); - fmtln!(fmt, "info: &INFO,"); - }); - fmtln!(fmt, "};"); - - fmtln!(fmt, "#[allow(dead_code)]"); - fmtln!( - fmt, - "pub static {}: RegClass = &{}_DATA;", - reg_class.name, - reg_class.name - ); -} - -fn gen_regbank_units(reg_bank: &RegBank, fmt: &mut Formatter) { - for unit in 0..reg_bank.units { - let v = unit + reg_bank.first_unit; - if (unit as usize) < reg_bank.names.len() { - fmtln!(fmt, "{} = {},", reg_bank.names[unit as usize], v); - continue; - } - fmtln!(fmt, "{}{} = {},", reg_bank.prefix, unit, v); - } -} - -fn gen_isa(isa: &TargetIsa, fmt: &mut Formatter) { - // Emit RegInfo. - fmtln!(fmt, "pub static INFO: RegInfo = RegInfo {"); - - fmt.indent(|fmt| { - fmtln!(fmt, "banks: &["); - // Bank descriptors. - fmt.indent(|fmt| { - for reg_bank in isa.regs.banks.values() { - gen_regbank(fmt, ®_bank); - } - }); - fmtln!(fmt, "],"); - // References to register classes. - fmtln!(fmt, "classes: &["); - fmt.indent(|fmt| { - for reg_class in isa.regs.classes.values() { - fmtln!(fmt, "&{}_DATA,", reg_class.name); - } - }); - fmtln!(fmt, "],"); - }); - fmtln!(fmt, "};"); - - // Register class descriptors. 
- for rc in isa.regs.classes.values() { - gen_regclass(&isa, rc, fmt); - } - - // Emit constants for all the register units. - fmtln!(fmt, "#[allow(dead_code, non_camel_case_types)]"); - fmtln!(fmt, "#[derive(Clone, Copy)]"); - fmtln!(fmt, "pub enum RU {"); - fmt.indent(|fmt| { - for reg_bank in isa.regs.banks.values() { - gen_regbank_units(reg_bank, fmt); - } - }); - fmtln!(fmt, "}"); - - // Emit Into conversion for the RU class. - fmtln!(fmt, "impl Into for RU {"); - fmt.indent(|fmt| { - fmtln!(fmt, "fn into(self) -> RegUnit {"); - fmt.indent(|fmt| { - fmtln!(fmt, "self as RegUnit"); - }); - fmtln!(fmt, "}"); - }); - fmtln!(fmt, "}"); -} - -pub(crate) fn generate(isa: &TargetIsa, filename: &str, out_dir: &str) -> Result<(), error::Error> { - let mut fmt = Formatter::new(); - gen_isa(&isa, &mut fmt); - fmt.update_file(filename, out_dir)?; - Ok(()) -} diff --git a/cranelift/codegen/meta/src/gen_types.rs b/cranelift/codegen/meta/src/gen_types.rs index 6ced212b8d..f55848751c 100644 --- a/cranelift/codegen/meta/src/gen_types.rs +++ b/cranelift/codegen/meta/src/gen_types.rs @@ -12,23 +12,16 @@ use crate::error; use crate::srcgen; /// Emit a constant definition of a single value type. -fn emit_type(ty: &cdsl_types::ValueType, fmt: &mut srcgen::Formatter) -> Result<(), error::Error> { +fn emit_type(ty: &cdsl_types::ValueType, fmt: &mut srcgen::Formatter) { let name = ty.to_string().to_uppercase(); - let number = ty.number().ok_or_else(|| { - error::Error::with_msg(format!( - "Could not emit type `{}` which has no number.", - name - )) - })?; + let number = ty.number(); fmt.doc_comment(&ty.doc()); fmtln!(fmt, "pub const {}: Type = Type({:#x});\n", name, number); - - Ok(()) } /// Emit definition for all vector types with `bits` total size. 
-fn emit_vectors(bits: u64, fmt: &mut srcgen::Formatter) -> Result<(), error::Error> { +fn emit_vectors(bits: u64, fmt: &mut srcgen::Formatter) { let vec_size: u64 = bits / 8; for vec in cdsl_types::ValueType::all_lane_types() .map(|ty| (ty, cdsl_types::ValueType::from(ty).membytes())) @@ -36,41 +29,37 @@ fn emit_vectors(bits: u64, fmt: &mut srcgen::Formatter) -> Result<(), error::Err .map(|(ty, lane_size)| (ty, vec_size / lane_size)) .map(|(ty, lanes)| cdsl_types::VectorType::new(ty, lanes)) { - emit_type(&cdsl_types::ValueType::from(vec), fmt)?; + emit_type(&cdsl_types::ValueType::from(vec), fmt); } - - Ok(()) } /// Emit types using the given formatter object. -fn emit_types(fmt: &mut srcgen::Formatter) -> Result<(), error::Error> { +fn emit_types(fmt: &mut srcgen::Formatter) { // Emit all of the special types, such as types for CPU flags. for spec in cdsl_types::ValueType::all_special_types().map(cdsl_types::ValueType::from) { - emit_type(&spec, fmt)?; + emit_type(&spec, fmt); } // Emit all of the lane types, such integers, floats, and booleans. for ty in cdsl_types::ValueType::all_lane_types().map(cdsl_types::ValueType::from) { - emit_type(&ty, fmt)?; + emit_type(&ty, fmt); } // Emit all reference types. for ty in cdsl_types::ValueType::all_reference_types().map(cdsl_types::ValueType::from) { - emit_type(&ty, fmt)?; + emit_type(&ty, fmt); } // Emit vector definitions for common SIMD sizes. for vec_size in &[64_u64, 128, 256, 512] { - emit_vectors(*vec_size, fmt)?; + emit_vectors(*vec_size, fmt); } - - Ok(()) } /// Generate the types file. 
pub(crate) fn generate(filename: &str, out_dir: &str) -> Result<(), error::Error> { let mut fmt = srcgen::Formatter::new(); - emit_types(&mut fmt)?; + emit_types(&mut fmt); fmt.update_file(filename, out_dir)?; Ok(()) } diff --git a/cranelift/codegen/meta/src/isa/arm32.rs b/cranelift/codegen/meta/src/isa/arm32.rs new file mode 100644 index 0000000000..1c3b4d1fe0 --- /dev/null +++ b/cranelift/codegen/meta/src/isa/arm32.rs @@ -0,0 +1,15 @@ +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; + +use crate::shared::Definitions as SharedDefinitions; + +fn define_settings(_shared: &SettingGroup) -> SettingGroup { + let setting = SettingGroupBuilder::new("arm32"); + setting.build() +} + +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { + let settings = define_settings(&shared_defs.settings); + + TargetIsa::new("arm32", settings) +} diff --git a/cranelift/codegen/meta/src/isa/arm32/mod.rs b/cranelift/codegen/meta/src/isa/arm32/mod.rs deleted file mode 100644 index 2dc58e4053..0000000000 --- a/cranelift/codegen/meta/src/isa/arm32/mod.rs +++ /dev/null @@ -1,71 +0,0 @@ -use crate::cdsl::instructions::InstructionPredicateMap; -use crate::cdsl::isa::TargetIsa; -use crate::cdsl::recipes::Recipes; -use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; -use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; - -use crate::shared::Definitions as SharedDefinitions; - -fn define_settings(_shared: &SettingGroup) -> SettingGroup { - let setting = SettingGroupBuilder::new("arm32"); - setting.build() -} - -fn define_regs() -> IsaRegs { - let mut regs = IsaRegsBuilder::new(); - - let builder = RegBankBuilder::new("FloatRegs", "s") - .units(64) - .track_pressure(true); - let float_regs = regs.add_bank(builder); - - let builder = RegBankBuilder::new("IntRegs", "r") - .units(16) - .track_pressure(true); - let int_regs = regs.add_bank(builder); - - let builder = 
RegBankBuilder::new("FlagRegs", "") - .units(1) - .names(vec!["nzcv"]) - .track_pressure(false); - let flag_reg = regs.add_bank(builder); - - let builder = RegClassBuilder::new_toplevel("S", float_regs).count(32); - regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("D", float_regs).width(2); - regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("Q", float_regs).width(4); - regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("GPR", int_regs); - regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg); - regs.add_class(builder); - - regs.build() -} - -pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { - let settings = define_settings(&shared_defs.settings); - let regs = define_regs(); - - let cpu_modes = vec![]; - - // TODO implement arm32 recipes. - let recipes = Recipes::new(); - - // TODO implement arm32 encodings and predicates. - let encodings_predicates = InstructionPredicateMap::new(); - - TargetIsa::new( - "arm32", - settings, - regs, - recipes, - cpu_modes, - encodings_predicates, - ) -} diff --git a/cranelift/codegen/meta/src/isa/arm64.rs b/cranelift/codegen/meta/src/isa/arm64.rs new file mode 100644 index 0000000000..5fd7b69309 --- /dev/null +++ b/cranelift/codegen/meta/src/isa/arm64.rs @@ -0,0 +1,18 @@ +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; + +use crate::shared::Definitions as SharedDefinitions; + +fn define_settings(_shared: &SettingGroup) -> SettingGroup { + let mut setting = SettingGroupBuilder::new("arm64"); + let has_lse = setting.add_bool("has_lse", "Has Large System Extensions support.", "", false); + + setting.add_predicate("use_lse", predicate!(has_lse)); + setting.build() +} + +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { + let settings = define_settings(&shared_defs.settings); + + TargetIsa::new("arm64", settings) +} diff --git 
a/cranelift/codegen/meta/src/isa/arm64/mod.rs b/cranelift/codegen/meta/src/isa/arm64/mod.rs deleted file mode 100644 index 3ae57fbb62..0000000000 --- a/cranelift/codegen/meta/src/isa/arm64/mod.rs +++ /dev/null @@ -1,70 +0,0 @@ -use crate::cdsl::instructions::InstructionPredicateMap; -use crate::cdsl::isa::TargetIsa; -use crate::cdsl::recipes::Recipes; -use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; -use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; - -use crate::shared::Definitions as SharedDefinitions; - -fn define_settings(_shared: &SettingGroup) -> SettingGroup { - let mut setting = SettingGroupBuilder::new("arm64"); - let has_lse = setting.add_bool("has_lse", "Has Large System Extensions support.", "", false); - - setting.add_predicate("use_lse", predicate!(has_lse)); - setting.build() -} - -fn define_registers() -> IsaRegs { - let mut regs = IsaRegsBuilder::new(); - - // The `x31` regunit serves as the stack pointer / zero register depending on context. We - // reserve it and don't model the difference. 
- let builder = RegBankBuilder::new("IntRegs", "x") - .units(32) - .track_pressure(true); - let int_regs = regs.add_bank(builder); - - let builder = RegBankBuilder::new("FloatRegs", "v") - .units(32) - .track_pressure(true); - let float_regs = regs.add_bank(builder); - - let builder = RegBankBuilder::new("FlagRegs", "") - .units(1) - .names(vec!["nzcv"]) - .track_pressure(false); - let flag_reg = regs.add_bank(builder); - - let builder = RegClassBuilder::new_toplevel("GPR", int_regs); - regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("FPR", float_regs); - regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg); - regs.add_class(builder); - - regs.build() -} - -pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { - let settings = define_settings(&shared_defs.settings); - let regs = define_registers(); - - let cpu_modes = vec![]; - - // TODO implement arm64 recipes. - let recipes = Recipes::new(); - - // TODO implement arm64 encodings and predicates. - let encodings_predicates = InstructionPredicateMap::new(); - - TargetIsa::new( - "arm64", - settings, - regs, - recipes, - cpu_modes, - encodings_predicates, - ) -} diff --git a/cranelift/codegen/meta/src/isa/mod.rs b/cranelift/codegen/meta/src/isa/mod.rs index 34032842c2..9465e63b26 100644 --- a/cranelift/codegen/meta/src/isa/mod.rs +++ b/cranelift/codegen/meta/src/isa/mod.rs @@ -5,14 +5,12 @@ use std::fmt; mod arm32; mod arm64; -mod riscv; mod s390x; pub(crate) mod x86; /// Represents known ISA target. #[derive(PartialEq, Copy, Clone)] pub enum Isa { - Riscv, X86, Arm32, Arm64, @@ -31,7 +29,6 @@ impl Isa { /// Creates isa target from arch. pub fn from_arch(arch: &str) -> Option { match arch { - "riscv" => Some(Isa::Riscv), "aarch64" => Some(Isa::Arm64), "s390x" => Some(Isa::S390x), x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86), @@ -42,7 +39,7 @@ impl Isa { /// Returns all supported isa targets. 
pub fn all() -> &'static [Isa] { - &[Isa::Riscv, Isa::X86, Isa::Arm32, Isa::Arm64, Isa::S390x] + &[Isa::X86, Isa::Arm32, Isa::Arm64, Isa::S390x] } } @@ -50,7 +47,6 @@ impl fmt::Display for Isa { // These names should be kept in sync with the crate features. fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match *self { - Isa::Riscv => write!(f, "riscv"), Isa::X86 => write!(f, "x86"), Isa::Arm32 => write!(f, "arm32"), Isa::Arm64 => write!(f, "arm64"), @@ -62,7 +58,6 @@ impl fmt::Display for Isa { pub(crate) fn define(isas: &[Isa], shared_defs: &mut SharedDefinitions) -> Vec { isas.iter() .map(|isa| match isa { - Isa::Riscv => riscv::define(shared_defs), Isa::X86 => x86::define(shared_defs), Isa::Arm32 => arm32::define(shared_defs), Isa::Arm64 => arm64::define(shared_defs), diff --git a/cranelift/codegen/meta/src/isa/riscv/encodings.rs b/cranelift/codegen/meta/src/isa/riscv/encodings.rs deleted file mode 100644 index c255ddb483..0000000000 --- a/cranelift/codegen/meta/src/isa/riscv/encodings.rs +++ /dev/null @@ -1,431 +0,0 @@ -use crate::cdsl::ast::{Apply, Expr, Literal, VarPool}; -use crate::cdsl::encodings::{Encoding, EncodingBuilder}; -use crate::cdsl::instructions::{ - Bindable, BoundInstruction, InstSpec, InstructionPredicateNode, InstructionPredicateRegistry, -}; -use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes}; -use crate::cdsl::settings::SettingGroup; - -use crate::shared::types::Bool::B1; -use crate::shared::types::Float::{F32, F64}; -use crate::shared::types::Int::{I16, I32, I64, I8}; -use crate::shared::types::Reference::{R32, R64}; -use crate::shared::Definitions as SharedDefinitions; - -use super::recipes::RecipeGroup; - -pub(crate) struct PerCpuModeEncodings<'defs> { - pub inst_pred_reg: InstructionPredicateRegistry, - pub enc32: Vec, - pub enc64: Vec, - recipes: &'defs Recipes, -} - -impl<'defs> PerCpuModeEncodings<'defs> { - fn new(recipes: &'defs Recipes) -> Self { - Self { - inst_pred_reg: InstructionPredicateRegistry::new(), - 
enc32: Vec::new(), - enc64: Vec::new(), - recipes, - } - } - fn enc( - &self, - inst: impl Into, - recipe: EncodingRecipeNumber, - bits: u16, - ) -> EncodingBuilder { - EncodingBuilder::new(inst.into(), recipe, bits) - } - fn add32(&mut self, encoding: EncodingBuilder) { - self.enc32 - .push(encoding.build(self.recipes, &mut self.inst_pred_reg)); - } - fn add64(&mut self, encoding: EncodingBuilder) { - self.enc64 - .push(encoding.build(self.recipes, &mut self.inst_pred_reg)); - } -} - -// The low 7 bits of a RISC-V instruction is the base opcode. All 32-bit instructions have 11 as -// the two low bits, with bits 6:2 determining the base opcode. -// -// Encbits for the 32-bit recipes are opcode[6:2] | (funct3 << 5) | ... -// The functions below encode the encbits. - -fn load_bits(funct3: u16) -> u16 { - assert!(funct3 <= 0b111); - funct3 << 5 -} - -fn store_bits(funct3: u16) -> u16 { - assert!(funct3 <= 0b111); - 0b01000 | (funct3 << 5) -} - -fn branch_bits(funct3: u16) -> u16 { - assert!(funct3 <= 0b111); - 0b11000 | (funct3 << 5) -} - -fn jalr_bits() -> u16 { - // This was previously accepting an argument funct3 of 3 bits and used the following formula: - //0b11001 | (funct3 << 5) - 0b11001 -} - -fn jal_bits() -> u16 { - 0b11011 -} - -fn opimm_bits(funct3: u16, funct7: u16) -> u16 { - assert!(funct3 <= 0b111); - 0b00100 | (funct3 << 5) | (funct7 << 8) -} - -fn opimm32_bits(funct3: u16, funct7: u16) -> u16 { - assert!(funct3 <= 0b111); - 0b00110 | (funct3 << 5) | (funct7 << 8) -} - -fn op_bits(funct3: u16, funct7: u16) -> u16 { - assert!(funct3 <= 0b111); - assert!(funct7 <= 0b111_1111); - 0b01100 | (funct3 << 5) | (funct7 << 8) -} - -fn op32_bits(funct3: u16, funct7: u16) -> u16 { - assert!(funct3 <= 0b111); - assert!(funct7 <= 0b111_1111); - 0b01110 | (funct3 << 5) | (funct7 << 8) -} - -fn lui_bits() -> u16 { - 0b01101 -} - -pub(crate) fn define<'defs>( - shared_defs: &'defs SharedDefinitions, - isa_settings: &SettingGroup, - recipes: &'defs RecipeGroup, -) -> 
PerCpuModeEncodings<'defs> { - // Instructions shorthands. - let shared = &shared_defs.instructions; - - let band = shared.by_name("band"); - let band_imm = shared.by_name("band_imm"); - let bor = shared.by_name("bor"); - let bor_imm = shared.by_name("bor_imm"); - let br_icmp = shared.by_name("br_icmp"); - let brz = shared.by_name("brz"); - let brnz = shared.by_name("brnz"); - let bxor = shared.by_name("bxor"); - let bxor_imm = shared.by_name("bxor_imm"); - let call = shared.by_name("call"); - let call_indirect = shared.by_name("call_indirect"); - let copy = shared.by_name("copy"); - let copy_nop = shared.by_name("copy_nop"); - let copy_to_ssa = shared.by_name("copy_to_ssa"); - let fill = shared.by_name("fill"); - let fill_nop = shared.by_name("fill_nop"); - let iadd = shared.by_name("iadd"); - let iadd_imm = shared.by_name("iadd_imm"); - let iconst = shared.by_name("iconst"); - let icmp = shared.by_name("icmp"); - let icmp_imm = shared.by_name("icmp_imm"); - let imul = shared.by_name("imul"); - let ishl = shared.by_name("ishl"); - let ishl_imm = shared.by_name("ishl_imm"); - let isub = shared.by_name("isub"); - let jump = shared.by_name("jump"); - let regmove = shared.by_name("regmove"); - let spill = shared.by_name("spill"); - let sshr = shared.by_name("sshr"); - let sshr_imm = shared.by_name("sshr_imm"); - let ushr = shared.by_name("ushr"); - let ushr_imm = shared.by_name("ushr_imm"); - let return_ = shared.by_name("return"); - - // Recipes shorthands, prefixed with r_. 
- let r_copytossa = recipes.by_name("copytossa"); - let r_fillnull = recipes.by_name("fillnull"); - let r_icall = recipes.by_name("Icall"); - let r_icopy = recipes.by_name("Icopy"); - let r_ii = recipes.by_name("Ii"); - let r_iicmp = recipes.by_name("Iicmp"); - let r_iret = recipes.by_name("Iret"); - let r_irmov = recipes.by_name("Irmov"); - let r_iz = recipes.by_name("Iz"); - let r_gp_sp = recipes.by_name("GPsp"); - let r_gp_fi = recipes.by_name("GPfi"); - let r_r = recipes.by_name("R"); - let r_ricmp = recipes.by_name("Ricmp"); - let r_rshamt = recipes.by_name("Rshamt"); - let r_sb = recipes.by_name("SB"); - let r_sb_zero = recipes.by_name("SBzero"); - let r_stacknull = recipes.by_name("stacknull"); - let r_u = recipes.by_name("U"); - let r_uj = recipes.by_name("UJ"); - let r_uj_call = recipes.by_name("UJcall"); - - // Predicates shorthands. - let use_m = isa_settings.predicate_by_name("use_m"); - - // Definitions. - let mut e = PerCpuModeEncodings::new(&recipes.recipes); - - // Basic arithmetic binary instructions are encoded in an R-type instruction. - for &(inst, inst_imm, f3, f7) in &[ - (iadd, Some(iadd_imm), 0b000, 0b000_0000), - (isub, None, 0b000, 0b010_0000), - (bxor, Some(bxor_imm), 0b100, 0b000_0000), - (bor, Some(bor_imm), 0b110, 0b000_0000), - (band, Some(band_imm), 0b111, 0b000_0000), - ] { - e.add32(e.enc(inst.bind(I32), r_r, op_bits(f3, f7))); - e.add64(e.enc(inst.bind(I64), r_r, op_bits(f3, f7))); - - // Immediate versions for add/xor/or/and. - if let Some(inst_imm) = inst_imm { - e.add32(e.enc(inst_imm.bind(I32), r_ii, opimm_bits(f3, 0))); - e.add64(e.enc(inst_imm.bind(I64), r_ii, opimm_bits(f3, 0))); - } - } - - // 32-bit ops in RV64. - e.add64(e.enc(iadd.bind(I32), r_r, op32_bits(0b000, 0b000_0000))); - e.add64(e.enc(isub.bind(I32), r_r, op32_bits(0b000, 0b010_0000))); - // There are no andiw/oriw/xoriw variations. - e.add64(e.enc(iadd_imm.bind(I32), r_ii, opimm32_bits(0b000, 0))); - - // Use iadd_imm with %x0 to materialize constants. 
- e.add32(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0))); - e.add64(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0))); - e.add64(e.enc(iconst.bind(I64), r_iz, opimm_bits(0b0, 0))); - - // Dynamic shifts have the same masking semantics as the clif base instructions. - for &(inst, inst_imm, f3, f7) in &[ - (ishl, ishl_imm, 0b1, 0b0), - (ushr, ushr_imm, 0b101, 0b0), - (sshr, sshr_imm, 0b101, 0b10_0000), - ] { - e.add32(e.enc(inst.bind(I32).bind(I32), r_r, op_bits(f3, f7))); - e.add64(e.enc(inst.bind(I64).bind(I64), r_r, op_bits(f3, f7))); - e.add64(e.enc(inst.bind(I32).bind(I32), r_r, op32_bits(f3, f7))); - // Allow i32 shift amounts in 64-bit shifts. - e.add64(e.enc(inst.bind(I64).bind(I32), r_r, op_bits(f3, f7))); - e.add64(e.enc(inst.bind(I32).bind(I64), r_r, op32_bits(f3, f7))); - - // Immediate shifts. - e.add32(e.enc(inst_imm.bind(I32), r_rshamt, opimm_bits(f3, f7))); - e.add64(e.enc(inst_imm.bind(I64), r_rshamt, opimm_bits(f3, f7))); - e.add64(e.enc(inst_imm.bind(I32), r_rshamt, opimm32_bits(f3, f7))); - } - - // Signed and unsigned integer 'less than'. There are no 'w' variants for comparing 32-bit - // numbers in RV64. - { - let mut var_pool = VarPool::new(); - - // Helper that creates an instruction predicate for an instruction in the icmp family. 
- let mut icmp_instp = |bound_inst: &BoundInstruction, - intcc_field: &'static str| - -> InstructionPredicateNode { - let x = var_pool.create("x"); - let y = var_pool.create("y"); - let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field); - Apply::new( - bound_inst.clone().into(), - vec![Expr::Literal(cc), Expr::Var(x), Expr::Var(y)], - ) - .inst_predicate(&var_pool) - .unwrap() - }; - - let icmp_i32 = icmp.bind(I32); - let icmp_i64 = icmp.bind(I64); - e.add32( - e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b010, 0b000_0000)) - .inst_predicate(icmp_instp(&icmp_i32, "slt")), - ); - e.add64( - e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b010, 0b000_0000)) - .inst_predicate(icmp_instp(&icmp_i64, "slt")), - ); - - e.add32( - e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b011, 0b000_0000)) - .inst_predicate(icmp_instp(&icmp_i32, "ult")), - ); - e.add64( - e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b011, 0b000_0000)) - .inst_predicate(icmp_instp(&icmp_i64, "ult")), - ); - - // Immediate variants. - let icmp_i32 = icmp_imm.bind(I32); - let icmp_i64 = icmp_imm.bind(I64); - e.add32( - e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b010, 0)) - .inst_predicate(icmp_instp(&icmp_i32, "slt")), - ); - e.add64( - e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b010, 0)) - .inst_predicate(icmp_instp(&icmp_i64, "slt")), - ); - - e.add32( - e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b011, 0)) - .inst_predicate(icmp_instp(&icmp_i32, "ult")), - ); - e.add64( - e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b011, 0)) - .inst_predicate(icmp_instp(&icmp_i64, "ult")), - ); - } - - // Integer constants with the low 12 bits clear are materialized by lui. - e.add32(e.enc(iconst.bind(I32), r_u, lui_bits())); - e.add64(e.enc(iconst.bind(I32), r_u, lui_bits())); - e.add64(e.enc(iconst.bind(I64), r_u, lui_bits())); - - // "M" Standard Extension for Integer Multiplication and Division. - // Gated by the `use_m` flag. 
- e.add32( - e.enc(imul.bind(I32), r_r, op_bits(0b000, 0b0000_0001)) - .isa_predicate(use_m), - ); - e.add64( - e.enc(imul.bind(I64), r_r, op_bits(0b000, 0b0000_0001)) - .isa_predicate(use_m), - ); - e.add64( - e.enc(imul.bind(I32), r_r, op32_bits(0b000, 0b0000_0001)) - .isa_predicate(use_m), - ); - - // Control flow. - - // Unconditional branches. - e.add32(e.enc(jump, r_uj, jal_bits())); - e.add64(e.enc(jump, r_uj, jal_bits())); - e.add32(e.enc(call, r_uj_call, jal_bits())); - e.add64(e.enc(call, r_uj_call, jal_bits())); - - // Conditional branches. - { - let mut var_pool = VarPool::new(); - - // Helper that creates an instruction predicate for an instruction in the icmp family. - let mut br_icmp_instp = |bound_inst: &BoundInstruction, - intcc_field: &'static str| - -> InstructionPredicateNode { - let x = var_pool.create("x"); - let y = var_pool.create("y"); - let dest = var_pool.create("dest"); - let args = var_pool.create("args"); - let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field); - Apply::new( - bound_inst.clone().into(), - vec![ - Expr::Literal(cc), - Expr::Var(x), - Expr::Var(y), - Expr::Var(dest), - Expr::Var(args), - ], - ) - .inst_predicate(&var_pool) - .unwrap() - }; - - let br_icmp_i32 = br_icmp.bind(I32); - let br_icmp_i64 = br_icmp.bind(I64); - for &(cond, f3) in &[ - ("eq", 0b000), - ("ne", 0b001), - ("slt", 0b100), - ("sge", 0b101), - ("ult", 0b110), - ("uge", 0b111), - ] { - e.add32( - e.enc(br_icmp_i32.clone(), r_sb, branch_bits(f3)) - .inst_predicate(br_icmp_instp(&br_icmp_i32, cond)), - ); - e.add64( - e.enc(br_icmp_i64.clone(), r_sb, branch_bits(f3)) - .inst_predicate(br_icmp_instp(&br_icmp_i64, cond)), - ); - } - } - - for &(inst, f3) in &[(brz, 0b000), (brnz, 0b001)] { - e.add32(e.enc(inst.bind(I32), r_sb_zero, branch_bits(f3))); - e.add64(e.enc(inst.bind(I64), r_sb_zero, branch_bits(f3))); - e.add32(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3))); - e.add64(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3))); - } - - // 
Returns are a special case of jalr_bits using %x1 to hold the return address. - // The return address is provided by a special-purpose `link` return value that - // is added by legalize_signature(). - e.add32(e.enc(return_, r_iret, jalr_bits())); - e.add64(e.enc(return_, r_iret, jalr_bits())); - e.add32(e.enc(call_indirect.bind(I32), r_icall, jalr_bits())); - e.add64(e.enc(call_indirect.bind(I64), r_icall, jalr_bits())); - - // Spill and fill. - e.add32(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010))); - e.add64(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010))); - e.add64(e.enc(spill.bind(I64), r_gp_sp, store_bits(0b011))); - e.add32(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010))); - e.add64(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010))); - e.add64(e.enc(fill.bind(I64), r_gp_fi, load_bits(0b011))); - - // No-op fills, created by late-stage redundant-fill removal. - for &ty in &[I64, I32] { - e.add64(e.enc(fill_nop.bind(ty), r_fillnull, 0)); - e.add32(e.enc(fill_nop.bind(ty), r_fillnull, 0)); - } - e.add64(e.enc(fill_nop.bind(B1), r_fillnull, 0)); - e.add32(e.enc(fill_nop.bind(B1), r_fillnull, 0)); - - // Register copies. - e.add32(e.enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0))); - e.add64(e.enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0))); - e.add64(e.enc(copy.bind(I32), r_icopy, opimm32_bits(0b000, 0))); - - e.add32(e.enc(regmove.bind(I32), r_irmov, opimm_bits(0b000, 0))); - e.add64(e.enc(regmove.bind(I64), r_irmov, opimm_bits(0b000, 0))); - e.add64(e.enc(regmove.bind(I32), r_irmov, opimm32_bits(0b000, 0))); - - e.add32(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0))); - e.add64(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0))); - e.add32(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0))); - e.add64(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0))); - - // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn - // into a no-op. - // The same encoding is generated for both the 64- and 32-bit architectures. 
- for &ty in &[I64, I32, I16, I8] { - e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0)); - e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0)); - } - for &ty in &[F64, F32] { - e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0)); - e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0)); - } - - // Copy-to-SSA - e.add32(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm_bits(0b000, 0))); - e.add64(e.enc(copy_to_ssa.bind(I64), r_copytossa, opimm_bits(0b000, 0))); - e.add64(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm32_bits(0b000, 0))); - e.add32(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0))); - e.add64(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0))); - e.add32(e.enc(copy_to_ssa.bind(R32), r_copytossa, opimm_bits(0b000, 0))); - e.add64(e.enc(copy_to_ssa.bind(R64), r_copytossa, opimm_bits(0b000, 0))); - - e -} diff --git a/cranelift/codegen/meta/src/isa/riscv/mod.rs b/cranelift/codegen/meta/src/isa/riscv/mod.rs deleted file mode 100644 index 868ac17cfe..0000000000 --- a/cranelift/codegen/meta/src/isa/riscv/mod.rs +++ /dev/null @@ -1,136 +0,0 @@ -use crate::cdsl::cpu_modes::CpuMode; -use crate::cdsl::isa::TargetIsa; -use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; -use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder}; - -use crate::shared::types::Float::{F32, F64}; -use crate::shared::types::Int::{I32, I64}; -use crate::shared::Definitions as SharedDefinitions; - -mod encodings; -mod recipes; - -fn define_settings(shared: &SettingGroup) -> SettingGroup { - let mut setting = SettingGroupBuilder::new("riscv"); - - let supports_m = setting.add_bool( - "supports_m", - "CPU supports the 'M' extension (mul/div)", - "", - false, - ); - let supports_a = setting.add_bool( - "supports_a", - "CPU supports the 'A' extension (atomics)", - "", - false, - ); - let supports_f = setting.add_bool( - "supports_f", - "CPU supports the 'F' extension (float)", - "", - false, - ); - let supports_d = 
setting.add_bool( - "supports_d", - "CPU supports the 'D' extension (double)", - "", - false, - ); - - let enable_m = setting.add_bool( - "enable_m", - "Enable the use of 'M' instructions if available", - "", - true, - ); - - setting.add_bool( - "enable_e", - "Enable the 'RV32E' instruction set with only 16 registers", - "", - false, - ); - - let shared_enable_atomics = shared.get_bool("enable_atomics"); - let shared_enable_float = shared.get_bool("enable_float"); - let shared_enable_simd = shared.get_bool("enable_simd"); - - setting.add_predicate("use_m", predicate!(supports_m && enable_m)); - setting.add_predicate("use_a", predicate!(supports_a && shared_enable_atomics)); - setting.add_predicate("use_f", predicate!(supports_f && shared_enable_float)); - setting.add_predicate("use_d", predicate!(supports_d && shared_enable_float)); - setting.add_predicate( - "full_float", - predicate!(shared_enable_simd && supports_f && supports_d), - ); - - setting.build() -} - -fn define_registers() -> IsaRegs { - let mut regs = IsaRegsBuilder::new(); - - let builder = RegBankBuilder::new("IntRegs", "x") - .units(32) - .track_pressure(true); - let int_regs = regs.add_bank(builder); - - let builder = RegBankBuilder::new("FloatRegs", "f") - .units(32) - .track_pressure(true); - let float_regs = regs.add_bank(builder); - - let builder = RegClassBuilder::new_toplevel("GPR", int_regs); - regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("FPR", float_regs); - regs.add_class(builder); - - regs.build() -} - -pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { - let settings = define_settings(&shared_defs.settings); - let regs = define_registers(); - - // CPU modes for 32-bit and 64-bit operation. 
- let mut rv_32 = CpuMode::new("RV32"); - let mut rv_64 = CpuMode::new("RV64"); - - let expand = shared_defs.transform_groups.by_name("expand"); - let narrow_no_flags = shared_defs.transform_groups.by_name("narrow_no_flags"); - - rv_32.legalize_monomorphic(expand); - rv_32.legalize_default(narrow_no_flags); - rv_32.legalize_type(I32, expand); - rv_32.legalize_type(F32, expand); - rv_32.legalize_type(F64, expand); - - rv_64.legalize_monomorphic(expand); - rv_64.legalize_default(narrow_no_flags); - rv_64.legalize_type(I32, expand); - rv_64.legalize_type(I64, expand); - rv_64.legalize_type(F32, expand); - rv_64.legalize_type(F64, expand); - - let recipes = recipes::define(shared_defs, ®s); - - let encodings = encodings::define(shared_defs, &settings, &recipes); - rv_32.set_encodings(encodings.enc32); - rv_64.set_encodings(encodings.enc64); - let encodings_predicates = encodings.inst_pred_reg.extract(); - - let recipes = recipes.collect(); - - let cpu_modes = vec![rv_32, rv_64]; - - TargetIsa::new( - "riscv", - settings, - regs, - recipes, - cpu_modes, - encodings_predicates, - ) -} diff --git a/cranelift/codegen/meta/src/isa/riscv/recipes.rs b/cranelift/codegen/meta/src/isa/riscv/recipes.rs deleted file mode 100644 index dc879dcecb..0000000000 --- a/cranelift/codegen/meta/src/isa/riscv/recipes.rs +++ /dev/null @@ -1,280 +0,0 @@ -use std::collections::HashMap; - -use crate::cdsl::instructions::InstructionPredicate; -use crate::cdsl::recipes::{EncodingRecipeBuilder, EncodingRecipeNumber, Recipes, Stack}; -use crate::cdsl::regs::IsaRegs; -use crate::shared::Definitions as SharedDefinitions; - -/// An helper to create recipes and use them when defining the RISCV encodings. -pub(crate) struct RecipeGroup { - /// The actualy list of recipes explicitly created in this file. - pub recipes: Recipes, - - /// Provides fast lookup from a name to an encoding recipe. 
- name_to_recipe: HashMap, -} - -impl RecipeGroup { - fn new() -> Self { - Self { - recipes: Recipes::new(), - name_to_recipe: HashMap::new(), - } - } - - fn push(&mut self, builder: EncodingRecipeBuilder) { - assert!( - self.name_to_recipe.get(&builder.name).is_none(), - "riscv recipe '{}' created twice", - builder.name - ); - let name = builder.name.clone(); - let number = self.recipes.push(builder.build()); - self.name_to_recipe.insert(name, number); - } - - pub fn by_name(&self, name: &str) -> EncodingRecipeNumber { - *self - .name_to_recipe - .get(name) - .unwrap_or_else(|| panic!("unknown riscv recipe name {}", name)) - } - - pub fn collect(self) -> Recipes { - self.recipes - } -} - -pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeGroup { - let formats = &shared_defs.formats; - - // Register classes shorthands. - let gpr = regs.class_by_name("GPR"); - - // Definitions. - let mut recipes = RecipeGroup::new(); - - // R-type 32-bit instructions: These are mostly binary arithmetic instructions. - // The encbits are `opcode[6:2] | (funct3 << 5) | (funct7 << 8) - recipes.push( - EncodingRecipeBuilder::new("R", &formats.binary, 4) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .emit("put_r(bits, in_reg0, in_reg1, out_reg0, sink);"), - ); - - // R-type with an immediate shift amount instead of rs2. - recipes.push( - EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm64, 4) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .emit("put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);"), - ); - - // R-type encoding of an integer comparison. 
- recipes.push( - EncodingRecipeBuilder::new("Ricmp", &formats.int_compare, 4) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .emit("put_r(bits, in_reg0, in_reg1, out_reg0, sink);"), - ); - - recipes.push( - EncodingRecipeBuilder::new("Ii", &formats.binary_imm64, 4) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.binary_imm64, - "imm", - 12, - 0, - )) - .emit("put_i(bits, in_reg0, imm.into(), out_reg0, sink);"), - ); - - // I-type instruction with a hardcoded %x0 rs1. - recipes.push( - EncodingRecipeBuilder::new("Iz", &formats.unary_imm, 4) - .operands_out(vec![gpr]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &formats.unary_imm, - "imm", - 12, - 0, - )) - .emit("put_i(bits, 0, imm.into(), out_reg0, sink);"), - ); - - // I-type encoding of an integer comparison. - recipes.push( - EncodingRecipeBuilder::new("Iicmp", &formats.int_compare_imm, 4) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &formats.int_compare_imm, - "imm", - 12, - 0, - )) - .emit("put_i(bits, in_reg0, imm.into(), out_reg0, sink);"), - ); - - // I-type encoding for `jalr` as a return instruction. We won't use the immediate offset. The - // variable return values are not encoded. - recipes.push( - EncodingRecipeBuilder::new("Iret", &formats.multiary, 4).emit( - r#" - // Return instructions are always a jalr to %x1. - // The return address is provided as a special-purpose link argument. - put_i( - bits, - 1, // rs1 = %x1 - 0, // no offset. - 0, // rd = %x0: no address written. - sink, - ); - "#, - ), - ); - - // I-type encoding for `jalr` as a call_indirect. - recipes.push( - EncodingRecipeBuilder::new("Icall", &formats.call_indirect, 4) - .operands_in(vec![gpr]) - .emit( - r#" - // call_indirect instructions are jalr with rd=%x1. - put_i( - bits, - in_reg0, - 0, // no offset. - 1, // rd = %x1: link register. 
- sink, - ); - "#, - ), - ); - - // Copy of a GPR is implemented as addi x, 0. - recipes.push( - EncodingRecipeBuilder::new("Icopy", &formats.unary, 4) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .emit("put_i(bits, in_reg0, 0, out_reg0, sink);"), - ); - - // Same for a GPR regmove. - recipes.push( - EncodingRecipeBuilder::new("Irmov", &formats.reg_move, 4) - .operands_in(vec![gpr]) - .emit("put_i(bits, src, 0, dst, sink);"), - ); - - // Same for copy-to-SSA -- GPR regmove. - recipes.push( - EncodingRecipeBuilder::new("copytossa", &formats.copy_to_ssa, 4) - // No operands_in to mention, because a source register is specified directly. - .operands_out(vec![gpr]) - .emit("put_i(bits, src, 0, out_reg0, sink);"), - ); - - // U-type instructions have a 20-bit immediate that targets bits 12-31. - recipes.push( - EncodingRecipeBuilder::new("U", &formats.unary_imm, 4) - .operands_out(vec![gpr]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &formats.unary_imm, - "imm", - 32, - 12, - )) - .emit("put_u(bits, imm.into(), out_reg0, sink);"), - ); - - // UJ-type unconditional branch instructions. - recipes.push( - EncodingRecipeBuilder::new("UJ", &formats.jump, 4) - .branch_range((0, 21)) - .emit( - r#" - let dest = i64::from(func.offsets[destination]); - let disp = dest - i64::from(sink.offset()); - put_uj(bits, disp, 0, sink); - "#, - ), - ); - - recipes.push(EncodingRecipeBuilder::new("UJcall", &formats.call, 4).emit( - r#" - sink.reloc_external(func.srclocs[inst], - Reloc::RiscvCall, - &func.dfg.ext_funcs[func_ref].name, - 0); - // rd=%x1 is the standard link register. - put_uj(bits, 0, 1, sink); - "#, - )); - - // SB-type branch instructions. 
- recipes.push( - EncodingRecipeBuilder::new("SB", &formats.branch_icmp, 4) - .operands_in(vec![gpr, gpr]) - .branch_range((0, 13)) - .emit( - r#" - let dest = i64::from(func.offsets[destination]); - let disp = dest - i64::from(sink.offset()); - put_sb(bits, disp, in_reg0, in_reg1, sink); - "#, - ), - ); - - // SB-type branch instruction with rs2 fixed to zero. - recipes.push( - EncodingRecipeBuilder::new("SBzero", &formats.branch, 4) - .operands_in(vec![gpr]) - .branch_range((0, 13)) - .emit( - r#" - let dest = i64::from(func.offsets[destination]); - let disp = dest - i64::from(sink.offset()); - put_sb(bits, disp, in_reg0, 0, sink); - "#, - ), - ); - - // Spill of a GPR. - recipes.push( - EncodingRecipeBuilder::new("GPsp", &formats.unary, 4) - .operands_in(vec![gpr]) - .operands_out(vec![Stack::new(gpr)]) - .emit("unimplemented!();"), - ); - - // Fill of a GPR. - recipes.push( - EncodingRecipeBuilder::new("GPfi", &formats.unary, 4) - .operands_in(vec![Stack::new(gpr)]) - .operands_out(vec![gpr]) - .emit("unimplemented!();"), - ); - - // Stack-slot to same stack-slot copy, which is guaranteed to turn into a no-op. - recipes.push( - EncodingRecipeBuilder::new("stacknull", &formats.unary, 0) - .operands_in(vec![Stack::new(gpr)]) - .operands_out(vec![Stack::new(gpr)]) - .emit(""), - ); - - // No-op fills, created by late-stage redundant-fill removal. 
- recipes.push( - EncodingRecipeBuilder::new("fillnull", &formats.unary, 0) - .operands_in(vec![Stack::new(gpr)]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit(""), - ); - - recipes -} diff --git a/cranelift/codegen/meta/src/isa/s390x/mod.rs b/cranelift/codegen/meta/src/isa/s390x.rs similarity index 73% rename from cranelift/codegen/meta/src/isa/s390x/mod.rs rename to cranelift/codegen/meta/src/isa/s390x.rs index 97a5947080..1e36e462c6 100644 --- a/cranelift/codegen/meta/src/isa/s390x/mod.rs +++ b/cranelift/codegen/meta/src/isa/s390x.rs @@ -1,7 +1,4 @@ -use crate::cdsl::instructions::InstructionPredicateMap; use crate::cdsl::isa::TargetIsa; -use crate::cdsl::recipes::Recipes; -use crate::cdsl::regs::IsaRegsBuilder; use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; use crate::shared::Definitions as SharedDefinitions; @@ -45,18 +42,6 @@ fn define_settings(_shared: &SettingGroup) -> SettingGroup { pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = define_settings(&shared_defs.settings); - let regs = IsaRegsBuilder::new().build(); - let recipes = Recipes::new(); - let encodings_predicates = InstructionPredicateMap::new(); - let cpu_modes = vec![]; - - TargetIsa::new( - "s390x", - settings, - regs, - recipes, - cpu_modes, - encodings_predicates, - ) + TargetIsa::new("s390x", settings) } diff --git a/cranelift/codegen/meta/src/isa/x86/settings.rs b/cranelift/codegen/meta/src/isa/x86.rs similarity index 95% rename from cranelift/codegen/meta/src/isa/x86/settings.rs rename to cranelift/codegen/meta/src/isa/x86.rs index 824683bbf6..eec6ac105f 100644 --- a/cranelift/codegen/meta/src/isa/x86/settings.rs +++ b/cranelift/codegen/meta/src/isa/x86.rs @@ -1,6 +1,15 @@ +use crate::cdsl::isa::TargetIsa; use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder}; -pub(crate) fn define(shared: &SettingGroup) -> SettingGroup { +use crate::shared::Definitions as SharedDefinitions; + +pub(crate) fn 
define(shared_defs: &mut SharedDefinitions) -> TargetIsa { + let settings = define_settings(&shared_defs.settings); + + TargetIsa::new("x86", settings) +} + +fn define_settings(shared: &SettingGroup) -> SettingGroup { let mut settings = SettingGroupBuilder::new("x86"); // CPUID.01H:ECX diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs deleted file mode 100644 index 2f222defb5..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ /dev/null @@ -1,2731 +0,0 @@ -#![allow(non_snake_case)] - -use cranelift_codegen_shared::condcodes::IntCC; -use std::collections::HashMap; - -use crate::cdsl::encodings::{Encoding, EncodingBuilder}; -use crate::cdsl::instructions::{ - vector, Bindable, Immediate, InstSpec, Instruction, InstructionGroup, InstructionPredicate, - InstructionPredicateNode, InstructionPredicateRegistry, -}; -use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes}; -use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber}; -use crate::cdsl::types::{LaneType, ValueType}; -use crate::shared::types::Bool::{B1, B16, B32, B64, B8}; -use crate::shared::types::Float::{F32, F64}; -use crate::shared::types::Int::{I16, I32, I64, I8}; -use crate::shared::types::Reference::{R32, R64}; -use crate::shared::Definitions as SharedDefinitions; - -use crate::isa::x86::opcodes::*; - -use super::recipes::{RecipeGroup, Template}; -use crate::cdsl::instructions::BindParameter::Any; - -pub(crate) struct PerCpuModeEncodings { - pub enc32: Vec, - pub enc64: Vec, - pub recipes: Recipes, - recipes_by_name: HashMap, - pub inst_pred_reg: InstructionPredicateRegistry, -} - -impl PerCpuModeEncodings { - fn new() -> Self { - Self { - enc32: Vec::new(), - enc64: Vec::new(), - recipes: Recipes::new(), - recipes_by_name: HashMap::new(), - inst_pred_reg: InstructionPredicateRegistry::new(), - } - } - - fn add_recipe(&mut self, recipe: EncodingRecipe) -> EncodingRecipeNumber { - if let 
Some(found_index) = self.recipes_by_name.get(&recipe.name) { - assert!( - self.recipes[*found_index] == recipe, - "trying to insert different recipes with a same name ({})", - recipe.name - ); - *found_index - } else { - let recipe_name = recipe.name.clone(); - let index = self.recipes.push(recipe); - self.recipes_by_name.insert(recipe_name, index); - index - } - } - - fn make_encoding( - &mut self, - inst: InstSpec, - template: Template, - builder_closure: T, - ) -> Encoding - where - T: FnOnce(EncodingBuilder) -> EncodingBuilder, - { - let (recipe, bits) = template.build(); - let recipe_number = self.add_recipe(recipe); - let builder = EncodingBuilder::new(inst, recipe_number, bits); - builder_closure(builder).build(&self.recipes, &mut self.inst_pred_reg) - } - - fn enc32_func(&mut self, inst: impl Into, template: Template, builder_closure: T) - where - T: FnOnce(EncodingBuilder) -> EncodingBuilder, - { - let encoding = self.make_encoding(inst.into(), template, builder_closure); - self.enc32.push(encoding); - } - fn enc32(&mut self, inst: impl Into, template: Template) { - self.enc32_func(inst, template, |x| x); - } - fn enc32_isap( - &mut self, - inst: impl Into, - template: Template, - isap: SettingPredicateNumber, - ) { - self.enc32_func(inst, template, |encoding| encoding.isa_predicate(isap)); - } - fn enc32_instp( - &mut self, - inst: impl Into, - template: Template, - instp: InstructionPredicateNode, - ) { - self.enc32_func(inst, template, |encoding| encoding.inst_predicate(instp)); - } - fn enc32_rec(&mut self, inst: impl Into, recipe: &EncodingRecipe, bits: u16) { - let recipe_number = self.add_recipe(recipe.clone()); - let builder = EncodingBuilder::new(inst.into(), recipe_number, bits); - let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg); - self.enc32.push(encoding); - } - - fn enc64_func(&mut self, inst: impl Into, template: Template, builder_closure: T) - where - T: FnOnce(EncodingBuilder) -> EncodingBuilder, - { - let encoding = 
self.make_encoding(inst.into(), template, builder_closure); - self.enc64.push(encoding); - } - fn enc64(&mut self, inst: impl Into, template: Template) { - self.enc64_func(inst, template, |x| x); - } - fn enc64_isap( - &mut self, - inst: impl Into, - template: Template, - isap: SettingPredicateNumber, - ) { - self.enc64_func(inst, template, |encoding| encoding.isa_predicate(isap)); - } - fn enc64_instp( - &mut self, - inst: impl Into, - template: Template, - instp: InstructionPredicateNode, - ) { - self.enc64_func(inst, template, |encoding| encoding.inst_predicate(instp)); - } - fn enc64_rec(&mut self, inst: impl Into, recipe: &EncodingRecipe, bits: u16) { - let recipe_number = self.add_recipe(recipe.clone()); - let builder = EncodingBuilder::new(inst.into(), recipe_number, bits); - let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg); - self.enc64.push(encoding); - } - - /// Adds I32/I64 encodings as appropriate for a typed instruction. - /// The REX prefix is always inferred at runtime. - /// - /// Add encodings for `inst.i32` to X86_32. - /// Add encodings for `inst.i32` to X86_64 with optional, inferred REX. - /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. - fn enc_i32_i64(&mut self, inst: impl Into, template: Template) { - let inst: InstSpec = inst.into(); - - // I32 on x86: no REX prefix. - self.enc32(inst.bind(I32), template.infer_rex()); - - // I32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers. - self.enc64(inst.bind(I32), template.infer_rex()); - - // I64 on x86_64: REX.W set; REX.RXB determined at runtime from registers. - self.enc64(inst.bind(I64), template.rex().w()); - } - - /// Adds I32/I64 encodings as appropriate for a typed instruction. - /// All variants of REX prefix are explicitly emitted, not inferred. - /// - /// Add encodings for `inst.i32` to X86_32. - /// Add encodings for `inst.i32` to X86_64 with and without REX. - /// Add encodings for `inst.i64` to X86_64 with and without REX. 
- fn enc_i32_i64_explicit_rex(&mut self, inst: impl Into, template: Template) { - let inst: InstSpec = inst.into(); - self.enc32(inst.bind(I32), template.nonrex()); - - // REX-less encoding must come after REX encoding so we don't use it by default. - // Otherwise reg-alloc would never use r8 and up. - self.enc64(inst.bind(I32), template.rex()); - self.enc64(inst.bind(I32), template.nonrex()); - self.enc64(inst.bind(I64), template.rex().w()); - } - - /// Adds B32/B64 encodings as appropriate for a typed instruction. - /// The REX prefix is always inferred at runtime. - /// - /// Adds encoding for `inst.b32` to X86_32. - /// Adds encoding for `inst.b32` to X86_64 with optional, inferred REX. - /// Adds encoding for `inst.b64` to X86_64 with a REX.W prefix. - fn enc_b32_b64(&mut self, inst: impl Into, template: Template) { - let inst: InstSpec = inst.into(); - - // B32 on x86: no REX prefix. - self.enc32(inst.bind(B32), template.infer_rex()); - - // B32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers. - self.enc64(inst.bind(B32), template.infer_rex()); - - // B64 on x86_64: REX.W set; REX.RXB determined at runtime from registers. - self.enc64(inst.bind(B64), template.rex().w()); - } - - /// Add encodings for `inst.i32` to X86_32. - /// Add encodings for `inst.i32` to X86_64 with a REX prefix. - /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. - fn enc_i32_i64_rex_only(&mut self, inst: impl Into, template: Template) { - let inst: InstSpec = inst.into(); - self.enc32(inst.bind(I32), template.nonrex()); - self.enc64(inst.bind(I32), template.rex()); - self.enc64(inst.bind(I64), template.rex().w()); - } - - /// Add encodings for `inst.i32` to X86_32. - /// Add encodings for `inst.i32` to X86_64 with and without REX. - /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. 
- fn enc_i32_i64_instp( - &mut self, - inst: &Instruction, - template: Template, - instp: InstructionPredicateNode, - ) { - self.enc32_func(inst.bind(I32), template.nonrex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - - // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise - // reg-alloc would never use r8 and up. - self.enc64_func(inst.bind(I32), template.rex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - self.enc64_func(inst.bind(I32), template.nonrex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - self.enc64_func(inst.bind(I64), template.rex().w(), |builder| { - builder.inst_predicate(instp) - }); - } - - /// Add encodings for `inst.r32` to X86_32. - /// Add encodings for `inst.r32` to X86_64 with and without REX. - /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix. - fn enc_r32_r64_instp( - &mut self, - inst: &Instruction, - template: Template, - instp: InstructionPredicateNode, - ) { - self.enc32_func(inst.bind(R32), template.nonrex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - - // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise - // reg-alloc would never use r8 and up. - self.enc64_func(inst.bind(R32), template.rex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - self.enc64_func(inst.bind(R32), template.nonrex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - self.enc64_func(inst.bind(R64), template.rex().w(), |builder| { - builder.inst_predicate(instp) - }); - } - - /// Add encodings for `inst.r32` to X86_32. - /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix. 
- fn enc_r32_r64_rex_only(&mut self, inst: impl Into, template: Template) { - let inst: InstSpec = inst.into(); - self.enc32(inst.bind(R32), template.nonrex()); - self.enc64(inst.bind(R64), template.rex().w()); - } - - fn enc_r32_r64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) { - self.enc32(inst.clone().bind(R32).bind(Any), template.clone()); - - // REX-less encoding must come after REX encoding so we don't use it by - // default. Otherwise reg-alloc would never use r8 and up. - self.enc64(inst.clone().bind(R32).bind(Any), template.clone().rex()); - self.enc64(inst.clone().bind(R32).bind(Any), template.clone()); - - if w_bit { - self.enc64(inst.clone().bind(R64).bind(Any), template.rex().w()); - } else { - self.enc64(inst.clone().bind(R64).bind(Any), template.clone().rex()); - self.enc64(inst.clone().bind(R64).bind(Any), template); - } - } - - /// Add encodings for `inst` to X86_64 with and without a REX prefix. - fn enc_x86_64(&mut self, inst: impl Into + Clone, template: Template) { - // See above comment about the ordering of rex vs non-rex encodings. - self.enc64(inst.clone(), template.rex()); - self.enc64(inst, template); - } - - /// Add encodings for `inst` to X86_64 with and without a REX prefix. - fn enc_x86_64_instp( - &mut self, - inst: impl Clone + Into, - template: Template, - instp: InstructionPredicateNode, - ) { - // See above comment about the ordering of rex vs non-rex encodings. - self.enc64_func(inst.clone(), template.rex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - self.enc64_func(inst, template, |builder| builder.inst_predicate(instp)); - } - fn enc_x86_64_isap( - &mut self, - inst: impl Clone + Into, - template: Template, - isap: SettingPredicateNumber, - ) { - // See above comment about the ordering of rex vs non-rex encodings. 
- self.enc64_isap(inst.clone(), template.rex(), isap); - self.enc64_isap(inst, template, isap); - } - - /// Add all three encodings for `inst`: - /// - X86_32 - /// - X86_64 with and without the REX prefix. - fn enc_both(&mut self, inst: impl Clone + Into, template: Template) { - self.enc32(inst.clone(), template.clone()); - self.enc_x86_64(inst, template); - } - fn enc_both_isap( - &mut self, - inst: impl Clone + Into, - template: Template, - isap: SettingPredicateNumber, - ) { - self.enc32_isap(inst.clone(), template.clone(), isap); - self.enc_x86_64_isap(inst, template, isap); - } - fn enc_both_instp( - &mut self, - inst: impl Clone + Into, - template: Template, - instp: InstructionPredicateNode, - ) { - self.enc32_instp(inst.clone(), template.clone(), instp.clone()); - self.enc_x86_64_instp(inst, template, instp); - } - - /// Add two encodings for `inst`: - /// - X86_32, no REX prefix, since this is not valid in 32-bit mode. - /// - X86_64, dynamically infer the REX prefix. - fn enc_both_inferred(&mut self, inst: impl Clone + Into, template: Template) { - self.enc32(inst.clone(), template.clone()); - self.enc64(inst, template.infer_rex()); - } - fn enc_both_inferred_maybe_isap( - &mut self, - inst: impl Clone + Into, - template: Template, - isap: Option, - ) { - self.enc32_maybe_isap(inst.clone(), template.clone(), isap); - self.enc64_maybe_isap(inst, template.infer_rex(), isap); - } - - /// Add two encodings for `inst`: - /// - X86_32 - /// - X86_64 with the REX prefix. - fn enc_both_rex_only(&mut self, inst: impl Clone + Into, template: Template) { - self.enc32(inst.clone(), template.clone()); - self.enc64(inst, template.rex()); - } - - /// Add encodings for `inst.i32` to X86_32. - /// Add encodings for `inst.i32` to X86_64 with and without REX. - /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit` - /// argument to determine whether or not to set the REX.W bit. 
- fn enc_i32_i64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) { - self.enc32(inst.clone().bind(I32).bind(Any), template.clone()); - - // REX-less encoding must come after REX encoding so we don't use it by - // default. Otherwise reg-alloc would never use r8 and up. - self.enc64(inst.clone().bind(I32).bind(Any), template.clone().rex()); - self.enc64(inst.clone().bind(I32).bind(Any), template.clone()); - - if w_bit { - self.enc64(inst.clone().bind(I64).bind(Any), template.rex().w()); - } else { - self.enc64(inst.clone().bind(I64).bind(Any), template.clone().rex()); - self.enc64(inst.clone().bind(I64).bind(Any), template); - } - } - - /// Add the same encoding/recipe pairing to both X86_32 and X86_64 - fn enc_32_64_rec( - &mut self, - inst: impl Clone + Into, - recipe: &EncodingRecipe, - bits: u16, - ) { - self.enc32_rec(inst.clone(), recipe, bits); - self.enc64_rec(inst, recipe, bits); - } - - /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand binding) has already happened - fn enc_32_64_func( - &mut self, - inst: impl Clone + Into, - template: Template, - builder_closure: T, - ) where - T: FnOnce(EncodingBuilder) -> EncodingBuilder, - { - let encoding = self.make_encoding(inst.into(), template, builder_closure); - self.enc32.push(encoding.clone()); - self.enc64.push(encoding); - } - - /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand - /// binding) has already happened. 
- fn enc_32_64_maybe_isap( - &mut self, - inst: impl Clone + Into, - template: Template, - isap: Option, - ) { - self.enc32_maybe_isap(inst.clone(), template.clone(), isap); - self.enc64_maybe_isap(inst, template, isap); - } - - fn enc32_maybe_isap( - &mut self, - inst: impl Into, - template: Template, - isap: Option, - ) { - match isap { - None => self.enc32(inst, template), - Some(isap) => self.enc32_isap(inst, template, isap), - } - } - - fn enc64_maybe_isap( - &mut self, - inst: impl Into, - template: Template, - isap: Option, - ) { - match isap { - None => self.enc64(inst, template), - Some(isap) => self.enc64_isap(inst, template, isap), - } - } -} - -// Definitions. - -#[inline(never)] -fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. - let bconst = shared.by_name("bconst"); - let bint = shared.by_name("bint"); - let copy = shared.by_name("copy"); - let copy_special = shared.by_name("copy_special"); - let copy_to_ssa = shared.by_name("copy_to_ssa"); - let get_pinned_reg = shared.by_name("get_pinned_reg"); - let iconst = shared.by_name("iconst"); - let ireduce = shared.by_name("ireduce"); - let regmove = shared.by_name("regmove"); - let sextend = shared.by_name("sextend"); - let set_pinned_reg = shared.by_name("set_pinned_reg"); - let uextend = shared.by_name("uextend"); - let dummy_sarg_t = shared.by_name("dummy_sarg_t"); - - // Shorthands for recipes. 
- let rec_copysp = r.template("copysp"); - let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa"); - let rec_get_pinned_reg = r.recipe("get_pinned_reg"); - let rec_null = r.recipe("null"); - let rec_pu_id = r.template("pu_id"); - let rec_pu_id_bool = r.template("pu_id_bool"); - let rec_pu_iq = r.template("pu_iq"); - let rec_rmov = r.template("rmov"); - let rec_set_pinned_reg = r.template("set_pinned_reg"); - let rec_u_id = r.template("u_id"); - let rec_u_id_z = r.template("u_id_z"); - let rec_umr = r.template("umr"); - let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa"); - let rec_urm_noflags = r.template("urm_noflags"); - let rec_urm_noflags_abcd = r.template("urm_noflags_abcd"); - let rec_dummy_sarg_t = r.recipe("dummy_sarg_t"); - - // The pinned reg is fixed to a certain value entirely user-controlled, so it generates nothing! - e.enc64_rec(get_pinned_reg.bind(I64), rec_get_pinned_reg, 0); - e.enc_x86_64( - set_pinned_reg.bind(I64), - rec_set_pinned_reg.opcodes(&MOV_STORE).rex().w(), - ); - - e.enc_i32_i64(copy, rec_umr.opcodes(&MOV_STORE)); - e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(&MOV_STORE)); - e.enc_both(copy.bind(B1), rec_umr.opcodes(&MOV_STORE)); - e.enc_both(copy.bind(I8), rec_umr.opcodes(&MOV_STORE)); - e.enc_both(copy.bind(I16), rec_umr.opcodes(&MOV_STORE)); - - // TODO For x86-64, only define REX forms for now, since we can't describe the - // special regunit immediate operands with the current constraint language. 
- for &ty in &[I8, I16, I32] { - e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE)); - e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex()); - } - for &ty in &[B8, B16, B32] { - e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE)); - e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex()); - } - e.enc64(regmove.bind(I64), rec_rmov.opcodes(&MOV_STORE).rex().w()); - e.enc_both(regmove.bind(B1), rec_rmov.opcodes(&MOV_STORE)); - e.enc_both(regmove.bind(I8), rec_rmov.opcodes(&MOV_STORE)); - e.enc32(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE)); - e.enc64(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE).rex()); - e.enc64(regmove.bind(R64), rec_rmov.opcodes(&MOV_STORE).rex().w()); - - // Immediate constants. - e.enc32(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM)); - - e.enc64(iconst.bind(I32), rec_pu_id.rex().opcodes(&MOV_IMM)); - e.enc64(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM)); - - // The 32-bit immediate movl also zero-extends to 64 bits. - let is_unsigned_int32 = - InstructionPredicate::new_is_unsigned_int(&*formats.unary_imm, "imm", 32, 0); - - e.enc64_func( - iconst.bind(I64), - rec_pu_id.opcodes(&MOV_IMM).rex(), - |encoding| encoding.inst_predicate(is_unsigned_int32.clone()), - ); - e.enc64_func(iconst.bind(I64), rec_pu_id.opcodes(&MOV_IMM), |encoding| { - encoding.inst_predicate(is_unsigned_int32) - }); - - // Sign-extended 32-bit immediate. - e.enc64( - iconst.bind(I64), - rec_u_id.rex().opcodes(&MOV_IMM_SIGNEXTEND).rrr(0).w(), - ); - - // Finally, the MOV_IMM opcode takes an 8-byte immediate with a REX.W prefix. - e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(&MOV_IMM).rex().w()); - - // Bool constants (uses MOV) - for &ty in &[B1, B8, B16, B32] { - e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(&MOV_IMM)); - } - e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(&MOV_IMM).rex()); - - // You may expect that i8 encodings would use 0x30 (XORB) to indicate that encodings should be - // on 8-bit operands (f.ex "xor %al, %al"). 
Cranelift currently does not know when it can - // safely drop the 0x66 prefix, so we explicitly select a wider but permissible opcode. - let is_zero_int = InstructionPredicate::new_is_zero_int(&formats.unary_imm, "imm"); - e.enc_both_instp( - iconst.bind(I8), - rec_u_id_z.opcodes(&XOR), - is_zero_int.clone(), - ); - - // You may expect that i16 encodings would have an 0x66 prefix on the opcode to indicate that - // encodings should be on 16-bit operands (f.ex, "xor %ax, %ax"). Cranelift currently does not - // know that it can drop the 0x66 prefix and clear the upper half of a 32-bit register in these - // scenarios, so we explicitly select a wider but permissible opcode. - // - // This effectively formalizes the i16->i32 widening that Cranelift performs when there isn't - // an appropriate i16 encoding available. - e.enc_both_instp( - iconst.bind(I16), - rec_u_id_z.opcodes(&XOR), - is_zero_int.clone(), - ); - e.enc_both_instp( - iconst.bind(I32), - rec_u_id_z.opcodes(&XOR), - is_zero_int.clone(), - ); - e.enc_x86_64_instp(iconst.bind(I64), rec_u_id_z.opcodes(&XOR), is_zero_int); - - // Numerical conversions. - - // Reducing an integer is a no-op. - e.enc32_rec(ireduce.bind(I8).bind(I16), rec_null, 0); - e.enc32_rec(ireduce.bind(I8).bind(I32), rec_null, 0); - e.enc32_rec(ireduce.bind(I16).bind(I32), rec_null, 0); - - e.enc64_rec(ireduce.bind(I8).bind(I16), rec_null, 0); - e.enc64_rec(ireduce.bind(I8).bind(I32), rec_null, 0); - e.enc64_rec(ireduce.bind(I16).bind(I32), rec_null, 0); - e.enc64_rec(ireduce.bind(I8).bind(I64), rec_null, 0); - e.enc64_rec(ireduce.bind(I16).bind(I64), rec_null, 0); - e.enc64_rec(ireduce.bind(I32).bind(I64), rec_null, 0); - - // TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending - // instructions for %al/%ax/%eax to %ax/%eax/%rax. 
- - // movsbl - e.enc32( - sextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), - ); - e.enc64( - sextend.bind(I32).bind(I8), - rec_urm_noflags.opcodes(&MOVSX_BYTE).rex(), - ); - e.enc64( - sextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), - ); - - // movswl - e.enc32( - sextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD), - ); - e.enc64( - sextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD).rex(), - ); - e.enc64( - sextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD), - ); - - // movsbq - e.enc64( - sextend.bind(I64).bind(I8), - rec_urm_noflags.opcodes(&MOVSX_BYTE).rex().w(), - ); - - // movswq - e.enc64( - sextend.bind(I64).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD).rex().w(), - ); - - // movslq - e.enc64( - sextend.bind(I64).bind(I32), - rec_urm_noflags.opcodes(&MOVSXD).rex().w(), - ); - - // movzbl - e.enc32( - uextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - e.enc64( - uextend.bind(I32).bind(I8), - rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), - ); - e.enc64( - uextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - - // movzwl - e.enc32( - uextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD), - ); - e.enc64( - uextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), - ); - e.enc64( - uextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD), - ); - - // movzbq, encoded as movzbl because it's equivalent and shorter. 
- e.enc64( - uextend.bind(I64).bind(I8), - rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), - ); - e.enc64( - uextend.bind(I64).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - - // movzwq, encoded as movzwl because it's equivalent and shorter - e.enc64( - uextend.bind(I64).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), - ); - e.enc64( - uextend.bind(I64).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD), - ); - - // A 32-bit register copy clears the high 32 bits. - e.enc64( - uextend.bind(I64).bind(I32), - rec_umr.opcodes(&MOV_STORE).rex(), - ); - e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(&MOV_STORE)); - - // Convert bool to int. - // - // This assumes that b1 is represented as an 8-bit low register with the value 0 - // or 1. - // - // Encode movzbq as movzbl, because it's equivalent and shorter. - for &to in &[I8, I16, I32, I64] { - for &from in &[B1, B8] { - e.enc64( - bint.bind(to).bind(from), - rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), - ); - e.enc64( - bint.bind(to).bind(from), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - if to != I64 { - e.enc32( - bint.bind(to).bind(from), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - } - } - } - for (to, from) in &[(I16, B16), (I32, B32), (I64, B64)] { - e.enc_both( - bint.bind(*to).bind(*from), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - } - - // Copy Special - // For x86-64, only define REX forms for now, since we can't describe the - // special regunit immediate operands with the current constraint language. - e.enc64(copy_special, rec_copysp.opcodes(&MOV_STORE).rex().w()); - e.enc32(copy_special, rec_copysp.opcodes(&MOV_STORE)); - - // Copy to SSA. These have to be done with special _rex_only encoders, because the standard - // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account - // the source register, which is specified directly in the instruction. 
- e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_both_rex_only( - copy_to_ssa.bind(I16), - rec_umr_reg_to_ssa.opcodes(&MOV_STORE), - ); - e.enc_both_rex_only( - copy_to_ssa.bind(F64), - rec_furm_reg_to_ssa.opcodes(&MOVSD_LOAD), - ); - e.enc_both_rex_only( - copy_to_ssa.bind(F32), - rec_furm_reg_to_ssa.opcodes(&MOVSS_LOAD), - ); - - e.enc_32_64_rec(dummy_sarg_t, rec_dummy_sarg_t, 0); -} - -#[inline(never)] -fn define_memory( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - x86: &InstructionGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. - let adjust_sp_down = shared.by_name("adjust_sp_down"); - let adjust_sp_down_imm = shared.by_name("adjust_sp_down_imm"); - let adjust_sp_up_imm = shared.by_name("adjust_sp_up_imm"); - let copy_nop = shared.by_name("copy_nop"); - let fill = shared.by_name("fill"); - let fill_nop = shared.by_name("fill_nop"); - let istore16 = shared.by_name("istore16"); - let istore16_complex = shared.by_name("istore16_complex"); - let istore32 = shared.by_name("istore32"); - let istore32_complex = shared.by_name("istore32_complex"); - let istore8 = shared.by_name("istore8"); - let istore8_complex = shared.by_name("istore8_complex"); - let load = shared.by_name("load"); - let load_complex = shared.by_name("load_complex"); - let regfill = shared.by_name("regfill"); - let regspill = shared.by_name("regspill"); - let sload16 = shared.by_name("sload16"); - let sload16_complex = shared.by_name("sload16_complex"); - let sload32 = shared.by_name("sload32"); - let sload32_complex = shared.by_name("sload32_complex"); - let sload8 = shared.by_name("sload8"); - let 
sload8_complex = shared.by_name("sload8_complex"); - let spill = shared.by_name("spill"); - let store = shared.by_name("store"); - let store_complex = shared.by_name("store_complex"); - let uload16 = shared.by_name("uload16"); - let uload16_complex = shared.by_name("uload16_complex"); - let uload32 = shared.by_name("uload32"); - let uload32_complex = shared.by_name("uload32_complex"); - let uload8 = shared.by_name("uload8"); - let uload8_complex = shared.by_name("uload8_complex"); - let x86_pop = x86.by_name("x86_pop"); - let x86_push = x86.by_name("x86_push"); - - // Shorthands for recipes. - let rec_adjustsp = r.template("adjustsp"); - let rec_adjustsp_ib = r.template("adjustsp_ib"); - let rec_adjustsp_id = r.template("adjustsp_id"); - let rec_ffillnull = r.recipe("ffillnull"); - let rec_fillnull = r.recipe("fillnull"); - let rec_fillSib32 = r.template("fillSib32"); - let rec_ld = r.template("ld"); - let rec_ldDisp32 = r.template("ldDisp32"); - let rec_ldDisp8 = r.template("ldDisp8"); - let rec_ldWithIndex = r.template("ldWithIndex"); - let rec_ldWithIndexDisp32 = r.template("ldWithIndexDisp32"); - let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8"); - let rec_popq = r.template("popq"); - let rec_pushq = r.template("pushq"); - let rec_regfill32 = r.template("regfill32"); - let rec_regspill32 = r.template("regspill32"); - let rec_spillSib32 = r.template("spillSib32"); - let rec_st = r.template("st"); - let rec_stacknull = r.recipe("stacknull"); - let rec_stDisp32 = r.template("stDisp32"); - let rec_stDisp32_abcd = r.template("stDisp32_abcd"); - let rec_stDisp8 = r.template("stDisp8"); - let rec_stDisp8_abcd = r.template("stDisp8_abcd"); - let rec_stWithIndex = r.template("stWithIndex"); - let rec_stWithIndexDisp32 = r.template("stWithIndexDisp32"); - let rec_stWithIndexDisp32_abcd = r.template("stWithIndexDisp32_abcd"); - let rec_stWithIndexDisp8 = r.template("stWithIndexDisp8"); - let rec_stWithIndexDisp8_abcd = r.template("stWithIndexDisp8_abcd"); - let 
rec_stWithIndex_abcd = r.template("stWithIndex_abcd"); - let rec_st_abcd = r.template("st_abcd"); - - // Loads and stores. - let is_load_complex_length_two = - InstructionPredicate::new_length_equals(&*formats.load_complex, 2); - - for recipe in &[rec_ldWithIndex, rec_ldWithIndexDisp8, rec_ldWithIndexDisp32] { - e.enc_i32_i64_instp( - load_complex, - recipe.opcodes(&MOV_LOAD), - is_load_complex_length_two.clone(), - ); - e.enc_r32_r64_instp( - load_complex, - recipe.opcodes(&MOV_LOAD), - is_load_complex_length_two.clone(), - ); - e.enc_x86_64_instp( - uload32_complex, - recipe.opcodes(&MOV_LOAD), - is_load_complex_length_two.clone(), - ); - - e.enc64_instp( - sload32_complex, - recipe.opcodes(&MOVSXD).rex().w(), - is_load_complex_length_two.clone(), - ); - - e.enc_i32_i64_instp( - uload16_complex, - recipe.opcodes(&MOVZX_WORD), - is_load_complex_length_two.clone(), - ); - e.enc_i32_i64_instp( - sload16_complex, - recipe.opcodes(&MOVSX_WORD), - is_load_complex_length_two.clone(), - ); - - e.enc_i32_i64_instp( - uload8_complex, - recipe.opcodes(&MOVZX_BYTE), - is_load_complex_length_two.clone(), - ); - - e.enc_i32_i64_instp( - sload8_complex, - recipe.opcodes(&MOVSX_BYTE), - is_load_complex_length_two.clone(), - ); - } - - let is_store_complex_length_three = - InstructionPredicate::new_length_equals(&*formats.store_complex, 3); - - for recipe in &[rec_stWithIndex, rec_stWithIndexDisp8, rec_stWithIndexDisp32] { - e.enc_i32_i64_instp( - store_complex, - recipe.opcodes(&MOV_STORE), - is_store_complex_length_three.clone(), - ); - e.enc_r32_r64_instp( - store_complex, - recipe.opcodes(&MOV_STORE), - is_store_complex_length_three.clone(), - ); - e.enc_x86_64_instp( - istore32_complex, - recipe.opcodes(&MOV_STORE), - is_store_complex_length_three.clone(), - ); - e.enc_both_instp( - istore16_complex.bind(I32), - recipe.opcodes(&MOV_STORE_16), - is_store_complex_length_three.clone(), - ); - e.enc_x86_64_instp( - istore16_complex.bind(I64), - recipe.opcodes(&MOV_STORE_16), - 
is_store_complex_length_three.clone(), - ); - } - - for recipe in &[ - rec_stWithIndex_abcd, - rec_stWithIndexDisp8_abcd, - rec_stWithIndexDisp32_abcd, - ] { - e.enc_both_instp( - istore8_complex.bind(I32), - recipe.opcodes(&MOV_BYTE_STORE), - is_store_complex_length_three.clone(), - ); - e.enc_x86_64_instp( - istore8_complex.bind(I64), - recipe.opcodes(&MOV_BYTE_STORE), - is_store_complex_length_three.clone(), - ); - } - - for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] { - e.enc_i32_i64_ld_st(store, true, recipe.opcodes(&MOV_STORE)); - e.enc_r32_r64_ld_st(store, true, recipe.opcodes(&MOV_STORE)); - e.enc_x86_64(istore32.bind(I64).bind(Any), recipe.opcodes(&MOV_STORE)); - e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(&MOV_STORE_16)); - } - - // Byte stores are more complicated because the registers they can address - // depends of the presence of a REX prefix. The st*_abcd recipes fall back to - // the corresponding st* recipes when a REX prefix is applied. - - for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] { - e.enc_both(istore8.bind(I32).bind(Any), recipe.opcodes(&MOV_BYTE_STORE)); - e.enc_x86_64(istore8.bind(I64).bind(Any), recipe.opcodes(&MOV_BYTE_STORE)); - } - - e.enc_i32_i64_explicit_rex(spill, rec_spillSib32.opcodes(&MOV_STORE)); - e.enc_i32_i64_explicit_rex(regspill, rec_regspill32.opcodes(&MOV_STORE)); - e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(&MOV_STORE)); - e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(&MOV_STORE)); - - // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid - // constraining the permitted registers. - // See MIN_SPILL_SLOT_SIZE which makes this safe. 
- - e.enc_both(spill.bind(B1), rec_spillSib32.opcodes(&MOV_STORE)); - e.enc_both(regspill.bind(B1), rec_regspill32.opcodes(&MOV_STORE)); - for &ty in &[I8, I16] { - e.enc_both(spill.bind(ty), rec_spillSib32.opcodes(&MOV_STORE)); - e.enc_both(regspill.bind(ty), rec_regspill32.opcodes(&MOV_STORE)); - } - - for recipe in &[rec_ld, rec_ldDisp8, rec_ldDisp32] { - e.enc_i32_i64_ld_st(load, true, recipe.opcodes(&MOV_LOAD)); - e.enc_r32_r64_ld_st(load, true, recipe.opcodes(&MOV_LOAD)); - e.enc_x86_64(uload32.bind(I64), recipe.opcodes(&MOV_LOAD)); - e.enc64(sload32.bind(I64), recipe.opcodes(&MOVSXD).rex().w()); - e.enc_i32_i64_ld_st(uload16, true, recipe.opcodes(&MOVZX_WORD)); - e.enc_i32_i64_ld_st(sload16, true, recipe.opcodes(&MOVSX_WORD)); - e.enc_i32_i64_ld_st(uload8, true, recipe.opcodes(&MOVZX_BYTE)); - e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(&MOVSX_BYTE)); - } - - e.enc_i32_i64_explicit_rex(fill, rec_fillSib32.opcodes(&MOV_LOAD)); - e.enc_i32_i64_explicit_rex(regfill, rec_regfill32.opcodes(&MOV_LOAD)); - e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(&MOV_LOAD)); - e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(&MOV_LOAD)); - - // No-op fills, created by late-stage redundant-fill removal. - for &ty in &[I64, I32, I16, I8] { - e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0); - e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0); - } - e.enc64_rec(fill_nop.bind(B1), rec_fillnull, 0); - e.enc32_rec(fill_nop.bind(B1), rec_fillnull, 0); - for &ty in &[F64, F32] { - e.enc64_rec(fill_nop.bind(ty), rec_ffillnull, 0); - e.enc32_rec(fill_nop.bind(ty), rec_ffillnull, 0); - } - for &ty in &[R64, R32] { - e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0); - e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0); - } - - // Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above. 
- - e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(&MOV_LOAD)); - e.enc_both(regfill.bind(B1), rec_regfill32.opcodes(&MOV_LOAD)); - for &ty in &[I8, I16] { - e.enc_both(fill.bind(ty), rec_fillSib32.opcodes(&MOV_LOAD)); - e.enc_both(regfill.bind(ty), rec_regfill32.opcodes(&MOV_LOAD)); - } - - // Push and Pop. - e.enc32(x86_push.bind(I32), rec_pushq.opcodes(&PUSH_REG)); - e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(&PUSH_REG)); - - e.enc32(x86_pop.bind(I32), rec_popq.opcodes(&POP_REG)); - e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(&POP_REG)); - - // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn - // into a no-op. - // The same encoding is generated for both the 64- and 32-bit architectures. - for &ty in &[I64, I32, I16, I8] { - e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); - e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); - } - for &ty in &[F64, F32] { - e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); - e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); - } - - // Adjust SP down by a dynamic value (or up, with a negative operand). - e.enc32(adjust_sp_down.bind(I32), rec_adjustsp.opcodes(&SUB)); - e.enc64( - adjust_sp_down.bind(I64), - rec_adjustsp.opcodes(&SUB).rex().w(), - ); - - // Adjust SP up by an immediate (or down, with a negative immediate). - e.enc32(adjust_sp_up_imm, rec_adjustsp_ib.opcodes(&CMP_IMM8)); - e.enc32(adjust_sp_up_imm, rec_adjustsp_id.opcodes(&CMP_IMM)); - e.enc64( - adjust_sp_up_imm, - rec_adjustsp_ib.opcodes(&CMP_IMM8).rex().w(), - ); - e.enc64( - adjust_sp_up_imm, - rec_adjustsp_id.opcodes(&CMP_IMM).rex().w(), - ); - - // Adjust SP down by an immediate (or up, with a negative immediate). 
- e.enc32( - adjust_sp_down_imm, - rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5), - ); - e.enc32(adjust_sp_down_imm, rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5)); - e.enc64( - adjust_sp_down_imm, - rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5).rex().w(), - ); - e.enc64( - adjust_sp_down_imm, - rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5).rex().w(), - ); -} - -#[inline(never)] -fn define_fpu_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { - let shared = &shared_defs.instructions; - - // Shorthands for instructions. - let bitcast = shared.by_name("bitcast"); - let copy = shared.by_name("copy"); - let regmove = shared.by_name("regmove"); - - // Shorthands for recipes. - let rec_frmov = r.template("frmov"); - let rec_frurm = r.template("frurm"); - let rec_furm = r.template("furm"); - let rec_rfumr = r.template("rfumr"); - - // Floating-point moves. - // movd - e.enc_both( - bitcast.bind(F32).bind(I32), - rec_frurm.opcodes(&MOVD_LOAD_XMM), - ); - e.enc_both( - bitcast.bind(I32).bind(F32), - rec_rfumr.opcodes(&MOVD_STORE_XMM), - ); - - // movq - e.enc64( - bitcast.bind(F64).bind(I64), - rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(), - ); - e.enc64( - bitcast.bind(I64).bind(F64), - rec_rfumr.opcodes(&MOVD_STORE_XMM).rex().w(), - ); - - // movaps - e.enc_both(copy.bind(F32), rec_furm.opcodes(&MOVAPS_LOAD)); - e.enc_both(copy.bind(F64), rec_furm.opcodes(&MOVAPS_LOAD)); - - // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit - // immediate operands with the current constraint language. - e.enc32(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD)); - e.enc64(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); - - // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit - // immediate operands with the current constraint language. 
- e.enc32(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD)); - e.enc64(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); -} - -#[inline(never)] -fn define_fpu_memory( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - - // Shorthands for instructions. - let fill = shared.by_name("fill"); - let load = shared.by_name("load"); - let load_complex = shared.by_name("load_complex"); - let regfill = shared.by_name("regfill"); - let regspill = shared.by_name("regspill"); - let spill = shared.by_name("spill"); - let store = shared.by_name("store"); - let store_complex = shared.by_name("store_complex"); - - // Shorthands for recipes. - let rec_ffillSib32 = r.template("ffillSib32"); - let rec_fld = r.template("fld"); - let rec_fldDisp32 = r.template("fldDisp32"); - let rec_fldDisp8 = r.template("fldDisp8"); - let rec_fldWithIndex = r.template("fldWithIndex"); - let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); - let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); - let rec_fregfill32 = r.template("fregfill32"); - let rec_fregspill32 = r.template("fregspill32"); - let rec_fspillSib32 = r.template("fspillSib32"); - let rec_fst = r.template("fst"); - let rec_fstDisp32 = r.template("fstDisp32"); - let rec_fstDisp8 = r.template("fstDisp8"); - let rec_fstWithIndex = r.template("fstWithIndex"); - let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); - let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); - - // Float loads and stores. 
- e.enc_both(load.bind(F32).bind(Any), rec_fld.opcodes(&MOVSS_LOAD)); - e.enc_both(load.bind(F32).bind(Any), rec_fldDisp8.opcodes(&MOVSS_LOAD)); - e.enc_both(load.bind(F32).bind(Any), rec_fldDisp32.opcodes(&MOVSS_LOAD)); - - e.enc_both( - load_complex.bind(F32), - rec_fldWithIndex.opcodes(&MOVSS_LOAD), - ); - e.enc_both( - load_complex.bind(F32), - rec_fldWithIndexDisp8.opcodes(&MOVSS_LOAD), - ); - e.enc_both( - load_complex.bind(F32), - rec_fldWithIndexDisp32.opcodes(&MOVSS_LOAD), - ); - - e.enc_both(load.bind(F64).bind(Any), rec_fld.opcodes(&MOVSD_LOAD)); - e.enc_both(load.bind(F64).bind(Any), rec_fldDisp8.opcodes(&MOVSD_LOAD)); - e.enc_both(load.bind(F64).bind(Any), rec_fldDisp32.opcodes(&MOVSD_LOAD)); - - e.enc_both( - load_complex.bind(F64), - rec_fldWithIndex.opcodes(&MOVSD_LOAD), - ); - e.enc_both( - load_complex.bind(F64), - rec_fldWithIndexDisp8.opcodes(&MOVSD_LOAD), - ); - e.enc_both( - load_complex.bind(F64), - rec_fldWithIndexDisp32.opcodes(&MOVSD_LOAD), - ); - - e.enc_both(store.bind(F32).bind(Any), rec_fst.opcodes(&MOVSS_STORE)); - e.enc_both( - store.bind(F32).bind(Any), - rec_fstDisp8.opcodes(&MOVSS_STORE), - ); - e.enc_both( - store.bind(F32).bind(Any), - rec_fstDisp32.opcodes(&MOVSS_STORE), - ); - - e.enc_both( - store_complex.bind(F32), - rec_fstWithIndex.opcodes(&MOVSS_STORE), - ); - e.enc_both( - store_complex.bind(F32), - rec_fstWithIndexDisp8.opcodes(&MOVSS_STORE), - ); - e.enc_both( - store_complex.bind(F32), - rec_fstWithIndexDisp32.opcodes(&MOVSS_STORE), - ); - - e.enc_both(store.bind(F64).bind(Any), rec_fst.opcodes(&MOVSD_STORE)); - e.enc_both( - store.bind(F64).bind(Any), - rec_fstDisp8.opcodes(&MOVSD_STORE), - ); - e.enc_both( - store.bind(F64).bind(Any), - rec_fstDisp32.opcodes(&MOVSD_STORE), - ); - - e.enc_both( - store_complex.bind(F64), - rec_fstWithIndex.opcodes(&MOVSD_STORE), - ); - e.enc_both( - store_complex.bind(F64), - rec_fstWithIndexDisp8.opcodes(&MOVSD_STORE), - ); - e.enc_both( - store_complex.bind(F64), - 
rec_fstWithIndexDisp32.opcodes(&MOVSD_STORE), - ); - - e.enc_both(fill.bind(F32), rec_ffillSib32.opcodes(&MOVSS_LOAD)); - e.enc_both(regfill.bind(F32), rec_fregfill32.opcodes(&MOVSS_LOAD)); - e.enc_both(fill.bind(F64), rec_ffillSib32.opcodes(&MOVSD_LOAD)); - e.enc_both(regfill.bind(F64), rec_fregfill32.opcodes(&MOVSD_LOAD)); - - e.enc_both(spill.bind(F32), rec_fspillSib32.opcodes(&MOVSS_STORE)); - e.enc_both(regspill.bind(F32), rec_fregspill32.opcodes(&MOVSS_STORE)); - e.enc_both(spill.bind(F64), rec_fspillSib32.opcodes(&MOVSD_STORE)); - e.enc_both(regspill.bind(F64), rec_fregspill32.opcodes(&MOVSD_STORE)); -} - -#[inline(never)] -fn define_fpu_ops( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - x86: &InstructionGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. - let ceil = shared.by_name("ceil"); - let f32const = shared.by_name("f32const"); - let f64const = shared.by_name("f64const"); - let fadd = shared.by_name("fadd"); - let fcmp = shared.by_name("fcmp"); - let fcvt_from_sint = shared.by_name("fcvt_from_sint"); - let fdemote = shared.by_name("fdemote"); - let fdiv = shared.by_name("fdiv"); - let ffcmp = shared.by_name("ffcmp"); - let floor = shared.by_name("floor"); - let fmul = shared.by_name("fmul"); - let fpromote = shared.by_name("fpromote"); - let fsub = shared.by_name("fsub"); - let nearest = shared.by_name("nearest"); - let sqrt = shared.by_name("sqrt"); - let trunc = shared.by_name("trunc"); - let x86_cvtt2si = x86.by_name("x86_cvtt2si"); - let x86_fmax = x86.by_name("x86_fmax"); - let x86_fmin = x86.by_name("x86_fmin"); - - // Shorthands for recipes. 
- let rec_f32imm_z = r.template("f32imm_z"); - let rec_f64imm_z = r.template("f64imm_z"); - let rec_fa = r.template("fa"); - let rec_fcmp = r.template("fcmp"); - let rec_fcscc = r.template("fcscc"); - let rec_frurm = r.template("frurm"); - let rec_furm = r.template("furm"); - let rec_furmi_rnd = r.template("furmi_rnd"); - let rec_rfurm = r.template("rfurm"); - - // Predicates shorthands. - let use_sse41 = settings.predicate_by_name("use_sse41"); - - // Floating-point constants equal to 0.0 can be encoded using either `xorps` or `xorpd`, for - // 32-bit and 64-bit floats respectively. - let is_zero_32_bit_float = - InstructionPredicate::new_is_zero_32bit_float(&*formats.unary_ieee32, "imm"); - e.enc32_instp( - f32const, - rec_f32imm_z.opcodes(&XORPS), - is_zero_32_bit_float.clone(), - ); - - let is_zero_64_bit_float = - InstructionPredicate::new_is_zero_64bit_float(&*formats.unary_ieee64, "imm"); - e.enc32_instp( - f64const, - rec_f64imm_z.opcodes(&XORPD), - is_zero_64_bit_float.clone(), - ); - - e.enc_x86_64_instp(f32const, rec_f32imm_z.opcodes(&XORPS), is_zero_32_bit_float); - e.enc_x86_64_instp(f64const, rec_f64imm_z.opcodes(&XORPD), is_zero_64_bit_float); - - // cvtsi2ss - e.enc_i32_i64(fcvt_from_sint.bind(F32), rec_frurm.opcodes(&CVTSI2SS)); - - // cvtsi2sd - e.enc_i32_i64(fcvt_from_sint.bind(F64), rec_frurm.opcodes(&CVTSI2SD)); - - // cvtss2sd - e.enc_both(fpromote.bind(F64).bind(F32), rec_furm.opcodes(&CVTSS2SD)); - - // cvtsd2ss - e.enc_both(fdemote.bind(F32).bind(F64), rec_furm.opcodes(&CVTSD2SS)); - - // cvttss2si - e.enc_both( - x86_cvtt2si.bind(I32).bind(F32), - rec_rfurm.opcodes(&CVTTSS2SI), - ); - e.enc64( - x86_cvtt2si.bind(I64).bind(F32), - rec_rfurm.opcodes(&CVTTSS2SI).rex().w(), - ); - - // cvttsd2si - e.enc_both( - x86_cvtt2si.bind(I32).bind(F64), - rec_rfurm.opcodes(&CVTTSD2SI), - ); - e.enc64( - x86_cvtt2si.bind(I64).bind(F64), - rec_rfurm.opcodes(&CVTTSD2SI).rex().w(), - ); - - // Exact square roots. 
- e.enc_both(sqrt.bind(F32), rec_furm.opcodes(&SQRTSS)); - e.enc_both(sqrt.bind(F64), rec_furm.opcodes(&SQRTSD)); - - // Rounding. The recipe looks at the opcode to pick an immediate. - for inst in &[nearest, floor, ceil, trunc] { - e.enc_both_isap(inst.bind(F32), rec_furmi_rnd.opcodes(&ROUNDSS), use_sse41); - e.enc_both_isap(inst.bind(F64), rec_furmi_rnd.opcodes(&ROUNDSD), use_sse41); - } - - // Binary arithmetic ops. - e.enc_both(fadd.bind(F32), rec_fa.opcodes(&ADDSS)); - e.enc_both(fadd.bind(F64), rec_fa.opcodes(&ADDSD)); - - e.enc_both(fsub.bind(F32), rec_fa.opcodes(&SUBSS)); - e.enc_both(fsub.bind(F64), rec_fa.opcodes(&SUBSD)); - - e.enc_both(fmul.bind(F32), rec_fa.opcodes(&MULSS)); - e.enc_both(fmul.bind(F64), rec_fa.opcodes(&MULSD)); - - e.enc_both(fdiv.bind(F32), rec_fa.opcodes(&DIVSS)); - e.enc_both(fdiv.bind(F64), rec_fa.opcodes(&DIVSD)); - - e.enc_both(x86_fmin.bind(F32), rec_fa.opcodes(&MINSS)); - e.enc_both(x86_fmin.bind(F64), rec_fa.opcodes(&MINSD)); - - e.enc_both(x86_fmax.bind(F32), rec_fa.opcodes(&MAXSS)); - e.enc_both(x86_fmax.bind(F64), rec_fa.opcodes(&MAXSD)); - - // Comparisons. - // - // This only covers the condition codes in `supported_floatccs`, the rest are - // handled by legalization patterns. - e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(&UCOMISS)); - e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(&UCOMISD)); - e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(&UCOMISS)); - e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(&UCOMISD)); -} - -#[inline(never)] -fn define_alu( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - x86: &InstructionGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - - // Shorthands for instructions. 
- let clz = shared.by_name("clz"); - let ctz = shared.by_name("ctz"); - let icmp = shared.by_name("icmp"); - let icmp_imm = shared.by_name("icmp_imm"); - let ifcmp = shared.by_name("ifcmp"); - let ifcmp_imm = shared.by_name("ifcmp_imm"); - let ifcmp_sp = shared.by_name("ifcmp_sp"); - let ishl = shared.by_name("ishl"); - let ishl_imm = shared.by_name("ishl_imm"); - let popcnt = shared.by_name("popcnt"); - let rotl = shared.by_name("rotl"); - let rotl_imm = shared.by_name("rotl_imm"); - let rotr = shared.by_name("rotr"); - let rotr_imm = shared.by_name("rotr_imm"); - let selectif = shared.by_name("selectif"); - let selectif_spectre_guard = shared.by_name("selectif_spectre_guard"); - let sshr = shared.by_name("sshr"); - let sshr_imm = shared.by_name("sshr_imm"); - let trueff = shared.by_name("trueff"); - let trueif = shared.by_name("trueif"); - let ushr = shared.by_name("ushr"); - let ushr_imm = shared.by_name("ushr_imm"); - let x86_bsf = x86.by_name("x86_bsf"); - let x86_bsr = x86.by_name("x86_bsr"); - - // Shorthands for recipes. - let rec_bsf_and_bsr = r.template("bsf_and_bsr"); - let rec_cmov = r.template("cmov"); - let rec_icscc = r.template("icscc"); - let rec_icscc_ib = r.template("icscc_ib"); - let rec_icscc_id = r.template("icscc_id"); - let rec_rcmp = r.template("rcmp"); - let rec_rcmp_ib = r.template("rcmp_ib"); - let rec_rcmp_id = r.template("rcmp_id"); - let rec_rcmp_sp = r.template("rcmp_sp"); - let rec_rc = r.template("rc"); - let rec_setf_abcd = r.template("setf_abcd"); - let rec_seti_abcd = r.template("seti_abcd"); - let rec_urm = r.template("urm"); - - // Predicates shorthands. 
- let use_popcnt = settings.predicate_by_name("use_popcnt"); - let use_lzcnt = settings.predicate_by_name("use_lzcnt"); - let use_bmi1 = settings.predicate_by_name("use_bmi1"); - - let band = shared.by_name("band"); - let band_imm = shared.by_name("band_imm"); - let band_not = shared.by_name("band_not"); - let bnot = shared.by_name("bnot"); - let bor = shared.by_name("bor"); - let bor_imm = shared.by_name("bor_imm"); - let bxor = shared.by_name("bxor"); - let bxor_imm = shared.by_name("bxor_imm"); - let iadd = shared.by_name("iadd"); - let iadd_ifcarry = shared.by_name("iadd_ifcarry"); - let iadd_ifcin = shared.by_name("iadd_ifcin"); - let iadd_ifcout = shared.by_name("iadd_ifcout"); - let iadd_imm = shared.by_name("iadd_imm"); - let imul = shared.by_name("imul"); - let isub = shared.by_name("isub"); - let isub_ifbin = shared.by_name("isub_ifbin"); - let isub_ifborrow = shared.by_name("isub_ifborrow"); - let isub_ifbout = shared.by_name("isub_ifbout"); - let x86_sdivmodx = x86.by_name("x86_sdivmodx"); - let x86_smulx = x86.by_name("x86_smulx"); - let x86_udivmodx = x86.by_name("x86_udivmodx"); - let x86_umulx = x86.by_name("x86_umulx"); - - let rec_div = r.template("div"); - let rec_fa = r.template("fa"); - let rec_fax = r.template("fax"); - let rec_mulx = r.template("mulx"); - let rec_r_ib = r.template("r_ib"); - let rec_r_id = r.template("r_id"); - let rec_rin = r.template("rin"); - let rec_rio = r.template("rio"); - let rec_rout = r.template("rout"); - let rec_rr = r.template("rr"); - let rec_rrx = r.template("rrx"); - let rec_ur = r.template("ur"); - - e.enc_i32_i64(iadd, rec_rr.opcodes(&ADD)); - e.enc_i32_i64(iadd_ifcout, rec_rout.opcodes(&ADD)); - e.enc_i32_i64(iadd_ifcin, rec_rin.opcodes(&ADC)); - e.enc_i32_i64(iadd_ifcarry, rec_rio.opcodes(&ADC)); - e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(&ADD_IMM8_SIGN_EXTEND).rrr(0)); - e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(&ADD_IMM).rrr(0)); - - e.enc_i32_i64(isub, rec_rr.opcodes(&SUB)); - e.enc_i32_i64(isub_ifbout, 
rec_rout.opcodes(&SUB)); - e.enc_i32_i64(isub_ifbin, rec_rin.opcodes(&SBB)); - e.enc_i32_i64(isub_ifborrow, rec_rio.opcodes(&SBB)); - - e.enc_i32_i64(band, rec_rr.opcodes(&AND)); - e.enc_b32_b64(band, rec_rr.opcodes(&AND)); - - // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can - // even use the single-byte immediate for 0xffff_ffXX masks. - - e.enc_i32_i64(band_imm, rec_r_ib.opcodes(&AND_IMM8_SIGN_EXTEND).rrr(4)); - e.enc_i32_i64(band_imm, rec_r_id.opcodes(&AND_IMM).rrr(4)); - - e.enc_i32_i64(bor, rec_rr.opcodes(&OR)); - e.enc_b32_b64(bor, rec_rr.opcodes(&OR)); - e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(&OR_IMM8_SIGN_EXTEND).rrr(1)); - e.enc_i32_i64(bor_imm, rec_r_id.opcodes(&OR_IMM).rrr(1)); - - e.enc_i32_i64(bxor, rec_rr.opcodes(&XOR)); - e.enc_b32_b64(bxor, rec_rr.opcodes(&XOR)); - e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(&XOR_IMM8_SIGN_EXTEND).rrr(6)); - e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(&XOR_IMM).rrr(6)); - - // x86 has a bitwise not instruction NOT. - e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2)); - e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2)); - e.enc_both(bnot.bind(B1), rec_ur.opcodes(&NOT).rrr(2)); - - // Also add a `b1` encodings for the logic instructions. - // TODO: Should this be done with 8-bit instructions? It would improve partial register - // dependencies. - e.enc_both(band.bind(B1), rec_rr.opcodes(&AND)); - e.enc_both(bor.bind(B1), rec_rr.opcodes(&OR)); - e.enc_both(bxor.bind(B1), rec_rr.opcodes(&XOR)); - - e.enc_i32_i64(imul, rec_rrx.opcodes(&IMUL)); - e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(&IDIV).rrr(7)); - e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(&DIV).rrr(6)); - - e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(&IMUL_RDX_RAX).rrr(5)); - e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(&MUL).rrr(4)); - - // Binary bitwise ops. - // - // The F64 version is intentionally encoded using the single-precision opcode: - // the operation is identical and the encoding is one byte shorter. 
- e.enc_both(band.bind(F32), rec_fa.opcodes(&ANDPS)); - e.enc_both(band.bind(F64), rec_fa.opcodes(&ANDPS)); - - e.enc_both(bor.bind(F32), rec_fa.opcodes(&ORPS)); - e.enc_both(bor.bind(F64), rec_fa.opcodes(&ORPS)); - - e.enc_both(bxor.bind(F32), rec_fa.opcodes(&XORPS)); - e.enc_both(bxor.bind(F64), rec_fa.opcodes(&XORPS)); - - // The `andnps(x,y)` instruction computes `~x&y`, while band_not(x,y)` is `x&~y. - e.enc_both(band_not.bind(F32), rec_fax.opcodes(&ANDNPS)); - e.enc_both(band_not.bind(F64), rec_fax.opcodes(&ANDNPS)); - - // Shifts and rotates. - // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit - // and 16-bit shifts would need explicit masking. - - for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] { - // Cannot use enc_i32_i64 for this pattern because instructions require - // to bind any. - e.enc32(inst.bind(I32).bind(I8), rec_rc.opcodes(&ROTATE_CL).rrr(rrr)); - e.enc32( - inst.bind(I32).bind(I16), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr), - ); - e.enc32( - inst.bind(I32).bind(I32), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr), - ); - e.enc64( - inst.bind(I64).bind(Any), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex().w(), - ); - e.enc64( - inst.bind(I32).bind(Any), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex(), - ); - e.enc64( - inst.bind(I32).bind(Any), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr), - ); - } - - e.enc_i32_i64(rotl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(0)); - e.enc_i32_i64(rotr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(1)); - e.enc_i32_i64(ishl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(4)); - e.enc_i32_i64(ushr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(5)); - e.enc_i32_i64(sshr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(7)); - - // Population count. 
- e.enc32_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt); - e.enc64_isap( - popcnt.bind(I64), - rec_urm.opcodes(&POPCNT).rex().w(), - use_popcnt, - ); - e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT).rex(), use_popcnt); - e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt); - - // Count leading zero bits. - e.enc32_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt); - e.enc64_isap(clz.bind(I64), rec_urm.opcodes(&LZCNT).rex().w(), use_lzcnt); - e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT).rex(), use_lzcnt); - e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt); - - // Count trailing zero bits. - e.enc32_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1); - e.enc64_isap(ctz.bind(I64), rec_urm.opcodes(&TZCNT).rex().w(), use_bmi1); - e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT).rex(), use_bmi1); - e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1); - - // Bit scan forwards and reverse - e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(&BIT_SCAN_FORWARD)); - e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(&BIT_SCAN_REVERSE)); - - // Comparisons - e.enc_i32_i64(icmp, rec_icscc.opcodes(&CMP_REG)); - e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(&CMP_IMM8).rrr(7)); - e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(&CMP_IMM).rrr(7)); - e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(&CMP_REG)); - e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(&CMP_IMM8).rrr(7)); - e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(&CMP_IMM).rrr(7)); - // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x). - - e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(&CMP_REG)); - e.enc64(ifcmp_sp.bind(I64), rec_rcmp_sp.opcodes(&CMP_REG).rex().w()); - - // Convert flags to bool. - // This encodes `b1` as an 8-bit low register with the value 0 or 1. - e.enc_both(trueif, rec_seti_abcd.opcodes(&SET_BYTE_IF_OVERFLOW)); - e.enc_both(trueff, rec_setf_abcd.opcodes(&SET_BYTE_IF_OVERFLOW)); - - // Conditional move (a.k.a integer select). 
- e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW)); - // A Spectre-guard integer select is exactly the same as a selectif, but - // is not associated with any other legalization rules and is not - // recognized by any optimizations, so it must arrive here unmodified - // and in its original place. - e.enc_i32_i64(selectif_spectre_guard, rec_cmov.opcodes(&CMOV_OVERFLOW)); -} - -#[inline(never)] -#[allow(clippy::cognitive_complexity)] -fn define_simd( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - x86: &InstructionGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. - let avg_round = shared.by_name("avg_round"); - let bitcast = shared.by_name("bitcast"); - let bor = shared.by_name("bor"); - let bxor = shared.by_name("bxor"); - let copy = shared.by_name("copy"); - let copy_nop = shared.by_name("copy_nop"); - let copy_to_ssa = shared.by_name("copy_to_ssa"); - let fadd = shared.by_name("fadd"); - let fcmp = shared.by_name("fcmp"); - let fcvt_from_sint = shared.by_name("fcvt_from_sint"); - let fdiv = shared.by_name("fdiv"); - let fill = shared.by_name("fill"); - let fill_nop = shared.by_name("fill_nop"); - let fmul = shared.by_name("fmul"); - let fsub = shared.by_name("fsub"); - let iabs = shared.by_name("iabs"); - let iadd = shared.by_name("iadd"); - let icmp = shared.by_name("icmp"); - let imul = shared.by_name("imul"); - let ishl_imm = shared.by_name("ishl_imm"); - let load = shared.by_name("load"); - let load_complex = shared.by_name("load_complex"); - let raw_bitcast = shared.by_name("raw_bitcast"); - let regfill = shared.by_name("regfill"); - let regmove = shared.by_name("regmove"); - let regspill = shared.by_name("regspill"); - let sadd_sat = shared.by_name("sadd_sat"); - let scalar_to_vector = shared.by_name("scalar_to_vector"); - let sload8x8 = shared.by_name("sload8x8"); - let sload8x8_complex = 
shared.by_name("sload8x8_complex"); - let sload16x4 = shared.by_name("sload16x4"); - let sload16x4_complex = shared.by_name("sload16x4_complex"); - let sload32x2 = shared.by_name("sload32x2"); - let sload32x2_complex = shared.by_name("sload32x2_complex"); - let spill = shared.by_name("spill"); - let sqrt = shared.by_name("sqrt"); - let sshr_imm = shared.by_name("sshr_imm"); - let ssub_sat = shared.by_name("ssub_sat"); - let store = shared.by_name("store"); - let store_complex = shared.by_name("store_complex"); - let swiden_low = shared.by_name("swiden_low"); - let uadd_sat = shared.by_name("uadd_sat"); - let uload8x8 = shared.by_name("uload8x8"); - let uload8x8_complex = shared.by_name("uload8x8_complex"); - let uload16x4 = shared.by_name("uload16x4"); - let uload16x4_complex = shared.by_name("uload16x4_complex"); - let uload32x2 = shared.by_name("uload32x2"); - let uload32x2_complex = shared.by_name("uload32x2_complex"); - let snarrow = shared.by_name("snarrow"); - let unarrow = shared.by_name("unarrow"); - let uwiden_low = shared.by_name("uwiden_low"); - let ushr_imm = shared.by_name("ushr_imm"); - let usub_sat = shared.by_name("usub_sat"); - let vconst = shared.by_name("vconst"); - let vselect = shared.by_name("vselect"); - let widening_pairwise_dot_product_s = shared.by_name("widening_pairwise_dot_product_s"); - let x86_cvtt2si = x86.by_name("x86_cvtt2si"); - let x86_insertps = x86.by_name("x86_insertps"); - let x86_fmax = x86.by_name("x86_fmax"); - let x86_fmin = x86.by_name("x86_fmin"); - let x86_movlhps = x86.by_name("x86_movlhps"); - let x86_movsd = x86.by_name("x86_movsd"); - let x86_pblendw = x86.by_name("x86_pblendw"); - let x86_pextr = x86.by_name("x86_pextr"); - let x86_pinsr = x86.by_name("x86_pinsr"); - let x86_pmaxs = x86.by_name("x86_pmaxs"); - let x86_pmaxu = x86.by_name("x86_pmaxu"); - let x86_pmins = x86.by_name("x86_pmins"); - let x86_pminu = x86.by_name("x86_pminu"); - let x86_pmullq = x86.by_name("x86_pmullq"); - let x86_pmuludq = 
x86.by_name("x86_pmuludq"); - let x86_palignr = x86.by_name("x86_palignr"); - let x86_pshufb = x86.by_name("x86_pshufb"); - let x86_pshufd = x86.by_name("x86_pshufd"); - let x86_psll = x86.by_name("x86_psll"); - let x86_psra = x86.by_name("x86_psra"); - let x86_psrl = x86.by_name("x86_psrl"); - let x86_ptest = x86.by_name("x86_ptest"); - let x86_punpckh = x86.by_name("x86_punpckh"); - let x86_punpckl = x86.by_name("x86_punpckl"); - let x86_vcvtudq2ps = x86.by_name("x86_vcvtudq2ps"); - - // Shorthands for recipes. - let rec_blend = r.template("blend"); - let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128"); - let rec_evex_reg_rm_128 = r.template("evex_reg_rm_128"); - let rec_f_ib = r.template("f_ib"); - let rec_fa = r.template("fa"); - let rec_fa_ib = r.template("fa_ib"); - let rec_fax = r.template("fax"); - let rec_fcmp = r.template("fcmp"); - let rec_ffillSib32 = r.template("ffillSib32"); - let rec_ffillnull = r.recipe("ffillnull"); - let rec_fld = r.template("fld"); - let rec_fldDisp32 = r.template("fldDisp32"); - let rec_fldDisp8 = r.template("fldDisp8"); - let rec_fldWithIndex = r.template("fldWithIndex"); - let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); - let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); - let rec_fregfill32 = r.template("fregfill32"); - let rec_fregspill32 = r.template("fregspill32"); - let rec_frmov = r.template("frmov"); - let rec_frurm = r.template("frurm"); - let rec_fspillSib32 = r.template("fspillSib32"); - let rec_fst = r.template("fst"); - let rec_fstDisp32 = r.template("fstDisp32"); - let rec_fstDisp8 = r.template("fstDisp8"); - let rec_fstWithIndex = r.template("fstWithIndex"); - let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); - let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); - let rec_furm = r.template("furm"); - let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa"); - let rec_icscc_fpr = r.template("icscc_fpr"); - let rec_null_fpr = r.recipe("null_fpr"); - let 
rec_pfcmp = r.template("pfcmp"); - let rec_r_ib_unsigned_fpr = r.template("r_ib_unsigned_fpr"); - let rec_r_ib_unsigned_gpr = r.template("r_ib_unsigned_gpr"); - let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r"); - let rec_stacknull = r.recipe("stacknull"); - let rec_vconst = r.template("vconst"); - let rec_vconst_optimized = r.template("vconst_optimized"); - - // Predicates shorthands. - settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic"); - settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic"); - let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd"); - let use_sse41_simd = settings.predicate_by_name("use_sse41_simd"); - let use_sse42_simd = settings.predicate_by_name("use_sse42_simd"); - let use_avx512dq_simd = settings.predicate_by_name("use_avx512dq_simd"); - let use_avx512vl_simd = settings.predicate_by_name("use_avx512vl_simd"); - - // SIMD vector size: eventually multiple vector sizes may be supported but for now only - // SSE-sized vectors are available. - let sse_vector_size: u64 = 128; - - // SIMD splat: before x86 can use vector data, it must be moved to XMM registers; see - // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the - // value across the register. - - let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128; - - // PSHUFB, 8-bit shuffle using two XMM registers. - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let instruction = x86_pshufb.bind(vector(ty, sse_vector_size)); - let template = rec_fa.opcodes(&PSHUFB); - e.enc_both_inferred_maybe_isap(instruction.clone(), template.clone(), Some(use_ssse3_simd)); - } - - // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate. 
- for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) { - let instruction = x86_pshufd.bind(vector(ty, sse_vector_size)); - let template = rec_r_ib_unsigned_fpr.opcodes(&PSHUFD); - e.enc_both_inferred(instruction, template); - } - - // SIMD vselect; controlling value of vselect is a boolean vector, so each lane should be - // either all ones or all zeroes - it makes it possible to always use 8-bit PBLENDVB; - // for 32/64-bit lanes we can also use BLENDVPS and BLENDVPD - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let opcode = match ty.lane_bits() { - 32 => &BLENDVPS, - 64 => &BLENDVPD, - _ => &PBLENDVB, - }; - let instruction = vselect.bind(vector(ty, sse_vector_size)); - let template = rec_blend.opcodes(opcode); - e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); - } - - // PBLENDW, select lanes using a u8 immediate. - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) { - let instruction = x86_pblendw.bind(vector(ty, sse_vector_size)); - let template = rec_fa_ib.opcodes(&PBLENDW); - e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); - } - - // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according - // to the Intel manual: "When the destination operand is an XMM register, the source operand is - // written to the low doubleword of the register and the register is zero-extended to 128 bits." - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let instruction = scalar_to_vector.bind(vector(ty, sse_vector_size)); - if ty.is_float() { - // No need to move floats--they already live in XMM registers. - e.enc_32_64_rec(instruction, rec_null_fpr, 0); - } else { - let template = rec_frurm.opcodes(&MOVD_LOAD_XMM); - if ty.lane_bits() < 64 { - e.enc_both_inferred(instruction, template); - } else { - // No 32-bit encodings for 64-bit widths. 
- assert_eq!(ty.lane_bits(), 64); - e.enc64(instruction, template.rex().w()); - } - } - } - - // SIMD insertlane - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let (opcode, isap): (&[_], _) = match ty.lane_bits() { - 8 => (&PINSRB, Some(use_sse41_simd)), - 16 => (&PINSRW, None), - 32 | 64 => (&PINSR, Some(use_sse41_simd)), - _ => panic!("invalid size for SIMD insertlane"), - }; - - let instruction = x86_pinsr.bind(vector(ty, sse_vector_size)); - let template = rec_r_ib_unsigned_r.opcodes(opcode); - if ty.lane_bits() < 64 { - e.enc_both_inferred_maybe_isap(instruction, template, isap); - } else { - // It turns out the 64-bit widths have REX/W encodings and only are available on - // x86_64. - e.enc64_maybe_isap(instruction, template.rex().w(), isap); - } - } - - // For legalizing insertlane with floats, INSERTPS from SSE4.1. - { - let instruction = x86_insertps.bind(vector(F32, sse_vector_size)); - let template = rec_fa_ib.opcodes(&INSERTPS); - e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); - } - - // For legalizing insertlane with floats, MOVSD from SSE2. - { - let instruction = x86_movsd.bind(vector(F64, sse_vector_size)); - let template = rec_fa.opcodes(&MOVSD_LOAD); - e.enc_both_inferred(instruction, template); // from SSE2 - } - - // For legalizing insertlane with floats, MOVLHPS from SSE. 
- { - let instruction = x86_movlhps.bind(vector(F64, sse_vector_size)); - let template = rec_fa.opcodes(&MOVLHPS); - e.enc_both_inferred(instruction, template); // from SSE - } - - // SIMD extractlane - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let opcode = match ty.lane_bits() { - 8 => &PEXTRB, - 16 => &PEXTRW, - 32 | 64 => &PEXTR, - _ => panic!("invalid size for SIMD extractlane"), - }; - - let instruction = x86_pextr.bind(vector(ty, sse_vector_size)); - let template = rec_r_ib_unsigned_gpr.opcodes(opcode); - if ty.lane_bits() < 64 { - e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); - } else { - // It turns out the 64-bit widths have REX/W encodings and only are available on - // x86_64. - e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd)); - } - } - - // SIMD packing/unpacking - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let (high, low) = match ty.lane_bits() { - 8 => (&PUNPCKHBW, &PUNPCKLBW), - 16 => (&PUNPCKHWD, &PUNPCKLWD), - 32 => (&PUNPCKHDQ, &PUNPCKLDQ), - 64 => (&PUNPCKHQDQ, &PUNPCKLQDQ), - _ => panic!("invalid size for SIMD packing/unpacking"), - }; - - e.enc_both_inferred( - x86_punpckh.bind(vector(ty, sse_vector_size)), - rec_fa.opcodes(high), - ); - e.enc_both_inferred( - x86_punpckl.bind(vector(ty, sse_vector_size)), - rec_fa.opcodes(low), - ); - } - - // SIMD narrow/widen - for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] { - let snarrow = snarrow.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes)); - } - for (ty, opcodes, isap) in &[ - (I16, &PACKUSWB[..], None), - (I32, &PACKUSDW[..], Some(use_sse41_simd)), - ] { - let unarrow = unarrow.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap); - } - for (ty, swiden_opcode, uwiden_opcode) in &[ - (I8, &PMOVSXBW[..], &PMOVZXBW[..]), - (I16, &PMOVSXWD[..], &PMOVZXWD[..]), - ] { - let isap = 
Some(use_sse41_simd); - let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap); - let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap); - } - for ty in &[I8, I16, I32, I64] { - e.enc_both_inferred_maybe_isap( - x86_palignr.bind(vector(*ty, sse_vector_size)), - rec_fa_ib.opcodes(&PALIGNR[..]), - Some(use_ssse3_simd), - ); - } - - // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8). - for from_type in ValueType::all_lane_types().filter(allowed_simd_type) { - for to_type in - ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type) - { - let instruction = raw_bitcast - .bind(vector(to_type, sse_vector_size)) - .bind(vector(from_type, sse_vector_size)); - e.enc_32_64_rec(instruction, rec_null_fpr, 0); - } - } - - // SIMD raw bitcast floats to vector (and back); assumes that floats are already stored in an - // XMM register. 
- for float_type in &[F32, F64] { - for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) { - e.enc_32_64_rec( - raw_bitcast - .bind(vector(lane_type, sse_vector_size)) - .bind(*float_type), - rec_null_fpr, - 0, - ); - e.enc_32_64_rec( - raw_bitcast - .bind(*float_type) - .bind(vector(lane_type, sse_vector_size)), - rec_null_fpr, - 0, - ); - } - } - - // SIMD conversions - { - let fcvt_from_sint_32 = fcvt_from_sint - .bind(vector(F32, sse_vector_size)) - .bind(vector(I32, sse_vector_size)); - e.enc_both(fcvt_from_sint_32, rec_furm.opcodes(&CVTDQ2PS)); - - e.enc_32_64_maybe_isap( - x86_vcvtudq2ps, - rec_evex_reg_rm_128.opcodes(&VCVTUDQ2PS), - Some(use_avx512vl_simd), // TODO need an OR predicate to join with AVX512F - ); - - e.enc_both_inferred( - x86_cvtt2si - .bind(vector(I32, sse_vector_size)) - .bind(vector(F32, sse_vector_size)), - rec_furm.opcodes(&CVTTPS2DQ), - ); - } - - // SIMD vconst for special cases (all zeroes, all ones) - // this must be encoded prior to the MOVUPS implementation (below) so the compiler sees this - // encoding first - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let instruction = vconst.bind(vector(ty, sse_vector_size)); - - let is_zero_128bit = - InstructionPredicate::new_is_all_zeroes(&*formats.unary_const, "constant_handle"); - let template = rec_vconst_optimized.opcodes(&PXOR).infer_rex(); - e.enc_32_64_func(instruction.clone(), template, |builder| { - builder.inst_predicate(is_zero_128bit) - }); - - let is_ones_128bit = - InstructionPredicate::new_is_all_ones(&*formats.unary_const, "constant_handle"); - let template = rec_vconst_optimized.opcodes(&PCMPEQB).infer_rex(); - e.enc_32_64_func(instruction, template, |builder| { - builder.inst_predicate(is_ones_128bit) - }); - } - - // SIMD vconst using MOVUPS - // TODO it would be ideal if eventually this became the more efficient MOVAPS but we would have - // to guarantee that the constants are aligned when emitted and there is currently no 
mechanism - // for that; alternately, constants could be loaded into XMM registers using a sequence like: - // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored - // in memory) but some performance measurements are needed. - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let instruction = vconst.bind(vector(ty, sse_vector_size)); - let template = rec_vconst.opcodes(&MOVUPS_LOAD); - e.enc_both_inferred(instruction, template); // from SSE - } - - // SIMD register movement: store, load, spill, fill, regmove, etc. All of these use encodings of - // MOVUPS and MOVAPS from SSE (TODO ideally all of these would either use MOVAPS when we have - // alignment or type-specific encodings, see https://github.com/bytecodealliance/wasmtime/issues/1124). - // Also, it would be ideal to infer REX prefixes for all of these instructions but for the - // time being only instructions with common recipes have `infer_rex()` support. - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - // Store - let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any); - e.enc_both_inferred(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE)); - e.enc_both_inferred(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE)); - e.enc_both_inferred(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE)); - - // Store complex - let bound_store_complex = store_complex.bind(vector(ty, sse_vector_size)); - e.enc_both( - bound_store_complex.clone(), - rec_fstWithIndex.opcodes(&MOVUPS_STORE), - ); - e.enc_both( - bound_store_complex.clone(), - rec_fstWithIndexDisp8.opcodes(&MOVUPS_STORE), - ); - e.enc_both( - bound_store_complex, - rec_fstWithIndexDisp32.opcodes(&MOVUPS_STORE), - ); - - // Load - let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any); - e.enc_both_inferred(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD)); - e.enc_both_inferred(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD)); - 
e.enc_both_inferred(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD)); - - // Load complex - let bound_load_complex = load_complex.bind(vector(ty, sse_vector_size)); - e.enc_both( - bound_load_complex.clone(), - rec_fldWithIndex.opcodes(&MOVUPS_LOAD), - ); - e.enc_both( - bound_load_complex.clone(), - rec_fldWithIndexDisp8.opcodes(&MOVUPS_LOAD), - ); - e.enc_both( - bound_load_complex, - rec_fldWithIndexDisp32.opcodes(&MOVUPS_LOAD), - ); - - // Spill - let bound_spill = spill.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE)); - let bound_regspill = regspill.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE)); - - // Fill - let bound_fill = fill.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD)); - let bound_regfill = regfill.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD)); - let bound_fill_nop = fill_nop.bind(vector(ty, sse_vector_size)); - e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0); - - // Regmove - let bound_regmove = regmove.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD)); - - // Copy - let bound_copy = copy.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD)); - let bound_copy_to_ssa = copy_to_ssa.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_copy_to_ssa, rec_furm_reg_to_ssa.opcodes(&MOVAPS_LOAD)); - let bound_copy_nop = copy_nop.bind(vector(ty, sse_vector_size)); - e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0); - } - - // SIMD load extend - for (inst, opcodes) in &[ - (uload8x8, &PMOVZXBW), - (uload16x4, &PMOVZXWD), - (uload32x2, &PMOVZXDQ), - (sload8x8, &PMOVSXBW), - (sload16x4, &PMOVSXWD), - (sload32x2, &PMOVSXDQ), - ] { - let isap = Some(use_sse41_simd); - for recipe in &[rec_fld, rec_fldDisp8, rec_fldDisp32] { - let inst = *inst; - let template = 
recipe.opcodes(*opcodes); - e.enc_both_inferred_maybe_isap(inst.clone().bind(I32), template.clone(), isap); - e.enc64_maybe_isap(inst.bind(I64), template.infer_rex(), isap); - } - } - - // SIMD load extend (complex addressing) - let is_load_complex_length_two = - InstructionPredicate::new_length_equals(&*formats.load_complex, 2); - for (inst, opcodes) in &[ - (uload8x8_complex, &PMOVZXBW), - (uload16x4_complex, &PMOVZXWD), - (uload32x2_complex, &PMOVZXDQ), - (sload8x8_complex, &PMOVSXBW), - (sload16x4_complex, &PMOVSXWD), - (sload32x2_complex, &PMOVSXDQ), - ] { - for recipe in &[ - rec_fldWithIndex, - rec_fldWithIndexDisp8, - rec_fldWithIndexDisp32, - ] { - let template = recipe.opcodes(*opcodes); - let predicate = |encoding: EncodingBuilder| { - encoding - .isa_predicate(use_sse41_simd) - .inst_predicate(is_load_complex_length_two.clone()) - }; - e.enc32_func(inst.clone(), template.clone(), predicate); - // No infer_rex calculator for these recipes; place REX version first as in enc_x86_64. 
- e.enc64_func(inst.clone(), template.rex(), predicate); - e.enc64_func(inst.clone(), template, predicate); - } - } - - // SIMD integer addition - for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] { - let iadd = iadd.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(iadd, rec_fa.opcodes(*opcodes)); - } - - // SIMD integer saturating addition - e.enc_both_inferred( - sadd_sat.bind(vector(I8, sse_vector_size)), - rec_fa.opcodes(&PADDSB), - ); - e.enc_both_inferred( - sadd_sat.bind(vector(I16, sse_vector_size)), - rec_fa.opcodes(&PADDSW), - ); - e.enc_both_inferred( - uadd_sat.bind(vector(I8, sse_vector_size)), - rec_fa.opcodes(&PADDUSB), - ); - e.enc_both_inferred( - uadd_sat.bind(vector(I16, sse_vector_size)), - rec_fa.opcodes(&PADDUSW), - ); - - // SIMD integer subtraction - let isub = shared.by_name("isub"); - for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] { - let isub = isub.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(isub, rec_fa.opcodes(*opcodes)); - } - - // SIMD integer saturating subtraction - e.enc_both_inferred( - ssub_sat.bind(vector(I8, sse_vector_size)), - rec_fa.opcodes(&PSUBSB), - ); - e.enc_both_inferred( - ssub_sat.bind(vector(I16, sse_vector_size)), - rec_fa.opcodes(&PSUBSW), - ); - e.enc_both_inferred( - usub_sat.bind(vector(I8, sse_vector_size)), - rec_fa.opcodes(&PSUBUSB), - ); - e.enc_both_inferred( - usub_sat.bind(vector(I16, sse_vector_size)), - rec_fa.opcodes(&PSUBUSW), - ); - - // SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16 - // and I64x2 and these are (at the time of writing) not necessary for WASM SIMD. - for (ty, opcodes, isap) in &[ - (I16, &PMULLW[..], None), - (I32, &PMULLD[..], Some(use_sse41_simd)), - ] { - let imul = imul.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap); - } - - // SIMD multiplication with lane expansion. 
- e.enc_both_inferred(x86_pmuludq, rec_fa.opcodes(&PMULUDQ)); - - // SIMD multiplication and add adjacent pairs, from SSE2. - e.enc_both_inferred(widening_pairwise_dot_product_s, rec_fa.opcodes(&PMADDWD)); - - // SIMD integer multiplication for I64x2 using a AVX512. - { - e.enc_32_64_maybe_isap( - x86_pmullq, - rec_evex_reg_vvvv_rm_128.opcodes(&VPMULLQ).w(), - Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL - ); - } - - // SIMD integer average with rounding. - for (ty, opcodes) in &[(I8, &PAVGB[..]), (I16, &PAVGW[..])] { - let avgr = avg_round.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(avgr, rec_fa.opcodes(opcodes)); - } - - // SIMD integer absolute value. - for (ty, opcodes) in &[(I8, &PABSB[..]), (I16, &PABSW[..]), (I32, &PABSD)] { - let iabs = iabs.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(iabs, rec_furm.opcodes(opcodes), Some(use_ssse3_simd)); - } - - // SIMD logical operations - let band = shared.by_name("band"); - let band_not = shared.by_name("band_not"); - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - // and - let band = band.bind(vector(ty, sse_vector_size)); - e.enc_both_inferred(band, rec_fa.opcodes(&PAND)); - - // and not (note flipped recipe operands to match band_not order) - let band_not = band_not.bind(vector(ty, sse_vector_size)); - e.enc_both_inferred(band_not, rec_fax.opcodes(&PANDN)); - - // or - let bor = bor.bind(vector(ty, sse_vector_size)); - e.enc_both_inferred(bor, rec_fa.opcodes(&POR)); - - // xor - let bxor = bxor.bind(vector(ty, sse_vector_size)); - e.enc_both_inferred(bxor, rec_fa.opcodes(&PXOR)); - - // ptest - let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd)); - } - - // SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement - // allows SIMD shifts to be legalized more easily. 
TODO ideally this would be typed as an - // I128x1 but restrictions on the type builder prevent this; the general idea here is that - // the upper bits are all zeroed and do not form parts of any separate lane. See - // https://github.com/bytecodealliance/wasmtime/issues/1140. - e.enc_both_inferred( - bitcast.bind(vector(I64, sse_vector_size)).bind(I32), - rec_frurm.opcodes(&MOVD_LOAD_XMM), - ); - e.enc64( - bitcast.bind(vector(I64, sse_vector_size)).bind(I64), - rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(), - ); - - // SIMD shift left - for (ty, opcodes) in &[(I16, &PSLLW), (I32, &PSLLD), (I64, &PSLLQ)] { - let x86_psll = x86_psll.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(x86_psll, rec_fa.opcodes(*opcodes)); - } - - // SIMD shift right (logical) - for (ty, opcodes) in &[(I16, &PSRLW), (I32, &PSRLD), (I64, &PSRLQ)] { - let x86_psrl = x86_psrl.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(x86_psrl, rec_fa.opcodes(*opcodes)); - } - - // SIMD shift right (arithmetic) - for (ty, opcodes) in &[(I16, &PSRAW), (I32, &PSRAD)] { - let x86_psra = x86_psra.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(x86_psra, rec_fa.opcodes(*opcodes)); - } - - // SIMD immediate shift - for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM), (I64, &PS_Q_IMM)] { - let ishl_imm = ishl_imm.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6)); - - let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2)); - - // One exception: PSRAQ does not exist in for 64x2 in SSE2, it requires a higher CPU feature set. 
- if *ty != I64 { - let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4)); - } - } - - // SIMD integer comparisons - { - use IntCC::*; - for (ty, cc, opcodes, isa_predicate) in &[ - (I8, Equal, &PCMPEQB[..], None), - (I16, Equal, &PCMPEQW[..], None), - (I32, Equal, &PCMPEQD[..], None), - (I64, Equal, &PCMPEQQ[..], Some(use_sse41_simd)), - (I8, SignedGreaterThan, &PCMPGTB[..], None), - (I16, SignedGreaterThan, &PCMPGTW[..], None), - (I32, SignedGreaterThan, &PCMPGTD[..], None), - (I64, SignedGreaterThan, &PCMPGTQ, Some(use_sse42_simd)), - ] { - let instruction = icmp - .bind(Immediate::IntCC(*cc)) - .bind(vector(*ty, sse_vector_size)); - let template = rec_icscc_fpr.opcodes(opcodes); - e.enc_both_inferred_maybe_isap(instruction, template, *isa_predicate); - } - } - - // SIMD min/max - for (ty, inst, opcodes, isa_predicate) in &[ - (I8, x86_pmaxs, &PMAXSB[..], Some(use_sse41_simd)), - (I16, x86_pmaxs, &PMAXSW[..], None), - (I32, x86_pmaxs, &PMAXSD[..], Some(use_sse41_simd)), - (I8, x86_pmaxu, &PMAXUB[..], None), - (I16, x86_pmaxu, &PMAXUW[..], Some(use_sse41_simd)), - (I32, x86_pmaxu, &PMAXUD[..], Some(use_sse41_simd)), - (I8, x86_pmins, &PMINSB[..], Some(use_sse41_simd)), - (I16, x86_pmins, &PMINSW[..], None), - (I32, x86_pmins, &PMINSD[..], Some(use_sse41_simd)), - (I8, x86_pminu, &PMINUB[..], None), - (I16, x86_pminu, &PMINUW[..], Some(use_sse41_simd)), - (I32, x86_pminu, &PMINUD[..], Some(use_sse41_simd)), - ] { - let inst = inst.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(inst, rec_fa.opcodes(opcodes), *isa_predicate); - } - - // SIMD float comparisons - e.enc_both_inferred( - fcmp.bind(vector(F32, sse_vector_size)), - rec_pfcmp.opcodes(&CMPPS), - ); - e.enc_both_inferred( - fcmp.bind(vector(F64, sse_vector_size)), - rec_pfcmp.opcodes(&CMPPD), - ); - - // SIMD float arithmetic - for (ty, inst, opcodes) in &[ - (F32, fadd, &ADDPS[..]), - (F64, fadd, &ADDPD[..]), - 
(F32, fsub, &SUBPS[..]), - (F64, fsub, &SUBPD[..]), - (F32, fmul, &MULPS[..]), - (F64, fmul, &MULPD[..]), - (F32, fdiv, &DIVPS[..]), - (F64, fdiv, &DIVPD[..]), - (F32, x86_fmin, &MINPS[..]), - (F64, x86_fmin, &MINPD[..]), - (F32, x86_fmax, &MAXPS[..]), - (F64, x86_fmax, &MAXPD[..]), - ] { - let inst = inst.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(inst, rec_fa.opcodes(opcodes)); - } - for (ty, inst, opcodes) in &[(F32, sqrt, &SQRTPS[..]), (F64, sqrt, &SQRTPD[..])] { - let inst = inst.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(inst, rec_furm.opcodes(opcodes)); - } -} - -#[inline(never)] -fn define_entity_ref( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. - let const_addr = shared.by_name("const_addr"); - let func_addr = shared.by_name("func_addr"); - let stack_addr = shared.by_name("stack_addr"); - let symbol_value = shared.by_name("symbol_value"); - - // Shorthands for recipes. - let rec_allones_fnaddr4 = r.template("allones_fnaddr4"); - let rec_allones_fnaddr8 = r.template("allones_fnaddr8"); - let rec_fnaddr4 = r.template("fnaddr4"); - let rec_fnaddr8 = r.template("fnaddr8"); - let rec_const_addr = r.template("const_addr"); - let rec_got_fnaddr8 = r.template("got_fnaddr8"); - let rec_got_gvaddr8 = r.template("got_gvaddr8"); - let rec_gvaddr4 = r.template("gvaddr4"); - let rec_gvaddr8 = r.template("gvaddr8"); - let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8"); - let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8"); - let rec_spaddr_id = r.template("spaddr_id"); - - // Predicates shorthands. 
- let all_ones_funcaddrs_and_not_is_pic = - settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic"); - let is_pic = settings.predicate_by_name("is_pic"); - let not_all_ones_funcaddrs_and_not_is_pic = - settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic"); - let not_is_pic = settings.predicate_by_name("not_is_pic"); - - // Function addresses. - - // Non-PIC, all-ones funcaddresses. - e.enc32_isap( - func_addr.bind(I32), - rec_fnaddr4.opcodes(&MOV_IMM), - not_all_ones_funcaddrs_and_not_is_pic, - ); - e.enc64_isap( - func_addr.bind(I64), - rec_fnaddr8.opcodes(&MOV_IMM).rex().w(), - not_all_ones_funcaddrs_and_not_is_pic, - ); - - // Non-PIC, all-zeros funcaddresses. - e.enc32_isap( - func_addr.bind(I32), - rec_allones_fnaddr4.opcodes(&MOV_IMM), - all_ones_funcaddrs_and_not_is_pic, - ); - e.enc64_isap( - func_addr.bind(I64), - rec_allones_fnaddr8.opcodes(&MOV_IMM).rex().w(), - all_ones_funcaddrs_and_not_is_pic, - ); - - // 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's pc-relative field. - let is_colocated_func = - InstructionPredicate::new_is_colocated_func(&*formats.func_addr, "func_ref"); - e.enc64_instp( - func_addr.bind(I64), - rec_pcrel_fnaddr8.opcodes(&LEA).rex().w(), - is_colocated_func, - ); - - // 64-bit, non-colocated, PIC. - e.enc64_isap( - func_addr.bind(I64), - rec_got_fnaddr8.opcodes(&MOV_LOAD).rex().w(), - is_pic, - ); - - // Global addresses. - - // Non-PIC. - e.enc32_isap( - symbol_value.bind(I32), - rec_gvaddr4.opcodes(&MOV_IMM), - not_is_pic, - ); - e.enc64_isap( - symbol_value.bind(I64), - rec_gvaddr8.opcodes(&MOV_IMM).rex().w(), - not_is_pic, - ); - - // PIC, colocated. - e.enc64_func( - symbol_value.bind(I64), - rec_pcrel_gvaddr8.opcodes(&LEA).rex().w(), - |encoding| { - encoding - .isa_predicate(is_pic) - .inst_predicate(InstructionPredicate::new_is_colocated_data(formats)) - }, - ); - - // PIC, non-colocated. 
- e.enc64_isap( - symbol_value.bind(I64), - rec_got_gvaddr8.opcodes(&MOV_LOAD).rex().w(), - is_pic, - ); - - // Stack addresses. - // - // TODO: Add encoding rules for stack_load and stack_store, so that they - // don't get legalized to stack_addr + load/store. - e.enc64(stack_addr.bind(I64), rec_spaddr_id.opcodes(&LEA).rex().w()); - e.enc32(stack_addr.bind(I32), rec_spaddr_id.opcodes(&LEA)); - - // Constant addresses (PIC). - e.enc64(const_addr.bind(I64), rec_const_addr.opcodes(&LEA).rex().w()); - e.enc32(const_addr.bind(I32), rec_const_addr.opcodes(&LEA)); -} - -/// Control flow opcodes. -#[inline(never)] -fn define_control_flow( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. - let brff = shared.by_name("brff"); - let brif = shared.by_name("brif"); - let brnz = shared.by_name("brnz"); - let brz = shared.by_name("brz"); - let call = shared.by_name("call"); - let call_indirect = shared.by_name("call_indirect"); - let debugtrap = shared.by_name("debugtrap"); - let indirect_jump_table_br = shared.by_name("indirect_jump_table_br"); - let jump = shared.by_name("jump"); - let jump_table_base = shared.by_name("jump_table_base"); - let jump_table_entry = shared.by_name("jump_table_entry"); - let return_ = shared.by_name("return"); - let trap = shared.by_name("trap"); - let trapff = shared.by_name("trapff"); - let trapif = shared.by_name("trapif"); - let resumable_trap = shared.by_name("resumable_trap"); - - // Shorthands for recipes. 
- let rec_brfb = r.template("brfb"); - let rec_brfd = r.template("brfd"); - let rec_brib = r.template("brib"); - let rec_brid = r.template("brid"); - let rec_call_id = r.template("call_id"); - let rec_call_plt_id = r.template("call_plt_id"); - let rec_call_r = r.template("call_r"); - let rec_debugtrap = r.recipe("debugtrap"); - let rec_indirect_jmp = r.template("indirect_jmp"); - let rec_jmpb = r.template("jmpb"); - let rec_jmpd = r.template("jmpd"); - let rec_jt_base = r.template("jt_base"); - let rec_jt_entry = r.template("jt_entry"); - let rec_ret = r.template("ret"); - let rec_t8jccb_abcd = r.template("t8jccb_abcd"); - let rec_t8jccd_abcd = r.template("t8jccd_abcd"); - let rec_t8jccd_long = r.template("t8jccd_long"); - let rec_tjccb = r.template("tjccb"); - let rec_tjccd = r.template("tjccd"); - let rec_trap = r.template("trap"); - let rec_trapif = r.recipe("trapif"); - let rec_trapff = r.recipe("trapff"); - - // Predicates shorthands. - let is_pic = settings.predicate_by_name("is_pic"); - - // Call/return - - // 32-bit, both PIC and non-PIC. - e.enc32(call, rec_call_id.opcodes(&CALL_RELATIVE)); - - // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field. - let is_colocated_func = InstructionPredicate::new_is_colocated_func(&*formats.call, "func_ref"); - e.enc64_instp(call, rec_call_id.opcodes(&CALL_RELATIVE), is_colocated_func); - - // 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, since non-PIC - // is currently using the large model, which requires calls be lowered to - // func_addr+call_indirect. 
- e.enc64_isap(call, rec_call_plt_id.opcodes(&CALL_RELATIVE), is_pic); - - e.enc32( - call_indirect.bind(I32), - rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), - ); - e.enc64( - call_indirect.bind(I64), - rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2).rex(), - ); - e.enc64( - call_indirect.bind(I64), - rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), - ); - - e.enc32(return_, rec_ret.opcodes(&RET_NEAR)); - e.enc64(return_, rec_ret.opcodes(&RET_NEAR)); - - // Branches. - e.enc32(jump, rec_jmpb.opcodes(&JUMP_SHORT)); - e.enc64(jump, rec_jmpb.opcodes(&JUMP_SHORT)); - e.enc32(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); - e.enc64(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); - - e.enc_both(brif, rec_brib.opcodes(&JUMP_SHORT_IF_OVERFLOW)); - e.enc_both(brif, rec_brid.opcodes(&JUMP_NEAR_IF_OVERFLOW)); - - // Not all float condition codes are legal, see `supported_floatccs`. - e.enc_both(brff, rec_brfb.opcodes(&JUMP_SHORT_IF_OVERFLOW)); - e.enc_both(brff, rec_brfd.opcodes(&JUMP_NEAR_IF_OVERFLOW)); - - // Note that the tjccd opcode will be prefixed with 0x0f. - e.enc_i32_i64_explicit_rex(brz, rec_tjccb.opcodes(&JUMP_SHORT_IF_EQUAL)); - e.enc_i32_i64_explicit_rex(brz, rec_tjccd.opcodes(&TEST_BYTE_REG)); - e.enc_i32_i64_explicit_rex(brnz, rec_tjccb.opcodes(&JUMP_SHORT_IF_NOT_EQUAL)); - e.enc_i32_i64_explicit_rex(brnz, rec_tjccd.opcodes(&TEST_REG)); - - // Branch on a b1 value in a register only looks at the low 8 bits. See also - // bint encodings below. - // - // Start with the worst-case encoding for X86_32 only. The register allocator - // can't handle a branch with an ABCD-constrained operand. 
- e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(&TEST_BYTE_REG)); - e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(&TEST_REG)); - - e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_EQUAL)); - e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_BYTE_REG)); - e.enc_both( - brnz.bind(B1), - rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_NOT_EQUAL), - ); - e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_REG)); - - // Jump tables. - e.enc64( - jump_table_entry.bind(I64), - rec_jt_entry.opcodes(&MOVSXD).rex().w(), - ); - e.enc32(jump_table_entry.bind(I32), rec_jt_entry.opcodes(&MOV_LOAD)); - - e.enc64( - jump_table_base.bind(I64), - rec_jt_base.opcodes(&LEA).rex().w(), - ); - e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(&LEA)); - - e.enc_x86_64( - indirect_jump_table_br.bind(I64), - rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), - ); - e.enc32( - indirect_jump_table_br.bind(I32), - rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), - ); - - // Trap as ud2 - e.enc32(trap, rec_trap.opcodes(&UNDEFINED2)); - e.enc64(trap, rec_trap.opcodes(&UNDEFINED2)); - e.enc32(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); - e.enc64(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); - - // Debug trap as int3 - e.enc32_rec(debugtrap, rec_debugtrap, 0); - e.enc64_rec(debugtrap, rec_debugtrap, 0); - - e.enc32_rec(trapif, rec_trapif, 0); - e.enc64_rec(trapif, rec_trapif, 0); - e.enc32_rec(trapff, rec_trapff, 0); - e.enc64_rec(trapff, rec_trapff, 0); -} - -/// Reference type instructions. 
-#[inline(never)] -fn define_reftypes(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { - let shared = &shared_defs.instructions; - - let is_null = shared.by_name("is_null"); - let is_invalid = shared.by_name("is_invalid"); - let null = shared.by_name("null"); - let safepoint = shared.by_name("safepoint"); - - let rec_is_zero = r.template("is_zero"); - let rec_is_invalid = r.template("is_invalid"); - let rec_pu_id_ref = r.template("pu_id_ref"); - let rec_safepoint = r.recipe("safepoint"); - - // Null references implemented as iconst 0. - e.enc32(null.bind(R32), rec_pu_id_ref.opcodes(&MOV_IMM)); - - e.enc64(null.bind(R64), rec_pu_id_ref.rex().opcodes(&MOV_IMM)); - e.enc64(null.bind(R64), rec_pu_id_ref.opcodes(&MOV_IMM)); - - // is_null, implemented by testing whether the value is 0. - e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(&TEST_REG)); - - // is_invalid, implemented by testing whether the value is -1. - e.enc_r32_r64_rex_only(is_invalid, rec_is_invalid.opcodes(&CMP_IMM8).rrr(7)); - - // safepoint instruction calls sink, no actual encoding. - e.enc32_rec(safepoint, rec_safepoint, 0); - e.enc64_rec(safepoint, rec_safepoint, 0); -} - -#[allow(clippy::cognitive_complexity)] -pub(crate) fn define( - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - x86: &InstructionGroup, - r: &RecipeGroup, -) -> PerCpuModeEncodings { - // Definitions. 
- let mut e = PerCpuModeEncodings::new(); - - define_moves(&mut e, shared_defs, r); - define_memory(&mut e, shared_defs, x86, r); - define_fpu_moves(&mut e, shared_defs, r); - define_fpu_memory(&mut e, shared_defs, r); - define_fpu_ops(&mut e, shared_defs, settings, x86, r); - define_alu(&mut e, shared_defs, settings, x86, r); - define_simd(&mut e, shared_defs, settings, x86, r); - define_entity_ref(&mut e, shared_defs, settings, r); - define_control_flow(&mut e, shared_defs, settings, r); - define_reftypes(&mut e, shared_defs, r); - - let x86_elf_tls_get_addr = x86.by_name("x86_elf_tls_get_addr"); - let x86_macho_tls_get_addr = x86.by_name("x86_macho_tls_get_addr"); - - let rec_elf_tls_get_addr = r.recipe("elf_tls_get_addr"); - let rec_macho_tls_get_addr = r.recipe("macho_tls_get_addr"); - - e.enc64_rec(x86_elf_tls_get_addr, rec_elf_tls_get_addr, 0); - e.enc64_rec(x86_macho_tls_get_addr, rec_macho_tls_get_addr, 0); - - e -} diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs deleted file mode 100644 index 7acd2e2c50..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/instructions.rs +++ /dev/null @@ -1,723 +0,0 @@ -#![allow(non_snake_case)] - -use crate::cdsl::instructions::{ - AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder, -}; -use crate::cdsl::operands::Operand; -use crate::cdsl::types::ValueType; -use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar}; -use crate::shared::entities::EntityRefs; -use crate::shared::formats::Formats; -use crate::shared::immediates::Immediates; -use crate::shared::types; - -#[allow(clippy::many_single_char_names)] -pub(crate) fn define( - mut all_instructions: &mut AllInstructions, - formats: &Formats, - immediates: &Immediates, - entities: &EntityRefs, -) -> InstructionGroup { - let mut ig = InstructionGroupBuilder::new(&mut all_instructions); - - let iflags: &TypeVar = 
&ValueType::Special(types::Flag::IFlags.into()).into(); - - let iWord = &TypeVar::new( - "iWord", - "A scalar integer machine word", - TypeSetBuilder::new().ints(32..64).build(), - ); - let nlo = &Operand::new("nlo", iWord).with_doc("Low part of numerator"); - let nhi = &Operand::new("nhi", iWord).with_doc("High part of numerator"); - let d = &Operand::new("d", iWord).with_doc("Denominator"); - let q = &Operand::new("q", iWord).with_doc("Quotient"); - let r = &Operand::new("r", iWord).with_doc("Remainder"); - - ig.push( - Inst::new( - "x86_udivmodx", - r#" - Extended unsigned division. - - Concatenate the bits in `nhi` and `nlo` to form the numerator. - Interpret the bits as an unsigned number and divide by the unsigned - denominator `d`. Trap when `d` is zero or if the quotient is larger - than the range of the output. - - Return both quotient and remainder. - "#, - &formats.ternary, - ) - .operands_in(vec![nlo, nhi, d]) - .operands_out(vec![q, r]) - .can_trap(true), - ); - - ig.push( - Inst::new( - "x86_sdivmodx", - r#" - Extended signed division. - - Concatenate the bits in `nhi` and `nlo` to form the numerator. - Interpret the bits as a signed number and divide by the signed - denominator `d`. Trap when `d` is zero or if the quotient is outside - the range of the output. - - Return both quotient and remainder. - "#, - &formats.ternary, - ) - .operands_in(vec![nlo, nhi, d]) - .operands_out(vec![q, r]) - .can_trap(true), - ); - - let argL = &Operand::new("argL", iWord); - let argR = &Operand::new("argR", iWord); - let resLo = &Operand::new("resLo", iWord); - let resHi = &Operand::new("resHi", iWord); - - ig.push( - Inst::new( - "x86_umulx", - r#" - Unsigned integer multiplication, producing a double-length result. - - Polymorphic over all scalar integer types, but does not support vector - types. 
- "#, - &formats.binary, - ) - .operands_in(vec![argL, argR]) - .operands_out(vec![resLo, resHi]), - ); - - ig.push( - Inst::new( - "x86_smulx", - r#" - Signed integer multiplication, producing a double-length result. - - Polymorphic over all scalar integer types, but does not support vector - types. - "#, - &formats.binary, - ) - .operands_in(vec![argL, argR]) - .operands_out(vec![resLo, resHi]), - ); - - let Float = &TypeVar::new( - "Float", - "A scalar or vector floating point number", - TypeSetBuilder::new() - .floats(Interval::All) - .simd_lanes(Interval::All) - .build(), - ); - let IntTo = &TypeVar::new( - "IntTo", - "An integer type with the same number of lanes", - TypeSetBuilder::new() - .ints(32..64) - .simd_lanes(Interval::All) - .build(), - ); - let x = &Operand::new("x", Float); - let a = &Operand::new("a", IntTo); - - ig.push( - Inst::new( - "x86_cvtt2si", - r#" - Convert with truncation floating point to signed integer. - - The source floating point operand is converted to a signed integer by - rounding towards zero. If the result can't be represented in the output - type, returns the smallest signed value the output type can represent. - - This instruction does not trap. - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .operands_out(vec![a]), - ); - - let f32x4 = &TypeVar::new( - "f32x4", - "A floating point number", - TypeSetBuilder::new() - .floats(32..32) - .simd_lanes(4..4) - .build(), - ); - let i32x4 = &TypeVar::new( - "i32x4", - "An integer type with the same number of lanes", - TypeSetBuilder::new().ints(32..32).simd_lanes(4..4).build(), - ); - let x = &Operand::new("x", i32x4); - let a = &Operand::new("a", f32x4); - - ig.push( - Inst::new( - "x86_vcvtudq2ps", - r#" - Convert unsigned integer to floating point. - - Convert packed doubleword unsigned integers to packed single-precision floating-point - values. This instruction does not trap. 
- "#, - &formats.unary, - ) - .operands_in(vec![x]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", Float); - let a = &Operand::new("a", Float); - let y = &Operand::new("y", Float); - - ig.push( - Inst::new( - "x86_fmin", - r#" - Floating point minimum with x86 semantics. - - This is equivalent to the C ternary operator `x < y ? x : y` which - differs from `fmin` when either operand is NaN or when comparing - +0.0 to -0.0. - - When the two operands don't compare as LT, `y` is returned unchanged, - even if it is a signalling NaN. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_fmax", - r#" - Floating point maximum with x86 semantics. - - This is equivalent to the C ternary operator `x > y ? x : y` which - differs from `fmax` when either operand is NaN or when comparing - +0.0 to -0.0. - - When the two operands don't compare as GT, `y` is returned unchanged, - even if it is a signalling NaN. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", iWord); - - ig.push( - Inst::new( - "x86_push", - r#" - Pushes a value onto the stack. - - Decrements the stack pointer and stores the specified value on to the top. - - This is polymorphic in i32 and i64. However, it is only implemented for i64 - in 64-bit mode, and only for i32 in 32-bit mode. - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .other_side_effects(true) - .can_store(true), - ); - - ig.push( - Inst::new( - "x86_pop", - r#" - Pops a value from the stack. - - Loads a value from the top of the stack and then increments the stack - pointer. - - This is polymorphic in i32 and i64. However, it is only implemented for i64 - in 64-bit mode, and only for i32 in 32-bit mode. 
- "#, - &formats.nullary, - ) - .operands_out(vec![x]) - .other_side_effects(true) - .can_load(true), - ); - - let y = &Operand::new("y", iWord); - let rflags = &Operand::new("rflags", iflags); - - ig.push( - Inst::new( - "x86_bsr", - r#" - Bit Scan Reverse -- returns the bit-index of the most significant 1 - in the word. Result is undefined if the argument is zero. However, it - sets the Z flag depending on the argument, so it is at least easy to - detect and handle that case. - - This is polymorphic in i32 and i64. It is implemented for both i64 and - i32 in 64-bit mode, and only for i32 in 32-bit mode. - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .operands_out(vec![y, rflags]), - ); - - ig.push( - Inst::new( - "x86_bsf", - r#" - Bit Scan Forwards -- returns the bit-index of the least significant 1 - in the word. Is otherwise identical to 'bsr', just above. - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .operands_out(vec![y, rflags]), - ); - - let uimm8 = &immediates.uimm8; - let TxN = &TypeVar::new( - "TxN", - "A SIMD vector type", - TypeSetBuilder::new() - .ints(Interval::All) - .floats(Interval::All) - .bools(Interval::All) - .simd_lanes(Interval::All) - .includes_scalars(false) - .build(), - ); - let a = &Operand::new("a", TxN).with_doc("A vector value (i.e. held in an XMM register)"); - let b = &Operand::new("b", TxN).with_doc("A vector value (i.e. held in an XMM register)"); - let i = &Operand::new("i", uimm8).with_doc("An ordering operand controlling the copying of data from the source to the destination; see PSHUFD in Intel manual for details"); - - ig.push( - Inst::new( - "x86_pshufd", - r#" - Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended - register and re-orders the data according to the passed immediate byte. 
- "#, - &formats.binary_imm8, - ) - .operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_pshufb", - r#" - Packed Shuffle Bytes -- re-orders data in an extended register using a shuffle - mask from either memory or another extended register - "#, - &formats.binary, - ) - .operands_in(vec![a, b]) // TODO allow re-ordering from memory here (need more permissive type than TxN) - .operands_out(vec![a]), - ); - - let mask = &Operand::new("mask", uimm8).with_doc("mask to select lanes from b"); - ig.push( - Inst::new( - "x86_pblendw", - r#" - Blend packed words using an immediate mask. Each bit of the 8-bit immediate corresponds to a - lane in ``b``: if the bit is set, the lane is copied into ``a``. - "#, - &formats.ternary_imm8, - ) - .operands_in(vec![a, b, mask]) - .operands_out(vec![a]), - ); - - let Idx = &Operand::new("Idx", uimm8).with_doc("Lane index"); - let x = &Operand::new("x", TxN); - let a = &Operand::new("a", &TxN.lane_of()); - - ig.push( - Inst::new( - "x86_pextr", - r#" - Extract lane ``Idx`` from ``x``. - The lane index, ``Idx``, is an immediate value, not an SSA value. It - must indicate a valid lane index for the type of ``x``. - "#, - &formats.binary_imm8, - ) - .operands_in(vec![x, Idx]) - .operands_out(vec![a]), - ); - - let IBxN = &TypeVar::new( - "IBxN", - "A SIMD vector type containing only booleans and integers", - TypeSetBuilder::new() - .ints(Interval::All) - .bools(Interval::All) - .simd_lanes(Interval::All) - .includes_scalars(false) - .build(), - ); - let x = &Operand::new("x", IBxN); - let y = &Operand::new("y", &IBxN.lane_of()).with_doc("New lane value"); - let a = &Operand::new("a", IBxN); - - ig.push( - Inst::new( - "x86_pinsr", - r#" - Insert ``y`` into ``x`` at lane ``Idx``. - The lane index, ``Idx``, is an immediate value, not an SSA value. It - must indicate a valid lane index for the type of ``x``. 
- "#, - &formats.ternary_imm8, - ) - .operands_in(vec![x, y, Idx]) - .operands_out(vec![a]), - ); - - let FxN = &TypeVar::new( - "FxN", - "A SIMD vector type containing floats", - TypeSetBuilder::new() - .floats(Interval::All) - .simd_lanes(Interval::All) - .includes_scalars(false) - .build(), - ); - let x = &Operand::new("x", FxN); - let y = &Operand::new("y", &FxN.lane_of()).with_doc("New lane value"); - let a = &Operand::new("a", FxN); - - ig.push( - Inst::new( - "x86_insertps", - r#" - Insert a lane of ``y`` into ``x`` at using ``Idx`` to encode both which lane the value is - extracted from and which it is inserted to. This is similar to x86_pinsr but inserts - floats, which are already stored in an XMM register. - "#, - &formats.ternary_imm8, - ) - .operands_in(vec![x, y, Idx]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", TxN); - let y = &Operand::new("y", TxN); - let a = &Operand::new("a", TxN); - - ig.push( - Inst::new( - "x86_punpckh", - r#" - Unpack the high-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional - i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation - would result in ``a = [y3, x3, y2, x2]`` (using the Intel manual's right-to-left lane - ordering). - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_punpckl", - r#" - Unpack the low-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional - i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation - would result in ``a = [y1, x1, y0, x0]`` (using the Intel manual's right-to-left lane - ordering). 
- "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", FxN); - let y = &Operand::new("y", FxN); - let a = &Operand::new("a", FxN); - - ig.push( - Inst::new( - "x86_movsd", - r#" - Move the low 64 bits of the float vector ``y`` to the low 64 bits of float vector ``x`` - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_movlhps", - r#" - Move the low 64 bits of the float vector ``y`` to the high 64 bits of float vector ``x`` - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let IxN = &TypeVar::new( - "IxN", - "A SIMD vector type containing integers", - TypeSetBuilder::new() - .ints(Interval::All) - .simd_lanes(Interval::All) - .includes_scalars(false) - .build(), - ); - let I128 = &TypeVar::new( - "I128", - "A SIMD vector type containing one large integer (due to Cranelift type constraints, \ - this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \ - upper lane is concatenated with the lower lane to form the integer)", - TypeSetBuilder::new() - .ints(64..64) - .simd_lanes(2..2) - .includes_scalars(false) - .build(), - ); - - let x = &Operand::new("x", IxN).with_doc("Vector value to shift"); - let y = &Operand::new("y", I128).with_doc("Number of bits to shift"); - let a = &Operand::new("a", IxN); - - ig.push( - Inst::new( - "x86_psll", - r#" - Shift Packed Data Left Logical -- This implements the behavior of the shared instruction - ``ishl`` but alters the shift operand to live in an XMM register as expected by the PSLL* - family of instructions. 
- "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_psrl", - r#" - Shift Packed Data Right Logical -- This implements the behavior of the shared instruction - ``ushr`` but alters the shift operand to live in an XMM register as expected by the PSRL* - family of instructions. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_psra", - r#" - Shift Packed Data Right Arithmetic -- This implements the behavior of the shared - instruction ``sshr`` but alters the shift operand to live in an XMM register as expected by - the PSRA* family of instructions. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let I64x2 = &TypeVar::new( - "I64x2", - "A SIMD vector type containing two 64-bit integers", - TypeSetBuilder::new() - .ints(64..64) - .simd_lanes(2..2) - .includes_scalars(false) - .build(), - ); - - let x = &Operand::new("x", I64x2); - let y = &Operand::new("y", I64x2); - let a = &Operand::new("a", I64x2); - ig.push( - Inst::new( - "x86_pmullq", - r#" - Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with - lane-wise wrapping if the result overflows. This instruction is necessary to add distinct - encodings for CPUs with newer vector features. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_pmuludq", - r#" - Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2 - unsigned integers and receive a 64x2 result. This instruction avoids the need for handling - overflow as in `x86_pmullq`. 
- "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", TxN); - let y = &Operand::new("y", TxN); - let f = &Operand::new("f", iflags); - ig.push( - Inst::new( - "x86_ptest", - r#" - Logical Compare -- PTEST will set the ZF flag if all bits in the result are 0 of the - bitwise AND of the first source operand (first operand) and the second source operand - (second operand). PTEST sets the CF flag if all bits in the result are 0 of the bitwise - AND of the second source operand (second operand) and the logical NOT of the destination - operand (first operand). - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![f]), - ); - - let x = &Operand::new("x", IxN); - let y = &Operand::new("y", IxN); - let a = &Operand::new("a", IxN); - ig.push( - Inst::new( - "x86_pmaxs", - r#" - Maximum of Packed Signed Integers -- Compare signed integers in the first and second - operand and return the maximum values. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_pmaxu", - r#" - Maximum of Packed Unsigned Integers -- Compare unsigned integers in the first and second - operand and return the maximum values. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_pmins", - r#" - Minimum of Packed Signed Integers -- Compare signed integers in the first and second - operand and return the minimum values. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_pminu", - r#" - Minimum of Packed Unsigned Integers -- Compare unsigned integers in the first and second - operand and return the minimum values. 
- "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let c = &Operand::new("c", uimm8) - .with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details"); - ig.push( - Inst::new( - "x86_palignr", - r#" - Concatenate destination and source operands, extracting a byte-aligned result shifted to - the right by `c`. - "#, - &formats.ternary_imm8, - ) - .operands_in(vec![x, y, c]) - .operands_out(vec![a]), - ); - - let i64_t = &TypeVar::new( - "i64_t", - "A scalar 64bit integer", - TypeSetBuilder::new().ints(64..64).build(), - ); - - let GV = &Operand::new("GV", &entities.global_value); - let addr = &Operand::new("addr", i64_t); - - ig.push( - Inst::new( - "x86_elf_tls_get_addr", - r#" - Elf tls get addr -- This implements the GD TLS model for ELF. The clobber output should - not be used. - "#, - &formats.unary_global_value, - ) - // This is a bit overly broad to mark as clobbering *all* the registers, because it should - // only preserve caller-saved registers. There's no way to indicate this to register - // allocation yet, though, so mark as clobbering all registers instead. - .clobbers_all_regs(true) - .operands_in(vec![GV]) - .operands_out(vec![addr]), - ); - ig.push( - Inst::new( - "x86_macho_tls_get_addr", - r#" - Mach-O tls get addr -- This implements TLS access for Mach-O. The clobber output should - not be used. - "#, - &formats.unary_global_value, - ) - // See above comment for x86_elf_tls_get_addr. 
- .clobbers_all_regs(true) - .operands_in(vec![GV]) - .operands_out(vec![addr]), - ); - - ig.build() -} diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs deleted file mode 100644 index de78c3b3b7..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/legalize.rs +++ /dev/null @@ -1,827 +0,0 @@ -use crate::cdsl::ast::{constant, var, ExprBuilder, Literal}; -use crate::cdsl::instructions::{vector, Bindable, InstructionGroup}; -use crate::cdsl::types::{LaneType, ValueType}; -use crate::cdsl::xform::TransformGroupBuilder; -use crate::shared::types::Float::{F32, F64}; -use crate::shared::types::Int::{I16, I32, I64, I8}; -use crate::shared::Definitions as SharedDefinitions; - -#[allow(clippy::many_single_char_names)] -pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) { - let mut expand = TransformGroupBuilder::new( - "x86_expand", - r#" - Legalize instructions by expansion. - - Use x86-specific instructions if needed."#, - ) - .isa("x86") - .chain_with(shared.transform_groups.by_name("expand_flags").id); - - let mut narrow = TransformGroupBuilder::new( - "x86_narrow", - r#" - Legalize instructions by narrowing. - - Use x86-specific instructions if needed."#, - ) - .isa("x86") - .chain_with(shared.transform_groups.by_name("narrow_flags").id); - - let mut narrow_avx = TransformGroupBuilder::new( - "x86_narrow_avx", - r#" - Legalize instructions by narrowing with CPU feature checks. - - This special case converts using x86 AVX instructions where available."#, - ) - .isa("x86"); - // We cannot chain with the x86_narrow group until this group is built, see bottom of this - // function for where this is chained. - - let mut widen = TransformGroupBuilder::new( - "x86_widen", - r#" - Legalize instructions by widening. - - Use x86-specific instructions if needed."#, - ) - .isa("x86") - .chain_with(shared.transform_groups.by_name("widen").id); - - // List of instructions. 
- let insts = &shared.instructions; - let band = insts.by_name("band"); - let bor = insts.by_name("bor"); - let clz = insts.by_name("clz"); - let ctz = insts.by_name("ctz"); - let fcmp = insts.by_name("fcmp"); - let fcvt_from_uint = insts.by_name("fcvt_from_uint"); - let fcvt_to_sint = insts.by_name("fcvt_to_sint"); - let fcvt_to_uint = insts.by_name("fcvt_to_uint"); - let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat"); - let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat"); - let fmax = insts.by_name("fmax"); - let fmin = insts.by_name("fmin"); - let iadd = insts.by_name("iadd"); - let iconst = insts.by_name("iconst"); - let imul = insts.by_name("imul"); - let ineg = insts.by_name("ineg"); - let isub = insts.by_name("isub"); - let ishl = insts.by_name("ishl"); - let ireduce = insts.by_name("ireduce"); - let popcnt = insts.by_name("popcnt"); - let sdiv = insts.by_name("sdiv"); - let selectif = insts.by_name("selectif"); - let smulhi = insts.by_name("smulhi"); - let srem = insts.by_name("srem"); - let tls_value = insts.by_name("tls_value"); - let udiv = insts.by_name("udiv"); - let umulhi = insts.by_name("umulhi"); - let ushr = insts.by_name("ushr"); - let ushr_imm = insts.by_name("ushr_imm"); - let urem = insts.by_name("urem"); - - let x86_bsf = x86_instructions.by_name("x86_bsf"); - let x86_bsr = x86_instructions.by_name("x86_bsr"); - let x86_umulx = x86_instructions.by_name("x86_umulx"); - let x86_smulx = x86_instructions.by_name("x86_smulx"); - - let imm = &shared.imm; - - // Shift by a 64-bit amount is equivalent to a shift by that amount mod 32, so we can reduce - // the size of the shift amount. This is useful for x86_32, where an I64 shift amount is - // not encodable. 
- let a = var("a"); - let x = var("x"); - let y = var("y"); - let z = var("z"); - - for &ty in &[I8, I16, I32] { - let ishl_by_i64 = ishl.bind(ty).bind(I64); - let ireduce = ireduce.bind(I32); - expand.legalize( - def!(a = ishl_by_i64(x, y)), - vec![def!(z = ireduce(y)), def!(a = ishl(x, z))], - ); - } - - for &ty in &[I8, I16, I32] { - let ushr_by_i64 = ushr.bind(ty).bind(I64); - let ireduce = ireduce.bind(I32); - expand.legalize( - def!(a = ushr_by_i64(x, y)), - vec![def!(z = ireduce(y)), def!(a = ishl(x, z))], - ); - } - - // Division and remainder. - // - // The srem expansion requires custom code because srem INT_MIN, -1 is not - // allowed to trap. The other ops need to check avoid_div_traps. - expand.custom_legalize(sdiv, "expand_sdivrem"); - expand.custom_legalize(srem, "expand_sdivrem"); - expand.custom_legalize(udiv, "expand_udivrem"); - expand.custom_legalize(urem, "expand_udivrem"); - - // Double length (widening) multiplication. - let a = var("a"); - let x = var("x"); - let y = var("y"); - let a1 = var("a1"); - let a2 = var("a2"); - let res_lo = var("res_lo"); - let res_hi = var("res_hi"); - - expand.legalize( - def!(res_hi = umulhi(x, y)), - vec![def!((res_lo, res_hi) = x86_umulx(x, y))], - ); - - expand.legalize( - def!(res_hi = smulhi(x, y)), - vec![def!((res_lo, res_hi) = x86_smulx(x, y))], - ); - - // Floating point condition codes. - // - // The 8 condition codes in `supported_floatccs` are directly supported by a - // `ucomiss` or `ucomisd` instruction. The remaining codes need legalization - // patterns. 
- - let floatcc_eq = Literal::enumerator_for(&imm.floatcc, "eq"); - let floatcc_ord = Literal::enumerator_for(&imm.floatcc, "ord"); - let floatcc_ueq = Literal::enumerator_for(&imm.floatcc, "ueq"); - let floatcc_ne = Literal::enumerator_for(&imm.floatcc, "ne"); - let floatcc_uno = Literal::enumerator_for(&imm.floatcc, "uno"); - let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one"); - - // Equality needs an explicit `ord` test which checks the parity bit. - expand.legalize( - def!(a = fcmp(floatcc_eq, x, y)), - vec![ - def!(a1 = fcmp(floatcc_ord, x, y)), - def!(a2 = fcmp(floatcc_ueq, x, y)), - def!(a = band(a1, a2)), - ], - ); - expand.legalize( - def!(a = fcmp(floatcc_ne, x, y)), - vec![ - def!(a1 = fcmp(floatcc_uno, x, y)), - def!(a2 = fcmp(floatcc_one, x, y)), - def!(a = bor(a1, a2)), - ], - ); - - let floatcc_lt = &Literal::enumerator_for(&imm.floatcc, "lt"); - let floatcc_gt = &Literal::enumerator_for(&imm.floatcc, "gt"); - let floatcc_le = &Literal::enumerator_for(&imm.floatcc, "le"); - let floatcc_ge = &Literal::enumerator_for(&imm.floatcc, "ge"); - let floatcc_ugt = &Literal::enumerator_for(&imm.floatcc, "ugt"); - let floatcc_ult = &Literal::enumerator_for(&imm.floatcc, "ult"); - let floatcc_uge = &Literal::enumerator_for(&imm.floatcc, "uge"); - let floatcc_ule = &Literal::enumerator_for(&imm.floatcc, "ule"); - - // Inequalities that need to be reversed. - for &(cc, rev_cc) in &[ - (floatcc_lt, floatcc_gt), - (floatcc_le, floatcc_ge), - (floatcc_ugt, floatcc_ult), - (floatcc_uge, floatcc_ule), - ] { - expand.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]); - } - - // We need to modify the CFG for min/max legalization. - expand.custom_legalize(fmin, "expand_minmax"); - expand.custom_legalize(fmax, "expand_minmax"); - - // Conversions from unsigned need special handling. - expand.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint"); - // Conversions from float to int can trap and modify the control flow graph. 
- expand.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint"); - expand.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint"); - expand.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat"); - expand.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat"); - - // Count leading and trailing zeroes, for baseline x86_64 - let c_minus_one = var("c_minus_one"); - let c_thirty_one = var("c_thirty_one"); - let c_thirty_two = var("c_thirty_two"); - let c_sixty_three = var("c_sixty_three"); - let c_sixty_four = var("c_sixty_four"); - let index1 = var("index1"); - let r2flags = var("r2flags"); - let index2 = var("index2"); - - let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); - let imm64_minus_one = Literal::constant(&imm.imm64, -1); - let imm64_63 = Literal::constant(&imm.imm64, 63); - expand.legalize( - def!(a = clz.I64(x)), - vec![ - def!(c_minus_one = iconst(imm64_minus_one)), - def!(c_sixty_three = iconst(imm64_63)), - def!((index1, r2flags) = x86_bsr(x)), - def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)), - def!(a = isub(c_sixty_three, index2)), - ], - ); - - let imm64_31 = Literal::constant(&imm.imm64, 31); - expand.legalize( - def!(a = clz.I32(x)), - vec![ - def!(c_minus_one = iconst(imm64_minus_one)), - def!(c_thirty_one = iconst(imm64_31)), - def!((index1, r2flags) = x86_bsr(x)), - def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)), - def!(a = isub(c_thirty_one, index2)), - ], - ); - - let imm64_64 = Literal::constant(&imm.imm64, 64); - expand.legalize( - def!(a = ctz.I64(x)), - vec![ - def!(c_sixty_four = iconst(imm64_64)), - def!((index1, r2flags) = x86_bsf(x)), - def!(a = selectif(intcc_eq, r2flags, c_sixty_four, index1)), - ], - ); - - let imm64_32 = Literal::constant(&imm.imm64, 32); - expand.legalize( - def!(a = ctz.I32(x)), - vec![ - def!(c_thirty_two = iconst(imm64_32)), - def!((index1, r2flags) = x86_bsf(x)), - def!(a = selectif(intcc_eq, r2flags, c_thirty_two, index1)), - ], - ); - - // Population count for 
baseline x86_64 - let x = var("x"); - let r = var("r"); - - let qv3 = var("qv3"); - let qv4 = var("qv4"); - let qv5 = var("qv5"); - let qv6 = var("qv6"); - let qv7 = var("qv7"); - let qv8 = var("qv8"); - let qv9 = var("qv9"); - let qv10 = var("qv10"); - let qv11 = var("qv11"); - let qv12 = var("qv12"); - let qv13 = var("qv13"); - let qv14 = var("qv14"); - let qv15 = var("qv15"); - let qc77 = var("qc77"); - #[allow(non_snake_case)] - let qc0F = var("qc0F"); - let qc01 = var("qc01"); - - let imm64_1 = Literal::constant(&imm.imm64, 1); - let imm64_4 = Literal::constant(&imm.imm64, 4); - expand.legalize( - def!(r = popcnt.I64(x)), - vec![ - def!(qv3 = ushr_imm(x, imm64_1)), - def!(qc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777_7777_7777))), - def!(qv4 = band(qv3, qc77)), - def!(qv5 = isub(x, qv4)), - def!(qv6 = ushr_imm(qv4, imm64_1)), - def!(qv7 = band(qv6, qc77)), - def!(qv8 = isub(qv5, qv7)), - def!(qv9 = ushr_imm(qv7, imm64_1)), - def!(qv10 = band(qv9, qc77)), - def!(qv11 = isub(qv8, qv10)), - def!(qv12 = ushr_imm(qv11, imm64_4)), - def!(qv13 = iadd(qv11, qv12)), - def!(qc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F_0F0F_0F0F))), - def!(qv14 = band(qv13, qc0F)), - def!(qc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101_0101_0101))), - def!(qv15 = imul(qv14, qc01)), - def!(r = ushr_imm(qv15, Literal::constant(&imm.imm64, 56))), - ], - ); - - let lv3 = var("lv3"); - let lv4 = var("lv4"); - let lv5 = var("lv5"); - let lv6 = var("lv6"); - let lv7 = var("lv7"); - let lv8 = var("lv8"); - let lv9 = var("lv9"); - let lv10 = var("lv10"); - let lv11 = var("lv11"); - let lv12 = var("lv12"); - let lv13 = var("lv13"); - let lv14 = var("lv14"); - let lv15 = var("lv15"); - let lc77 = var("lc77"); - #[allow(non_snake_case)] - let lc0F = var("lc0F"); - let lc01 = var("lc01"); - - expand.legalize( - def!(r = popcnt.I32(x)), - vec![ - def!(lv3 = ushr_imm(x, imm64_1)), - def!(lc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777))), - def!(lv4 = band(lv3, 
lc77)), - def!(lv5 = isub(x, lv4)), - def!(lv6 = ushr_imm(lv4, imm64_1)), - def!(lv7 = band(lv6, lc77)), - def!(lv8 = isub(lv5, lv7)), - def!(lv9 = ushr_imm(lv7, imm64_1)), - def!(lv10 = band(lv9, lc77)), - def!(lv11 = isub(lv8, lv10)), - def!(lv12 = ushr_imm(lv11, imm64_4)), - def!(lv13 = iadd(lv11, lv12)), - def!(lc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F))), - def!(lv14 = band(lv13, lc0F)), - def!(lc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101))), - def!(lv15 = imul(lv14, lc01)), - def!(r = ushr_imm(lv15, Literal::constant(&imm.imm64, 24))), - ], - ); - - expand.custom_legalize(ineg, "convert_ineg"); - expand.custom_legalize(tls_value, "expand_tls_value"); - widen.custom_legalize(ineg, "convert_ineg"); - - // To reduce compilation times, separate out large blocks of legalizations by theme. - define_simd(shared, x86_instructions, &mut narrow, &mut narrow_avx); - - expand.build_and_add_to(&mut shared.transform_groups); - let narrow_id = narrow.build_and_add_to(&mut shared.transform_groups); - narrow_avx - .chain_with(narrow_id) - .build_and_add_to(&mut shared.transform_groups); - widen.build_and_add_to(&mut shared.transform_groups); -} - -fn define_simd( - shared: &mut SharedDefinitions, - x86_instructions: &InstructionGroup, - narrow: &mut TransformGroupBuilder, - narrow_avx: &mut TransformGroupBuilder, -) { - let insts = &shared.instructions; - let band = insts.by_name("band"); - let band_not = insts.by_name("band_not"); - let bitcast = insts.by_name("bitcast"); - let bitselect = insts.by_name("bitselect"); - let bor = insts.by_name("bor"); - let bnot = insts.by_name("bnot"); - let bxor = insts.by_name("bxor"); - let extractlane = insts.by_name("extractlane"); - let fabs = insts.by_name("fabs"); - let fcmp = insts.by_name("fcmp"); - let fcvt_from_uint = insts.by_name("fcvt_from_uint"); - let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat"); - let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat"); - let fmax = insts.by_name("fmax"); - 
let fmin = insts.by_name("fmin"); - let fneg = insts.by_name("fneg"); - let iadd_imm = insts.by_name("iadd_imm"); - let icmp = insts.by_name("icmp"); - let imax = insts.by_name("imax"); - let imin = insts.by_name("imin"); - let imul = insts.by_name("imul"); - let ineg = insts.by_name("ineg"); - let insertlane = insts.by_name("insertlane"); - let ishl = insts.by_name("ishl"); - let ishl_imm = insts.by_name("ishl_imm"); - let raw_bitcast = insts.by_name("raw_bitcast"); - let scalar_to_vector = insts.by_name("scalar_to_vector"); - let splat = insts.by_name("splat"); - let shuffle = insts.by_name("shuffle"); - let sshr = insts.by_name("sshr"); - let swizzle = insts.by_name("swizzle"); - let trueif = insts.by_name("trueif"); - let uadd_sat = insts.by_name("uadd_sat"); - let umax = insts.by_name("umax"); - let umin = insts.by_name("umin"); - let snarrow = insts.by_name("snarrow"); - let swiden_high = insts.by_name("swiden_high"); - let swiden_low = insts.by_name("swiden_low"); - let ushr_imm = insts.by_name("ushr_imm"); - let ushr = insts.by_name("ushr"); - let uwiden_high = insts.by_name("uwiden_high"); - let uwiden_low = insts.by_name("uwiden_low"); - let vconst = insts.by_name("vconst"); - let vall_true = insts.by_name("vall_true"); - let vany_true = insts.by_name("vany_true"); - let vselect = insts.by_name("vselect"); - - let x86_palignr = x86_instructions.by_name("x86_palignr"); - let x86_pmaxs = x86_instructions.by_name("x86_pmaxs"); - let x86_pmaxu = x86_instructions.by_name("x86_pmaxu"); - let x86_pmins = x86_instructions.by_name("x86_pmins"); - let x86_pminu = x86_instructions.by_name("x86_pminu"); - let x86_pshufb = x86_instructions.by_name("x86_pshufb"); - let x86_pshufd = x86_instructions.by_name("x86_pshufd"); - let x86_psra = x86_instructions.by_name("x86_psra"); - let x86_ptest = x86_instructions.by_name("x86_ptest"); - let x86_punpckh = x86_instructions.by_name("x86_punpckh"); - let x86_punpckl = x86_instructions.by_name("x86_punpckl"); - - let imm = 
&shared.imm; - - // Set up variables and immediates. - let uimm8_zero = Literal::constant(&imm.uimm8, 0x00); - let uimm8_one = Literal::constant(&imm.uimm8, 0x01); - let uimm8_eight = Literal::constant(&imm.uimm8, 8); - let u128_zeroes = constant(vec![0x00; 16]); - let u128_ones = constant(vec![0xff; 16]); - let u128_seventies = constant(vec![0x70; 16]); - let a = var("a"); - let b = var("b"); - let c = var("c"); - let d = var("d"); - let e = var("e"); - let f = var("f"); - let g = var("g"); - let h = var("h"); - let x = var("x"); - let y = var("y"); - let z = var("z"); - - // Limit the SIMD vector size: eventually multiple vector sizes may be supported - // but for now only SSE-sized vectors are available. - let sse_vector_size: u64 = 128; - let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128; - - // SIMD splat: 8-bits - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) { - let splat_any8x16 = splat.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(y = splat_any8x16(x)), - vec![ - // Move into the lowest 8 bits of an XMM register. - def!(a = scalar_to_vector(x)), - // Zero out a different XMM register; the shuffle mask for moving the lowest byte - // to all other byte lanes is 0x0. - def!(b = vconst(u128_zeroes)), - // PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b). - def!(y = x86_pshufb(a, b)), - ], - ); - } - - // SIMD splat: 16-bits - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) { - let splat_x16x8 = splat.bind(vector(ty, sse_vector_size)); - let raw_bitcast_any16x8_to_i32x4 = raw_bitcast - .bind(vector(I32, sse_vector_size)) - .bind(vector(ty, sse_vector_size)); - let raw_bitcast_i32x4_to_any16x8 = raw_bitcast - .bind(vector(ty, sse_vector_size)) - .bind(vector(I32, sse_vector_size)); - narrow.legalize( - def!(y = splat_x16x8(x)), - vec![ - // Move into the lowest 16 bits of an XMM register. 
- def!(a = scalar_to_vector(x)), - // Insert the value again but in the next lowest 16 bits. - def!(b = insertlane(a, x, uimm8_one)), - // No instruction emitted; pretend this is an I32x4 so we can use PSHUFD. - def!(c = raw_bitcast_any16x8_to_i32x4(b)), - // Broadcast the bytes in the XMM register with PSHUFD. - def!(d = x86_pshufd(c, uimm8_zero)), - // No instruction emitted; pretend this is an X16x8 again. - def!(y = raw_bitcast_i32x4_to_any16x8(d)), - ], - ); - } - - // SIMD splat: 32-bits - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) { - let splat_any32x4 = splat.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(y = splat_any32x4(x)), - vec![ - // Translate to an x86 MOV to get the value in an XMM register. - def!(a = scalar_to_vector(x)), - // Broadcast the bytes in the XMM register with PSHUFD. - def!(y = x86_pshufd(a, uimm8_zero)), - ], - ); - } - - // SIMD splat: 64-bits - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) { - let splat_any64x2 = splat.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(y = splat_any64x2(x)), - vec![ - // Move into the lowest 64 bits of an XMM register. - def!(a = scalar_to_vector(x)), - // Move into the highest 64 bits of the same XMM register. - def!(y = insertlane(a, x, uimm8_one)), - ], - ); - } - - // SIMD swizzle; the following inefficient implementation is due to the Wasm SIMD spec requiring - // mask indexes greater than 15 to have the same semantics as a 0 index. For the spec discussion, - // see https://github.com/WebAssembly/simd/issues/93. 
- { - let swizzle = swizzle.bind(vector(I8, sse_vector_size)); - narrow.legalize( - def!(a = swizzle(x, y)), - vec![ - def!(b = vconst(u128_seventies)), - def!(c = uadd_sat(y, b)), - def!(a = x86_pshufb(x, c)), - ], - ); - } - - // SIMD bnot - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let bnot = bnot.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(y = bnot(x)), - vec![def!(a = vconst(u128_ones)), def!(y = bxor(a, x))], - ); - } - - // SIMD shift right (arithmetic, i16x8 and i32x4) - for ty in &[I16, I32] { - let sshr = sshr.bind(vector(*ty, sse_vector_size)); - let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size)); - narrow.legalize( - def!(a = sshr(x, y)), - vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))], - ); - } - // SIMD shift right (arithmetic, i8x16) - { - let sshr = sshr.bind(vector(I8, sse_vector_size)); - let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size)); - let raw_bitcast_i16x8 = raw_bitcast.bind(vector(I16, sse_vector_size)); - let raw_bitcast_i16x8_again = raw_bitcast.bind(vector(I16, sse_vector_size)); - narrow.legalize( - def!(z = sshr(x, y)), - vec![ - // Since we will use the high byte of each 16x8 lane, shift an extra 8 bits. - def!(a = iadd_imm(y, uimm8_eight)), - def!(b = bitcast_i64x2(a)), - // Take the low 8 bytes of x, duplicate them in 16x8 lanes, then shift right. - def!(c = x86_punpckl(x, x)), - def!(d = raw_bitcast_i16x8(c)), - def!(e = x86_psra(d, b)), - // Take the high 8 bytes of x, duplicate them in 16x8 lanes, then shift right. - def!(f = x86_punpckh(x, x)), - def!(g = raw_bitcast_i16x8_again(f)), - def!(h = x86_psra(g, b)), - // Re-pack the vector. 
- def!(z = snarrow(e, h)), - ], - ); - } - // SIMD shift right (arithmetic, i64x2) - { - let sshr_vector = sshr.bind(vector(I64, sse_vector_size)); - let sshr_scalar_lane0 = sshr.bind(I64); - let sshr_scalar_lane1 = sshr.bind(I64); - narrow.legalize( - def!(z = sshr_vector(x, y)), - vec![ - // Use scalar operations to shift the first lane. - def!(a = extractlane(x, uimm8_zero)), - def!(b = sshr_scalar_lane0(a, y)), - def!(c = insertlane(x, b, uimm8_zero)), - // Do the same for the second lane. - def!(d = extractlane(x, uimm8_one)), - def!(e = sshr_scalar_lane1(d, y)), - def!(z = insertlane(c, e, uimm8_one)), - ], - ); - } - - // SIMD select - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let bitselect = bitselect.bind(vector(ty, sse_vector_size)); // must bind both x/y and c - narrow.legalize( - def!(d = bitselect(c, x, y)), - vec![ - def!(a = band(x, c)), - def!(b = band_not(y, c)), - def!(d = bor(a, b)), - ], - ); - } - - // SIMD vselect; replace with bitselect if BLEND* instructions are not available. - // This works, because each lane of boolean vector is filled with zeroes or ones. 
- for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let vselect = vselect.bind(vector(ty, sse_vector_size)); - let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(d = vselect(c, x, y)), - vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))], - ); - } - - // SIMD vany_true - let ne = Literal::enumerator_for(&imm.intcc, "ne"); - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let vany_true = vany_true.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(y = vany_true(x)), - vec![def!(a = x86_ptest(x, x)), def!(y = trueif(ne, a))], - ); - } - - // SIMD vall_true - let eq = Literal::enumerator_for(&imm.intcc, "eq"); - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let vall_true = vall_true.bind(vector(ty, sse_vector_size)); - if ty.is_int() { - // In the common case (Wasm's integer-only all_true), we do not require a - // bitcast. - narrow.legalize( - def!(y = vall_true(x)), - vec![ - def!(a = vconst(u128_zeroes)), - def!(c = icmp(eq, x, a)), - def!(d = x86_ptest(c, c)), - def!(y = trueif(eq, d)), - ], - ); - } else { - // However, to support other types we must bitcast them to an integer vector to - // use icmp. 
- let lane_type_as_int = LaneType::int_from_bits(ty.lane_bits() as u16); - let raw_bitcast_to_int = raw_bitcast.bind(vector(lane_type_as_int, sse_vector_size)); - narrow.legalize( - def!(y = vall_true(x)), - vec![ - def!(a = vconst(u128_zeroes)), - def!(b = raw_bitcast_to_int(x)), - def!(c = icmp(eq, b, a)), - def!(d = x86_ptest(c, c)), - def!(y = trueif(eq, d)), - ], - ); - } - } - - // SIMD icmp ne - let ne = Literal::enumerator_for(&imm.intcc, "ne"); - for ty in ValueType::all_lane_types().filter(|ty| allowed_simd_type(ty) && ty.is_int()) { - let icmp_ = icmp.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(c = icmp_(ne, a, b)), - vec![def!(x = icmp(eq, a, b)), def!(c = bnot(x))], - ); - } - - // SIMD icmp greater-/less-than - let sgt = Literal::enumerator_for(&imm.intcc, "sgt"); - let ugt = Literal::enumerator_for(&imm.intcc, "ugt"); - let sge = Literal::enumerator_for(&imm.intcc, "sge"); - let uge = Literal::enumerator_for(&imm.intcc, "uge"); - let slt = Literal::enumerator_for(&imm.intcc, "slt"); - let ult = Literal::enumerator_for(&imm.intcc, "ult"); - let sle = Literal::enumerator_for(&imm.intcc, "sle"); - let ule = Literal::enumerator_for(&imm.intcc, "ule"); - for ty in &[I8, I16, I32] { - // greater-than - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(c = icmp_(ugt, a, b)), - vec![ - def!(x = x86_pmaxu(a, b)), - def!(y = icmp(eq, x, b)), - def!(c = bnot(y)), - ], - ); - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(c = icmp_(sge, a, b)), - vec![def!(x = x86_pmins(a, b)), def!(c = icmp(eq, x, b))], - ); - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(c = icmp_(uge, a, b)), - vec![def!(x = x86_pminu(a, b)), def!(c = icmp(eq, x, b))], - ); - - // less-than - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = icmp_(slt, a, b)), vec![def!(c = icmp(sgt, b, a))]); - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - 
narrow.legalize(def!(c = icmp_(ult, a, b)), vec![def!(c = icmp(ugt, b, a))]); - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = icmp_(sle, a, b)), vec![def!(c = icmp(sge, b, a))]); - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = icmp_(ule, a, b)), vec![def!(c = icmp(uge, b, a))]); - } - - // SIMD integer min/max - for ty in &[I8, I16, I32] { - let imin = imin.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = imin(a, b)), vec![def!(c = x86_pmins(a, b))]); - let umin = umin.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = umin(a, b)), vec![def!(c = x86_pminu(a, b))]); - let imax = imax.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = imax(a, b)), vec![def!(c = x86_pmaxs(a, b))]); - let umax = umax.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = umax(a, b)), vec![def!(c = x86_pmaxu(a, b))]); - } - - // SIMD fcmp greater-/less-than - let gt = Literal::enumerator_for(&imm.floatcc, "gt"); - let lt = Literal::enumerator_for(&imm.floatcc, "lt"); - let ge = Literal::enumerator_for(&imm.floatcc, "ge"); - let le = Literal::enumerator_for(&imm.floatcc, "le"); - let ugt = Literal::enumerator_for(&imm.floatcc, "ugt"); - let ult = Literal::enumerator_for(&imm.floatcc, "ult"); - let uge = Literal::enumerator_for(&imm.floatcc, "uge"); - let ule = Literal::enumerator_for(&imm.floatcc, "ule"); - for ty in &[F32, F64] { - let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = fcmp_(gt, a, b)), vec![def!(c = fcmp(lt, b, a))]); - let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = fcmp_(ge, a, b)), vec![def!(c = fcmp(le, b, a))]); - let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = fcmp_(ult, a, b)), vec![def!(c = fcmp(ugt, b, a))]); - let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = fcmp_(ule, a, b)), vec![def!(c = fcmp(uge, b, a))]); - } - - for ty in 
&[F32, F64] { - let fneg = fneg.bind(vector(*ty, sse_vector_size)); - let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16); - let uimm8_shift = Literal::constant(&imm.uimm8, lane_type_as_int.lane_bits() as i64 - 1); - let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size)); - let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(b = fneg(a)), - vec![ - def!(c = vconst(u128_ones)), - def!(d = ishl_imm(c, uimm8_shift)), // Create a mask of all 0s except the MSB. - def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type. - def!(b = bxor(a, e)), // Flip the MSB. - ], - ); - } - - // SIMD fabs - for ty in &[F32, F64] { - let fabs = fabs.bind(vector(*ty, sse_vector_size)); - let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16); - let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size)); - let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(b = fabs(a)), - vec![ - def!(c = vconst(u128_ones)), - def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB. - def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type. - def!(b = band(a, e)), // Unset the MSB. 
- ], - ); - } - - // SIMD widen - for ty in &[I8, I16] { - let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(b = swiden_high(a)), - vec![ - def!(c = x86_palignr(a, a, uimm8_eight)), - def!(b = swiden_low(c)), - ], - ); - let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(b = uwiden_high(a)), - vec![ - def!(c = x86_palignr(a, a, uimm8_eight)), - def!(b = uwiden_low(c)), - ], - ); - } - - narrow.custom_legalize(shuffle, "convert_shuffle"); - narrow.custom_legalize(extractlane, "convert_extractlane"); - narrow.custom_legalize(insertlane, "convert_insertlane"); - narrow.custom_legalize(ineg, "convert_ineg"); - narrow.custom_legalize(ushr, "convert_ushr"); - narrow.custom_legalize(ishl, "convert_ishl"); - narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector"); - narrow.custom_legalize(fmin, "expand_minmax_vector"); - narrow.custom_legalize(fmax, "expand_minmax_vector"); - - narrow_avx.custom_legalize(imul, "convert_i64x2_imul"); - narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector"); - narrow_avx.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat_vector"); -} diff --git a/cranelift/codegen/meta/src/isa/x86/mod.rs b/cranelift/codegen/meta/src/isa/x86/mod.rs deleted file mode 100644 index 26c833a77f..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/mod.rs +++ /dev/null @@ -1,87 +0,0 @@ -use crate::cdsl::cpu_modes::CpuMode; -use crate::cdsl::isa::TargetIsa; -use crate::cdsl::types::{ReferenceType, VectorType}; - -use crate::shared::types::Bool::B1; -use crate::shared::types::Float::{F32, F64}; -use crate::shared::types::Int::{I16, I32, I64, I8}; -use crate::shared::types::Reference::{R32, R64}; -use crate::shared::Definitions as SharedDefinitions; - -mod encodings; -mod instructions; -mod legalize; -mod opcodes; -mod recipes; -mod registers; -pub(crate) mod settings; - -pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa 
{ - let settings = settings::define(&shared_defs.settings); - let regs = registers::define(); - - let inst_group = instructions::define( - &mut shared_defs.all_instructions, - &shared_defs.formats, - &shared_defs.imm, - &shared_defs.entities, - ); - legalize::define(shared_defs, &inst_group); - - // CPU modes for 32-bit and 64-bit operations. - let mut x86_64 = CpuMode::new("I64"); - let mut x86_32 = CpuMode::new("I32"); - - let expand_flags = shared_defs.transform_groups.by_name("expand_flags"); - let x86_widen = shared_defs.transform_groups.by_name("x86_widen"); - let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow"); - let x86_narrow_avx = shared_defs.transform_groups.by_name("x86_narrow_avx"); - let x86_expand = shared_defs.transform_groups.by_name("x86_expand"); - - x86_32.legalize_monomorphic(expand_flags); - x86_32.legalize_default(x86_narrow); - x86_32.legalize_type(B1, expand_flags); - x86_32.legalize_type(I8, x86_widen); - x86_32.legalize_type(I16, x86_widen); - x86_32.legalize_type(I32, x86_expand); - x86_32.legalize_value_type(ReferenceType(R32), x86_expand); - x86_32.legalize_type(F32, x86_expand); - x86_32.legalize_type(F64, x86_expand); - x86_32.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx); - x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx); - x86_32.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx); - - x86_64.legalize_monomorphic(expand_flags); - x86_64.legalize_default(x86_narrow); - x86_64.legalize_type(B1, expand_flags); - x86_64.legalize_type(I8, x86_widen); - x86_64.legalize_type(I16, x86_widen); - x86_64.legalize_type(I32, x86_expand); - x86_64.legalize_type(I64, x86_expand); - x86_64.legalize_value_type(ReferenceType(R64), x86_expand); - x86_64.legalize_type(F32, x86_expand); - x86_64.legalize_type(F64, x86_expand); - x86_64.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx); - x86_64.legalize_value_type(VectorType::new(I64.into(), 2), 
x86_narrow_avx); - x86_64.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx); - - let recipes = recipes::define(shared_defs, &settings, ®s); - - let encodings = encodings::define(shared_defs, &settings, &inst_group, &recipes); - x86_32.set_encodings(encodings.enc32); - x86_64.set_encodings(encodings.enc64); - let encodings_predicates = encodings.inst_pred_reg.extract(); - - let recipes = encodings.recipes; - - let cpu_modes = vec![x86_64, x86_32]; - - TargetIsa::new( - "x86", - settings, - regs, - recipes, - cpu_modes, - encodings_predicates, - ) -} diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs deleted file mode 100644 index 2e72a1744d..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs +++ /dev/null @@ -1,721 +0,0 @@ -//! Static, named definitions of instruction opcodes. - -/// Empty opcode for use as a default. -pub static EMPTY: [u8; 0] = []; - -/// Add with carry flag r{16,32,64} to r/m of the same size. -pub static ADC: [u8; 1] = [0x11]; - -/// Add r{16,32,64} to r/m of the same size. -pub static ADD: [u8; 1] = [0x01]; - -/// Add imm{16,32} to r/m{16,32,64}, possibly sign-extended. -pub static ADD_IMM: [u8; 1] = [0x81]; - -/// Add sign-extended imm8 to r/m{16,32,64}. -pub static ADD_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; - -/// Add packed double-precision floating-point values from xmm2/mem to xmm1 and store result in -/// xmm1 (SSE2). -pub static ADDPD: [u8; 3] = [0x66, 0x0f, 0x58]; - -/// Add packed single-precision floating-point values from xmm2/mem to xmm1 and store result in -/// xmm1 (SSE). -pub static ADDPS: [u8; 2] = [0x0f, 0x58]; - -/// Add the low double-precision floating-point value from xmm2/mem to xmm1 -/// and store the result in xmm1. -pub static ADDSD: [u8; 3] = [0xf2, 0x0f, 0x58]; - -/// Add the low single-precision floating-point value from xmm2/mem to xmm1 -/// and store the result in xmm1. 
-pub static ADDSS: [u8; 3] = [0xf3, 0x0f, 0x58]; - -/// r/m{16,32,64} AND register of the same size (Intel docs have a typo). -pub static AND: [u8; 1] = [0x21]; - -/// imm{16,32} AND r/m{16,32,64}, possibly sign-extended. -pub static AND_IMM: [u8; 1] = [0x81]; - -/// r/m{16,32,64} AND sign-extended imm8. -pub static AND_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; - -/// Return the bitwise logical AND NOT of packed single-precision floating-point -/// values in xmm1 and xmm2/mem. -pub static ANDNPS: [u8; 2] = [0x0f, 0x55]; - -/// Return the bitwise logical AND of packed single-precision floating-point values -/// in xmm1 and xmm2/mem. -pub static ANDPS: [u8; 2] = [0x0f, 0x54]; - -/// Bit scan forward (stores index of first encountered 1 from the front). -pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc]; - -/// Bit scan reverse (stores index of first encountered 1 from the back). -pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd]; - -/// Select packed single-precision floating-point values from xmm1 and xmm2/m128 -/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1). -pub static BLENDVPS: [u8; 4] = [0x66, 0x0f, 0x38, 0x14]; - -/// Select packed double-precision floating-point values from xmm1 and xmm2/m128 -/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1). -pub static BLENDVPD: [u8; 4] = [0x66, 0x0f, 0x38, 0x15]; - -/// Call near, relative, displacement relative to next instruction (sign-extended). -pub static CALL_RELATIVE: [u8; 1] = [0xe8]; - -/// Move r/m{16,32,64} if overflow (OF=1). -pub static CMOV_OVERFLOW: [u8; 2] = [0x0f, 0x40]; - -/// Compare imm{16,32} with r/m{16,32,64} (sign-extended if 64). -pub static CMP_IMM: [u8; 1] = [0x81]; - -/// Compare imm8 with r/m{16,32,64}. -pub static CMP_IMM8: [u8; 1] = [0x83]; - -/// Compare r{16,32,64} with r/m of the same size. 
-pub static CMP_REG: [u8; 1] = [0x39]; - -/// Compare packed double-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of -/// imm8 as comparison predicate (SSE2). -pub static CMPPD: [u8; 3] = [0x66, 0x0f, 0xc2]; - -/// Compare packed single-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of -/// imm8 as comparison predicate (SSE). -pub static CMPPS: [u8; 2] = [0x0f, 0xc2]; - -/// Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision -/// floating-point values in xmm1 (SSE2). -pub static CVTDQ2PS: [u8; 2] = [0x0f, 0x5b]; - -/// Convert scalar double-precision floating-point value to scalar single-precision -/// floating-point value. -pub static CVTSD2SS: [u8; 3] = [0xf2, 0x0f, 0x5a]; - -/// Convert doubleword integer to scalar double-precision floating-point value. -pub static CVTSI2SD: [u8; 3] = [0xf2, 0x0f, 0x2a]; - -/// Convert doubleword integer to scalar single-precision floating-point value. -pub static CVTSI2SS: [u8; 3] = [0xf3, 0x0f, 0x2a]; - -/// Convert scalar single-precision floating-point value to scalar double-precision -/// float-point value. -pub static CVTSS2SD: [u8; 3] = [0xf3, 0x0f, 0x5a]; - -/// Convert four packed single-precision floating-point values from xmm2/mem to four packed signed -/// doubleword values in xmm1 using truncation (SSE2). -pub static CVTTPS2DQ: [u8; 3] = [0xf3, 0x0f, 0x5b]; - -/// Convert with truncation scalar double-precision floating-point value to signed -/// integer. -pub static CVTTSD2SI: [u8; 3] = [0xf2, 0x0f, 0x2c]; - -/// Convert with truncation scalar single-precision floating-point value to integer. -pub static CVTTSS2SI: [u8; 3] = [0xf3, 0x0f, 0x2c]; - -/// Unsigned divide for {16,32,64}-bit. -pub static DIV: [u8; 1] = [0xf7]; - -/// Divide packed double-precision floating-point values in xmm1 by packed double-precision -/// floating-point values in xmm2/mem (SSE2). 
-pub static DIVPD: [u8; 3] = [0x66, 0x0f, 0x5e]; - -/// Divide packed single-precision floating-point values in xmm1 by packed single-precision -/// floating-point values in xmm2/mem (SSE). -pub static DIVPS: [u8; 2] = [0x0f, 0x5e]; - -/// Divide low double-precision floating-point value in xmm1 by low double-precision -/// floating-point value in xmm2/m64. -pub static DIVSD: [u8; 3] = [0xf2, 0x0f, 0x5e]; - -/// Divide low single-precision floating-point value in xmm1 by low single-precision -/// floating-point value in xmm2/m32. -pub static DIVSS: [u8; 3] = [0xf3, 0x0f, 0x5e]; - -/// Signed divide for {16,32,64}-bit. -pub static IDIV: [u8; 1] = [0xf7]; - -/// Signed multiply for {16,32,64}-bit, generic registers. -pub static IMUL: [u8; 2] = [0x0f, 0xaf]; - -/// Signed multiply for {16,32,64}-bit, storing into RDX:RAX. -pub static IMUL_RDX_RAX: [u8; 1] = [0xf7]; - -/// Insert scalar single-precision floating-point value. -pub static INSERTPS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x21]; - -/// Either: -/// 1. Jump near, absolute indirect, RIP = 64-bit offset from register or memory. -/// 2. Jump far, absolute indirect, address given in m16:64. -pub static JUMP_ABSOLUTE: [u8; 1] = [0xff]; - -/// Jump near, relative, RIP = RIP + 32-bit displacement sign extended to 64 bits. -pub static JUMP_NEAR_RELATIVE: [u8; 1] = [0xe9]; - -/// Jump near (rel32) if overflow (OF=1). -pub static JUMP_NEAR_IF_OVERFLOW: [u8; 2] = [0x0f, 0x80]; - -/// Jump short, relative, RIP = RIP + 8-bit displacement sign extended to 64 bits. -pub static JUMP_SHORT: [u8; 1] = [0xeb]; - -/// Jump short (rel8) if equal (ZF=1). -pub static JUMP_SHORT_IF_EQUAL: [u8; 1] = [0x74]; - -/// Jump short (rel8) if not equal (ZF=0). -pub static JUMP_SHORT_IF_NOT_EQUAL: [u8; 1] = [0x75]; - -/// Jump short (rel8) if overflow (OF=1). -pub static JUMP_SHORT_IF_OVERFLOW: [u8; 1] = [0x70]; - -/// Store effective address for m in register r{16,32,64}. 
-pub static LEA: [u8; 1] = [0x8d]; - -/// Count the number of leading zero bits. -pub static LZCNT: [u8; 3] = [0xf3, 0x0f, 0xbd]; - -/// Return the maximum packed double-precision floating-point values between xmm1 and xmm2/m128 -/// (SSE2). -pub static MAXPD: [u8; 3] = [0x66, 0x0f, 0x5f]; - -/// Return the maximum packed single-precision floating-point values between xmm1 and xmm2/m128 -/// (SSE). -pub static MAXPS: [u8; 2] = [0x0f, 0x5f]; - -/// Return the maximum scalar double-precision floating-point value between -/// xmm2/m64 and xmm1. -pub static MAXSD: [u8; 3] = [0xf2, 0x0f, 0x5f]; - -/// Return the maximum scalar single-precision floating-point value between -/// xmm2/m32 and xmm1. -pub static MAXSS: [u8; 3] = [0xf3, 0x0f, 0x5f]; - -/// Return the minimum packed double-precision floating-point values between xmm1 and xmm2/m128 -/// (SSE2). -pub static MINPD: [u8; 3] = [0x66, 0x0f, 0x5d]; - -/// Return the minimum packed single-precision floating-point values between xmm1 and xmm2/m128 -/// (SSE). -pub static MINPS: [u8; 2] = [0x0f, 0x5d]; - -/// Return the minimum scalar double-precision floating-point value between -/// xmm2/m64 and xmm1. -pub static MINSD: [u8; 3] = [0xf2, 0x0f, 0x5d]; - -/// Return the minimum scalar single-precision floating-point value between -/// xmm2/m32 and xmm1. -pub static MINSS: [u8; 3] = [0xf3, 0x0f, 0x5d]; - -/// Move r8 to r/m8. -pub static MOV_BYTE_STORE: [u8; 1] = [0x88]; - -/// Move imm{16,32,64} to same-sized register. -pub static MOV_IMM: [u8; 1] = [0xb8]; - -/// Move imm{16,32} to r{16,32,64}, sign-extended if 64-bit target. -pub static MOV_IMM_SIGNEXTEND: [u8; 1] = [0xc7]; - -/// Move {r/m16, r/m32, r/m64} to same-sized register. -pub static MOV_LOAD: [u8; 1] = [0x8b]; - -/// Move r16 to r/m16. -pub static MOV_STORE_16: [u8; 2] = [0x66, 0x89]; - -/// Move {r16, r32, r64} to same-sized register or memory. 
-pub static MOV_STORE: [u8; 1] = [0x89]; - -/// Move aligned packed single-precision floating-point values from x/m to xmm (SSE). -pub static MOVAPS_LOAD: [u8; 2] = [0x0f, 0x28]; - -/// Move doubleword from r/m32 to xmm (SSE2). Quadword with REX prefix. -pub static MOVD_LOAD_XMM: [u8; 3] = [0x66, 0x0f, 0x6e]; - -/// Move doubleword from xmm to r/m32 (SSE2). Quadword with REX prefix. -pub static MOVD_STORE_XMM: [u8; 3] = [0x66, 0x0f, 0x7e]; - -/// Move packed single-precision floating-point values low to high (SSE). -pub static MOVLHPS: [u8; 2] = [0x0f, 0x16]; - -/// Move scalar double-precision floating-point value (from reg/mem to reg). -pub static MOVSD_LOAD: [u8; 3] = [0xf2, 0x0f, 0x10]; - -/// Move scalar double-precision floating-point value (from reg to reg/mem). -pub static MOVSD_STORE: [u8; 3] = [0xf2, 0x0f, 0x11]; - -/// Move scalar single-precision floating-point value (from reg to reg/mem). -pub static MOVSS_STORE: [u8; 3] = [0xf3, 0x0f, 0x11]; - -/// Move scalar single-precision floating-point-value (from reg/mem to reg). -pub static MOVSS_LOAD: [u8; 3] = [0xf3, 0x0f, 0x10]; - -/// Move byte to register with sign-extension. -pub static MOVSX_BYTE: [u8; 2] = [0x0f, 0xbe]; - -/// Move word to register with sign-extension. -pub static MOVSX_WORD: [u8; 2] = [0x0f, 0xbf]; - -/// Move doubleword to register with sign-extension. -pub static MOVSXD: [u8; 1] = [0x63]; - -/// Move unaligned packed single-precision floating-point from x/m to xmm (SSE). -pub static MOVUPS_LOAD: [u8; 2] = [0x0f, 0x10]; - -/// Move unaligned packed single-precision floating-point value from xmm to x/m (SSE). -pub static MOVUPS_STORE: [u8; 2] = [0x0f, 0x11]; - -/// Move byte to register with zero-extension. -pub static MOVZX_BYTE: [u8; 2] = [0x0f, 0xb6]; - -/// Move word to register with zero-extension. -pub static MOVZX_WORD: [u8; 2] = [0x0f, 0xb7]; - -/// Unsigned multiply for {16,32,64}-bit. 
-pub static MUL: [u8; 1] = [0xf7]; - -/// Multiply packed double-precision floating-point values from xmm2/mem to xmm1 and store result -/// in xmm1 (SSE2). -pub static MULPD: [u8; 3] = [0x66, 0x0f, 0x59]; - -/// Multiply packed single-precision floating-point values from xmm2/mem to xmm1 and store result -/// in xmm1 (SSE). -pub static MULPS: [u8; 2] = [0x0f, 0x59]; - -/// Multiply the low double-precision floating-point value in xmm2/m64 by the -/// low double-precision floating-point value in xmm1. -pub static MULSD: [u8; 3] = [0xf2, 0x0f, 0x59]; - -/// Multiply the low single-precision floating-point value in xmm2/m32 by the -/// low single-precision floating-point value in xmm1. -pub static MULSS: [u8; 3] = [0xf3, 0x0f, 0x59]; - -/// Reverse each bit of r/m{16,32,64}. -pub static NOT: [u8; 1] = [0xf7]; - -/// r{16,32,64} OR register of same size. -pub static OR: [u8; 1] = [0x09]; - -/// imm{16,32} OR r/m{16,32,64}, possibly sign-extended. -pub static OR_IMM: [u8; 1] = [0x81]; - -/// r/m{16,32,64} OR sign-extended imm8. -pub static OR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; - -/// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE). -pub static ORPS: [u8; 2] = [0x0f, 0x56]; - -/// Compute the absolute value of bytes in xmm2/m128 and store the unsigned result in xmm1 (SSSE3). -pub static PABSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x1c]; - -/// Compute the absolute value of 32-bit integers in xmm2/m128 and store the unsigned result in -/// xmm1 (SSSE3). -pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e]; - -/// Compute the absolute value of 16-bit integers in xmm2/m128 and store the unsigned result in -/// xmm1 (SSSE3). -pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d]; - -/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed signed byte -/// integers in xmm1 using signed saturation (SSE2). 
-pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63]; - -/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed signed -/// word integers in xmm1 using signed saturation (SSE2). -pub static PACKSSDW: [u8; 3] = [0x66, 0x0f, 0x6b]; - -/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed unsigned byte -/// integers in xmm1 using unsigned saturation (SSE2). -pub static PACKUSWB: [u8; 3] = [0x66, 0x0f, 0x67]; - -/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 unpacked signed -/// word integers in xmm1 using unsigned saturation (SSE4.1). -pub static PACKUSDW: [u8; 4] = [0x66, 0x0f, 0x38, 0x2b]; - -/// Add packed byte integers from xmm2/m128 and xmm1 (SSE2). -pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc]; - -/// Add packed doubleword integers from xmm2/m128 and xmm1 (SSE2). -pub static PADDD: [u8; 3] = [0x66, 0x0f, 0xfe]; - -/// Add packed quadword integers from xmm2/m128 and xmm1 (SSE2). -pub static PADDQ: [u8; 3] = [0x66, 0x0f, 0xd4]; - -/// Add packed word integers from xmm2/m128 and xmm1 (SSE2). -pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd]; - -/// Add packed signed byte integers from xmm2/m128 and xmm1 saturate the results (SSE). -pub static PADDSB: [u8; 3] = [0x66, 0x0f, 0xec]; - -/// Add packed signed word integers from xmm2/m128 and xmm1 saturate the results (SSE). -pub static PADDSW: [u8; 3] = [0x66, 0x0f, 0xed]; - -/// Add packed unsigned byte integers from xmm2/m128 and xmm1 saturate the results (SSE). -pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc]; - -/// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE). -pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd]; - -/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is -/// shifted to the right by the constant number of bytes in imm8 (SSSE3). 
-pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f]; - -/// Bitwise AND of xmm2/m128 and xmm1 (SSE2). -pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb]; - -/// Bitwise AND NOT of xmm2/m128 and xmm1 (SSE2). -pub static PANDN: [u8; 3] = [0x66, 0x0f, 0xdf]; - -/// Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding (SSE2). -pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0]; - -/// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2). -pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3]; - -/// Select byte values from xmm1 and xmm2/m128 from mask specified in the high bit of each byte -/// in XMM0 and store the values into xmm1 (SSE4.1). -pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10]; - -/// Select words from xmm1 and xmm2/m128 from mask specified in imm8 and store the values into xmm1 -/// (SSE4.1). -pub static PBLENDW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0e]; - -/// Compare packed data for equal (SSE2). -pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74]; - -/// Compare packed data for equal (SSE2). -pub static PCMPEQD: [u8; 3] = [0x66, 0x0f, 0x76]; - -/// Compare packed data for equal (SSE4.1). -pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29]; - -/// Compare packed data for equal (SSE2). -pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75]; - -/// Compare packed signed byte integers for greater than (SSE2). -pub static PCMPGTB: [u8; 3] = [0x66, 0x0f, 0x64]; - -/// Compare packed signed doubleword integers for greater than (SSE2). -pub static PCMPGTD: [u8; 3] = [0x66, 0x0f, 0x66]; - -/// Compare packed signed quadword integers for greater than (SSE4.2). -pub static PCMPGTQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x37]; - -/// Compare packed signed word integers for greater than (SSE2). -pub static PCMPGTW: [u8; 3] = [0x66, 0x0f, 0x65]; - -/// Extract doubleword or quadword, depending on REX.W (SSE4.1). -pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16]; - -/// Extract byte (SSE4.1). 
-pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14]; - -/// Extract word (SSE4.1). There is a 3-byte SSE2 variant that can also move to m/16. -pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15]; - -/// Insert doubleword or quadword, depending on REX.W (SSE4.1). -pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22]; - -/// Insert byte (SSE4.1). -pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20]; - -/// Insert word (SSE2). -pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4]; - -/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in -/// xmm1 (SSE4.1). -pub static PMAXSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x3c]; - -/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed maximum -/// values in xmm1 (SSE4.1). -pub static PMAXSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3d]; - -/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed maximum values in -/// xmm1 (SSE2). -pub static PMAXSW: [u8; 3] = [0x66, 0x0f, 0xee]; - -/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in -/// xmm1 (SSE2). -pub static PMAXUB: [u8; 3] = [0x66, 0x0f, 0xde]; - -/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed maximum -/// values in xmm1 (SSE4.1). -pub static PMAXUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3f]; - -/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed maximum values in -/// xmm1 (SSE4.1). -pub static PMAXUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3e]; - -/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in -/// xmm1 (SSE4.1). -pub static PMINSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x38]; - -/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed minimum -/// values in xmm1 (SSE4.1). 
-pub static PMINSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x39]; - -/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed minimum values in -/// xmm1 (SSE2). -pub static PMINSW: [u8; 3] = [0x66, 0x0f, 0xea]; - -/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in -/// xmm1 (SSE2). -pub static PMINUB: [u8; 3] = [0x66, 0x0f, 0xda]; - -/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed minimum -/// values in xmm1 (SSE4.1). -pub static PMINUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3b]; - -/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed minimum values in -/// xmm1 (SSE4.1). -pub static PMINUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3a]; - -/// Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20]; - -/// Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23]; - -/// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25]; - -/// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30]; - -/// Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33]; - -/// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35]; - -/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of -/// the results in xmm1 (SSE2). 
-pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5]; - -/// Multiply the packed doubleword signed integers in xmm1 and xmm2/m128 and store the low 32 -/// bits of each product in xmm1 (SSE4.1). -pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40]; - -/// Multiply the packed quadword signed integers in xmm2 and xmm3/m128 and store the low 64 -/// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding. -pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40]; - -/// Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers -/// in xmm2/m128, and store the quadword results in xmm1 (SSE2). -pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4]; - -/// Multiply the packed word integers, add adjacent doubleword results. -pub static PMADDWD: [u8; 3] = [0x66, 0x0f, 0xf5]; - -/// Pop top of stack into r{16,32,64}; increment stack pointer. -pub static POP_REG: [u8; 1] = [0x58]; - -/// Returns the count of number of bits set to 1. -pub static POPCNT: [u8; 3] = [0xf3, 0x0f, 0xb8]; - -/// Bitwise OR of xmm2/m128 and xmm1 (SSE2). -pub static POR: [u8; 3] = [0x66, 0x0f, 0xeb]; - -/// Shuffle bytes in xmm1 according to contents of xmm2/m128 (SSE3). -pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00]; - -/// Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and -/// store the result in xmm1 (SSE2). -pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70]; - -/// Shift words in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR -/// digit used in the ModR/M byte (SSE2). -pub static PS_W_IMM: [u8; 3] = [0x66, 0x0f, 0x71]; - -/// Shift doublewords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR -/// digit used in the ModR/M byte (SSE2). -pub static PS_D_IMM: [u8; 3] = [0x66, 0x0f, 0x72]; - -/// Shift quadwords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR -/// digit used in the ModR/M byte (SSE2). 
-pub static PS_Q_IMM: [u8; 3] = [0x66, 0x0f, 0x73]; - -/// Shift words in xmm1 left by xmm2/m128 while shifting in 0s (SSE2). -pub static PSLLW: [u8; 3] = [0x66, 0x0f, 0xf1]; - -/// Shift doublewords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2). -pub static PSLLD: [u8; 3] = [0x66, 0x0f, 0xf2]; - -/// Shift quadwords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2). -pub static PSLLQ: [u8; 3] = [0x66, 0x0f, 0xf3]; - -/// Shift words in xmm1 right by xmm2/m128 while shifting in 0s (SSE2). -pub static PSRLW: [u8; 3] = [0x66, 0x0f, 0xd1]; - -/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2). -pub static PSRLD: [u8; 3] = [0x66, 0x0f, 0xd2]; - -/// Shift quadwords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2). -pub static PSRLQ: [u8; 3] = [0x66, 0x0f, 0xd3]; - -/// Shift words in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2). -pub static PSRAW: [u8; 3] = [0x66, 0x0f, 0xe1]; - -/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2). -pub static PSRAD: [u8; 3] = [0x66, 0x0f, 0xe2]; - -/// Subtract packed byte integers in xmm2/m128 from packed byte integers in xmm1 (SSE2). -pub static PSUBB: [u8; 3] = [0x66, 0x0f, 0xf8]; - -/// Subtract packed word integers in xmm2/m128 from packed word integers in xmm1 (SSE2). -pub static PSUBW: [u8; 3] = [0x66, 0x0f, 0xf9]; - -/// Subtract packed doubleword integers in xmm2/m128 from doubleword byte integers in xmm1 (SSE2). -pub static PSUBD: [u8; 3] = [0x66, 0x0f, 0xfa]; - -/// Subtract packed quadword integers in xmm2/m128 from xmm1 (SSE2). -pub static PSUBQ: [u8; 3] = [0x66, 0x0f, 0xfb]; - -/// Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1 -/// and saturate results (SSE2). -pub static PSUBSB: [u8; 3] = [0x66, 0x0f, 0xe8]; - -/// Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1 -/// and saturate results (SSE2). 
-pub static PSUBSW: [u8; 3] = [0x66, 0x0f, 0xe9]; - -/// Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1 -/// and saturate results (SSE2). -pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8]; - -/// Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1 -/// and saturate results (SSE2). -pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9]; - -/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all -/// 0s (SSE4.1). -pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17]; - -/// Unpack and interleave high-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKHBW: [u8; 3] = [0x66, 0x0f, 0x68]; - -/// Unpack and interleave high-order words from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKHWD: [u8; 3] = [0x66, 0x0f, 0x69]; - -/// Unpack and interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKHDQ: [u8; 3] = [0x66, 0x0f, 0x6A]; - -/// Unpack and interleave high-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKHQDQ: [u8; 3] = [0x66, 0x0f, 0x6D]; - -/// Unpack and interleave low-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKLBW: [u8; 3] = [0x66, 0x0f, 0x60]; - -/// Unpack and interleave low-order words from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKLWD: [u8; 3] = [0x66, 0x0f, 0x61]; - -/// Unpack and interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKLDQ: [u8; 3] = [0x66, 0x0f, 0x62]; - -/// Unpack and interleave low-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKLQDQ: [u8; 3] = [0x66, 0x0f, 0x6C]; - -/// Push r{16,32,64}. -pub static PUSH_REG: [u8; 1] = [0x50]; - -/// Logical exclusive OR (SSE2). -pub static PXOR: [u8; 3] = [0x66, 0x0f, 0xef]; - -/// Near return to calling procedure. 
-pub static RET_NEAR: [u8; 1] = [0xc3]; - -/// General rotation opcode. Kind of rotation depends on encoding. -pub static ROTATE_CL: [u8; 1] = [0xd3]; - -/// General rotation opcode. Kind of rotation depends on encoding. -pub static ROTATE_IMM8: [u8; 1] = [0xc1]; - -/// Round scalar doubl-precision floating-point values. -pub static ROUNDSD: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0b]; - -/// Round scalar single-precision floating-point values. -pub static ROUNDSS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0a]; - -/// Subtract with borrow r{16,32,64} from r/m of the same size. -pub static SBB: [u8; 1] = [0x19]; - -/// Set byte if overflow (OF=1). -pub static SET_BYTE_IF_OVERFLOW: [u8; 2] = [0x0f, 0x90]; - -/// Compute the square root of the packed double-precision floating-point values and store the -/// result in xmm1 (SSE2). -pub static SQRTPD: [u8; 3] = [0x66, 0x0f, 0x51]; - -/// Compute the square root of the packed double-precision floating-point values and store the -/// result in xmm1 (SSE). -pub static SQRTPS: [u8; 2] = [0x0f, 0x51]; - -/// Compute square root of scalar double-precision floating-point value. -pub static SQRTSD: [u8; 3] = [0xf2, 0x0f, 0x51]; - -/// Compute square root of scalar single-precision value. -pub static SQRTSS: [u8; 3] = [0xf3, 0x0f, 0x51]; - -/// Subtract r{16,32,64} from r/m of same size. -pub static SUB: [u8; 1] = [0x29]; - -/// Subtract packed double-precision floating-point values in xmm2/mem from xmm1 and store result -/// in xmm1 (SSE2). -pub static SUBPD: [u8; 3] = [0x66, 0x0f, 0x5c]; - -/// Subtract packed single-precision floating-point values in xmm2/mem from xmm1 and store result -/// in xmm1 (SSE). -pub static SUBPS: [u8; 2] = [0x0f, 0x5c]; - -/// Subtract the low double-precision floating-point value in xmm2/m64 from xmm1 -/// and store the result in xmm1. -pub static SUBSD: [u8; 3] = [0xf2, 0x0f, 0x5c]; - -/// Subtract the low single-precision floating-point value in xmm2/m32 from xmm1 -/// and store the result in xmm1. 
-pub static SUBSS: [u8; 3] = [0xf3, 0x0f, 0x5c]; - -/// AND r8 with r/m8; set SF, ZF, PF according to result. -pub static TEST_BYTE_REG: [u8; 1] = [0x84]; - -/// AND {r16, r32, r64} with r/m of the same size; set SF, ZF, PF according to result. -pub static TEST_REG: [u8; 1] = [0x85]; - -/// Count the number of trailing zero bits. -pub static TZCNT: [u8; 3] = [0xf3, 0x0f, 0xbc]; - -/// Compare low double-precision floating-point values in xmm1 and xmm2/mem64 -/// and set the EFLAGS flags accordingly. -pub static UCOMISD: [u8; 3] = [0x66, 0x0f, 0x2e]; - -/// Compare low single-precision floating-point values in xmm1 and xmm2/mem32 -/// and set the EFLAGS flags accordingly. -pub static UCOMISS: [u8; 2] = [0x0f, 0x2e]; - -/// Raise invalid opcode instruction. -pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b]; - -/// Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed -/// single-precision floating-point values in xmm1 with writemask k1. Rounding behavior -/// is controlled by MXCSR but can be overriden by EVEX.L'L in static rounding mode -/// (AVX512VL, AVX512F). -pub static VCVTUDQ2PS: [u8; 3] = [0xf2, 0x0f, 0x7a]; - -/// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended. -pub static XOR_IMM: [u8; 1] = [0x81]; - -/// r/m{16,32,64} XOR sign-extended imm8. -pub static XOR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; - -/// r/m{16,32,64} XOR register of the same size. -pub static XOR: [u8; 1] = [0x31]; - -/// Bitwise logical XOR of packed double-precision floating-point values. -pub static XORPD: [u8; 3] = [0x66, 0x0f, 0x57]; - -/// Bitwise logical XOR of packed single-precision floating-point values. -pub static XORPS: [u8; 2] = [0x0f, 0x57]; diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs deleted file mode 100644 index f45f8dc673..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/recipes.rs +++ /dev/null @@ -1,3445 +0,0 @@ -//! Encoding recipes for x86/x86_64. 
-use std::rc::Rc; - -use cranelift_codegen_shared::isa::x86::EncodingBits; - -use crate::cdsl::ast::Literal; -use crate::cdsl::formats::InstructionFormat; -use crate::cdsl::instructions::InstructionPredicate; -use crate::cdsl::recipes::{ - EncodingRecipe, EncodingRecipeBuilder, OperandConstraint, Register, Stack, -}; -use crate::cdsl::regs::IsaRegs; -use crate::cdsl::settings::SettingGroup; -use crate::shared::Definitions as SharedDefinitions; - -use crate::isa::x86::opcodes; - -/// Helper data structure to create recipes and template recipes. -/// It contains all the recipes and recipe templates that might be used in the encodings crate of -/// this same directory. -pub(crate) struct RecipeGroup<'builder> { - /// Memoized registers description, to pass it to builders later. - regs: &'builder IsaRegs, - - /// All the recipes explicitly created in this file. This is different from the final set of - /// recipes, which is definitive only once encodings have generated new recipes on the fly. - recipes: Vec, - - /// All the recipe templates created in this file. 
- templates: Vec>>, -} - -impl<'builder> RecipeGroup<'builder> { - fn new(regs: &'builder IsaRegs) -> Self { - Self { - regs, - recipes: Vec::new(), - templates: Vec::new(), - } - } - fn add_recipe(&mut self, recipe: EncodingRecipeBuilder) { - self.recipes.push(recipe.build()); - } - fn add_template_recipe(&mut self, recipe: EncodingRecipeBuilder) -> Rc> { - let template = Rc::new(Template::new(recipe, self.regs)); - self.templates.push(template.clone()); - template - } - fn add_template_inferred( - &mut self, - recipe: EncodingRecipeBuilder, - infer_function: &'static str, - ) -> Rc> { - let template = - Rc::new(Template::new(recipe, self.regs).inferred_rex_compute_size(infer_function)); - self.templates.push(template.clone()); - template - } - fn add_template(&mut self, template: Template<'builder>) -> Rc> { - let template = Rc::new(template); - self.templates.push(template.clone()); - template - } - pub fn recipe(&self, name: &str) -> &EncodingRecipe { - self.recipes - .iter() - .find(|recipe| recipe.name == name) - .unwrap_or_else(|| panic!("unknown recipe name: {}. Try template?", name)) - } - pub fn template(&self, name: &str) -> &Template { - self.templates - .iter() - .find(|recipe| recipe.name() == name) - .unwrap_or_else(|| panic!("unknown template name: {}. Try recipe?", name)) - } -} - -// Opcode representation. -// -// Cranelift requires each recipe to have a single encoding size in bytes, and x86 opcodes are -// variable length, so we use separate recipes for different styles of opcodes and prefixes. The -// opcode format is indicated by the recipe name prefix. -// -// The match case below does not include the REX prefix which goes after the mandatory prefix. -// VEX/XOP and EVEX prefixes are not yet supported. Encodings using any of these prefixes are -// represented by separate recipes. -// -// The encoding bits are: -// -// 0-7: The opcode byte . 
-// 8-9: pp, mandatory prefix: -// 00 none (Op*) -// 01 66 (Mp*) -// 10 F3 (Mp*) -// 11 F2 (Mp*) -// 10-11: mm, opcode map: -// 00 (Op1/Mp1) -// 01 0F (Op2/Mp2) -// 10 0F 38 (Op3/Mp3) -// 11 0F 3A (Op3/Mp3) -// 12-14 rrr, opcode bits for the ModR/M byte for certain opcodes. -// 15: REX.W bit (or VEX.W/E) -// -// There is some redundancy between bits 8-11 and the recipe names, but we have enough bits, and -// the pp+mm format is ready for supporting VEX prefixes. -// -// TODO Cranelift doesn't actually require recipe to have different encoding sizes anymore, so this -// could be simplified. - -/// Given a sequence of opcode bytes, compute the recipe name prefix and encoding bits. -fn decode_opcodes(op_bytes: &[u8], rrr: u16, w: u16) -> (&'static str, u16) { - let enc = EncodingBits::new(op_bytes, rrr, w); - (enc.prefix().recipe_name_prefix(), enc.bits()) -} - -/// Given a snippet of Rust code (or None), replace the `PUT_OP` macro with the -/// corresponding `put_*` function from the `binemit.rs` module. -fn replace_put_op(code: Option, prefix: &str) -> Option { - code.map(|code| code.replace("{{PUT_OP}}", &format!("put_{}", prefix.to_lowercase()))) -} - -/// Replaces constraints to a REX-prefixed register class by the equivalent non-REX register class. 
-fn replace_nonrex_constraints( - regs: &IsaRegs, - constraints: Vec, -) -> Vec { - constraints - .into_iter() - .map(|constraint| match constraint { - OperandConstraint::RegClass(rc_index) => { - let new_rc_index = if rc_index == regs.class_by_name("GPR") { - regs.class_by_name("GPR8") - } else if rc_index == regs.class_by_name("FPR") { - regs.class_by_name("FPR8") - } else { - rc_index - }; - OperandConstraint::RegClass(new_rc_index) - } - _ => constraint, - }) - .collect() -} - -fn replace_evex_constraints( - _: &IsaRegs, - constraints: Vec, -) -> Vec { - constraints - .into_iter() - .map(|constraint| match constraint { - OperandConstraint::RegClass(rc_index) => { - // FIXME(#1306) this should be able to upgrade the register class to FPR32 as in - // `replace_nonrex_constraints` above, e.g. When FPR32 is re-added, add back in the - // rc_index conversion to FPR32. In the meantime, this is effectively a no-op - // conversion--the register class stays the same. - OperandConstraint::RegClass(rc_index) - } - _ => constraint, - }) - .collect() -} - -/// Specifies how the prefix (e.g. REX) is emitted by a Recipe. -#[derive(Copy, Clone, PartialEq)] -pub enum RecipePrefixKind { - /// The REX emission behavior is not hardcoded for the Recipe - /// and may be overridden when using the Template. - Unspecified, - - /// The Recipe must hardcode the non-emission of the REX prefix. - NeverEmitRex, - - /// The Recipe must hardcode the emission of the REX prefix. - AlwaysEmitRex, - - /// The Recipe should infer the emission of the REX.RXB bits from registers, - /// and the REX.W bit from the EncodingBits. - /// - /// Because such a Recipe has a non-constant instruction size, it must have - /// a special `compute_size` handler for the inferrable-REX case. - InferRex, - - /// The Recipe must hardcode the emission of an EVEX prefix. 
- Evex, -} - -impl Default for RecipePrefixKind { - fn default() -> Self { - Self::Unspecified - } -} - -/// Previously called a TailRecipe in the Python meta language, this allows to create multiple -/// variants of a single base EncodingRecipe (rex prefix, specialized w/rrr bits, different -/// opcodes). It serves as a prototype of an EncodingRecipe, which is then used when actually creating -/// Encodings, in encodings.rs. This is an idiosyncrasy of the x86 meta-language, and could be -/// reconsidered later. -#[derive(Clone)] -pub(crate) struct Template<'builder> { - /// Description of registers, used in the build() method. - regs: &'builder IsaRegs, - - /// The recipe template, which is to be specialized (by copy). - recipe: EncodingRecipeBuilder, - - /// How is the REX prefix emitted? - rex_kind: RecipePrefixKind, - - /// Function for `compute_size()` when REX is inferrable. - inferred_rex_compute_size: Option<&'static str>, - - /// Other recipe to use when REX-prefixed. - when_prefixed: Option>>, - - // Parameters passed in the EncodingBits. - /// Value of the W bit (0 or 1), stored in the EncodingBits. - w_bit: u16, - /// Value of the RRR bits (between 0 and 0b111). - rrr_bits: u16, - /// Opcode bytes. 
- op_bytes: &'static [u8], -} - -impl<'builder> Template<'builder> { - fn new(recipe: EncodingRecipeBuilder, regs: &'builder IsaRegs) -> Self { - Self { - regs, - recipe, - rex_kind: RecipePrefixKind::default(), - inferred_rex_compute_size: None, - when_prefixed: None, - w_bit: 0, - rrr_bits: 0, - op_bytes: &opcodes::EMPTY, - } - } - - fn name(&self) -> &str { - &self.recipe.name - } - fn rex_kind(self, kind: RecipePrefixKind) -> Self { - Self { - rex_kind: kind, - ..self - } - } - fn inferred_rex_compute_size(self, function: &'static str) -> Self { - Self { - inferred_rex_compute_size: Some(function), - ..self - } - } - fn when_prefixed(self, template: Rc>) -> Self { - assert!(self.when_prefixed.is_none()); - Self { - when_prefixed: Some(template), - ..self - } - } - - // Copy setters. - pub fn opcodes(&self, op_bytes: &'static [u8]) -> Self { - assert!(!op_bytes.is_empty()); - let mut copy = self.clone(); - copy.op_bytes = op_bytes; - copy - } - pub fn w(&self) -> Self { - let mut copy = self.clone(); - copy.w_bit = 1; - copy - } - pub fn rrr(&self, value: u16) -> Self { - assert!(value <= 0b111); - let mut copy = self.clone(); - copy.rrr_bits = value; - copy - } - pub fn nonrex(&self) -> Self { - assert!( - self.rex_kind != RecipePrefixKind::AlwaysEmitRex, - "Template requires REX prefix." - ); - let mut copy = self.clone(); - copy.rex_kind = RecipePrefixKind::NeverEmitRex; - copy - } - pub fn rex(&self) -> Self { - assert!( - self.rex_kind != RecipePrefixKind::NeverEmitRex, - "Template requires no REX prefix." - ); - if let Some(prefixed) = &self.when_prefixed { - let mut ret = prefixed.rex(); - // Forward specialized parameters. - ret.op_bytes = self.op_bytes; - ret.w_bit = self.w_bit; - ret.rrr_bits = self.rrr_bits; - return ret; - } - let mut copy = self.clone(); - copy.rex_kind = RecipePrefixKind::AlwaysEmitRex; - copy - } - pub fn infer_rex(&self) -> Self { - assert!( - self.rex_kind != RecipePrefixKind::NeverEmitRex, - "Template requires no REX prefix." 
- ); - assert!( - self.when_prefixed.is_none(), - "infer_rex used with when_prefixed()." - ); - let mut copy = self.clone(); - copy.rex_kind = RecipePrefixKind::InferRex; - copy - } - - pub fn build(mut self) -> (EncodingRecipe, u16) { - let (opcode, bits) = decode_opcodes(&self.op_bytes, self.rrr_bits, self.w_bit); - - let (recipe_name, size_addendum) = match self.rex_kind { - RecipePrefixKind::Unspecified | RecipePrefixKind::NeverEmitRex => { - // Ensure the operands are limited to non-REX constraints. - let operands_in = self.recipe.operands_in.unwrap_or_default(); - self.recipe.operands_in = Some(replace_nonrex_constraints(self.regs, operands_in)); - let operands_out = self.recipe.operands_out.unwrap_or_default(); - self.recipe.operands_out = - Some(replace_nonrex_constraints(self.regs, operands_out)); - - (opcode.into(), self.op_bytes.len() as u64) - } - RecipePrefixKind::AlwaysEmitRex => { - ("Rex".to_string() + opcode, self.op_bytes.len() as u64 + 1) - } - RecipePrefixKind::InferRex => { - assert_eq!(self.w_bit, 0, "A REX.W bit always requires a REX prefix; avoid using `infer_rex().w()` and use `rex().w()` instead."); - // Hook up the right function for inferred compute_size(). - assert!( - self.inferred_rex_compute_size.is_some(), - "InferRex recipe '{}' needs an inferred_rex_compute_size function.", - &self.recipe.name - ); - self.recipe.compute_size = self.inferred_rex_compute_size; - - ("DynRex".to_string() + opcode, self.op_bytes.len() as u64) - } - RecipePrefixKind::Evex => { - // Allow the operands to expand limits to EVEX constraints. 
- let operands_in = self.recipe.operands_in.unwrap_or_default(); - self.recipe.operands_in = Some(replace_evex_constraints(self.regs, operands_in)); - let operands_out = self.recipe.operands_out.unwrap_or_default(); - self.recipe.operands_out = Some(replace_evex_constraints(self.regs, operands_out)); - - ("Evex".to_string() + opcode, 4 + 1) - } - }; - - self.recipe.base_size += size_addendum; - - // Branch ranges are relative to the end of the instruction. - // For InferRex, the range should be the minimum, assuming no REX. - if let Some(range) = self.recipe.branch_range.as_mut() { - range.inst_size += size_addendum; - } - - self.recipe.emit = replace_put_op(self.recipe.emit, &recipe_name); - self.recipe.name = recipe_name + &self.recipe.name; - - (self.recipe.build(), bits) - } -} - -/// Returns a predicate checking that the "cond" field of the instruction contains one of the -/// directly supported floating point condition codes. -fn supported_floatccs_predicate( - supported_cc: &[Literal], - format: &InstructionFormat, -) -> InstructionPredicate { - supported_cc - .iter() - .fold(InstructionPredicate::new(), |pred, literal| { - pred.or(InstructionPredicate::new_is_field_equal( - format, - "cond", - literal.to_rust_code(), - )) - }) -} - -/// Return an instruction predicate that checks if `iform.imm` is a valid `scale` for a SIB byte. -fn valid_scale(format: &InstructionFormat) -> InstructionPredicate { - ["1", "2", "4", "8"] - .iter() - .fold(InstructionPredicate::new(), |pred, &literal| { - pred.or(InstructionPredicate::new_is_field_equal( - format, - "imm", - literal.into(), - )) - }) -} - -pub(crate) fn define<'shared>( - shared_defs: &'shared SharedDefinitions, - settings: &'shared SettingGroup, - regs: &'shared IsaRegs, -) -> RecipeGroup<'shared> { - // The set of floating point condition codes that are directly supported. - // Other condition codes need to be reversed or expressed as two tests. 
- let floatcc = &shared_defs.imm.floatcc; - let supported_floatccs: Vec = ["ord", "uno", "one", "ueq", "gt", "ge", "ult", "ule"] - .iter() - .map(|name| Literal::enumerator_for(floatcc, name)) - .collect(); - - // Register classes shorthands. - let abcd = regs.class_by_name("ABCD"); - let gpr = regs.class_by_name("GPR"); - let fpr = regs.class_by_name("FPR"); - let flag = regs.class_by_name("FLAG"); - - // Operand constraints shorthands. - let reg_rflags = Register::new(flag, regs.regunit_by_name(flag, "rflags")); - let reg_rax = Register::new(gpr, regs.regunit_by_name(gpr, "rax")); - let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx")); - let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx")); - let reg_r15 = Register::new(gpr, regs.regunit_by_name(gpr, "r15")); - let reg_xmm0 = Register::new(fpr, regs.regunit_by_name(fpr, "xmm0")); - - // Stack operand with a 32-bit signed displacement from either RBP or RSP. - let stack_gpr32 = Stack::new(gpr); - let stack_fpr32 = Stack::new(fpr); - - let formats = &shared_defs.formats; - - // Predicates shorthands. - let use_sse41 = settings.predicate_by_name("use_sse41"); - - // Definitions. - let mut recipes = RecipeGroup::new(regs); - - // A null unary instruction that takes a GPR register. Can be used for identity copies and - // no-op conversions. - recipes.add_recipe( - EncodingRecipeBuilder::new("null", &formats.unary, 0) - .operands_in(vec![gpr]) - .operands_out(vec![0]) - .emit(""), - ); - recipes.add_recipe( - EncodingRecipeBuilder::new("null_fpr", &formats.unary, 0) - .operands_in(vec![fpr]) - .operands_out(vec![0]) - .emit(""), - ); - recipes.add_recipe( - EncodingRecipeBuilder::new("stacknull", &formats.unary, 0) - .operands_in(vec![stack_gpr32]) - .operands_out(vec![stack_gpr32]) - .emit(""), - ); - - recipes.add_recipe( - EncodingRecipeBuilder::new("get_pinned_reg", &formats.nullary, 0) - .operands_out(vec![reg_r15]) - .emit(""), - ); - // umr with a fixed register output that's r15. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("set_pinned_reg", &formats.unary, 1) - .operands_in(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - let r15 = RU::r15.into(); - {{PUT_OP}}(bits, rex2(r15, in_reg0), sink); - modrm_rr(r15, in_reg0, sink); - "#, - ), - ); - - // No-op fills, created by late-stage redundant-fill removal. - recipes.add_recipe( - EncodingRecipeBuilder::new("fillnull", &formats.unary, 0) - .operands_in(vec![stack_gpr32]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit(""), - ); - recipes.add_recipe( - EncodingRecipeBuilder::new("ffillnull", &formats.unary, 0) - .operands_in(vec![stack_gpr32]) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit(""), - ); - - recipes.add_recipe( - EncodingRecipeBuilder::new("debugtrap", &formats.nullary, 1).emit("sink.put1(0xcc);"), - ); - - // XX opcode, no ModR/M. - recipes.add_template_recipe(EncodingRecipeBuilder::new("trap", &formats.trap, 0).emit( - r#" - sink.trap(code, func.srclocs[inst]); - {{PUT_OP}}(bits, BASE_REX, sink); - "#, - )); - - // Macro: conditional jump over a ud2. - recipes.add_recipe( - EncodingRecipeBuilder::new("trapif", &formats.int_cond_trap, 4) - .operands_in(vec![reg_rflags]) - .clobbers_flags(false) - .emit( - r#" - // Jump over a 2-byte ud2. - sink.put1(0x70 | (icc2opc(cond.inverse()) as u8)); - sink.put1(2); - // ud2. - sink.trap(code, func.srclocs[inst]); - sink.put1(0x0f); - sink.put1(0x0b); - "#, - ), - ); - - recipes.add_recipe( - EncodingRecipeBuilder::new("trapff", &formats.float_cond_trap, 4) - .operands_in(vec![reg_rflags]) - .clobbers_flags(false) - .inst_predicate(supported_floatccs_predicate( - &supported_floatccs, - &*formats.float_cond_trap, - )) - .emit( - r#" - // Jump over a 2-byte ud2. - sink.put1(0x70 | (fcc2opc(cond.inverse()) as u8)); - sink.put1(2); - // ud2. 
- sink.trap(code, func.srclocs[inst]); - sink.put1(0x0f); - sink.put1(0x0b); - "#, - ), - ); - - // XX /r - recipes.add_template_inferred( - EncodingRecipeBuilder::new("rr", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - // XX /r with operands swapped. (RM form). - recipes.add_template_inferred( - EncodingRecipeBuilder::new("rrx", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - // XX /r with FPR ins and outs. A form. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fa", &formats.binary, 1) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - // XX /r with FPR ins and outs. A form with input operands swapped. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fax", &formats.binary, 1) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![1]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - // The operand order does not matter for calculating whether a REX prefix is needed. - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - // XX /r with FPR ins and outs. A form with a byte immediate. 
- { - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fa_ib", &formats.ternary_imm8, 2) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_unsigned_int( - &*formats.ternary_imm8, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - } - - // XX /n for a unary operation with extension bits. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("ur", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - - // XX /r, but for a unary operator with separate input/output register, like - // copies. MR form, preserving flags. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("umr", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); - modrm_rr(out_reg0, in_reg0, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"), - ); - - // Same as umr, but with FPR -> GPR registers. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rfumr", &formats.unary, 1) - .operands_in(vec![fpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); - modrm_rr(out_reg0, in_reg0, sink); - "#, - ), - ); - - // Same as umr, but with the source register specified directly. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("umr_reg_to_ssa", &formats.copy_to_ssa, 1) - // No operands_in to mention, because a source register is specified directly. 
- .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, src), sink); - modrm_rr(out_reg0, src, sink); - "#, - ), - ); - - // XX /r, but for a unary operator with separate input/output register. - // RM form. Clobbers FLAGS. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("urm", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - ); - - // XX /r. Same as urm, but doesn't clobber FLAGS. - let urm_noflags = recipes.add_template_recipe( - EncodingRecipeBuilder::new("urm_noflags", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - ); - - // XX /r. Same as urm_noflags, but input limited to ABCD. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("urm_noflags_abcd", &formats.unary, 1) - .operands_in(vec![abcd]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - regs, - ) - .when_prefixed(urm_noflags), - ); - - // XX /r, RM form, FPR -> FPR. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("furm", &formats.unary, 1) - .operands_in(vec![fpr]) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_outreg0", - ); - - // Same as furm, but with the source register specified directly. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("furm_reg_to_ssa", &formats.copy_to_ssa, 1) - // No operands_in to mention, because a source register is specified directly. 
- .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(src, out_reg0), sink); - modrm_rr(src, out_reg0, sink); - "#, - ), - ); - - // XX /r, RM form, GPR -> FPR. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("frurm", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_outreg0", - ); - - // XX /r, RM form, FPR -> GPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rfurm", &formats.unary, 1) - .operands_in(vec![fpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - ); - - // XX /r, RMI form for one of the roundXX SSE 4.1 instructions. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("furmi_rnd", &formats.unary, 2) - .operands_in(vec![fpr]) - .operands_out(vec![fpr]) - .isa_predicate(use_sse41) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - sink.put1(match opcode { - Opcode::Nearest => 0b00, - Opcode::Floor => 0b01, - Opcode::Ceil => 0b10, - Opcode::Trunc => 0b11, - x => panic!("{} unexpected for furmi_rnd", opcode), - }); - "#, - ), - ); - - // XX /r, for regmove instructions. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rmov", &formats.reg_move, 1) - .operands_in(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(dst, src), sink); - modrm_rr(dst, src, sink); - "#, - ), - ); - - // XX /r, for regmove instructions (FPR version, RM encoded). 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("frmov", &formats.reg_move, 1) - .operands_in(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(src, dst), sink); - modrm_rr(src, dst, sink); - "#, - ), - ); - - // XX /n with one arg in %rcx, for shifts. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rc", &formats.binary, 1) - .operands_in(vec![ - OperandConstraint::RegClass(gpr), - OperandConstraint::FixedReg(reg_rcx), - ]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - "#, - ), - ); - - // XX /n for division: inputs in %rax, %rdx, r. Outputs in %rax, %rdx. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("div", &formats.ternary, 1) - .operands_in(vec![ - OperandConstraint::FixedReg(reg_rax), - OperandConstraint::FixedReg(reg_rdx), - OperandConstraint::RegClass(gpr), - ]) - .operands_out(vec![reg_rax, reg_rdx]) - .emit( - r#" - sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]); - {{PUT_OP}}(bits, rex1(in_reg2), sink); - modrm_r_bits(in_reg2, bits, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg2"), - ); - - // XX /n for {s,u}mulx: inputs in %rax, r. 
Outputs in %rdx(hi):%rax(lo) - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("mulx", &formats.binary, 1) - .operands_in(vec![ - OperandConstraint::FixedReg(reg_rax), - OperandConstraint::RegClass(gpr), - ]) - .operands_out(vec![ - OperandConstraint::FixedReg(reg_rax), - OperandConstraint::FixedReg(reg_rdx), - ]) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg1), sink); - modrm_r_bits(in_reg1, bits, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg1"), - ); - - // XX /r for BLEND* instructions - recipes.add_template_inferred( - EncodingRecipeBuilder::new("blend", &formats.ternary, 1) - .operands_in(vec![ - OperandConstraint::FixedReg(reg_xmm0), - OperandConstraint::RegClass(fpr), - OperandConstraint::RegClass(fpr), - ]) - .operands_out(vec![2]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg2), sink); - modrm_rr(in_reg1, in_reg2, sink); - "#, - ), - "size_with_inferred_rex_for_inreg1_inreg2", - ); - - // XX /n ib with 8-bit immediate sign-extended. 
- { - recipes.add_template_inferred( - EncodingRecipeBuilder::new("r_ib", &formats.binary_imm64, 2) - .operands_in(vec![gpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.binary_imm64, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - "size_with_inferred_rex_for_inreg0", - ); - - recipes.add_template_inferred( - EncodingRecipeBuilder::new("f_ib", &formats.binary_imm64, 2) - .operands_in(vec![fpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.binary_imm64, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - "size_with_inferred_rex_for_inreg0", - ); - - // XX /n id with 32-bit immediate sign-extended. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("r_id", &formats.binary_imm64, 5) - .operands_in(vec![gpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.binary_imm64, - "imm", - 32, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - } - - // XX /r ib with 8-bit unsigned immediate (e.g. 
for pshufd) - { - recipes.add_template_inferred( - EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.binary_imm8, 2) - .operands_in(vec![fpr]) - .operands_out(vec![fpr]) - .inst_predicate(InstructionPredicate::new_is_unsigned_int( - &*formats.binary_imm8, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - "size_with_inferred_rex_for_inreg0_outreg0", - ); - } - - // XX /r ib with 8-bit unsigned immediate (e.g. for extractlane) - { - recipes.add_template_inferred( - EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.binary_imm8, 2) - .operands_in(vec![fpr]) - .operands_out(vec![gpr]) - .inst_predicate(InstructionPredicate::new_is_unsigned_int( - &*formats.binary_imm8, "imm", 8, 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); - modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), "size_with_inferred_rex_for_inreg0_outreg0" - ); - } - - // XX /r ib with 8-bit unsigned immediate (e.g. for insertlane) - { - recipes.add_template_inferred( - EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.ternary_imm8, 2) - .operands_in(vec![fpr, gpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_unsigned_int( - &*formats.ternary_imm8, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - } - - { - // XX /n id with 32-bit immediate sign-extended. UnaryImm version. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("u_id", &formats.unary_imm, 5) - .operands_out(vec![gpr]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.unary_imm, - "imm", - 32, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), - ); - } - - // XX+rd id unary with 32-bit immediate. Note no recipe predicate. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pu_id", &formats.unary_imm, 4) - .operands_out(vec![gpr]) - .emit( - r#" - // The destination register is encoded in the low bits of the opcode. - // No ModR/M. - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), - ); - - // XX+rd id unary with bool immediate. Note no recipe predicate. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pu_id_bool", &formats.unary_bool, 4) - .operands_out(vec![gpr]) - .emit( - r#" - // The destination register is encoded in the low bits of the opcode. - // No ModR/M. - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - let imm: u32 = if imm { 1 } else { 0 }; - sink.put4(imm); - "#, - ), - ); - - // XX+rd id nullary with 0 as 32-bit immediate. Note no recipe predicate. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pu_id_ref", &formats.nullary, 4) - .operands_out(vec![gpr]) - .emit( - r#" - // The destination register is encoded in the low bits of the opcode. - // No ModR/M. - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.put4(0); - "#, - ), - ); - - // XX+rd iq unary with 64-bit immediate. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pu_iq", &formats.unary_imm, 8) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - let imm: i64 = imm.into(); - sink.put8(imm as u64); - "#, - ), - ); - - // XX+rd id unary with zero immediate. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("u_id_z", &formats.unary_imm, 1) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); - modrm_rr(out_reg0, out_reg0, sink); - "#, - ), - ); - - // XX /n Unary with floating point 32-bit immediate equal to zero. - { - recipes.add_template_recipe( - EncodingRecipeBuilder::new("f32imm_z", &formats.unary_ieee32, 1) - .operands_out(vec![fpr]) - .inst_predicate(InstructionPredicate::new_is_zero_32bit_float( - &*formats.unary_ieee32, - "imm", - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); - modrm_rr(out_reg0, out_reg0, sink); - "#, - ), - ); - } - - // XX /n Unary with floating point 64-bit immediate equal to zero. - { - recipes.add_template_recipe( - EncodingRecipeBuilder::new("f64imm_z", &formats.unary_ieee64, 1) - .operands_out(vec![fpr]) - .inst_predicate(InstructionPredicate::new_is_zero_64bit_float( - &*formats.unary_ieee64, - "imm", - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); - modrm_rr(out_reg0, out_reg0, sink); - "#, - ), - ); - } - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pushq", &formats.unary, 0) - .operands_in(vec![gpr]) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - {{PUT_OP}}(bits | (in_reg0 & 7), rex1(in_reg0), sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("popq", &formats.nullary, 0) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - "#, - ), - ); - - // XX /r, for regmove instructions. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("copysp", &formats.copy_special, 1) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(dst, src), sink); - modrm_rr(dst, src, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("adjustsp", &formats.unary, 1) - .operands_in(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(RU::rsp.into(), in_reg0), sink); - modrm_rr(RU::rsp.into(), in_reg0, sink); - "#, - ), - ); - - { - recipes.add_template_recipe( - EncodingRecipeBuilder::new("adjustsp_ib", &formats.unary_imm, 2) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.unary_imm, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); - modrm_r_bits(RU::rsp.into(), bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("adjustsp_id", &formats.unary_imm, 5) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.unary_imm, - "imm", - 32, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); - modrm_r_bits(RU::rsp.into(), bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), - ); - } - - recipes.add_recipe( - EncodingRecipeBuilder::new("dummy_sarg_t", &formats.nullary, 0) - .operands_out(vec![Stack::new(gpr)]) - .emit(""), - ); - - // XX+rd id with Abs4 function relocation. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fnaddr4", &formats.func_addr, 4) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs4, - &func.dfg.ext_funcs[func_ref].name, - 0); - sink.put4(0); - "#, - ), - ); - - // XX+rd iq with Abs8 function relocation. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("fnaddr8", &formats.func_addr, 8) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs8, - &func.dfg.ext_funcs[func_ref].name, - 0); - sink.put8(0); - "#, - ), - ); - - // Similar to fnaddr4, but writes !0 (this is used by BaldrMonkey). - recipes.add_template_recipe( - EncodingRecipeBuilder::new("allones_fnaddr4", &formats.func_addr, 4) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs4, - &func.dfg.ext_funcs[func_ref].name, - 0); - // Write the immediate as `!0` for the benefit of BaldrMonkey. - sink.put4(!0); - "#, - ), - ); - - // Similar to fnaddr8, but writes !0 (this is used by BaldrMonkey). - recipes.add_template_recipe( - EncodingRecipeBuilder::new("allones_fnaddr8", &formats.func_addr, 8) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs8, - &func.dfg.ext_funcs[func_ref].name, - 0); - // Write the immediate as `!0` for the benefit of BaldrMonkey. - sink.put8(!0); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pcrel_fnaddr8", &formats.func_addr, 5) - .operands_out(vec![gpr]) - // rex2 gets passed 0 for r/m register because the upper bit of - // r/m doesn't get decoded when in rip-relative addressing mode. - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. 
- sink.reloc_external(func.srclocs[inst], - Reloc::X86PCRel4, - &func.dfg.ext_funcs[func_ref].name, - -4); - sink.put4(0); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("got_fnaddr8", &formats.func_addr, 5) - .operands_out(vec![gpr]) - // rex2 gets passed 0 for r/m register because the upper bit of - // r/m doesn't get decoded when in rip-relative addressing mode. - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. - sink.reloc_external(func.srclocs[inst], - Reloc::X86GOTPCRel4, - &func.dfg.ext_funcs[func_ref].name, - -4); - sink.put4(0); - "#, - ), - ); - - // XX+rd id with Abs4 globalsym relocation. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("gvaddr4", &formats.unary_global_value, 4) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs4, - &func.global_values[global_value].symbol_name(), - 0); - sink.put4(0); - "#, - ), - ); - - // XX+rd iq with Abs8 globalsym relocation. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("gvaddr8", &formats.unary_global_value, 8) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs8, - &func.global_values[global_value].symbol_name(), - 0); - sink.put8(0); - "#, - ), - ); - - // XX+rd iq with PCRel4 globalsym relocation. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pcrel_gvaddr8", &formats.unary_global_value, 5) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_rm(5, out_reg0, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. 
- sink.reloc_external(func.srclocs[inst], - Reloc::X86PCRel4, - &func.global_values[global_value].symbol_name(), - -4); - sink.put4(0); - "#, - ), - ); - - // XX+rd iq with Abs8 globalsym relocation. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("got_gvaddr8", &formats.unary_global_value, 5) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_rm(5, out_reg0, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. - sink.reloc_external(func.srclocs[inst], - Reloc::X86GOTPCRel4, - &func.global_values[global_value].symbol_name(), - -4); - sink.put4(0); - "#, - ), - ); - - // Stack addresses. - // - // TODO Alternative forms for 8-bit immediates, when applicable. - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("spaddr_id", &formats.stack_load, 6) - .operands_out(vec![gpr]) - .emit( - r#" - let sp = StackRef::sp(stack_slot, &func.stack_slots); - let base = stk_base(sp.base); - {{PUT_OP}}(bits, rex2(base, out_reg0), sink); - modrm_sib_disp32(out_reg0, sink); - sib_noindex(base, sink); - let imm : i32 = offset.into(); - sink.put4(sp.offset.checked_add(imm).unwrap() as u32); - "#, - ), - ); - - // Constant addresses. - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("const_addr", &formats.unary_const, 5) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - const_disp4(constant_handle, func, sink); - "#, - ), - ); - - // Store recipes. - - { - // Simple stores. - - // A predicate asking if the offset is zero. - let has_no_offset = - InstructionPredicate::new_is_field_equal(&*formats.store, "offset", "0".into()); - - // XX /r register-indirect store with no offset. 
- let st = recipes.add_template_recipe( - EncodingRecipeBuilder::new("st", &formats.store, 1) - .operands_in(vec![gpr, gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else if needs_offset(in_reg1) { - modrm_disp8(in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg1, in_reg0, sink); - } - "#, - ), - ); - - // XX /r register-indirect store with no offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("st_abcd", &formats.store, 1) - .operands_in(vec![abcd, gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else if needs_offset(in_reg1) { - modrm_disp8(in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg1, in_reg0, sink); - } - "#, - ), - regs, - ) - .when_prefixed(st), - ); - - // XX /r register-indirect store of FPR with no offset. 
- recipes.add_template_inferred( - EncodingRecipeBuilder::new("fst", &formats.store, 1) - .operands_in(vec![fpr, gpr]) - .inst_predicate(has_no_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else if needs_offset(in_reg1) { - modrm_disp8(in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg1, in_reg0, sink); - } - "#, - ), - "size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1", - ); - - let has_small_offset = - InstructionPredicate::new_is_signed_int(&*formats.store, "offset", 8, 0); - - // XX /r register-indirect store with 8-bit offset. - let st_disp8 = recipes.add_template_recipe( - EncodingRecipeBuilder::new("stDisp8", &formats.store, 2) - .operands_in(vec![gpr, gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp8(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - // XX /r register-indirect store with 8-bit offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. 
- recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("stDisp8_abcd", &formats.store, 2) - .operands_in(vec![abcd, gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp8(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - regs, - ) - .when_prefixed(st_disp8), - ); - - // XX /r register-indirect store with 8-bit offset of FPR. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fstDisp8", &formats.store, 2) - .operands_in(vec![fpr, gpr]) - .inst_predicate(has_small_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp8(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1", - ); - - // XX /r register-indirect store with 32-bit offset. 
- let st_disp32 = recipes.add_template_recipe( - EncodingRecipeBuilder::new("stDisp32", &formats.store, 5) - .operands_in(vec![gpr, gpr]) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp32(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp32(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - - // XX /r register-indirect store with 32-bit offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("stDisp32_abcd", &formats.store, 5) - .operands_in(vec![abcd, gpr]) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp32(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp32(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - regs, - ) - .when_prefixed(st_disp32), - ); - - // XX /r register-indirect store with 32-bit offset of FPR. 
- recipes.add_template_inferred( - EncodingRecipeBuilder::new("fstDisp32", &formats.store, 5) - .operands_in(vec![fpr, gpr]) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp32(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp32(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1", - ); - } - - { - // Complex stores. - - // A predicate asking if the offset is zero. - let has_no_offset = - InstructionPredicate::new_is_field_equal(&*formats.store_complex, "offset", "0".into()); - - // XX /r register-indirect store with index and no offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndex", &formats.store_complex, 2) - .operands_in(vec![gpr, gpr, gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - sink.put1(0); - } else { - modrm_sib(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - } - "#, - ), - ); - - // XX /r register-indirect store with index and no offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndex_abcd", &formats.store_complex, 2) - .operands_in(vec![abcd, gpr, gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - sink.put1(0); - } else { - modrm_sib(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - } - "#, - ), - ); - - // XX /r register-indirect store with index and no offset of FPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fstWithIndex", &formats.store_complex, 2) - .operands_in(vec![fpr, gpr, gpr]) - .inst_predicate(has_no_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - sink.put1(0); - } else { - modrm_sib(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - } - "#, - ), - ); - - let has_small_offset = - InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 8, 0); - - // XX /r register-indirect store with index and 8-bit offset. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndexDisp8", &formats.store_complex, 3) - .operands_in(vec![gpr, gpr, gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - // XX /r register-indirect store with index and 8-bit offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndexDisp8_abcd", &formats.store_complex, 3) - .operands_in(vec![abcd, gpr, gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - // XX /r register-indirect store with index and 8-bit offset of FPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fstWithIndexDisp8", &formats.store_complex, 3) - .operands_in(vec![fpr, gpr, gpr]) - .inst_predicate(has_small_offset) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - let has_big_offset = - InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 32, 0); - - // XX /r register-indirect store with index and 32-bit offset. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndexDisp32", &formats.store_complex, 6) - .operands_in(vec![gpr, gpr, gpr]) - .inst_predicate(has_big_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp32(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - - // XX /r register-indirect store with index and 32-bit offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndexDisp32_abcd", &formats.store_complex, 6) - .operands_in(vec![abcd, gpr, gpr]) - .inst_predicate(has_big_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp32(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - - // XX /r register-indirect store with index and 32-bit offset of FPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fstWithIndexDisp32", &formats.store_complex, 6) - .operands_in(vec![fpr, gpr, gpr]) - .inst_predicate(has_big_offset) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp32(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - } - - // Unary spill with SIB and 32-bit displacement. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("spillSib32", &formats.unary, 6) - .operands_in(vec![gpr]) - .operands_out(vec![stack_gpr32]) - .clobbers_flags(false) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let base = stk_base(out_stk0.base); - {{PUT_OP}}(bits, rex2(base, in_reg0), sink); - modrm_sib_disp32(in_reg0, sink); - sib_noindex(base, sink); - sink.put4(out_stk0.offset as u32); - "#, - ), - ); - - // Like spillSib32, but targeting an FPR rather than a GPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fspillSib32", &formats.unary, 6) - .operands_in(vec![fpr]) - .operands_out(vec![stack_fpr32]) - .clobbers_flags(false) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let base = stk_base(out_stk0.base); - {{PUT_OP}}(bits, rex2(base, in_reg0), sink); - modrm_sib_disp32(in_reg0, sink); - sib_noindex(base, sink); - sink.put4(out_stk0.offset as u32); - "#, - ), - ); - - // Regspill using RSP-relative addressing. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("regspill32", &formats.reg_spill, 6) - .operands_in(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let dst = StackRef::sp(dst, &func.stack_slots); - let base = stk_base(dst.base); - {{PUT_OP}}(bits, rex2(base, src), sink); - modrm_sib_disp32(src, sink); - sib_noindex(base, sink); - sink.put4(dst.offset as u32); - "#, - ), - ); - - // Like regspill32, but targeting an FPR rather than a GPR. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("fregspill32", &formats.reg_spill, 6) - .operands_in(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let dst = StackRef::sp(dst, &func.stack_slots); - let base = stk_base(dst.base); - {{PUT_OP}}(bits, rex2(base, src), sink); - modrm_sib_disp32(src, sink); - sib_noindex(base, sink); - sink.put4(dst.offset as u32); - "#, - ), - ); - - // Load recipes. - - { - // Simple loads. - - // A predicate asking if the offset is zero. - let has_no_offset = - InstructionPredicate::new_is_field_equal(&*formats.load, "offset", "0".into()); - - // XX /r load with no offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ld", &formats.load, 1) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else if needs_offset(in_reg0) { - modrm_disp8(in_reg0, out_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg0, out_reg0, sink); - } - "#, - ), - ); - - // XX /r float load with no offset. 
- recipes.add_template_inferred( - EncodingRecipeBuilder::new("fld", &formats.load, 1) - .operands_in(vec![gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_no_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else if needs_offset(in_reg0) { - modrm_disp8(in_reg0, out_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg0, out_reg0, sink); - } - "#, - ), - "size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", - ); - - let has_small_offset = - InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 8, 0); - - // XX /r load with 8-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ldDisp8", &formats.load, 2) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib_disp8(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else { - modrm_disp8(in_reg0, out_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - // XX /r float load with 8-bit offset. 
- recipes.add_template_inferred( - EncodingRecipeBuilder::new("fldDisp8", &formats.load, 2) - .operands_in(vec![gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_small_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib_disp8(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else { - modrm_disp8(in_reg0, out_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", - ); - - let has_big_offset = - InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 32, 0); - - // XX /r load with 32-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ldDisp32", &formats.load, 5) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_big_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib_disp32(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else { - modrm_disp32(in_reg0, out_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - - // XX /r float load with 32-bit offset. 
- recipes.add_template_inferred( - EncodingRecipeBuilder::new("fldDisp32", &formats.load, 5) - .operands_in(vec![gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_big_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib_disp32(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else { - modrm_disp32(in_reg0, out_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", - ); - } - - { - // Complex loads. - - // A predicate asking if the offset is zero. - let has_no_offset = - InstructionPredicate::new_is_field_equal(&*formats.load_complex, "offset", "0".into()); - - // XX /r load with index and no offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ldWithIndex", &formats.load_complex, 2) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg0) { - modrm_sib_disp8(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_sib(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - } - "#, - ), - ); - - // XX /r float load with index and no offset. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("fldWithIndex", &formats.load_complex, 2) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_no_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg0) { - modrm_sib_disp8(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_sib(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - } - "#, - ), - ); - - let has_small_offset = - InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 8, 0); - - // XX /r load with index and 8-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ldWithIndexDisp8", &formats.load_complex, 3) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - modrm_sib_disp8(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - // XX /r float load with 8-bit offset. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("fldWithIndexDisp8", &formats.load_complex, 3) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_small_offset) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - modrm_sib_disp8(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - let has_big_offset = - InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 32, 0); - - // XX /r load with index and 32-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ldWithIndexDisp32", &formats.load_complex, 6) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_big_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - modrm_sib_disp32(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - - // XX /r float load with index and 32-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fldWithIndexDisp32", &formats.load_complex, 6) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_big_offset) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - modrm_sib_disp32(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - } - - // Unary fill with SIB and 32-bit displacement. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("fillSib32", &formats.unary, 6) - .operands_in(vec![stack_gpr32]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - let base = stk_base(in_stk0.base); - {{PUT_OP}}(bits, rex2(base, out_reg0), sink); - modrm_sib_disp32(out_reg0, sink); - sib_noindex(base, sink); - sink.put4(in_stk0.offset as u32); - "#, - ), - ); - - // Like fillSib32, but targeting an FPR rather than a GPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ffillSib32", &formats.unary, 6) - .operands_in(vec![stack_fpr32]) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - let base = stk_base(in_stk0.base); - {{PUT_OP}}(bits, rex2(base, out_reg0), sink); - modrm_sib_disp32(out_reg0, sink); - sib_noindex(base, sink); - sink.put4(in_stk0.offset as u32); - "#, - ), - ); - - // Regfill with RSP-relative 32-bit displacement. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("regfill32", &formats.reg_fill, 6) - .operands_in(vec![stack_gpr32]) - .clobbers_flags(false) - .emit( - r#" - let src = StackRef::sp(src, &func.stack_slots); - let base = stk_base(src.base); - {{PUT_OP}}(bits, rex2(base, dst), sink); - modrm_sib_disp32(dst, sink); - sib_noindex(base, sink); - sink.put4(src.offset as u32); - "#, - ), - ); - - // Like regfill32, but targeting an FPR rather than a GPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fregfill32", &formats.reg_fill, 6) - .operands_in(vec![stack_fpr32]) - .clobbers_flags(false) - .emit( - r#" - let src = StackRef::sp(src, &func.stack_slots); - let base = stk_base(src.base); - {{PUT_OP}}(bits, rex2(base, dst), sink); - modrm_sib_disp32(dst, sink); - sib_noindex(base, sink); - sink.put4(src.offset as u32); - "#, - ), - ); - - // Call/return. 
- - recipes.add_template_recipe( - EncodingRecipeBuilder::new("call_id", &formats.call, 4).emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - {{PUT_OP}}(bits, BASE_REX, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. - sink.reloc_external(func.srclocs[inst], - Reloc::X86CallPCRel4, - &func.dfg.ext_funcs[func_ref].name, - -4); - sink.put4(0); - sink.add_call_site(opcode, func.srclocs[inst]); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("call_plt_id", &formats.call, 4).emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - {{PUT_OP}}(bits, BASE_REX, sink); - sink.reloc_external(func.srclocs[inst], - Reloc::X86CallPLTRel4, - &func.dfg.ext_funcs[func_ref].name, - -4); - sink.put4(0); - sink.add_call_site(opcode, func.srclocs[inst]); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("call_r", &formats.call_indirect, 1) - .operands_in(vec![gpr]) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - sink.add_call_site(opcode, func.srclocs[inst]); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ret", &formats.multiary, 0) - .emit("{{PUT_OP}}(bits, BASE_REX, sink);"), - ); - - // Branches. 
- - recipes.add_template_recipe( - EncodingRecipeBuilder::new("jmpb", &formats.jump, 1) - .branch_range((1, 8)) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, BASE_REX, sink); - disp1(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("jmpd", &formats.jump, 4) - .branch_range((4, 32)) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, BASE_REX, sink); - disp4(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("brib", &formats.branch_int, 1) - .operands_in(vec![reg_rflags]) - .branch_range((1, 8)) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink); - disp1(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("brid", &formats.branch_int, 4) - .operands_in(vec![reg_rflags]) - .branch_range((4, 32)) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink); - disp4(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("brfb", &formats.branch_float, 1) - .operands_in(vec![reg_rflags]) - .branch_range((1, 8)) - .clobbers_flags(false) - .inst_predicate(supported_floatccs_predicate( - &supported_floatccs, - &*formats.branch_float, - )) - .emit( - r#" - {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink); - disp1(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("brfd", &formats.branch_float, 4) - .operands_in(vec![reg_rflags]) - .branch_range((4, 32)) - .clobbers_flags(false) - .inst_predicate(supported_floatccs_predicate( - &supported_floatccs, - &*formats.branch_float, - )) - .emit( - r#" - {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink); - disp4(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("indirect_jmp", &formats.indirect_jump, 1) - .operands_in(vec![gpr]) - 
.clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("jt_entry", &formats.branch_table_entry, 2) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .inst_predicate(valid_scale(&*formats.branch_table_entry)) - .compute_size("size_plus_maybe_offset_for_inreg_1") - .emit( - r#" - {{PUT_OP}}(bits, rex3(in_reg1, out_reg0, in_reg0), sink); - if needs_offset(in_reg1) { - modrm_sib_disp8(out_reg0, sink); - sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); - sink.put1(0); - } else { - modrm_sib(out_reg0, sink); - sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); - } - "#, - ), - ); - - recipes.add_template_inferred( - EncodingRecipeBuilder::new("vconst", &formats.unary_const, 5) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - const_disp4(constant_handle, func, sink); - "#, - ), - "size_with_inferred_rex_for_outreg0", - ); - - recipes.add_template_inferred( - EncodingRecipeBuilder::new("vconst_optimized", &formats.unary_const, 1) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); - modrm_rr(out_reg0, out_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_outreg0", - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("jt_base", &formats.branch_table_base, 5) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - - // No reloc is needed here as the jump table is emitted directly after - // the function body. - jt_disp4(table, func, sink); - "#, - ), - ); - - // Test flags and set a register. - // - // These setCC instructions only set the low 8 bits, and they can only write ABCD registers - // without a REX prefix. 
- // - // Other instruction encodings accepting `b1` inputs have the same constraints and only look at - // the low 8 bits of the input register. - - let seti = recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("seti", &formats.int_cond, 1) - .operands_in(vec![reg_rflags]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - "#, - ), - regs, - ) - .rex_kind(RecipePrefixKind::AlwaysEmitRex), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("seti_abcd", &formats.int_cond, 1) - .operands_in(vec![reg_rflags]) - .operands_out(vec![abcd]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - "#, - ), - regs, - ) - .when_prefixed(seti), - ); - - let setf = recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("setf", &formats.float_cond, 1) - .operands_in(vec![reg_rflags]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - "#, - ), - regs, - ) - .rex_kind(RecipePrefixKind::AlwaysEmitRex), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("setf_abcd", &formats.float_cond, 1) - .operands_in(vec![reg_rflags]) - .operands_out(vec![abcd]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - "#, - ), - regs, - ) - .when_prefixed(setf), - ); - - // Conditional move (a.k.a integer select) - // (maybe-REX.W) 0F 4x modrm(r,r) - // 1 byte, modrm(r,r), is after the opcode - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("cmov", &formats.int_select, 1) - .operands_in(vec![ - OperandConstraint::FixedReg(reg_rflags), - OperandConstraint::RegClass(gpr), - OperandConstraint::RegClass(gpr), - ]) 
- .operands_out(vec![2]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink); - modrm_rr(in_reg1, in_reg2, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_cmov"), - ); - - // Bit scan forwards and reverse - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("bsf_and_bsr", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![ - OperandConstraint::RegClass(gpr), - OperandConstraint::FixedReg(reg_rflags), - ]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"), - ); - - // Arithematic with flag I/O. - - // XX /r, MR form. Add two GPR registers and set carry flag. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rout", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![ - OperandConstraint::TiedInput(0), - OperandConstraint::FixedReg(reg_rflags), - ]) - .clobbers_flags(true) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), - ); - - // XX /r, MR form. Add two GPR registers and get carry flag. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rin", &formats.ternary, 1) - .operands_in(vec![ - OperandConstraint::RegClass(gpr), - OperandConstraint::RegClass(gpr), - OperandConstraint::FixedReg(reg_rflags), - ]) - .operands_out(vec![0]) - .clobbers_flags(true) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), - ); - - // XX /r, MR form. Add two GPR registers with carry flag. 
- recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rio", &formats.ternary, 1) - .operands_in(vec![ - OperandConstraint::RegClass(gpr), - OperandConstraint::RegClass(gpr), - OperandConstraint::FixedReg(reg_rflags), - ]) - .operands_out(vec![ - OperandConstraint::TiedInput(0), - OperandConstraint::FixedReg(reg_rflags), - ]) - .clobbers_flags(true) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), - ); - - // Compare and set flags. - - // XX /r, MR form. Compare two GPR registers and set flags. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rcmp", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![reg_rflags]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), - ); - - // Same as rcmp, but second operand is the stack pointer. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rcmp_sp", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![reg_rflags]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, RU::rsp.into()), sink); - modrm_rr(in_reg0, RU::rsp.into(), sink); - "#, - ), - ); - - // XX /r, RM form. Compare two FPR registers and set flags. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fcmp", &formats.binary, 1) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![reg_rflags]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - { - let has_small_offset = - InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 8, 0); - - // XX /n, MI form with imm8. 
- recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm64, 2) - .operands_in(vec![gpr]) - .operands_out(vec![reg_rflags]) - .inst_predicate(has_small_offset) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - - let has_big_offset = - InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 32, 0); - - // XX /n, MI form with imm32. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm64, 5) - .operands_in(vec![gpr]) - .operands_out(vec![reg_rflags]) - .inst_predicate(has_big_offset) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - } - - // Test-and-branch. - // - // This recipe represents the macro fusion of a test and a conditional branch. - // This serves two purposes: - // - // 1. Guarantee that the test and branch get scheduled next to each other so - // macro fusion is guaranteed to be possible. - // 2. Hide the status flags from Cranelift which doesn't currently model flags. - // - // The encoding bits affect both the test and the branch instruction: - // - // Bits 0-7 are the Jcc opcode. - // Bits 8-15 control the test instruction which always has opcode byte 0x85. - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("tjccb", &formats.branch, 1 + 2) - .operands_in(vec![gpr]) - .branch_range((3, 8)) - .emit( - r#" - // test r, r. - {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. 
- sink.put1(bits as u8); - disp1(destination, func, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("tjccd", &formats.branch, 1 + 6) - .operands_in(vec![gpr]) - .branch_range((7, 32)) - .emit( - r#" - // test r, r. - {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. - sink.put1(0x0f); - sink.put1(bits as u8); - disp4(destination, func, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - - // 8-bit test-and-branch. - - let t8jccb = recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("t8jccb", &formats.branch, 1 + 2) - .operands_in(vec![gpr]) - .branch_range((3, 8)) - .emit( - r#" - // test8 r, r. - {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. - sink.put1(bits as u8); - disp1(destination, func, sink); - "#, - ), - regs, - ) - .rex_kind(RecipePrefixKind::AlwaysEmitRex), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("t8jccb_abcd", &formats.branch, 1 + 2) - .operands_in(vec![abcd]) - .branch_range((3, 8)) - .emit( - r#" - // test8 r, r. - {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. - sink.put1(bits as u8); - disp1(destination, func, sink); - "#, - ), - regs, - ) - .when_prefixed(t8jccb), - ); - - let t8jccd = recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("t8jccd", &formats.branch, 1 + 6) - .operands_in(vec![gpr]) - .branch_range((7, 32)) - .emit( - r#" - // test8 r, r. - {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. 
- sink.put1(0x0f); - sink.put1(bits as u8); - disp4(destination, func, sink); - "#, - ), - regs, - ) - .rex_kind(RecipePrefixKind::AlwaysEmitRex), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("t8jccd_abcd", &formats.branch, 1 + 6) - .operands_in(vec![abcd]) - .branch_range((7, 32)) - .emit( - r#" - // test8 r, r. - {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. - sink.put1(0x0f); - sink.put1(bits as u8); - disp4(destination, func, sink); - "#, - ), - regs, - ) - .when_prefixed(t8jccd), - ); - - // Worst case test-and-branch recipe for brz.b1 and brnz.b1 in 32-bit mode. - // The register allocator can't handle a branch instruction with constrained - // operands like the t8jccd_abcd above. This variant can accept the b1 opernd in - // any register, but is is larger because it uses a 32-bit test instruction with - // a 0xff immediate. - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("t8jccd_long", &formats.branch, 5 + 6) - .operands_in(vec![gpr]) - .branch_range((11, 32)) - .emit( - r#" - // test32 r, 0xff. - {{PUT_OP}}((bits & 0xff00) | 0xf7, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - sink.put4(0xff); - // Jcc instruction. - sink.put1(0x0f); - sink.put1(bits as u8); - disp4(destination, func, sink); - "#, - ), - ); - - // Comparison that produces a `b1` result in a GPR. - // - // This is a macro of a `cmp` instruction followed by a `setCC` instruction. - // - // TODO This is not a great solution because: - // - // - The cmp+setcc combination is not recognized by CPU's macro fusion. - // - The 64-bit encoding has issues with REX prefixes. The `cmp` and `setCC` - // instructions may need a REX independently. - // - Modeling CPU flags in the type system would be better. 
- // - // Since the `setCC` instructions only write an 8-bit register, we use that as - // our `b1` representation: A `b1` value is represented as a GPR where the low 8 - // bits are known to be 0 or 1. The high bits are undefined. - // - // This bandaid macro doesn't support a REX prefix for the final `setCC` - // instruction, so it is limited to the `ABCD` register class for booleans. - // The omission of a `when_prefixed` alternative is deliberate here. - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("icscc", &formats.int_compare, 1 + 3) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![abcd]) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - // `setCC` instruction, no REX. - let setcc = 0x90 | icc2opc(cond); - sink.put1(0x0f); - sink.put1(setcc as u8); - modrm_rr(out_reg0, 0, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), - ); - - recipes.add_template_inferred( - EncodingRecipeBuilder::new("icscc_fpr", &formats.int_compare, 1) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![0]) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - { - let is_small_imm = - InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 8, 0); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("icscc_ib", &formats.int_compare_imm, 2 + 3) - .operands_in(vec![gpr]) - .operands_out(vec![abcd]) - .inst_predicate(is_small_imm) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - // `setCC` instruction, no REX. 
- let setcc = 0x90 | icc2opc(cond); - sink.put1(0x0f); - sink.put1(setcc as u8); - modrm_rr(out_reg0, 0, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - - let is_big_imm = - InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 32, 0); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("icscc_id", &formats.int_compare_imm, 5 + 3) - .operands_in(vec![gpr]) - .operands_out(vec![abcd]) - .inst_predicate(is_big_imm) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - // `setCC` instruction, no REX. - let setcc = 0x90 | icc2opc(cond); - sink.put1(0x0f); - sink.put1(setcc as u8); - modrm_rr(out_reg0, 0, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - } - - // Make a FloatCompare instruction predicate with the supported condition codes. - // - // Same thing for floating point. - // - // The ucomiss/ucomisd instructions set the FLAGS bits CF/PF/CF like this: - // - // ZPC OSA - // UN 111 000 - // GT 000 000 - // LT 001 000 - // EQ 100 000 - // - // Not all floating point condition codes are supported. - // The omission of a `when_prefixed` alternative is deliberate here. - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fcscc", &formats.float_compare, 1 + 3) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![abcd]) - .inst_predicate(supported_floatccs_predicate( - &supported_floatccs, - &*formats.float_compare, - )) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - // `setCC` instruction, no REX. 
- use crate::ir::condcodes::FloatCC::*; - let setcc = match cond { - Ordered => 0x9b, // EQ|LT|GT => setnp (P=0) - Unordered => 0x9a, // UN => setp (P=1) - OrderedNotEqual => 0x95, // LT|GT => setne (Z=0), - UnorderedOrEqual => 0x94, // UN|EQ => sete (Z=1) - GreaterThan => 0x97, // GT => seta (C=0&Z=0) - GreaterThanOrEqual => 0x93, // GT|EQ => setae (C=0) - UnorderedOrLessThan => 0x92, // UN|LT => setb (C=1) - UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1) - Equal | // EQ - NotEqual | // UN|LT|GT - LessThan | // LT - LessThanOrEqual | // LT|EQ - UnorderedOrGreaterThan | // UN|GT - UnorderedOrGreaterThanOrEqual // UN|GT|EQ - => panic!("{} not supported by fcscc", cond), - }; - sink.put1(0x0f); - sink.put1(setcc); - modrm_rr(out_reg0, 0, sink); - "#, - ), - ); - - { - let supported_floatccs: Vec = ["eq", "lt", "le", "uno", "ne", "uge", "ugt", "ord"] - .iter() - .map(|name| Literal::enumerator_for(floatcc, name)) - .collect(); - recipes.add_template_inferred( - EncodingRecipeBuilder::new("pfcmp", &formats.float_compare, 2) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![0]) - .inst_predicate(supported_floatccs_predicate( - &supported_floatccs[..], - &*formats.float_compare, - )) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - // Add immediate byte indicating what type of comparison. - use crate::ir::condcodes::FloatCC::*; - let imm = match cond { - Equal => 0x00, - LessThan => 0x01, - LessThanOrEqual => 0x02, - Unordered => 0x03, - NotEqual => 0x04, - UnorderedOrGreaterThanOrEqual => 0x05, - UnorderedOrGreaterThan => 0x06, - Ordered => 0x07, - _ => panic!("{} not supported by pfcmp", cond), - }; - sink.put1(imm); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - } - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("is_zero", &formats.unary, 2 + 2) - .operands_in(vec![gpr]) - .operands_out(vec![abcd]) - .emit( - r#" - // Test instruction. 
- {{PUT_OP}}(bits, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Check ZF = 1 flag to see if register holds 0. - sink.put1(0x0f); - sink.put1(0x94); - modrm_rr(out_reg0, 0, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("is_invalid", &formats.unary, 2 + 3) - .operands_in(vec![gpr]) - .operands_out(vec![abcd]) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - sink.put1(0xff); - // `setCC` instruction, no REX. - use crate::ir::condcodes::IntCC::*; - let setcc = 0x90 | icc2opc(Equal); - sink.put1(0x0f); - sink.put1(setcc as u8); - modrm_rr(out_reg0, 0, sink); - "#, - ), - ); - - recipes.add_recipe( - EncodingRecipeBuilder::new("safepoint", &formats.multiary, 0).emit( - r#" - sink.add_stack_map(args, func, isa); - "#, - ), - ); - - // Both `elf_tls_get_addr` and `macho_tls_get_addr` require all caller-saved registers to be spilled. - // This is currently special cased in `regalloc/spilling.rs` in the `visit_inst` function. - - recipes.add_recipe( - EncodingRecipeBuilder::new("elf_tls_get_addr", &formats.unary_global_value, 16) - // FIXME Correct encoding for non rax registers - .operands_out(vec![reg_rax]) - .emit( - r#" - // output %rax - // clobbers %rdi - - // Those data16 prefixes are necessary to pad to 16 bytes. 
- - // data16 lea gv@tlsgd(%rip),%rdi - sink.put1(0x66); // data16 - sink.put1(0b01001000); // rex.w - const LEA: u8 = 0x8d; - sink.put1(LEA); // lea - modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d - sink.reloc_external(func.srclocs[inst], - Reloc::ElfX86_64TlsGd, - &func.global_values[global_value].symbol_name(), - -4); - sink.put4(0); - - // data16 data16 callq __tls_get_addr-4 - sink.put1(0x66); // data16 - sink.put1(0x66); // data16 - sink.put1(0b01001000); // rex.w - sink.put1(0xe8); // call - sink.reloc_external(func.srclocs[inst], - Reloc::X86CallPLTRel4, - &ExternalName::LibCall(LibCall::ElfTlsGetAddr), - -4); - sink.put4(0); - "#, - ), - ); - - recipes.add_recipe( - EncodingRecipeBuilder::new("macho_tls_get_addr", &formats.unary_global_value, 9) - // FIXME Correct encoding for non rax registers - .operands_out(vec![reg_rax]) - .emit( - r#" - // output %rax - // clobbers %rdi - - // movq gv@tlv(%rip), %rdi - sink.put1(0x48); // rex - sink.put1(0x8b); // mov - modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d - sink.reloc_external(func.srclocs[inst], - Reloc::MachOX86_64Tlv, - &func.global_values[global_value].symbol_name(), - -4); - sink.put4(0); - - // callq *(%rdi) - sink.put1(0xff); - sink.put1(0x17); - "#, - ), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("evex_reg_vvvv_rm_128", &formats.binary, 1) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![fpr]) - .emit( - r#" - // instruction encoding operands: reg (op1, w), vvvv (op2, r), rm (op3, r) - // this maps to: out_reg0, in_reg0, in_reg1 - let context = EvexContext::Other { length: EvexVectorLength::V128 }; - let masking = EvexMasking::None; - put_evex(bits, out_reg0, in_reg0, in_reg1, context, masking, sink); // params: reg, vvvv, rm - modrm_rr(in_reg1, out_reg0, sink); // params: rm, reg - "#, - ), - regs).rex_kind(RecipePrefixKind::Evex) - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("evex_reg_rm_128", &formats.unary, 1) - 
.operands_in(vec![fpr]) - .operands_out(vec![fpr]) - .emit( - r#" - // instruction encoding operands: reg (op1, w), rm (op2, r) - // this maps to: out_reg0, in_reg0 - let context = EvexContext::Other { length: EvexVectorLength::V128 }; - let masking = EvexMasking::None; - put_evex(bits, out_reg0, 0, in_reg0, context, masking, sink); // params: reg, vvvv, rm - modrm_rr(in_reg0, out_reg0, sink); // params: rm, reg - "#, - ), - regs).rex_kind(RecipePrefixKind::Evex) - ); - - recipes -} diff --git a/cranelift/codegen/meta/src/isa/x86/registers.rs b/cranelift/codegen/meta/src/isa/x86/registers.rs deleted file mode 100644 index 85a8965f89..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/registers.rs +++ /dev/null @@ -1,43 +0,0 @@ -use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; - -pub(crate) fn define() -> IsaRegs { - let mut regs = IsaRegsBuilder::new(); - - let builder = RegBankBuilder::new("FloatRegs", "xmm") - .units(16) - .track_pressure(true); - let float_regs = regs.add_bank(builder); - - let builder = RegBankBuilder::new("IntRegs", "r") - .units(16) - .names(vec!["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"]) - .track_pressure(true) - .pinned_reg(15); - let int_regs = regs.add_bank(builder); - - let builder = RegBankBuilder::new("FlagRegs", "") - .units(1) - .names(vec!["rflags"]) - .track_pressure(false); - let flag_reg = regs.add_bank(builder); - - let builder = RegClassBuilder::new_toplevel("GPR", int_regs); - let gpr = regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("FPR", float_regs); - let fpr = regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg); - regs.add_class(builder); - - let builder = RegClassBuilder::subclass_of("GPR8", gpr, 0, 8); - let gpr8 = regs.add_class(builder); - - let builder = RegClassBuilder::subclass_of("ABCD", gpr8, 0, 4); - regs.add_class(builder); - - let builder = RegClassBuilder::subclass_of("FPR8", fpr, 0, 8); - 
regs.add_class(builder); - - regs.build() -} diff --git a/cranelift/codegen/meta/src/lib.rs b/cranelift/codegen/meta/src/lib.rs index 29a545aad6..20815ef8d2 100644 --- a/cranelift/codegen/meta/src/lib.rs +++ b/cranelift/codegen/meta/src/lib.rs @@ -7,11 +7,7 @@ mod srcgen; pub mod error; pub mod isa; -mod gen_binemit; -mod gen_encodings; mod gen_inst; -mod gen_legalizer; -mod gen_registers; mod gen_settings; mod gen_types; @@ -25,11 +21,7 @@ pub fn isa_from_arch(arch: &str) -> Result { } /// Generates all the Rust source files used in Cranelift from the meta-language. -pub fn generate( - old_backend_isas: &[isa::Isa], - new_backend_isas: &[isa::Isa], - out_dir: &str, -) -> Result<(), error::Error> { +pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> { // Create all the definitions: // - common definitions. let mut shared_defs = shared::define(); @@ -43,7 +35,7 @@ pub fn generate( gen_types::generate("types.rs", &out_dir)?; // - per ISA definitions. - let target_isas = isa::define(old_backend_isas, &mut shared_defs); + let target_isas = isa::define(isas, &mut shared_defs); // At this point, all definitions are done. let all_formats = shared_defs.verify_instruction_formats(); @@ -57,70 +49,13 @@ pub fn generate( &out_dir, )?; - let extra_legalization_groups: &[&'static str] = if !new_backend_isas.is_empty() { - // The new backend only requires the "expand" legalization group. 
- &["expand"] - } else { - &[] - }; - - gen_legalizer::generate( - &target_isas, - &shared_defs.transform_groups, - extra_legalization_groups, - "legalize", - &out_dir, - )?; - for isa in target_isas { - gen_registers::generate(&isa, &format!("registers-{}.rs", isa.name), &out_dir)?; - gen_settings::generate( &isa.settings, gen_settings::ParentGroup::Shared, &format!("settings-{}.rs", isa.name), &out_dir, )?; - - gen_encodings::generate( - &shared_defs, - &isa, - &format!("encoding-{}.rs", isa.name), - &out_dir, - )?; - - gen_binemit::generate( - &isa.name, - &isa.recipes, - &format!("binemit-{}.rs", isa.name), - &out_dir, - )?; - } - - for isa in new_backend_isas { - match isa { - isa::Isa::X86 => { - // If the old backend ISAs contained x86, this file has already been generated. - if old_backend_isas.iter().any(|isa| *isa == isa::Isa::X86) { - continue; - } - - let settings = crate::isa::x86::settings::define(&shared_defs.settings); - gen_settings::generate( - &settings, - gen_settings::ParentGroup::Shared, - "settings-x86.rs", - &out_dir, - )?; - } - isa::Isa::Arm64 => { - // aarch64 doesn't have platform-specific settings. - } - isa::Isa::S390x => { - // s390x doesn't have platform-specific settings. 
- } - isa::Isa::Arm32 | isa::Isa::Riscv => todo!(), - } } Ok(()) diff --git a/cranelift/codegen/meta/src/shared/formats.rs b/cranelift/codegen/meta/src/shared/formats.rs index 3d081951a5..f1d0a1826d 100644 --- a/cranelift/codegen/meta/src/shared/formats.rs +++ b/cranelift/codegen/meta/src/shared/formats.rs @@ -18,8 +18,6 @@ pub(crate) struct Formats { pub(crate) call: Rc, pub(crate) call_indirect: Rc, pub(crate) cond_trap: Rc, - pub(crate) copy_special: Rc, - pub(crate) copy_to_ssa: Rc, pub(crate) float_compare: Rc, pub(crate) float_cond: Rc, pub(crate) float_cond_trap: Rc, @@ -37,9 +35,6 @@ pub(crate) struct Formats { pub(crate) load_no_offset: Rc, pub(crate) multiary: Rc, pub(crate) nullary: Rc, - pub(crate) reg_fill: Rc, - pub(crate) reg_move: Rc, - pub(crate) reg_spill: Rc, pub(crate) shuffle: Rc, pub(crate) stack_load: Rc, pub(crate) stack_store: Rc, @@ -283,33 +278,6 @@ impl Formats { .imm(&imm.offset32) .build(), - reg_move: Builder::new("RegMove") - .value() - .imm_with_name("src", &imm.regunit) - .imm_with_name("dst", &imm.regunit) - .build(), - - copy_special: Builder::new("CopySpecial") - .imm_with_name("src", &imm.regunit) - .imm_with_name("dst", &imm.regunit) - .build(), - - copy_to_ssa: Builder::new("CopyToSsa") - .imm_with_name("src", &imm.regunit) - .build(), - - reg_spill: Builder::new("RegSpill") - .value() - .imm_with_name("src", &imm.regunit) - .imm_with_name("dst", &entities.stack_slot) - .build(), - - reg_fill: Builder::new("RegFill") - .value() - .imm_with_name("src", &entities.stack_slot) - .imm_with_name("dst", &imm.regunit) - .build(), - trap: Builder::new("Trap").imm(&imm.trapcode).build(), cond_trap: Builder::new("CondTrap").value().imm(&imm.trapcode).build(), diff --git a/cranelift/codegen/meta/src/shared/immediates.rs b/cranelift/codegen/meta/src/shared/immediates.rs index 9811dcc0ca..12a93d81e6 100644 --- a/cranelift/codegen/meta/src/shared/immediates.rs +++ b/cranelift/codegen/meta/src/shared/immediates.rs @@ -64,9 +64,6 @@ 
pub(crate) struct Immediates { /// Flags for memory operations like `load` and `store`. pub memflags: OperandKind, - /// A register unit in the current target ISA. - pub regunit: OperandKind, - /// A trap code indicating the reason for trapping. /// /// The Rust enum type also has a `User(u16)` variant for user-provided trap codes. @@ -149,8 +146,6 @@ impl Immediates { }, memflags: new_imm("flags", "ir::MemFlags").with_doc("Memory operation flags"), - regunit: new_imm("regunit", "isa::RegUnit") - .with_doc("A register unit in the target ISA"), trapcode: { let mut trapcode_values = HashMap::new(); trapcode_values.insert("stk_ovf", "StackOverflow"); diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 0bbaa2907b..01865ccba1 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -1,7 +1,7 @@ #![allow(non_snake_case)] use crate::cdsl::instructions::{ - AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder, + AllInstructions, InstructionBuilder as Inst, InstructionGroupBuilder, }; use crate::cdsl::operands::Operand; use crate::cdsl::type_inference::Constraint::WiderOrEq; @@ -767,7 +767,7 @@ pub(crate) fn define( formats: &Formats, imm: &Immediates, entities: &EntityRefs, -) -> InstructionGroup { +) { let mut ig = InstructionGroupBuilder::new(all_instructions); define_control_flow(&mut ig, formats, imm, entities); @@ -1929,90 +1929,6 @@ pub(crate) fn define( .can_load(true), ); - let Sarg = &TypeVar::new( - "Sarg", - "Any scalar or vector type with at most 128 lanes", - TypeSetBuilder::new() - .specials(vec![crate::cdsl::types::SpecialType::StructArgument]) - .build(), - ); - let sarg_t = &Operand::new("sarg_t", Sarg); - - // FIXME remove once the old style codegen backends are removed. 
- ig.push( - Inst::new( - "dummy_sarg_t", - r#" - This creates a sarg_t - - This instruction is internal and should not be created by - Cranelift users. - "#, - &formats.nullary, - ) - .operands_in(vec![]) - .operands_out(vec![sarg_t]), - ); - - let src = &Operand::new("src", &imm.regunit); - let dst = &Operand::new("dst", &imm.regunit); - - ig.push( - Inst::new( - "regmove", - r#" - Temporarily divert ``x`` from ``src`` to ``dst``. - - This instruction moves the location of a value from one register to - another without creating a new SSA value. It is used by the register - allocator to temporarily rearrange register assignments in order to - satisfy instruction constraints. - - The register diversions created by this instruction must be undone - before the value leaves the block. At the entry to a new block, all live - values must be in their originally assigned registers. - "#, - &formats.reg_move, - ) - .operands_in(vec![x, src, dst]) - .other_side_effects(true), - ); - - ig.push( - Inst::new( - "copy_special", - r#" - Copies the contents of ''src'' register to ''dst'' register. - - This instructions copies the contents of one register to another - register without involving any SSA values. This is used for copying - special registers, e.g. copying the stack register to the frame - register in a function prologue. - "#, - &formats.copy_special, - ) - .operands_in(vec![src, dst]) - .other_side_effects(true), - ); - - ig.push( - Inst::new( - "copy_to_ssa", - r#" - Copies the contents of ''src'' register to ''a'' SSA name. - - This instruction copies the contents of one register, regardless of its SSA name, to - another register, creating a new SSA name. In that sense it is a one-sided version - of ''copy_special''. This instruction is internal and should not be created by - Cranelift users. 
- "#, - &formats.copy_to_ssa, - ) - .operands_in(vec![src]) - .operands_out(vec![a]) - .other_side_effects(true), - ); - ig.push( Inst::new( "copy_nop", @@ -2098,44 +2014,6 @@ pub(crate) fn define( .operands_out(vec![f]), ); - ig.push( - Inst::new( - "regspill", - r#" - Temporarily divert ``x`` from ``src`` to ``SS``. - - This instruction moves the location of a value from a register to a - stack slot without creating a new SSA value. It is used by the register - allocator to temporarily rearrange register assignments in order to - satisfy instruction constraints. - - See also `regmove`. - "#, - &formats.reg_spill, - ) - .operands_in(vec![x, src, SS]) - .other_side_effects(true), - ); - - ig.push( - Inst::new( - "regfill", - r#" - Temporarily divert ``x`` from ``SS`` to ``dst``. - - This instruction moves the location of a value from a stack slot to a - register without creating a new SSA value. It is used by the register - allocator to temporarily rearrange register assignments in order to - satisfy instruction constraints. - - See also `regmove`. - "#, - &formats.reg_fill, - ) - .operands_in(vec![x, SS, dst]) - .other_side_effects(true), - ); - let N = &Operand::new("args", &entities.varargs).with_doc("Variable number of args for StackMap"); @@ -2302,10 +2180,9 @@ pub(crate) fn define( | of | * | Overflow | | nof | * | No Overflow | - \* The unsigned version of overflow conditions have ISA-specific - semantics and thus have been kept as methods on the TargetIsa trait as - [unsigned_add_overflow_condition][isa::TargetIsa::unsigned_add_overflow_condition] and - [unsigned_sub_overflow_condition][isa::TargetIsa::unsigned_sub_overflow_condition]. + \* The unsigned version of overflow condition for add has ISA-specific semantics and thus + has been kept as a method on the TargetIsa trait as + [unsigned_add_overflow_condition][crate::isa::TargetIsa::unsigned_add_overflow_condition]. 
When this instruction compares integer vectors, it returns a boolean vector of lane-wise comparisons. @@ -4047,7 +3924,7 @@ pub(crate) fn define( Combine `x` and `y` into a vector with twice the lanes but half the integer width while saturating overflowing values to the unsigned maximum and minimum. - Note that all input lanes are considered unsigned. + Note that all input lanes are considered unsigned: any negative values will be interpreted as unsigned, overflowing and being replaced with the unsigned maximum. The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4` and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value @@ -4647,6 +4524,4 @@ pub(crate) fn define( ) .other_side_effects(true), ); - - ig.build() } diff --git a/cranelift/codegen/meta/src/shared/legalize.rs b/cranelift/codegen/meta/src/shared/legalize.rs deleted file mode 100644 index 9a0d6cffde..0000000000 --- a/cranelift/codegen/meta/src/shared/legalize.rs +++ /dev/null @@ -1,1087 +0,0 @@ -use crate::cdsl::ast::{var, ExprBuilder, Literal}; -use crate::cdsl::instructions::{Bindable, Instruction, InstructionGroup}; -use crate::cdsl::xform::{TransformGroupBuilder, TransformGroups}; - -use crate::shared::immediates::Immediates; -use crate::shared::types::Float::{F32, F64}; -use crate::shared::types::Int::{I128, I16, I32, I64, I8}; -use cranelift_codegen_shared::condcodes::{CondCode, IntCC}; - -#[allow(clippy::many_single_char_names, clippy::cognitive_complexity)] -pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGroups { - let mut narrow = TransformGroupBuilder::new( - "narrow", - r#" - Legalize instructions by narrowing. - - The transformations in the 'narrow' group work by expressing - instructions in terms of smaller types. Operations on vector types are - expressed in terms of vector types with fewer lanes, and integer - operations are expressed in terms of smaller integer types. 
- "#, - ); - - let mut widen = TransformGroupBuilder::new( - "widen", - r#" - Legalize instructions by widening. - - The transformations in the 'widen' group work by expressing - instructions in terms of larger types. - "#, - ); - - let mut expand = TransformGroupBuilder::new( - "expand", - r#" - Legalize instructions by expansion. - - Rewrite instructions in terms of other instructions, generally - operating on the same types as the original instructions. - "#, - ); - - // List of instructions. - let band = insts.by_name("band"); - let band_imm = insts.by_name("band_imm"); - let band_not = insts.by_name("band_not"); - let bint = insts.by_name("bint"); - let bitrev = insts.by_name("bitrev"); - let bnot = insts.by_name("bnot"); - let bor = insts.by_name("bor"); - let bor_imm = insts.by_name("bor_imm"); - let bor_not = insts.by_name("bor_not"); - let brnz = insts.by_name("brnz"); - let brz = insts.by_name("brz"); - let br_icmp = insts.by_name("br_icmp"); - let br_table = insts.by_name("br_table"); - let bxor = insts.by_name("bxor"); - let bxor_imm = insts.by_name("bxor_imm"); - let bxor_not = insts.by_name("bxor_not"); - let cls = insts.by_name("cls"); - let clz = insts.by_name("clz"); - let ctz = insts.by_name("ctz"); - let copy = insts.by_name("copy"); - let fabs = insts.by_name("fabs"); - let f32const = insts.by_name("f32const"); - let f64const = insts.by_name("f64const"); - let fcopysign = insts.by_name("fcopysign"); - let fcvt_from_sint = insts.by_name("fcvt_from_sint"); - let fneg = insts.by_name("fneg"); - let iadd = insts.by_name("iadd"); - let iadd_cin = insts.by_name("iadd_cin"); - let iadd_cout = insts.by_name("iadd_cout"); - let iadd_carry = insts.by_name("iadd_carry"); - let iadd_ifcin = insts.by_name("iadd_ifcin"); - let iadd_ifcout = insts.by_name("iadd_ifcout"); - let iadd_imm = insts.by_name("iadd_imm"); - let icmp = insts.by_name("icmp"); - let icmp_imm = insts.by_name("icmp_imm"); - let iconcat = insts.by_name("iconcat"); - let iconst = 
insts.by_name("iconst"); - let ifcmp = insts.by_name("ifcmp"); - let ifcmp_imm = insts.by_name("ifcmp_imm"); - let imul = insts.by_name("imul"); - let imul_imm = insts.by_name("imul_imm"); - let ireduce = insts.by_name("ireduce"); - let irsub_imm = insts.by_name("irsub_imm"); - let ishl = insts.by_name("ishl"); - let ishl_imm = insts.by_name("ishl_imm"); - let isplit = insts.by_name("isplit"); - let istore8 = insts.by_name("istore8"); - let istore16 = insts.by_name("istore16"); - let isub = insts.by_name("isub"); - let isub_bin = insts.by_name("isub_bin"); - let isub_bout = insts.by_name("isub_bout"); - let isub_borrow = insts.by_name("isub_borrow"); - let isub_ifbin = insts.by_name("isub_ifbin"); - let isub_ifbout = insts.by_name("isub_ifbout"); - let jump = insts.by_name("jump"); - let load = insts.by_name("load"); - let popcnt = insts.by_name("popcnt"); - let resumable_trapnz = insts.by_name("resumable_trapnz"); - let rotl = insts.by_name("rotl"); - let rotl_imm = insts.by_name("rotl_imm"); - let rotr = insts.by_name("rotr"); - let rotr_imm = insts.by_name("rotr_imm"); - let sdiv = insts.by_name("sdiv"); - let sdiv_imm = insts.by_name("sdiv_imm"); - let select = insts.by_name("select"); - let sextend = insts.by_name("sextend"); - let sshr = insts.by_name("sshr"); - let sshr_imm = insts.by_name("sshr_imm"); - let srem = insts.by_name("srem"); - let srem_imm = insts.by_name("srem_imm"); - let store = insts.by_name("store"); - let udiv = insts.by_name("udiv"); - let udiv_imm = insts.by_name("udiv_imm"); - let uextend = insts.by_name("uextend"); - let uload8 = insts.by_name("uload8"); - let uload16 = insts.by_name("uload16"); - let umulhi = insts.by_name("umulhi"); - let ushr = insts.by_name("ushr"); - let ushr_imm = insts.by_name("ushr_imm"); - let urem = insts.by_name("urem"); - let urem_imm = insts.by_name("urem_imm"); - let trapif = insts.by_name("trapif"); - let trapnz = insts.by_name("trapnz"); - let trapz = insts.by_name("trapz"); - - // Custom expansions for 
memory objects. - expand.custom_legalize(insts.by_name("global_value"), "expand_global_value"); - expand.custom_legalize(insts.by_name("heap_addr"), "expand_heap_addr"); - expand.custom_legalize(insts.by_name("table_addr"), "expand_table_addr"); - - // Custom expansions for calls. - expand.custom_legalize(insts.by_name("call"), "expand_call"); - - // Custom expansions that need to change the CFG. - // TODO: Add sufficient XForm syntax that we don't need to hand-code these. - expand.custom_legalize(trapz, "expand_cond_trap"); - expand.custom_legalize(trapnz, "expand_cond_trap"); - expand.custom_legalize(resumable_trapnz, "expand_cond_trap"); - expand.custom_legalize(br_table, "expand_br_table"); - expand.custom_legalize(select, "expand_select"); - widen.custom_legalize(select, "expand_select"); // small ints - - // Custom expansions for floating point constants. - // These expansions require bit-casting or creating constant pool entries. - expand.custom_legalize(f32const, "expand_fconst"); - expand.custom_legalize(f64const, "expand_fconst"); - - // Custom expansions for stack memory accesses. - expand.custom_legalize(insts.by_name("stack_load"), "expand_stack_load"); - expand.custom_legalize(insts.by_name("stack_store"), "expand_stack_store"); - - // Custom expansions for small stack memory acccess. - widen.custom_legalize(insts.by_name("stack_load"), "expand_stack_load"); - widen.custom_legalize(insts.by_name("stack_store"), "expand_stack_store"); - - // List of variables to reuse in patterns. 
- let x = var("x"); - let y = var("y"); - let z = var("z"); - let a = var("a"); - let a1 = var("a1"); - let a2 = var("a2"); - let a3 = var("a3"); - let a4 = var("a4"); - let b = var("b"); - let b1 = var("b1"); - let b2 = var("b2"); - let b3 = var("b3"); - let b4 = var("b4"); - let b_in = var("b_in"); - let b_int = var("b_int"); - let c = var("c"); - let c1 = var("c1"); - let c2 = var("c2"); - let c3 = var("c3"); - let c4 = var("c4"); - let c_in = var("c_in"); - let c_int = var("c_int"); - let d = var("d"); - let d1 = var("d1"); - let d2 = var("d2"); - let d3 = var("d3"); - let d4 = var("d4"); - let e = var("e"); - let e1 = var("e1"); - let e2 = var("e2"); - let e3 = var("e3"); - let e4 = var("e4"); - let f = var("f"); - let f1 = var("f1"); - let f2 = var("f2"); - let xl = var("xl"); - let xh = var("xh"); - let yl = var("yl"); - let yh = var("yh"); - let al = var("al"); - let ah = var("ah"); - let cc = var("cc"); - let block = var("block"); - let ptr = var("ptr"); - let flags = var("flags"); - let offset = var("off"); - let vararg = var("vararg"); - - narrow.custom_legalize(load, "narrow_load"); - narrow.custom_legalize(store, "narrow_store"); - - // iconst.i64 can't be legalized in the meta langage (because integer literals can't be - // embedded as part of arguments), so use a custom legalization for now. 
- narrow.custom_legalize(iconst, "narrow_iconst"); - - for &(ty, ty_half) in &[(I128, I64), (I64, I32)] { - let inst = uextend.bind(ty).bind(ty_half); - narrow.legalize( - def!(a = inst(x)), - vec![ - def!(ah = iconst(Literal::constant(&imm.imm64, 0))), - def!(a = iconcat(x, ah)), - ], - ); - } - - for &(ty, ty_half, shift) in &[(I128, I64, 63), (I64, I32, 31)] { - let inst = sextend.bind(ty).bind(ty_half); - narrow.legalize( - def!(a = inst(x)), - vec![ - def!(ah = sshr_imm(x, Literal::constant(&imm.imm64, shift))), // splat sign bit to whole number - def!(a = iconcat(x, ah)), - ], - ); - } - - for &bin_op in &[band, bor, bxor, band_not, bor_not, bxor_not] { - narrow.legalize( - def!(a = bin_op(x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!(al = bin_op(xl, yl)), - def!(ah = bin_op(xh, yh)), - def!(a = iconcat(al, ah)), - ], - ); - } - - narrow.legalize( - def!(a = bnot(x)), - vec![ - def!((xl, xh) = isplit(x)), - def!(al = bnot(xl)), - def!(ah = bnot(xh)), - def!(a = iconcat(al, ah)), - ], - ); - - narrow.legalize( - def!(a = select(c, x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!(al = select(c, xl, yl)), - def!(ah = select(c, xh, yh)), - def!(a = iconcat(al, ah)), - ], - ); - - for &ty in &[I128, I64] { - let block = var("block"); - let block1 = var("block1"); - let block2 = var("block2"); - - narrow.legalize( - def!(brz.ty(x, block, vararg)), - vec![ - def!((xl, xh) = isplit(x)), - def!( - a = icmp_imm( - Literal::enumerator_for(&imm.intcc, "eq"), - xl, - Literal::constant(&imm.imm64, 0) - ) - ), - def!( - b = icmp_imm( - Literal::enumerator_for(&imm.intcc, "eq"), - xh, - Literal::constant(&imm.imm64, 0) - ) - ), - def!(c = band(a, b)), - def!(brnz(c, block, vararg)), - ], - ); - - narrow.legalize( - def!(brnz.ty(x, block1, vararg)), - vec![ - def!((xl, xh) = isplit(x)), - def!(brnz(xl, block1, vararg)), - def!(jump(block2, Literal::empty_vararg())), - block!(block2), - def!(brnz(xh, 
block1, vararg)), - ], - ); - } - - narrow.legalize( - def!(a = popcnt.I128(x)), - vec![ - def!((xl, xh) = isplit(x)), - def!(e1 = popcnt(xl)), - def!(e2 = popcnt(xh)), - def!(e3 = iadd(e1, e2)), - def!(a = uextend(e3)), - ], - ); - - // TODO(ryzokuken): benchmark this and decide if branching is a faster - // approach than evaluating boolean expressions. - - narrow.custom_legalize(icmp_imm, "narrow_icmp_imm"); - - let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); - let intcc_ne = Literal::enumerator_for(&imm.intcc, "ne"); - for &(int_ty, int_ty_half) in &[(I64, I32), (I128, I64)] { - narrow.legalize( - def!(b = icmp.int_ty(intcc_eq, x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!(b1 = icmp.int_ty_half(intcc_eq, xl, yl)), - def!(b2 = icmp.int_ty_half(intcc_eq, xh, yh)), - def!(b = band(b1, b2)), - ], - ); - - narrow.legalize( - def!(b = icmp.int_ty(intcc_ne, x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!(b1 = icmp.int_ty_half(intcc_ne, xl, yl)), - def!(b2 = icmp.int_ty_half(intcc_ne, xh, yh)), - def!(b = bor(b1, b2)), - ], - ); - - use IntCC::*; - for cc in &[ - SignedGreaterThan, - SignedGreaterThanOrEqual, - SignedLessThan, - SignedLessThanOrEqual, - UnsignedGreaterThan, - UnsignedGreaterThanOrEqual, - UnsignedLessThan, - UnsignedLessThanOrEqual, - ] { - let intcc_cc = Literal::enumerator_for(&imm.intcc, cc.to_static_str()); - let cc1 = Literal::enumerator_for(&imm.intcc, cc.without_equal().to_static_str()); - let cc2 = - Literal::enumerator_for(&imm.intcc, cc.inverse().without_equal().to_static_str()); - let cc3 = Literal::enumerator_for(&imm.intcc, cc.unsigned().to_static_str()); - narrow.legalize( - def!(b = icmp.int_ty(intcc_cc, x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - // X = cc1 || (!cc2 && cc3) - def!(b1 = icmp.int_ty_half(cc1, xh, yh)), - def!(b2 = icmp.int_ty_half(cc2, xh, yh)), - def!(b3 = icmp.int_ty_half(cc3, xl, yl)), - def!(c1 = 
bnot(b2)), - def!(c2 = band(c1, b3)), - def!(b = bor(b1, c2)), - ], - ); - } - } - - // TODO(ryzokuken): explore the perf diff w/ x86_umulx and consider have a - // separate legalization for x86. - for &ty in &[I64, I128] { - narrow.legalize( - def!(a = imul.ty(x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!(a1 = imul(xh, yl)), - def!(a2 = imul(xl, yh)), - def!(a3 = iadd(a1, a2)), - def!(a4 = umulhi(xl, yl)), - def!(ah = iadd(a3, a4)), - def!(al = imul(xl, yl)), - def!(a = iconcat(al, ah)), - ], - ); - } - - let zero = Literal::constant(&imm.imm64, 0); - narrow.legalize( - def!(a = iadd_imm.I128(x, c)), - vec![ - def!(yh = iconst.I64(zero)), - def!(yl = iconst.I64(c)), - def!(y = iconcat.I64(yh, yl)), - def!(a = iadd(x, y)), - ], - ); - - // Widen instructions with one input operand. - for &op in &[bnot, popcnt] { - for &int_ty in &[I8, I16] { - widen.legalize( - def!(a = op.int_ty(b)), - vec![ - def!(x = uextend.I32(b)), - def!(z = op.I32(x)), - def!(a = ireduce.int_ty(z)), - ], - ); - } - } - - // Widen instructions with two input operands. - let mut widen_two_arg = |signed: bool, op: &Instruction| { - for &int_ty in &[I8, I16] { - let sign_ext_op = if signed { sextend } else { uextend }; - widen.legalize( - def!(a = op.int_ty(b, c)), - vec![ - def!(x = sign_ext_op.I32(b)), - def!(y = sign_ext_op.I32(c)), - def!(z = op.I32(x, y)), - def!(a = ireduce.int_ty(z)), - ], - ); - } - }; - - for bin_op in &[ - iadd, isub, imul, udiv, urem, band, bor, bxor, band_not, bor_not, bxor_not, - ] { - widen_two_arg(false, bin_op); - } - for bin_op in &[sdiv, srem] { - widen_two_arg(true, bin_op); - } - - // Widen instructions using immediate operands. 
- let mut widen_imm = |signed: bool, op: &Instruction| { - for &int_ty in &[I8, I16] { - let sign_ext_op = if signed { sextend } else { uextend }; - widen.legalize( - def!(a = op.int_ty(b, c)), - vec![ - def!(x = sign_ext_op.I32(b)), - def!(z = op.I32(x, c)), - def!(a = ireduce.int_ty(z)), - ], - ); - } - }; - - for bin_op in &[ - iadd_imm, imul_imm, udiv_imm, urem_imm, band_imm, bor_imm, bxor_imm, irsub_imm, - ] { - widen_imm(false, bin_op); - } - for bin_op in &[sdiv_imm, srem_imm] { - widen_imm(true, bin_op); - } - - for &(int_ty, num) in &[(I8, 24), (I16, 16)] { - let imm = Literal::constant(&imm.imm64, -num); - - widen.legalize( - def!(a = clz.int_ty(b)), - vec![ - def!(c = uextend.I32(b)), - def!(d = clz.I32(c)), - def!(e = iadd_imm(d, imm)), - def!(a = ireduce.int_ty(e)), - ], - ); - - widen.legalize( - def!(a = cls.int_ty(b)), - vec![ - def!(c = sextend.I32(b)), - def!(d = cls.I32(c)), - def!(e = iadd_imm(d, imm)), - def!(a = ireduce.int_ty(e)), - ], - ); - } - - for &(int_ty, num) in &[(I8, 1 << 8), (I16, 1 << 16)] { - let num = Literal::constant(&imm.imm64, num); - widen.legalize( - def!(a = ctz.int_ty(b)), - vec![ - def!(c = uextend.I32(b)), - // When `b` is zero, returns the size of x in bits. - def!(d = bor_imm(c, num)), - def!(e = ctz.I32(d)), - def!(a = ireduce.int_ty(e)), - ], - ); - } - - // iconst - for &int_ty in &[I8, I16] { - widen.legalize( - def!(a = iconst.int_ty(b)), - vec![def!(c = iconst.I32(b)), def!(a = ireduce.int_ty(c))], - ); - } - - for &extend_op in &[uextend, sextend] { - // The sign extension operators have two typevars: the result has one and controls the - // instruction, then the input has one. 
- let bound = extend_op.bind(I16).bind(I8); - widen.legalize( - def!(a = bound(b)), - vec![def!(c = extend_op.I32(b)), def!(a = ireduce(c))], - ); - } - - widen.legalize( - def!(store.I8(flags, a, ptr, offset)), - vec![ - def!(b = uextend.I32(a)), - def!(istore8(flags, b, ptr, offset)), - ], - ); - - widen.legalize( - def!(store.I16(flags, a, ptr, offset)), - vec![ - def!(b = uextend.I32(a)), - def!(istore16(flags, b, ptr, offset)), - ], - ); - - widen.legalize( - def!(a = load.I8(flags, ptr, offset)), - vec![ - def!(b = uload8.I32(flags, ptr, offset)), - def!(a = ireduce(b)), - ], - ); - - widen.legalize( - def!(a = load.I16(flags, ptr, offset)), - vec![ - def!(b = uload16.I32(flags, ptr, offset)), - def!(a = ireduce(b)), - ], - ); - - for &int_ty in &[I8, I16] { - widen.legalize( - def!(br_table.int_ty(x, y, z)), - vec![def!(b = uextend.I32(x)), def!(br_table(b, y, z))], - ); - } - - for &int_ty in &[I8, I16] { - widen.legalize( - def!(a = bint.int_ty(b)), - vec![def!(x = bint.I32(b)), def!(a = ireduce.int_ty(x))], - ); - } - - for &int_ty in &[I8, I16] { - for &op in &[ishl, ishl_imm, ushr, ushr_imm] { - widen.legalize( - def!(a = op.int_ty(b, c)), - vec![ - def!(x = uextend.I32(b)), - def!(z = op.I32(x, c)), - def!(a = ireduce.int_ty(z)), - ], - ); - } - - for &op in &[sshr, sshr_imm] { - widen.legalize( - def!(a = op.int_ty(b, c)), - vec![ - def!(x = sextend.I32(b)), - def!(z = op.I32(x, c)), - def!(a = ireduce.int_ty(z)), - ], - ); - } - - for cc in &["eq", "ne", "ugt", "ult", "uge", "ule"] { - let w_cc = Literal::enumerator_for(&imm.intcc, cc); - widen.legalize( - def!(a = icmp_imm.int_ty(w_cc, b, c)), - vec![def!(x = uextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))], - ); - widen.legalize( - def!(a = icmp.int_ty(w_cc, b, c)), - vec![ - def!(x = uextend.I32(b)), - def!(y = uextend.I32(c)), - def!(a = icmp.I32(w_cc, x, y)), - ], - ); - } - - for cc in &["sgt", "slt", "sge", "sle"] { - let w_cc = Literal::enumerator_for(&imm.intcc, cc); - widen.legalize( - 
def!(a = icmp_imm.int_ty(w_cc, b, c)), - vec![def!(x = sextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))], - ); - - widen.legalize( - def!(a = icmp.int_ty(w_cc, b, c)), - vec![ - def!(x = sextend.I32(b)), - def!(y = sextend.I32(c)), - def!(a = icmp(w_cc, x, y)), - ], - ); - } - } - - for &ty in &[I8, I16] { - widen.legalize( - def!(brz.ty(x, block, vararg)), - vec![def!(a = uextend.I32(x)), def!(brz(a, block, vararg))], - ); - - widen.legalize( - def!(brnz.ty(x, block, vararg)), - vec![def!(a = uextend.I32(x)), def!(brnz(a, block, vararg))], - ); - } - - for &(ty_half, ty) in &[(I64, I128), (I32, I64)] { - let inst = ireduce.bind(ty_half).bind(ty); - expand.legalize( - def!(a = inst(x)), - vec![def!((b, c) = isplit(x)), def!(a = copy(b))], - ); - } - - // Expand integer operations with carry for RISC architectures that don't have - // the flags. - let intcc_ult = Literal::enumerator_for(&imm.intcc, "ult"); - expand.legalize( - def!((a, c) = iadd_cout(x, y)), - vec![def!(a = iadd(x, y)), def!(c = icmp(intcc_ult, a, x))], - ); - - let intcc_ugt = Literal::enumerator_for(&imm.intcc, "ugt"); - expand.legalize( - def!((a, b) = isub_bout(x, y)), - vec![def!(a = isub(x, y)), def!(b = icmp(intcc_ugt, a, x))], - ); - - expand.legalize( - def!(a = iadd_cin(x, y, c)), - vec![ - def!(a1 = iadd(x, y)), - def!(c_int = bint(c)), - def!(a = iadd(a1, c_int)), - ], - ); - - expand.legalize( - def!(a = isub_bin(x, y, b)), - vec![ - def!(a1 = isub(x, y)), - def!(b_int = bint(b)), - def!(a = isub(a1, b_int)), - ], - ); - - expand.legalize( - def!((a, c) = iadd_carry(x, y, c_in)), - vec![ - def!((a1, c1) = iadd_cout(x, y)), - def!(c_int = bint(c_in)), - def!((a, c2) = iadd_cout(a1, c_int)), - def!(c = bor(c1, c2)), - ], - ); - - expand.legalize( - def!((a, b) = isub_borrow(x, y, b_in)), - vec![ - def!((a1, b1) = isub_bout(x, y)), - def!(b_int = bint(b_in)), - def!((a, b2) = isub_bout(a1, b_int)), - def!(b = bor(b1, b2)), - ], - ); - - // Expansion for fcvt_from_sint for smaller integer 
types. - // This uses expand and not widen because the controlling type variable for - // this instruction is f32/f64, which is legalized as part of the expand - // group. - for &dest_ty in &[F32, F64] { - for &src_ty in &[I8, I16] { - let bound_inst = fcvt_from_sint.bind(dest_ty).bind(src_ty); - expand.legalize( - def!(a = bound_inst(b)), - vec![ - def!(x = sextend.I32(b)), - def!(a = fcvt_from_sint.dest_ty(x)), - ], - ); - } - } - - // Expansions for immediate operands that are out of range. - for &(inst_imm, inst) in &[ - (iadd_imm, iadd), - (imul_imm, imul), - (sdiv_imm, sdiv), - (udiv_imm, udiv), - (srem_imm, srem), - (urem_imm, urem), - (band_imm, band), - (bor_imm, bor), - (bxor_imm, bxor), - (ifcmp_imm, ifcmp), - ] { - expand.legalize( - def!(a = inst_imm(x, y)), - vec![def!(a1 = iconst(y)), def!(a = inst(x, a1))], - ); - } - - expand.legalize( - def!(a = irsub_imm(y, x)), - vec![def!(a1 = iconst(x)), def!(a = isub(a1, y))], - ); - - // Rotates and shifts. - for &(inst_imm, inst) in &[ - (rotl_imm, rotl), - (rotr_imm, rotr), - (ishl_imm, ishl), - (sshr_imm, sshr), - (ushr_imm, ushr), - ] { - expand.legalize( - def!(a = inst_imm(x, y)), - vec![def!(a1 = iconst.I32(y)), def!(a = inst(x, a1))], - ); - } - - expand.legalize( - def!(a = icmp_imm(cc, x, y)), - vec![def!(a1 = iconst(y)), def!(a = icmp(cc, x, a1))], - ); - - //# Expansions for *_not variants of bitwise ops. - for &(inst_not, inst) in &[(band_not, band), (bor_not, bor), (bxor_not, bxor)] { - expand.legalize( - def!(a = inst_not(x, y)), - vec![def!(a1 = bnot(y)), def!(a = inst(x, a1))], - ); - } - - //# Expand bnot using xor. - let minus_one = Literal::constant(&imm.imm64, -1); - expand.legalize( - def!(a = bnot(x)), - vec![def!(y = iconst(minus_one)), def!(a = bxor(x, y))], - ); - - //# Expand bitrev - //# Adapted from Stack Overflow. 
- //# https://stackoverflow.com/questions/746171/most-efficient-algorithm-for-bit-reversal-from-msb-lsb-to-lsb-msb-in-c - let imm64_1 = Literal::constant(&imm.imm64, 1); - let imm64_2 = Literal::constant(&imm.imm64, 2); - let imm64_4 = Literal::constant(&imm.imm64, 4); - - widen.legalize( - def!(a = bitrev.I8(x)), - vec![ - def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaa))), - def!(a2 = ushr_imm(a1, imm64_1)), - def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x55))), - def!(a4 = ishl_imm(a3, imm64_1)), - def!(b = bor(a2, a4)), - def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcc))), - def!(b2 = ushr_imm(b1, imm64_2)), - def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x33))), - def!(b4 = ishl_imm(b3, imm64_2)), - def!(c = bor(b2, b4)), - def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0))), - def!(c2 = ushr_imm(c1, imm64_4)), - def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f))), - def!(c4 = ishl_imm(c3, imm64_4)), - def!(a = bor(c2, c4)), - ], - ); - - let imm64_8 = Literal::constant(&imm.imm64, 8); - - widen.legalize( - def!(a = bitrev.I16(x)), - vec![ - def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaaaa))), - def!(a2 = ushr_imm(a1, imm64_1)), - def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x5555))), - def!(a4 = ishl_imm(a3, imm64_1)), - def!(b = bor(a2, a4)), - def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcccc))), - def!(b2 = ushr_imm(b1, imm64_2)), - def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x3333))), - def!(b4 = ishl_imm(b3, imm64_2)), - def!(c = bor(b2, b4)), - def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0f0))), - def!(c2 = ushr_imm(c1, imm64_4)), - def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f0f))), - def!(c4 = ishl_imm(c3, imm64_4)), - def!(d = bor(c2, c4)), - def!(d1 = band_imm(d, Literal::constant(&imm.imm64, 0xff00))), - def!(d2 = ushr_imm(d1, imm64_8)), - def!(d3 = band_imm(d, Literal::constant(&imm.imm64, 0x00ff))), - def!(d4 = ishl_imm(d3, imm64_8)), - 
def!(a = bor(d2, d4)), - ], - ); - - let imm64_16 = Literal::constant(&imm.imm64, 16); - - expand.legalize( - def!(a = bitrev.I32(x)), - vec![ - def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaaaa_aaaa))), - def!(a2 = ushr_imm(a1, imm64_1)), - def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x5555_5555))), - def!(a4 = ishl_imm(a3, imm64_1)), - def!(b = bor(a2, a4)), - def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcccc_cccc))), - def!(b2 = ushr_imm(b1, imm64_2)), - def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x3333_3333))), - def!(b4 = ishl_imm(b3, imm64_2)), - def!(c = bor(b2, b4)), - def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0f0_f0f0))), - def!(c2 = ushr_imm(c1, imm64_4)), - def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f0f_0f0f))), - def!(c4 = ishl_imm(c3, imm64_4)), - def!(d = bor(c2, c4)), - def!(d1 = band_imm(d, Literal::constant(&imm.imm64, 0xff00_ff00))), - def!(d2 = ushr_imm(d1, imm64_8)), - def!(d3 = band_imm(d, Literal::constant(&imm.imm64, 0x00ff_00ff))), - def!(d4 = ishl_imm(d3, imm64_8)), - def!(e = bor(d2, d4)), - def!(e1 = ushr_imm(e, imm64_16)), - def!(e2 = ishl_imm(e, imm64_16)), - def!(a = bor(e1, e2)), - ], - ); - - #[allow(overflowing_literals)] - let imm64_0xaaaaaaaaaaaaaaaa = Literal::constant(&imm.imm64, 0xaaaa_aaaa_aaaa_aaaa); - let imm64_0x5555555555555555 = Literal::constant(&imm.imm64, 0x5555_5555_5555_5555); - #[allow(overflowing_literals)] - let imm64_0xcccccccccccccccc = Literal::constant(&imm.imm64, 0xcccc_cccc_cccc_cccc); - let imm64_0x3333333333333333 = Literal::constant(&imm.imm64, 0x3333_3333_3333_3333); - #[allow(overflowing_literals)] - let imm64_0xf0f0f0f0f0f0f0f0 = Literal::constant(&imm.imm64, 0xf0f0_f0f0_f0f0_f0f0); - let imm64_0x0f0f0f0f0f0f0f0f = Literal::constant(&imm.imm64, 0x0f0f_0f0f_0f0f_0f0f); - #[allow(overflowing_literals)] - let imm64_0xff00ff00ff00ff00 = Literal::constant(&imm.imm64, 0xff00_ff00_ff00_ff00); - let imm64_0x00ff00ff00ff00ff = 
Literal::constant(&imm.imm64, 0x00ff_00ff_00ff_00ff); - #[allow(overflowing_literals)] - let imm64_0xffff0000ffff0000 = Literal::constant(&imm.imm64, 0xffff_0000_ffff_0000); - let imm64_0x0000ffff0000ffff = Literal::constant(&imm.imm64, 0x0000_ffff_0000_ffff); - let imm64_32 = Literal::constant(&imm.imm64, 32); - - expand.legalize( - def!(a = bitrev.I64(x)), - vec![ - def!(a1 = band_imm(x, imm64_0xaaaaaaaaaaaaaaaa)), - def!(a2 = ushr_imm(a1, imm64_1)), - def!(a3 = band_imm(x, imm64_0x5555555555555555)), - def!(a4 = ishl_imm(a3, imm64_1)), - def!(b = bor(a2, a4)), - def!(b1 = band_imm(b, imm64_0xcccccccccccccccc)), - def!(b2 = ushr_imm(b1, imm64_2)), - def!(b3 = band_imm(b, imm64_0x3333333333333333)), - def!(b4 = ishl_imm(b3, imm64_2)), - def!(c = bor(b2, b4)), - def!(c1 = band_imm(c, imm64_0xf0f0f0f0f0f0f0f0)), - def!(c2 = ushr_imm(c1, imm64_4)), - def!(c3 = band_imm(c, imm64_0x0f0f0f0f0f0f0f0f)), - def!(c4 = ishl_imm(c3, imm64_4)), - def!(d = bor(c2, c4)), - def!(d1 = band_imm(d, imm64_0xff00ff00ff00ff00)), - def!(d2 = ushr_imm(d1, imm64_8)), - def!(d3 = band_imm(d, imm64_0x00ff00ff00ff00ff)), - def!(d4 = ishl_imm(d3, imm64_8)), - def!(e = bor(d2, d4)), - def!(e1 = band_imm(e, imm64_0xffff0000ffff0000)), - def!(e2 = ushr_imm(e1, imm64_16)), - def!(e3 = band_imm(e, imm64_0x0000ffff0000ffff)), - def!(e4 = ishl_imm(e3, imm64_16)), - def!(f = bor(e2, e4)), - def!(f1 = ushr_imm(f, imm64_32)), - def!(f2 = ishl_imm(f, imm64_32)), - def!(a = bor(f1, f2)), - ], - ); - - narrow.legalize( - def!(a = bitrev.I128(x)), - vec![ - def!((xl, xh) = isplit(x)), - def!(yh = bitrev(xl)), - def!(yl = bitrev(xh)), - def!(a = iconcat(yl, yh)), - ], - ); - - // Floating-point sign manipulations. 
- for &(ty, const_inst, minus_zero) in &[ - (F32, f32const, &Literal::bits(&imm.ieee32, 0x8000_0000)), - ( - F64, - f64const, - &Literal::bits(&imm.ieee64, 0x8000_0000_0000_0000), - ), - ] { - expand.legalize( - def!(a = fabs.ty(x)), - vec![def!(b = const_inst(minus_zero)), def!(a = band_not(x, b))], - ); - - expand.legalize( - def!(a = fneg.ty(x)), - vec![def!(b = const_inst(minus_zero)), def!(a = bxor(x, b))], - ); - - expand.legalize( - def!(a = fcopysign.ty(x, y)), - vec![ - def!(b = const_inst(minus_zero)), - def!(a1 = band_not(x, b)), - def!(a2 = band(y, b)), - def!(a = bor(a1, a2)), - ], - ); - } - - expand.custom_legalize(br_icmp, "expand_br_icmp"); - - let mut groups = TransformGroups::new(); - - let narrow_id = narrow.build_and_add_to(&mut groups); - let expand_id = expand.build_and_add_to(&mut groups); - - // Expansions using CPU flags. - let mut expand_flags = TransformGroupBuilder::new( - "expand_flags", - r#" - Instruction expansions for architectures with flags. - - Expand some instructions using CPU flags, then fall back to the normal - expansions. Not all architectures support CPU flags, so these patterns - are kept separate. - "#, - ) - .chain_with(expand_id); - - let imm64_0 = Literal::constant(&imm.imm64, 0); - let intcc_ne = Literal::enumerator_for(&imm.intcc, "ne"); - let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); - - expand_flags.legalize( - def!(trapnz(x, c)), - vec![ - def!(a = ifcmp_imm(x, imm64_0)), - def!(trapif(intcc_ne, a, c)), - ], - ); - - expand_flags.legalize( - def!(trapz(x, c)), - vec![ - def!(a = ifcmp_imm(x, imm64_0)), - def!(trapif(intcc_eq, a, c)), - ], - ); - - expand_flags.build_and_add_to(&mut groups); - - // Narrow legalizations using CPU flags. - let mut narrow_flags = TransformGroupBuilder::new( - "narrow_flags", - r#" - Narrow instructions for architectures with flags. - - Narrow some instructions using CPU flags, then fall back to the normal - legalizations. 
Not all architectures support CPU flags, so these - patterns are kept separate. - "#, - ) - .chain_with(narrow_id); - - narrow_flags.legalize( - def!(a = iadd(x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!((al, c) = iadd_ifcout(xl, yl)), - def!(ah = iadd_ifcin(xh, yh, c)), - def!(a = iconcat(al, ah)), - ], - ); - - narrow_flags.legalize( - def!(a = isub(x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!((al, b) = isub_ifbout(xl, yl)), - def!(ah = isub_ifbin(xh, yh, b)), - def!(a = iconcat(al, ah)), - ], - ); - - narrow_flags.build_and_add_to(&mut groups); - - // TODO(ryzokuken): figure out a way to legalize iadd_c* to iadd_ifc* (and - // similarly isub_b* to isub_ifb*) on expand_flags so that this isn't required. - // Narrow legalizations for ISAs that don't have CPU flags. - let mut narrow_no_flags = TransformGroupBuilder::new( - "narrow_no_flags", - r#" - Narrow instructions for architectures without flags. - - Narrow some instructions avoiding the use of CPU flags, then fall back - to the normal legalizations. Not all architectures support CPU flags, - so these patterns are kept separate. - "#, - ) - .chain_with(narrow_id); - - narrow_no_flags.legalize( - def!(a = iadd(x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!((al, c) = iadd_cout(xl, yl)), - def!(ah = iadd_cin(xh, yh, c)), - def!(a = iconcat(al, ah)), - ], - ); - - narrow_no_flags.legalize( - def!(a = isub(x, y)), - vec![ - def!((xl, xh) = isplit(x)), - def!((yl, yh) = isplit(y)), - def!((al, b) = isub_bout(xl, yl)), - def!(ah = isub_bin(xh, yh, b)), - def!(a = iconcat(al, ah)), - ], - ); - - narrow_no_flags.build_and_add_to(&mut groups); - - // TODO The order of declarations unfortunately matters to be compatible with the Python code. - // When it's all migrated, we can put this next to the narrow/expand build_and_add_to calls - // above. 
- widen.build_and_add_to(&mut groups); - - groups -} diff --git a/cranelift/codegen/meta/src/shared/mod.rs b/cranelift/codegen/meta/src/shared/mod.rs index b185262ccd..549ae7cf99 100644 --- a/cranelift/codegen/meta/src/shared/mod.rs +++ b/cranelift/codegen/meta/src/shared/mod.rs @@ -4,14 +4,12 @@ pub mod entities; pub mod formats; pub mod immediates; pub mod instructions; -pub mod legalize; pub mod settings; pub mod types; use crate::cdsl::formats::{FormatStructure, InstructionFormat}; -use crate::cdsl::instructions::{AllInstructions, InstructionGroup}; +use crate::cdsl::instructions::AllInstructions; use crate::cdsl::settings::SettingGroup; -use crate::cdsl::xform::TransformGroups; use crate::shared::entities::EntityRefs; use crate::shared::formats::Formats; @@ -24,11 +22,6 @@ use std::rc::Rc; pub(crate) struct Definitions { pub settings: SettingGroup, pub all_instructions: AllInstructions, - pub instructions: InstructionGroup, - pub imm: Immediates, - pub formats: Formats, - pub transform_groups: TransformGroups, - pub entities: EntityRefs, } pub(crate) fn define() -> Definitions { @@ -37,18 +30,11 @@ pub(crate) fn define() -> Definitions { let immediates = Immediates::new(); let entities = EntityRefs::new(); let formats = Formats::new(&immediates, &entities); - let instructions = - instructions::define(&mut all_instructions, &formats, &immediates, &entities); - let transform_groups = legalize::define(&instructions, &immediates); + instructions::define(&mut all_instructions, &formats, &immediates, &entities); Definitions { settings: settings::define(), all_instructions, - instructions, - imm: immediates, - formats, - transform_groups, - entities, } } diff --git a/cranelift/codegen/meta/src/srcgen.rs b/cranelift/codegen/meta/src/srcgen.rs index 0e8d4eccf0..21e3d5e904 100644 --- a/cranelift/codegen/meta/src/srcgen.rs +++ b/cranelift/codegen/meta/src/srcgen.rs @@ -77,15 +77,6 @@ impl Formatter { } } - /// Get a string containing whitespace outdented one level. 
Used for - /// lines of code that are inside a single indented block. - fn get_outdent(&mut self) -> String { - self.indent_pop(); - let s = self.get_indent(); - self.indent_push(); - s - } - /// Add an indented line. pub fn line(&mut self, contents: impl AsRef) { let indented_line = format!("{}{}\n", self.get_indent(), contents.as_ref()); @@ -97,12 +88,6 @@ impl Formatter { self.lines.push("\n".to_string()); } - /// Emit a line outdented one level. - pub fn outdented_line(&mut self, s: &str) { - let new_line = format!("{}{}\n", self.get_outdent(), s); - self.lines.push(new_line); - } - /// Write `self.lines` to a file. pub fn update_file( &self, diff --git a/cranelift/codegen/meta/src/unique_table.rs b/cranelift/codegen/meta/src/unique_table.rs index 65ef7e8b4a..50c664e4df 100644 --- a/cranelift/codegen/meta/src/unique_table.rs +++ b/cranelift/codegen/meta/src/unique_table.rs @@ -32,9 +32,6 @@ impl<'entries, T: Eq + Hash> UniqueTable<'entries, T> { pub fn len(&self) -> usize { self.table.len() } - pub fn get(&self, index: usize) -> &T { - self.table[index] - } pub fn iter(&self) -> slice::Iter<&'entries T> { self.table.iter() } diff --git a/cranelift/codegen/shared/src/isa/mod.rs b/cranelift/codegen/shared/src/isa/mod.rs deleted file mode 100644 index 4d8e485f6c..0000000000 --- a/cranelift/codegen/shared/src/isa/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -//! Shared ISA-specific definitions. - -pub mod x86; diff --git a/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs b/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs deleted file mode 100644 index 9edb2a6e6f..0000000000 --- a/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs +++ /dev/null @@ -1,419 +0,0 @@ -//! Provides a named interface to the `u16` Encoding bits. - -use std::ops::RangeInclusive; - -/// Named interface to the `u16` Encoding bits, representing an opcode. -/// -/// Cranelift requires each recipe to have a single encoding size in bytes. 
-/// X86 opcodes are variable length, so we use separate recipes for different -/// styles of opcodes and prefixes. The opcode format is indicated by the -/// recipe name prefix. -/// -/// VEX/XOP and EVEX prefixes are not yet supported. -/// Encodings using any of these prefixes are represented by separate recipes. -/// -/// The encoding bits are: -/// -/// 0-7: The opcode byte . -/// 8-9: pp, mandatory prefix: -/// 00: none (Op*) -/// 01: 66 (Mp*) -/// 10: F3 (Mp*) -/// 11: F2 (Mp*) -/// 10-11: mm, opcode map: -/// 00: (Op1/Mp1) -/// 01: 0F (Op2/Mp2) -/// 10: 0F 38 (Op3/Mp3) -/// 11: 0F 3A (Op3/Mp3) -/// 12-14 rrr, opcode bits for the ModR/M byte for certain opcodes. -/// 15: REX.W bit (or VEX.W/E) -#[derive(Copy, Clone, PartialEq)] -pub struct EncodingBits(u16); -const OPCODE: RangeInclusive = 0..=7; -const OPCODE_PREFIX: RangeInclusive = 8..=11; // Includes pp and mm. -const RRR: RangeInclusive = 12..=14; -const REX_W: RangeInclusive = 15..=15; - -impl From for EncodingBits { - fn from(bits: u16) -> Self { - Self(bits) - } -} - -impl EncodingBits { - /// Constructs a new EncodingBits from parts. - pub fn new(op_bytes: &[u8], rrr: u16, rex_w: u16) -> Self { - assert!( - !op_bytes.is_empty(), - "op_bytes must include at least one opcode byte" - ); - let mut new = Self::from(0); - let last_byte = op_bytes[op_bytes.len() - 1]; - new.write(OPCODE, last_byte as u16); - let prefix: u8 = OpcodePrefix::from_opcode(op_bytes).into(); - new.write(OPCODE_PREFIX, prefix as u16); - new.write(RRR, rrr); - new.write(REX_W, rex_w); - new - } - - /// Returns a copy of the EncodingBits with the RRR bits set. - #[inline] - pub fn with_rrr(mut self, rrr: u8) -> Self { - debug_assert_eq!(self.rrr(), 0); - self.write(RRR, rrr.into()); - self - } - - /// Returns a copy of the EncodingBits with the REX.W bit set. - #[inline] - pub fn with_rex_w(mut self) -> Self { - debug_assert_eq!(self.rex_w(), 0); - self.write(REX_W, 1); - self - } - - /// Returns the raw bits. 
- #[inline] - pub fn bits(self) -> u16 { - self.0 - } - - /// Convenience method for writing bits to specific range. - #[inline] - fn write(&mut self, range: RangeInclusive, value: u16) { - assert!(ExactSizeIterator::len(&range) > 0); - let size = range.end() - range.start() + 1; // Calculate the number of bits in the range. - let mask = (1 << size) - 1; // Generate a bit mask. - debug_assert!( - value <= mask, - "The written value should have fewer than {} bits.", - size - ); - let mask_complement = !(mask << *range.start()); // Create the bitwise complement for the clear mask. - self.0 &= mask_complement; // Clear the bits in `range`. - let value = (value & mask) << *range.start(); // Place the value in the correct location. - self.0 |= value; // Modify the bits in `range`. - } - - /// Convenience method for reading bits from a specific range. - #[inline] - fn read(self, range: RangeInclusive) -> u8 { - assert!(ExactSizeIterator::len(&range) > 0); - let size = range.end() - range.start() + 1; // Calculate the number of bits in the range. - debug_assert!(size <= 8, "This structure expects ranges of at most 8 bits"); - let mask = (1 << size) - 1; // Generate a bit mask. - ((self.0 >> *range.start()) & mask) as u8 - } - - /// Instruction opcode byte, without the prefix. - #[inline] - pub fn opcode_byte(self) -> u8 { - self.read(OPCODE) - } - - /// Prefix kind for the instruction, as an enum. - #[inline] - pub fn prefix(self) -> OpcodePrefix { - OpcodePrefix::from(self.read(OPCODE_PREFIX)) - } - - /// Extracts the PP bits of the OpcodePrefix. - #[inline] - pub fn pp(self) -> u8 { - self.prefix().to_primitive() & 0x3 - } - - /// Extracts the MM bits of the OpcodePrefix. - #[inline] - pub fn mm(self) -> u8 { - (self.prefix().to_primitive() >> 2) & 0x3 - } - - /// Bits for the ModR/M byte for certain opcodes. - #[inline] - pub fn rrr(self) -> u8 { - self.read(RRR) - } - - /// REX.W bit (or VEX.W/E). 
- #[inline] - pub fn rex_w(self) -> u8 { - self.read(REX_W) - } -} - -/// Opcode prefix representation. -/// -/// The prefix type occupies four of the EncodingBits. -#[allow(non_camel_case_types)] -#[allow(missing_docs)] -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub enum OpcodePrefix { - Op1, - Mp1_66, - Mp1_f3, - Mp1_f2, - Op2_0f, - Mp2_66_0f, - Mp2_f3_0f, - Mp2_f2_0f, - Op3_0f_38, - Mp3_66_0f_38, - Mp3_f3_0f_38, - Mp3_f2_0f_38, - Op3_0f_3a, - Mp3_66_0f_3a, - Mp3_f3_0f_3a, - Mp3_f2_0f_3a, -} - -impl From for OpcodePrefix { - fn from(n: u8) -> Self { - use OpcodePrefix::*; - match n { - 0b0000 => Op1, - 0b0001 => Mp1_66, - 0b0010 => Mp1_f3, - 0b0011 => Mp1_f2, - 0b0100 => Op2_0f, - 0b0101 => Mp2_66_0f, - 0b0110 => Mp2_f3_0f, - 0b0111 => Mp2_f2_0f, - 0b1000 => Op3_0f_38, - 0b1001 => Mp3_66_0f_38, - 0b1010 => Mp3_f3_0f_38, - 0b1011 => Mp3_f2_0f_38, - 0b1100 => Op3_0f_3a, - 0b1101 => Mp3_66_0f_3a, - 0b1110 => Mp3_f3_0f_3a, - 0b1111 => Mp3_f2_0f_3a, - _ => panic!("invalid opcode prefix"), - } - } -} - -impl Into for OpcodePrefix { - fn into(self) -> u8 { - use OpcodePrefix::*; - match self { - Op1 => 0b0000, - Mp1_66 => 0b0001, - Mp1_f3 => 0b0010, - Mp1_f2 => 0b0011, - Op2_0f => 0b0100, - Mp2_66_0f => 0b0101, - Mp2_f3_0f => 0b0110, - Mp2_f2_0f => 0b0111, - Op3_0f_38 => 0b1000, - Mp3_66_0f_38 => 0b1001, - Mp3_f3_0f_38 => 0b1010, - Mp3_f2_0f_38 => 0b1011, - Op3_0f_3a => 0b1100, - Mp3_66_0f_3a => 0b1101, - Mp3_f3_0f_3a => 0b1110, - Mp3_f2_0f_3a => 0b1111, - } - } -} - -impl OpcodePrefix { - /// Convert an opcode prefix to a `u8`; this is a convenience proxy for `Into`. - fn to_primitive(self) -> u8 { - self.into() - } - - /// Extracts the OpcodePrefix from the opcode. 
- pub fn from_opcode(op_bytes: &[u8]) -> Self { - assert!(!op_bytes.is_empty(), "at least one opcode byte"); - - let prefix_bytes = &op_bytes[..op_bytes.len() - 1]; - match prefix_bytes { - [] => Self::Op1, - [0x66] => Self::Mp1_66, - [0xf3] => Self::Mp1_f3, - [0xf2] => Self::Mp1_f2, - [0x0f] => Self::Op2_0f, - [0x66, 0x0f] => Self::Mp2_66_0f, - [0xf3, 0x0f] => Self::Mp2_f3_0f, - [0xf2, 0x0f] => Self::Mp2_f2_0f, - [0x0f, 0x38] => Self::Op3_0f_38, - [0x66, 0x0f, 0x38] => Self::Mp3_66_0f_38, - [0xf3, 0x0f, 0x38] => Self::Mp3_f3_0f_38, - [0xf2, 0x0f, 0x38] => Self::Mp3_f2_0f_38, - [0x0f, 0x3a] => Self::Op3_0f_3a, - [0x66, 0x0f, 0x3a] => Self::Mp3_66_0f_3a, - [0xf3, 0x0f, 0x3a] => Self::Mp3_f3_0f_3a, - [0xf2, 0x0f, 0x3a] => Self::Mp3_f2_0f_3a, - _ => { - panic!("unexpected opcode sequence: {:?}", op_bytes); - } - } - } - - /// Returns the recipe name prefix. - /// - /// At the moment, each similar OpcodePrefix group is given its own Recipe. - /// In order to distinguish them, this string is prefixed. - pub fn recipe_name_prefix(self) -> &'static str { - use OpcodePrefix::*; - match self { - Op1 => "Op1", - Op2_0f => "Op2", - Op3_0f_38 | Op3_0f_3a => "Op3", - Mp1_66 | Mp1_f3 | Mp1_f2 => "Mp1", - Mp2_66_0f | Mp2_f3_0f | Mp2_f2_0f => "Mp2", - Mp3_66_0f_38 | Mp3_f3_0f_38 | Mp3_f2_0f_38 => "Mp3", - Mp3_66_0f_3a | Mp3_f3_0f_3a | Mp3_f2_0f_3a => "Mp3", - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - /// Helper function for prefix_roundtrip() to avoid long lines. - fn test_roundtrip(p: OpcodePrefix) { - assert_eq!(p, OpcodePrefix::from(p.to_primitive())); - } - - /// Tests that to/from each opcode matches. 
- #[test] - fn prefix_roundtrip() { - test_roundtrip(OpcodePrefix::Op1); - test_roundtrip(OpcodePrefix::Mp1_66); - test_roundtrip(OpcodePrefix::Mp1_f3); - test_roundtrip(OpcodePrefix::Mp1_f2); - test_roundtrip(OpcodePrefix::Op2_0f); - test_roundtrip(OpcodePrefix::Mp2_66_0f); - test_roundtrip(OpcodePrefix::Mp2_f3_0f); - test_roundtrip(OpcodePrefix::Mp2_f2_0f); - test_roundtrip(OpcodePrefix::Op3_0f_38); - test_roundtrip(OpcodePrefix::Mp3_66_0f_38); - test_roundtrip(OpcodePrefix::Mp3_f3_0f_38); - test_roundtrip(OpcodePrefix::Mp3_f2_0f_38); - test_roundtrip(OpcodePrefix::Op3_0f_3a); - test_roundtrip(OpcodePrefix::Mp3_66_0f_3a); - test_roundtrip(OpcodePrefix::Mp3_f3_0f_3a); - test_roundtrip(OpcodePrefix::Mp3_f2_0f_3a); - } - - #[test] - fn prefix_to_name() { - assert_eq!(OpcodePrefix::Op1.recipe_name_prefix(), "Op1"); - assert_eq!(OpcodePrefix::Op2_0f.recipe_name_prefix(), "Op2"); - assert_eq!(OpcodePrefix::Op3_0f_38.recipe_name_prefix(), "Op3"); - assert_eq!(OpcodePrefix::Mp1_66.recipe_name_prefix(), "Mp1"); - assert_eq!(OpcodePrefix::Mp2_66_0f.recipe_name_prefix(), "Mp2"); - assert_eq!(OpcodePrefix::Mp3_66_0f_3a.recipe_name_prefix(), "Mp3"); - } - - /// Tests that the opcode_byte is the lower of the EncodingBits. - #[test] - fn encodingbits_opcode_byte() { - let enc = EncodingBits::from(0x00ff); - assert_eq!(enc.opcode_byte(), 0xff); - assert_eq!(enc.prefix().to_primitive(), 0x0); - assert_eq!(enc.rrr(), 0x0); - assert_eq!(enc.rex_w(), 0x0); - - let enc = EncodingBits::from(0x00cd); - assert_eq!(enc.opcode_byte(), 0xcd); - } - - /// Tests that the OpcodePrefix is encoded correctly. - #[test] - fn encodingbits_prefix() { - let enc = EncodingBits::from(0x0c00); - assert_eq!(enc.opcode_byte(), 0x00); - assert_eq!(enc.prefix().to_primitive(), 0xc); - assert_eq!(enc.prefix(), OpcodePrefix::Op3_0f_3a); - assert_eq!(enc.rrr(), 0x0); - assert_eq!(enc.rex_w(), 0x0); - } - - /// Tests that the PP bits are encoded correctly. 
- #[test] - fn encodingbits_pp() { - let enc = EncodingBits::from(0x0300); - assert_eq!(enc.opcode_byte(), 0x0); - assert_eq!(enc.pp(), 0x3); - assert_eq!(enc.mm(), 0x0); - assert_eq!(enc.rrr(), 0x0); - assert_eq!(enc.rex_w(), 0x0); - } - - /// Tests that the MM bits are encoded correctly. - #[test] - fn encodingbits_mm() { - let enc = EncodingBits::from(0x0c00); - assert_eq!(enc.opcode_byte(), 0x0); - assert_eq!(enc.pp(), 0x00); - assert_eq!(enc.mm(), 0x3); - assert_eq!(enc.rrr(), 0x0); - assert_eq!(enc.rex_w(), 0x0); - } - - /// Tests that the ModR/M bits are encoded correctly. - #[test] - fn encodingbits_rrr() { - let enc = EncodingBits::from(0x5000); - assert_eq!(enc.opcode_byte(), 0x0); - assert_eq!(enc.prefix().to_primitive(), 0x0); - assert_eq!(enc.rrr(), 0x5); - assert_eq!(enc.rex_w(), 0x0); - } - - /// Tests that the REX.W bit is encoded correctly. - #[test] - fn encodingbits_rex_w() { - let enc = EncodingBits::from(0x8000); - assert_eq!(enc.opcode_byte(), 0x00); - assert_eq!(enc.prefix().to_primitive(), 0x0); - assert_eq!(enc.rrr(), 0x0); - assert_eq!(enc.rex_w(), 0x1); - } - - /// Tests setting and unsetting a bit using EncodingBits::write. - #[test] - fn encodingbits_flip() { - let mut bits = EncodingBits::from(0); - let range = 2..=2; - - bits.write(range.clone(), 1); - assert_eq!(bits.bits(), 0b100); - - bits.write(range, 0); - assert_eq!(bits.bits(), 0b000); - } - - /// Tests a round-trip of EncodingBits from/to a u16 (hardcoded endianness). - #[test] - fn encodingbits_roundtrip() { - let bits: u16 = 0x1234; - assert_eq!(EncodingBits::from(bits).bits(), bits); - } - - #[test] - // I purposely want to divide the bits using the ranges defined above. 
- #[allow(clippy::inconsistent_digit_grouping)] - fn encodingbits_construction() { - assert_eq!( - EncodingBits::new(&[0x66, 0x40], 5, 1).bits(), - 0b1_101_0001_01000000 // 1 = rex_w, 101 = rrr, 0001 = prefix, 01000000 = opcode - ); - } - - #[test] - #[should_panic] - fn encodingbits_panics_at_write_to_invalid_range() { - EncodingBits::from(0).write(1..=0, 42); - } - - #[test] - #[should_panic] - fn encodingbits_panics_at_read_to_invalid_range() { - EncodingBits::from(0).read(1..=0); - } -} diff --git a/cranelift/codegen/shared/src/isa/x86/mod.rs b/cranelift/codegen/shared/src/isa/x86/mod.rs deleted file mode 100644 index fb45ae56c3..0000000000 --- a/cranelift/codegen/shared/src/isa/x86/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -//! Shared x86-specific definitions. - -mod encoding_bits; -pub use encoding_bits::*; diff --git a/cranelift/codegen/shared/src/lib.rs b/cranelift/codegen/shared/src/lib.rs index 9b4cb941ed..5af30f8698 100644 --- a/cranelift/codegen/shared/src/lib.rs +++ b/cranelift/codegen/shared/src/lib.rs @@ -19,10 +19,8 @@ ) )] -pub mod condcodes; pub mod constant_hash; pub mod constants; -pub mod isa; /// Version number of this crate. pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/cranelift/codegen/src/abi.rs b/cranelift/codegen/src/abi.rs deleted file mode 100644 index 883ec546e4..0000000000 --- a/cranelift/codegen/src/abi.rs +++ /dev/null @@ -1,270 +0,0 @@ -//! Common helper code for ABI lowering. -//! -//! This module provides functions and data structures that are useful for implementing the -//! `TargetIsa::legalize_signature()` method. - -use crate::ir::{AbiParam, ArgumentExtension, ArgumentLoc, Type}; -use alloc::borrow::Cow; -use alloc::vec::Vec; -use core::cmp::Ordering; - -/// Legalization action to perform on a single argument or return value when converting a -/// signature. -/// -/// An argument may go through a sequence of legalization steps before it reaches the final -/// `Assign` action. 
-#[derive(Clone, Copy, Debug)] -pub enum ArgAction { - /// Assign the argument to the given location. - Assign(ArgumentLoc), - - /// Assign the argument to the given location and change the type to the specified type. - /// This is used by [`ArgumentPurpose::StructArgument`]. - AssignAndChangeType(ArgumentLoc, Type), - - /// Convert the argument, then call again. - /// - /// This action can split an integer type into two smaller integer arguments, or it can split a - /// SIMD vector into halves. - Convert(ValueConversion), -} - -impl From for ArgAction { - fn from(x: ArgumentLoc) -> Self { - Self::Assign(x) - } -} - -impl From for ArgAction { - fn from(x: ValueConversion) -> Self { - Self::Convert(x) - } -} - -/// Legalization action to be applied to a value that is being passed to or from a legalized ABI. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum ValueConversion { - /// Split an integer types into low and high parts, using `isplit`. - IntSplit, - - /// Split a vector type into halves with identical lane types, using `vsplit`. - VectorSplit, - - /// Bit-cast to an integer type of the same size. - IntBits, - - /// Sign-extend integer value to the required type. - Sext(Type), - - /// Unsigned zero-extend value to the required type. - Uext(Type), - - /// Pass value by pointer of given integer type. - Pointer(Type), -} - -impl ValueConversion { - /// Apply this conversion to a type, return the converted type. - pub fn apply(self, ty: Type) -> Type { - match self { - Self::IntSplit => ty.half_width().expect("Integer type too small to split"), - Self::VectorSplit => ty.half_vector().expect("Not a vector"), - Self::IntBits => Type::int(ty.bits()).expect("Bad integer size"), - Self::Sext(nty) | Self::Uext(nty) | Self::Pointer(nty) => nty, - } - } - - /// Is this a split conversion that results in two arguments? 
- pub fn is_split(self) -> bool { - match self { - Self::IntSplit | Self::VectorSplit => true, - _ => false, - } - } - - /// Is this a conversion to pointer? - pub fn is_pointer(self) -> bool { - match self { - Self::Pointer(_) => true, - _ => false, - } - } -} - -/// Common trait for assigning arguments to registers or stack locations. -/// -/// This will be implemented by individual ISAs. -pub trait ArgAssigner { - /// Pick an assignment action for function argument (or return value) `arg`. - fn assign(&mut self, arg: &AbiParam) -> ArgAction; -} - -/// Legalize the arguments in `args` using the given argument assigner. -/// -/// This function can be used for both arguments and return values. -pub fn legalize_args(args: &[AbiParam], aa: &mut AA) -> Option> { - let mut args = Cow::Borrowed(args); - - // Iterate over the arguments. - // We may need to mutate the vector in place, so don't use a normal iterator, and clone the - // argument to avoid holding a reference. - let mut argno = 0; - while let Some(arg) = args.get(argno).cloned() { - // Leave the pre-assigned arguments alone. - // We'll assume that they don't interfere with our assignments. - if arg.location.is_assigned() { - argno += 1; - continue; - } - - match aa.assign(&arg) { - // Assign argument to a location and move on to the next one. - ArgAction::Assign(loc) => { - args.to_mut()[argno].location = loc; - argno += 1; - } - // Assign argument to a location, change type to the requested one and move on to the - // next one. - ArgAction::AssignAndChangeType(loc, ty) => { - let arg = &mut args.to_mut()[argno]; - arg.location = loc; - arg.value_type = ty; - argno += 1; - } - // Split this argument into two smaller ones. Then revisit both. 
- ArgAction::Convert(conv) => { - debug_assert!( - !arg.legalized_to_pointer, - "No more conversions allowed after conversion to pointer" - ); - let value_type = conv.apply(arg.value_type); - args.to_mut()[argno].value_type = value_type; - if conv.is_pointer() { - args.to_mut()[argno].legalized_to_pointer = true; - } else if conv.is_split() { - let new_arg = AbiParam { value_type, ..arg }; - args.to_mut().insert(argno + 1, new_arg); - } - } - } - } - - match args { - Cow::Borrowed(_) => None, - Cow::Owned(a) => Some(a), - } -} - -/// Determine the right action to take when passing a `have` value type to a call signature where -/// the next argument is `arg` which has a different value type. -/// -/// The signature legalization process in `legalize_args` above can replace a single argument value -/// with multiple arguments of smaller types. It can also change the type of an integer argument to -/// a larger integer type, requiring the smaller value to be sign- or zero-extended. -/// -/// The legalizer needs to repair the values at all ABI boundaries: -/// -/// - Incoming function arguments to the entry block. -/// - Function arguments passed to a call. -/// - Return values from a call. -/// - Return values passed to a return instruction. -/// -/// The `legalize_abi_value` function helps the legalizer with the process. When the legalizer -/// needs to pass a pre-legalized `have` argument, but the ABI argument `arg` has a different value -/// type, `legalize_abi_value(have, arg)` tells the legalizer how to create the needed value type -/// for the argument. -/// -/// It may be necessary to call `legalize_abi_value` more than once for a given argument before the -/// desired argument type appears. This will happen when a vector or integer type needs to be split -/// more than once, for example. 
-pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion { - let have_bits = have.bits(); - let arg_bits = arg.value_type.bits(); - - if arg.legalized_to_pointer { - return ValueConversion::Pointer(arg.value_type); - } - - match have_bits.cmp(&arg_bits) { - // We have fewer bits than the ABI argument. - Ordering::Less => { - debug_assert!( - have.is_int() && arg.value_type.is_int(), - "Can only extend integer values" - ); - match arg.extension { - ArgumentExtension::Uext => ValueConversion::Uext(arg.value_type), - ArgumentExtension::Sext => ValueConversion::Sext(arg.value_type), - _ => panic!("No argument extension specified"), - } - } - // We have the same number of bits as the argument. - Ordering::Equal => { - // This must be an integer vector that is split and then extended. - debug_assert!(arg.value_type.is_int()); - debug_assert!(have.is_vector(), "expected vector type, got {}", have); - ValueConversion::VectorSplit - } - // We have more bits than the argument. - Ordering::Greater => { - if have.is_vector() { - ValueConversion::VectorSplit - } else if have.is_float() { - // Convert a float to int so it can be split the next time. - // ARM would do this to pass an `f64` in two registers. - ValueConversion::IntBits - } else { - ValueConversion::IntSplit - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::ir::types; - use crate::ir::AbiParam; - - #[test] - fn legalize() { - let mut arg = AbiParam::new(types::I32); - - assert_eq!( - legalize_abi_value(types::I64X2, &arg), - ValueConversion::VectorSplit - ); - assert_eq!( - legalize_abi_value(types::I64, &arg), - ValueConversion::IntSplit - ); - - // Vector of integers is broken down, then sign-extended. 
- arg.extension = ArgumentExtension::Sext; - assert_eq!( - legalize_abi_value(types::I16X4, &arg), - ValueConversion::VectorSplit - ); - assert_eq!( - legalize_abi_value(types::I16.by(2).unwrap(), &arg), - ValueConversion::VectorSplit - ); - assert_eq!( - legalize_abi_value(types::I16, &arg), - ValueConversion::Sext(types::I32) - ); - - // 64-bit float is split as an integer. - assert_eq!( - legalize_abi_value(types::F64, &arg), - ValueConversion::IntBits - ); - - // Value is passed by reference - arg.legalized_to_pointer = true; - assert_eq!( - legalize_abi_value(types::F64, &arg), - ValueConversion::Pointer(types::I32) - ); - } -} diff --git a/cranelift/codegen/src/binemit/memorysink.rs b/cranelift/codegen/src/binemit/memorysink.rs index d50d1c10eb..dc86530b7c 100644 --- a/cranelift/codegen/src/binemit/memorysink.rs +++ b/cranelift/codegen/src/binemit/memorysink.rs @@ -15,9 +15,7 @@ //! `CodeSink::put*` methods, so the performance impact of the virtual callbacks is less severe. use super::{Addend, CodeInfo, CodeOffset, CodeSink, Reloc}; use crate::binemit::stack_map::StackMap; -use crate::ir::entities::Value; -use crate::ir::{ConstantOffset, ExternalName, Function, JumpTable, Opcode, SourceLoc, TrapCode}; -use crate::isa::TargetIsa; +use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode}; use core::ptr::write_unaligned; /// A `CodeSink` that writes binary machine code directly into memory. @@ -38,7 +36,6 @@ pub struct MemoryCodeSink<'a> { offset: isize, relocs: &'a mut dyn RelocSink, traps: &'a mut dyn TrapSink, - stack_maps: &'a mut dyn StackMapSink, /// Information about the generated code and read-only data. 
pub info: CodeInfo, } @@ -54,7 +51,6 @@ impl<'a> MemoryCodeSink<'a> { data: *mut u8, relocs: &'a mut dyn RelocSink, traps: &'a mut dyn TrapSink, - stack_maps: &'a mut dyn StackMapSink, ) -> Self { Self { data, @@ -67,7 +63,6 @@ impl<'a> MemoryCodeSink<'a> { }, relocs, traps, - stack_maps, } } } @@ -84,12 +79,6 @@ pub trait RelocSink { _: Addend, ); - /// Add a relocation referencing a constant. - fn reloc_constant(&mut self, _: CodeOffset, _: Reloc, _: ConstantOffset); - - /// Add a relocation referencing a jump table. - fn reloc_jt(&mut self, _: CodeOffset, _: Reloc, _: JumpTable); - /// Track a call site whose return address is the given CodeOffset, for the given opcode. Does /// nothing in general, only useful for certain embedders (SpiderMonkey). fn add_call_site(&mut self, _: Opcode, _: CodeOffset, _: SourceLoc) {} @@ -146,16 +135,6 @@ impl<'a> CodeSink for MemoryCodeSink<'a> { self.relocs.reloc_external(ofs, srcloc, rel, name, addend); } - fn reloc_constant(&mut self, rel: Reloc, constant_offset: ConstantOffset) { - let ofs = self.offset(); - self.relocs.reloc_constant(ofs, rel, constant_offset); - } - - fn reloc_jt(&mut self, rel: Reloc, jt: JumpTable) { - let ofs = self.offset(); - self.relocs.reloc_jt(ofs, rel, jt); - } - fn trap(&mut self, code: TrapCode, srcloc: SourceLoc) { let ofs = self.offset(); self.traps.trap(ofs, srcloc, code); @@ -174,12 +153,6 @@ impl<'a> CodeSink for MemoryCodeSink<'a> { self.info.total_size = self.offset(); } - fn add_stack_map(&mut self, val_list: &[Value], func: &Function, isa: &dyn TargetIsa) { - let ofs = self.offset(); - let stack_map = StackMap::from_values(&val_list, func, isa); - self.stack_maps.add_stack_map(ofs, stack_map); - } - fn add_call_site(&mut self, opcode: Opcode, loc: SourceLoc) { debug_assert!( opcode.is_call(), @@ -205,8 +178,6 @@ impl RelocSink for NullRelocSink { _: Addend, ) { } - fn reloc_constant(&mut self, _: CodeOffset, _: Reloc, _: ConstantOffset) {} - fn reloc_jt(&mut self, _: CodeOffset, _: 
Reloc, _: JumpTable) {} } /// A `TrapSink` implementation that does nothing, which is convenient when diff --git a/cranelift/codegen/src/binemit/mod.rs b/cranelift/codegen/src/binemit/mod.rs index 62602d5a88..9532e34538 100644 --- a/cranelift/codegen/src/binemit/mod.rs +++ b/cranelift/codegen/src/binemit/mod.rs @@ -4,23 +4,14 @@ //! binary machine code. mod memorysink; -mod relaxation; -mod shrink; mod stack_map; pub use self::memorysink::{ MemoryCodeSink, NullRelocSink, NullStackMapSink, NullTrapSink, RelocSink, StackMapSink, TrapSink, }; -pub use self::relaxation::relax_branches; -pub use self::shrink::shrink_instructions; pub use self::stack_map::StackMap; -use crate::ir::entities::Value; -use crate::ir::{ - ConstantOffset, ExternalName, Function, Inst, JumpTable, Opcode, SourceLoc, TrapCode, -}; -use crate::isa::TargetIsa; -pub use crate::regalloc::RegDiversions; +use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode}; use core::fmt; #[cfg(feature = "enable-serde")] use serde::{Deserialize, Serialize}; @@ -44,8 +35,6 @@ pub enum Reloc { Abs8, /// x86 PC-relative 4-byte X86PCRel4, - /// x86 PC-relative 4-byte offset to trailing rodata - X86PCRelRodata4, /// x86 call to PC-relative 4-byte X86CallPCRel4, /// x86 call to PLT-relative 4-byte @@ -58,8 +47,6 @@ pub enum Reloc { /// value is sign-extended, multiplied by 4, and added to the PC of /// the call instruction to form the destination address. 
Arm64Call, - /// RISC-V call target - RiscvCall, /// s390x PC-relative 4-byte offset S390xPCRel32Dbl, @@ -89,11 +76,10 @@ impl fmt::Display for Reloc { Self::Abs8 => write!(f, "Abs8"), Self::S390xPCRel32Dbl => write!(f, "PCRel32Dbl"), Self::X86PCRel4 => write!(f, "PCRel4"), - Self::X86PCRelRodata4 => write!(f, "PCRelRodata4"), Self::X86CallPCRel4 => write!(f, "CallPCRel4"), Self::X86CallPLTRel4 => write!(f, "CallPLTRel4"), Self::X86GOTPCRel4 => write!(f, "GOTPCRel4"), - Self::Arm32Call | Self::Arm64Call | Self::RiscvCall => write!(f, "Call"), + Self::Arm32Call | Self::Arm64Call => write!(f, "Call"), Self::ElfX86_64TlsGd => write!(f, "ElfX86_64TlsGd"), Self::MachOX86_64Tlv => write!(f, "MachOX86_64Tlv"), @@ -158,12 +144,6 @@ pub trait CodeSink { /// Add a relocation referencing an external symbol plus the addend at the current offset. fn reloc_external(&mut self, _: SourceLoc, _: Reloc, _: &ExternalName, _: Addend); - /// Add a relocation referencing a constant. - fn reloc_constant(&mut self, _: Reloc, _: ConstantOffset); - - /// Add a relocation referencing a jump table. - fn reloc_jt(&mut self, _: Reloc, _: JumpTable); - /// Add trap information for the current offset. fn trap(&mut self, _: TrapCode, _: SourceLoc); @@ -176,62 +156,8 @@ pub trait CodeSink { /// Read-only data output is complete, we're done. fn end_codegen(&mut self); - /// Add a stack map at the current code offset. - fn add_stack_map(&mut self, _: &[Value], _: &Function, _: &dyn TargetIsa); - /// Add a call site for a call with the given opcode, returning at the current offset. fn add_call_site(&mut self, _: Opcode, _: SourceLoc) { // Default implementation doesn't need to do anything. } } - -/// Report a bad encoding error. -#[cold] -pub fn bad_encoding(func: &Function, inst: Inst) -> ! { - panic!( - "Bad encoding {} for {}", - func.encodings[inst], - func.dfg.display_inst(inst, None) - ); -} - -/// Emit a function to `sink`, given an instruction emitter function. 
-/// -/// This function is called from the `TargetIsa::emit_function()` implementations with the -/// appropriate instruction emitter. -pub fn emit_function(func: &Function, emit_inst: EI, sink: &mut CS, isa: &dyn TargetIsa) -where - CS: CodeSink, - EI: Fn(&Function, Inst, &mut RegDiversions, &mut CS, &dyn TargetIsa), -{ - let mut divert = RegDiversions::new(); - for block in func.layout.blocks() { - divert.at_block(&func.entry_diversions, block); - debug_assert_eq!(func.offsets[block], sink.offset()); - for inst in func.layout.block_insts(block) { - emit_inst(func, inst, &mut divert, sink, isa); - } - } - - sink.begin_jumptables(); - - // Output jump tables. - for (jt, jt_data) in func.jump_tables.iter() { - let jt_offset = func.jt_offsets[jt]; - for block in jt_data.iter() { - let rel_offset: i32 = func.offsets[*block] as i32 - jt_offset as i32; - sink.put4(rel_offset as u32) - } - } - - sink.begin_rodata(); - - // Output constants. - for (_, constant_data) in func.dfg.constants.iter() { - for byte in constant_data.iter() { - sink.put1(*byte) - } - } - - sink.end_codegen(); -} diff --git a/cranelift/codegen/src/binemit/relaxation.rs b/cranelift/codegen/src/binemit/relaxation.rs deleted file mode 100644 index 0657f878dd..0000000000 --- a/cranelift/codegen/src/binemit/relaxation.rs +++ /dev/null @@ -1,396 +0,0 @@ -//! Branch relaxation and offset computation. -//! -//! # block header offsets -//! -//! Before we can generate binary machine code for branch instructions, we need to know the final -//! offsets of all the block headers in the function. This information is encoded in the -//! `func.offsets` table. -//! -//! # Branch relaxation -//! -//! Branch relaxation is the process of ensuring that all branches in the function have enough -//! range to encode their destination. It is common to have multiple branch encodings in an ISA. -//! For example, x86 branches can have either an 8-bit or a 32-bit displacement. -//! -//! 
On RISC architectures, it can happen that conditional branches have a shorter range than -//! unconditional branches: -//! -//! ```clif -//! brz v1, block17 -//! ``` -//! -//! can be transformed into: -//! -//! ```clif -//! brnz v1, block23 -//! jump block17 -//! block23: -//! ``` - -use crate::binemit::{CodeInfo, CodeOffset}; -use crate::cursor::{Cursor, FuncCursor}; -use crate::dominator_tree::DominatorTree; -use crate::flowgraph::ControlFlowGraph; -use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueList}; -use crate::isa::{EncInfo, TargetIsa}; -use crate::iterators::IteratorExtras; -use crate::regalloc::RegDiversions; -use crate::timing; -use crate::CodegenResult; -use core::convert::TryFrom; - -/// Relax branches and compute the final layout of block headers in `func`. -/// -/// Fill in the `func.offsets` table so the function is ready for binary emission. -pub fn relax_branches( - func: &mut Function, - _cfg: &mut ControlFlowGraph, - _domtree: &mut DominatorTree, - isa: &dyn TargetIsa, -) -> CodegenResult { - let _tt = timing::relax_branches(); - - let encinfo = isa.encoding_info(); - - // Clear all offsets so we can recognize blocks that haven't been visited yet. - func.offsets.clear(); - func.offsets.resize(func.dfg.num_blocks()); - - // Start by removing redundant jumps. - fold_redundant_jumps(func, _cfg, _domtree); - - // Convert jumps to fallthrough instructions where possible. - fallthroughs(func); - - let mut offset = 0; - let mut divert = RegDiversions::new(); - - // First, compute initial offsets for every block. 
- { - let mut cur = FuncCursor::new(func); - while let Some(block) = cur.next_block() { - divert.at_block(&cur.func.entry_diversions, block); - cur.func.offsets[block] = offset; - while let Some(inst) = cur.next_inst() { - divert.apply(&cur.func.dfg[inst]); - let enc = cur.func.encodings[inst]; - offset += encinfo.byte_size(enc, inst, &divert, &cur.func); - } - } - } - - // Then, run the relaxation algorithm until it converges. - let mut go_again = true; - while go_again { - go_again = false; - offset = 0; - - // Visit all instructions in layout order. - let mut cur = FuncCursor::new(func); - while let Some(block) = cur.next_block() { - divert.at_block(&cur.func.entry_diversions, block); - - // Record the offset for `block` and make sure we iterate until offsets are stable. - if cur.func.offsets[block] != offset { - cur.func.offsets[block] = offset; - go_again = true; - } - - while let Some(inst) = cur.next_inst() { - divert.apply(&cur.func.dfg[inst]); - - let enc = cur.func.encodings[inst]; - - // See if this is a branch has a range and a destination, and if the target is in - // range. - if let Some(range) = encinfo.branch_range(enc) { - if let Some(dest) = cur.func.dfg[inst].branch_destination() { - let dest_offset = cur.func.offsets[dest]; - if !range.contains(offset, dest_offset) { - offset += - relax_branch(&mut cur, &divert, offset, dest_offset, &encinfo, isa); - continue; - } - } - } - - offset += encinfo.byte_size(enc, inst, &divert, &cur.func); - } - } - } - - let code_size = offset; - let jumptables = offset; - - for (jt, jt_data) in func.jump_tables.iter() { - func.jt_offsets[jt] = offset; - // TODO: this should be computed based on the min size needed to hold the furthest branch. 
- offset += jt_data.len() as u32 * 4; - } - - let jumptables_size = offset - jumptables; - let rodata = offset; - - for constant in func.dfg.constants.entries_mut() { - constant.set_offset(offset); - offset += - u32::try_from(constant.len()).expect("Constants must have a length that fits in a u32") - } - - let rodata_size = offset - rodata; - - Ok(CodeInfo { - code_size, - jumptables_size, - rodata_size, - total_size: offset, - }) -} - -/// Folds an instruction if it is a redundant jump. -/// Returns whether folding was performed (which invalidates the CFG). -fn try_fold_redundant_jump( - func: &mut Function, - cfg: &mut ControlFlowGraph, - block: Block, - first_inst: Inst, -) -> bool { - let first_dest = match func.dfg[first_inst].branch_destination() { - Some(block) => block, // The instruction was a single-target branch. - None => { - return false; // The instruction was either multi-target or not a branch. - } - }; - - // For the moment, only attempt to fold a branch to a block that is parameterless. - // These blocks are mainly produced by critical edge splitting. - // - // TODO: Allow folding blocks that define SSA values and function as phi nodes. - if func.dfg.num_block_params(first_dest) != 0 { - return false; - } - - // Look at the first instruction of the first branch's destination. - // If it is an unconditional branch, maybe the second jump can be bypassed. - let second_inst = func.layout.first_inst(first_dest).expect("Instructions"); - if func.dfg[second_inst].opcode() != Opcode::Jump { - return false; - } - - // Now we need to fix up first_inst's block parameters to match second_inst's, - // without changing the branch-specific arguments. - // - // The intermediary block is allowed to reference any SSA value that dominates it, - // but that SSA value may not necessarily also dominate the instruction that's - // being patched. - - // Get the arguments and parameters passed by the first branch. 
- let num_fixed = func.dfg[first_inst] - .opcode() - .constraints() - .num_fixed_value_arguments(); - let (first_args, first_params) = func.dfg[first_inst] - .arguments(&func.dfg.value_lists) - .split_at(num_fixed); - - // Get the parameters passed by the second jump. - let num_fixed = func.dfg[second_inst] - .opcode() - .constraints() - .num_fixed_value_arguments(); - let (_, second_params) = func.dfg[second_inst] - .arguments(&func.dfg.value_lists) - .split_at(num_fixed); - let mut second_params = second_params.to_vec(); // Clone for rewriting below. - - // For each parameter passed by the second jump, if any of those parameters - // was a block parameter, rewrite it to refer to the value that the first jump - // passed in its parameters. Otherwise, make sure it dominates first_inst. - // - // For example: if we `block0: jump block1(v1)` to `block1(v2): jump block2(v2)`, - // we want to rewrite the original jump to `jump block2(v1)`. - let block_params: &[Value] = func.dfg.block_params(first_dest); - debug_assert!(block_params.len() == first_params.len()); - - for value in second_params.iter_mut() { - if let Some((n, _)) = block_params.iter().enumerate().find(|(_, &p)| p == *value) { - // This value was the Nth parameter passed to the second_inst's block. - // Rewrite it as the Nth parameter passed by first_inst. - *value = first_params[n]; - } - } - - // Build a value list of first_args (unchanged) followed by second_params (rewritten). - let arguments_vec: alloc::vec::Vec<_> = first_args - .iter() - .chain(second_params.iter()) - .copied() - .collect(); - let value_list = ValueList::from_slice(&arguments_vec, &mut func.dfg.value_lists); - - func.dfg[first_inst].take_value_list(); // Drop the current list. - func.dfg[first_inst].put_value_list(value_list); // Put the new list. - - // Bypass the second jump. - // This can disconnect the Block containing `second_inst`, to be cleaned up later. 
- let second_dest = func.dfg[second_inst].branch_destination().expect("Dest"); - func.change_branch_destination(first_inst, second_dest); - cfg.recompute_block(func, block); - - // The previously-intermediary Block may now be unreachable. Update CFG. - if cfg.pred_iter(first_dest).count() == 0 { - // Remove all instructions from that block. - while let Some(inst) = func.layout.first_inst(first_dest) { - func.layout.remove_inst(inst); - } - - // Remove the block... - cfg.recompute_block(func, first_dest); // ...from predecessor lists. - func.layout.remove_block(first_dest); // ...from the layout. - } - - true -} - -/// Redirects `jump` instructions that point to other `jump` instructions to the final destination. -/// This transformation may orphan some blocks. -fn fold_redundant_jumps( - func: &mut Function, - cfg: &mut ControlFlowGraph, - domtree: &mut DominatorTree, -) { - let mut folded = false; - - // Postorder iteration guarantees that a chain of jumps is visited from - // the end of the chain to the start of the chain. - for &block in domtree.cfg_postorder() { - // Only proceed if the first terminator instruction is a single-target branch. - let first_inst = func - .layout - .last_inst(block) - .expect("Block has no terminator"); - folded |= try_fold_redundant_jump(func, cfg, block, first_inst); - - // Also try the previous instruction. - if let Some(prev_inst) = func.layout.prev_inst(first_inst) { - folded |= try_fold_redundant_jump(func, cfg, block, prev_inst); - } - } - - // Folding jumps invalidates the dominator tree. - if folded { - domtree.compute(func, cfg); - } -} - -/// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any -/// existing `fallthrough` instructions are correct. 
-fn fallthroughs(func: &mut Function) { - for (block, succ) in func.layout.blocks().adjacent_pairs() { - let term = func - .layout - .last_inst(block) - .expect("block has no terminator."); - if let InstructionData::Jump { - ref mut opcode, - destination, - .. - } = func.dfg[term] - { - match *opcode { - Opcode::Fallthrough => { - // Somebody used a fall-through instruction before the branch relaxation pass. - // Make sure it is correct, i.e. the destination is the layout successor. - debug_assert_eq!( - destination, succ, - "Illegal fallthrough from {} to {}, but {}'s successor is {}", - block, destination, block, succ - ) - } - Opcode::Jump => { - // If this is a jump to the successor block, change it to a fall-through. - if destination == succ { - *opcode = Opcode::Fallthrough; - func.encodings[term] = Default::default(); - } - } - _ => {} - } - } - } -} - -/// Relax the branch instruction at `cur` so it can cover the range `offset - dest_offset`. -/// -/// Return the size of the replacement instructions up to and including the location where `cur` is -/// left. -fn relax_branch( - cur: &mut FuncCursor, - divert: &RegDiversions, - offset: CodeOffset, - dest_offset: CodeOffset, - encinfo: &EncInfo, - isa: &dyn TargetIsa, -) -> CodeOffset { - let inst = cur.current_inst().unwrap(); - log::trace!( - "Relaxing [{}] {} for {:#x}-{:#x} range", - encinfo.display(cur.func.encodings[inst]), - cur.func.dfg.display_inst(inst, isa), - offset, - dest_offset - ); - - // Pick the smallest encoding that can handle the branch range. 
- let dfg = &cur.func.dfg; - let ctrl_type = dfg.ctrl_typevar(inst); - if let Some(enc) = isa - .legal_encodings(cur.func, &dfg[inst], ctrl_type) - .filter(|&enc| { - let range = encinfo.branch_range(enc).expect("Branch with no range"); - if !range.contains(offset, dest_offset) { - log::trace!(" trying [{}]: out of range", encinfo.display(enc)); - false - } else if encinfo.operand_constraints(enc) - != encinfo.operand_constraints(cur.func.encodings[inst]) - { - // Conservatively give up if the encoding has different constraints - // than the original, so that we don't risk picking a new encoding - // which the existing operands don't satisfy. We can't check for - // validity directly because we don't have a RegDiversions active so - // we don't know which registers are actually in use. - log::trace!(" trying [{}]: constraints differ", encinfo.display(enc)); - false - } else { - log::trace!(" trying [{}]: OK", encinfo.display(enc)); - true - } - }) - .min_by_key(|&enc| encinfo.byte_size(enc, inst, &divert, &cur.func)) - { - debug_assert!(enc != cur.func.encodings[inst]); - cur.func.encodings[inst] = enc; - return encinfo.byte_size(enc, inst, &divert, &cur.func); - } - - // Note: On some RISC ISAs, conditional branches have shorter range than unconditional - // branches, so one way of extending the range of a conditional branch is to invert its - // condition and make it branch over an unconditional jump which has the larger range. - // - // Splitting the block is problematic this late because there may be register diversions in - // effect across the conditional branch, and they can't survive the control flow edge to a new - // block. We have two options for handling that: - // - // 1. Set a flag on the new block that indicates it wants the preserve the register diversions of - // its layout predecessor, or - // 2. Use an encoding macro for the branch-over-jump pattern so we don't need to split the block. - // - // It seems that 1. 
would allow us to share code among RISC ISAs that need this. - // - // We can't allow register diversions to survive from the layout predecessor because the layout - // predecessor could contain kill points for some values that are live in this block, and - // diversions are not automatically cancelled when the live range of a value ends. - - // This assumes solution 2. above: - panic!("No branch in range for {:#x}-{:#x}", offset, dest_offset); -} diff --git a/cranelift/codegen/src/binemit/shrink.rs b/cranelift/codegen/src/binemit/shrink.rs deleted file mode 100644 index 1e961c9829..0000000000 --- a/cranelift/codegen/src/binemit/shrink.rs +++ /dev/null @@ -1,72 +0,0 @@ -//! Instruction shrinking. -//! -//! Sometimes there are multiple valid encodings for a given instruction. Cranelift often initially -//! chooses the largest one, because this typically provides the register allocator the most -//! flexibility. However, once register allocation is done, this is no longer important, and we -//! can switch to smaller encodings when possible. - -use crate::ir::instructions::InstructionData; -use crate::ir::Function; -use crate::isa::TargetIsa; -use crate::regalloc::RegDiversions; -use crate::timing; - -/// Pick the smallest valid encodings for instructions. -pub fn shrink_instructions(func: &mut Function, isa: &dyn TargetIsa) { - let _tt = timing::shrink_instructions(); - - let encinfo = isa.encoding_info(); - let mut divert = RegDiversions::new(); - - for block in func.layout.blocks() { - // Load diversions from predecessors. - divert.at_block(&func.entry_diversions, block); - - for inst in func.layout.block_insts(block) { - let enc = func.encodings[inst]; - if enc.is_legal() { - // regmove/regfill/regspill are special instructions with register immediates - // that represented as normal operands, so the normal predicates below don't - // handle them correctly. - // - // Also, they need to be presented to the `RegDiversions` to update the - // location tracking. 
- // - // TODO: Eventually, we want the register allocator to avoid leaving these special - // instructions behind, but for now, just temporarily avoid trying to shrink them. - let inst_data = &func.dfg[inst]; - match inst_data { - InstructionData::RegMove { .. } - | InstructionData::RegFill { .. } - | InstructionData::RegSpill { .. } => { - divert.apply(inst_data); - continue; - } - _ => (), - } - - let ctrl_type = func.dfg.ctrl_typevar(inst); - - // Pick the last encoding with constraints that are satisfied. - let best_enc = isa - .legal_encodings(func, &func.dfg[inst], ctrl_type) - .filter(|e| encinfo.constraints[e.recipe()].satisfied(inst, &divert, &func)) - .min_by_key(|e| encinfo.byte_size(*e, inst, &divert, &func)) - .unwrap(); - - if best_enc != enc { - func.encodings[inst] = best_enc; - - log::trace!( - "Shrunk [{}] to [{}] in {}, reducing the size from {} to {}", - encinfo.display(enc), - encinfo.display(best_enc), - func.dfg.display_inst(inst, isa), - encinfo.byte_size(enc, inst, &divert, &func), - encinfo.byte_size(best_enc, inst, &divert, &func) - ); - } - } - } - } -} diff --git a/cranelift/codegen/src/binemit/stack_map.rs b/cranelift/codegen/src/binemit/stack_map.rs index c3055a0154..f9b99269ab 100644 --- a/cranelift/codegen/src/binemit/stack_map.rs +++ b/cranelift/codegen/src/binemit/stack_map.rs @@ -1,6 +1,4 @@ use crate::bitset::BitSet; -use crate::ir; -use crate::isa::TargetIsa; use alloc::vec::Vec; type Num = u32; @@ -76,57 +74,6 @@ pub struct StackMap { } impl StackMap { - /// Create a `StackMap` based on where references are located on a - /// function's stack. - pub fn from_values( - args: &[ir::entities::Value], - func: &ir::Function, - isa: &dyn TargetIsa, - ) -> Self { - let loc = &func.locations; - let mut live_ref_in_stack_slot = crate::HashSet::new(); - // References can be in registers, and live registers values are pushed onto the stack before calls and traps. - // TODO: Implement register maps. 
If a register containing a reference is spilled and reused after a safepoint, - // it could contain a stale reference value if the garbage collector relocated the value. - for val in args { - if let Some(value_loc) = loc.get(*val) { - match *value_loc { - ir::ValueLoc::Stack(stack_slot) => { - live_ref_in_stack_slot.insert(stack_slot); - } - _ => {} - } - } - } - - let stack = &func.stack_slots; - let info = func.stack_slots.layout_info.unwrap(); - - // Refer to the doc comment for `StackMap` above to understand the - // bitmap representation used here. - let map_size = (info.frame_size + info.inbound_args_size) as usize; - let word_size = isa.pointer_bytes() as usize; - let num_words = map_size / word_size; - - let mut vec = alloc::vec::Vec::with_capacity(num_words); - vec.resize(num_words, false); - - for (ss, ssd) in stack.iter() { - if !live_ref_in_stack_slot.contains(&ss) - || ssd.kind == ir::stackslot::StackSlotKind::OutgoingArg - { - continue; - } - - debug_assert!(ssd.size as usize == word_size); - let bytes_from_bottom = info.frame_size as i32 + ssd.offset.unwrap(); - let words_from_bottom = (bytes_from_bottom as usize) / word_size; - vec[words_from_bottom] = true; - } - - Self::from_slice(&vec) - } - /// Create a vec of Bitsets from a slice of bools. pub fn from_slice(vec: &[bool]) -> Self { let len = vec.len(); diff --git a/cranelift/codegen/src/cfg_printer.rs b/cranelift/codegen/src/cfg_printer.rs index 4ceae473b2..843b66f277 100644 --- a/cranelift/codegen/src/cfg_printer.rs +++ b/cranelift/codegen/src/cfg_printer.rs @@ -51,11 +51,11 @@ impl<'a> CFGPrinter<'a> { for block in &self.func.layout { write!(w, " {} [shape=record, label=\"{{", block)?; - crate::write::write_block_header(w, self.func, None, block, 4)?; + crate::write::write_block_header(w, self.func, block, 4)?; // Add all outgoing branch instructions to the label. 
for inst in self.func.layout.block_likely_branches(block) { write!(w, " | <{}>", inst)?; - PlainWriter.write_instruction(w, self.func, &aliases, None, inst, 0)?; + PlainWriter.write_instruction(w, self.func, &aliases, inst, 0)?; } writeln!(w, "}}\"]")? } diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index 8214fc7781..f7cfb8c0c7 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -9,24 +9,17 @@ //! contexts concurrently. Typically, you would have one context per compilation thread and only a //! single ISA instance. -use crate::binemit::{ - relax_branches, shrink_instructions, CodeInfo, MemoryCodeSink, RelocSink, StackMapSink, - TrapSink, -}; +use crate::binemit::{CodeInfo, MemoryCodeSink, RelocSink, StackMapSink, TrapSink}; use crate::dce::do_dce; use crate::dominator_tree::DominatorTree; use crate::flowgraph::ControlFlowGraph; use crate::ir::Function; use crate::isa::TargetIsa; -use crate::legalize_function; use crate::legalizer::simple_legalize; use crate::licm::do_licm; use crate::loop_analysis::LoopAnalysis; use crate::machinst::{MachCompileResult, MachStackMap}; use crate::nan_canonicalization::do_nan_canonicalization; -use crate::postopt::do_postopt; -use crate::redundant_reload_remover::RedundantReloadRemover; -use crate::regalloc; use crate::remove_constant_phis::do_remove_constant_phis; use crate::result::CodegenResult; use crate::settings::{FlagsOrIsa, OptLevel}; @@ -34,8 +27,7 @@ use crate::simple_gvn::do_simple_gvn; use crate::simple_preopt::do_preopt; use crate::timing; use crate::unreachable_code::eliminate_unreachable_code; -use crate::value_label::{build_value_labels_ranges, ComparableSourceLoc, ValueLabelsRanges}; -use crate::verifier::{verify_context, verify_locations, VerifierErrors, VerifierResult}; +use crate::verifier::{verify_context, VerifierErrors, VerifierResult}; #[cfg(feature = "souper-harvest")] use alloc::string::String; use alloc::vec::Vec; @@ -54,15 +46,9 @@ pub 
struct Context { /// Dominator tree for `func`. pub domtree: DominatorTree, - /// Register allocation context. - pub regalloc: regalloc::Context, - /// Loop analysis of `func`. pub loop_analysis: LoopAnalysis, - /// Redundant-reload remover context. - pub redundant_reload_remover: RedundantReloadRemover, - /// Result of MachBackend compilation, if computed. pub mach_compile_result: Option, @@ -88,9 +74,7 @@ impl Context { func, cfg: ControlFlowGraph::new(), domtree: DominatorTree::new(), - regalloc: regalloc::Context::new(), loop_analysis: LoopAnalysis::new(), - redundant_reload_remover: RedundantReloadRemover::new(), mach_compile_result: None, want_disasm: false, } @@ -101,9 +85,7 @@ impl Context { self.func.clear(); self.cfg.clear(); self.domtree.clear(); - self.regalloc.clear(); self.loop_analysis.clear(); - self.redundant_reload_remover.clear(); self.mach_compile_result = None; self.want_disasm = false; } @@ -137,13 +119,7 @@ impl Context { let old_len = mem.len(); mem.resize(old_len + info.total_size as usize, 0); let new_info = unsafe { - self.emit_to_memory( - isa, - mem.as_mut_ptr().add(old_len), - relocs, - traps, - stack_maps, - ) + self.emit_to_memory(mem.as_mut_ptr().add(old_len), relocs, traps, stack_maps) }; debug_assert!(new_info == info); Ok(info) @@ -164,7 +140,7 @@ impl Context { log::debug!( "Compiling (opt level {:?}):\n{}", opt_level, - self.func.display(isa) + self.func.display() ); self.compute_cfg(); @@ -177,7 +153,6 @@ impl Context { self.legalize(isa)?; if opt_level != OptLevel::None { - self.postopt(isa)?; self.compute_domtree(); self.compute_loop_analysis(); self.licm(isa)?; @@ -192,25 +167,12 @@ impl Context { self.remove_constant_phis(isa)?; - if let Some(backend) = isa.get_mach_backend() { - let result = backend.compile_function(&self.func, self.want_disasm)?; - let info = result.code_info(); - self.mach_compile_result = Some(result); - Ok(info) - } else { - self.regalloc(isa)?; - self.prologue_epilogue(isa)?; - if opt_level == 
OptLevel::Speed || opt_level == OptLevel::SpeedAndSize { - self.redundant_reload_remover(isa)?; - } - if opt_level == OptLevel::SpeedAndSize { - self.shrink_instructions(isa)?; - } - let result = self.relax_branches(isa); - - log::trace!("Compiled:\n{}", self.func.display(isa)); - result - } + // FIXME: make this non optional + let backend = isa.get_mach_backend().expect("only mach backends nowadays"); + let result = backend.compile_function(&self.func, self.want_disasm)?; + let info = result.code_info(); + self.mach_compile_result = Some(result); + Ok(info) } /// Emit machine code directly into raw memory. @@ -228,33 +190,31 @@ impl Context { /// Returns information about the emitted code and data. pub unsafe fn emit_to_memory( &self, - isa: &dyn TargetIsa, mem: *mut u8, relocs: &mut dyn RelocSink, traps: &mut dyn TrapSink, stack_maps: &mut dyn StackMapSink, ) -> CodeInfo { let _tt = timing::binemit(); - let mut sink = MemoryCodeSink::new(mem, relocs, traps, stack_maps); - if let Some(ref result) = &self.mach_compile_result { - result.buffer.emit(&mut sink); - let info = sink.info; - // New backends do not emit StackMaps through the `CodeSink` because its interface - // requires `Value`s; instead, the `StackMap` objects are directly accessible via - // `result.buffer.stack_maps()`. - for &MachStackMap { - offset_end, - ref stack_map, - .. - } in result.buffer.stack_maps() - { - stack_maps.add_stack_map(offset_end, stack_map.clone()); - } - info - } else { - isa.emit_function_to_memory(&self.func, &mut sink); - sink.info + let mut sink = MemoryCodeSink::new(mem, relocs, traps); + let result = self + .mach_compile_result + .as_ref() + .expect("only using mach backend now"); + result.buffer.emit(&mut sink); + let info = sink.info; + // New backends do not emit StackMaps through the `CodeSink` because its interface + // requires `Value`s; instead, the `StackMap` objects are directly accessible via + // `result.buffer.stack_maps()`. 
+ for &MachStackMap { + offset_end, + ref stack_map, + .. + } in result.buffer.stack_maps() + { + stack_maps.add_stack_map(offset_end, stack_map.clone()); } + info } /// If available, return information about the code layout in the @@ -314,26 +274,6 @@ impl Context { Ok(()) } - /// Run the locations verifier on the function. - pub fn verify_locations(&self, isa: &dyn TargetIsa) -> VerifierResult<()> { - let mut errors = VerifierErrors::default(); - let _ = verify_locations(isa, &self.func, &self.cfg, None, &mut errors); - - if errors.is_empty() { - Ok(()) - } else { - Err(errors) - } - } - - /// Run the locations verifier only if the `enable_verifier` setting is true. - pub fn verify_locations_if(&self, isa: &dyn TargetIsa) -> CodegenResult<()> { - if isa.flags().enable_verifier() { - self.verify_locations(isa)?; - } - Ok(()) - } - /// Perform dead-code elimination on the function. pub fn dce<'a, FOI: Into>>(&mut self, fisa: FOI) -> CodegenResult<()> { do_dce(&mut self.func, &mut self.domtree); @@ -370,22 +310,10 @@ impl Context { // TODO: Avoid doing this when legalization doesn't actually mutate the CFG. self.domtree.clear(); self.loop_analysis.clear(); - if isa.get_mach_backend().is_some() { - // Run some specific legalizations only. - simple_legalize(&mut self.func, &mut self.cfg, isa); - self.verify_if(isa) - } else { - legalize_function(&mut self.func, &mut self.cfg, isa); - log::trace!("Legalized:\n{}", self.func.display(isa)); - self.verify_if(isa) - } - } - /// Perform post-legalization rewrites on the function. - pub fn postopt(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { - do_postopt(&mut self.func, isa); - self.verify_if(isa)?; - Ok(()) + // Run some specific legalizations only. + simple_legalize(&mut self.func, &mut self.cfg, isa); + self.verify_if(isa) } /// Compute the control flow graph. @@ -419,7 +347,6 @@ impl Context { /// Perform LICM on the function. 
pub fn licm(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { do_licm( - isa, &mut self.func, &mut self.cfg, &mut self.domtree, @@ -437,58 +364,6 @@ impl Context { self.verify_if(fisa) } - /// Run the register allocator. - pub fn regalloc(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { - self.regalloc - .run(isa, &mut self.func, &mut self.cfg, &mut self.domtree) - } - - /// Insert prologue and epilogues after computing the stack frame layout. - pub fn prologue_epilogue(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { - isa.prologue_epilogue(&mut self.func)?; - self.verify_if(isa)?; - self.verify_locations_if(isa)?; - Ok(()) - } - - /// Do redundant-reload removal after allocation of both registers and stack slots. - pub fn redundant_reload_remover(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { - self.redundant_reload_remover - .run(isa, &mut self.func, &self.cfg); - self.verify_if(isa)?; - Ok(()) - } - - /// Run the instruction shrinking pass. - pub fn shrink_instructions(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { - shrink_instructions(&mut self.func, isa); - self.verify_if(isa)?; - self.verify_locations_if(isa)?; - Ok(()) - } - - /// Run the branch relaxation pass and return information about the function's code and - /// read-only data. - pub fn relax_branches(&mut self, isa: &dyn TargetIsa) -> CodegenResult { - let info = relax_branches(&mut self.func, &mut self.cfg, &mut self.domtree, isa)?; - self.verify_if(isa)?; - self.verify_locations_if(isa)?; - Ok(info) - } - - /// Builds ranges and location for specified value labels. - pub fn build_value_labels_ranges( - &self, - isa: &dyn TargetIsa, - ) -> CodegenResult { - Ok(build_value_labels_ranges::( - &self.func, - &self.regalloc, - self.mach_compile_result.as_ref(), - isa, - )) - } - /// Harvest candidate left-hand sides for superoptimization with Souper. 
#[cfg(feature = "souper-harvest")] pub fn souper_harvest( diff --git a/cranelift/codegen/src/cursor.rs b/cranelift/codegen/src/cursor.rs index c95aa5a3fb..2dc8ce7a2b 100644 --- a/cranelift/codegen/src/cursor.rs +++ b/cranelift/codegen/src/cursor.rs @@ -3,7 +3,6 @@ //! This module defines cursor data types that can be used for inserting instructions. use crate::ir; -use crate::isa::TargetIsa; /// The possible positions of a cursor. #[derive(Clone, Copy, PartialEq, Eq, Debug)] @@ -634,7 +633,7 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut FuncCursor<'f> { &mut self.func.dfg } - fn insert_built_inst(self, inst: ir::Inst, _: ir::Type) -> &'c mut ir::DataFlowGraph { + fn insert_built_inst(self, inst: ir::Inst) -> &'c mut ir::DataFlowGraph { // TODO: Remove this assertion once #796 is fixed. #[cfg(debug_assertions)] { @@ -664,152 +663,3 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut FuncCursor<'f> { &mut self.func.dfg } } - -/// Encoding cursor. -/// -/// An `EncCursor` can be used to insert instructions that are immediately assigned an encoding. -/// The cursor holds a mutable reference to the whole function which can be re-borrowed from the -/// public `pos.func` member. -pub struct EncCursor<'f> { - pos: CursorPosition, - srcloc: ir::SourceLoc, - built_inst: Option, - - /// The referenced function. - pub func: &'f mut ir::Function, - - /// The target ISA that will be used to encode instructions. - pub isa: &'f dyn TargetIsa, -} - -impl<'f> EncCursor<'f> { - /// Create a new `EncCursor` pointing nowhere. - pub fn new(func: &'f mut ir::Function, isa: &'f dyn TargetIsa) -> Self { - Self { - pos: CursorPosition::Nowhere, - srcloc: Default::default(), - built_inst: None, - func, - isa, - } - } - - /// Use the source location of `inst` for future instructions. - pub fn use_srcloc(&mut self, inst: ir::Inst) { - self.srcloc = self.func.srclocs[inst]; - } - - /// Create an instruction builder that will insert an encoded instruction at the current - /// position. 
- /// - /// The builder will panic if it is used to insert an instruction that can't be encoded for - /// `self.isa`. - pub fn ins(&mut self) -> ir::InsertBuilder<&mut EncCursor<'f>> { - ir::InsertBuilder::new(self) - } - - /// Get the last built instruction. - /// - /// This returns the last instruction that was built using the `ins()` method on this cursor. - /// Panics if no instruction was built. - pub fn built_inst(&self) -> ir::Inst { - self.built_inst.expect("No instruction was inserted") - } - - /// Return an object that can display `inst`. - /// - /// This is a convenience wrapper for the DFG equivalent. - pub fn display_inst(&self, inst: ir::Inst) -> ir::dfg::DisplayInst { - self.func.dfg.display_inst(inst, self.isa) - } -} - -impl<'f> Cursor for EncCursor<'f> { - fn position(&self) -> CursorPosition { - self.pos - } - - fn set_position(&mut self, pos: CursorPosition) { - self.pos = pos - } - - fn srcloc(&self) -> ir::SourceLoc { - self.srcloc - } - - fn set_srcloc(&mut self, srcloc: ir::SourceLoc) { - self.srcloc = srcloc; - } - - fn layout(&self) -> &ir::Layout { - &self.func.layout - } - - fn layout_mut(&mut self) -> &mut ir::Layout { - &mut self.func.layout - } -} - -impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> { - fn data_flow_graph(&self) -> &ir::DataFlowGraph { - &self.func.dfg - } - - fn data_flow_graph_mut(&mut self) -> &mut ir::DataFlowGraph { - &mut self.func.dfg - } - - fn insert_built_inst( - self, - inst: ir::Inst, - ctrl_typevar: ir::Type, - ) -> &'c mut ir::DataFlowGraph { - // TODO: Remove this assertion once #796 is fixed. 
- #[cfg(debug_assertions)] - { - if let CursorPosition::At(_) = self.position() { - if let Some(curr) = self.current_inst() { - if let Some(prev) = self.layout().prev_inst(curr) { - let prev_op = self.data_flow_graph()[prev].opcode(); - let inst_op = self.data_flow_graph()[inst].opcode(); - if prev_op.is_branch() - && !prev_op.is_terminator() - && !inst_op.is_terminator() - { - panic!( - "Inserting instruction {} after {} and before {}", - self.display_inst(inst), - self.display_inst(prev), - self.display_inst(curr) - ) - } - }; - }; - }; - } - // Insert the instruction and remember the reference. - self.insert_inst(inst); - self.built_inst = Some(inst); - - if !self.srcloc.is_default() { - self.func.srclocs[inst] = self.srcloc; - } - - // Skip the encoding update if we're using a new (MachInst) backend; encodings come later, - // during lowering. - if self.isa.get_mach_backend().is_none() { - // Assign an encoding. - // XXX Is there a way to describe this error to the user? - #[cfg_attr(feature = "cargo-clippy", allow(clippy::match_wild_err_arm))] - match self - .isa - .encode(&self.func, &self.func.dfg[inst], ctrl_typevar) - { - Ok(e) => self.func.encodings[inst] = e, - Err(_) => panic!("can't encode {}", self.display_inst(inst)), - } - } - - &mut self.func.dfg - } -} diff --git a/cranelift/codegen/src/data_value.rs b/cranelift/codegen/src/data_value.rs index 965ce1bec5..13aa237674 100644 --- a/cranelift/codegen/src/data_value.rs +++ b/cranelift/codegen/src/data_value.rs @@ -86,6 +86,7 @@ impl DataValue { DataValue::I16(i) => dst[..2].copy_from_slice(&i.to_ne_bytes()[..]), DataValue::I32(i) => dst[..4].copy_from_slice(&i.to_ne_bytes()[..]), DataValue::I64(i) => dst[..8].copy_from_slice(&i.to_ne_bytes()[..]), + DataValue::I128(i) => dst[..16].copy_from_slice(&i.to_ne_bytes()[..]), DataValue::F32(f) => dst[..4].copy_from_slice(&f.bits().to_ne_bytes()[..]), DataValue::F64(f) => dst[..8].copy_from_slice(&f.bits().to_ne_bytes()[..]), DataValue::V128(v) => 
dst[..16].copy_from_slice(&v[..]), @@ -104,6 +105,7 @@ impl DataValue { types::I16 => DataValue::I16(i16::from_ne_bytes(src[..2].try_into().unwrap())), types::I32 => DataValue::I32(i32::from_ne_bytes(src[..4].try_into().unwrap())), types::I64 => DataValue::I64(i64::from_ne_bytes(src[..8].try_into().unwrap())), + types::I128 => DataValue::I128(i128::from_ne_bytes(src[..16].try_into().unwrap())), types::F32 => DataValue::F32(Ieee32::with_bits(u32::from_ne_bytes( src[..4].try_into().unwrap(), ))), diff --git a/cranelift/codegen/src/inst_predicates.rs b/cranelift/codegen/src/inst_predicates.rs index 2160c83e4f..518487af2b 100644 --- a/cranelift/codegen/src/inst_predicates.rs +++ b/cranelift/codegen/src/inst_predicates.rs @@ -78,10 +78,3 @@ pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option { _ => None, } } - -/// Is the given instruction a safepoint (i.e., potentially causes a GC, depending on the -/// embedding, and so requires reftyped values to be enumerated with a stack map)? -pub fn is_safepoint(func: &Function, inst: Inst) -> bool { - let op = func.dfg[inst].opcode(); - op.is_resumable_trap() || op.is_call() -} diff --git a/cranelift/codegen/src/ir/builder.rs b/cranelift/codegen/src/ir/builder.rs index 63054928f2..3191f9dae1 100644 --- a/cranelift/codegen/src/ir/builder.rs +++ b/cranelift/codegen/src/ir/builder.rs @@ -7,7 +7,6 @@ use crate::ir; use crate::ir::types; use crate::ir::{DataFlowGraph, InstructionData}; use crate::ir::{Inst, Opcode, Type, Value}; -use crate::isa; /// Base trait for instruction builders. /// @@ -56,7 +55,7 @@ pub trait InstInserterBase<'f>: Sized { fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph; /// Insert a new instruction which belongs to the DFG. 
- fn insert_built_inst(self, inst: Inst, ctrl_typevar: Type) -> &'f mut DataFlowGraph; + fn insert_built_inst(self, inst: Inst) -> &'f mut DataFlowGraph; } use core::marker::PhantomData; @@ -129,7 +128,7 @@ impl<'f, IIB: InstInserterBase<'f>> InstBuilderBase<'f> for InsertBuilder<'f, II inst = dfg.make_inst(data); dfg.make_inst_results(inst, ctrl_typevar); } - (inst, self.inserter.insert_built_inst(inst, ctrl_typevar)) + (inst, self.inserter.insert_built_inst(inst)) } } @@ -166,7 +165,7 @@ where let ru = self.reuse.as_ref().iter().cloned(); dfg.make_inst_results_reusing(inst, ctrl_typevar, ru); } - (inst, self.inserter.insert_built_inst(inst, ctrl_typevar)) + (inst, self.inserter.insert_built_inst(inst)) } } diff --git a/cranelift/codegen/shared/src/condcodes.rs b/cranelift/codegen/src/ir/condcodes.rs similarity index 100% rename from cranelift/codegen/shared/src/condcodes.rs rename to cranelift/codegen/src/ir/condcodes.rs diff --git a/cranelift/codegen/src/ir/constant.rs b/cranelift/codegen/src/ir/constant.rs index a9aa5d3a64..3cd88d5546 100644 --- a/cranelift/codegen/src/ir/constant.rs +++ b/cranelift/codegen/src/ir/constant.rs @@ -167,38 +167,6 @@ impl FromStr for ConstantData { } } -/// This type describes an offset in bytes within a constant pool. -pub type ConstantOffset = u32; - -/// Inner type for storing data and offset together in the constant pool. The offset is optional -/// because it must be set relative to the function code size (i.e. constants are emitted after the -/// function body); because the function is not yet compiled when constants are inserted, -/// [`set_offset`](crate::ir::ConstantPool::set_offset) must be called once a constant's offset -/// from the beginning of the function is known (see -/// `relaxation` in `relaxation.rs`). 
-#[derive(Clone)] -#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] -pub struct ConstantPoolEntry { - data: ConstantData, - offset: Option, -} - -impl ConstantPoolEntry { - fn new(data: ConstantData) -> Self { - Self { data, offset: None } - } - - /// Return the size of the constant at this entry. - pub fn len(&self) -> usize { - self.data.len() - } - - /// Assign a new offset to the constant at this entry. - pub fn set_offset(&mut self, offset: ConstantOffset) { - self.offset = Some(offset) - } -} - /// Maintains the mapping between a constant handle (i.e. [`Constant`](crate::ir::Constant)) and /// its constant data (i.e. [`ConstantData`](crate::ir::ConstantData)). #[derive(Clone)] @@ -206,7 +174,7 @@ impl ConstantPoolEntry { pub struct ConstantPool { /// This mapping maintains the insertion order as long as Constants are created with /// sequentially increasing integers. - handles_to_values: BTreeMap, + handles_to_values: BTreeMap, /// This mapping is unordered (no need for lexicographic ordering) but allows us to map /// constant data back to handles. @@ -244,64 +212,34 @@ impl ConstantPool { /// Retrieve the constant data given a handle. pub fn get(&self, constant_handle: Constant) -> &ConstantData { assert!(self.handles_to_values.contains_key(&constant_handle)); - &self.handles_to_values.get(&constant_handle).unwrap().data + self.handles_to_values.get(&constant_handle).unwrap() } /// Link a constant handle to its value. This does not de-duplicate data but does avoid /// replacing any existing constant values. use `set` to tie a specific `const42` to its value; /// use `insert` to add a value and return the next available `const` entity. 
pub fn set(&mut self, constant_handle: Constant, constant_value: ConstantData) { - let replaced = self.handles_to_values.insert( - constant_handle, - ConstantPoolEntry::new(constant_value.clone()), - ); + let replaced = self + .handles_to_values + .insert(constant_handle, constant_value.clone()); assert!( replaced.is_none(), "attempted to overwrite an existing constant {:?}: {:?} => {:?}", constant_handle, &constant_value, - replaced.unwrap().data + replaced.unwrap() ); self.values_to_handles .insert(constant_value, constant_handle); } - /// Assign an offset to a given constant, where the offset is the number of bytes from the - /// beginning of the function to the beginning of the constant data inside the pool. - pub fn set_offset(&mut self, constant_handle: Constant, constant_offset: ConstantOffset) { - assert!( - self.handles_to_values.contains_key(&constant_handle), - "A constant handle must have already been inserted into the pool; perhaps a \ - constant pool was created outside of the pool?" - ); - self.handles_to_values - .entry(constant_handle) - .and_modify(|e| e.offset = Some(constant_offset)); - } - - /// Retrieve the offset of a given constant, where the offset is the number of bytes from the - /// beginning of the function to the beginning of the constant data inside the pool. - pub fn get_offset(&self, constant_handle: Constant) -> ConstantOffset { - self.handles_to_values - .get(&constant_handle) - .expect( - "A constant handle must have a corresponding constant value; was a constant \ - handle created outside of the pool?", - ) - .offset - .expect( - "A constant offset has not yet been set; verify that `set_offset` has been \ - called before this point", - ) - } - /// Iterate over the constants in insertion order. pub fn iter(&self) -> impl Iterator { - self.handles_to_values.iter().map(|(h, e)| (h, &e.data)) + self.handles_to_values.iter() } /// Iterate over mutable entries in the constant pool in insertion order. 
- pub fn entries_mut(&mut self) -> impl Iterator { + pub fn entries_mut(&mut self) -> impl Iterator { self.handles_to_values.values_mut() } @@ -398,22 +336,6 @@ mod tests { sut.get(a); // panics, only use constants returned by ConstantPool } - #[test] - fn get_offset() { - let mut sut = ConstantPool::new(); - let a = sut.insert(vec![1].into()); - sut.set_offset(a, 42); - assert_eq!(sut.get_offset(a), 42) - } - - #[test] - #[should_panic] - fn get_nonexistent_offset() { - let mut sut = ConstantPool::new(); - let a = sut.insert(vec![1].into()); - sut.get_offset(a); // panics, set_offset should have been called - } - #[test] fn display_constant_data() { assert_eq!(ConstantData::from([0].as_ref()).to_string(), "0x00"); diff --git a/cranelift/codegen/src/ir/dfg.rs b/cranelift/codegen/src/ir/dfg.rs index f64cf896c4..91f3711f57 100644 --- a/cranelift/codegen/src/ir/dfg.rs +++ b/cranelift/codegen/src/ir/dfg.rs @@ -10,7 +10,6 @@ use crate::ir::{ Block, FuncRef, Inst, SigRef, Signature, SourceLoc, Type, Value, ValueLabelAssignments, ValueList, ValueListPool, }; -use crate::isa::TargetIsa; use crate::packed_option::ReservedValue; use crate::write::write_operands; use crate::HashMap; @@ -466,12 +465,8 @@ impl DataFlowGraph { } /// Returns an object that displays `inst`. - pub fn display_inst<'a, I: Into>>( - &'a self, - inst: Inst, - isa: I, - ) -> DisplayInst<'a> { - DisplayInst(self, isa.into(), inst) + pub fn display_inst<'a>(&'a self, inst: Inst) -> DisplayInst<'a> { + DisplayInst(self, inst) } /// Get all value arguments on `inst` as a slice. @@ -657,7 +652,7 @@ impl DataFlowGraph { old_value, "{} wasn't detached from {}", old_value, - self.display_inst(inst, None) + self.display_inst(inst) ); new_value } @@ -963,13 +958,12 @@ impl BlockData { } /// Object that can display an instruction. 
-pub struct DisplayInst<'a>(&'a DataFlowGraph, Option<&'a dyn TargetIsa>, Inst); +pub struct DisplayInst<'a>(&'a DataFlowGraph, Inst); impl<'a> fmt::Display for DisplayInst<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let dfg = self.0; - let isa = self.1; - let inst = self.2; + let inst = self.1; if let Some((first, rest)) = dfg.inst_results(inst).split_first() { write!(f, "{}", first)?; @@ -985,7 +979,7 @@ impl<'a> fmt::Display for DisplayInst<'a> { } else { write!(f, "{}.{}", dfg[inst].opcode(), typevar)?; } - write_operands(f, dfg, isa, inst) + write_operands(f, dfg, inst) } } @@ -1150,10 +1144,7 @@ mod tests { dfg.make_inst_results(inst, types::I32); assert_eq!(inst.to_string(), "inst0"); - assert_eq!( - dfg.display_inst(inst, None).to_string(), - "v0 = iconst.i32 0" - ); + assert_eq!(dfg.display_inst(inst).to_string(), "v0 = iconst.i32 0"); // Immutable reference resolution. { @@ -1188,7 +1179,7 @@ mod tests { code: TrapCode::User(0), }; let inst = dfg.make_inst(idata); - assert_eq!(dfg.display_inst(inst, None).to_string(), "trap user0"); + assert_eq!(dfg.display_inst(inst).to_string(), "trap user0"); // Result slice should be empty. assert_eq!(dfg.inst_results(inst), &[]); diff --git a/cranelift/codegen/src/ir/extfunc.rs b/cranelift/codegen/src/ir/extfunc.rs index 6c1d26f4ab..1a623095a7 100644 --- a/cranelift/codegen/src/ir/extfunc.rs +++ b/cranelift/codegen/src/ir/extfunc.rs @@ -5,8 +5,8 @@ //! //! This module declares the data types used to represent external functions and call signatures. -use crate::ir::{ArgumentLoc, ExternalName, SigRef, Type}; -use crate::isa::{CallConv, RegInfo, RegUnit}; +use crate::ir::{ExternalName, SigRef, Type}; +use crate::isa::CallConv; use crate::machinst::RelocDistance; use alloc::vec::Vec; use core::fmt; @@ -50,11 +50,6 @@ impl Signature { self.call_conv = call_conv; } - /// Return an object that can display `self` with correct register names. 
- pub fn display<'a, R: Into>>(&'a self, regs: R) -> DisplaySignature<'a> { - DisplaySignature(self, regs.into()) - } - /// Find the index of a presumed unique special-purpose parameter. pub fn special_param_index(&self, purpose: ArgumentPurpose) -> Option { self.params.iter().rposition(|arg| arg.purpose == purpose) @@ -108,38 +103,29 @@ impl Signature { } } -/// Wrapper type capable of displaying a `Signature` with correct register names. -pub struct DisplaySignature<'a>(&'a Signature, Option<&'a RegInfo>); - -fn write_list(f: &mut fmt::Formatter, args: &[AbiParam], regs: Option<&RegInfo>) -> fmt::Result { +fn write_list(f: &mut fmt::Formatter, args: &[AbiParam]) -> fmt::Result { match args.split_first() { None => {} Some((first, rest)) => { - write!(f, "{}", first.display(regs))?; + write!(f, "{}", first)?; for arg in rest { - write!(f, ", {}", arg.display(regs))?; + write!(f, ", {}", arg)?; } } } Ok(()) } -impl<'a> fmt::Display for DisplaySignature<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "(")?; - write_list(f, &self.0.params, self.1)?; - write!(f, ")")?; - if !self.0.returns.is_empty() { - write!(f, " -> ")?; - write_list(f, &self.0.returns, self.1)?; - } - write!(f, " {}", self.0.call_conv) - } -} - impl fmt::Display for Signature { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.display(None).fmt(f) + write!(f, "(")?; + write_list(f, &self.params)?; + write!(f, ")")?; + if !self.returns.is_empty() { + write!(f, " -> ")?; + write_list(f, &self.returns)?; + } + write!(f, " {}", self.call_conv) } } @@ -157,9 +143,6 @@ pub struct AbiParam { /// Method for extending argument to a full register. pub extension: ArgumentExtension, - /// ABI-specific location of this argument, or `Unassigned` for arguments that have not yet - /// been legalized. - pub location: ArgumentLoc, /// Was the argument converted to pointer during legalization? 
pub legalized_to_pointer: bool, } @@ -171,7 +154,6 @@ impl AbiParam { value_type: vt, extension: ArgumentExtension::None, purpose: ArgumentPurpose::Normal, - location: Default::default(), legalized_to_pointer: false, } } @@ -182,18 +164,6 @@ impl AbiParam { value_type: vt, extension: ArgumentExtension::None, purpose, - location: Default::default(), - legalized_to_pointer: false, - } - } - - /// Create a parameter for a special-purpose register. - pub fn special_reg(vt: Type, purpose: ArgumentPurpose, regunit: RegUnit) -> Self { - Self { - value_type: vt, - extension: ArgumentExtension::None, - purpose, - location: ArgumentLoc::Reg(regunit), legalized_to_pointer: false, } } @@ -215,42 +185,23 @@ impl AbiParam { ..self } } - - /// Return an object that can display `self` with correct register names. - pub fn display<'a, R: Into>>(&'a self, regs: R) -> DisplayAbiParam<'a> { - DisplayAbiParam(self, regs.into()) - } -} - -/// Wrapper type capable of displaying a `AbiParam` with correct register names. 
-pub struct DisplayAbiParam<'a>(&'a AbiParam, Option<&'a RegInfo>); - -impl<'a> fmt::Display for DisplayAbiParam<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.0.value_type)?; - if self.0.legalized_to_pointer { - write!(f, " ptr")?; - } - match self.0.extension { - ArgumentExtension::None => {} - ArgumentExtension::Uext => write!(f, " uext")?, - ArgumentExtension::Sext => write!(f, " sext")?, - } - if self.0.purpose != ArgumentPurpose::Normal { - write!(f, " {}", self.0.purpose)?; - } - - if self.0.location.is_assigned() { - write!(f, " [{}]", self.0.location.display(self.1))?; - } - - Ok(()) - } } impl fmt::Display for AbiParam { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.display(None).fmt(f) + write!(f, "{}", self.value_type)?; + if self.legalized_to_pointer { + write!(f, " ptr")?; + } + match self.extension { + ArgumentExtension::None => {} + ArgumentExtension::Uext => write!(f, " uext")?, + ArgumentExtension::Sext => write!(f, " sext")?, + } + if self.purpose != ArgumentPurpose::Normal { + write!(f, " {}", self.purpose)?; + } + Ok(()) } } @@ -519,15 +470,5 @@ mod tests { sig.to_string(), "(i32, i32x4) -> f32, b8 baldrdash_system_v" ); - - // Order does not matter. - sig.params[0].location = ArgumentLoc::Stack(24); - sig.params[1].location = ArgumentLoc::Stack(8); - - // Writing ABI-annotated signatures. - assert_eq!( - sig.to_string(), - "(i32 [24], i32x4 [8]) -> f32, b8 baldrdash_system_v" - ); } } diff --git a/cranelift/codegen/src/ir/function.rs b/cranelift/codegen/src/ir/function.rs index 7b1c957518..fe33970d86 100644 --- a/cranelift/codegen/src/ir/function.rs +++ b/cranelift/codegen/src/ir/function.rs @@ -3,24 +3,21 @@ //! The `Function` struct defined in this module owns all of its basic blocks and //! instructions. 
-use crate::binemit::CodeOffset; use crate::entity::{PrimaryMap, SecondaryMap}; use crate::ir; +use crate::ir::JumpTables; use crate::ir::{ instructions::BranchInfo, Block, ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, HeapData, Inst, InstructionData, JumpTable, JumpTableData, Opcode, SigRef, StackSlot, StackSlotData, Table, TableData, }; -use crate::ir::{BlockOffsets, InstEncodings, SourceLocs, StackSlots, ValueLocations}; use crate::ir::{DataFlowGraph, ExternalName, Layout, Signature}; -use crate::ir::{JumpTableOffsets, JumpTables}; -use crate::isa::{CallConv, EncInfo, Encoding, Legalize, TargetIsa}; -use crate::regalloc::{EntryRegDiversions, RegDiversions}; +use crate::ir::{SourceLocs, StackSlots}; +use crate::isa::CallConv; use crate::value_label::ValueLabelsRanges; use crate::write::write_function; #[cfg(feature = "enable-serde")] use alloc::string::String; -use alloc::vec::Vec; use core::fmt; #[cfg(feature = "enable-serde")] @@ -81,10 +78,6 @@ pub struct Function { /// Signature of this function. pub signature: Signature, - /// The old signature of this function, before the most recent legalization, - /// if any. - pub old_signature: Option, - /// Stack slots allocated in this function. pub stack_slots: StackSlots, @@ -106,45 +99,12 @@ pub struct Function { /// Layout of blocks and instructions in the function body. pub layout: Layout, - /// Encoding recipe and bits for the legal instructions. - /// Illegal instructions have the `Encoding::default()` value. - pub encodings: InstEncodings, - - /// Location assigned to every value. - pub locations: ValueLocations, - - /// Non-default locations assigned to value at the entry of basic blocks. - /// - /// At the entry of each basic block, we might have values which are not in their default - /// ValueLocation. This field records these register-to-register moves as Diversions. - pub entry_diversions: EntryRegDiversions, - - /// Code offsets of the block headers. 
- /// - /// This information is only transiently available after the `binemit::relax_branches` function - /// computes it, and it can easily be recomputed by calling that function. It is not included - /// in the textual IR format. - pub offsets: BlockOffsets, - - /// Code offsets of Jump Table headers. - pub jt_offsets: JumpTableOffsets, - /// Source locations. /// /// Track the original source location for each instruction. The source locations are not /// interpreted by Cranelift, only preserved. pub srclocs: SourceLocs, - /// Instruction that marks the end (inclusive) of the function's prologue. - /// - /// This is used for some ABIs to generate unwind information. - pub prologue_end: Option, - - /// The instructions that mark the start (inclusive) of an epilogue in the function. - /// - /// This is used for some ABIs to generate unwind information. - pub epilogues_start: Vec<(Inst, Block)>, - /// An optional global value which represents an expression evaluating to /// the stack limit for this function. 
This `GlobalValue` will be /// interpreted in the prologue, if necessary, to insert a stack check to @@ -160,7 +120,6 @@ impl Function { version_marker: VersionMarker, name, signature: sig, - old_signature: None, stack_slots: StackSlots::new(), global_values: PrimaryMap::new(), heaps: PrimaryMap::new(), @@ -168,14 +127,7 @@ impl Function { jump_tables: PrimaryMap::new(), dfg: DataFlowGraph::new(), layout: Layout::new(), - encodings: SecondaryMap::new(), - locations: SecondaryMap::new(), - entry_diversions: EntryRegDiversions::new(), - offsets: SecondaryMap::new(), - jt_offsets: SecondaryMap::new(), srclocs: SecondaryMap::new(), - prologue_end: None, - epilogues_start: Vec::new(), stack_limit: None, } } @@ -190,14 +142,7 @@ impl Function { self.jump_tables.clear(); self.dfg.clear(); self.layout.clear(); - self.encodings.clear(); - self.locations.clear(); - self.entry_diversions.clear(); - self.offsets.clear(); - self.jt_offsets.clear(); self.srclocs.clear(); - self.prologue_end = None; - self.epilogues_start.clear(); self.stack_limit = None; } @@ -243,11 +188,8 @@ impl Function { } /// Return an object that can display this function with correct ISA-specific annotations. - pub fn display<'a, I: Into>>( - &'a self, - isa: I, - ) -> DisplayFunction<'a> { - DisplayFunction(self, isa.into().into()) + pub fn display(&self) -> DisplayFunction<'_> { + DisplayFunction(self, Default::default()) } /// Return an object that can display this function with correct ISA-specific annotations. @@ -268,51 +210,6 @@ impl Function { .map(|i| self.dfg.block_params(entry)[i]) } - /// Get an iterator over the instructions in `block`, including offsets and encoded instruction - /// sizes. - /// - /// The iterator returns `(offset, inst, size)` tuples, where `offset` if the offset in bytes - /// from the beginning of the function to the instruction, and `size` is the size of the - /// instruction in bytes, or 0 for unencoded instructions. 
- /// - /// This function can only be used after the code layout has been computed by the - /// `binemit::relax_branches()` function. - pub fn inst_offsets<'a>(&'a self, block: Block, encinfo: &EncInfo) -> InstOffsetIter<'a> { - assert!( - !self.offsets.is_empty(), - "Code layout must be computed first" - ); - let mut divert = RegDiversions::new(); - divert.at_block(&self.entry_diversions, block); - InstOffsetIter { - encinfo: encinfo.clone(), - func: self, - divert, - encodings: &self.encodings, - offset: self.offsets[block], - iter: self.layout.block_insts(block), - } - } - - /// Wrapper around `encode` which assigns `inst` the resulting encoding. - pub fn update_encoding(&mut self, inst: ir::Inst, isa: &dyn TargetIsa) -> Result<(), Legalize> { - if isa.get_mach_backend().is_some() { - Ok(()) - } else { - self.encode(inst, isa).map(|e| self.encodings[inst] = e) - } - } - - /// Wrapper around `TargetIsa::encode` for encoding an existing instruction - /// in the `Function`. - pub fn encode(&self, inst: ir::Inst, isa: &dyn TargetIsa) -> Result { - if isa.get_mach_backend().is_some() { - Ok(Encoding::new(0, 0)) - } else { - isa.encode(&self, &self.dfg[inst], self.dfg.ctrl_typevar(inst)) - } - } - /// Starts collection of debug information. pub fn collect_debug_info(&mut self) { self.dfg.collect_debug_info(); @@ -356,7 +253,7 @@ impl Function { } _ => panic!( "Unexpected instruction {} having default destination", - self.dfg.display_inst(inst, None) + self.dfg.display_inst(inst) ), } } @@ -433,65 +330,27 @@ impl Function { /// Additional annotations for function display. #[derive(Default)] pub struct DisplayFunctionAnnotations<'a> { - /// Enable ISA annotations. - pub isa: Option<&'a dyn TargetIsa>, - /// Enable value labels annotations. 
pub value_ranges: Option<&'a ValueLabelsRanges>, } -impl<'a> From> for DisplayFunctionAnnotations<'a> { - fn from(isa: Option<&'a dyn TargetIsa>) -> DisplayFunctionAnnotations { - DisplayFunctionAnnotations { - isa, - value_ranges: None, - } - } -} - /// Wrapper type capable of displaying a `Function` with correct ISA annotations. pub struct DisplayFunction<'a>(&'a Function, DisplayFunctionAnnotations<'a>); impl<'a> fmt::Display for DisplayFunction<'a> { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - write_function(fmt, self.0, &self.1) + write_function(fmt, self.0) } } impl fmt::Display for Function { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - write_function(fmt, self, &DisplayFunctionAnnotations::default()) + write_function(fmt, self) } } impl fmt::Debug for Function { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - write_function(fmt, self, &DisplayFunctionAnnotations::default()) - } -} - -/// Iterator returning instruction offsets and sizes: `(offset, inst, size)`. 
-pub struct InstOffsetIter<'a> { - encinfo: EncInfo, - divert: RegDiversions, - func: &'a Function, - encodings: &'a InstEncodings, - offset: CodeOffset, - iter: ir::layout::Insts<'a>, -} - -impl<'a> Iterator for InstOffsetIter<'a> { - type Item = (CodeOffset, ir::Inst, CodeOffset); - - fn next(&mut self) -> Option { - self.iter.next().map(|inst| { - self.divert.apply(&self.func.dfg[inst]); - let byte_size = - self.encinfo - .byte_size(self.encodings[inst], inst, &self.divert, self.func); - let offset = self.offset; - self.offset += byte_size; - (offset, inst, byte_size) - }) + write_function(fmt, self) } } diff --git a/cranelift/codegen/src/ir/instructions.rs b/cranelift/codegen/src/ir/instructions.rs index 351921f6af..3580f27376 100644 --- a/cranelift/codegen/src/ir/instructions.rs +++ b/cranelift/codegen/src/ir/instructions.rs @@ -25,7 +25,6 @@ use crate::ir::{ trapcode::TrapCode, types, Block, FuncRef, JumpTable, MemFlags, SigRef, StackSlot, Type, Value, }; -use crate::isa; /// Some instructions use an external list of argument values because there is not enough space in /// the 16-byte `InstructionData` struct. These value lists are stored in a memory pool in diff --git a/cranelift/codegen/src/ir/libcall.rs b/cranelift/codegen/src/ir/libcall.rs index 632b04a4c3..e8298d8ee7 100644 --- a/cranelift/codegen/src/ir/libcall.rs +++ b/cranelift/codegen/src/ir/libcall.rs @@ -1,10 +1,6 @@ //! Naming well-known routines in the runtime library. -use crate::ir::{ - types, AbiParam, ArgumentPurpose, ExtFuncData, ExternalName, FuncRef, Function, Inst, Opcode, - Signature, Type, -}; -use crate::isa::{CallConv, RegUnit, TargetIsa}; +use crate::ir::{types, ExternalName, FuncRef, Function, Opcode, Type}; use core::fmt; use core::str::FromStr; #[cfg(feature = "enable-serde")] @@ -166,32 +162,11 @@ impl LibCall { } } -/// Get a function reference for `libcall` in `func`, following the signature -/// for `inst`. 
-/// -/// If there is an existing reference, use it, otherwise make a new one. -pub(crate) fn get_libcall_funcref( - libcall: LibCall, - call_conv: CallConv, - func: &mut Function, - inst: Inst, - isa: &dyn TargetIsa, -) -> FuncRef { - find_funcref(libcall, func) - .unwrap_or_else(|| make_funcref_for_inst(libcall, call_conv, func, inst, isa)) -} - /// Get a function reference for the probestack function in `func`. /// /// If there is an existing reference, use it, otherwise make a new one. -pub fn get_probestack_funcref( - func: &mut Function, - reg_type: Type, - arg_reg: RegUnit, - isa: &dyn TargetIsa, -) -> FuncRef { +pub fn get_probestack_funcref(func: &mut Function) -> Option { find_funcref(LibCall::Probestack, func) - .unwrap_or_else(|| make_funcref_for_probestack(func, reg_type, arg_reg, isa)) } /// Get the existing function reference for `libcall` in `func` if it exists. @@ -211,65 +186,6 @@ fn find_funcref(libcall: LibCall, func: &Function) -> Option { None } -/// Create a funcref for `LibCall::Probestack`. -fn make_funcref_for_probestack( - func: &mut Function, - reg_type: Type, - arg_reg: RegUnit, - isa: &dyn TargetIsa, -) -> FuncRef { - let mut sig = Signature::new(CallConv::Probestack); - let rax = AbiParam::special_reg(reg_type, ArgumentPurpose::Normal, arg_reg); - sig.params.push(rax); - if !isa.flags().probestack_func_adjusts_sp() { - sig.returns.push(rax); - } - make_funcref(LibCall::Probestack, func, sig, isa) -} - -/// Create a funcref for `libcall` with a signature matching `inst`. 
-fn make_funcref_for_inst( - libcall: LibCall, - call_conv: CallConv, - func: &mut Function, - inst: Inst, - isa: &dyn TargetIsa, -) -> FuncRef { - let mut sig = Signature::new(call_conv); - for &v in func.dfg.inst_args(inst) { - sig.params.push(AbiParam::new(func.dfg.value_type(v))); - } - for &v in func.dfg.inst_results(inst) { - sig.returns.push(AbiParam::new(func.dfg.value_type(v))); - } - - if call_conv.extends_baldrdash() { - // Adds the special VMContext parameter to the signature. - sig.params.push(AbiParam::special( - isa.pointer_type(), - ArgumentPurpose::VMContext, - )); - } - - make_funcref(libcall, func, sig, isa) -} - -/// Create a funcref for `libcall`. -fn make_funcref( - libcall: LibCall, - func: &mut Function, - sig: Signature, - isa: &dyn TargetIsa, -) -> FuncRef { - let sigref = func.import_signature(sig); - - func.import_function(ExtFuncData { - name: ExternalName::LibCall(libcall), - signature: sigref, - colocated: isa.flags().use_colocated_libcalls(), - }) -} - #[cfg(test)] mod tests { use super::*; diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs index c075da6824..713d2fd37a 100644 --- a/cranelift/codegen/src/ir/mod.rs +++ b/cranelift/codegen/src/ir/mod.rs @@ -2,6 +2,7 @@ mod atomic_rmw_op; mod builder; +pub mod condcodes; pub mod constant; pub mod dfg; pub mod entities; @@ -22,7 +23,6 @@ pub mod stackslot; mod table; mod trapcode; pub mod types; -mod valueloc; #[cfg(feature = "enable-serde")] use serde::{Deserialize, Serialize}; @@ -31,7 +31,7 @@ pub use crate::ir::atomic_rmw_op::AtomicRmwOp; pub use crate::ir::builder::{ InsertBuilder, InstBuilder, InstBuilderBase, InstInserterBase, ReplaceBuilder, }; -pub use crate::ir::constant::{ConstantData, ConstantOffset, ConstantPool}; +pub use crate::ir::constant::{ConstantData, ConstantPool}; pub use crate::ir::dfg::{DataFlowGraph, ValueDef}; pub use crate::ir::entities::{ Block, Constant, FuncRef, GlobalValue, Heap, Immediate, Inst, JumpTable, SigRef, StackSlot, @@ 
-53,33 +53,17 @@ pub use crate::ir::libcall::{get_probestack_funcref, LibCall}; pub use crate::ir::memflags::{Endianness, MemFlags}; pub use crate::ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint}; pub use crate::ir::sourceloc::SourceLoc; -pub use crate::ir::stackslot::{StackLayoutInfo, StackSlotData, StackSlotKind, StackSlots}; +pub use crate::ir::stackslot::{StackSlotData, StackSlotKind, StackSlots}; pub use crate::ir::table::TableData; pub use crate::ir::trapcode::TrapCode; pub use crate::ir::types::Type; -pub use crate::ir::valueloc::{ArgumentLoc, ValueLoc}; pub use crate::value_label::LabelValueLoc; -pub use cranelift_codegen_shared::condcodes; -use crate::binemit; use crate::entity::{entity_impl, PrimaryMap, SecondaryMap}; -use crate::isa; - -/// Map of value locations. -pub type ValueLocations = SecondaryMap; /// Map of jump tables. pub type JumpTables = PrimaryMap; -/// Map of instruction encodings. -pub type InstEncodings = SecondaryMap; - -/// Code offsets for blocks. -pub type BlockOffsets = SecondaryMap; - -/// Code offsets for Jump Tables. -pub type JumpTableOffsets = SecondaryMap; - /// Source locations for instructions. pub type SourceLocs = SecondaryMap; diff --git a/cranelift/codegen/src/ir/stackslot.rs b/cranelift/codegen/src/ir/stackslot.rs index 13d35d37b9..993fcc8ee1 100644 --- a/cranelift/codegen/src/ir/stackslot.rs +++ b/cranelift/codegen/src/ir/stackslot.rs @@ -162,23 +162,6 @@ impl fmt::Display for StackSlotData { } } -/// Stack frame layout information. -/// -/// This is computed by the `layout_stack()` method. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] -pub struct StackLayoutInfo { - /// The total size of the stack frame. - /// - /// This is the distance from the stack pointer in the current function to the stack pointer in - /// the calling function, so it includes a pushed return address as well as space for outgoing - /// call arguments. 
- pub frame_size: StackSize, - - /// The total size of the stack frame for inbound arguments pushed by the caller. - pub inbound_args_size: StackSize, -} - /// Stack frame manager. /// /// Keep track of all the stack slots used by a function. @@ -193,9 +176,6 @@ pub struct StackSlots { /// All the emergency slots. emergency: Vec, - - /// Layout information computed from `layout_stack`. - pub layout_info: Option, } /// Stack slot manager functions that behave mostly like an entity map. @@ -210,7 +190,6 @@ impl StackSlots { self.slots.clear(); self.outgoing.clear(); self.emergency.clear(); - self.layout_info = None; } /// Allocate a new stack slot. diff --git a/cranelift/codegen/src/ir/types.rs b/cranelift/codegen/src/ir/types.rs index 6eaa937fbd..bb2056926b 100644 --- a/cranelift/codegen/src/ir/types.rs +++ b/cranelift/codegen/src/ir/types.rs @@ -79,6 +79,30 @@ impl Type { } } + /// Get the (minimum, maximum) values represented by each lane in the type. + /// Note that these are returned as unsigned 'bit patterns'. + pub fn bounds(self, signed: bool) -> (u128, u128) { + if signed { + match self.lane_type() { + I8 => (i8::MIN as u128, i8::MAX as u128), + I16 => (i16::MIN as u128, i16::MAX as u128), + I32 => (i32::MIN as u128, i32::MAX as u128), + I64 => (i64::MIN as u128, i64::MAX as u128), + I128 => (i128::MIN as u128, i128::MAX as u128), + _ => unimplemented!(), + } + } else { + match self.lane_type() { + I8 => (u8::MIN as u128, u8::MAX as u128), + I16 => (u16::MIN as u128, u16::MAX as u128), + I32 => (u32::MIN as u128, u32::MAX as u128), + I64 => (u64::MIN as u128, u64::MAX as u128), + I128 => (u128::MIN, u128::MAX), + _ => unimplemented!(), + } + } + } + /// Get an integer type with the requested number of bits. 
pub fn int(bits: u16) -> Option { match bits { @@ -376,7 +400,6 @@ impl Display for Type { f.write_str(match *self { IFLAGS => "iflags", FFLAGS => "fflags", - SARG_T => "sarg_t", INVALID => panic!("INVALID encountered"), _ => panic!("Unknown Type(0x{:x})", self.0), }) diff --git a/cranelift/codegen/src/ir/valueloc.rs b/cranelift/codegen/src/ir/valueloc.rs deleted file mode 100644 index d0b924886a..0000000000 --- a/cranelift/codegen/src/ir/valueloc.rs +++ /dev/null @@ -1,166 +0,0 @@ -//! Value locations. -//! -//! The register allocator assigns every SSA value to either a register or a stack slot. This -//! assignment is represented by a `ValueLoc` object. - -use crate::ir::StackSlot; -use crate::isa::{RegInfo, RegUnit}; -use core::fmt; - -#[cfg(feature = "enable-serde")] -use serde::{Deserialize, Serialize}; - -/// Value location. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] -pub enum ValueLoc { - /// This value has not been assigned to a location yet. - Unassigned, - /// Value is assigned to a register. - Reg(RegUnit), - /// Value is assigned to a stack slot. - Stack(StackSlot), -} - -impl Default for ValueLoc { - fn default() -> Self { - Self::Unassigned - } -} - -impl ValueLoc { - /// Is this an assigned location? (That is, not `Unassigned`). - pub fn is_assigned(self) -> bool { - match self { - Self::Unassigned => false, - _ => true, - } - } - - /// Get the register unit of this location, or panic. - pub fn unwrap_reg(self) -> RegUnit { - match self { - Self::Reg(ru) => ru, - _ => panic!("unwrap_reg expected register, found {:?}", self), - } - } - - /// Get the stack slot of this location, or panic. - pub fn unwrap_stack(self) -> StackSlot { - match self { - Self::Stack(ss) => ss, - _ => panic!("unwrap_stack expected stack slot, found {:?}", self), - } - } - - /// Return an object that can display this value location, using the register info from the - /// target ISA. 
- pub fn display<'a, R: Into>>(self, regs: R) -> DisplayValueLoc<'a> { - DisplayValueLoc(self, regs.into()) - } -} - -/// Displaying a `ValueLoc` correctly requires the associated `RegInfo` from the target ISA. -/// Without the register info, register units are simply show as numbers. -/// -/// The `DisplayValueLoc` type can display the contained `ValueLoc`. -pub struct DisplayValueLoc<'a>(ValueLoc, Option<&'a RegInfo>); - -impl<'a> fmt::Display for DisplayValueLoc<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.0 { - ValueLoc::Unassigned => write!(f, "-"), - ValueLoc::Reg(ru) => match self.1 { - Some(regs) => write!(f, "{}", regs.display_regunit(ru)), - None => write!(f, "%{}", ru), - }, - ValueLoc::Stack(ss) => write!(f, "{}", ss), - } - } -} - -/// Function argument location. -/// -/// The ABI specifies how arguments are passed to a function, and where return values appear after -/// the call. Just like a `ValueLoc`, function arguments can be passed in registers or on the -/// stack. -/// -/// Function arguments on the stack are accessed differently for the incoming arguments to the -/// current function and the outgoing arguments to a called external function. For this reason, -/// the location of stack arguments is described as an offset into the array of function arguments -/// on the stack. -/// -/// An `ArgumentLoc` can be translated to a `ValueLoc` only when we know if we're talking about an -/// incoming argument or an outgoing argument. -/// -/// - For stack arguments, different `StackSlot` entities are used to represent incoming and -/// outgoing arguments. -/// - For register arguments, there is usually no difference, but if we ever add support for a -/// register-window ISA like SPARC, register arguments would also need to be translated. 
-#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] -#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] -pub enum ArgumentLoc { - /// This argument has not been assigned to a location yet. - Unassigned, - /// Argument is passed in a register. - Reg(RegUnit), - /// Argument is passed on the stack, at the given byte offset into the argument array. - Stack(i32), -} - -impl Default for ArgumentLoc { - fn default() -> Self { - Self::Unassigned - } -} - -impl ArgumentLoc { - /// Is this an assigned location? (That is, not `Unassigned`). - pub fn is_assigned(self) -> bool { - match self { - Self::Unassigned => false, - _ => true, - } - } - - /// Is this a register location? - pub fn is_reg(self) -> bool { - match self { - Self::Reg(_) => true, - _ => false, - } - } - - /// Is this a stack location? - pub fn is_stack(self) -> bool { - match self { - Self::Stack(_) => true, - _ => false, - } - } - - /// Return an object that can display this argument location, using the register info from the - /// target ISA. - pub fn display<'a, R: Into>>(self, regs: R) -> DisplayArgumentLoc<'a> { - DisplayArgumentLoc(self, regs.into()) - } -} - -/// Displaying a `ArgumentLoc` correctly requires the associated `RegInfo` from the target ISA. -/// Without the register info, register units are simply show as numbers. -/// -/// The `DisplayArgumentLoc` type can display the contained `ArgumentLoc`. 
-pub struct DisplayArgumentLoc<'a>(ArgumentLoc, Option<&'a RegInfo>); - -impl<'a> fmt::Display for DisplayArgumentLoc<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.0 { - ArgumentLoc::Unassigned => write!(f, "-"), - ArgumentLoc::Reg(ru) => match self.1 { - Some(regs) => write!(f, "{}", regs.display_regunit(ru)), - None => write!(f, "%{}", ru), - }, - ArgumentLoc::Stack(offset) => write!(f, "{}", offset), - } - } -} diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index b0122cbd79..19c2764e94 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -504,6 +504,33 @@ fn enc_dmb_ish() -> u32 { 0xD5033BBF } +fn enc_ldal(ty: Type, op: AtomicRMWOp, rs: Reg, rt: Writable, rn: Reg) -> u32 { + assert!(machreg_to_gpr(rt.to_reg()) != 31); + let sz = match ty { + I64 => 0b11, + I32 => 0b10, + I16 => 0b01, + I8 => 0b00, + _ => unreachable!(), + }; + let op = match op { + AtomicRMWOp::Add => 0b000, + AtomicRMWOp::Clr => 0b001, + AtomicRMWOp::Eor => 0b010, + AtomicRMWOp::Set => 0b011, + AtomicRMWOp::Smax => 0b100, + AtomicRMWOp::Smin => 0b101, + AtomicRMWOp::Umax => 0b110, + AtomicRMWOp::Umin => 0b111, + }; + 0b00_111_000_111_00000_0_000_00_00000_00000 + | (sz << 30) + | (machreg_to_gpr(rs) << 16) + | (op << 12) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rt.to_reg()) +} + fn enc_ldar(ty: Type, rt: Writable, rn: Reg) -> u32 { let sz = match ty { I64 => 0b11, @@ -1318,7 +1345,10 @@ impl MachInstEmit for Inst { } => { sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond)); } - &Inst::AtomicRMW { ty, op } => { + &Inst::AtomicRMW { ty, op, rs, rt, rn } => { + sink.put4(enc_ldal(ty, op, rs, rt, rn)); + } + &Inst::AtomicRMWLoop { ty, op } => { /* Emit this: again: ldaxr{,b,h} x/w27, [x25] @@ -1340,7 +1370,7 @@ impl MachInstEmit for Inst { so that we simply write in the destination, the "2nd arg for op". 
*/ // TODO: We should not hardcode registers here, a better idea would be to - // pass some scratch registers in the AtomicRMW pseudo-instruction, and use those + // pass some scratch registers in the AtomicRMWLoop pseudo-instruction, and use those let xzr = zero_reg(); let x24 = xreg(24); let x25 = xreg(25); @@ -2308,7 +2338,11 @@ impl MachInstEmit for Inst { VecALUOp::Orr => (0b000_01110_10_1, 0b000111), VecALUOp::Eor => (0b001_01110_00_1, 0b000111), VecALUOp::Bsl => (0b001_01110_01_1, 0b000111), - VecALUOp::Umaxp => (0b001_01110_00_1 | enc_size << 1, 0b101001), + VecALUOp::Umaxp => { + debug_assert_ne!(size, VectorSize::Size64x2); + + (0b001_01110_00_1 | enc_size << 1, 0b101001) + } VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001), VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001), VecALUOp::Mul => { diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index fa3aa722d4..5b8fe374d8 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -5986,7 +5986,7 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::AtomicRMW { + Inst::AtomicRMWLoop { ty: I16, op: inst_common::AtomicRmwOp::Xor, }, @@ -5996,6 +5996,359 @@ fn test_aarch64_binemit() { insns.push(( Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Add, + rs: xreg(1), + rt: writable_xreg(2), + rn: xreg(3), + }, + "6200E138", + "ldaddalb w1, w2, [x3]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Add, + rs: xreg(4), + rt: writable_xreg(5), + rn: xreg(6), + }, + "C500E478", + "ldaddalh w4, w5, [x6]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Add, + rs: xreg(7), + rt: writable_xreg(8), + rn: xreg(9), + }, + "2801E7B8", + "ldaddal w7, w8, [x9]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Add, + rs: xreg(10), + rt: writable_xreg(11), + rn: xreg(12), + }, + "8B01EAF8", + 
"ldaddal x10, x11, [x12]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Clr, + rs: xreg(13), + rt: writable_xreg(14), + rn: xreg(15), + }, + "EE11ED38", + "ldclralb w13, w14, [x15]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Clr, + rs: xreg(16), + rt: writable_xreg(17), + rn: xreg(18), + }, + "5112F078", + "ldclralh w16, w17, [x18]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Clr, + rs: xreg(19), + rt: writable_xreg(20), + rn: xreg(21), + }, + "B412F3B8", + "ldclral w19, w20, [x21]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Clr, + rs: xreg(22), + rt: writable_xreg(23), + rn: xreg(24), + }, + "1713F6F8", + "ldclral x22, x23, [x24]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Eor, + rs: xreg(25), + rt: writable_xreg(26), + rn: xreg(27), + }, + "7A23F938", + "ldeoralb w25, w26, [x27]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Eor, + rs: xreg(28), + rt: writable_xreg(29), + rn: xreg(30), + }, + "DD23FC78", + "ldeoralh w28, fp, [lr]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Eor, + rs: xreg(29), + rt: writable_xreg(28), + rn: xreg(27), + }, + "7C23FDB8", + "ldeoral fp, w28, [x27]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Eor, + rs: xreg(26), + rt: writable_xreg(25), + rn: xreg(24), + }, + "1923FAF8", + "ldeoral x26, x25, [x24]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Set, + rs: xreg(23), + rt: writable_xreg(22), + rn: xreg(21), + }, + "B632F738", + "ldsetalb w23, w22, [x21]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Set, + rs: xreg(20), + rt: writable_xreg(19), + rn: xreg(18), + }, + "5332F478", + "ldsetalh w20, w19, [x18]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Set, + rs: xreg(17), + rt: writable_xreg(16), + rn: xreg(15), + }, + "F031F1B8", + "ldsetal 
w17, w16, [x15]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Set, + rs: xreg(14), + rt: writable_xreg(13), + rn: xreg(12), + }, + "8D31EEF8", + "ldsetal x14, x13, [x12]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Smax, + rs: xreg(11), + rt: writable_xreg(10), + rn: xreg(9), + }, + "2A41EB38", + "ldsmaxalb w11, w10, [x9]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Smax, + rs: xreg(8), + rt: writable_xreg(7), + rn: xreg(6), + }, + "C740E878", + "ldsmaxalh w8, w7, [x6]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Smax, + rs: xreg(5), + rt: writable_xreg(4), + rn: xreg(3), + }, + "6440E5B8", + "ldsmaxal w5, w4, [x3]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Smax, + rs: xreg(2), + rt: writable_xreg(1), + rn: xreg(0), + }, + "0140E2F8", + "ldsmaxal x2, x1, [x0]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Smin, + rs: xreg(1), + rt: writable_xreg(2), + rn: xreg(3), + }, + "6250E138", + "ldsminalb w1, w2, [x3]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Smin, + rs: xreg(4), + rt: writable_xreg(5), + rn: xreg(6), + }, + "C550E478", + "ldsminalh w4, w5, [x6]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Smin, + rs: xreg(7), + rt: writable_xreg(8), + rn: xreg(9), + }, + "2851E7B8", + "ldsminal w7, w8, [x9]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Smin, + rs: xreg(10), + rt: writable_xreg(11), + rn: xreg(12), + }, + "8B51EAF8", + "ldsminal x10, x11, [x12]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Umax, + rs: xreg(13), + rt: writable_xreg(14), + rn: xreg(15), + }, + "EE61ED38", + "ldumaxalb w13, w14, [x15]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Umax, + rs: xreg(16), + rt: writable_xreg(17), + rn: xreg(18), + }, + "5162F078", + "ldumaxalh w16, w17, [x18]", + )); 
+ insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Umax, + rs: xreg(19), + rt: writable_xreg(20), + rn: xreg(21), + }, + "B462F3B8", + "ldumaxal w19, w20, [x21]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Umax, + rs: xreg(22), + rt: writable_xreg(23), + rn: xreg(24), + }, + "1763F6F8", + "ldumaxal x22, x23, [x24]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I8, + op: AtomicRMWOp::Umin, + rs: xreg(16), + rt: writable_xreg(17), + rn: xreg(18), + }, + "5172F038", + "lduminalb w16, w17, [x18]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Umin, + rs: xreg(19), + rt: writable_xreg(20), + rn: xreg(21), + }, + "B472F378", + "lduminalh w19, w20, [x21]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Umin, + rs: xreg(22), + rt: writable_xreg(23), + rn: xreg(24), + }, + "1773F6B8", + "lduminal w22, w23, [x24]", + )); + insns.push(( + Inst::AtomicRMW { + ty: I64, + op: AtomicRMWOp::Umin, + rs: xreg(25), + rt: writable_xreg(26), + rn: xreg(27), + }, + "7A73F9F8", + "lduminal x25, x26, [x27]", + )); + + insns.push(( + Inst::AtomicRMWLoop { ty: I32, op: inst_common::AtomicRmwOp::Xchg, }, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 9e1aa34d28..98e3a85cff 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -451,6 +451,19 @@ pub enum VecShiftImmOp { Sshr, } +/// Atomic read-modify-write operations with acquire-release semantics +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum AtomicRMWOp { + Add, + Clr, + Eor, + Set, + Smax, + Smin, + Umax, + Umin, +} + /// An operation on the bits of a register. This can be paired with several instruction formats /// below (see `Inst`) in any combination. 
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] @@ -775,11 +788,22 @@ pub enum Inst { /// x27 (wr) old value /// x24 (wr) scratch reg; value afterwards has no meaning /// x28 (wr) scratch reg; value afterwards has no meaning - AtomicRMW { + AtomicRMWLoop { ty: Type, // I8, I16, I32 or I64 op: inst_common::AtomicRmwOp, }, + /// An atomic read-modify-write operation. These instructions require the + /// Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have + /// acquire-release semantics. + AtomicRMW { + op: AtomicRMWOp, + rs: Reg, + rt: Writable, + rn: Reg, + ty: Type, + }, + /// An atomic compare-and-swap operation. This instruction is sequentially consistent. AtomicCAS { rs: Writable, @@ -788,10 +812,10 @@ pub enum Inst { ty: Type, }, - /// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked + /// Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked /// store-conditional loop. /// This instruction is sequentially consistent. - /// Note that the operand conventions, although very similar to AtomicRMW, are different: + /// Note that the operand conventions, although very similar to AtomicRMWLoop, are different: /// /// x25 (rd) address /// x26 (rd) expected value @@ -1920,13 +1944,18 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { &Inst::CCmpImm { rn, .. } => { collector.add_use(rn); } - &Inst::AtomicRMW { .. } => { + &Inst::AtomicRMWLoop { .. } => { collector.add_use(xreg(25)); collector.add_use(xreg(26)); collector.add_def(writable_xreg(24)); collector.add_def(writable_xreg(27)); collector.add_def(writable_xreg(28)); } + &Inst::AtomicRMW { rs, rt, rn, .. } => { + collector.add_use(rs); + collector.add_def(rt); + collector.add_use(rn); + } &Inst::AtomicCAS { rs, rt, rn, .. } => { collector.add_mod(rs); collector.add_use(rt); @@ -2562,9 +2591,19 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { &mut Inst::CCmpImm { ref mut rn, .. 
} => { map_use(mapper, rn); } - &mut Inst::AtomicRMW { .. } => { + &mut Inst::AtomicRMWLoop { .. } => { // There are no vregs to map in this insn. } + &mut Inst::AtomicRMW { + ref mut rs, + ref mut rt, + ref mut rn, + .. + } => { + map_use(mapper, rs); + map_def(mapper, rt); + map_use(mapper, rn); + } &mut Inst::AtomicCAS { ref mut rs, ref mut rt, @@ -3618,7 +3657,31 @@ impl Inst { let cond = cond.show_rru(mb_rru); format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond) } - &Inst::AtomicRMW { ty, op, .. } => { + &Inst::AtomicRMW { rs, rt, rn, ty, op } => { + let op = match op { + AtomicRMWOp::Add => "ldaddal", + AtomicRMWOp::Clr => "ldclral", + AtomicRMWOp::Eor => "ldeoral", + AtomicRMWOp::Set => "ldsetal", + AtomicRMWOp::Smax => "ldsmaxal", + AtomicRMWOp::Umax => "ldumaxal", + AtomicRMWOp::Smin => "ldsminal", + AtomicRMWOp::Umin => "lduminal", + }; + + let size = OperandSize::from_ty(ty); + let rs = show_ireg_sized(rs, mb_rru, size); + let rt = show_ireg_sized(rt.to_reg(), mb_rru, size); + let rn = rn.show_rru(mb_rru); + + let ty_suffix = match ty { + I8 => "b", + I16 => "h", + _ => "", + }; + format!("{}{} {}, {}, [{}]", op, ty_suffix, rs, rt, rn) + } + &Inst::AtomicRMWLoop { ty, op, .. 
} => { format!( "atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}", ty.bits(), op) diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 990fc5cd02..34a627dfba 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -1407,7 +1407,7 @@ pub(crate) fn lower_i64x2_mul>(c: &mut C, insn: IRInst) { // rd = |dg+ch|be+af||dg+ch|be+af| c.emit(Inst::VecRRR { alu_op: VecALUOp::Addp, - rd: rd, + rd, rn: rd.to_reg(), rm: rd.to_reg(), size: VectorSize::Size32x4, diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 5b8e8b67f2..1015e054c4 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -482,9 +482,9 @@ pub(crate) fn lower_insn_to_regs>( ctx.emit(Inst::AluRRRR { alu_op: ALUOp3::MSub64, - rd: rd, + rd, rn: rd.to_reg(), - rm: rm, + rm, ra: rn, }); } else { @@ -1529,20 +1529,41 @@ pub(crate) fn lower_insn_to_regs>( let mut r_arg2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let ty_access = ty.unwrap(); assert!(is_valid_atomic_transaction_ty(ty_access)); - // Make sure that both args are in virtual regs, since in effect - // we have to do a parallel copy to get them safely to the AtomicRMW input - // regs, and that's not guaranteed safe if either is in a real reg. - r_addr = ctx.ensure_in_vreg(r_addr, I64); - r_arg2 = ctx.ensure_in_vreg(r_arg2, I64); - // Move the args to the preordained AtomicRMW input regs - ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64)); - ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64)); - // Now the AtomicRMW insn itself + let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap()); - ctx.emit(Inst::AtomicRMW { ty: ty_access, op }); - // And finally, copy the preordained AtomicRMW output reg to its destination. 
- ctx.emit(Inst::gen_move(r_dst, xreg(27), I64)); - // Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that. + let lse_op = match op { + AtomicRmwOp::Add => Some(AtomicRMWOp::Add), + AtomicRmwOp::And => Some(AtomicRMWOp::Clr), + AtomicRmwOp::Xor => Some(AtomicRMWOp::Eor), + AtomicRmwOp::Or => Some(AtomicRMWOp::Set), + AtomicRmwOp::Smax => Some(AtomicRMWOp::Smax), + AtomicRmwOp::Umax => Some(AtomicRMWOp::Umax), + AtomicRmwOp::Smin => Some(AtomicRMWOp::Smin), + AtomicRmwOp::Umin => Some(AtomicRMWOp::Umin), + _ => None, + }; + if isa_flags.use_lse() && lse_op.is_some() { + ctx.emit(Inst::AtomicRMW { + op: lse_op.unwrap(), + rs: r_arg2, + rt: r_dst, + rn: r_addr, + ty: ty_access, + }); + } else { + // Make sure that both args are in virtual regs, since in effect + // we have to do a parallel copy to get them safely to the AtomicRMW input + // regs, and that's not guaranteed safe if either is in a real reg. + r_addr = ctx.ensure_in_vreg(r_addr, I64); + r_arg2 = ctx.ensure_in_vreg(r_arg2, I64); + // Move the args to the preordained AtomicRMW input regs + ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64)); + ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64)); + ctx.emit(Inst::AtomicRMWLoop { ty: ty_access, op }); + // And finally, copy the preordained AtomicRMW output reg to its destination. + ctx.emit(Inst::gen_move(r_dst, xreg(27), I64)); + // Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that. 
+ } } Opcode::AtomicCas => { @@ -2144,16 +2165,11 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Spill | Opcode::Fill | Opcode::FillNop - | Opcode::Regmove - | Opcode::CopySpecial - | Opcode::CopyToSsa | Opcode::CopyNop | Opcode::AdjustSpDown | Opcode::AdjustSpUpImm | Opcode::AdjustSpDownImm - | Opcode::IfcmpSp - | Opcode::Regspill - | Opcode::Regfill => { + | Opcode::IfcmpSp => { panic!("Unused opcode should not be encountered."); } @@ -2376,14 +2392,22 @@ pub(crate) fn lower_insn_to_regs>( // cmp xm, #0 // cset xm, ne - let size = VectorSize::from_ty(ctx.input_ty(insn, 0)); + let s = VectorSize::from_ty(src_ty); + let size = if s == VectorSize::Size64x2 { + // `vall_true` with 64-bit elements is handled elsewhere. + debug_assert_ne!(op, Opcode::VallTrue); + + VectorSize::Size32x4 + } else { + s + }; if op == Opcode::VanyTrue { ctx.emit(Inst::VecRRR { alu_op: VecALUOp::Umaxp, rd: tmp, rn: rm, - rm: rm, + rm, size, }); } else { @@ -2806,9 +2830,9 @@ pub(crate) fn lower_insn_to_regs>( let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); ctx.emit(Inst::VecRRR { alu_op: VecALUOp::Addp, - rd: rd, - rn: rn, - rm: rm, + rd, + rn, + rm, size: VectorSize::from_ty(ty), }); } @@ -2905,42 +2929,62 @@ pub(crate) fn lower_insn_to_regs>( } Opcode::FminPseudo | Opcode::FmaxPseudo => { - let ty = ctx.input_ty(insn, 0); - if ty == F32X4 || ty == F64X2 { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let (ra, rb) = if op == Opcode::FminPseudo { + (rm, rn) + } else { + (rn, rm) + }; + let ty = ty.unwrap(); + let lane_type = ty.lane_type(); + + debug_assert!(lane_type == F32 || lane_type == F64); + + if ty.is_vector() { + let size = VectorSize::from_ty(ty); + // pmin(a,b) => bitsel(b, a, cmpgt(a, b)) // pmax(a,b) => bitsel(b, a, cmpgt(b, a)) - let r_dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let r_a = 
put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let r_b = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); - // Since we're going to write the output register `r_dst` anyway, we might as - // well first use it to hold the comparison result. This has the slightly unusual + // Since we're going to write the output register `rd` anyway, we might as well + // first use it to hold the comparison result. This has the slightly unusual // effect that we modify the output register in the first instruction (`fcmgt`) // but read both the inputs again in the second instruction (`bsl`), which means // that the output register can't be either of the input registers. Regalloc // should handle this correctly, nevertheless. ctx.emit(Inst::VecRRR { alu_op: VecALUOp::Fcmgt, - rd: r_dst, - rn: if op == Opcode::FminPseudo { r_a } else { r_b }, - rm: if op == Opcode::FminPseudo { r_b } else { r_a }, - size: if ty == F32X4 { - VectorSize::Size32x4 - } else { - VectorSize::Size64x2 - }, + rd, + rn: ra, + rm: rb, + size, }); ctx.emit(Inst::VecRRR { alu_op: VecALUOp::Bsl, - rd: r_dst, - rn: r_b, - rm: r_a, - size: VectorSize::Size8x16, + rd, + rn, + rm, + size, }); } else { - return Err(CodegenError::Unsupported(format!( - "{}: Unsupported type: {:?}", - op, ty - ))); + if lane_type == F32 { + ctx.emit(Inst::FpuCmp32 { rn: ra, rm: rb }); + ctx.emit(Inst::FpuCSel32 { + rd, + rn, + rm, + cond: Cond::Gt, + }); + } else { + ctx.emit(Inst::FpuCmp64 { rn: ra, rm: rb }); + ctx.emit(Inst::FpuCSel64 { + rd, + rn, + rm, + cond: Cond::Gt, + }); + } } } @@ -3397,7 +3441,7 @@ pub(crate) fn lower_insn_to_regs>( ctx.emit(Inst::FpuRRR { fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64), rd: rtmp2, - rn: rn, + rn, rm: rtmp1.to_reg(), }); if in_bits == 32 { @@ -3419,7 +3463,7 @@ pub(crate) fn lower_insn_to_regs>( } } if in_bits == 32 { - ctx.emit(Inst::FpuCmp32 { rn: rn, rm: rn }); + ctx.emit(Inst::FpuCmp32 { rn, rm: rn }); ctx.emit(Inst::FpuCSel32 { rd: rtmp2, rn: rtmp1.to_reg(), 
@@ -3427,7 +3471,7 @@ pub(crate) fn lower_insn_to_regs>( cond: Cond::Ne, }); } else { - ctx.emit(Inst::FpuCmp64 { rn: rn, rm: rn }); + ctx.emit(Inst::FpuCmp64 { rn, rm: rn }); ctx.emit(Inst::FpuCSel64 { rd: rtmp2, rn: rtmp1.to_reg(), @@ -3516,47 +3560,6 @@ pub(crate) fn lower_insn_to_regs>( panic!("ALU+imm and ALU+carry ops should not appear here!"); } - #[cfg(feature = "x86")] - Opcode::X86Udivmodx - | Opcode::X86Sdivmodx - | Opcode::X86Umulx - | Opcode::X86Smulx - | Opcode::X86Cvtt2si - | Opcode::X86Fmin - | Opcode::X86Fmax - | Opcode::X86Push - | Opcode::X86Pop - | Opcode::X86Bsr - | Opcode::X86Bsf - | Opcode::X86Pblendw - | Opcode::X86Pshufd - | Opcode::X86Pshufb - | Opcode::X86Pextr - | Opcode::X86Pinsr - | Opcode::X86Insertps - | Opcode::X86Movsd - | Opcode::X86Movlhps - | Opcode::X86Palignr - | Opcode::X86Psll - | Opcode::X86Psrl - | Opcode::X86Psra - | Opcode::X86Ptest - | Opcode::X86Pmaxs - | Opcode::X86Pmaxu - | Opcode::X86Pmins - | Opcode::X86Pminu - | Opcode::X86Pmullq - | Opcode::X86Pmuludq - | Opcode::X86Punpckh - | Opcode::X86Punpckl - | Opcode::X86Vcvtudq2ps - | Opcode::X86ElfTlsGetAddr - | Opcode::X86MachoTlsGetAddr => { - panic!("x86-specific opcode in supposedly arch-neutral IR!"); - } - - Opcode::DummySargT => unreachable!(), - Opcode::Iabs => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index c239beef05..183fe1c776 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -11,7 +11,6 @@ use crate::machinst::{ use crate::result::CodegenResult; use crate::settings as shared_settings; use alloc::{boxed::Box, vec::Vec}; -use core::hash::{Hash, Hasher}; use regalloc::{PrettyPrint, RealRegUniverse}; use target_lexicon::{Aarch64Architecture, Architecture, Triple}; @@ -111,11 +110,6 @@ impl MachBackend for AArch64Backend { 
self.isa_flags.iter().collect() } - fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) { - self.flags.hash(&mut hasher); - self.isa_flags.hash(&mut hasher); - } - fn reg_universe(&self) -> &RealRegUniverse { &self.reg_universe } @@ -126,13 +120,6 @@ impl MachBackend for AArch64Backend { IntCC::UnsignedGreaterThanOrEqual } - fn unsigned_sub_overflow_condition(&self) -> IntCC { - // unsigned `<`; this corresponds to the carry flag cleared on aarch64, which happens on - // underflow of a subtract (aarch64 follows a carry-cleared-on-borrow convention, the - // opposite of x86). - IntCC::UnsignedLessThan - } - #[cfg(feature = "unwind")] fn emit_unwind_info( &self, diff --git a/cranelift/codegen/src/isa/arm32/mod.rs b/cranelift/codegen/src/isa/arm32/mod.rs index e4280894a1..959be2196b 100644 --- a/cranelift/codegen/src/isa/arm32/mod.rs +++ b/cranelift/codegen/src/isa/arm32/mod.rs @@ -11,7 +11,6 @@ use crate::result::CodegenResult; use crate::settings; use alloc::{boxed::Box, vec::Vec}; -use core::hash::{Hash, Hasher}; use regalloc::{PrettyPrint, RealRegUniverse}; use target_lexicon::{Architecture, ArmArchitecture, Triple}; @@ -101,10 +100,6 @@ impl MachBackend for Arm32Backend { Vec::new() } - fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) { - self.flags.hash(&mut hasher); - } - fn reg_universe(&self) -> &RealRegUniverse { &self.reg_universe } @@ -114,11 +109,6 @@ impl MachBackend for Arm32Backend { IntCC::UnsignedGreaterThanOrEqual } - fn unsigned_sub_overflow_condition(&self) -> IntCC { - // Carry flag clear. - IntCC::UnsignedLessThan - } - fn text_section_builder(&self, num_funcs: u32) -> Box { Box::new(MachTextSectionBuilder::::new(num_funcs)) } diff --git a/cranelift/codegen/src/isa/constraints.rs b/cranelift/codegen/src/isa/constraints.rs deleted file mode 100644 index c87c3bd9d4..0000000000 --- a/cranelift/codegen/src/isa/constraints.rs +++ /dev/null @@ -1,207 +0,0 @@ -//! Register constraints for instruction operands. -//! -//! 
An encoding recipe specifies how an instruction is encoded as binary machine code, but it only -//! works if the operands and results satisfy certain constraints. Constraints on immediate -//! operands are checked by instruction predicates when the recipe is chosen. -//! -//! It is the register allocator's job to make sure that the register constraints on value operands -//! are satisfied. - -use crate::binemit::CodeOffset; -use crate::ir::{Function, Inst, ValueLoc}; -use crate::isa::{RegClass, RegUnit}; -use crate::regalloc::RegDiversions; - -/// Register constraint for a single value operand or instruction result. -#[derive(PartialEq, Debug)] -pub struct OperandConstraint { - /// The kind of constraint. - pub kind: ConstraintKind, - - /// The register class of the operand. - /// - /// This applies to all kinds of constraints, but with slightly different meaning. - pub regclass: RegClass, -} - -impl OperandConstraint { - /// Check if this operand constraint is satisfied by the given value location. - /// For tied constraints, this only checks the register class, not that the - /// counterpart operand has the same value location. - pub fn satisfied(&self, loc: ValueLoc) -> bool { - match self.kind { - ConstraintKind::Reg | ConstraintKind::Tied(_) => { - if let ValueLoc::Reg(reg) = loc { - self.regclass.contains(reg) - } else { - false - } - } - ConstraintKind::FixedReg(reg) | ConstraintKind::FixedTied(reg) => { - loc == ValueLoc::Reg(reg) && self.regclass.contains(reg) - } - ConstraintKind::Stack => { - if let ValueLoc::Stack(_) = loc { - true - } else { - false - } - } - } - } -} - -/// The different kinds of operand constraints. -#[derive(Clone, Copy, PartialEq, Eq, Debug)] -pub enum ConstraintKind { - /// This operand or result must be a register from the given register class. - Reg, - - /// This operand or result must be a fixed register. - /// - /// The constraint's `regclass` field is the top-level register class containing the fixed - /// register. 
- FixedReg(RegUnit), - - /// This result value must use the same register as an input value operand. - /// - /// The associated number is the index of the input value operand this result is tied to. The - /// constraint's `regclass` field is the same as the tied operand's register class. - /// - /// When an (in, out) operand pair is tied, this constraint kind appears in both the `ins` and - /// the `outs` arrays. The constraint for the in operand is `Tied(out)`, and the constraint for - /// the out operand is `Tied(in)`. - Tied(u8), - - /// This operand must be a fixed register, and it has a tied counterpart. - /// - /// This works just like `FixedReg`, but additionally indicates that there are identical - /// input/output operands for this fixed register. For an input operand, this means that the - /// value will be clobbered by the instruction - FixedTied(RegUnit), - - /// This operand must be a value in a stack slot. - /// - /// The constraint's `regclass` field is the register class that would normally be used to load - /// and store values of this type. - Stack, -} - -/// Value operand constraints for an encoding recipe. -#[derive(PartialEq, Clone)] -pub struct RecipeConstraints { - /// Constraints for the instruction's fixed value operands. - /// - /// If the instruction takes a variable number of operands, the register constraints for those - /// operands must be computed dynamically. - /// - /// - For branches and jumps, block arguments must match the expectations of the destination block. - /// - For calls and returns, the calling convention ABI specifies constraints. - pub ins: &'static [OperandConstraint], - - /// Constraints for the instruction's fixed results. - /// - /// If the instruction produces a variable number of results, it's probably a call and the - /// constraints must be derived from the calling convention ABI. - pub outs: &'static [OperandConstraint], - - /// Are any of the input constraints `FixedReg` or `FixedTied`? 
- pub fixed_ins: bool, - - /// Are any of the output constraints `FixedReg` or `FixedTied`? - pub fixed_outs: bool, - - /// Are any of the input/output constraints `Tied` (but not `FixedTied`)? - pub tied_ops: bool, - - /// Does this instruction clobber the CPU flags? - /// - /// When true, SSA values of type `iflags` or `fflags` can not be live across the instruction. - pub clobbers_flags: bool, -} - -impl RecipeConstraints { - /// Check that these constraints are satisfied by the operands on `inst`. - pub fn satisfied(&self, inst: Inst, divert: &RegDiversions, func: &Function) -> bool { - for (&arg, constraint) in func.dfg.inst_args(inst).iter().zip(self.ins) { - let loc = divert.get(arg, &func.locations); - - if let ConstraintKind::Tied(out_index) = constraint.kind { - let out_val = func.dfg.inst_results(inst)[out_index as usize]; - let out_loc = func.locations[out_val]; - if loc != out_loc { - return false; - } - } - - if !constraint.satisfied(loc) { - return false; - } - } - - for (&arg, constraint) in func.dfg.inst_results(inst).iter().zip(self.outs) { - let loc = divert.get(arg, &func.locations); - if !constraint.satisfied(loc) { - return false; - } - } - - true - } -} - -/// Constraints on the range of a branch instruction. -/// -/// A branch instruction usually encodes its destination as a signed n-bit offset from an origin. -/// The origin depends on the ISA and the specific instruction: -/// -/// - RISC-V and ARM Aarch64 use the address of the branch instruction, `origin = 0`. -/// - x86 uses the address of the instruction following the branch, `origin = 2` for a 2-byte -/// branch instruction. -/// - ARM's A32 encoding uses the address of the branch instruction + 8 bytes, `origin = 8`. -#[derive(Clone, Copy, Debug)] -pub struct BranchRange { - /// Offset in bytes from the address of the branch instruction to the origin used for computing - /// the branch displacement. This is the destination of a branch that encodes a 0 displacement. 
- pub origin: u8, - - /// Number of bits in the signed byte displacement encoded in the instruction. This does not - /// account for branches that can only target aligned addresses. - pub bits: u8, -} - -impl BranchRange { - /// Determine if this branch range can represent the range from `branch` to `dest`, where - /// `branch` is the code offset of the branch instruction itself and `dest` is the code offset - /// of the destination block header. - /// - /// This method does not detect if the range is larger than 2 GB. - pub fn contains(self, branch: CodeOffset, dest: CodeOffset) -> bool { - let d = dest.wrapping_sub(branch + CodeOffset::from(self.origin)) as i32; - let s = 32 - self.bits; - d == d << s >> s - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn branch_range() { - // ARM T1 branch. - let t1 = BranchRange { origin: 4, bits: 9 }; - assert!(t1.contains(0, 0)); - assert!(t1.contains(0, 2)); - assert!(t1.contains(2, 0)); - assert!(t1.contains(1000, 1000)); - - // Forward limit. - assert!(t1.contains(1000, 1258)); - assert!(!t1.contains(1000, 1260)); - - // Backward limit - assert!(t1.contains(1000, 748)); - assert!(!t1.contains(1000, 746)); - } -} diff --git a/cranelift/codegen/src/isa/enc_tables.rs b/cranelift/codegen/src/isa/enc_tables.rs deleted file mode 100644 index e21557497e..0000000000 --- a/cranelift/codegen/src/isa/enc_tables.rs +++ /dev/null @@ -1,292 +0,0 @@ -//! Support types for generated encoding tables. -//! -//! This module contains types and functions for working with the encoding tables generated by -//! `cranelift-codegen/meta/src/gen_encodings.rs`. - -use crate::constant_hash::{probe, Table}; -use crate::ir::{Function, InstructionData, Opcode, Type}; -use crate::isa::{Encoding, Legalize}; -use crate::settings::PredicateView; -use core::ops::Range; - -/// A recipe predicate. -/// -/// This is a predicate function capable of testing ISA and instruction predicates simultaneously. 
-/// -/// A None predicate is always satisfied. -pub type RecipePredicate = Option bool>; - -/// An instruction predicate. -/// -/// This is a predicate function that needs to be tested in addition to the recipe predicate. It -/// can't depend on ISA settings. -pub type InstPredicate = fn(&Function, &InstructionData) -> bool; - -/// Legalization action to perform when no encoding can be found for an instruction. -/// -/// This is an index into an ISA-specific table of legalization actions. -pub type LegalizeCode = u8; - -/// Level 1 hash table entry. -/// -/// One level 1 hash table is generated per CPU mode. This table is keyed by the controlling type -/// variable, using `INVALID` for non-polymorphic instructions. -/// -/// The hash table values are references to level 2 hash tables, encoded as an offset in `LEVEL2` -/// where the table begins, and the binary logarithm of its length. All the level 2 hash tables -/// have a power-of-two size. -/// -/// Entries are generic over the offset type. It will typically be `u32` or `u16`, depending on the -/// size of the `LEVEL2` table. -/// -/// Empty entries are encoded with a `!0` value for `log2len` which will always be out of range. -/// Entries that have a `legalize` value but no level 2 table have an `offset` field that is out of -/// bounds. -pub struct Level1Entry + Copy> { - pub ty: Type, - pub log2len: u8, - pub legalize: LegalizeCode, - pub offset: OffT, -} - -impl + Copy> Level1Entry { - /// Get the level 2 table range indicated by this entry. - fn range(&self) -> Range { - let b = self.offset.into() as usize; - b..b + (1 << self.log2len) - } -} - -impl + Copy> Table for [Level1Entry] { - fn len(&self) -> usize { - self.len() - } - - fn key(&self, idx: usize) -> Option { - if self[idx].log2len != !0 { - Some(self[idx].ty) - } else { - None - } - } -} - -/// Level 2 hash table entry. 
-/// -/// The second level hash tables are keyed by `Opcode`, and contain an offset into the `ENCLISTS` -/// table where the encoding recipes for the instruction are stored. -/// -/// Entries are generic over the offset type which depends on the size of `ENCLISTS`. A `u16` -/// offset allows the entries to be only 32 bits each. There is no benefit to dropping down to `u8` -/// for tiny ISAs. The entries won't shrink below 32 bits since the opcode is expected to be 16 -/// bits. -/// -/// Empty entries are encoded with a `NotAnOpcode` `opcode` field. -pub struct Level2Entry + Copy> { - pub opcode: Option, - pub offset: OffT, -} - -impl + Copy> Table for [Level2Entry] { - fn len(&self) -> usize { - self.len() - } - - fn key(&self, idx: usize) -> Option { - self[idx].opcode - } -} - -/// Two-level hash table lookup and iterator construction. -/// -/// Given the controlling type variable and instruction opcode, find the corresponding encoding -/// list. -/// -/// Returns an iterator that produces legal encodings for `inst`. -pub fn lookup_enclist<'a, OffT1, OffT2>( - ctrl_typevar: Type, - inst: &'a InstructionData, - func: &'a Function, - level1_table: &'static [Level1Entry], - level2_table: &'static [Level2Entry], - enclist: &'static [EncListEntry], - legalize_actions: &'static [Legalize], - recipe_preds: &'static [RecipePredicate], - inst_preds: &'static [InstPredicate], - isa_preds: PredicateView<'a>, -) -> Encodings<'a> -where - OffT1: Into + Copy, - OffT2: Into + Copy, -{ - let (offset, legalize) = match probe(level1_table, ctrl_typevar, ctrl_typevar.index()) { - Err(l1idx) => { - // No level 1 entry found for the type. - // We have a sentinel entry with the default legalization code. - (!0, level1_table[l1idx].legalize) - } - Ok(l1idx) => { - // We have a valid level 1 entry for this type. 
- let l1ent = &level1_table[l1idx]; - let offset = match level2_table.get(l1ent.range()) { - Some(l2tab) => { - let opcode = inst.opcode(); - match probe(l2tab, opcode, opcode as usize) { - Ok(l2idx) => l2tab[l2idx].offset.into() as usize, - Err(_) => !0, - } - } - // The l1ent range is invalid. This means that we just have a customized - // legalization code for this type. The level 2 table is empty. - None => !0, - }; - (offset, l1ent.legalize) - } - }; - - // Now we have an offset into `enclist` that is `!0` when no encoding list could be found. - // The default legalization code is always valid. - Encodings::new( - offset, - legalize, - inst, - func, - enclist, - legalize_actions, - recipe_preds, - inst_preds, - isa_preds, - ) -} - -/// Encoding list entry. -/// -/// Encoding lists are represented as sequences of u16 words. -pub type EncListEntry = u16; - -/// Number of bits used to represent a predicate. c.f. `meta/src/gen_encodings.rs`. -const PRED_BITS: u8 = 12; -const PRED_MASK: usize = (1 << PRED_BITS) - 1; -/// First code word representing a predicate check. c.f. `meta/src/gen_encodings.rs`. -const PRED_START: usize = 0x1000; - -/// An iterator over legal encodings for the instruction. -pub struct Encodings<'a> { - // Current offset into `enclist`, or out of bounds after we've reached the end. - offset: usize, - // Legalization code to use of no encoding is found. - legalize: LegalizeCode, - inst: &'a InstructionData, - func: &'a Function, - enclist: &'static [EncListEntry], - legalize_actions: &'static [Legalize], - recipe_preds: &'static [RecipePredicate], - inst_preds: &'static [InstPredicate], - isa_preds: PredicateView<'a>, -} - -impl<'a> Encodings<'a> { - /// Creates a new instance of `Encodings`. - /// - /// This iterator provides search for encodings that applies to the given instruction. The - /// encoding lists are laid out such that first call to `next` returns valid entry in the list - /// or `None`. 
- pub fn new( - offset: usize, - legalize: LegalizeCode, - inst: &'a InstructionData, - func: &'a Function, - enclist: &'static [EncListEntry], - legalize_actions: &'static [Legalize], - recipe_preds: &'static [RecipePredicate], - inst_preds: &'static [InstPredicate], - isa_preds: PredicateView<'a>, - ) -> Self { - Encodings { - offset, - inst, - func, - legalize, - isa_preds, - recipe_preds, - inst_preds, - enclist, - legalize_actions, - } - } - - /// Get the legalization action that caused the enumeration of encodings to stop. - /// This can be the default legalization action for the type or a custom code for the - /// instruction. - /// - /// This method must only be called after the iterator returns `None`. - pub fn legalize(&self) -> Legalize { - debug_assert_eq!(self.offset, !0, "Premature Encodings::legalize()"); - self.legalize_actions[self.legalize as usize] - } - - /// Check if the `rpred` recipe predicate is satisfied. - fn check_recipe(&self, rpred: RecipePredicate) -> bool { - match rpred { - Some(p) => p(self.isa_preds, self.inst), - None => true, - } - } - - /// Check an instruction or isa predicate. - fn check_pred(&self, pred: usize) -> bool { - if let Some(&p) = self.inst_preds.get(pred) { - p(self.func, self.inst) - } else { - let pred = pred - self.inst_preds.len(); - self.isa_preds.test(pred) - } - } -} - -impl<'a> Iterator for Encodings<'a> { - type Item = Encoding; - - fn next(&mut self) -> Option { - while let Some(entryref) = self.enclist.get(self.offset) { - let entry = *entryref as usize; - - // Check for "recipe+bits". - let recipe = entry >> 1; - if let Some(&rpred) = self.recipe_preds.get(recipe) { - let bits = self.offset + 1; - if entry & 1 == 0 { - self.offset += 2; // Next entry. - } else { - self.offset = !0; // Stop. - } - if self.check_recipe(rpred) { - return Some(Encoding::new(recipe as u16, self.enclist[bits])); - } - continue; - } - - // Check for "stop with legalize". 
- if entry < PRED_START { - self.legalize = (entry - 2 * self.recipe_preds.len()) as LegalizeCode; - self.offset = !0; // Stop. - return None; - } - - // Finally, this must be a predicate entry. - let pred_entry = entry - PRED_START; - let skip = pred_entry >> PRED_BITS; - let pred = pred_entry & PRED_MASK; - - if self.check_pred(pred) { - self.offset += 1; - } else if skip == 0 { - self.offset = !0; // Stop. - return None; - } else { - self.offset += 1 + skip; - } - } - None - } -} diff --git a/cranelift/codegen/src/isa/encoding.rs b/cranelift/codegen/src/isa/encoding.rs deleted file mode 100644 index 84001c5d36..0000000000 --- a/cranelift/codegen/src/isa/encoding.rs +++ /dev/null @@ -1,167 +0,0 @@ -//! The `Encoding` struct. - -use crate::binemit::CodeOffset; -use crate::ir::{Function, Inst}; -use crate::isa::constraints::{BranchRange, RecipeConstraints}; -use crate::regalloc::RegDiversions; -use core::fmt; - -#[cfg(feature = "enable-serde")] -use serde::{Deserialize, Serialize}; - -/// Bits needed to encode an instruction as binary machine code. -/// -/// The encoding consists of two parts, both specific to the target ISA: An encoding *recipe*, and -/// encoding *bits*. The recipe determines the native instruction format and the mapping of -/// operands to encoded bits. The encoding bits provide additional information to the recipe, -/// typically parts of the opcode. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] -pub struct Encoding { - recipe: u16, - bits: u16, -} - -impl Encoding { - /// Create a new `Encoding` containing `(recipe, bits)`. - pub fn new(recipe: u16, bits: u16) -> Self { - Self { recipe, bits } - } - - /// Get the recipe number in this encoding. - pub fn recipe(self) -> usize { - self.recipe as usize - } - - /// Get the recipe-specific encoding bits. - pub fn bits(self) -> u16 { - self.bits - } - - /// Is this a legal encoding, or the default placeholder? 
- pub fn is_legal(self) -> bool { - self != Self::default() - } -} - -/// The default encoding is the illegal one. -impl Default for Encoding { - fn default() -> Self { - Self::new(0xffff, 0xffff) - } -} - -/// ISA-independent display of an encoding. -impl fmt::Display for Encoding { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if self.is_legal() { - write!(f, "{}#{:02x}", self.recipe, self.bits) - } else { - write!(f, "-") - } - } -} - -/// Temporary object that holds enough context to properly display an encoding. -/// This is meant to be created by `EncInfo::display()`. -pub struct DisplayEncoding { - pub encoding: Encoding, - pub recipe_names: &'static [&'static str], -} - -impl fmt::Display for DisplayEncoding { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if self.encoding.is_legal() { - write!( - f, - "{}#{:02x}", - self.recipe_names[self.encoding.recipe()], - self.encoding.bits - ) - } else { - write!(f, "-") - } - } -} - -type SizeCalculatorFn = fn(&RecipeSizing, Encoding, Inst, &RegDiversions, &Function) -> u8; - -/// Returns the base size of the Recipe, assuming it's fixed. This is the default for most -/// encodings; others can be variable and longer than this base size, depending on the registers -/// they're using and use a different function, specific per platform. -pub fn base_size( - sizing: &RecipeSizing, - _: Encoding, - _: Inst, - _: &RegDiversions, - _: &Function, -) -> u8 { - sizing.base_size -} - -/// Code size information for an encoding recipe. -/// -/// Encoding recipes may have runtime-determined instruction size. -pub struct RecipeSizing { - /// Minimum size in bytes of instructions encoded with this recipe. - pub base_size: u8, - - /// Method computing the instruction's real size, given inputs and outputs. - pub compute_size: SizeCalculatorFn, - - /// Allowed branch range in this recipe, if any. - /// - /// All encoding recipes for branches have exact branch range information. 
- pub branch_range: Option, -} - -/// Information about all the encodings in this ISA. -#[derive(Clone)] -pub struct EncInfo { - /// Constraints on value operands per recipe. - pub constraints: &'static [RecipeConstraints], - - /// Code size information per recipe. - pub sizing: &'static [RecipeSizing], - - /// Names of encoding recipes. - pub names: &'static [&'static str], -} - -impl EncInfo { - /// Get the value operand constraints for `enc` if it is a legal encoding. - pub fn operand_constraints(&self, enc: Encoding) -> Option<&'static RecipeConstraints> { - self.constraints.get(enc.recipe()) - } - - /// Create an object that can display an ISA-dependent encoding properly. - pub fn display(&self, enc: Encoding) -> DisplayEncoding { - DisplayEncoding { - encoding: enc, - recipe_names: self.names, - } - } - - /// Get the size in bytes of `inst`, if it were encoded with `enc`. - /// - /// Returns 0 for illegal encodings. - pub fn byte_size( - &self, - enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, - ) -> CodeOffset { - self.sizing.get(enc.recipe()).map_or(0, |s| { - let compute_size = s.compute_size; - CodeOffset::from(compute_size(&s, enc, inst, divert, func)) - }) - } - - /// Get the branch range that is supported by `enc`, if any. - /// - /// This will never return `None` for a legal branch encoding. - pub fn branch_range(&self, enc: Encoding) -> Option { - self.sizing.get(enc.recipe()).and_then(|s| s.branch_range) - } -} diff --git a/cranelift/codegen/src/isa/legacy/mod.rs b/cranelift/codegen/src/isa/legacy/mod.rs deleted file mode 100644 index a89230f941..0000000000 --- a/cranelift/codegen/src/isa/legacy/mod.rs +++ /dev/null @@ -1,12 +0,0 @@ -//! Legacy ("old-style") backends that will be removed in the future. - -// N.B.: the old x86-64 backend (`x86`) and the new one (`x64`) are both -// included whenever building with x86 support. The new backend is the default, -// but the old can be requested with `BackendVariant::Legacy`. 
However, if this -// crate is built with the `old-x86-backend` feature, then the old backend is -// default instead. -#[cfg(feature = "x86")] -pub(crate) mod x86; - -#[cfg(feature = "riscv")] -pub(crate) mod riscv; diff --git a/cranelift/codegen/src/isa/legacy/riscv/abi.rs b/cranelift/codegen/src/isa/legacy/riscv/abi.rs deleted file mode 100644 index 44c5f36afe..0000000000 --- a/cranelift/codegen/src/isa/legacy/riscv/abi.rs +++ /dev/null @@ -1,149 +0,0 @@ -//! RISC-V ABI implementation. -//! -//! This module implements the RISC-V calling convention through the primary `legalize_signature()` -//! entry point. -//! -//! This doesn't support the soft-float ABI at the moment. - -use super::registers::{FPR, GPR}; -use super::settings; -use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion}; -use crate::ir::{self, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, Type}; -use crate::isa::RegClass; -use crate::regalloc::RegisterSet; -use alloc::borrow::Cow; -use core::i32; -use target_lexicon::Triple; - -struct Args { - pointer_bits: u8, - pointer_bytes: u8, - pointer_type: Type, - regs: u32, - reg_limit: u32, - offset: u32, -} - -impl Args { - fn new(bits: u8, enable_e: bool) -> Self { - Self { - pointer_bits: bits, - pointer_bytes: bits / 8, - pointer_type: Type::int(u16::from(bits)).unwrap(), - regs: 0, - reg_limit: if enable_e { 6 } else { 8 }, - offset: 0, - } - } -} - -impl ArgAssigner for Args { - fn assign(&mut self, arg: &AbiParam) -> ArgAction { - fn align(value: u32, to: u32) -> u32 { - (value + to - 1) & !(to - 1) - } - - let ty = arg.value_type; - - // Check for a legal type. - // RISC-V doesn't have SIMD at all, so break all vectors down. - if ty.is_vector() { - return ValueConversion::VectorSplit.into(); - } - - // Large integers and booleans are broken down to fit in a register. - if !ty.is_float() && ty.bits() > u16::from(self.pointer_bits) { - // Align registers and stack to a multiple of two pointers. 
- self.regs = align(self.regs, 2); - self.offset = align(self.offset, 2 * u32::from(self.pointer_bytes)); - return ValueConversion::IntSplit.into(); - } - - // Small integers are extended to the size of a pointer register. - if ty.is_int() && ty.bits() < u16::from(self.pointer_bits) { - match arg.extension { - ArgumentExtension::None => {} - ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(), - ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(), - } - } - - if self.regs < self.reg_limit { - // Assign to a register. - let reg = if ty.is_float() { - FPR.unit(10 + self.regs as usize) - } else { - GPR.unit(10 + self.regs as usize) - }; - self.regs += 1; - ArgumentLoc::Reg(reg).into() - } else { - // Assign a stack location. - let loc = ArgumentLoc::Stack(self.offset as i32); - self.offset += u32::from(self.pointer_bytes); - debug_assert!(self.offset <= i32::MAX as u32); - loc.into() - } - } -} - -/// Legalize `sig` for RISC-V. -pub fn legalize_signature( - sig: &mut Cow, - triple: &Triple, - isa_flags: &settings::Flags, - current: bool, -) { - let bits = triple.pointer_width().unwrap().bits(); - - let mut args = Args::new(bits, isa_flags.enable_e()); - if let Some(new_params) = legalize_args(&sig.params, &mut args) { - sig.to_mut().params = new_params; - } - - let mut rets = Args::new(bits, isa_flags.enable_e()); - if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) { - sig.to_mut().returns = new_returns; - } - - if current { - let ptr = Type::int(u16::from(bits)).unwrap(); - - // Add the link register as an argument and return value. - // - // The `jalr` instruction implementing a return can technically accept the return address - // in any register, but a micro-architecture with a return address predictor will only - // recognize it as a return if the address is in `x1`. 
- let link = AbiParam::special_reg(ptr, ArgumentPurpose::Link, GPR.unit(1)); - sig.to_mut().params.push(link); - sig.to_mut().returns.push(link); - } -} - -/// Get register class for a type appearing in a legalized signature. -pub fn regclass_for_abi_type(ty: Type) -> RegClass { - if ty.is_float() { - FPR - } else { - GPR - } -} - -pub fn allocatable_registers(_func: &ir::Function, isa_flags: &settings::Flags) -> RegisterSet { - let mut regs = RegisterSet::new(); - regs.take(GPR, GPR.unit(0)); // Hard-wired 0. - // %x1 is the link register which is available for allocation. - regs.take(GPR, GPR.unit(2)); // Stack pointer. - regs.take(GPR, GPR.unit(3)); // Global pointer. - regs.take(GPR, GPR.unit(4)); // Thread pointer. - // TODO: %x8 is the frame pointer. Reserve it? - - // Remove %x16 and up for RV32E. - if isa_flags.enable_e() { - for u in 16..32 { - regs.take(GPR, GPR.unit(u)); - } - } - - regs -} diff --git a/cranelift/codegen/src/isa/legacy/riscv/binemit.rs b/cranelift/codegen/src/isa/legacy/riscv/binemit.rs deleted file mode 100644 index a1d2b82e12..0000000000 --- a/cranelift/codegen/src/isa/legacy/riscv/binemit.rs +++ /dev/null @@ -1,182 +0,0 @@ -//! Emitting binary RISC-V machine code. - -use crate::binemit::{bad_encoding, CodeSink, Reloc}; -use crate::ir::{Function, Inst, InstructionData}; -use crate::isa::{RegUnit, StackBaseMask, StackRef, TargetIsa}; -use crate::predicates::is_signed_int; -use crate::regalloc::RegDiversions; -use core::u32; - -include!(concat!(env!("OUT_DIR"), "/binemit-riscv.rs")); - -/// R-type instructions. -/// -/// 31 24 19 14 11 6 -/// funct7 rs2 rs1 funct3 rd opcode -/// 25 20 15 12 7 0 -/// -/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`. 
-fn put_r(bits: u16, rs1: RegUnit, rs2: RegUnit, rd: RegUnit, sink: &mut CS) { - let bits = u32::from(bits); - let opcode5 = bits & 0x1f; - let funct3 = (bits >> 5) & 0x7; - let funct7 = (bits >> 8) & 0x7f; - let rs1 = u32::from(rs1) & 0x1f; - let rs2 = u32::from(rs2) & 0x1f; - let rd = u32::from(rd) & 0x1f; - - // 0-6: opcode - let mut i = 0x3; - i |= opcode5 << 2; - i |= rd << 7; - i |= funct3 << 12; - i |= rs1 << 15; - i |= rs2 << 20; - i |= funct7 << 25; - - sink.put4(i); -} - -/// R-type instructions with a shift amount instead of rs2. -/// -/// 31 25 19 14 11 6 -/// funct7 shamt rs1 funct3 rd opcode -/// 25 20 15 12 7 0 -/// -/// Both funct7 and shamt contribute to bit 25. In RV64, shamt uses it for shifts > 31. -/// -/// Encoding bits: `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`. -fn put_rshamt( - bits: u16, - rs1: RegUnit, - shamt: i64, - rd: RegUnit, - sink: &mut CS, -) { - let bits = u32::from(bits); - let opcode5 = bits & 0x1f; - let funct3 = (bits >> 5) & 0x7; - let funct7 = (bits >> 8) & 0x7f; - let rs1 = u32::from(rs1) & 0x1f; - let shamt = shamt as u32 & 0x3f; - let rd = u32::from(rd) & 0x1f; - - // 0-6: opcode - let mut i = 0x3; - i |= opcode5 << 2; - i |= rd << 7; - i |= funct3 << 12; - i |= rs1 << 15; - i |= shamt << 20; - i |= funct7 << 25; - - sink.put4(i); -} - -/// I-type instructions. -/// -/// 31 19 14 11 6 -/// imm rs1 funct3 rd opcode -/// 20 15 12 7 0 -/// -/// Encoding bits: `opcode[6:2] | (funct3 << 5)` -fn put_i(bits: u16, rs1: RegUnit, imm: i64, rd: RegUnit, sink: &mut CS) { - let bits = u32::from(bits); - let opcode5 = bits & 0x1f; - let funct3 = (bits >> 5) & 0x7; - let rs1 = u32::from(rs1) & 0x1f; - let rd = u32::from(rd) & 0x1f; - - // 0-6: opcode - let mut i = 0x3; - i |= opcode5 << 2; - i |= rd << 7; - i |= funct3 << 12; - i |= rs1 << 15; - i |= (imm << 20) as u32; - - sink.put4(i); -} - -/// U-type instructions. 
-/// -/// 31 11 6 -/// imm rd opcode -/// 12 7 0 -/// -/// Encoding bits: `opcode[6:2] | (funct3 << 5)` -fn put_u(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) { - let bits = u32::from(bits); - let opcode5 = bits & 0x1f; - let rd = u32::from(rd) & 0x1f; - - // 0-6: opcode - let mut i = 0x3; - i |= opcode5 << 2; - i |= rd << 7; - i |= imm as u32 & 0xfffff000; - - sink.put4(i); -} - -/// SB-type branch instructions. -/// -/// 31 24 19 14 11 6 -/// imm rs2 rs1 funct3 imm opcode -/// 25 20 15 12 7 0 -/// -/// Encoding bits: `opcode[6:2] | (funct3 << 5)` -fn put_sb(bits: u16, imm: i64, rs1: RegUnit, rs2: RegUnit, sink: &mut CS) { - let bits = u32::from(bits); - let opcode5 = bits & 0x1f; - let funct3 = (bits >> 5) & 0x7; - let rs1 = u32::from(rs1) & 0x1f; - let rs2 = u32::from(rs2) & 0x1f; - - debug_assert!(is_signed_int(imm, 13, 1), "SB out of range {:#x}", imm); - let imm = imm as u32; - - // 0-6: opcode - let mut i = 0x3; - i |= opcode5 << 2; - i |= funct3 << 12; - i |= rs1 << 15; - i |= rs2 << 20; - - // The displacement is completely hashed up. - i |= ((imm >> 11) & 0x1) << 7; - i |= ((imm >> 1) & 0xf) << 8; - i |= ((imm >> 5) & 0x3f) << 25; - i |= ((imm >> 12) & 0x1) << 31; - - sink.put4(i); -} - -/// UJ-type jump instructions. -/// -/// 31 11 6 -/// imm rd opcode -/// 12 7 0 -/// -/// Encoding bits: `opcode[6:2]` -fn put_uj(bits: u16, imm: i64, rd: RegUnit, sink: &mut CS) { - let bits = u32::from(bits); - let opcode5 = bits & 0x1f; - let rd = u32::from(rd) & 0x1f; - - debug_assert!(is_signed_int(imm, 21, 1), "UJ out of range {:#x}", imm); - let imm = imm as u32; - - // 0-6: opcode - let mut i = 0x3; - i |= opcode5 << 2; - i |= rd << 7; - - // The displacement is completely hashed up. 
- i |= imm & 0xff000; - i |= ((imm >> 11) & 0x1) << 20; - i |= ((imm >> 1) & 0x3ff) << 21; - i |= ((imm >> 20) & 0x1) << 31; - - sink.put4(i); -} diff --git a/cranelift/codegen/src/isa/legacy/riscv/enc_tables.rs b/cranelift/codegen/src/isa/legacy/riscv/enc_tables.rs deleted file mode 100644 index 76184ad727..0000000000 --- a/cranelift/codegen/src/isa/legacy/riscv/enc_tables.rs +++ /dev/null @@ -1,18 +0,0 @@ -//! Encoding tables for RISC-V. - -use super::registers::*; -use crate::ir; -use crate::isa; -use crate::isa::constraints::*; -use crate::isa::enc_tables::*; -use crate::isa::encoding::{base_size, RecipeSizing}; -use crate::predicates; - -// Include the generated encoding tables: -// - `LEVEL1_RV32` -// - `LEVEL1_RV64` -// - `LEVEL2` -// - `ENCLIST` -// - `INFO` -include!(concat!(env!("OUT_DIR"), "/encoding-riscv.rs")); -include!(concat!(env!("OUT_DIR"), "/legalize-riscv.rs")); diff --git a/cranelift/codegen/src/isa/legacy/riscv/mod.rs b/cranelift/codegen/src/isa/legacy/riscv/mod.rs deleted file mode 100644 index 2c1ebf1c85..0000000000 --- a/cranelift/codegen/src/isa/legacy/riscv/mod.rs +++ /dev/null @@ -1,304 +0,0 @@ -//! RISC-V Instruction Set Architecture. 
- -mod abi; -mod binemit; -mod enc_tables; -mod registers; -pub mod settings; - -use super::super::settings as shared_settings; -#[cfg(feature = "testing_hooks")] -use crate::binemit::CodeSink; -use crate::binemit::{emit_function, MemoryCodeSink}; -use crate::ir; -use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings}; -use crate::isa::Builder as IsaBuilder; -use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa}; -use crate::regalloc; -use alloc::{borrow::Cow, boxed::Box, vec::Vec}; -use core::any::Any; -use core::fmt; -use core::hash::{Hash, Hasher}; -use target_lexicon::{PointerWidth, Triple}; - -#[allow(dead_code)] -struct Isa { - triple: Triple, - shared_flags: shared_settings::Flags, - isa_flags: settings::Flags, - cpumode: &'static [shared_enc_tables::Level1Entry], -} - -/// Get an ISA builder for creating RISC-V targets. -pub fn isa_builder(triple: Triple) -> IsaBuilder { - IsaBuilder { - triple, - setup: settings::builder(), - constructor: isa_constructor, - } -} - -fn isa_constructor( - triple: Triple, - shared_flags: shared_settings::Flags, - builder: shared_settings::Builder, -) -> Box { - let level1 = match triple.pointer_width().unwrap() { - PointerWidth::U16 => panic!("16-bit RISC-V unrecognized"), - PointerWidth::U32 => &enc_tables::LEVEL1_RV32[..], - PointerWidth::U64 => &enc_tables::LEVEL1_RV64[..], - }; - Box::new(Isa { - triple, - isa_flags: settings::Flags::new(&shared_flags, builder), - shared_flags, - cpumode: level1, - }) -} - -impl TargetIsa for Isa { - fn name(&self) -> &'static str { - "riscv" - } - - fn triple(&self) -> &Triple { - &self.triple - } - - fn flags(&self) -> &shared_settings::Flags { - &self.shared_flags - } - - fn isa_flags(&self) -> Vec { - self.isa_flags.iter().collect() - } - - fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) { - self.shared_flags.hash(&mut hasher); - self.isa_flags.hash(&mut hasher); - } - - fn register_info(&self) -> RegInfo { - registers::INFO.clone() - } - - fn 
encoding_info(&self) -> EncInfo { - enc_tables::INFO.clone() - } - - fn legal_encodings<'a>( - &'a self, - func: &'a ir::Function, - inst: &'a ir::InstructionData, - ctrl_typevar: ir::Type, - ) -> Encodings<'a> { - lookup_enclist( - ctrl_typevar, - inst, - func, - self.cpumode, - &enc_tables::LEVEL2[..], - &enc_tables::ENCLISTS[..], - &enc_tables::LEGALIZE_ACTIONS[..], - &enc_tables::RECIPE_PREDICATES[..], - &enc_tables::INST_PREDICATES[..], - self.isa_flags.predicate_view(), - ) - } - - fn legalize_signature(&self, sig: &mut Cow, current: bool) { - abi::legalize_signature(sig, &self.triple, &self.isa_flags, current) - } - - fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass { - abi::regclass_for_abi_type(ty) - } - - fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet { - abi::allocatable_registers(func, &self.isa_flags) - } - - #[cfg(feature = "testing_hooks")] - fn emit_inst( - &self, - func: &ir::Function, - inst: ir::Inst, - divert: &mut regalloc::RegDiversions, - sink: &mut dyn CodeSink, - ) { - binemit::emit_inst(func, inst, divert, sink, self) - } - - fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) { - emit_function(func, binemit::emit_inst, sink, self) - } - - fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC { - unimplemented!() - } - - fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC { - unimplemented!() - } - - fn as_any(&self) -> &dyn Any { - self as &dyn Any - } -} - -#[cfg(test)] -mod tests { - use crate::ir::{immediates, types}; - use crate::ir::{Function, InstructionData, Opcode}; - use crate::isa; - use crate::settings::{self, Configurable}; - use alloc::string::{String, ToString}; - use core::str::FromStr; - use target_lexicon::triple; - - fn encstr(isa: &dyn isa::TargetIsa, enc: Result) -> String { - match enc { - Ok(e) => isa.encoding_info().display(e).to_string(), - Err(_) => "no encoding".to_string(), - } - } - - #[test] - fn test_64bitenc() { - 
let shared_builder = settings::builder(); - let shared_flags = settings::Flags::new(shared_builder); - let isa = isa::lookup(triple!("riscv64")) - .unwrap() - .finish(shared_flags); - - let mut func = Function::new(); - let block = func.dfg.make_block(); - let arg64 = func.dfg.append_block_param(block, types::I64); - let arg32 = func.dfg.append_block_param(block, types::I32); - - // Try to encode iadd_imm.i64 v1, -10. - let inst64 = InstructionData::BinaryImm64 { - opcode: Opcode::IaddImm, - arg: arg64, - imm: immediates::Imm64::new(-10), - }; - - // ADDI is I/0b00100 - assert_eq!( - encstr(&*isa, isa.encode(&func, &inst64, types::I64)), - "Ii#04" - ); - - // Try to encode iadd_imm.i64 v1, -10000. - let inst64_large = InstructionData::BinaryImm64 { - opcode: Opcode::IaddImm, - arg: arg64, - imm: immediates::Imm64::new(-10000), - }; - - // Immediate is out of range for ADDI. - assert!(isa.encode(&func, &inst64_large, types::I64).is_err()); - - // Create an iadd_imm.i32 which is encodable in RV64. - let inst32 = InstructionData::BinaryImm64 { - opcode: Opcode::IaddImm, - arg: arg32, - imm: immediates::Imm64::new(10), - }; - - // ADDIW is I/0b00110 - assert_eq!( - encstr(&*isa, isa.encode(&func, &inst32, types::I32)), - "Ii#06" - ); - } - - // Same as above, but for RV32. - #[test] - fn test_32bitenc() { - let shared_builder = settings::builder(); - let shared_flags = settings::Flags::new(shared_builder); - let isa = isa::lookup(triple!("riscv32")) - .unwrap() - .finish(shared_flags); - - let mut func = Function::new(); - let block = func.dfg.make_block(); - let arg64 = func.dfg.append_block_param(block, types::I64); - let arg32 = func.dfg.append_block_param(block, types::I32); - - // Try to encode iadd_imm.i64 v1, -10. - let inst64 = InstructionData::BinaryImm64 { - opcode: Opcode::IaddImm, - arg: arg64, - imm: immediates::Imm64::new(-10), - }; - - // In 32-bit mode, an i64 bit add should be narrowed. 
- assert!(isa.encode(&func, &inst64, types::I64).is_err()); - - // Try to encode iadd_imm.i64 v1, -10000. - let inst64_large = InstructionData::BinaryImm64 { - opcode: Opcode::IaddImm, - arg: arg64, - imm: immediates::Imm64::new(-10000), - }; - - // In 32-bit mode, an i64 bit add should be narrowed. - assert!(isa.encode(&func, &inst64_large, types::I64).is_err()); - - // Create an iadd_imm.i32 which is encodable in RV32. - let inst32 = InstructionData::BinaryImm64 { - opcode: Opcode::IaddImm, - arg: arg32, - imm: immediates::Imm64::new(10), - }; - - // ADDI is I/0b00100 - assert_eq!( - encstr(&*isa, isa.encode(&func, &inst32, types::I32)), - "Ii#04" - ); - - // Create an imul.i32 which is encodable in RV32, but only when use_m is true. - let mul32 = InstructionData::Binary { - opcode: Opcode::Imul, - args: [arg32, arg32], - }; - - assert!(isa.encode(&func, &mul32, types::I32).is_err()); - } - - #[test] - fn test_rv32m() { - let shared_builder = settings::builder(); - let shared_flags = settings::Flags::new(shared_builder); - - // Set the supports_m stting which in turn enables the use_m predicate that unlocks - // encodings for imul. - let mut isa_builder = isa::lookup(triple!("riscv32")).unwrap(); - isa_builder.enable("supports_m").unwrap(); - - let isa = isa_builder.finish(shared_flags); - - let mut func = Function::new(); - let block = func.dfg.make_block(); - let arg32 = func.dfg.append_block_param(block, types::I32); - - // Create an imul.i32 which is encodable in RV32M. 
- let mul32 = InstructionData::Binary { - opcode: Opcode::Imul, - args: [arg32, arg32], - }; - assert_eq!( - encstr(&*isa, isa.encode(&func, &mul32, types::I32)), - "R#10c" - ); - } -} - -impl fmt::Display for Isa { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}\n{}", self.shared_flags, self.isa_flags) - } -} diff --git a/cranelift/codegen/src/isa/legacy/riscv/registers.rs b/cranelift/codegen/src/isa/legacy/riscv/registers.rs deleted file mode 100644 index 9043b7f65f..0000000000 --- a/cranelift/codegen/src/isa/legacy/riscv/registers.rs +++ /dev/null @@ -1,50 +0,0 @@ -//! RISC-V register descriptions. - -use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit}; - -include!(concat!(env!("OUT_DIR"), "/registers-riscv.rs")); - -#[cfg(test)] -mod tests { - use super::{FPR, GPR, INFO}; - use crate::isa::RegUnit; - use alloc::string::{String, ToString}; - - #[test] - fn unit_encodings() { - assert_eq!(INFO.parse_regunit("x0"), Some(0)); - assert_eq!(INFO.parse_regunit("x31"), Some(31)); - assert_eq!(INFO.parse_regunit("f0"), Some(32)); - assert_eq!(INFO.parse_regunit("f31"), Some(63)); - - assert_eq!(INFO.parse_regunit("x32"), None); - assert_eq!(INFO.parse_regunit("f32"), None); - } - - #[test] - fn unit_names() { - fn uname(ru: RegUnit) -> String { - INFO.display_regunit(ru).to_string() - } - - assert_eq!(uname(0), "%x0"); - assert_eq!(uname(1), "%x1"); - assert_eq!(uname(31), "%x31"); - assert_eq!(uname(32), "%f0"); - assert_eq!(uname(33), "%f1"); - assert_eq!(uname(63), "%f31"); - assert_eq!(uname(64), "%INVALID64"); - } - - #[test] - fn classes() { - assert!(GPR.contains(GPR.unit(0))); - assert!(GPR.contains(GPR.unit(31))); - assert!(!FPR.contains(GPR.unit(0))); - assert!(!FPR.contains(GPR.unit(31))); - assert!(!GPR.contains(FPR.unit(0))); - assert!(!GPR.contains(FPR.unit(31))); - assert!(FPR.contains(FPR.unit(0))); - assert!(FPR.contains(FPR.unit(31))); - } -} diff --git 
a/cranelift/codegen/src/isa/legacy/riscv/settings.rs b/cranelift/codegen/src/isa/legacy/riscv/settings.rs deleted file mode 100644 index 3da9f491fd..0000000000 --- a/cranelift/codegen/src/isa/legacy/riscv/settings.rs +++ /dev/null @@ -1,56 +0,0 @@ -//! RISC-V Settings. - -use crate::settings::{self, detail, Builder, Value}; -use core::fmt; - -// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a -// public `Flags` struct with an impl for all of the settings defined in -// `cranelift-codegen/meta/src/isa/riscv/mod.rs`. -include!(concat!(env!("OUT_DIR"), "/settings-riscv.rs")); - -#[cfg(test)] -mod tests { - use super::{builder, Flags}; - use crate::settings::{self, Configurable}; - use alloc::string::ToString; - - #[test] - fn display_default() { - let shared = settings::Flags::new(settings::builder()); - let b = builder(); - let f = Flags::new(&shared, b); - assert_eq!( - f.to_string(), - "[riscv]\n\ - supports_m = false\n\ - supports_a = false\n\ - supports_f = false\n\ - supports_d = false\n\ - enable_m = true\n\ - enable_e = false\n" - ); - // Predicates are not part of the Display output. 
- assert_eq!(f.full_float(), false); - } - - #[test] - fn predicates() { - let mut sb = settings::builder(); - sb.set("enable_simd", "true").unwrap(); - let shared = settings::Flags::new(sb); - let mut b = builder(); - b.enable("supports_f").unwrap(); - b.enable("supports_d").unwrap(); - let f = Flags::new(&shared, b); - assert_eq!(f.full_float(), true); - - let mut sb = settings::builder(); - sb.set("enable_simd", "false").unwrap(); - let shared = settings::Flags::new(sb); - let mut b = builder(); - b.enable("supports_f").unwrap(); - b.enable("supports_d").unwrap(); - let f = Flags::new(&shared, b); - assert_eq!(f.full_float(), false); - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/abi.rs b/cranelift/codegen/src/isa/legacy/x86/abi.rs deleted file mode 100644 index 934cfec4dd..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/abi.rs +++ /dev/null @@ -1,1102 +0,0 @@ -//! x86 ABI implementation. - -use super::super::super::settings as shared_settings; -use super::registers::{FPR, GPR, RU}; -use super::settings as isa_settings; -use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion}; -use crate::cursor::{Cursor, CursorPosition, EncCursor}; -use crate::ir; -use crate::ir::immediates::Imm64; -use crate::ir::stackslot::{StackOffset, StackSize}; -use crate::ir::types; -use crate::ir::{ - get_probestack_funcref, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder, - ValueLoc, -}; -use crate::isa::{CallConv, RegClass, RegUnit, TargetIsa}; -use crate::regalloc::RegisterSet; -use crate::result::CodegenResult; -use crate::stack_layout::layout_stack; -use alloc::borrow::Cow; -use core::i32; -use target_lexicon::{PointerWidth, Triple}; - -/// Argument registers for x86-64 -static ARG_GPRS: [RU; 6] = [RU::rdi, RU::rsi, RU::rdx, RU::rcx, RU::r8, RU::r9]; - -/// Return value registers. 
-static RET_GPRS: [RU; 3] = [RU::rax, RU::rdx, RU::rcx]; - -/// Argument registers for x86-64, when using windows fastcall -static ARG_GPRS_WIN_FASTCALL_X64: [RU; 4] = [RU::rcx, RU::rdx, RU::r8, RU::r9]; - -/// Return value registers for x86-64, when using windows fastcall -static RET_GPRS_WIN_FASTCALL_X64: [RU; 1] = [RU::rax]; - -/// The win64 fastcall ABI uses some shadow stack space, allocated by the caller, that can be used -/// by the callee for temporary values. -/// -/// [1] "Space is allocated on the call stack as a shadow store for callees to save" This shadow -/// store contains the parameters which are passed through registers (ARG_GPRS) and is eventually -/// used by the callee to save & restore the values of the arguments. -/// -/// [2] https://blogs.msdn.microsoft.com/oldnewthing/20110302-00/?p=11333 "Although the x64 calling -/// convention reserves spill space for parameters, you don’t have to use them as such" -const WIN_SHADOW_STACK_SPACE: StackSize = 32; - -/// Stack alignment requirement for functions. -/// -/// 16 bytes is the perfect stack alignment, because: -/// -/// - On Win64, "The primary exceptions are the stack pointer and malloc or alloca memory, which -/// are aligned to 16 bytes in order to aid performance". -/// - The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but newer versions use a -/// 16-byte aligned stack pointer. -/// - This allows using aligned loads and stores on SIMD vectors of 16 bytes that are located -/// higher up in the stack. 
-const STACK_ALIGNMENT: u32 = 16; - -#[derive(Clone)] -struct Args { - pointer_bytes: u8, - pointer_bits: u8, - pointer_type: ir::Type, - gpr: &'static [RU], - gpr_used: usize, - fpr_limit: usize, - fpr_used: usize, - offset: u32, - call_conv: CallConv, - shared_flags: shared_settings::Flags, - #[allow(dead_code)] - isa_flags: isa_settings::Flags, - assigning_returns: bool, -} - -impl Args { - fn new( - bits: u8, - gpr: &'static [RU], - fpr_limit: usize, - call_conv: CallConv, - shared_flags: &shared_settings::Flags, - isa_flags: &isa_settings::Flags, - assigning_returns: bool, - ) -> Self { - let offset = if call_conv.extends_windows_fastcall() { - WIN_SHADOW_STACK_SPACE - } else { - 0 - }; - - Self { - pointer_bytes: bits / 8, - pointer_bits: bits, - pointer_type: ir::Type::int(u16::from(bits)).unwrap(), - gpr, - gpr_used: 0, - fpr_limit, - fpr_used: 0, - offset, - call_conv, - shared_flags: shared_flags.clone(), - isa_flags: isa_flags.clone(), - assigning_returns, - } - } -} - -impl ArgAssigner for Args { - fn assign(&mut self, arg: &AbiParam) -> ArgAction { - if let ArgumentPurpose::StructArgument(size) = arg.purpose { - if self.call_conv != CallConv::SystemV { - panic!( - "The sarg argument purpose is not yet implemented for non-systemv call conv {:?}", - self.call_conv, - ); - } - let loc = ArgumentLoc::Stack(self.offset as i32); - self.offset += size; - debug_assert!(self.offset <= i32::MAX as u32); - return ArgAction::AssignAndChangeType(loc, types::SARG_T); - } - - let ty = arg.value_type; - - if ty.bits() > u16::from(self.pointer_bits) { - if !self.assigning_returns && self.call_conv.extends_windows_fastcall() { - // "Any argument that doesn't fit in 8 bytes, or isn't - // 1, 2, 4, or 8 bytes, must be passed by reference" - return ValueConversion::Pointer(self.pointer_type).into(); - } else if !ty.is_vector() && !ty.is_float() { - // On SystemV large integers and booleans are broken down to fit in a register. 
- return ValueConversion::IntSplit.into(); - } - } - - // Vectors should stay in vector registers unless SIMD is not enabled--then they are split - if ty.is_vector() { - if self.shared_flags.enable_simd() { - let reg = FPR.unit(self.fpr_used); - self.fpr_used += 1; - return ArgumentLoc::Reg(reg).into(); - } - return ValueConversion::VectorSplit.into(); - } - - // Small integers are extended to the size of a pointer register, but - // only in ABIs that require this. The Baldrdash (SpiderMonkey) ABI - // does, but our other supported ABIs on x86 do not. - if ty.is_int() - && ty.bits() < u16::from(self.pointer_bits) - && self.call_conv.extends_baldrdash() - { - match arg.extension { - ArgumentExtension::None => {} - ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(), - ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(), - } - } - - // Handle special-purpose arguments. - if ty.is_int() && self.call_conv.extends_baldrdash() { - match arg.purpose { - // This is SpiderMonkey's `WasmTlsReg`. - ArgumentPurpose::VMContext => { - return ArgumentLoc::Reg(if self.pointer_bits == 64 { - RU::r14 - } else { - RU::rsi - } as RegUnit) - .into(); - } - // This is SpiderMonkey's `WasmTableCallSigReg`. - ArgumentPurpose::SignatureId => { - return ArgumentLoc::Reg(if self.pointer_bits == 64 { - RU::r10 - } else { - RU::rcx - } as RegUnit) - .into() - } - _ => {} - } - } - - // Try to use a GPR. - if !ty.is_float() && self.gpr_used < self.gpr.len() { - let reg = self.gpr[self.gpr_used] as RegUnit; - self.gpr_used += 1; - return ArgumentLoc::Reg(reg).into(); - } - - // Try to use an FPR. - let fpr_offset = if self.call_conv.extends_windows_fastcall() { - // Float and general registers on windows share the same parameter index. - // The used register depends entirely on the parameter index: Even if XMM0 - // is not used for the first parameter, it cannot be used for the second parameter. 
- debug_assert_eq!(self.fpr_limit, self.gpr.len()); - &mut self.gpr_used - } else { - &mut self.fpr_used - }; - - if ty.is_float() && *fpr_offset < self.fpr_limit { - let reg = FPR.unit(*fpr_offset); - *fpr_offset += 1; - return ArgumentLoc::Reg(reg).into(); - } - - // Assign a stack location. - let loc = ArgumentLoc::Stack(self.offset as i32); - self.offset += u32::from(self.pointer_bytes); - debug_assert!(self.offset <= i32::MAX as u32); - loc.into() - } -} - -/// Legalize `sig`. -pub fn legalize_signature( - sig: &mut Cow, - triple: &Triple, - _current: bool, - shared_flags: &shared_settings::Flags, - isa_flags: &isa_settings::Flags, -) { - let bits; - let mut args; - - match triple.pointer_width().unwrap() { - PointerWidth::U16 => panic!(), - PointerWidth::U32 => { - bits = 32; - args = Args::new(bits, &[], 0, sig.call_conv, shared_flags, isa_flags, false); - } - PointerWidth::U64 => { - bits = 64; - args = if sig.call_conv.extends_windows_fastcall() { - Args::new( - bits, - &ARG_GPRS_WIN_FASTCALL_X64[..], - 4, - sig.call_conv, - shared_flags, - isa_flags, - false, - ) - } else { - Args::new( - bits, - &ARG_GPRS[..], - 8, - sig.call_conv, - shared_flags, - isa_flags, - false, - ) - }; - } - } - - let (ret_regs, ret_fpr_limit) = if sig.call_conv.extends_windows_fastcall() { - // windows-x64 calling convention only uses XMM0 or RAX for return values - (&RET_GPRS_WIN_FASTCALL_X64[..], 1) - } else { - (&RET_GPRS[..], 2) - }; - - let mut rets = Args::new( - bits, - ret_regs, - ret_fpr_limit, - sig.call_conv, - shared_flags, - isa_flags, - true, - ); - - // If we don't have enough available return registers - // to fit all of the return values, we need to backtrack and start - // assigning locations all over again with a different strategy. In order to - // do that, we need a copy of the original assigner for the returns. 
- let mut backup_rets = rets.clone(); - - if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) { - if new_returns - .iter() - .filter(|r| r.purpose == ArgumentPurpose::Normal) - .any(|r| !r.location.is_reg()) - { - // The return values couldn't all fit into available return - // registers. Introduce the use of a struct-return parameter. - debug_assert!(!sig.uses_struct_return_param()); - - // We're using the first register for the return pointer parameter. - let mut ret_ptr_param = AbiParam { - value_type: args.pointer_type, - purpose: ArgumentPurpose::StructReturn, - extension: ArgumentExtension::None, - location: ArgumentLoc::Unassigned, - legalized_to_pointer: false, - }; - match args.assign(&ret_ptr_param) { - ArgAction::Assign(ArgumentLoc::Reg(reg)) => { - ret_ptr_param.location = ArgumentLoc::Reg(reg); - sig.to_mut().params.push(ret_ptr_param); - } - _ => unreachable!("return pointer should always get a register assignment"), - } - - // We're using the first return register for the return pointer (like - // sys v does). - let mut ret_ptr_return = AbiParam { - value_type: args.pointer_type, - purpose: ArgumentPurpose::StructReturn, - extension: ArgumentExtension::None, - location: ArgumentLoc::Unassigned, - legalized_to_pointer: false, - }; - match backup_rets.assign(&ret_ptr_return) { - ArgAction::Assign(ArgumentLoc::Reg(reg)) => { - ret_ptr_return.location = ArgumentLoc::Reg(reg); - sig.to_mut().returns.push(ret_ptr_return); - } - _ => unreachable!("return pointer should always get a register assignment"), - } - - sig.to_mut().returns.retain(|ret| { - // Either this is the return pointer, in which case we want to keep - // it, or else assume that it is assigned for a reason and doesn't - // conflict with our return pointering legalization. 
- debug_assert_eq!( - ret.location.is_assigned(), - ret.purpose != ArgumentPurpose::Normal - ); - ret.location.is_assigned() - }); - - if let Some(new_returns) = legalize_args(&sig.returns, &mut backup_rets) { - sig.to_mut().returns = new_returns; - } - } else { - sig.to_mut().returns = new_returns; - } - } - - if let Some(new_params) = legalize_args(&sig.params, &mut args) { - sig.to_mut().params = new_params; - } -} - -/// Get register class for a type appearing in a legalized signature. -pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass { - if ty.is_int() || ty.is_bool() || ty.is_ref() { - GPR - } else { - FPR - } -} - -/// Get the set of allocatable registers for `func`. -pub fn allocatable_registers(triple: &Triple, flags: &shared_settings::Flags) -> RegisterSet { - let mut regs = RegisterSet::new(); - regs.take(GPR, RU::rsp as RegUnit); - regs.take(GPR, RU::rbp as RegUnit); - - // 32-bit arch only has 8 registers. - if triple.pointer_width().unwrap() != PointerWidth::U64 { - for i in 8..16 { - regs.take(GPR, GPR.unit(i)); - regs.take(FPR, FPR.unit(i)); - } - if flags.enable_pinned_reg() { - unimplemented!("Pinned register not implemented on x86-32."); - } - } else { - // Choose r15 as the pinned register on 64-bits: it is non-volatile on native ABIs and - // isn't the fixed output register of any instruction. - if flags.enable_pinned_reg() { - regs.take(GPR, RU::r15 as RegUnit); - } - } - - regs -} - -/// Get the set of callee-saved general-purpose registers. -fn callee_saved_gprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] { - match isa.triple().pointer_width().unwrap() { - PointerWidth::U16 => panic!(), - PointerWidth::U32 => &[RU::rbx, RU::rsi, RU::rdi], - PointerWidth::U64 => { - if call_conv.extends_windows_fastcall() { - // "registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15, and XMM6-15 are - // considered nonvolatile and must be saved and restored by a function that uses - // them." 
- // as per https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention - // RSP & RBP are not listed below, since they are restored automatically during - // a function call. If that wasn't the case, function calls (RET) would not work. - &[ - RU::rbx, - RU::rdi, - RU::rsi, - RU::r12, - RU::r13, - RU::r14, - RU::r15, - ] - } else { - &[RU::rbx, RU::r12, RU::r13, RU::r14, RU::r15] - } - } - } -} - -/// Get the set of callee-saved floating-point (SIMD) registers. -fn callee_saved_fprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] { - match isa.triple().pointer_width().unwrap() { - PointerWidth::U16 => panic!(), - PointerWidth::U32 => &[], - PointerWidth::U64 => { - if call_conv.extends_windows_fastcall() { - // "registers RBX, ... , and XMM6-15 are considered nonvolatile and must be saved - // and restored by a function that uses them." - // as per https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention as of - // February 5th, 2020. - &[ - RU::xmm6, - RU::xmm7, - RU::xmm8, - RU::xmm9, - RU::xmm10, - RU::xmm11, - RU::xmm12, - RU::xmm13, - RU::xmm14, - RU::xmm15, - ] - } else { - &[] - } - } - } -} - -/// Get the set of callee-saved registers that are used. -fn callee_saved_regs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterSet { - let mut all_callee_saved = RegisterSet::empty(); - for reg in callee_saved_gprs(isa, func.signature.call_conv) { - all_callee_saved.free(GPR, *reg as RegUnit); - } - for reg in callee_saved_fprs(isa, func.signature.call_conv) { - all_callee_saved.free(FPR, *reg as RegUnit); - } - - let mut used = RegisterSet::empty(); - for value_loc in func.locations.values() { - // Note that `value_loc` here contains only a single unit of a potentially multi-unit - // register. We don't use registers that overlap each other in the x86 ISA, but in others - // we do. So this should not be blindly reused. 
- if let ValueLoc::Reg(ru) = *value_loc { - if GPR.contains(ru) { - if !used.is_avail(GPR, ru) { - used.free(GPR, ru); - } - } else if FPR.contains(ru) { - if !used.is_avail(FPR, ru) { - used.free(FPR, ru); - } - } - } - } - - // regmove and regfill instructions may temporarily divert values into other registers, - // and these are not reflected in `func.locations`. Scan the function for such instructions - // and note which callee-saved registers they use. - // - // TODO: Consider re-evaluating how regmove/regfill/regspill work and whether it's possible - // to avoid this step. - for block in &func.layout { - for inst in func.layout.block_insts(block) { - match func.dfg[inst] { - ir::instructions::InstructionData::RegMove { dst, .. } - | ir::instructions::InstructionData::RegFill { dst, .. } => { - if GPR.contains(dst) { - if !used.is_avail(GPR, dst) { - used.free(GPR, dst); - } - } else if FPR.contains(dst) { - if !used.is_avail(FPR, dst) { - used.free(FPR, dst); - } - } - } - _ => (), - } - } - } - - used.intersect(&all_callee_saved); - used -} - -pub fn prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> { - match func.signature.call_conv { - // For now, just translate fast and cold as system_v. 
- CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => { - system_v_prologue_epilogue(func, isa) - } - CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => { - fastcall_prologue_epilogue(func, isa) - } - CallConv::BaldrdashSystemV | CallConv::BaldrdashWindows => { - baldrdash_prologue_epilogue(func, isa) - } - CallConv::Probestack => unimplemented!("probestack calling convention"), - CallConv::Baldrdash2020 => unimplemented!("Baldrdash ABI 2020"), - CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(), - } -} - -fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> { - debug_assert!( - !isa.flags().enable_probestack(), - "baldrdash does not expect cranelift to emit stack probes" - ); - - let word_size = StackSize::from(isa.pointer_bytes()); - let shadow_store_size = if func.signature.call_conv.extends_windows_fastcall() { - WIN_SHADOW_STACK_SPACE - } else { - 0 - }; - - let bytes = - StackSize::from(isa.flags().baldrdash_prologue_words()) * word_size + shadow_store_size; - - let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes); - ss.offset = Some(-(bytes as StackOffset)); - func.stack_slots.push(ss); - - let is_leaf = func.is_leaf(); - layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)?; - Ok(()) -} - -/// Implementation of the fastcall-based Win64 calling convention described at [1] -/// [1] https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention -fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> { - if isa.triple().pointer_width().unwrap() != PointerWidth::U64 { - panic!("TODO: windows-fastcall: x86-32 not implemented yet"); - } - - // The reserved stack area is composed of: - // return address + frame pointer + all callee-saved registers - // - // Pushing the return address is an implicit function of the `call` - // instruction. 
Each of the others we will then push explicitly. Then we - // will adjust the stack pointer to make room for the rest of the required - // space for this frame. - let csrs = callee_saved_regs_used(isa, func); - let gpsr_stack_size = ((csrs.iter(GPR).len() + 2) * isa.pointer_bytes() as usize) as u32; - let fpsr_stack_size = (csrs.iter(FPR).len() * types::F64X2.bytes() as usize) as u32; - let mut csr_stack_size = gpsr_stack_size + fpsr_stack_size; - - // FPRs must be saved with 16-byte alignment; because they follow the GPRs on the stack, align if needed - if fpsr_stack_size > 0 { - csr_stack_size = (csr_stack_size + 15) & !15; - } - - func.create_stack_slot(ir::StackSlotData { - kind: ir::StackSlotKind::IncomingArg, - size: csr_stack_size, - offset: Some(-(csr_stack_size as StackOffset)), - }); - - let is_leaf = func.is_leaf(); - - // If not a leaf function, allocate an explicit stack slot at the end of the space for the callee's shadow space - if !is_leaf { - // TODO: eventually use the caller-provided shadow store as spill slot space when laying out the stack - func.create_stack_slot(ir::StackSlotData { - kind: ir::StackSlotKind::ExplicitSlot, - size: WIN_SHADOW_STACK_SPACE, - offset: None, - }); - } - - let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? 
as i32; - - // Subtract the GPR saved register size from the local size because pushes are used for the saves - let local_stack_size = i64::from(total_stack_size - gpsr_stack_size as i32); - - // Add CSRs to function signature - let reg_type = isa.pointer_type(); - let sp_arg_index = if fpsr_stack_size > 0 { - let sp_arg = ir::AbiParam::special_reg( - reg_type, - ir::ArgumentPurpose::CalleeSaved, - RU::rsp as RegUnit, - ); - let index = func.signature.params.len(); - func.signature.params.push(sp_arg); - Some(index) - } else { - None - }; - let fp_arg = ir::AbiParam::special_reg( - reg_type, - ir::ArgumentPurpose::FramePointer, - RU::rbp as RegUnit, - ); - func.signature.params.push(fp_arg); - func.signature.returns.push(fp_arg); - - for gp_csr in csrs.iter(GPR) { - let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, gp_csr); - func.signature.params.push(csr_arg); - func.signature.returns.push(csr_arg); - } - - for fp_csr in csrs.iter(FPR) { - // The calling convention described in - // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention only requires - // preserving the low 128 bits of XMM6-XMM15. - let csr_arg = - ir::AbiParam::special_reg(types::F64X2, ir::ArgumentPurpose::CalleeSaved, fp_csr); - func.signature.params.push(csr_arg); - func.signature.returns.push(csr_arg); - } - - // Set up the cursor and insert the prologue - let entry_block = func.layout.entry_block().expect("missing entry block"); - let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block); - insert_common_prologue( - &mut pos, - local_stack_size, - reg_type, - &csrs, - sp_arg_index.is_some(), - isa, - ); - - // Reset the cursor and insert the epilogue - let mut pos = pos.at_position(CursorPosition::Nowhere); - insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index); - - Ok(()) -} - -/// Insert a System V-compatible prologue and epilogue. 
-fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> { - let pointer_width = isa.triple().pointer_width().unwrap(); - let word_size = pointer_width.bytes() as usize; - - let csrs = callee_saved_regs_used(isa, func); - assert!( - csrs.iter(FPR).len() == 0, - "SysV ABI does not have callee-save SIMD registers" - ); - - // The reserved stack area is composed of: - // return address + frame pointer + all callee-saved registers - // - // Pushing the return address is an implicit function of the `call` - // instruction. Each of the others we will then push explicitly. Then we - // will adjust the stack pointer to make room for the rest of the required - // space for this frame. - let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32; - func.create_stack_slot(ir::StackSlotData { - kind: ir::StackSlotKind::IncomingArg, - size: csr_stack_size as u32, - offset: Some(-csr_stack_size), - }); - - let is_leaf = func.is_leaf(); - let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32; - let local_stack_size = i64::from(total_stack_size - csr_stack_size); - - // Add CSRs to function signature - let reg_type = ir::Type::int(u16::from(pointer_width.bits())).unwrap(); - // On X86-32 all parameters, including vmctx, are passed on stack, and we need - // to extract vmctx from the stack before we can save the frame pointer. 
- let sp_arg_index = if isa.pointer_bits() == 32 { - let sp_arg = ir::AbiParam::special_reg( - reg_type, - ir::ArgumentPurpose::CalleeSaved, - RU::rsp as RegUnit, - ); - let index = func.signature.params.len(); - func.signature.params.push(sp_arg); - Some(index) - } else { - None - }; - let fp_arg = ir::AbiParam::special_reg( - reg_type, - ir::ArgumentPurpose::FramePointer, - RU::rbp as RegUnit, - ); - func.signature.params.push(fp_arg); - func.signature.returns.push(fp_arg); - - for csr in csrs.iter(GPR) { - let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr); - func.signature.params.push(csr_arg); - func.signature.returns.push(csr_arg); - } - - // Set up the cursor and insert the prologue - let entry_block = func.layout.entry_block().expect("missing entry block"); - let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block); - insert_common_prologue( - &mut pos, - local_stack_size, - reg_type, - &csrs, - sp_arg_index.is_some(), - isa, - ); - - // Reset the cursor and insert the epilogue - let mut pos = pos.at_position(CursorPosition::Nowhere); - insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index); - - Ok(()) -} - -/// Insert the prologue for a given function. -/// This is used by common calling conventions such as System V. -fn insert_common_prologue( - pos: &mut EncCursor, - stack_size: i64, - reg_type: ir::types::Type, - csrs: &RegisterSet, - has_sp_param: bool, - isa: &dyn TargetIsa, -) { - let sp = if has_sp_param { - let block = pos.current_block().expect("missing block under cursor"); - let sp = pos.func.dfg.append_block_param(block, reg_type); - pos.func.locations[sp] = ir::ValueLoc::Reg(RU::rsp as RegUnit); - Some(sp) - } else { - None - }; - - // If this is a leaf function with zero stack, then there's no need to - // insert a stack check since it can't overflow anything and - // forward-progress is guarantee so long as loop are handled anyway. 
- // - // If this has a stack size it could stack overflow, or if it isn't a leaf - // it could be part of a long call chain which we need to check anyway. - // - // First we look for the stack limit as a special argument to the function, - // and failing that we see if a custom stack limit factory has been provided - // which will be used to likely calculate the stack limit from the arguments - // or perhaps constants. - if stack_size > 0 || !pos.func.is_leaf() { - let scratch = ir::ValueLoc::Reg(RU::rax as RegUnit); - let stack_limit_arg = match pos.func.special_param(ArgumentPurpose::StackLimit) { - Some(arg) => { - let copy = pos.ins().copy(arg); - pos.func.locations[copy] = scratch; - Some(copy) - } - None => pos - .func - .stack_limit - .map(|gv| interpret_gv(pos, gv, sp, scratch)), - }; - if let Some(stack_limit_arg) = stack_limit_arg { - insert_stack_check(pos, stack_size, stack_limit_arg); - } - } - - // Append param to entry block - let block = pos.current_block().expect("missing block under cursor"); - let fp = pos.func.dfg.append_block_param(block, reg_type); - pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit); - - pos.ins().x86_push(fp); - - let mov_sp_inst = pos - .ins() - .copy_special(RU::rsp as RegUnit, RU::rbp as RegUnit); - - let mut last_csr_push = None; - for reg in csrs.iter(GPR) { - // Append param to entry block - let csr_arg = pos.func.dfg.append_block_param(block, reg_type); - - // Assign it a location - pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg); - last_csr_push = Some(pos.ins().x86_push(csr_arg)); - } - - // Allocate stack frame storage. - let mut adjust_sp_inst = None; - if stack_size > 0 { - if isa.flags().enable_probestack() && stack_size > (1 << isa.flags().probestack_size_log2()) - { - // Emit a stack probe. - let rax = RU::rax as RegUnit; - let rax_val = ir::ValueLoc::Reg(rax); - - // The probestack function expects its input in %rax. 
- let arg = pos.ins().iconst(reg_type, stack_size); - pos.func.locations[arg] = rax_val; - - // Call the probestack function. - let callee = get_probestack_funcref(pos.func, reg_type, rax, isa); - - // Make the call. - let call = if !isa.flags().is_pic() - && isa.triple().pointer_width().unwrap() == PointerWidth::U64 - && !pos.func.dfg.ext_funcs[callee].colocated - { - // 64-bit non-PIC non-colocated calls need to be legalized to call_indirect. - // Use r11 as it may be clobbered under all supported calling conventions. - let r11 = RU::r11 as RegUnit; - let sig = pos.func.dfg.ext_funcs[callee].signature; - let addr = pos.ins().func_addr(reg_type, callee); - pos.func.locations[addr] = ir::ValueLoc::Reg(r11); - pos.ins().call_indirect(sig, addr, &[arg]) - } else { - // Otherwise just do a normal call. - pos.ins().call(callee, &[arg]) - }; - - // If the probestack function doesn't adjust sp, do it ourselves. - if !isa.flags().probestack_func_adjusts_sp() { - let result = pos.func.dfg.inst_results(call)[0]; - pos.func.locations[result] = rax_val; - adjust_sp_inst = Some(pos.ins().adjust_sp_down(result)); - } - } else { - // Simply decrement the stack pointer. - adjust_sp_inst = Some(pos.ins().adjust_sp_down_imm(Imm64::new(stack_size))); - } - } - - // With the stack pointer adjusted, save any callee-saved floating point registers via offset - // FPR saves are at the highest addresses of the local frame allocation, immediately following the GPR pushes - let mut last_fpr_save = None; - - for (i, reg) in csrs.iter(FPR).enumerate() { - // Append param to entry block - let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2); - - // Since regalloc has already run, we must assign a location. 
- pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg); - - // Offset to where the register is saved relative to RSP, accounting for FPR save alignment - let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64 - + (stack_size % types::F64X2.bytes() as i64); - - last_fpr_save = Some(pos.ins().store( - ir::MemFlags::trusted(), - csr_arg, - sp.expect("FPR save requires SP param"), - (stack_size - offset) as i32, - )); - } - - pos.func.prologue_end = Some( - last_fpr_save - .or(adjust_sp_inst) - .or(last_csr_push) - .unwrap_or(mov_sp_inst), - ); -} - -/// Inserts code necessary to calculate `gv`. -/// -/// Note that this is typically done with `ins().global_value(...)` but that -/// requires legalization to run to encode it, and we're running super late -/// here in the backend where legalization isn't possible. To get around this -/// we manually interpret the `gv` specified and do register allocation for -/// intermediate values. -/// -/// This is an incomplete implementation of loading `GlobalValue` values to get -/// compared to the stack pointer, but currently it serves enough functionality -/// to get this implemented in `wasmtime` itself. This'll likely get expanded a -/// bit over time! -fn interpret_gv( - pos: &mut EncCursor, - gv: ir::GlobalValue, - sp: Option, - scratch: ir::ValueLoc, -) -> ir::Value { - match pos.func.global_values[gv] { - ir::GlobalValueData::VMContext => { - let vmctx_index = pos - .func - .signature - .special_param_index(ir::ArgumentPurpose::VMContext) - .expect("no vmcontext parameter found"); - match pos.func.signature.params[vmctx_index] { - AbiParam { - location: ArgumentLoc::Reg(_), - .. - } => { - let entry = pos.func.layout.entry_block().unwrap(); - pos.func.dfg.block_params(entry)[vmctx_index] - } - AbiParam { - location: ArgumentLoc::Stack(offset), - value_type, - .. 
- } => { - let offset = - offset + i32::from(pos.isa.pointer_bytes() * (1 + vmctx_index as u8)); - // The following access can be marked `trusted` because it is a load of an argument. We - // know it is safe because it was safe to write it in preparing this function call. - let ret = - pos.ins() - .load(value_type, ir::MemFlags::trusted(), sp.unwrap(), offset); - pos.func.locations[ret] = scratch; - return ret; - } - AbiParam { - location: ArgumentLoc::Unassigned, - .. - } => unreachable!(), - } - } - ir::GlobalValueData::Load { - base, - offset, - global_type, - readonly: _, - } => { - let base = interpret_gv(pos, base, sp, scratch); - let ret = pos - .ins() - .load(global_type, ir::MemFlags::trusted(), base, offset); - pos.func.locations[ret] = scratch; - return ret; - } - ref other => panic!("global value for stack limit not supported: {}", other), - } -} - -/// Insert a check that generates a trap if the stack pointer goes -/// below a value in `stack_limit_arg`. -fn insert_stack_check(pos: &mut EncCursor, stack_size: i64, stack_limit_arg: ir::Value) { - use crate::ir::condcodes::IntCC; - - // Our stack pointer, after subtracting `stack_size`, must not be below - // `stack_limit_arg`. To do this we're going to add `stack_size` to - // `stack_limit_arg` and see if the stack pointer is below that. The - // `stack_size + stack_limit_arg` computation might overflow, however, due - // to how stack limits may be loaded and set externally to trigger a trap. - // - // To handle this we'll need an extra comparison to see if the stack - // pointer is already below `stack_limit_arg`. Most of the time this - // isn't necessary though since the stack limit which triggers a trap is - // likely a sentinel somewhere around `usize::max_value()`. In that case - // only conditionally emit this pre-flight check. That way most functions - // only have the one comparison, but are also guaranteed that if we add - // `stack_size` to `stack_limit_arg` is won't overflow. 
- // - // This does mean that code generators which use this stack check - // functionality need to ensure that values stored into the stack limit - // will never overflow if this threshold is added. - if stack_size >= 32 * 1024 { - let cflags = pos.ins().ifcmp_sp(stack_limit_arg); - pos.func.locations[cflags] = ir::ValueLoc::Reg(RU::rflags as RegUnit); - pos.ins().trapif( - IntCC::UnsignedGreaterThanOrEqual, - cflags, - ir::TrapCode::StackOverflow, - ); - } - - // Copy `stack_limit_arg` into a %rax and use it for calculating - // a SP threshold. - let sp_threshold = pos.ins().iadd_imm(stack_limit_arg, stack_size); - pos.func.locations[sp_threshold] = ir::ValueLoc::Reg(RU::rax as RegUnit); - - // If the stack pointer currently reaches the SP threshold or below it then after opening - // the current stack frame, the current stack pointer will reach the limit. - let cflags = pos.ins().ifcmp_sp(sp_threshold); - pos.func.locations[cflags] = ir::ValueLoc::Reg(RU::rflags as RegUnit); - pos.ins().trapif( - IntCC::UnsignedGreaterThanOrEqual, - cflags, - ir::TrapCode::StackOverflow, - ); -} - -/// Find all `return` instructions and insert epilogues before them. -fn insert_common_epilogues( - pos: &mut EncCursor, - stack_size: i64, - reg_type: ir::types::Type, - csrs: &RegisterSet, - sp_arg_index: Option, -) { - while let Some(block) = pos.next_block() { - pos.goto_last_inst(block); - if let Some(inst) = pos.current_inst() { - if pos.func.dfg[inst].opcode().is_return() { - insert_common_epilogue(inst, block, stack_size, pos, reg_type, csrs, sp_arg_index); - } - } - } -} - -/// Insert an epilogue given a specific `return` instruction. -/// This is used by common calling conventions such as System V. 
-fn insert_common_epilogue( - inst: ir::Inst, - block: ir::Block, - stack_size: i64, - pos: &mut EncCursor, - reg_type: ir::types::Type, - csrs: &RegisterSet, - sp_arg_index: Option, -) { - // Insert the pop of the frame pointer - let fp_pop = pos.ins().x86_pop(reg_type); - let fp_pop_inst = pos.prev_inst().unwrap(); - pos.func.locations[fp_pop] = ir::ValueLoc::Reg(RU::rbp as RegUnit); - pos.func.dfg.append_inst_arg(inst, fp_pop); - - // Insert the CSR pops - let mut first_csr_pop_inst = None; - for reg in csrs.iter(GPR) { - let csr_pop = pos.ins().x86_pop(reg_type); - first_csr_pop_inst = pos.prev_inst(); - assert!(first_csr_pop_inst.is_some()); - pos.func.locations[csr_pop] = ir::ValueLoc::Reg(reg); - pos.func.dfg.append_inst_arg(inst, csr_pop); - } - - // Insert the adjustment of SP - let mut sp_adjust_inst = None; - if stack_size > 0 { - pos.ins().adjust_sp_up_imm(Imm64::new(stack_size)); - sp_adjust_inst = pos.prev_inst(); - assert!(sp_adjust_inst.is_some()); - } - - let mut first_fpr_load = None; - if let Some(index) = sp_arg_index { - let sp = pos - .func - .dfg - .block_params(pos.func.layout.entry_block().unwrap())[index]; - - // Insert the FPR loads (unlike the GPRs, which are stack pops, these are in-order loads) - for (i, reg) in csrs.iter(FPR).enumerate() { - // Offset to where the register is saved relative to RSP, accounting for FPR save alignment - let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64 - + (stack_size % types::F64X2.bytes() as i64); - - let value = pos.ins().load( - types::F64X2, - ir::MemFlags::trusted(), - sp, - (stack_size - offset) as i32, - ); - - first_fpr_load.get_or_insert(pos.current_inst().expect("current inst")); - - pos.func.locations[value] = ir::ValueLoc::Reg(reg); - pos.func.dfg.append_inst_arg(inst, value); - } - } else { - assert!(csrs.iter(FPR).len() == 0); - } - - pos.func.epilogues_start.push(( - first_fpr_load - .or(sp_adjust_inst) - .or(first_csr_pop_inst) - .unwrap_or(fp_pop_inst), - block, - )); -} - 
-#[cfg(feature = "unwind")] -pub fn create_unwind_info( - func: &ir::Function, - isa: &dyn TargetIsa, -) -> CodegenResult> { - use crate::isa::unwind::UnwindInfo; - use crate::machinst::UnwindInfoKind; - - // Assumption: RBP is being used as the frame pointer for both calling conventions - // In the future, we should be omitting frame pointer as an optimization, so this will change - Ok(match isa.unwind_info_kind() { - UnwindInfoKind::SystemV => { - super::unwind::systemv::create_unwind_info(func, isa)?.map(|u| UnwindInfo::SystemV(u)) - } - UnwindInfoKind::Windows => { - super::unwind::winx64::create_unwind_info(func, isa)?.map(|u| UnwindInfo::WindowsX64(u)) - } - UnwindInfoKind::None => None, - }) -} diff --git a/cranelift/codegen/src/isa/legacy/x86/binemit.rs b/cranelift/codegen/src/isa/legacy/x86/binemit.rs deleted file mode 100644 index 0480873672..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/binemit.rs +++ /dev/null @@ -1,578 +0,0 @@ -//! Emitting binary x86 machine code. - -use super::enc_tables::{needs_offset, needs_sib_byte}; -use super::registers::RU; -use crate::binemit::{bad_encoding, CodeSink, Reloc}; -use crate::ir::condcodes::{CondCode, FloatCC, IntCC}; -use crate::ir::{ - Block, Constant, ExternalName, Function, Inst, InstructionData, JumpTable, LibCall, Opcode, - TrapCode, -}; -use crate::isa::{RegUnit, StackBase, StackBaseMask, StackRef, TargetIsa}; -use crate::regalloc::RegDiversions; -use cranelift_codegen_shared::isa::x86::EncodingBits; - -include!(concat!(env!("OUT_DIR"), "/binemit-x86.rs")); - -// Convert a stack base to the corresponding register. -fn stk_base(base: StackBase) -> RegUnit { - let ru = match base { - StackBase::SP => RU::rsp, - StackBase::FP => RU::rbp, - StackBase::Zone => unimplemented!(), - }; - ru as RegUnit -} - -// Mandatory prefix bytes for Mp* opcodes. -const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2]; - -// Second byte for three-byte opcodes for mm=0b10 and mm=0b11. 
-const OP3_BYTE2: [u8; 2] = [0x38, 0x3a]; - -// A REX prefix with no bits set: 0b0100WRXB. -const BASE_REX: u8 = 0b0100_0000; - -// Create a single-register REX prefix, setting the B bit to bit 3 of the register. -// This is used for instructions that encode a register in the low 3 bits of the opcode and for -// instructions that use the ModR/M `reg` field for something else. -fn rex1(reg_b: RegUnit) -> u8 { - let b = ((reg_b >> 3) & 1) as u8; - BASE_REX | b -} - -// Create a dual-register REX prefix, setting: -// -// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present. -// REX.R = bit 3 of reg register. -fn rex2(rm: RegUnit, reg: RegUnit) -> u8 { - let b = ((rm >> 3) & 1) as u8; - let r = ((reg >> 3) & 1) as u8; - BASE_REX | b | (r << 2) -} - -// Create a three-register REX prefix, setting: -// -// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present. -// REX.R = bit 3 of reg register. -// REX.X = bit 3 of SIB index register. -fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 { - let b = ((rm >> 3) & 1) as u8; - let r = ((reg >> 3) & 1) as u8; - let x = ((index >> 3) & 1) as u8; - BASE_REX | b | (x << 1) | (r << 2) -} - -/// Encode the RXBR' bits of the EVEX P0 byte. For an explanation of these bits, see section 2.6.1 -/// in the Intel Software Development Manual, volume 2A. These bits can be used by different -/// addressing modes (see section 2.6.2), requiring different `vex*` functions than this one. -fn evex2(rm: RegUnit, reg: RegUnit) -> u8 { - let b = (!(rm >> 3) & 1) as u8; - let x = (!(rm >> 4) & 1) as u8; - let r = (!(reg >> 3) & 1) as u8; - let r_ = (!(reg >> 4) & 1) as u8; - 0x00 | r_ | (b << 1) | (x << 2) | (r << 3) -} - -/// Determines whether a REX prefix should be emitted. A REX byte always has 0100 in bits 7:4; bits -/// 3:0 correspond to WRXB. 
W allows certain instructions to declare a 64-bit operand size; because -/// [needs_rex] is only used by [infer_rex] and we prevent [infer_rex] from using [w] in -/// [Template::build], we do not need to check again whether [w] forces an inferred REX prefix--it -/// always does and should be encoded like `.rex().w()`. The RXB are extension of ModR/M or SIB -/// fields; see section 2.2.1.2 in the Intel Software Development Manual. -#[inline] -fn needs_rex(rex: u8) -> bool { - rex != BASE_REX -} - -// Emit a REX prefix. -// -// The R, X, and B bits are computed from registers using the functions above. The W bit is -// extracted from `bits`. -fn rex_prefix(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(rex & 0xf8, BASE_REX); - let w = EncodingBits::from(bits).rex_w(); - sink.put1(rex | (w << 3)); -} - -// Emit a single-byte opcode with no REX prefix. -fn put_op1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*"); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op1 encoding"); - sink.put1(bits as u8); -} - -// Emit a single-byte opcode with REX prefix. -fn put_rexop1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*"); - rex_prefix(bits, rex, sink); - sink.put1(bits as u8); -} - -/// Emit a single-byte opcode with inferred REX prefix. -fn put_dynrexop1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for DynRexOp1*"); - if needs_rex(rex) { - rex_prefix(bits, rex, sink); - } - sink.put1(bits as u8); -} - -// Emit two-byte opcode: 0F XX -fn put_op2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*"); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op2 encoding"); - sink.put1(0x0f); - sink.put1(bits as u8); -} - -// Emit two-byte opcode: 0F XX with REX prefix. 
-fn put_rexop2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0f00, 0x0400, "Invalid encoding bits for RexOp2*"); - rex_prefix(bits, rex, sink); - sink.put1(0x0f); - sink.put1(bits as u8); -} - -/// Emit two-byte opcode: 0F XX with inferred REX prefix. -fn put_dynrexop2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!( - bits & 0x0f00, - 0x0400, - "Invalid encoding bits for DynRexOp2*" - ); - if needs_rex(rex) { - rex_prefix(bits, rex, sink); - } - sink.put1(0x0f); - sink.put1(bits as u8); -} - -// Emit single-byte opcode with mandatory prefix. -fn put_mp1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp1 encoding"); - sink.put1(bits as u8); -} - -// Emit single-byte opcode with mandatory prefix and REX. -fn put_rexmp1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for RexMp1*"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - rex_prefix(bits, rex, sink); - sink.put1(bits as u8); -} - -// Emit two-byte opcode (0F XX) with mandatory prefix. -fn put_mp2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x8c00, 0x0400, "Invalid encoding bits for Mp2*"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp2 encoding"); - sink.put1(0x0f); - sink.put1(bits as u8); -} - -// Emit two-byte opcode (0F XX) with mandatory prefix and REX. 
-fn put_rexmp2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for RexMp2*"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - rex_prefix(bits, rex, sink); - sink.put1(0x0f); - sink.put1(bits as u8); -} - -/// Emit two-byte opcode (0F XX) with mandatory prefix and inferred REX. -fn put_dynrexmp2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!( - bits & 0x0c00, - 0x0400, - "Invalid encoding bits for DynRexMp2*" - ); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - if needs_rex(rex) { - rex_prefix(bits, rex, sink); - } - sink.put1(0x0f); - sink.put1(bits as u8); -} - -/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix. -fn put_mp3(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*"); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp3 encoding"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - sink.put1(0x0f); - sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]); - sink.put1(bits as u8); -} - -/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX -fn put_rexmp3(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for RexMp3*"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - rex_prefix(bits, rex, sink); - sink.put1(0x0f); - sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]); - sink.put1(bits as u8); -} - -/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and an inferred REX prefix. 
-fn put_dynrexmp3(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!( - bits & 0x0800, - 0x0800, - "Invalid encoding bits for DynRexMp3*" - ); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - if needs_rex(rex) { - rex_prefix(bits, rex, sink); - } - sink.put1(0x0f); - sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]); - sink.put1(bits as u8); -} - -/// Defines the EVEX context for the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte). Table 2-36 in -/// section 2.6.10 (Intel Software Development Manual, volume 2A) describes how these bits can be -/// used together for certain classes of instructions; i.e., special care should be taken to ensure -/// that instructions use an applicable correct `EvexContext`. Table 2-39 contains cases where -/// opcodes can result in an #UD. -#[allow(dead_code)] -enum EvexContext { - RoundingRegToRegFP { - rc: EvexRoundingControl, - }, - NoRoundingFP { - sae: bool, - length: EvexVectorLength, - }, - MemoryOp { - broadcast: bool, - length: EvexVectorLength, - }, - Other { - length: EvexVectorLength, - }, -} - -impl EvexContext { - /// Encode the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte) for merging with the P2 byte. - fn bits(&self) -> u8 { - match self { - Self::RoundingRegToRegFP { rc } => 0b001 | rc.bits() << 1, - Self::NoRoundingFP { sae, length } => (*sae as u8) | length.bits() << 1, - Self::MemoryOp { broadcast, length } => (*broadcast as u8) | length.bits() << 1, - Self::Other { length } => length.bits() << 1, - } - } -} - -/// The EVEX format allows choosing a vector length in the `L'` and `L` bits; see `EvexContext`. -#[allow(dead_code)] -enum EvexVectorLength { - V128, - V256, - V512, -} - -impl EvexVectorLength { - /// Encode the `L'` and `L` bits for merging with the P2 byte. - fn bits(&self) -> u8 { - match self { - Self::V128 => 0b00, - Self::V256 => 0b01, - Self::V512 => 0b10, - // 0b11 is reserved (#UD). 
- } - } -} - -/// The EVEX format allows defining rounding control in the `L'` and `L` bits; see `EvexContext`. -#[allow(dead_code)] -enum EvexRoundingControl { - RNE, - RD, - RU, - RZ, -} - -impl EvexRoundingControl { - /// Encode the `L'` and `L` bits for merging with the P2 byte. - fn bits(&self) -> u8 { - match self { - Self::RNE => 0b00, - Self::RD => 0b01, - Self::RU => 0b10, - Self::RZ => 0b11, - } - } -} - -/// Defines the EVEX masking behavior; masking support is described in section 2.6.4 of the Intel -/// Software Development Manual, volume 2A. -#[allow(dead_code)] -enum EvexMasking { - None, - Merging { k: u8 }, - Zeroing { k: u8 }, -} - -impl EvexMasking { - /// Encode the `z` bit for merging with the P2 byte. - fn z_bit(&self) -> u8 { - match self { - Self::None | Self::Merging { .. } => 0, - Self::Zeroing { .. } => 1, - } - } - - /// Encode the `aaa` bits for merging with the P2 byte. - fn aaa_bits(&self) -> u8 { - match self { - Self::None => 0b000, - Self::Merging { k } | Self::Zeroing { k } => { - debug_assert!(*k <= 7); - *k - } - } - } -} - -/// Encode an EVEX prefix, including the instruction opcode. To match the current recipe -/// convention, the ModR/M byte is written separately in the recipe. This EVEX encoding function -/// only encodes the `reg` (operand 1), `vvvv` (operand 2), `rm` (operand 3) form; other forms are -/// possible (see section 2.6.2, Intel Software Development Manual, volume 2A), requiring -/// refactoring of this function or separate functions for each form (e.g. as for the REX prefix). -fn put_evex( - bits: u16, - reg: RegUnit, - vvvvv: RegUnit, - rm: RegUnit, - context: EvexContext, - masking: EvexMasking, - sink: &mut CS, -) { - let enc = EncodingBits::from(bits); - - // EVEX prefix. 
- sink.put1(0x62); - - debug_assert!(enc.mm() < 0b100); - let mut p0 = enc.mm() & 0b11; - p0 |= evex2(rm, reg) << 4; // bits 3:2 are always unset - sink.put1(p0); - - let mut p1 = enc.pp() | 0b100; // bit 2 is always set - p1 |= (!(vvvvv as u8) & 0b1111) << 3; - p1 |= (enc.rex_w() & 0b1) << 7; - sink.put1(p1); - - let mut p2 = masking.aaa_bits(); - p2 |= (!(vvvvv as u8 >> 4) & 0b1) << 3; - p2 |= context.bits() << 4; - p2 |= masking.z_bit() << 7; - sink.put1(p2); - - // Opcode - sink.put1(enc.opcode_byte()); - - // ModR/M byte placed in recipe -} - -/// Emit a ModR/M byte for reg-reg operands. -fn modrm_rr(rm: RegUnit, reg: RegUnit, sink: &mut CS) { - let reg = reg as u8 & 7; - let rm = rm as u8 & 7; - let mut b = 0b11000000; - b |= reg << 3; - b |= rm; - sink.put1(b); -} - -/// Emit a ModR/M byte where the reg bits are part of the opcode. -fn modrm_r_bits(rm: RegUnit, bits: u16, sink: &mut CS) { - let reg = (bits >> 12) as u8 & 7; - let rm = rm as u8 & 7; - let mut b = 0b11000000; - b |= reg << 3; - b |= rm; - sink.put1(b); -} - -/// Emit a mode 00 ModR/M byte. This is a register-indirect addressing mode with no offset. -/// Registers %rsp and %rbp are invalid for `rm`, %rsp indicates a SIB byte, and %rbp indicates an -/// absolute immediate 32-bit address. -fn modrm_rm(rm: RegUnit, reg: RegUnit, sink: &mut CS) { - let reg = reg as u8 & 7; - let rm = rm as u8 & 7; - let mut b = 0b00000000; - b |= reg << 3; - b |= rm; - sink.put1(b); -} - -/// Emit a mode 00 Mod/RM byte, with a rip-relative displacement in 64-bit mode. Effective address -/// is calculated by adding displacement to 64-bit rip of next instruction. See intel Sw dev manual -/// section 2.2.1.6. -fn modrm_riprel(reg: RegUnit, sink: &mut CS) { - modrm_rm(0b101, reg, sink) -} - -/// Emit a mode 01 ModR/M byte. This is a register-indirect addressing mode with 8-bit -/// displacement. -/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte. 
-fn modrm_disp8(rm: RegUnit, reg: RegUnit, sink: &mut CS) { - let reg = reg as u8 & 7; - let rm = rm as u8 & 7; - let mut b = 0b01000000; - b |= reg << 3; - b |= rm; - sink.put1(b); -} - -/// Emit a mode 10 ModR/M byte. This is a register-indirect addressing mode with 32-bit -/// displacement. -/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte. -fn modrm_disp32(rm: RegUnit, reg: RegUnit, sink: &mut CS) { - let reg = reg as u8 & 7; - let rm = rm as u8 & 7; - let mut b = 0b10000000; - b |= reg << 3; - b |= rm; - sink.put1(b); -} - -/// Emit a mode 00 ModR/M with a 100 RM indicating a SIB byte is present. -fn modrm_sib(reg: RegUnit, sink: &mut CS) { - modrm_rm(0b100, reg, sink); -} - -/// Emit a mode 01 ModR/M with a 100 RM indicating a SIB byte and 8-bit -/// displacement are present. -fn modrm_sib_disp8(reg: RegUnit, sink: &mut CS) { - modrm_disp8(0b100, reg, sink); -} - -/// Emit a mode 10 ModR/M with a 100 RM indicating a SIB byte and 32-bit -/// displacement are present. -fn modrm_sib_disp32(reg: RegUnit, sink: &mut CS) { - modrm_disp32(0b100, reg, sink); -} - -/// Emit a SIB byte with a base register and no scale+index. -fn sib_noindex(base: RegUnit, sink: &mut CS) { - let base = base as u8 & 7; - // SIB SS_III_BBB. - let mut b = 0b00_100_000; - b |= base; - sink.put1(b); -} - -/// Emit a SIB byte with a scale, base, and index. -fn sib(scale: u8, index: RegUnit, base: RegUnit, sink: &mut CS) { - // SIB SS_III_BBB. - debug_assert_eq!(scale & !0x03, 0, "Scale out of range"); - let scale = scale & 3; - let index = index as u8 & 7; - let base = base as u8 & 7; - let b: u8 = (scale << 6) | (index << 3) | base; - sink.put1(b); -} - -/// Get the low 4 bits of an opcode for an integer condition code. -/// -/// Add this offset to a base opcode for: -/// -/// ---- 0x70: Short conditional branch. -/// 0x0f 0x80: Long conditional branch. -/// 0x0f 0x90: SetCC. 
-/// -fn icc2opc(cond: IntCC) -> u16 { - use crate::ir::condcodes::IntCC::*; - match cond { - Overflow => 0x0, - NotOverflow => 0x1, - UnsignedLessThan => 0x2, - UnsignedGreaterThanOrEqual => 0x3, - Equal => 0x4, - NotEqual => 0x5, - UnsignedLessThanOrEqual => 0x6, - UnsignedGreaterThan => 0x7, - // 0x8 = Sign. - // 0x9 = !Sign. - // 0xa = Parity even. - // 0xb = Parity odd. - SignedLessThan => 0xc, - SignedGreaterThanOrEqual => 0xd, - SignedLessThanOrEqual => 0xe, - SignedGreaterThan => 0xf, - } -} - -/// Get the low 4 bits of an opcode for a floating point condition code. -/// -/// The ucomiss/ucomisd instructions set the FLAGS bits CF/PF/CF like this: -/// -/// ZPC OSA -/// UN 111 000 -/// GT 000 000 -/// LT 001 000 -/// EQ 100 000 -/// -/// Not all floating point condition codes are supported. -fn fcc2opc(cond: FloatCC) -> u16 { - use crate::ir::condcodes::FloatCC::*; - match cond { - Ordered => 0xb, // EQ|LT|GT => *np (P=0) - Unordered => 0xa, // UN => *p (P=1) - OrderedNotEqual => 0x5, // LT|GT => *ne (Z=0), - UnorderedOrEqual => 0x4, // UN|EQ => *e (Z=1) - GreaterThan => 0x7, // GT => *a (C=0&Z=0) - GreaterThanOrEqual => 0x3, // GT|EQ => *ae (C=0) - UnorderedOrLessThan => 0x2, // UN|LT => *b (C=1) - UnorderedOrLessThanOrEqual => 0x6, // UN|LT|EQ => *be (Z=1|C=1) - Equal | // EQ - NotEqual | // UN|LT|GT - LessThan | // LT - LessThanOrEqual | // LT|EQ - UnorderedOrGreaterThan | // UN|GT - UnorderedOrGreaterThanOrEqual // UN|GT|EQ - => panic!("{} not supported", cond), - } -} - -/// Emit a single-byte branch displacement to `destination`. -fn disp1(destination: Block, func: &Function, sink: &mut CS) { - let delta = func.offsets[destination].wrapping_sub(sink.offset() + 1); - sink.put1(delta as u8); -} - -/// Emit a four-byte branch displacement to `destination`. 
-fn disp4(destination: Block, func: &Function, sink: &mut CS) { - let delta = func.offsets[destination].wrapping_sub(sink.offset() + 4); - sink.put4(delta); -} - -/// Emit a four-byte displacement to jump table `jt`. -fn jt_disp4(jt: JumpTable, func: &Function, sink: &mut CS) { - let delta = func.jt_offsets[jt].wrapping_sub(sink.offset() + 4); - sink.put4(delta); - sink.reloc_jt(Reloc::X86PCRelRodata4, jt); -} - -/// Emit a four-byte displacement to `constant`. -fn const_disp4(constant: Constant, func: &Function, sink: &mut CS) { - let offset = func.dfg.constants.get_offset(constant); - let delta = offset.wrapping_sub(sink.offset() + 4); - sink.put4(delta); - sink.reloc_constant(Reloc::X86PCRelRodata4, offset); -} diff --git a/cranelift/codegen/src/isa/legacy/x86/enc_tables.rs b/cranelift/codegen/src/isa/legacy/x86/enc_tables.rs deleted file mode 100644 index 72890cffd9..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/enc_tables.rs +++ /dev/null @@ -1,1894 +0,0 @@ -//! Encoding tables for x86 ISAs. - -use super::registers::*; -use crate::bitset::BitSet; -use crate::cursor::{Cursor, FuncCursor}; -use crate::flowgraph::ControlFlowGraph; -use crate::ir::condcodes::{FloatCC, IntCC}; -use crate::ir::types::*; -use crate::ir::{self, Function, Inst, InstBuilder, MemFlags}; -use crate::isa::constraints::*; -use crate::isa::enc_tables::*; -use crate::isa::encoding::base_size; -use crate::isa::encoding::{Encoding, RecipeSizing}; -use crate::isa::RegUnit; -use crate::isa::{self, TargetIsa}; -use crate::legalizer::expand_as_libcall; -use crate::predicates; -use crate::regalloc::RegDiversions; - -include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs")); -include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs")); - -/// Whether the REX prefix is needed for encoding extended registers (via REX.RXB). -/// -/// Normal x86 instructions have only 3 bits for encoding a register. -/// The REX prefix adds REX.R, REX,X, and REX.B bits, interpreted as fourth bits. 
-pub fn is_extended_reg(reg: RegUnit) -> bool { - // Extended registers have the fourth bit set. - reg as u8 & 0b1000 != 0 -} - -pub fn needs_sib_byte(reg: RegUnit) -> bool { - reg == RU::r12 as RegUnit || reg == RU::rsp as RegUnit -} -pub fn needs_offset(reg: RegUnit) -> bool { - reg == RU::r13 as RegUnit || reg == RU::rbp as RegUnit -} -pub fn needs_sib_byte_or_offset(reg: RegUnit) -> bool { - needs_sib_byte(reg) || needs_offset(reg) -} - -fn test_input( - op_index: usize, - inst: Inst, - divert: &RegDiversions, - func: &Function, - condition_func: fn(RegUnit) -> bool, -) -> bool { - let in_reg = divert.reg(func.dfg.inst_args(inst)[op_index], &func.locations); - condition_func(in_reg) -} - -fn test_result( - result_index: usize, - inst: Inst, - divert: &RegDiversions, - func: &Function, - condition_func: fn(RegUnit) -> bool, -) -> bool { - let out_reg = divert.reg(func.dfg.inst_results(inst)[result_index], &func.locations); - condition_func(out_reg) -} - -fn size_plus_maybe_offset_for_inreg_0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_offset = test_input(0, inst, divert, func, needs_offset); - sizing.base_size + if needs_offset { 1 } else { 0 } -} -fn size_plus_maybe_offset_for_inreg_1( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_offset = test_input(1, inst, divert, func, needs_offset); - sizing.base_size + if needs_offset { 1 } else { 0 } -} -fn size_plus_maybe_sib_for_inreg_0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_sib = test_input(0, inst, divert, func, needs_sib_byte); - sizing.base_size + if needs_sib { 1 } else { 0 } -} -fn size_plus_maybe_sib_for_inreg_1( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_sib = test_input(1, inst, divert, 
func, needs_sib_byte); - sizing.base_size + if needs_sib { 1 } else { 0 } -} -fn size_plus_maybe_sib_or_offset_for_inreg_0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_sib_or_offset = test_input(0, inst, divert, func, needs_sib_byte_or_offset); - sizing.base_size + if needs_sib_or_offset { 1 } else { 0 } -} -fn size_plus_maybe_sib_or_offset_for_inreg_1( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_sib_or_offset = test_input(1, inst, divert, func, needs_sib_byte_or_offset); - sizing.base_size + if needs_sib_or_offset { 1 } else { 0 } -} - -/// Calculates the size while inferring if the first and second input registers (inreg0, inreg1) -/// require a dynamic REX prefix and if the second input register (inreg1) requires a SIB or offset. -fn size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1( - sizing: &RecipeSizing, - enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_input(1, inst, divert, func, is_extended_reg); - size_plus_maybe_sib_or_offset_for_inreg_1(sizing, enc, inst, divert, func) - + if needs_rex { 1 } else { 0 } -} - -/// Calculates the size while inferring if the first and second input registers (inreg0, inreg1) -/// require a dynamic REX prefix and if the second input register (inreg1) requires a SIB. -fn size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1( - sizing: &RecipeSizing, - enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. 
- let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_input(1, inst, divert, func, is_extended_reg); - size_plus_maybe_sib_for_inreg_1(sizing, enc, inst, divert, func) + if needs_rex { 1 } else { 0 } -} - -/// Calculates the size while inferring if the first input register (inreg0) and first output -/// register (outreg0) require a dynamic REX and if the first input register (inreg0) requires a -/// SIB or offset. -fn size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0( - sizing: &RecipeSizing, - enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_result(0, inst, divert, func, is_extended_reg); - size_plus_maybe_sib_or_offset_for_inreg_0(sizing, enc, inst, divert, func) - + if needs_rex { 1 } else { 0 } -} - -/// Calculates the size while inferring if the first input register (inreg0) and first output -/// register (outreg0) require a dynamic REX and if the first input register (inreg0) requires a -/// SIB. -fn size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0( - sizing: &RecipeSizing, - enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_result(0, inst, divert, func, is_extended_reg); - size_plus_maybe_sib_for_inreg_0(sizing, enc, inst, divert, func) + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, for use with one input reg. -/// -/// A REX prefix is known to be emitted if either: -/// 1. The EncodingBits specify that REX.W is to be set. -/// 2. Registers are used that require REX.R or REX.B bits for encoding. 
-fn size_with_inferred_rex_for_inreg0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, based on the second operand. -fn size_with_inferred_rex_for_inreg1( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(1, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, based on the third operand. -fn size_with_inferred_rex_for_inreg2( - sizing: &RecipeSizing, - _: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(2, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, for use with two input registers. -/// -/// A REX prefix is known to be emitted if either: -/// 1. The EncodingBits specify that REX.W is to be set. -/// 2. Registers are used that require REX.R or REX.B bits for encoding. -fn size_with_inferred_rex_for_inreg0_inreg1( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. 
- let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_input(1, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, based on second and third operand. -fn size_with_inferred_rex_for_inreg1_inreg2( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(1, inst, divert, func, is_extended_reg) - || test_input(2, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, based on a single -/// input register and a single output register. -fn size_with_inferred_rex_for_inreg0_outreg0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_result(0, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, based on a single output register. -fn size_with_inferred_rex_for_outreg0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_result(0, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, for use with CMOV. -/// -/// CMOV uses 3 inputs, with the REX is inferred from reg1 and reg2. 
-fn size_with_inferred_rex_for_cmov( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(1, inst, divert, func, is_extended_reg) - || test_input(2, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// If the value's definition is a constant immediate, returns its unpacked value, or None -/// otherwise. -fn maybe_iconst_imm(pos: &FuncCursor, value: ir::Value) -> Option { - if let ir::ValueDef::Result(inst, _) = &pos.func.dfg.value_def(value) { - if let ir::InstructionData::UnaryImm { - opcode: ir::Opcode::Iconst, - imm, - } = &pos.func.dfg[*inst] - { - let value: i64 = (*imm).into(); - Some(value) - } else { - None - } - } else { - None - } -} - -/// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`. -fn expand_sdivrem( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let (x, y, is_srem) = match func.dfg[inst] { - ir::InstructionData::Binary { - opcode: ir::Opcode::Sdiv, - args, - } => (args[0], args[1], false), - ir::InstructionData::Binary { - opcode: ir::Opcode::Srem, - args, - } => (args[0], args[1], true), - _ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)), - }; - - let old_block = func.layout.pp_block(inst); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - pos.func.dfg.clear_results(inst); - - let avoid_div_traps = isa.flags().avoid_div_traps(); - - // If we can tolerate native division traps, sdiv doesn't need branching. 
- if !avoid_div_traps && !is_srem { - let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); - pos.ins().with_result(result).x86_sdivmodx(x, xhi, y); - pos.remove_inst(); - return; - } - - // Try to remove checks if the input value is an immediate other than 0 or -1. For these two - // immediates, we'd ideally replace conditional traps by traps, but this requires more - // manipulation of the dfg/cfg, which is out of scope here. - let (could_be_zero, could_be_minus_one) = if let Some(imm) = maybe_iconst_imm(&pos, y) { - (imm == 0, imm == -1) - } else { - (true, true) - }; - - // Put in an explicit division-by-zero trap if the environment requires it. - if avoid_div_traps && could_be_zero { - pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero); - } - - if !could_be_minus_one { - let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); - let reuse = if is_srem { - [None, Some(result)] - } else { - [Some(result), None] - }; - pos.ins().with_results(reuse).x86_sdivmodx(x, xhi, y); - pos.remove_inst(); - return; - } - - // block handling the nominal case. - let nominal = pos.func.dfg.make_block(); - - // block handling the -1 divisor case. - let minus_one = pos.func.dfg.make_block(); - - // Final block with one argument representing the final result value. - let done = pos.func.dfg.make_block(); - - // Move the `inst` result value onto the `done` block. - pos.func.dfg.attach_block_param(done, result); - - // Start by checking for a -1 divisor which needs to be handled specially. - let is_m1 = pos.ins().ifcmp_imm(y, -1); - pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]); - pos.ins().jump(nominal, &[]); - - // Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division - // by zero. 
- pos.insert_block(nominal); - let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); - let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y); - let divres = if is_srem { rem } else { quot }; - pos.ins().jump(done, &[divres]); - - // Now deal with the -1 divisor case. - pos.insert_block(minus_one); - let m1_result = if is_srem { - // x % -1 = 0. - pos.ins().iconst(ty, 0) - } else { - // Explicitly check for overflow: Trap when x == INT_MIN. - debug_assert!(avoid_div_traps, "Native trapping divide handled above"); - let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1)); - pos.ins() - .trapif(IntCC::Equal, f, ir::TrapCode::IntegerOverflow); - // x / -1 = -x. - pos.ins().irsub_imm(x, 0) - }; - - // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[m1_result]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, nominal); - cfg.recompute_block(pos.func, minus_one); - cfg.recompute_block(pos.func, done); -} - -/// Expand the `udiv` and `urem` instructions using `x86_udivmodx`. -fn expand_udivrem( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let (x, y, is_urem) = match func.dfg[inst] { - ir::InstructionData::Binary { - opcode: ir::Opcode::Udiv, - args, - } => (args[0], args[1], false), - ir::InstructionData::Binary { - opcode: ir::Opcode::Urem, - args, - } => (args[0], args[1], true), - _ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)), - }; - let avoid_div_traps = isa.flags().avoid_div_traps(); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - pos.func.dfg.clear_results(inst); - - // Put in an explicit division-by-zero trap if the environment requires it. 
- if avoid_div_traps { - let zero_check = if let Some(imm) = maybe_iconst_imm(&pos, y) { - // Ideally, we'd just replace the conditional trap with a trap when the immediate is - // zero, but this requires more manipulation of the dfg/cfg, which is out of scope - // here. - imm == 0 - } else { - true - }; - if zero_check { - pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero); - } - } - - // Now it is safe to execute the `x86_udivmodx` instruction. - let xhi = pos.ins().iconst(ty, 0); - let reuse = if is_urem { - [None, Some(result)] - } else { - [Some(result), None] - }; - pos.ins().with_results(reuse).x86_udivmodx(x, xhi, y); - pos.remove_inst(); -} - -/// Expand the `fmin` and `fmax` instructions using the x86 `x86_fmin` and `x86_fmax` -/// instructions. -fn expand_minmax( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] { - ir::InstructionData::Binary { - opcode: ir::Opcode::Fmin, - args, - } => (args[0], args[1], ir::Opcode::X86Fmin, ir::Opcode::Bor), - ir::InstructionData::Binary { - opcode: ir::Opcode::Fmax, - args, - } => (args[0], args[1], ir::Opcode::X86Fmax, ir::Opcode::Band), - _ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)), - }; - let old_block = func.layout.pp_block(inst); - - // We need to handle the following conditions, depending on how x and y compare: - // - // 1. LT or GT: The native `x86_opc` min/max instruction does what we need. - // 2. EQ: We need to use `bitwise_opc` to make sure that - // fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0. - // 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical. - - // block handling case 1) where operands are ordered but not equal. - let one_block = func.dfg.make_block(); - - // block handling case 3) where one operand is NaN. - let uno_block = func.dfg.make_block(); - - // block that handles the unordered or equal cases 2) and 3). 
- let ueq_block = func.dfg.make_block(); - - // block handling case 2) where operands are ordered and equal. - let eq_block = func.dfg.make_block(); - - // Final block with one argument representing the final result value. - let done = func.dfg.make_block(); - - // The basic blocks are laid out to minimize branching for the common cases: - // - // 1) One branch not taken, one jump. - // 2) One branch taken. - // 3) Two branches taken, one jump. - - // Move the `inst` result value onto the `done` block. - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - func.dfg.clear_results(inst); - func.dfg.attach_block_param(done, result); - - // Test for case 1) ordered and not equal. - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y); - pos.ins().brnz(cmp_ueq, ueq_block, &[]); - pos.ins().jump(one_block, &[]); - - // Handle the common ordered, not equal (LT|GT) case. - pos.insert_block(one_block); - let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0; - let one_result = pos.func.dfg.first_result(one_inst); - pos.ins().jump(done, &[one_result]); - - // Case 3) Unordered. - // We know that at least one operand is a NaN that needs to be propagated. We simply use an - // `fadd` instruction which has the same NaN propagation semantics. - pos.insert_block(uno_block); - let uno_result = pos.ins().fadd(x, y); - pos.ins().jump(done, &[uno_result]); - - // Case 2) or 3). - pos.insert_block(ueq_block); - // Test for case 3) (UN) one value is NaN. - // TODO: When we get support for flag values, we can reuse the above comparison. - let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y); - pos.ins().brnz(cmp_uno, uno_block, &[]); - pos.ins().jump(eq_block, &[]); - - // We are now in case 2) where x and y compare EQ. - // We need a bitwise operation to get the sign right. 
- pos.insert_block(eq_block); - let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0; - let bw_result = pos.func.dfg.first_result(bw_inst); - // This should become a fall-through for this second most common case. - // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[bw_result]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, one_block); - cfg.recompute_block(pos.func, uno_block); - cfg.recompute_block(pos.func, ueq_block); - cfg.recompute_block(pos.func, eq_block); - cfg.recompute_block(pos.func, done); -} - -/// This legalization converts a minimum/maximum operation into a sequence that matches the -/// non-x86-friendly WebAssembly semantics of NaN handling. This logic is kept separate from -/// [expand_minmax] above (the scalar version) for code clarity. -fn expand_minmax_vector( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let ty = func.dfg.ctrl_typevar(inst); - debug_assert!(ty.is_vector()); - let (x, y, x86_opcode, is_max) = match func.dfg[inst] { - ir::InstructionData::Binary { - opcode: ir::Opcode::Fmin, - args, - } => (args[0], args[1], ir::Opcode::X86Fmin, false), - ir::InstructionData::Binary { - opcode: ir::Opcode::Fmax, - args, - } => (args[0], args[1], ir::Opcode::X86Fmax, true), - _ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)), - }; - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // This sequence is complex due to how x86 handles NaNs and +0/-0. If x86 finds a NaN in - // either lane it returns the second operand; likewise, if both operands are in {+0.0, -0.0} - // it returns the second operand. To match the behavior of "return the minimum of the - // operands or a canonical NaN if either operand is NaN," we must compare in both - // directions. 
- let (forward_inst, dfg) = pos.ins().Binary(x86_opcode, ty, x, y); - let forward = dfg.first_result(forward_inst); - let (backward_inst, dfg) = pos.ins().Binary(x86_opcode, ty, y, x); - let backward = dfg.first_result(backward_inst); - - let (value, mask) = if is_max { - // For maximum: - // Find any differences between the forward and backward `max` operation. - let difference = pos.ins().bxor(forward, backward); - // Merge in the differences. - let propagate_nans_and_plus_zero = pos.ins().bor(backward, difference); - let value = pos.ins().fsub(propagate_nans_and_plus_zero, difference); - // Discover which lanes have NaNs in them. - let find_nan_lanes_mask = pos.ins().fcmp(FloatCC::Unordered, difference, value); - (value, find_nan_lanes_mask) - } else { - // For minimum: - // If either lane is a NaN, we want to use these bits, not the second operand bits. - let propagate_nans = pos.ins().bor(backward, forward); - // Find which lanes contain a NaN with an unordered comparison, filling the mask with - // 1s. - let find_nan_lanes_mask = pos.ins().fcmp(FloatCC::Unordered, forward, propagate_nans); - let bitcast_find_nan_lanes_mask = pos.ins().raw_bitcast(ty, find_nan_lanes_mask); - // Then flood the value lane with all 1s if that lane is a NaN. This causes all NaNs - // along this code path to be quieted and negative: after the upcoming shift and and_not, - // all upper bits (sign, exponent, and payload MSB) will be 1s. - let tmp = pos.ins().bor(propagate_nans, bitcast_find_nan_lanes_mask); - (tmp, bitcast_find_nan_lanes_mask) - }; - - // During this lowering we will need to know how many bits to shift by and what type to - // convert to when using an integer shift. Recall that an IEEE754 number looks like: - // `[sign bit] [exponent bits] [significand bits]` - // A quiet NaN has all exponent bits set to 1 and the most significant bit of the - // significand set to 1; a signaling NaN has the same exponent but the MSB of the - // significand is set to 0. 
The payload of the NaN is the remaining significand bits, and - // WebAssembly assumes a canonical NaN is quiet and has 0s in its payload. To compute this - // canonical NaN, we create a mask for the top 10 bits on F32X4 (1 sign + 8 exp. + 1 MSB - // sig.) and the top 13 bits on F64X2 (1 sign + 11 exp. + 1 MSB sig.). This means that all - // NaNs produced with the mask will be negative (`-NaN`) which is allowed by the sign - // non-determinism in the spec: https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0 - let (shift_by, ty_as_int) = match ty { - F32X4 => (10, I32X4), - F64X2 => (13, I64X2), - _ => unimplemented!("this legalization only understands 128-bit floating point types"), - }; - - // In order to clear the NaN payload for canonical NaNs, we shift right the NaN lanes (all - // 1s) leaving 0s in the top bits. Remember that non-NaN lanes are all 0s so this has - // little effect. - let mask_as_int = pos.ins().raw_bitcast(ty_as_int, mask); - let shift_mask = pos.ins().ushr_imm(mask_as_int, shift_by); - let shift_mask_as_float = pos.ins().raw_bitcast(ty, shift_mask); - - // Finally, we replace the value with `value & ~shift_mask`. For non-NaN lanes, this is - // equivalent to `... & 1111...` but for NaN lanes this will only have 1s in the top bits, - // clearing the payload. - pos.func - .dfg - .replace(inst) - .band_not(value, shift_mask_as_float); -} - -/// x86 has no unsigned-to-float conversions. We handle the easy case of zero-extending i32 to -/// i64 with a pattern, the rest needs more code. -/// -/// Note that this is the scalar implementation; for the vector implemenation see -/// [expand_fcvt_from_uint_vector]. 
-fn expand_fcvt_from_uint( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let x; - match func.dfg[inst] { - ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtFromUint, - arg, - } => x = arg, - _ => panic!("Need fcvt_from_uint: {}", func.dfg.display_inst(inst, None)), - } - let xty = func.dfg.value_type(x); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // Conversion from an unsigned int smaller than 64bit is easy on x86-64. - match xty { - ir::types::I8 | ir::types::I16 | ir::types::I32 => { - // TODO: This should be guarded by an ISA check. - let wide = pos.ins().uextend(ir::types::I64, x); - pos.func.dfg.replace(inst).fcvt_from_sint(ty, wide); - return; - } - ir::types::I64 => {} - _ => unimplemented!(), - } - - let old_block = pos.func.layout.pp_block(inst); - - // block handling the case where x >= 0. - let poszero_block = pos.func.dfg.make_block(); - - // block handling the case where x < 0. - let neg_block = pos.func.dfg.make_block(); - - // Final block with one argument representing the final result value. - let done = pos.func.dfg.make_block(); - - // Move the `inst` result value onto the `done` block. - pos.func.dfg.clear_results(inst); - pos.func.dfg.attach_block_param(done, result); - - // If x as a signed int is not negative, we can use the existing `fcvt_from_sint` instruction. - let is_neg = pos.ins().icmp_imm(IntCC::SignedLessThan, x, 0); - pos.ins().brnz(is_neg, neg_block, &[]); - pos.ins().jump(poszero_block, &[]); - - // Easy case: just use a signed conversion. - pos.insert_block(poszero_block); - let posres = pos.ins().fcvt_from_sint(ty, x); - pos.ins().jump(done, &[posres]); - - // Now handle the negative case. - pos.insert_block(neg_block); - - // Divide x by two to get it in range for the signed conversion, keep the LSB, and scale it - // back up on the FP side. 
- let ihalf = pos.ins().ushr_imm(x, 1); - let lsb = pos.ins().band_imm(x, 1); - let ifinal = pos.ins().bor(ihalf, lsb); - let fhalf = pos.ins().fcvt_from_sint(ty, ifinal); - let negres = pos.ins().fadd(fhalf, fhalf); - - // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[negres]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, poszero_block); - cfg.recompute_block(pos.func, neg_block); - cfg.recompute_block(pos.func, done); -} - -/// To convert packed unsigned integers to their float equivalents, we must legalize to a special -/// AVX512 instruction (using MCSR rounding) or use a long sequence of instructions. This logic is -/// separate from [expand_fcvt_from_uint] above (the scalar version), only due to how the transform -/// groups are set up; TODO if we change the SIMD legalization groups, then this logic could be -/// merged into [expand_fcvt_from_uint] (see https://github.com/bytecodealliance/wasmtime/issues/1745). -fn expand_fcvt_from_uint_vector( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtFromUint, - arg, - } = pos.func.dfg[inst] - { - let controlling_type = pos.func.dfg.ctrl_typevar(inst); - if controlling_type == F32X4 { - debug_assert_eq!(pos.func.dfg.value_type(arg), I32X4); - let x86_isa = isa - .as_any() - .downcast_ref::() - .expect("the target ISA must be x86 at this point"); - if x86_isa.isa_flags.use_avx512vl_simd() || x86_isa.isa_flags.use_avx512f_simd() { - // If we have certain AVX512 features, we can lower this instruction simply. 
- pos.func.dfg.replace(inst).x86_vcvtudq2ps(arg); - } else { - // Otherwise, we default to a very lengthy SSE4.1-compatible sequence: PXOR, - // PBLENDW, PSUB, CVTDQ2PS, PSRLD, CVTDQ2PS, ADDPS, ADDPS - let bitcast_arg = pos.ins().raw_bitcast(I16X8, arg); - let zero_constant = pos.func.dfg.constants.insert(vec![0; 16].into()); - let zero = pos.ins().vconst(I16X8, zero_constant); - let low = pos.ins().x86_pblendw(zero, bitcast_arg, 0x55); - let bitcast_low = pos.ins().raw_bitcast(I32X4, low); - let high = pos.ins().isub(arg, bitcast_low); - let convert_low = pos.ins().fcvt_from_sint(F32X4, bitcast_low); - let shift_high = pos.ins().ushr_imm(high, 1); - let convert_high = pos.ins().fcvt_from_sint(F32X4, shift_high); - let double_high = pos.ins().fadd(convert_high, convert_high); - pos.func.dfg.replace(inst).fadd(double_high, convert_low); - } - } else { - unimplemented!("cannot legalize {}", pos.func.dfg.display_inst(inst, None)) - } - } -} - -fn expand_fcvt_to_sint( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::immediates::{Ieee32, Ieee64}; - - let x = match func.dfg[inst] { - ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToSint, - arg, - } => arg, - _ => panic!("Need fcvt_to_sint: {}", func.dfg.display_inst(inst, None)), - }; - let old_block = func.layout.pp_block(inst); - let xty = func.dfg.value_type(x); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - // Final block after the bad value checks. - let done = func.dfg.make_block(); - - // block for checking failure cases. - let maybe_trap_block = func.dfg.make_block(); - - // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or overflow. - // It produces an INT_MIN result instead. 
- func.dfg.replace(inst).x86_cvtt2si(ty, x); - - let mut pos = FuncCursor::new(func).after_inst(inst); - pos.use_srcloc(inst); - - let is_done = pos - .ins() - .icmp_imm(IntCC::NotEqual, result, 1 << (ty.lane_bits() - 1)); - pos.ins().brnz(is_done, done, &[]); - pos.ins().jump(maybe_trap_block, &[]); - - // We now have the following possibilities: - // - // 1. INT_MIN was actually the correct conversion result. - // 2. The input was NaN -> trap bad_toint - // 3. The input was out of range -> trap int_ovf - // - pos.insert_block(maybe_trap_block); - - // Check for NaN. - let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x); - pos.ins() - .trapnz(is_nan, ir::TrapCode::BadConversionToInteger); - - // Check for case 1: INT_MIN is the correct result. - // Determine the smallest floating point number that would convert to INT_MIN. - let mut overflow_cc = FloatCC::LessThan; - let output_bits = ty.lane_bits(); - let flimit = match xty { - ir::types::F32 => - // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so - // there are values less than -2^(N-1) that convert correctly to INT_MIN. - { - pos.ins().f32const(if output_bits < 32 { - overflow_cc = FloatCC::LessThanOrEqual; - Ieee32::fcvt_to_sint_negative_overflow(output_bits) - } else { - Ieee32::pow2(output_bits - 1).neg() - }) - } - ir::types::F64 => - // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so - // there are values less than -2^(N-1) that convert correctly to INT_MIN. - { - pos.ins().f64const(if output_bits < 64 { - overflow_cc = FloatCC::LessThanOrEqual; - Ieee64::fcvt_to_sint_negative_overflow(output_bits) - } else { - Ieee64::pow2(output_bits - 1).neg() - }) - } - _ => panic!("Can't convert {}", xty), - }; - let overflow = pos.ins().fcmp(overflow_cc, x, flimit); - pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow); - - // Finally, we could have a positive value that is too large. 
- let fzero = match xty { - ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)), - ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)), - _ => panic!("Can't convert {}", xty), - }; - let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero); - pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow); - - pos.ins().jump(done, &[]); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, maybe_trap_block); - cfg.recompute_block(pos.func, done); -} - -fn expand_fcvt_to_sint_sat( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::immediates::{Ieee32, Ieee64}; - - let x = match func.dfg[inst] { - ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToSintSat, - arg, - } => arg, - _ => panic!( - "Need fcvt_to_sint_sat: {}", - func.dfg.display_inst(inst, None) - ), - }; - - let old_block = func.layout.pp_block(inst); - let xty = func.dfg.value_type(x); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - // Final block after the bad value checks. - let done_block = func.dfg.make_block(); - let intmin_block = func.dfg.make_block(); - let minsat_block = func.dfg.make_block(); - let maxsat_block = func.dfg.make_block(); - func.dfg.clear_results(inst); - func.dfg.attach_block_param(done_block, result); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or - // overflow. It produces an INT_MIN result instead. - let cvtt2si = pos.ins().x86_cvtt2si(ty, x); - - let is_done = pos - .ins() - .icmp_imm(IntCC::NotEqual, cvtt2si, 1 << (ty.lane_bits() - 1)); - pos.ins().brnz(is_done, done_block, &[cvtt2si]); - pos.ins().jump(intmin_block, &[]); - - // We now have the following possibilities: - // - // 1. INT_MIN was actually the correct conversion result. - // 2. 
The input was NaN -> replace the result value with 0. - // 3. The input was out of range -> saturate the result to the min/max value. - pos.insert_block(intmin_block); - - // Check for NaN, which is truncated to 0. - let zero = pos.ins().iconst(ty, 0); - let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x); - pos.ins().brnz(is_nan, done_block, &[zero]); - pos.ins().jump(minsat_block, &[]); - - // Check for case 1: INT_MIN is the correct result. - // Determine the smallest floating point number that would convert to INT_MIN. - pos.insert_block(minsat_block); - let mut overflow_cc = FloatCC::LessThan; - let output_bits = ty.lane_bits(); - let flimit = match xty { - ir::types::F32 => - // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so - // there are values less than -2^(N-1) that convert correctly to INT_MIN. - { - pos.ins().f32const(if output_bits < 32 { - overflow_cc = FloatCC::LessThanOrEqual; - Ieee32::fcvt_to_sint_negative_overflow(output_bits) - } else { - Ieee32::pow2(output_bits - 1).neg() - }) - } - ir::types::F64 => - // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so - // there are values less than -2^(N-1) that convert correctly to INT_MIN. - { - pos.ins().f64const(if output_bits < 64 { - overflow_cc = FloatCC::LessThanOrEqual; - Ieee64::fcvt_to_sint_negative_overflow(output_bits) - } else { - Ieee64::pow2(output_bits - 1).neg() - }) - } - _ => panic!("Can't convert {}", xty), - }; - - let overflow = pos.ins().fcmp(overflow_cc, x, flimit); - let min_imm = match ty { - ir::types::I32 => i32::min_value() as i64, - ir::types::I64 => i64::min_value(), - _ => panic!("Don't know the min value for {}", ty), - }; - let min_value = pos.ins().iconst(ty, min_imm); - pos.ins().brnz(overflow, done_block, &[min_value]); - pos.ins().jump(maxsat_block, &[]); - - // Finally, we could have a positive value that is too large. 
- pos.insert_block(maxsat_block); - let fzero = match xty { - ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)), - ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)), - _ => panic!("Can't convert {}", xty), - }; - - let max_imm = match ty { - ir::types::I32 => i32::max_value() as i64, - ir::types::I64 => i64::max_value(), - _ => panic!("Don't know the max value for {}", ty), - }; - let max_value = pos.ins().iconst(ty, max_imm); - - let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero); - pos.ins().brnz(overflow, done_block, &[max_value]); - - // Recycle the original instruction. - pos.func.dfg.replace(inst).jump(done_block, &[cvtt2si]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done_block); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, intmin_block); - cfg.recompute_block(pos.func, minsat_block); - cfg.recompute_block(pos.func, maxsat_block); - cfg.recompute_block(pos.func, done_block); -} - -/// This legalization converts a vector of 32-bit floating point lanes to signed integer lanes -/// using CVTTPS2DQ (see encoding of `x86_cvtt2si`). This logic is separate from [expand_fcvt_to_sint_sat] -/// above (the scalar version), only due to how the transform groups are set up; TODO if we change -/// the SIMD legalization groups, then this logic could be merged into [expand_fcvt_to_sint_sat] -/// (see https://github.com/bytecodealliance/wasmtime/issues/1745). 
-fn expand_fcvt_to_sint_sat_vector( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToSintSat, - arg, - } = pos.func.dfg[inst] - { - let controlling_type = pos.func.dfg.ctrl_typevar(inst); - if controlling_type == I32X4 { - debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4); - // We must both quiet any NaNs--setting that lane to 0--and saturate any - // lanes that might overflow during conversion to the highest/lowest signed integer - // allowed in that lane. - - // Saturate NaNs: `fcmp eq` will not match if a lane contains a NaN. We use ANDPS to - // avoid doing the comparison twice (we need the zeroed lanes to find differences). - let zeroed_nans = pos.ins().fcmp(FloatCC::Equal, arg, arg); - let zeroed_nans_bitcast = pos.ins().raw_bitcast(F32X4, zeroed_nans); - let zeroed_nans_copy = pos.ins().band(arg, zeroed_nans_bitcast); - - // Find differences with the zeroed lanes (we will only use the MSB: 1 if positive or - // NaN, 0 otherwise). - let differences = pos.ins().bxor(zeroed_nans_bitcast, arg); - let differences_bitcast = pos.ins().raw_bitcast(I32X4, differences); - - // Convert the numeric lanes. CVTTPS2DQ will mark overflows with 0x80000000 (MSB set). - let converted = pos.ins().x86_cvtt2si(I32X4, zeroed_nans_copy); - - // Create a mask of all 1s only on positive overflow, 0s otherwise. This uses the MSB - // of `differences` (1 when positive or NaN) and the MSB of `converted` (1 on positive - // overflow). - let tmp = pos.ins().band(differences_bitcast, converted); - let mask = pos.ins().sshr_imm(tmp, 31); - - // Apply the mask to create 0x7FFFFFFF for positive overflow. XOR of all 0s (all other - // cases) has no effect. 
- pos.func.dfg.replace(inst).bxor(converted, mask); - } else { - unimplemented!("cannot legalize {}", pos.func.dfg.display_inst(inst, None)) - } - } -} - -fn expand_fcvt_to_uint( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::immediates::{Ieee32, Ieee64}; - - let x = match func.dfg[inst] { - ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToUint, - arg, - } => arg, - _ => panic!("Need fcvt_to_uint: {}", func.dfg.display_inst(inst, None)), - }; - - let old_block = func.layout.pp_block(inst); - let xty = func.dfg.value_type(x); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - // block handle numbers < 2^(N-1). - let below_uint_max_block = func.dfg.make_block(); - - // block handle numbers < 0. - let below_zero_block = func.dfg.make_block(); - - // block handling numbers >= 2^(N-1). - let large = func.dfg.make_block(); - - // Final block after the bad value checks. - let done = func.dfg.make_block(); - - // Move the `inst` result value onto the `done` block. - func.dfg.clear_results(inst); - func.dfg.attach_block_param(done, result); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in - // the destination integer type. - let pow2nm1 = match xty { - ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)), - ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)), - _ => panic!("Can't convert {}", xty), - }; - let is_large = pos.ins().ffcmp(x, pow2nm1); - pos.ins() - .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]); - pos.ins().jump(below_uint_max_block, &[]); - - // We need to generate a specific trap code when `x` is NaN, so reuse the flags from the - // previous comparison. 
- pos.insert_block(below_uint_max_block); - pos.ins().trapff( - FloatCC::Unordered, - is_large, - ir::TrapCode::BadConversionToInteger, - ); - - // Now we know that x < 2^(N-1) and not NaN. - let sres = pos.ins().x86_cvtt2si(ty, x); - let is_neg = pos.ins().ifcmp_imm(sres, 0); - pos.ins() - .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]); - pos.ins().jump(below_zero_block, &[]); - - pos.insert_block(below_zero_block); - pos.ins().trap(ir::TrapCode::IntegerOverflow); - - // Handle the case where x >= 2^(N-1) and not NaN. - pos.insert_block(large); - let adjx = pos.ins().fsub(x, pow2nm1); - let lres = pos.ins().x86_cvtt2si(ty, adjx); - let is_neg = pos.ins().ifcmp_imm(lres, 0); - pos.ins() - .trapif(IntCC::SignedLessThan, is_neg, ir::TrapCode::IntegerOverflow); - let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1)); - - // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[lfinal]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, below_uint_max_block); - cfg.recompute_block(pos.func, below_zero_block); - cfg.recompute_block(pos.func, large); - cfg.recompute_block(pos.func, done); -} - -fn expand_fcvt_to_uint_sat( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::immediates::{Ieee32, Ieee64}; - - let x = match func.dfg[inst] { - ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToUintSat, - arg, - } => arg, - _ => panic!( - "Need fcvt_to_uint_sat: {}", - func.dfg.display_inst(inst, None) - ), - }; - - let old_block = func.layout.pp_block(inst); - let xty = func.dfg.value_type(x); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - // block handle numbers < 2^(N-1). 
- let below_pow2nm1_or_nan_block = func.dfg.make_block(); - let below_pow2nm1_block = func.dfg.make_block(); - - // block handling numbers >= 2^(N-1). - let large = func.dfg.make_block(); - - // block handling numbers < 2^N. - let uint_large_block = func.dfg.make_block(); - - // Final block after the bad value checks. - let done = func.dfg.make_block(); - - // Move the `inst` result value onto the `done` block. - func.dfg.clear_results(inst); - func.dfg.attach_block_param(done, result); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in - // the destination integer type. - let pow2nm1 = match xty { - ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)), - ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)), - _ => panic!("Can't convert {}", xty), - }; - let zero = pos.ins().iconst(ty, 0); - let is_large = pos.ins().ffcmp(x, pow2nm1); - pos.ins() - .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]); - pos.ins().jump(below_pow2nm1_or_nan_block, &[]); - - // We need to generate zero when `x` is NaN, so reuse the flags from the previous comparison. - pos.insert_block(below_pow2nm1_or_nan_block); - pos.ins().brff(FloatCC::Unordered, is_large, done, &[zero]); - pos.ins().jump(below_pow2nm1_block, &[]); - - // Now we know that x < 2^(N-1) and not NaN. If the result of the cvtt2si is positive, we're - // done; otherwise saturate to the minimum unsigned value, that is 0. - pos.insert_block(below_pow2nm1_block); - let sres = pos.ins().x86_cvtt2si(ty, x); - let is_neg = pos.ins().ifcmp_imm(sres, 0); - pos.ins() - .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]); - pos.ins().jump(done, &[zero]); - - // Handle the case where x >= 2^(N-1) and not NaN. 
- pos.insert_block(large); - let adjx = pos.ins().fsub(x, pow2nm1); - let lres = pos.ins().x86_cvtt2si(ty, adjx); - let max_value = pos.ins().iconst( - ty, - match ty { - ir::types::I32 => u32::max_value() as i64, - ir::types::I64 => u64::max_value() as i64, - _ => panic!("Can't convert {}", ty), - }, - ); - let is_neg = pos.ins().ifcmp_imm(lres, 0); - pos.ins() - .brif(IntCC::SignedLessThan, is_neg, done, &[max_value]); - pos.ins().jump(uint_large_block, &[]); - - pos.insert_block(uint_large_block); - let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1)); - - // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[lfinal]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, below_pow2nm1_or_nan_block); - cfg.recompute_block(pos.func, below_pow2nm1_block); - cfg.recompute_block(pos.func, large); - cfg.recompute_block(pos.func, uint_large_block); - cfg.recompute_block(pos.func, done); -} - -// Lanes of an I32x4 filled with the max signed integer values converted to an F32x4. -static MAX_SIGNED_I32X4S_AS_F32X4S: [u8; 16] = [ - 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, -]; - -/// This legalization converts a vector of 32-bit floating point lanes to unsigned integer lanes -/// using a long sequence of NaN quieting and truncation. This logic is separate from -/// [expand_fcvt_to_uint_sat] above (the scalar version), only due to how the transform groups are -/// set up; TODO if we change the SIMD legalization groups, then this logic could be merged into -/// [expand_fcvt_to_uint_sat] (see https://github.com/bytecodealliance/wasmtime/issues/1745). 
-fn expand_fcvt_to_uint_sat_vector( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToUintSat, - arg, - } = pos.func.dfg[inst] - { - let controlling_type = pos.func.dfg.ctrl_typevar(inst); - if controlling_type == I32X4 { - debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4); - // We must both quiet any NaNs--setting that lane to 0--and saturate any - // lanes that might overflow during conversion to the highest/lowest integer - // allowed in that lane. - let zeroes_constant = pos.func.dfg.constants.insert(vec![0x00; 16].into()); - let max_signed_constant = pos - .func - .dfg - .constants - .insert(MAX_SIGNED_I32X4S_AS_F32X4S.as_ref().into()); - let zeroes = pos.ins().vconst(F32X4, zeroes_constant); - let max_signed = pos.ins().vconst(F32X4, max_signed_constant); - // Clamp the input to 0 for negative floating point numbers. TODO we need to - // convert NaNs to 0 but this doesn't do that? - let ge_zero = pos.ins().x86_fmax(arg, zeroes); - // Find lanes that exceed the max signed value that CVTTPS2DQ knows how to convert. - // For floating point numbers above this, CVTTPS2DQ returns the undefined value - // 0x80000000. - let minus_max_signed = pos.ins().fsub(ge_zero, max_signed); - let le_max_signed = - pos.ins() - .fcmp(FloatCC::LessThanOrEqual, max_signed, minus_max_signed); - // Identify lanes that have minus_max_signed > max_signed || minus_max_signed < 0. - // These lanes have the MSB set to 1 after the XOR. We are trying to calculate a - // valid, in-range addend. 
- let minus_max_signed_as_int = pos.ins().x86_cvtt2si(I32X4, minus_max_signed); - let le_max_signed_as_int = pos.ins().raw_bitcast(I32X4, le_max_signed); - let difference = pos - .ins() - .bxor(minus_max_signed_as_int, le_max_signed_as_int); - // Calculate amount to add above 0x7FFFFFF, zeroing out any lanes identified - // previously (MSB set to 1). - let zeroes_as_int = pos.ins().raw_bitcast(I32X4, zeroes); - let addend = pos.ins().x86_pmaxs(difference, zeroes_as_int); - // Convert the original clamped number to an integer and add back in the addend - // (the part of the value above 0x7FFFFFF, since CVTTPS2DQ overflows with these). - let converted = pos.ins().x86_cvtt2si(I32X4, ge_zero); - pos.func.dfg.replace(inst).iadd(converted, addend); - } else { - unreachable!( - "{} should not be legalized in expand_fcvt_to_uint_sat_vector", - pos.func.dfg.display_inst(inst, None) - ) - } - } -} - -/// Convert shuffle instructions. -fn convert_shuffle( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Shuffle { args, mask, .. } = pos.func.dfg[inst] { - // A mask-building helper: in 128-bit SIMD, 0-15 indicate which lane to read from and a 1 - // in the most significant position zeroes the lane. - let zero_unknown_lane_index = |b: u8| if b > 15 { 0b10000000 } else { b }; - - // We only have to worry about aliasing here because copies will be introduced later (in - // regalloc). - let a = pos.func.dfg.resolve_aliases(args[0]); - let b = pos.func.dfg.resolve_aliases(args[1]); - let mask = pos - .func - .dfg - .immediates - .get(mask) - .expect("The shuffle immediate should have been recorded before this point") - .clone(); - if a == b { - // PSHUFB the first argument (since it is the same as the second). 
- let constructed_mask = mask - .iter() - // If the mask is greater than 15 it still may be referring to a lane in b. - .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b }) - .map(zero_unknown_lane_index) - .collect(); - let handle = pos.func.dfg.constants.insert(constructed_mask); - // Move the built mask into another XMM register. - let a_type = pos.func.dfg.value_type(a); - let mask_value = pos.ins().vconst(a_type, handle); - // Shuffle the single incoming argument. - pos.func.dfg.replace(inst).x86_pshufb(a, mask_value); - } else { - // PSHUFB the first argument, placing zeroes for unused lanes. - let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect(); - let handle = pos.func.dfg.constants.insert(constructed_mask); - // Move the built mask into another XMM register. - let a_type = pos.func.dfg.value_type(a); - let mask_value = pos.ins().vconst(a_type, handle); - // Shuffle the first argument. - let shuffled_first_arg = pos.ins().x86_pshufb(a, mask_value); - - // PSHUFB the second argument, placing zeroes for unused lanes. - let constructed_mask = mask - .iter() - .map(|b| b.wrapping_sub(16)) - .map(zero_unknown_lane_index) - .collect(); - let handle = pos.func.dfg.constants.insert(constructed_mask); - // Move the built mask into another XMM register. - let b_type = pos.func.dfg.value_type(b); - let mask_value = pos.ins().vconst(b_type, handle); - // Shuffle the second argument. - let shuffled_second_arg = pos.ins().x86_pshufb(b, mask_value); - - // OR the vectors together to form the final shuffled value. 
- pos.func - .dfg - .replace(inst) - .bor(shuffled_first_arg, shuffled_second_arg); - - // TODO when AVX512 is enabled we should replace this sequence with a single VPERMB - }; - } -} - -/// Because floats already exist in XMM registers, we can keep them there when executing a CLIF -/// extractlane instruction -fn convert_extractlane( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::BinaryImm8 { - opcode: ir::Opcode::Extractlane, - arg, - imm: lane, - } = pos.func.dfg[inst] - { - // NOTE: the following legalization assumes that the upper bits of the XMM register do - // not need to be zeroed during extractlane. - let value_type = pos.func.dfg.value_type(arg); - if value_type.lane_type().is_float() { - // Floats are already in XMM registers and can stay there. - let shuffled = if lane != 0 { - // Replace the extractlane with a PSHUFD to get the float in the right place. - match value_type { - F32X4 => { - // Move the selected lane to the 0 lane. - let shuffle_mask: u8 = 0b00_00_00_00 | lane; - pos.ins().x86_pshufd(arg, shuffle_mask) - } - F64X2 => { - assert_eq!(lane, 1); - // Because we know the lane == 1, we move the upper 64 bits to the lower - // 64 bits, leaving the top 64 bits as-is. - let shuffle_mask = 0b11_10_11_10; - let bitcast = pos.ins().raw_bitcast(F32X4, arg); - pos.ins().x86_pshufd(bitcast, shuffle_mask) - } - _ => unreachable!(), - } - } else { - // Remove the extractlane instruction, leaving the float where it is. - arg - }; - // Then we must bitcast to the right type. - pos.func - .dfg - .replace(inst) - .raw_bitcast(value_type.lane_type(), shuffled); - } else { - // For non-floats, lower with the usual PEXTR* instruction. 
- pos.func.dfg.replace(inst).x86_pextr(arg, lane); - } - } -} - -/// Because floats exist in XMM registers, we can keep them there when executing a CLIF -/// insertlane instruction -fn convert_insertlane( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::TernaryImm8 { - opcode: ir::Opcode::Insertlane, - args: [vector, replacement], - imm: lane, - } = pos.func.dfg[inst] - { - let value_type = pos.func.dfg.value_type(vector); - if value_type.lane_type().is_float() { - // Floats are already in XMM registers and can stay there. - match value_type { - F32X4 => { - assert!(lane <= 3); - let immediate = 0b00_00_00_00 | lane << 4; - // Insert 32-bits from replacement (at index 00, bits 7:8) to vector (lane - // shifted into bits 5:6). - pos.func - .dfg - .replace(inst) - .x86_insertps(vector, replacement, immediate) - } - F64X2 => { - let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement); // only necessary due to SSA types - if lane == 0 { - // Move the lowest quadword in replacement to vector without changing - // the upper bits. - pos.func - .dfg - .replace(inst) - .x86_movsd(vector, replacement_as_vector) - } else { - assert_eq!(lane, 1); - // Move the low 64 bits of replacement vector to the high 64 bits of the - // vector. - pos.func - .dfg - .replace(inst) - .x86_movlhps(vector, replacement_as_vector) - } - } - _ => unreachable!(), - }; - } else { - // For non-floats, lower with the usual PINSR* instruction. - pos.func - .dfg - .replace(inst) - .x86_pinsr(vector, replacement, lane); - } - } -} - -/// For SIMD or scalar integer negation, convert `ineg` to `vconst + isub` or `iconst + isub`. 
-fn convert_ineg( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Unary { - opcode: ir::Opcode::Ineg, - arg, - } = pos.func.dfg[inst] - { - let value_type = pos.func.dfg.value_type(arg); - let zero_value = if value_type.is_vector() && value_type.lane_type().is_int() { - let zero_immediate = pos.func.dfg.constants.insert(vec![0; 16].into()); - pos.ins().vconst(value_type, zero_immediate) // this should be legalized to a PXOR - } else if value_type.is_int() { - pos.ins().iconst(value_type, 0) - } else { - panic!("Can't convert ineg of type {}", value_type) - }; - pos.func.dfg.replace(inst).isub(zero_value, arg); - } else { - unreachable!() - } -} - -fn expand_dword_to_xmm<'f>( - pos: &mut FuncCursor<'_>, - arg: ir::Value, - arg_type: ir::Type, -) -> ir::Value { - if arg_type == I64 { - let (arg_lo, arg_hi) = pos.ins().isplit(arg); - let arg = pos.ins().scalar_to_vector(I32X4, arg_lo); - let arg = pos.ins().insertlane(arg, arg_hi, 1); - let arg = pos.ins().raw_bitcast(I64X2, arg); - arg - } else { - pos.ins().bitcast(I64X2, arg) - } -} - -fn contract_dword_from_xmm<'f>( - pos: &mut FuncCursor<'f>, - inst: ir::Inst, - ret: ir::Value, - ret_type: ir::Type, -) { - if ret_type == I64 { - let ret = pos.ins().raw_bitcast(I32X4, ret); - let ret_lo = pos.ins().extractlane(ret, 0); - let ret_hi = pos.ins().extractlane(ret, 1); - pos.func.dfg.replace(inst).iconcat(ret_lo, ret_hi); - } else { - let ret = pos.ins().extractlane(ret, 0); - pos.func.dfg.replace(inst).ireduce(ret_type, ret); - } -} - -// Masks for i8x16 unsigned right shift. 
-static USHR_MASKS: [u8; 128] = [ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, - 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, - 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, - 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -]; - -// Convert a vector unsigned right shift. x86 has implementations for i16x8 and up (see `x86_pslr`), -// but for i8x16 we translate the shift to a i16x8 shift and mask off the upper bits. This same -// conversion could be provided in the CDSL if we could use varargs there (TODO); i.e. `load_complex` -// has a varargs field that we can't modify with the CDSL in legalize.rs. -fn convert_ushr( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Binary { - opcode: ir::Opcode::Ushr, - args: [arg0, arg1], - } = pos.func.dfg[inst] - { - // Note that for Wasm, the bounding of the shift index has happened during translation - let arg0_type = pos.func.dfg.value_type(arg0); - let arg1_type = pos.func.dfg.value_type(arg1); - assert!(!arg1_type.is_vector() && arg1_type.is_int()); - - // TODO it may be more clear to use scalar_to_vector here; the current issue is that - // scalar_to_vector has the restriction that the vector produced has a matching lane size - // (e.g. 
i32 -> i32x4) whereas bitcast allows moving any-to-any conversions (e.g. i32 -> - // i64x2). This matters because for some reason x86_psrl only allows i64x2 as the shift - // index type--this could be relaxed since it is not really meaningful. - let shift_index = pos.ins().bitcast(I64X2, arg1); - - if arg0_type == I8X16 { - // First, shift the vector using an I16X8 shift. - let bitcasted = pos.ins().raw_bitcast(I16X8, arg0); - let shifted = pos.ins().x86_psrl(bitcasted, shift_index); - let shifted = pos.ins().raw_bitcast(I8X16, shifted); - - // Then, fixup the even lanes that have incorrect upper bits. This uses the 128 mask - // bytes as a table that we index into. It is a substantial code-size increase but - // reduces the instruction count slightly. - let masks = pos.func.dfg.constants.insert(USHR_MASKS.as_ref().into()); - let mask_address = pos.ins().const_addr(isa.pointer_type(), masks); - let mask_offset = pos.ins().ishl_imm(arg1, 4); - let mask = - pos.ins() - .load_complex(arg0_type, MemFlags::new(), &[mask_address, mask_offset], 0); - pos.func.dfg.replace(inst).band(shifted, mask); - } else if arg0_type.is_vector() { - // x86 has encodings for these shifts. - pos.func.dfg.replace(inst).x86_psrl(arg0, shift_index); - } else if arg0_type == I64 { - // 64 bit shifts need to be legalized on x86_32. - let x86_isa = isa - .as_any() - .downcast_ref::() - .expect("the target ISA must be x86 at this point"); - if x86_isa.isa_flags.has_sse41() { - // if we have pinstrq/pextrq (SSE 4.1), legalize to that - let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type); - let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type); - let shifted = pos.ins().x86_psrl(value, amount); - contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type); - } else { - // otherwise legalize to libcall - expand_as_libcall(inst, func, isa); - } - } else { - // Everything else should be already legal. - unreachable!() - } - } -} - -// Masks for i8x16 left shift. 
-static SHL_MASKS: [u8; 128] = [ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, - 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, - 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, - 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, - 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, - 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, -]; - -// Convert a vector left shift. x86 has implementations for i16x8 and up (see `x86_psll`), -// but for i8x16 we translate the shift to a i16x8 shift and mask off the lower bits. This same -// conversion could be provided in the CDSL if we could use varargs there (TODO); i.e. `load_complex` -// has a varargs field that we can't modify with the CDSL in legalize.rs. -fn convert_ishl( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Binary { - opcode: ir::Opcode::Ishl, - args: [arg0, arg1], - } = pos.func.dfg[inst] - { - // Note that for Wasm, the bounding of the shift index has happened during translation - let arg0_type = pos.func.dfg.value_type(arg0); - let arg1_type = pos.func.dfg.value_type(arg1); - assert!(!arg1_type.is_vector() && arg1_type.is_int()); - - // TODO it may be more clear to use scalar_to_vector here; the current issue is that - // scalar_to_vector has the restriction that the vector produced has a matching lane size - // (e.g. 
i32 -> i32x4) whereas bitcast allows moving any-to-any conversions (e.g. i32 -> - // i64x2). This matters because for some reason x86_psrl only allows i64x2 as the shift - // index type--this could be relaxed since it is not really meaningful. - let shift_index = pos.ins().bitcast(I64X2, arg1); - - if arg0_type == I8X16 { - // First, shift the vector using an I16X8 shift. - let bitcasted = pos.ins().raw_bitcast(I16X8, arg0); - let shifted = pos.ins().x86_psll(bitcasted, shift_index); - let shifted = pos.ins().raw_bitcast(I8X16, shifted); - - // Then, fixup the even lanes that have incorrect lower bits. This uses the 128 mask - // bytes as a table that we index into. It is a substantial code-size increase but - // reduces the instruction count slightly. - let masks = pos.func.dfg.constants.insert(SHL_MASKS.as_ref().into()); - let mask_address = pos.ins().const_addr(isa.pointer_type(), masks); - let mask_offset = pos.ins().ishl_imm(arg1, 4); - let mask = - pos.ins() - .load_complex(arg0_type, MemFlags::new(), &[mask_address, mask_offset], 0); - pos.func.dfg.replace(inst).band(shifted, mask); - } else if arg0_type.is_vector() { - // x86 has encodings for these shifts. - pos.func.dfg.replace(inst).x86_psll(arg0, shift_index); - } else if arg0_type == I64 { - // 64 bit shifts need to be legalized on x86_32. - let x86_isa = isa - .as_any() - .downcast_ref::() - .expect("the target ISA must be x86 at this point"); - if x86_isa.isa_flags.has_sse41() { - // if we have pinstrq/pextrq (SSE 4.1), legalize to that - let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type); - let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type); - let shifted = pos.ins().x86_psll(value, amount); - contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type); - } else { - // otherwise legalize to libcall - expand_as_libcall(inst, func, isa); - } - } else { - // Everything else should be already legal. 
- unreachable!() - } - } -} - -/// Convert an imul.i64x2 to a valid code sequence on x86, first with AVX512 and then with SSE2. -fn convert_i64x2_imul( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Binary { - opcode: ir::Opcode::Imul, - args: [arg0, arg1], - } = pos.func.dfg[inst] - { - let ty = pos.func.dfg.ctrl_typevar(inst); - if ty == I64X2 { - let x86_isa = isa - .as_any() - .downcast_ref::() - .expect("the target ISA must be x86 at this point"); - if x86_isa.isa_flags.use_avx512dq_simd() || x86_isa.isa_flags.use_avx512vl_simd() { - // If we have certain AVX512 features, we can lower this instruction simply. - pos.func.dfg.replace(inst).x86_pmullq(arg0, arg1); - } else { - // Otherwise, we default to a very lengthy SSE2-compatible sequence. It splits each - // 64-bit lane into 32-bit high and low sections using shifting and then performs - // the following arithmetic per lane: with arg0 = concat(high0, low0) and arg1 = - // concat(high1, low1), calculate (high0 * low1) + (high1 * low0) + (low0 * low1). 
- let high0 = pos.ins().ushr_imm(arg0, 32); - let mul0 = pos.ins().x86_pmuludq(high0, arg1); - let high1 = pos.ins().ushr_imm(arg1, 32); - let mul1 = pos.ins().x86_pmuludq(high1, arg0); - let addhigh = pos.ins().iadd(mul0, mul1); - let high = pos.ins().ishl_imm(addhigh, 32); - let low = pos.ins().x86_pmuludq(arg0, arg1); - pos.func.dfg.replace(inst).iadd(low, high); - } - } else { - unreachable!( - "{} should be encodable; it cannot be legalized by convert_i64x2_imul", - pos.func.dfg.display_inst(inst, None) - ); - } - } -} - -fn expand_tls_value( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - use crate::settings::TlsModel; - - assert!( - isa.triple().architecture == target_lexicon::Architecture::X86_64, - "Not yet implemented for {:?}", - isa.triple(), - ); - - if let ir::InstructionData::UnaryGlobalValue { - opcode: ir::Opcode::TlsValue, - global_value, - } = func.dfg[inst] - { - let ctrl_typevar = func.dfg.ctrl_typevar(inst); - assert_eq!(ctrl_typevar, ir::types::I64); - - match isa.flags().tls_model() { - TlsModel::None => panic!("tls_model flag is not set."), - TlsModel::ElfGd => { - func.dfg.replace(inst).x86_elf_tls_get_addr(global_value); - } - TlsModel::Macho => { - func.dfg.replace(inst).x86_macho_tls_get_addr(global_value); - } - model => unimplemented!("tls_value for tls model {:?}", model), - } - } else { - unreachable!(); - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/mod.rs b/cranelift/codegen/src/isa/legacy/x86/mod.rs deleted file mode 100644 index e61fda1931..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/mod.rs +++ /dev/null @@ -1,199 +0,0 @@ -//! x86 Instruction Set Architectures. 
- -mod abi; -mod binemit; -mod enc_tables; -mod registers; -pub mod settings; -#[cfg(feature = "unwind")] -pub mod unwind; - -use super::super::settings as shared_settings; -#[cfg(feature = "testing_hooks")] -use crate::binemit::CodeSink; -use crate::binemit::{emit_function, MemoryCodeSink}; -use crate::ir; -use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings}; -use crate::isa::Builder as IsaBuilder; -#[cfg(feature = "unwind")] -use crate::isa::{unwind::systemv::RegisterMappingError, RegUnit}; -use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa}; -use crate::regalloc; -use crate::result::CodegenResult; -use crate::timing; -use alloc::{borrow::Cow, boxed::Box, vec::Vec}; -use core::any::Any; -use core::fmt; -use core::hash::{Hash, Hasher}; -use target_lexicon::{PointerWidth, Triple}; - -#[allow(dead_code)] -struct Isa { - triple: Triple, - shared_flags: shared_settings::Flags, - isa_flags: settings::Flags, - cpumode: &'static [shared_enc_tables::Level1Entry], -} - -/// Get an ISA builder for creating x86 targets. 
-pub fn isa_builder(triple: Triple) -> IsaBuilder { - IsaBuilder { - triple, - setup: settings::builder(), - constructor: isa_constructor, - } -} - -fn isa_constructor( - triple: Triple, - shared_flags: shared_settings::Flags, - builder: shared_settings::Builder, -) -> Box { - let level1 = match triple.pointer_width().unwrap() { - PointerWidth::U16 => unimplemented!("x86-16"), - PointerWidth::U32 => &enc_tables::LEVEL1_I32[..], - PointerWidth::U64 => &enc_tables::LEVEL1_I64[..], - }; - - let isa_flags = settings::Flags::new(&shared_flags, builder); - - Box::new(Isa { - triple, - isa_flags, - shared_flags, - cpumode: level1, - }) -} - -impl TargetIsa for Isa { - fn name(&self) -> &'static str { - "x86" - } - - fn triple(&self) -> &Triple { - &self.triple - } - - fn flags(&self) -> &shared_settings::Flags { - &self.shared_flags - } - - fn isa_flags(&self) -> Vec { - self.isa_flags.iter().collect() - } - - fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) { - self.shared_flags.hash(&mut hasher); - self.isa_flags.hash(&mut hasher); - } - - fn uses_cpu_flags(&self) -> bool { - true - } - - fn uses_complex_addresses(&self) -> bool { - true - } - - fn register_info(&self) -> RegInfo { - registers::INFO.clone() - } - - #[cfg(feature = "unwind")] - fn map_dwarf_register(&self, reg: RegUnit) -> Result { - unwind::systemv::map_reg(self, reg).map(|r| r.0) - } - - fn encoding_info(&self) -> EncInfo { - enc_tables::INFO.clone() - } - - fn legal_encodings<'a>( - &'a self, - func: &'a ir::Function, - inst: &'a ir::InstructionData, - ctrl_typevar: ir::Type, - ) -> Encodings<'a> { - lookup_enclist( - ctrl_typevar, - inst, - func, - self.cpumode, - &enc_tables::LEVEL2[..], - &enc_tables::ENCLISTS[..], - &enc_tables::LEGALIZE_ACTIONS[..], - &enc_tables::RECIPE_PREDICATES[..], - &enc_tables::INST_PREDICATES[..], - self.isa_flags.predicate_view(), - ) - } - - fn legalize_signature(&self, sig: &mut Cow, current: bool) { - abi::legalize_signature( - sig, - &self.triple, - current, - 
&self.shared_flags, - &self.isa_flags, - ) - } - - fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass { - abi::regclass_for_abi_type(ty) - } - - fn allocatable_registers(&self, _func: &ir::Function) -> regalloc::RegisterSet { - abi::allocatable_registers(&self.triple, &self.shared_flags) - } - - #[cfg(feature = "testing_hooks")] - fn emit_inst( - &self, - func: &ir::Function, - inst: ir::Inst, - divert: &mut regalloc::RegDiversions, - sink: &mut dyn CodeSink, - ) { - binemit::emit_inst(func, inst, divert, sink, self) - } - - fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) { - emit_function(func, binemit::emit_inst, sink, self) - } - - fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> { - let _tt = timing::prologue_epilogue(); - abi::prologue_epilogue(func, self) - } - - fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC { - ir::condcodes::IntCC::UnsignedLessThan - } - - fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC { - ir::condcodes::IntCC::UnsignedLessThan - } - - #[cfg(feature = "unwind")] - fn create_unwind_info( - &self, - func: &ir::Function, - ) -> CodegenResult> { - abi::create_unwind_info(func, self) - } - - #[cfg(feature = "unwind")] - fn create_systemv_cie(&self) -> Option { - Some(unwind::systemv::create_cie()) - } - - fn as_any(&self) -> &dyn Any { - self as &dyn Any - } -} - -impl fmt::Display for Isa { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}\n{}", self.shared_flags, self.isa_flags) - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/registers.rs b/cranelift/codegen/src/isa/legacy/x86/registers.rs deleted file mode 100644 index a7518b268b..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/registers.rs +++ /dev/null @@ -1,86 +0,0 @@ -//! x86 register descriptions. 
- -use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit}; - -include!(concat!(env!("OUT_DIR"), "/registers-x86.rs")); - -#[cfg(test)] -mod tests { - use super::*; - use crate::isa::RegUnit; - use alloc::string::{String, ToString}; - - #[test] - fn unit_encodings() { - fn gpr(unit: usize) -> Option { - Some(GPR.unit(unit)) - } - // The encoding of integer registers is not alphabetical. - assert_eq!(INFO.parse_regunit("rax"), gpr(0)); - assert_eq!(INFO.parse_regunit("rbx"), gpr(3)); - assert_eq!(INFO.parse_regunit("rcx"), gpr(1)); - assert_eq!(INFO.parse_regunit("rdx"), gpr(2)); - assert_eq!(INFO.parse_regunit("rsi"), gpr(6)); - assert_eq!(INFO.parse_regunit("rdi"), gpr(7)); - assert_eq!(INFO.parse_regunit("rbp"), gpr(5)); - assert_eq!(INFO.parse_regunit("rsp"), gpr(4)); - assert_eq!(INFO.parse_regunit("r8"), gpr(8)); - assert_eq!(INFO.parse_regunit("r15"), gpr(15)); - - fn fpr(unit: usize) -> Option { - Some(FPR.unit(unit)) - } - assert_eq!(INFO.parse_regunit("xmm0"), fpr(0)); - assert_eq!(INFO.parse_regunit("xmm15"), fpr(15)); - - // FIXME(#1306) Add these tests back in when FPR32 is re-added. - // fn fpr32(unit: usize) -> Option { - // Some(FPR32.unit(unit)) - // } - // assert_eq!(INFO.parse_regunit("xmm0"), fpr32(0)); - // assert_eq!(INFO.parse_regunit("xmm31"), fpr32(31)); - } - - #[test] - fn unit_names() { - fn gpr(ru: RegUnit) -> String { - INFO.display_regunit(GPR.first + ru).to_string() - } - assert_eq!(gpr(0), "%rax"); - assert_eq!(gpr(3), "%rbx"); - assert_eq!(gpr(1), "%rcx"); - assert_eq!(gpr(2), "%rdx"); - assert_eq!(gpr(6), "%rsi"); - assert_eq!(gpr(7), "%rdi"); - assert_eq!(gpr(5), "%rbp"); - assert_eq!(gpr(4), "%rsp"); - assert_eq!(gpr(8), "%r8"); - assert_eq!(gpr(15), "%r15"); - - fn fpr(ru: RegUnit) -> String { - INFO.display_regunit(FPR.first + ru).to_string() - } - assert_eq!(fpr(0), "%xmm0"); - assert_eq!(fpr(15), "%xmm15"); - - // FIXME(#1306) Add these tests back in when FPR32 is re-added. 
- // fn fpr32(ru: RegUnit) -> String { - // INFO.display_regunit(FPR32.first + ru).to_string() - // } - // assert_eq!(fpr32(0), "%xmm0"); - // assert_eq!(fpr32(31), "%xmm31"); - } - - #[test] - fn regclasses() { - assert_eq!(GPR.intersect_index(GPR), Some(GPR.into())); - assert_eq!(GPR.intersect_index(ABCD), Some(ABCD.into())); - assert_eq!(GPR.intersect_index(FPR), None); - assert_eq!(ABCD.intersect_index(GPR), Some(ABCD.into())); - assert_eq!(ABCD.intersect_index(ABCD), Some(ABCD.into())); - assert_eq!(ABCD.intersect_index(FPR), None); - assert_eq!(FPR.intersect_index(FPR), Some(FPR.into())); - assert_eq!(FPR.intersect_index(GPR), None); - assert_eq!(FPR.intersect_index(ABCD), None); - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/settings.rs b/cranelift/codegen/src/isa/legacy/x86/settings.rs deleted file mode 100644 index f13431c1a2..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/settings.rs +++ /dev/null @@ -1,52 +0,0 @@ -//! x86 Settings. - -use crate::settings::{self, detail, Builder, Value}; -use core::fmt; - -// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a -// public `Flags` struct with an impl for all of the settings defined in -// `cranelift-codegen/meta/src/isa/x86/settings.rs`. -include!(concat!(env!("OUT_DIR"), "/settings-x86.rs")); - -#[cfg(test)] -mod tests { - use super::{builder, Flags}; - use crate::settings::{self, Configurable}; - - #[test] - fn presets() { - let shared = settings::Flags::new(settings::builder()); - - // Nehalem has SSE4.1 but not BMI1. 
- let mut b0 = builder(); - b0.enable("nehalem").unwrap(); - let f0 = Flags::new(&shared, b0); - assert_eq!(f0.has_sse41(), true); - assert_eq!(f0.has_bmi1(), false); - - let mut b1 = builder(); - b1.enable("haswell").unwrap(); - let f1 = Flags::new(&shared, b1); - assert_eq!(f1.has_sse41(), true); - assert_eq!(f1.has_bmi1(), true); - } - #[test] - fn display_presets() { - // Spot check that the flags Display impl does not cause a panic - let shared = settings::Flags::new(settings::builder()); - - let b0 = builder(); - let f0 = Flags::new(&shared, b0); - let _ = format!("{}", f0); - - let mut b1 = builder(); - b1.enable("nehalem").unwrap(); - let f1 = Flags::new(&shared, b1); - let _ = format!("{}", f1); - - let mut b2 = builder(); - b2.enable("haswell").unwrap(); - let f2 = Flags::new(&shared, b2); - let _ = format!("{}", f2); - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/unwind.rs b/cranelift/codegen/src/isa/legacy/x86/unwind.rs deleted file mode 100644 index 2eed8b74e4..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/unwind.rs +++ /dev/null @@ -1,531 +0,0 @@ -//! Module for x86 unwind generation for supported ABIs. - -pub mod systemv; -pub mod winx64; - -use crate::ir::{Function, InstructionData, Opcode, ValueLoc}; -use crate::isa::x86::registers::{FPR, RU}; -use crate::isa::{RegUnit, TargetIsa}; -use crate::result::CodegenResult; -use alloc::vec::Vec; -use std::collections::HashMap; - -use crate::isa::unwind::input::{UnwindCode, UnwindInfo}; - -pub(crate) fn create_unwind_info( - func: &Function, - isa: &dyn TargetIsa, -) -> CodegenResult>> { - // Find last block based on max offset. - let last_block = func - .layout - .blocks() - .max_by_key(|b| func.offsets[*b]) - .expect("at least a block"); - // Find last instruction offset + size, and make it function size. 
- let function_size = func - .inst_offsets(last_block, &isa.encoding_info()) - .fold(0, |_, (offset, _, size)| offset + size); - - let entry_block = func.layout.entry_block().expect("missing entry block"); - let prologue_end = func.prologue_end.unwrap(); - let epilogues_start = func - .epilogues_start - .iter() - .map(|(i, b)| (*b, *i)) - .collect::>(); - - let word_size = isa.pointer_bytes(); - - let mut stack_size = None; - let mut prologue_size = 0; - let mut prologue_unwind_codes = Vec::new(); - let mut epilogues_unwind_codes = Vec::new(); - let mut frame_register: Option = None; - - // Process only entry block and blocks with epilogues. - let mut blocks = func - .epilogues_start - .iter() - .map(|(_, b)| *b) - .collect::>(); - if !blocks.contains(&entry_block) { - blocks.push(entry_block); - } - blocks.sort_by_key(|b| func.offsets[*b]); - - for block in blocks.iter() { - let mut in_prologue = block == &entry_block; - let mut in_epilogue = false; - let mut epilogue_pop_offsets = Vec::new(); - - let epilogue_start = epilogues_start.get(block); - let is_last_block = block == &last_block; - - for (offset, inst, size) in func.inst_offsets(*block, &isa.encoding_info()) { - let offset = offset + size; - - let unwind_codes; - if in_prologue { - // Check for prologue end (inclusive) - if prologue_end == inst { - in_prologue = false; - } - prologue_size += size; - unwind_codes = &mut prologue_unwind_codes; - } else if !in_epilogue && epilogue_start == Some(&inst) { - // Now in an epilogue, emit a remember state instruction if not last block - in_epilogue = true; - - epilogues_unwind_codes.push(Vec::new()); - unwind_codes = epilogues_unwind_codes.last_mut().unwrap(); - - if !is_last_block { - unwind_codes.push((offset, UnwindCode::RememberState)); - } - } else if in_epilogue { - unwind_codes = epilogues_unwind_codes.last_mut().unwrap(); - } else { - // Ignore normal instructions - continue; - } - - match func.dfg[inst] { - InstructionData::Unary { opcode, arg } => { - 
match opcode { - Opcode::X86Push => { - let reg = func.locations[arg].unwrap_reg(); - unwind_codes.push(( - offset, - UnwindCode::StackAlloc { - size: word_size.into(), - }, - )); - unwind_codes.push(( - offset, - UnwindCode::SaveRegister { - reg, - stack_offset: 0, - }, - )); - } - Opcode::AdjustSpDown => { - let stack_size = - stack_size.expect("expected a previous stack size instruction"); - - // This is used when calling a stack check function - // We need to track the assignment to RAX which has the size of the stack - unwind_codes - .push((offset, UnwindCode::StackAlloc { size: stack_size })); - } - _ => {} - } - } - InstructionData::UnaryImm { opcode, imm } => { - match opcode { - Opcode::Iconst => { - let imm: i64 = imm.into(); - assert!(imm <= core::u32::MAX as i64); - assert!(stack_size.is_none()); - - // This instruction should only appear in a prologue to pass an - // argument of the stack size to a stack check function. - // Record the stack size so we know what it is when we encounter the adjustment - // instruction (which will adjust via the register assigned to this instruction). - stack_size = Some(imm as u32); - } - Opcode::AdjustSpDownImm => { - let imm: i64 = imm.into(); - assert!(imm <= core::u32::MAX as i64); - - stack_size = Some(imm as u32); - - unwind_codes - .push((offset, UnwindCode::StackAlloc { size: imm as u32 })); - } - Opcode::AdjustSpUpImm => { - let imm: i64 = imm.into(); - assert!(imm <= core::u32::MAX as i64); - - stack_size = Some(imm as u32); - - unwind_codes - .push((offset, UnwindCode::StackDealloc { size: imm as u32 })); - } - _ => {} - } - } - InstructionData::Store { - opcode: Opcode::Store, - args: [arg1, arg2], - offset: stack_offset, - .. 
- } => { - if let (ValueLoc::Reg(src), ValueLoc::Reg(dst)) = - (func.locations[arg1], func.locations[arg2]) - { - // If this is a save of an FPR, record an unwind operation - // Note: the stack_offset here is relative to an adjusted SP - if dst == (RU::rsp as RegUnit) && FPR.contains(src) { - let stack_offset: i32 = stack_offset.into(); - unwind_codes.push(( - offset, - UnwindCode::SaveRegister { - reg: src, - stack_offset: stack_offset as u32, - }, - )); - } - } - } - InstructionData::CopySpecial { src, dst, .. } if frame_register.is_none() => { - // Check for change in CFA register (RSP is always the starting CFA) - if src == (RU::rsp as RegUnit) { - unwind_codes.push((offset, UnwindCode::SetFramePointer { reg: dst })); - frame_register = Some(dst); - } - } - InstructionData::NullAry { opcode } => match opcode { - Opcode::X86Pop => { - epilogue_pop_offsets.push(offset); - } - _ => {} - }, - InstructionData::MultiAry { opcode, .. } if in_epilogue => match opcode { - Opcode::Return => { - let args = func.dfg.inst_args(inst); - for (i, arg) in args.iter().rev().enumerate() { - // Only walk back the args for the pop instructions encountered - if i >= epilogue_pop_offsets.len() { - break; - } - - let offset = epilogue_pop_offsets[i]; - - let reg = func.locations[*arg].unwrap_reg(); - unwind_codes.push((offset, UnwindCode::RestoreRegister { reg })); - unwind_codes.push(( - offset, - UnwindCode::StackDealloc { - size: word_size.into(), - }, - )); - - if Some(reg) == frame_register { - unwind_codes.push((offset, UnwindCode::RestoreFramePointer)); - // Keep frame_register assigned for next epilogue. - } - } - epilogue_pop_offsets.clear(); - - // TODO ensure unwind codes sorted by offsets ? 
- - if !is_last_block { - unwind_codes.push((offset, UnwindCode::RestoreState)); - } - - in_epilogue = false; - } - _ => {} - }, - _ => {} - }; - } - } - - Ok(Some(UnwindInfo { - prologue_size, - prologue_unwind_codes, - epilogues_unwind_codes, - function_size, - word_size, - initial_sp_offset: word_size, - })) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::cursor::{Cursor, FuncCursor}; - use crate::ir::{ - types, AbiParam, ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind, - }; - use crate::isa::{lookup_variant, BackendVariant, CallConv}; - use crate::settings::{builder, Flags}; - use crate::Context; - use std::str::FromStr; - use target_lexicon::triple; - - #[test] - fn test_small_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - prologue_size: 9, - prologue_unwind_codes: vec![ - (2, UnwindCode::StackAlloc { size: 8 }), - ( - 2, - UnwindCode::SaveRegister { - reg: RU::rbp.into(), - stack_offset: 0, - } - ), - ( - 5, - UnwindCode::SetFramePointer { - reg: RU::rbp.into(), - } - ), - (9, UnwindCode::StackAlloc { size: 64 }) - ], - epilogues_unwind_codes: vec![vec![ - (13, UnwindCode::StackDealloc { size: 64 }), - ( - 15, - UnwindCode::RestoreRegister { - reg: RU::rbp.into() - } - ), - (15, UnwindCode::StackDealloc { size: 8 }), - (15, UnwindCode::RestoreFramePointer) - ]], - function_size: 16, - word_size: 8, - initial_sp_offset: 8, - } - ); - } - - #[test] - fn test_medium_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - 
.expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 10000)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - prologue_size: 27, - prologue_unwind_codes: vec![ - (2, UnwindCode::StackAlloc { size: 8 }), - ( - 2, - UnwindCode::SaveRegister { - reg: RU::rbp.into(), - stack_offset: 0, - } - ), - ( - 5, - UnwindCode::SetFramePointer { - reg: RU::rbp.into(), - } - ), - (27, UnwindCode::StackAlloc { size: 10000 }) - ], - epilogues_unwind_codes: vec![vec![ - (34, UnwindCode::StackDealloc { size: 10000 }), - ( - 36, - UnwindCode::RestoreRegister { - reg: RU::rbp.into() - } - ), - (36, UnwindCode::StackDealloc { size: 8 }), - (36, UnwindCode::RestoreFramePointer) - ]], - function_size: 37, - word_size: 8, - initial_sp_offset: 8, - } - ); - } - - #[test] - fn test_large_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 1000000)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - prologue_size: 27, - prologue_unwind_codes: vec![ - (2, UnwindCode::StackAlloc { size: 8 }), - ( - 2, - UnwindCode::SaveRegister { - reg: RU::rbp.into(), - stack_offset: 0, - } - ), - ( - 5, - UnwindCode::SetFramePointer { - reg: RU::rbp.into(), - } - ), - (27, UnwindCode::StackAlloc { size: 1000000 }) - ], - epilogues_unwind_codes: vec![vec![ - (34, 
UnwindCode::StackDealloc { size: 1000000 }), - ( - 36, - UnwindCode::RestoreRegister { - reg: RU::rbp.into() - } - ), - (36, UnwindCode::StackDealloc { size: 8 }), - (36, UnwindCode::RestoreFramePointer) - ]], - function_size: 37, - word_size: 8, - initial_sp_offset: 8, - } - ); - } - - fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { - let mut func = - Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv)); - - let block0 = func.dfg.make_block(); - let mut pos = FuncCursor::new(&mut func); - pos.insert_block(block0); - pos.ins().return_(&[]); - - if let Some(stack_slot) = stack_slot { - func.stack_slots.push(stack_slot); - } - - func - } - - #[test] - fn test_multi_return_func() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV)); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - prologue_size: 5, - prologue_unwind_codes: vec![ - (2, UnwindCode::StackAlloc { size: 8 }), - ( - 2, - UnwindCode::SaveRegister { - reg: RU::rbp.into(), - stack_offset: 0, - } - ), - ( - 5, - UnwindCode::SetFramePointer { - reg: RU::rbp.into() - } - ) - ], - epilogues_unwind_codes: vec![ - vec![ - (12, UnwindCode::RememberState), - ( - 12, - UnwindCode::RestoreRegister { - reg: RU::rbp.into() - } - ), - (12, UnwindCode::StackDealloc { size: 8 }), - (12, UnwindCode::RestoreFramePointer), - (13, UnwindCode::RestoreState) - ], - vec![ - ( - 15, - UnwindCode::RestoreRegister { - reg: RU::rbp.into() - } - ), - (15, UnwindCode::StackDealloc { size: 8 }), - (15, UnwindCode::RestoreFramePointer) - ] - ], - function_size: 16, - word_size: 8, - initial_sp_offset: 8, - } - ); - } - - fn 
create_multi_return_function(call_conv: CallConv) -> Function { - let mut sig = Signature::new(call_conv); - sig.params.push(AbiParam::new(types::I32)); - let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig); - - let block0 = func.dfg.make_block(); - let v0 = func.dfg.append_block_param(block0, types::I32); - let block1 = func.dfg.make_block(); - let block2 = func.dfg.make_block(); - - let mut pos = FuncCursor::new(&mut func); - pos.insert_block(block0); - pos.ins().brnz(v0, block2, &[]); - pos.ins().jump(block1, &[]); - - pos.insert_block(block1); - pos.ins().return_(&[]); - - pos.insert_block(block2); - pos.ins().return_(&[]); - - func - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/unwind/systemv.rs b/cranelift/codegen/src/isa/legacy/x86/unwind/systemv.rs deleted file mode 100644 index 31fc64c9fb..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/unwind/systemv.rs +++ /dev/null @@ -1,235 +0,0 @@ -//! Unwind information for System V ABI (x86-64). - -use crate::ir::Function; -use crate::isa::{ - unwind::systemv::{RegisterMappingError, UnwindInfo}, - RegUnit, TargetIsa, -}; -use crate::result::CodegenResult; -use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64}; - -/// Creates a new x86-64 common information entry (CIE). 
-pub fn create_cie() -> CommonInformationEntry { - use gimli::write::CallFrameInstruction; - - let mut entry = CommonInformationEntry::new( - Encoding { - address_size: 8, - format: Format::Dwarf32, - version: 1, - }, - 1, // Code alignment factor - -8, // Data alignment factor - X86_64::RA, - ); - - // Every frame will start with the call frame address (CFA) at RSP+8 - // It is +8 to account for the push of the return address by the call instruction - entry.add_instruction(CallFrameInstruction::Cfa(X86_64::RSP, 8)); - - // Every frame will start with the return address at RSP (CFA-8 = RSP+8-8 = RSP) - entry.add_instruction(CallFrameInstruction::Offset(X86_64::RA, -8)); - - entry -} - -/// Map Cranelift registers to their corresponding Gimli registers. -pub fn map_reg(isa: &dyn TargetIsa, reg: RegUnit) -> Result { - if isa.name() != "x86" || isa.pointer_bits() != 64 { - return Err(RegisterMappingError::UnsupportedArchitecture); - } - - // Mapping from https://github.com/bytecodealliance/cranelift/pull/902 by @iximeow - const X86_GP_REG_MAP: [gimli::Register; 16] = [ - X86_64::RAX, - X86_64::RCX, - X86_64::RDX, - X86_64::RBX, - X86_64::RSP, - X86_64::RBP, - X86_64::RSI, - X86_64::RDI, - X86_64::R8, - X86_64::R9, - X86_64::R10, - X86_64::R11, - X86_64::R12, - X86_64::R13, - X86_64::R14, - X86_64::R15, - ]; - const X86_XMM_REG_MAP: [gimli::Register; 16] = [ - X86_64::XMM0, - X86_64::XMM1, - X86_64::XMM2, - X86_64::XMM3, - X86_64::XMM4, - X86_64::XMM5, - X86_64::XMM6, - X86_64::XMM7, - X86_64::XMM8, - X86_64::XMM9, - X86_64::XMM10, - X86_64::XMM11, - X86_64::XMM12, - X86_64::XMM13, - X86_64::XMM14, - X86_64::XMM15, - ]; - - let reg_info = isa.register_info(); - let bank = reg_info - .bank_containing_regunit(reg) - .ok_or_else(|| RegisterMappingError::MissingBank)?; - match bank.name { - "IntRegs" => { - // x86 GP registers have a weird mapping to DWARF registers, so we use a - // lookup table. 
- Ok(X86_GP_REG_MAP[(reg - bank.first_unit) as usize]) - } - "FloatRegs" => Ok(X86_XMM_REG_MAP[(reg - bank.first_unit) as usize]), - _ => Err(RegisterMappingError::UnsupportedRegisterBank(bank.name)), - } -} - -pub(crate) fn create_unwind_info( - func: &Function, - isa: &dyn TargetIsa, -) -> CodegenResult> { - // Only System V-like calling conventions are supported - match isa.unwind_info_kind() { - crate::machinst::UnwindInfoKind::SystemV => {} - _ => return Ok(None), - } - - if func.prologue_end.is_none() || isa.name() != "x86" || isa.pointer_bits() != 64 { - return Ok(None); - } - - let unwind = match super::create_unwind_info(func, isa)? { - Some(u) => u, - None => { - return Ok(None); - } - }; - - struct RegisterMapper<'a, 'b>(&'a (dyn TargetIsa + 'b)); - impl<'a, 'b> crate::isa::unwind::systemv::RegisterMapper for RegisterMapper<'a, 'b> { - fn map(&self, reg: RegUnit) -> Result { - Ok(map_reg(self.0, reg)?.0) - } - fn sp(&self) -> u16 { - X86_64::RSP.0 - } - fn fp(&self) -> Option { - Some(X86_64::RBP.0) - } - } - let map = RegisterMapper(isa); - - Ok(Some(UnwindInfo::build(unwind, &map)?)) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::cursor::{Cursor, FuncCursor}; - use crate::ir::{ - types, AbiParam, ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind, - }; - use crate::isa::{lookup_variant, BackendVariant, CallConv}; - use crate::settings::{builder, Flags}; - use crate::Context; - use gimli::write::Address; - use std::str::FromStr; - use target_lexicon::triple; - - #[test] - fn test_simple_func() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::SystemV, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let fde = match isa - .create_unwind_info(&context.func) - .expect("can create unwind info") 
- { - Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { - info.to_fde(Address::Constant(1234)) - } - _ => panic!("expected unwind information"), - }; - - assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 16, lsda: None, instructions: [(2, CfaOffset(16)), (2, Offset(Register(6), -16)), (5, CfaRegister(Register(6))), (15, SameValue(Register(6))), (15, Cfa(Register(7), 8))] }"); - } - - fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { - let mut func = - Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv)); - - let block0 = func.dfg.make_block(); - let mut pos = FuncCursor::new(&mut func); - pos.insert_block(block0); - pos.ins().return_(&[]); - - if let Some(stack_slot) = stack_slot { - func.stack_slots.push(stack_slot); - } - - func - } - - #[test] - fn test_multi_return_func() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV)); - - context.compile(&*isa).expect("expected compilation"); - - let fde = match isa - .create_unwind_info(&context.func) - .expect("can create unwind info") - { - Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { - info.to_fde(Address::Constant(4321)) - } - _ => panic!("expected unwind information"), - }; - - assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [(2, CfaOffset(16)), (2, Offset(Register(6), -16)), (5, CfaRegister(Register(6))), (12, RememberState), (12, SameValue(Register(6))), (12, Cfa(Register(7), 8)), (13, RestoreState), (15, SameValue(Register(6))), (15, Cfa(Register(7), 8))] }"); - } - - fn create_multi_return_function(call_conv: CallConv) -> Function { - let mut sig = Signature::new(call_conv); - sig.params.push(AbiParam::new(types::I32)); - let mut func = 
Function::with_name_signature(ExternalName::user(0, 0), sig); - - let block0 = func.dfg.make_block(); - let v0 = func.dfg.append_block_param(block0, types::I32); - let block1 = func.dfg.make_block(); - let block2 = func.dfg.make_block(); - - let mut pos = FuncCursor::new(&mut func); - pos.insert_block(block0); - pos.ins().brnz(v0, block2, &[]); - pos.ins().jump(block1, &[]); - - pos.insert_block(block1); - pos.ins().return_(&[]); - - pos.insert_block(block2); - pos.ins().return_(&[]); - - func - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/unwind/winx64.rs b/cranelift/codegen/src/isa/legacy/x86/unwind/winx64.rs deleted file mode 100644 index 33e5463bb8..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/unwind/winx64.rs +++ /dev/null @@ -1,265 +0,0 @@ -//! Unwind information for Windows x64 ABI. - -use crate::ir::Function; -use crate::isa::x86::registers::{FPR, GPR}; -use crate::isa::{unwind::winx64::UnwindInfo, RegUnit, TargetIsa}; -use crate::result::CodegenResult; - -pub(crate) fn create_unwind_info( - func: &Function, - isa: &dyn TargetIsa, -) -> CodegenResult> { - // Only Windows fastcall is supported for unwind information - if !func.signature.call_conv.extends_windows_fastcall() || func.prologue_end.is_none() { - return Ok(None); - } - - let unwind = match super::create_unwind_info(func, isa)? 
{ - Some(u) => u, - None => { - return Ok(None); - } - }; - - Ok(Some(UnwindInfo::build::(unwind)?)) -} - -struct RegisterMapper; - -impl crate::isa::unwind::winx64::RegisterMapper for RegisterMapper { - fn map(reg: RegUnit) -> crate::isa::unwind::winx64::MappedRegister { - use crate::isa::unwind::winx64::MappedRegister; - if GPR.contains(reg) { - MappedRegister::Int(GPR.index_of(reg) as u8) - } else if FPR.contains(reg) { - MappedRegister::Xmm(reg as u8) - } else { - panic!() - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::cursor::{Cursor, FuncCursor}; - use crate::ir::{ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind}; - use crate::isa::unwind::winx64::UnwindCode; - use crate::isa::x86::registers::RU; - use crate::isa::{lookup_variant, BackendVariant, CallConv}; - use crate::settings::{builder, Flags}; - use crate::Context; - use std::str::FromStr; - use target_lexicon::triple; - - #[test] - fn test_wrong_calling_convention() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function(CallConv::SystemV, None)); - - context.compile(&*isa).expect("expected compilation"); - - assert_eq!( - create_unwind_info(&context.func, &*isa).expect("can create unwind info"), - None - ); - } - - #[test] - fn test_small_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - flags: 0, - prologue_size: 9, - frame_register: None, - 
frame_register_offset: 0, - unwind_codes: vec![ - UnwindCode::PushRegister { - instruction_offset: 2, - reg: GPR.index_of(RU::rbp.into()) as u8 - }, - UnwindCode::StackAlloc { - instruction_offset: 9, - size: 64 - } - ] - } - ); - - assert_eq!(unwind.emit_size(), 8); - - let mut buf = [0u8; 8]; - unwind.emit(&mut buf); - - assert_eq!( - buf, - [ - 0x01, // Version and flags (version 1, no flags) - 0x09, // Prologue size - 0x02, // Unwind code count (1 for stack alloc, 1 for push reg) - 0x00, // Frame register + offset (no frame register) - 0x09, // Prolog offset - 0x72, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0x7 * 8) + 8 = 64 bytes) - 0x02, // Prolog offset - 0x50, // Operation 0 (save nonvolatile register), reg = 5 (RBP) - ] - ); - } - - #[test] - fn test_medium_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 10000)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - flags: 0, - prologue_size: 27, - frame_register: None, - frame_register_offset: 0, - unwind_codes: vec![ - UnwindCode::PushRegister { - instruction_offset: 2, - reg: GPR.index_of(RU::rbp.into()) as u8 - }, - UnwindCode::StackAlloc { - instruction_offset: 27, - size: 10000 - } - ] - } - ); - - assert_eq!(unwind.emit_size(), 12); - - let mut buf = [0u8; 12]; - unwind.emit(&mut buf); - - assert_eq!( - buf, - [ - 0x01, // Version and flags (version 1, no flags) - 0x1B, // Prologue size - 0x03, // Unwind code count (2 for stack alloc, 1 for push reg) - 0x00, // Frame register + offset (no frame register) - 0x1B, // Prolog offset - 0x01, // Operation 1 (large stack 
alloc), size is scaled 16-bits (info = 0) - 0xE2, // Low size byte - 0x04, // High size byte (e.g. 0x04E2 * 8 = 10000 bytes) - 0x02, // Prolog offset - 0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP) - 0x00, // Padding - 0x00, // Padding - ] - ); - } - - #[test] - fn test_large_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 1000000)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - flags: 0, - prologue_size: 27, - frame_register: None, - frame_register_offset: 0, - unwind_codes: vec![ - UnwindCode::PushRegister { - instruction_offset: 2, - reg: GPR.index_of(RU::rbp.into()) as u8 - }, - UnwindCode::StackAlloc { - instruction_offset: 27, - size: 1000000 - } - ] - } - ); - - assert_eq!(unwind.emit_size(), 12); - - let mut buf = [0u8; 12]; - unwind.emit(&mut buf); - - assert_eq!( - buf, - [ - 0x01, // Version and flags (version 1, no flags) - 0x1B, // Prologue size - 0x04, // Unwind code count (3 for stack alloc, 1 for push reg) - 0x00, // Frame register + offset (no frame register) - 0x1B, // Prolog offset - 0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1) - 0x40, // Byte 1 of size - 0x42, // Byte 2 of size - 0x0F, // Byte 3 of size - 0x00, // Byte 4 of size (size is 0xF4240 = 1000000 bytes) - 0x02, // Prolog offset - 0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP) - ] - ); - } - - fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { - let mut func = - Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv)); - - let block0 = 
func.dfg.make_block(); - let mut pos = FuncCursor::new(&mut func); - pos.insert_block(block0); - pos.ins().return_(&[]); - - if let Some(stack_slot) = stack_slot { - func.stack_slots.push(stack_slot); - } - - func - } -} diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 3331534c49..b1ff090498 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -44,30 +44,18 @@ //! concurrent function compilations. pub use crate::isa::call_conv::CallConv; -pub use crate::isa::constraints::{ - BranchRange, ConstraintKind, OperandConstraint, RecipeConstraints, -}; -pub use crate::isa::enc_tables::Encodings; -pub use crate::isa::encoding::{base_size, EncInfo, Encoding}; -pub use crate::isa::registers::{regs_overlap, RegClass, RegClassIndex, RegInfo, RegUnit}; -pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef}; -use crate::binemit; use crate::flowgraph; use crate::ir; #[cfg(feature = "unwind")] use crate::isa::unwind::systemv::RegisterMappingError; use crate::machinst::{MachBackend, UnwindInfoKind}; -use crate::regalloc; use crate::result::CodegenResult; use crate::settings; use crate::settings::SetResult; -use crate::timing; -use alloc::{borrow::Cow, boxed::Box, vec::Vec}; -use core::any::Any; +use alloc::{boxed::Box, vec::Vec}; use core::fmt; use core::fmt::{Debug, Formatter}; -use core::hash::Hasher; use target_lexicon::{triple, Architecture, OperatingSystem, PointerWidth, Triple}; // This module is made public here for benchmarking purposes. No guarantees are @@ -84,23 +72,9 @@ pub(crate) mod aarch64; #[cfg(feature = "s390x")] mod s390x; -#[cfg(any(feature = "x86", feature = "riscv"))] -mod legacy; - -#[cfg(feature = "x86")] -use legacy::x86; - -#[cfg(feature = "riscv")] -use legacy::riscv; - pub mod unwind; mod call_conv; -mod constraints; -mod enc_tables; -mod encoding; -pub mod registers; -mod stack; #[cfg(test)] mod test_utils; @@ -120,57 +94,18 @@ macro_rules! 
isa_builder { }}; } -/// The "variant" for a given target. On one platform (x86-64), we have two -/// backends, the "old" and "new" one; the new one is the default if included -/// in the build configuration and not otherwise specified. -#[derive(Clone, Copy, Debug)] -pub enum BackendVariant { - /// Any backend available. - Any, - /// A "legacy" backend: one that operates using legalizations and encodings. - Legacy, - /// A backend built on `MachInst`s and the `VCode` framework. - MachInst, -} - -impl Default for BackendVariant { - fn default() -> Self { - BackendVariant::Any - } -} - -/// Look for an ISA for the given `triple`, selecting the backend variant given -/// by `variant` if available. -pub fn lookup_variant(triple: Triple, variant: BackendVariant) -> Result { - match (triple.architecture, variant) { - (Architecture::Riscv32 { .. }, _) | (Architecture::Riscv64 { .. }, _) => { - isa_builder!(riscv, (feature = "riscv"), triple) - } - (Architecture::X86_64, BackendVariant::Legacy) => { - isa_builder!(x86, (feature = "x86"), triple) - } - (Architecture::X86_64, BackendVariant::MachInst) => { - isa_builder!(x64, (feature = "x86"), triple) - } - #[cfg(not(feature = "old-x86-backend"))] - (Architecture::X86_64, BackendVariant::Any) => { - isa_builder!(x64, (feature = "x86"), triple) - } - #[cfg(feature = "old-x86-backend")] - (Architecture::X86_64, BackendVariant::Any) => { - isa_builder!(x86, (feature = "x86"), triple) - } - (Architecture::Arm { .. }, _) => isa_builder!(arm32, (feature = "arm32"), triple), - (Architecture::Aarch64 { .. }, _) => isa_builder!(aarch64, (feature = "arm64"), triple), - (Architecture::S390x { .. }, _) => isa_builder!(s390x, (feature = "s390x"), triple), - _ => Err(LookupError::Unsupported), - } -} - /// Look for an ISA for the given `triple`. /// Return a builder that can create a corresponding `TargetIsa`. 
pub fn lookup(triple: Triple) -> Result { - lookup_variant(triple, BackendVariant::Any) + match triple.architecture { + Architecture::X86_64 => { + isa_builder!(x64, (feature = "x86"), triple) + } + Architecture::Arm { .. } => isa_builder!(arm32, (feature = "arm32"), triple), + Architecture::Aarch64 { .. } => isa_builder!(aarch64, (feature = "arm64"), triple), + Architecture::S390x { .. } => isa_builder!(s390x, (feature = "s390x"), triple), + _ => Err(LookupError::Unsupported), + } } /// Look for a supported ISA with the given `name`. @@ -277,8 +212,9 @@ impl TargetFrontendConfig { } } -/// Methods that are specialized to a target ISA. Implies a Display trait that shows the -/// shared flags, as well as any isa-specific flags. +/// Methods that are specialized to a target ISA. +/// +/// Implies a Display trait that shows the shared flags, as well as any ISA-specific flags. pub trait TargetIsa: fmt::Display + Send + Sync { /// Get the name of this ISA. fn name(&self) -> &'static str; @@ -292,233 +228,15 @@ pub trait TargetIsa: fmt::Display + Send + Sync { /// Get the ISA-dependent flag values that were used to make this trait object. fn isa_flags(&self) -> Vec; - /// Get the variant of this ISA (Legacy or MachInst). - fn variant(&self) -> BackendVariant { - BackendVariant::Legacy - } - - /// Hashes all flags, both ISA-independent and ISA-specific, into the - /// specified hasher. - fn hash_all_flags(&self, hasher: &mut dyn Hasher); - - /// Get the default calling convention of this target. - fn default_call_conv(&self) -> CallConv { - CallConv::triple_default(self.triple()) - } - - /// Get the endianness of this ISA. - fn endianness(&self) -> ir::Endianness { - match self.triple().endianness().unwrap() { - target_lexicon::Endianness::Little => ir::Endianness::Little, - target_lexicon::Endianness::Big => ir::Endianness::Big, - } - } - - /// Returns the code (text) section alignment for this ISA. 
- fn code_section_alignment(&self) -> u64 { - use target_lexicon::*; - match (self.triple().operating_system, self.triple().architecture) { - ( - OperatingSystem::MacOSX { .. } - | OperatingSystem::Darwin - | OperatingSystem::Ios - | OperatingSystem::Tvos, - Architecture::Aarch64(..), - ) => 0x4000, - _ => 0x1000, - } - } - - /// Get the pointer type of this ISA. - fn pointer_type(&self) -> ir::Type { - ir::Type::int(u16::from(self.pointer_bits())).unwrap() - } - - /// Get the width of pointers on this ISA. - fn pointer_width(&self) -> PointerWidth { - self.triple().pointer_width().unwrap() - } - - /// Get the width of pointers on this ISA, in units of bits. - fn pointer_bits(&self) -> u8 { - self.pointer_width().bits() - } - - /// Get the width of pointers on this ISA, in units of bytes. - fn pointer_bytes(&self) -> u8 { - self.pointer_width().bytes() - } - - /// Get the information needed by frontends producing Cranelift IR. - fn frontend_config(&self) -> TargetFrontendConfig { - TargetFrontendConfig { - default_call_conv: self.default_call_conv(), - pointer_width: self.pointer_width(), - } - } - - /// Does the CPU implement scalar comparisons using a CPU flags register? - fn uses_cpu_flags(&self) -> bool { - false - } - - /// Does the CPU implement multi-register addressing? - fn uses_complex_addresses(&self) -> bool { - false - } - - /// Get a data structure describing the registers in this ISA. - fn register_info(&self) -> RegInfo; - - #[cfg(feature = "unwind")] - /// Map a Cranelift register to its corresponding DWARF register. - fn map_dwarf_register(&self, _: RegUnit) -> Result { - Err(RegisterMappingError::UnsupportedArchitecture) - } - #[cfg(feature = "unwind")] /// Map a regalloc::Reg to its corresponding DWARF register. fn map_regalloc_reg_to_dwarf(&self, _: ::regalloc::Reg) -> Result { Err(RegisterMappingError::UnsupportedArchitecture) } - /// Returns an iterator over legal encodings for the instruction. 
- fn legal_encodings<'a>( - &'a self, - func: &'a ir::Function, - inst: &'a ir::InstructionData, - ctrl_typevar: ir::Type, - ) -> Encodings<'a>; - - /// Encode an instruction after determining it is legal. - /// - /// If `inst` can legally be encoded in this ISA, produce the corresponding `Encoding` object. - /// Otherwise, return `Legalize` action. - /// - /// This is also the main entry point for determining if an instruction is legal. - fn encode( - &self, - func: &ir::Function, - inst: &ir::InstructionData, - ctrl_typevar: ir::Type, - ) -> Result { - let mut iter = self.legal_encodings(func, inst, ctrl_typevar); - iter.next().ok_or_else(|| iter.legalize()) - } - - /// Get a data structure describing the instruction encodings in this ISA. - fn encoding_info(&self) -> EncInfo; - - /// Legalize a function signature. - /// - /// This is used to legalize both the signature of the function being compiled and any called - /// functions. The signature should be modified by adding `ArgumentLoc` annotations to all - /// arguments and return values. - /// - /// Arguments with types that are not supported by the ABI can be expanded into multiple - /// arguments: - /// - /// - Integer types that are too large to fit in a register can be broken into multiple - /// arguments of a smaller integer type. - /// - Floating point types can be bit-cast to an integer type of the same size, and possible - /// broken into smaller integer types. - /// - Vector types can be bit-cast and broken down into smaller vectors or scalars. - /// - /// The legalizer will adapt argument and return values as necessary at all ABI boundaries. - /// - /// When this function is called to legalize the signature of the function currently being - /// compiled, `current` is true. 
The legalized signature can then also contain special purpose - /// arguments and return values such as: - /// - /// - A `link` argument representing the link registers on RISC architectures that don't push - /// the return address on the stack. - /// - A `link` return value which will receive the value that was passed to the `link` - /// argument. - /// - An `sret` argument can be added if one wasn't present already. This is necessary if the - /// signature returns more values than registers are available for returning values. - /// - An `sret` return value can be added if the ABI requires a function to return its `sret` - /// argument in a register. - /// - /// Arguments and return values for the caller's frame pointer and other callee-saved registers - /// should not be added by this function. These arguments are not added until after register - /// allocation. - fn legalize_signature(&self, sig: &mut Cow, current: bool); - - /// Get the register class that should be used to represent an ABI argument or return value of - /// type `ty`. This should be the top-level register class that contains the argument - /// registers. - /// - /// This function can assume that it will only be asked to provide register classes for types - /// that `legalize_signature()` produces in `ArgumentLoc::Reg` entries. - fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass; - - /// Get the set of allocatable registers that can be used when compiling `func`. - /// - /// This set excludes reserved registers like the stack pointer and other special-purpose - /// registers. - fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet; - - /// Compute the stack layout and insert prologue and epilogue code into `func`. - /// - /// Return an error if the stack frame is too large. - fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> { - let _tt = timing::prologue_epilogue(); - // This default implementation is unlikely to be good enough. 
- use crate::ir::stackslot::{StackOffset, StackSize}; - use crate::stack_layout::layout_stack; - - let word_size = StackSize::from(self.pointer_bytes()); - - // Account for the SpiderMonkey standard prologue pushes. - if func.signature.call_conv.extends_baldrdash() { - let bytes = StackSize::from(self.flags().baldrdash_prologue_words()) * word_size; - let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes); - ss.offset = Some(-(bytes as StackOffset)); - func.stack_slots.push(ss); - } - - let is_leaf = func.is_leaf(); - layout_stack(&mut func.stack_slots, is_leaf, word_size)?; - Ok(()) - } - - /// Emit binary machine code for a single instruction into the `sink` trait object. - /// - /// Note that this will call `put*` methods on the `sink` trait object via its vtable which - /// is not the fastest way of emitting code. - /// - /// This function is under the "testing_hooks" feature, and is only suitable for use by - /// test harnesses. It increases code size, and is inefficient. - #[cfg(feature = "testing_hooks")] - fn emit_inst( - &self, - func: &ir::Function, - inst: ir::Inst, - divert: &mut regalloc::RegDiversions, - sink: &mut dyn binemit::CodeSink, - ); - - /// Emit a whole function into memory. - fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut binemit::MemoryCodeSink); - /// IntCC condition for Unsigned Addition Overflow (Carry). fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC; - /// IntCC condition for Unsigned Subtraction Overflow (Borrow/Carry). - fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC; - - /// Returns the flavor of unwind information emitted for this target. 
- fn unwind_info_kind(&self) -> UnwindInfoKind { - match self.triple().operating_system { - #[cfg(feature = "unwind")] - OperatingSystem::Windows => UnwindInfoKind::Windows, - #[cfg(feature = "unwind")] - _ => UnwindInfoKind::SystemV, - #[cfg(not(feature = "unwind"))] - _ => UnwindInfoKind::None, - } - } - /// Creates unwind information for the function. /// /// Returns `None` if there is no unwind information for the function. @@ -544,10 +262,80 @@ pub trait TargetIsa: fmt::Display + Send + Sync { fn get_mach_backend(&self) -> Option<&dyn MachBackend> { None } +} - /// Return an [Any] reference for downcasting to the ISA-specific implementation of this trait - /// with `isa.as_any().downcast_ref::()`. - fn as_any(&self) -> &dyn Any; +/// Methods implemented for free for target ISA! +impl<'a> dyn TargetIsa + 'a { + /// Get the default calling convention of this target. + pub fn default_call_conv(&self) -> CallConv { + CallConv::triple_default(self.triple()) + } + + /// Get the endianness of this ISA. + pub fn endianness(&self) -> ir::Endianness { + match self.triple().endianness().unwrap() { + target_lexicon::Endianness::Little => ir::Endianness::Little, + target_lexicon::Endianness::Big => ir::Endianness::Big, + } + } + + /// Returns the code (text) section alignment for this ISA. + pub fn code_section_alignment(&self) -> u64 { + use target_lexicon::*; + match (self.triple().operating_system, self.triple().architecture) { + ( + OperatingSystem::MacOSX { .. } + | OperatingSystem::Darwin + | OperatingSystem::Ios + | OperatingSystem::Tvos, + Architecture::Aarch64(..), + ) => 0x4000, + // 64 KB is the maximal page size (i.e. memory translation granule size) + // supported by the architecture and is used on some platforms. + (_, Architecture::Aarch64(..)) => 0x10000, + _ => 0x1000, + } + } + + /// Get the pointer type of this ISA. 
+ pub fn pointer_type(&self) -> ir::Type { + ir::Type::int(u16::from(self.pointer_bits())).unwrap() + } + + /// Get the width of pointers on this ISA. + pub(crate) fn pointer_width(&self) -> PointerWidth { + self.triple().pointer_width().unwrap() + } + + /// Get the width of pointers on this ISA, in units of bits. + pub fn pointer_bits(&self) -> u8 { + self.pointer_width().bits() + } + + /// Get the width of pointers on this ISA, in units of bytes. + pub fn pointer_bytes(&self) -> u8 { + self.pointer_width().bytes() + } + + /// Get the information needed by frontends producing Cranelift IR. + pub fn frontend_config(&self) -> TargetFrontendConfig { + TargetFrontendConfig { + default_call_conv: self.default_call_conv(), + pointer_width: self.pointer_width(), + } + } + + /// Returns the flavor of unwind information emitted for this target. + pub(crate) fn unwind_info_kind(&self) -> UnwindInfoKind { + match self.triple().operating_system { + #[cfg(feature = "unwind")] + OperatingSystem::Windows => UnwindInfoKind::Windows, + #[cfg(feature = "unwind")] + _ => UnwindInfoKind::SystemV, + #[cfg(not(feature = "unwind"))] + _ => UnwindInfoKind::None, + } + } } impl Debug for &dyn TargetIsa { diff --git a/cranelift/codegen/src/isa/registers.rs b/cranelift/codegen/src/isa/registers.rs deleted file mode 100644 index e67ae13453..0000000000 --- a/cranelift/codegen/src/isa/registers.rs +++ /dev/null @@ -1,360 +0,0 @@ -//! Data structures describing the registers in an ISA. - -use crate::entity::EntityRef; -use core::fmt; - -/// Register units are the smallest units of register allocation. -/// -/// Normally there is a 1-1 correspondence between registers and register units, but when an ISA -/// has aliasing registers, the aliasing can be modeled with registers that cover multiple -/// register units. -/// -/// The register allocator will enforce that each register unit only gets used for one thing. -pub type RegUnit = u16; - -/// A bit mask indexed by register classes. 
-/// -/// The size of this type is determined by the ISA with the most register classes. -pub type RegClassMask = u32; - -/// A bit mask indexed by register units. -/// -/// The size of this type is determined by the target ISA that has the most register units defined. -/// Currently that is arm32 which has 64+16 units. -pub type RegUnitMask = [RegClassMask; 3]; - -/// The register units in a target ISA are divided into disjoint register banks. Each bank covers a -/// contiguous range of register units. -/// -/// The `RegBank` struct provides a static description of a register bank. -pub struct RegBank { - /// The name of this register bank as defined in the ISA's DSL definition. - pub name: &'static str, - - /// The first register unit in this bank. - pub first_unit: RegUnit, - - /// The total number of register units in this bank. - pub units: RegUnit, - - /// Array of specially named register units. This array can be shorter than the number of units - /// in the bank. - pub names: &'static [&'static str], - - /// Name prefix to use for those register units in the bank not covered by the `names` array. - /// The remaining register units will be named this prefix followed by their decimal offset in - /// the bank. So with a prefix `r`, registers will be named `r8`, `r9`, ... - pub prefix: &'static str, - - /// Index of the first top-level register class in this bank. - pub first_toprc: usize, - - /// Number of top-level register classes in this bank. - /// - /// The top-level register classes in a bank are guaranteed to be numbered sequentially from - /// `first_toprc`, and all top-level register classes across banks come before any sub-classes. - pub num_toprcs: usize, - - /// Is register pressure tracking enabled for this bank? - pub pressure_tracking: bool, -} - -impl RegBank { - /// Does this bank contain `regunit`? 
- fn contains(&self, regunit: RegUnit) -> bool { - regunit >= self.first_unit && regunit - self.first_unit < self.units - } - - /// Try to parse a regunit name. The name is not expected to begin with `%`. - fn parse_regunit(&self, name: &str) -> Option { - match self.names.iter().position(|&x| x == name) { - Some(offset) => { - // This is one of the special-cased names. - Some(offset as RegUnit) - } - None => { - // Try a regular prefixed name. - if name.starts_with(self.prefix) { - name[self.prefix.len()..].parse().ok() - } else { - None - } - } - } - .and_then(|offset| { - if offset < self.units { - Some(offset + self.first_unit) - } else { - None - } - }) - } - - /// Write `regunit` to `w`, assuming that it belongs to this bank. - /// All regunits are written with a `%` prefix. - fn write_regunit(&self, f: &mut fmt::Formatter, regunit: RegUnit) -> fmt::Result { - let offset = regunit - self.first_unit; - assert!(offset < self.units); - if (offset as usize) < self.names.len() { - write!(f, "%{}", self.names[offset as usize]) - } else { - write!(f, "%{}{}", self.prefix, offset) - } - } -} - -/// A register class reference. -/// -/// All register classes are statically defined in tables generated from the meta descriptions. -pub type RegClass = &'static RegClassData; - -/// Data about a register class. -/// -/// A register class represents a subset of the registers in a bank. It describes the set of -/// permitted registers for a register operand in a given encoding of an instruction. -/// -/// A register class can be a subset of another register class. The top-level register classes are -/// disjoint. -pub struct RegClassData { - /// The name of the register class. - pub name: &'static str, - - /// The index of this class in the ISA's RegInfo description. - pub index: u8, - - /// How many register units to allocate per register. - pub width: u8, - - /// Index of the register bank this class belongs to. 
- pub bank: u8, - - /// Index of the top-level register class contains this one. - pub toprc: u8, - - /// The first register unit in this class. - pub first: RegUnit, - - /// Bit-mask of sub-classes of this register class, including itself. - /// - /// Bits correspond to RC indexes. - pub subclasses: RegClassMask, - - /// Mask of register units in the class. If `width > 1`, the mask only has a bit set for the - /// first register unit in each allocatable register. - pub mask: RegUnitMask, - - /// The global `RegInfo` instance containing this register class. - pub info: &'static RegInfo, - - /// The "pinned" register of the associated register bank. - /// - /// This register must be non-volatile (callee-preserved) and must not be the fixed - /// output register of any instruction. - pub pinned_reg: Option, -} - -impl RegClassData { - /// Get the register class index corresponding to the intersection of `self` and `other`. - /// - /// This register class is guaranteed to exist if the register classes overlap. If the register - /// classes don't overlap, returns `None`. - pub fn intersect_index(&self, other: RegClass) -> Option { - // Compute the set of common subclasses. - let mask = self.subclasses & other.subclasses; - - if mask == 0 { - // No overlap. - None - } else { - // Register class indexes are topologically ordered, so the largest common subclass has - // the smallest index. - Some(RegClassIndex(mask.trailing_zeros() as u8)) - } - } - - /// Get the intersection of `self` and `other`. - pub fn intersect(&self, other: RegClass) -> Option { - self.intersect_index(other).map(|rci| self.info.rc(rci)) - } - - /// Returns true if `other` is a subclass of this register class. - /// A register class is considered to be a subclass of itself. - pub fn has_subclass>(&self, other: RCI) -> bool { - self.subclasses & (1 << other.into().0) as u32 != 0 - } - - /// Get the top-level register class containing this class. 
- pub fn toprc(&self) -> RegClass { - self.info.rc(RegClassIndex(self.toprc)) - } - - /// Get a specific register unit in this class. - pub fn unit(&self, offset: usize) -> RegUnit { - let uoffset = offset * usize::from(self.width); - self.first + uoffset as RegUnit - } - - /// Does this register class contain `regunit`? - pub fn contains(&self, regunit: RegUnit) -> bool { - self.mask[(regunit / 32) as usize] & (1u32 << (regunit % 32) as u32) != 0 - } - - /// If the pinned register is used, is the given regunit the pinned register of this class? - #[inline] - pub fn is_pinned_reg(&self, enabled: bool, regunit: RegUnit) -> bool { - enabled - && self - .pinned_reg - .map_or(false, |pinned_reg| pinned_reg == regunit) - } - - /// Calculate the index of the register inside the class. - pub fn index_of(&self, regunit: RegUnit) -> u16 { - assert!( - self.contains(regunit), - "the {} register class does not contain {}", - self.name, - regunit - ); - regunit - self.first - } -} - -impl fmt::Display for RegClassData { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str(self.name) - } -} - -impl fmt::Debug for RegClassData { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str(self.name) - } -} - -/// Within an ISA, register classes are uniquely identified by their index. -impl PartialEq for RegClassData { - fn eq(&self, other: &Self) -> bool { - self.index == other.index - } -} - -/// A small reference to a register class. -/// -/// Use this when storing register classes in compact data structures. The `RegInfo::rc()` method -/// can be used to get the real register class reference back. 
-#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub struct RegClassIndex(u8); - -impl EntityRef for RegClassIndex { - fn new(idx: usize) -> Self { - Self(idx as u8) - } - - fn index(self) -> usize { - usize::from(self.0) - } -} - -impl From for RegClassIndex { - fn from(rc: RegClass) -> Self { - Self(rc.index) - } -} - -impl fmt::Display for RegClassIndex { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "rci{}", self.0) - } -} - -/// Test of two registers overlap. -/// -/// A register is identified as a `(RegClass, RegUnit)` pair. The register class is needed to -/// determine the width (in regunits) of the register. -pub fn regs_overlap(rc1: RegClass, reg1: RegUnit, rc2: RegClass, reg2: RegUnit) -> bool { - let end1 = reg1 + RegUnit::from(rc1.width); - let end2 = reg2 + RegUnit::from(rc2.width); - !(end1 <= reg2 || end2 <= reg1) -} - -/// Information about the registers in an ISA. -/// -/// The `RegUnit` data structure collects all relevant static information about the registers in an -/// ISA. -#[derive(Clone)] -pub struct RegInfo { - /// All register banks, ordered by their `first_unit`. The register banks are disjoint, but - /// there may be holes of unused register unit numbers between banks due to alignment. - pub banks: &'static [RegBank], - - /// All register classes ordered topologically so a sub-class always follows its parent. - pub classes: &'static [RegClass], -} - -impl RegInfo { - /// Get the register bank holding `regunit`. - pub fn bank_containing_regunit(&self, regunit: RegUnit) -> Option<&RegBank> { - // We could do a binary search, but most ISAs have only two register banks... - self.banks.iter().find(|b| b.contains(regunit)) - } - - /// Try to parse a regunit name. The name is not expected to begin with `%`. - pub fn parse_regunit(&self, name: &str) -> Option { - self.banks - .iter() - .filter_map(|b| b.parse_regunit(name)) - .next() - } - - /// Make a temporary object that can display a register unit. 
- pub fn display_regunit(&self, regunit: RegUnit) -> DisplayRegUnit { - DisplayRegUnit { - regunit, - reginfo: self, - } - } - - /// Get the register class corresponding to `idx`. - pub fn rc(&self, idx: RegClassIndex) -> RegClass { - self.classes[idx.index()] - } - - /// Get the top-level register class containing the `idx` class. - pub fn toprc(&self, idx: RegClassIndex) -> RegClass { - self.classes[self.rc(idx).toprc as usize] - } -} - -/// Temporary object that holds enough information to print a register unit. -pub struct DisplayRegUnit<'a> { - regunit: RegUnit, - reginfo: &'a RegInfo, -} - -impl<'a> fmt::Display for DisplayRegUnit<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.reginfo.bank_containing_regunit(self.regunit) { - Some(b) => b.write_regunit(f, self.regunit), - None => write!(f, "%INVALID{}", self.regunit), - } - } -} - -#[test] -fn assert_sizes() { - use cranelift_codegen_shared::constants; - use std::mem::size_of; - - // In these tests, size_of returns number of bytes: we actually want the number of bits, so - // multiply these by 8. 
- assert!( - (size_of::() * 8) <= constants::MAX_NUM_REG_CLASSES, - "need to bump MAX_NUM_REG_CLASSES or change RegClassMask type" - ); - - assert!( - constants::MAX_NUM_REG_CLASSES < (1 << (size_of::() * 8)), - "need to change RegClassIndex's type to a wider type" - ); -} diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index d0dd517e65..8712d0de28 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -371,7 +371,7 @@ impl ABIMachineSpec for S390xMachineDeps { insts.push(Inst::mov64(into_reg, from_reg)); } insts.push(Inst::AluRUImm32 { - alu_op: ALUOp::Add64, + alu_op: ALUOp::AddLogical64, rd: into_reg, imm, }); diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs index a28b9d14b4..9c60699279 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs @@ -956,24 +956,28 @@ impl MachInstEmit for Inst { match self { &Inst::AluRRR { alu_op, rd, rn, rm } => { let (opcode, have_rr) = match alu_op { - ALUOp::Add32 => (0xb9f8, true), // ARK - ALUOp::Add64 => (0xb9e8, true), // AGRK - ALUOp::Sub32 => (0xb9f9, true), // SRK - ALUOp::Sub64 => (0xb9e9, true), // SGRK - ALUOp::Mul32 => (0xb9fd, true), // MSRKC - ALUOp::Mul64 => (0xb9ed, true), // MSGRKC - ALUOp::And32 => (0xb9f4, true), // NRK - ALUOp::And64 => (0xb9e4, true), // NGRK - ALUOp::Orr32 => (0xb9f6, true), // ORK - ALUOp::Orr64 => (0xb9e6, true), // OGRK - ALUOp::Xor32 => (0xb9f7, true), // XRK - ALUOp::Xor64 => (0xb9e7, true), // XGRK - ALUOp::AndNot32 => (0xb974, false), // NNRK - ALUOp::AndNot64 => (0xb964, false), // NNGRK - ALUOp::OrrNot32 => (0xb976, false), // NORK - ALUOp::OrrNot64 => (0xb966, false), // NOGRK - ALUOp::XorNot32 => (0xb977, false), // NXRK - ALUOp::XorNot64 => (0xb967, false), // NXGRK + ALUOp::Add32 => (0xb9f8, true), // ARK + ALUOp::Add64 => (0xb9e8, true), // AGRK + ALUOp::AddLogical32 => (0xb9fa, 
true), // ALRK + ALUOp::AddLogical64 => (0xb9ea, true), // ALGRK + ALUOp::Sub32 => (0xb9f9, true), // SRK + ALUOp::Sub64 => (0xb9e9, true), // SGRK + ALUOp::SubLogical32 => (0xb9fb, true), // SLRK + ALUOp::SubLogical64 => (0xb9eb, true), // SLGRK + ALUOp::Mul32 => (0xb9fd, true), // MSRKC + ALUOp::Mul64 => (0xb9ed, true), // MSGRKC + ALUOp::And32 => (0xb9f4, true), // NRK + ALUOp::And64 => (0xb9e4, true), // NGRK + ALUOp::Orr32 => (0xb9f6, true), // ORK + ALUOp::Orr64 => (0xb9e6, true), // OGRK + ALUOp::Xor32 => (0xb9f7, true), // XRK + ALUOp::Xor64 => (0xb9e7, true), // XGRK + ALUOp::AndNot32 => (0xb974, false), // NNRK + ALUOp::AndNot64 => (0xb964, false), // NNGRK + ALUOp::OrrNot32 => (0xb976, false), // NORK + ALUOp::OrrNot64 => (0xb966, false), // NOGRK + ALUOp::XorNot32 => (0xb977, false), // NXRK + ALUOp::XorNot64 => (0xb967, false), // NXGRK _ => unreachable!(), }; if have_rr && rd.to_reg() == rn { @@ -1003,21 +1007,27 @@ impl MachInstEmit for Inst { } &Inst::AluRR { alu_op, rd, rm } => { let (opcode, is_rre) = match alu_op { - ALUOp::Add32 => (0x1a, false), // AR - ALUOp::Add64 => (0xb908, true), // AGR - ALUOp::Add64Ext32 => (0xb918, true), // AGFR - ALUOp::Sub32 => (0x1b, false), // SR - ALUOp::Sub64 => (0xb909, true), // SGR - ALUOp::Sub64Ext32 => (0xb919, true), // SGFR - ALUOp::Mul32 => (0xb252, true), // MSR - ALUOp::Mul64 => (0xb90c, true), // MSGR - ALUOp::Mul64Ext32 => (0xb91c, true), // MSGFR - ALUOp::And32 => (0x14, false), // NR - ALUOp::And64 => (0xb980, true), // NGR - ALUOp::Orr32 => (0x16, false), // OR - ALUOp::Orr64 => (0xb981, true), // OGR - ALUOp::Xor32 => (0x17, false), // XR - ALUOp::Xor64 => (0xb982, true), // XGR + ALUOp::Add32 => (0x1a, false), // AR + ALUOp::Add64 => (0xb908, true), // AGR + ALUOp::Add64Ext32 => (0xb918, true), // AGFR + ALUOp::AddLogical32 => (0x1e, false), // ALR + ALUOp::AddLogical64 => (0xb90a, true), // ALGR + ALUOp::AddLogical64Ext32 => (0xb91a, true), // ALGFR + ALUOp::Sub32 => (0x1b, false), // SR + 
ALUOp::Sub64 => (0xb909, true), // SGR + ALUOp::Sub64Ext32 => (0xb919, true), // SGFR + ALUOp::SubLogical32 => (0x1f, false), // SLR + ALUOp::SubLogical64 => (0xb90b, true), // SLGR + ALUOp::SubLogical64Ext32 => (0xb91b, true), // SLGFR + ALUOp::Mul32 => (0xb252, true), // MSR + ALUOp::Mul64 => (0xb90c, true), // MSGR + ALUOp::Mul64Ext32 => (0xb91c, true), // MSGFR + ALUOp::And32 => (0x14, false), // NR + ALUOp::And64 => (0xb980, true), // NGR + ALUOp::Orr32 => (0x16, false), // OR + ALUOp::Orr64 => (0xb981, true), // OGR + ALUOp::Xor32 => (0x17, false), // XR + ALUOp::Xor64 => (0xb982, true), // XGR _ => unreachable!(), }; if is_rre { @@ -1032,27 +1042,33 @@ impl MachInstEmit for Inst { ref mem, } => { let (opcode_rx, opcode_rxy) = match alu_op { - ALUOp::Add32 => (Some(0x5a), Some(0xe35a)), // A(Y) - ALUOp::Add32Ext16 => (Some(0x4a), Some(0xe34a)), // AH(Y) - ALUOp::Add64 => (None, Some(0xe308)), // AG - ALUOp::Add64Ext16 => (None, Some(0xe338)), // AGH - ALUOp::Add64Ext32 => (None, Some(0xe318)), // AGF - ALUOp::Sub32 => (Some(0x5b), Some(0xe35b)), // S(Y) - ALUOp::Sub32Ext16 => (Some(0x4b), Some(0xe37b)), // SH(Y) - ALUOp::Sub64 => (None, Some(0xe309)), // SG - ALUOp::Sub64Ext16 => (None, Some(0xe339)), // SGH - ALUOp::Sub64Ext32 => (None, Some(0xe319)), // SGF - ALUOp::Mul32 => (Some(0x71), Some(0xe351)), // MS(Y) - ALUOp::Mul32Ext16 => (Some(0x4c), Some(0xe37c)), // MH(Y) - ALUOp::Mul64 => (None, Some(0xe30c)), // MSG - ALUOp::Mul64Ext16 => (None, Some(0xe33c)), // MSH - ALUOp::Mul64Ext32 => (None, Some(0xe31c)), // MSGF - ALUOp::And32 => (Some(0x54), Some(0xe354)), // N(Y) - ALUOp::And64 => (None, Some(0xe380)), // NG - ALUOp::Orr32 => (Some(0x56), Some(0xe356)), // O(Y) - ALUOp::Orr64 => (None, Some(0xe381)), // OG - ALUOp::Xor32 => (Some(0x57), Some(0xe357)), // X(Y) - ALUOp::Xor64 => (None, Some(0xe382)), // XG + ALUOp::Add32 => (Some(0x5a), Some(0xe35a)), // A(Y) + ALUOp::Add32Ext16 => (Some(0x4a), Some(0xe34a)), // AH(Y) + ALUOp::Add64 => (None, 
Some(0xe308)), // AG + ALUOp::Add64Ext16 => (None, Some(0xe338)), // AGH + ALUOp::Add64Ext32 => (None, Some(0xe318)), // AGF + ALUOp::AddLogical32 => (Some(0x5e), Some(0xe35e)), // AL(Y) + ALUOp::AddLogical64 => (None, Some(0xe30a)), // ALG + ALUOp::AddLogical64Ext32 => (None, Some(0xe31a)), // ALGF + ALUOp::Sub32 => (Some(0x5b), Some(0xe35b)), // S(Y) + ALUOp::Sub32Ext16 => (Some(0x4b), Some(0xe37b)), // SH(Y) + ALUOp::Sub64 => (None, Some(0xe309)), // SG + ALUOp::Sub64Ext16 => (None, Some(0xe339)), // SGH + ALUOp::Sub64Ext32 => (None, Some(0xe319)), // SGF + ALUOp::SubLogical32 => (Some(0x5f), Some(0xe35f)), // SL(Y) + ALUOp::SubLogical64 => (None, Some(0xe30b)), // SLG + ALUOp::SubLogical64Ext32 => (None, Some(0xe31b)), // SLGF + ALUOp::Mul32 => (Some(0x71), Some(0xe351)), // MS(Y) + ALUOp::Mul32Ext16 => (Some(0x4c), Some(0xe37c)), // MH(Y) + ALUOp::Mul64 => (None, Some(0xe30c)), // MSG + ALUOp::Mul64Ext16 => (None, Some(0xe33c)), // MSH + ALUOp::Mul64Ext32 => (None, Some(0xe31c)), // MSGF + ALUOp::And32 => (Some(0x54), Some(0xe354)), // N(Y) + ALUOp::And64 => (None, Some(0xe380)), // NG + ALUOp::Orr32 => (Some(0x56), Some(0xe356)), // O(Y) + ALUOp::Orr64 => (None, Some(0xe381)), // OG + ALUOp::Xor32 => (Some(0x57), Some(0xe357)), // X(Y) + ALUOp::Xor64 => (None, Some(0xe382)), // XG _ => unreachable!(), }; let rd = rd.to_reg(); @@ -1082,10 +1098,10 @@ impl MachInstEmit for Inst { } &Inst::AluRUImm32 { alu_op, rd, imm } => { let opcode = match alu_op { - ALUOp::Add32 => 0xc2b, // ALFI - ALUOp::Add64 => 0xc2a, // ALGFI - ALUOp::Sub32 => 0xc25, // SLFI - ALUOp::Sub64 => 0xc24, // SLGFI + ALUOp::AddLogical32 => 0xc2b, // ALFI + ALUOp::AddLogical64 => 0xc2a, // ALGFI + ALUOp::SubLogical32 => 0xc25, // SLFI + ALUOp::SubLogical64 => 0xc24, // SLGFI _ => unreachable!(), }; put(sink, &enc_ril_a(opcode, rd.to_reg(), imm)); @@ -1380,14 +1396,16 @@ impl MachInstEmit for Inst { ref mem, } => { let opcode = match alu_op { - ALUOp::Add32 => 0xebf8, // LAA - ALUOp::Add64 => 
0xebe8, // LAAG - ALUOp::And32 => 0xebf4, // LAN - ALUOp::And64 => 0xebe4, // LANG - ALUOp::Orr32 => 0xebf6, // LAO - ALUOp::Orr64 => 0xebe6, // LAOG - ALUOp::Xor32 => 0xebf7, // LAX - ALUOp::Xor64 => 0xebe7, // LAXG + ALUOp::Add32 => 0xebf8, // LAA + ALUOp::Add64 => 0xebe8, // LAAG + ALUOp::AddLogical32 => 0xebfa, // LAAL + ALUOp::AddLogical64 => 0xebea, // LAALG + ALUOp::And32 => 0xebf4, // LAN + ALUOp::And64 => 0xebe4, // LANG + ALUOp::Orr32 => 0xebf6, // LAO + ALUOp::Orr64 => 0xebe6, // LAOG + ALUOp::Xor32 => 0xebf7, // LAX + ALUOp::Xor64 => 0xebe7, // LAXG _ => unreachable!(), }; diff --git a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs index 242c62adbe..15f32bed74 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs @@ -32,6 +32,26 @@ fn test_s390x_binemit() { "B9E86045", "agrk %r4, %r5, %r6", )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9FA3012", + "alrk %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AddLogical64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9EA6045", + "algrk %r4, %r5, %r6", + )); insns.push(( Inst::AluRRR { alu_op: ALUOp::Sub32, @@ -52,6 +72,26 @@ fn test_s390x_binemit() { "B9E96045", "sgrk %r4, %r5, %r6", )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::SubLogical32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9FB3012", + "slrk %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::SubLogical64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9EB6045", + "slgrk %r4, %r5, %r6", + )); insns.push(( Inst::AluRRR { alu_op: ALUOp::Mul32, @@ -261,6 +301,33 @@ fn test_s390x_binemit() { "B9180045", "agfr %r4, %r5", )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "1E12", + "alr %r1, %r2", + )); + 
insns.push(( + Inst::AluRR { + alu_op: ALUOp::AddLogical64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B90A0045", + "algr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::AddLogical64Ext32, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B91A0045", + "algfr %r4, %r5", + )); insns.push(( Inst::AluRR { alu_op: ALUOp::Sub32, @@ -288,6 +355,33 @@ fn test_s390x_binemit() { "B9190045", "sgfr %r4, %r5", )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::SubLogical32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "1F12", + "slr %r1, %r2", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::SubLogical64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B90B0045", + "slgr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::SubLogical64Ext32, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B91B0045", + "slgfr %r4, %r5", + )); insns.push(( Inst::AluRR { alu_op: ALUOp::Mul32, @@ -468,6 +562,62 @@ fn test_s390x_binemit() { "E31020000018", "agf %r1, 0(%r2)", )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "5E102000", + "al %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000005E", + "aly %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::AddLogical64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000000A", + "alg %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::AddLogical64Ext32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000001A", + "algf %r1, 0(%r2)", + 
)); insns.push(( Inst::AluRX { alu_op: ALUOp::Sub32, @@ -566,6 +716,62 @@ fn test_s390x_binemit() { "E31020000019", "sgf %r1, 0(%r2)", )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::SubLogical32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "5F102000", + "sl %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::SubLogical32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000005F", + "sly %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::SubLogical64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000000B", + "slg %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::SubLogical64Ext32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000001B", + "slgf %r1, 0(%r2)", + )); insns.push(( Inst::AluRX { alu_op: ALUOp::Mul32, @@ -939,7 +1145,7 @@ fn test_s390x_binemit() { insns.push(( Inst::AluRUImm32 { - alu_op: ALUOp::Add32, + alu_op: ALUOp::AddLogical32, rd: writable_gpr(7), imm: 0, }, @@ -948,7 +1154,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::AluRUImm32 { - alu_op: ALUOp::Add32, + alu_op: ALUOp::AddLogical32, rd: writable_gpr(7), imm: 4294967295, }, @@ -957,7 +1163,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::AluRUImm32 { - alu_op: ALUOp::Sub32, + alu_op: ALUOp::SubLogical32, rd: writable_gpr(7), imm: 0, }, @@ -966,7 +1172,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::AluRUImm32 { - alu_op: ALUOp::Sub32, + alu_op: ALUOp::SubLogical32, rd: writable_gpr(7), imm: 4294967295, }, @@ -975,7 +1181,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::AluRUImm32 { - alu_op: 
ALUOp::Add64, + alu_op: ALUOp::AddLogical64, rd: writable_gpr(7), imm: 0, }, @@ -984,7 +1190,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::AluRUImm32 { - alu_op: ALUOp::Add64, + alu_op: ALUOp::AddLogical64, rd: writable_gpr(7), imm: 4294967295, }, @@ -993,7 +1199,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::AluRUImm32 { - alu_op: ALUOp::Sub64, + alu_op: ALUOp::SubLogical64, rd: writable_gpr(7), imm: 0, }, @@ -1002,7 +1208,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::AluRUImm32 { - alu_op: ALUOp::Sub64, + alu_op: ALUOp::SubLogical64, rd: writable_gpr(7), imm: 4294967295, }, @@ -2325,6 +2531,126 @@ fn test_s390x_binemit() { "EB456FFF7FE8", "laag %r4, %r5, 524287(%r6)", )); + insns.push(( + Inst::AtomicRmw { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: zero_reg(), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB45000080FA", + "laal %r4, %r5, -524288", + )); + insns.push(( + Inst::AtomicRmw { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: zero_reg(), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB450FFF7FFA", + "laal %r4, %r5, 524287", + )); + insns.push(( + Inst::AtomicRmw { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: gpr(6), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB45600080FA", + "laal %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::AtomicRmw { + alu_op: ALUOp::AddLogical32, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: gpr(6), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB456FFF7FFA", + "laal %r4, %r5, 524287(%r6)", + )); + insns.push(( + Inst::AtomicRmw { + alu_op: 
ALUOp::AddLogical64, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: zero_reg(), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB45000080EA", + "laalg %r4, %r5, -524288", + )); + insns.push(( + Inst::AtomicRmw { + alu_op: ALUOp::AddLogical64, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: zero_reg(), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB450FFF7FEA", + "laalg %r4, %r5, 524287", + )); + insns.push(( + Inst::AtomicRmw { + alu_op: ALUOp::AddLogical64, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: gpr(6), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB45600080EA", + "laalg %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::AtomicRmw { + alu_op: ALUOp::AddLogical64, + rd: writable_gpr(4), + rn: gpr(5), + mem: MemArg::BXD20 { + base: gpr(6), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB456FFF7FEA", + "laalg %r4, %r5, 524287(%r6)", + )); insns.push(( Inst::AtomicRmw { alu_op: ALUOp::And32, diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index 7d05b61855..74406cd8df 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -55,11 +55,17 @@ pub enum ALUOp { Add64, Add64Ext16, Add64Ext32, + AddLogical32, + AddLogical64, + AddLogical64Ext32, Sub32, Sub32Ext16, Sub64, Sub64Ext16, Sub64Ext32, + SubLogical32, + SubLogical64, + SubLogical64Ext32, Mul32, Mul32Ext16, Mul64, @@ -2572,8 +2578,12 @@ impl Inst { let (op, have_rr) = match alu_op { ALUOp::Add32 => ("ark", true), ALUOp::Add64 => ("agrk", true), + ALUOp::AddLogical32 => ("alrk", true), + ALUOp::AddLogical64 => ("algrk", true), ALUOp::Sub32 => ("srk", true), ALUOp::Sub64 => 
("sgrk", true), + ALUOp::SubLogical32 => ("slrk", true), + ALUOp::SubLogical64 => ("slgrk", true), ALUOp::Mul32 => ("msrkc", true), ALUOp::Mul64 => ("msgrkc", true), ALUOp::And32 => ("nrk", true), @@ -2623,9 +2633,15 @@ impl Inst { ALUOp::Add32 => "ar", ALUOp::Add64 => "agr", ALUOp::Add64Ext32 => "agfr", + ALUOp::AddLogical32 => "alr", + ALUOp::AddLogical64 => "algr", + ALUOp::AddLogical64Ext32 => "algfr", ALUOp::Sub32 => "sr", ALUOp::Sub64 => "sgr", ALUOp::Sub64Ext32 => "sgfr", + ALUOp::SubLogical32 => "slr", + ALUOp::SubLogical64 => "slgr", + ALUOp::SubLogical64Ext32 => "slgfr", ALUOp::Mul32 => "msr", ALUOp::Mul64 => "msgr", ALUOp::Mul64Ext32 => "msgfr", @@ -2652,11 +2668,17 @@ impl Inst { ALUOp::Add64 => (None, Some("ag")), ALUOp::Add64Ext16 => (None, Some("agh")), ALUOp::Add64Ext32 => (None, Some("agf")), + ALUOp::AddLogical32 => (Some("al"), Some("aly")), + ALUOp::AddLogical64 => (None, Some("alg")), + ALUOp::AddLogical64Ext32 => (None, Some("algf")), ALUOp::Sub32 => (Some("s"), Some("sy")), ALUOp::Sub32Ext16 => (Some("sh"), Some("shy")), ALUOp::Sub64 => (None, Some("sg")), ALUOp::Sub64Ext16 => (None, Some("sgh")), ALUOp::Sub64Ext32 => (None, Some("sgf")), + ALUOp::SubLogical32 => (Some("sl"), Some("sly")), + ALUOp::SubLogical64 => (None, Some("slg")), + ALUOp::SubLogical64Ext32 => (None, Some("slgf")), ALUOp::Mul32 => (Some("ms"), Some("msy")), ALUOp::Mul32Ext16 => (Some("mh"), Some("mhy")), ALUOp::Mul64 => (None, Some("msg")), @@ -2715,10 +2737,10 @@ impl Inst { } &Inst::AluRUImm32 { alu_op, rd, imm } => { let op = match alu_op { - ALUOp::Add32 => "alfi", - ALUOp::Add64 => "algfi", - ALUOp::Sub32 => "slfi", - ALUOp::Sub64 => "slgfi", + ALUOp::AddLogical32 => "alfi", + ALUOp::AddLogical64 => "algfi", + ALUOp::SubLogical32 => "slfi", + ALUOp::SubLogical64 => "slgfi", _ => unreachable!(), }; let rd = rd.to_reg().show_rru(mb_rru); @@ -2967,6 +2989,8 @@ impl Inst { let op = match alu_op { ALUOp::Add32 => "laa", ALUOp::Add64 => "laag", + ALUOp::AddLogical32 => 
"laal", + ALUOp::AddLogical64 => "laalg", ALUOp::And32 => "lan", ALUOp::And64 => "lang", ALUOp::Orr32 => "lao", diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs index cd0db1ae34..e1cff43b93 100644 --- a/cranelift/codegen/src/isa/s390x/lower.rs +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -661,16 +661,7 @@ fn lower_icmp_to_flags>( (false, true) => NarrowValueMode::SignExtend64, (false, false) => NarrowValueMode::ZeroExtend64, }; - let inputs = [ - InsnInput { - insn: insn, - input: 0, - }, - InsnInput { - insn: insn, - input: 1, - }, - ]; + let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }]; let ty = ctx.input_ty(insn, 0); let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); if is_signed { @@ -759,16 +750,7 @@ fn lower_icmp_to_flags>( fn lower_fcmp_to_flags>(ctx: &mut C, insn: IRInst) { let ty = ctx.input_ty(insn, 0); let bits = ty_bits(ty); - let inputs = [ - InsnInput { - insn: insn, - input: 0, - }, - InsnInput { - insn: insn, - input: 1, - }, - ]; + let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }]; let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); match bits { @@ -973,17 +955,37 @@ fn lower_insn_to_regs>( } } Opcode::IaddIfcout => { - // This only supports the operands emitted by dynamic_addr. let ty = ty.unwrap(); assert!(ty == types::I32 || ty == types::I64); - let alu_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64); + // Emit an ADD LOGICAL instruction, which sets the condition code + // to indicate an (unsigned) carry bit. 
+ let alu_op = choose_32_64(ty, ALUOp::AddLogical32, ALUOp::AddLogical64); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let imm = input_matches_uimm32(ctx, inputs[1]).unwrap(); - ctx.emit(Inst::gen_move(rd, rn, ty)); - // Note that this will emit AL(G)FI, which sets the condition - // code to indicate an (unsigned) carry bit. - ctx.emit(Inst::AluRUImm32 { alu_op, rd, imm }); + if let Some(imm) = input_matches_uimm32(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRUImm32 { alu_op, rd, imm }); + } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else if let Some(mem) = input_matches_uext32_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { + alu_op: ALUOp::AddLogical64Ext32, + rd, + mem, + }); + } else if let Some(rm) = input_matches_uext32_reg(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRR { + alu_op: ALUOp::AddLogical64Ext32, + rd, + rm, + }); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); + } } Opcode::UaddSat | Opcode::SaddSat => unimplemented!(), @@ -2889,17 +2891,11 @@ fn lower_insn_to_regs>( Opcode::Spill | Opcode::Fill | Opcode::FillNop - | Opcode::Regmove - | Opcode::CopySpecial - | Opcode::CopyToSsa | Opcode::CopyNop | Opcode::AdjustSpDown | Opcode::AdjustSpUpImm | Opcode::AdjustSpDownImm - | Opcode::DummySargT - | Opcode::IfcmpSp - | Opcode::Regspill - | Opcode::Regfill => { + | Opcode::IfcmpSp => { panic!("Unused opcode should not be encountered."); } @@ -2962,45 +2958,6 @@ fn lower_insn_to_regs>( | Opcode::IfcmpImm => { panic!("ALU+imm and ALU+carry ops should not appear here!"); } - - #[cfg(feature = "x86")] - Opcode::X86Udivmodx - | Opcode::X86Sdivmodx - | Opcode::X86Umulx - | 
Opcode::X86Smulx - | Opcode::X86Cvtt2si - | Opcode::X86Fmin - | Opcode::X86Fmax - | Opcode::X86Push - | Opcode::X86Pop - | Opcode::X86Bsr - | Opcode::X86Bsf - | Opcode::X86Pblendw - | Opcode::X86Pshufd - | Opcode::X86Pshufb - | Opcode::X86Pextr - | Opcode::X86Pinsr - | Opcode::X86Insertps - | Opcode::X86Movsd - | Opcode::X86Movlhps - | Opcode::X86Psll - | Opcode::X86Psrl - | Opcode::X86Psra - | Opcode::X86Ptest - | Opcode::X86Pmaxs - | Opcode::X86Pmaxu - | Opcode::X86Pmins - | Opcode::X86Pminu - | Opcode::X86Pmullq - | Opcode::X86Pmuludq - | Opcode::X86Punpckh - | Opcode::X86Punpckl - | Opcode::X86Vcvtudq2ps - | Opcode::X86Palignr - | Opcode::X86ElfTlsGetAddr - | Opcode::X86MachoTlsGetAddr => { - panic!("x86-specific opcode in supposedly arch-neutral IR!"); - } } Ok(()) @@ -3136,7 +3093,7 @@ fn lower_branch>( info: Box::new(JTSequenceInfo { default_target, targets: jt_targets, - targets_for_term: targets_for_term, + targets_for_term, }), }); } diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs index 5c1b0b32da..2dd3e9f0b1 100644 --- a/cranelift/codegen/src/isa/s390x/mod.rs +++ b/cranelift/codegen/src/isa/s390x/mod.rs @@ -14,7 +14,6 @@ use crate::result::CodegenResult; use crate::settings as shared_settings; use alloc::{boxed::Box, vec::Vec}; -use core::hash::{Hash, Hasher}; use regalloc::{PrettyPrint, RealRegUniverse, Reg}; use target_lexicon::{Architecture, Triple}; @@ -114,11 +113,6 @@ impl MachBackend for S390xBackend { self.isa_flags.iter().collect() } - fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) { - self.flags.hash(&mut hasher); - self.isa_flags.hash(&mut hasher); - } - fn reg_universe(&self) -> &RealRegUniverse { &self.reg_universe } @@ -132,10 +126,6 @@ impl MachBackend for S390xBackend { IntCC::UnsignedGreaterThan } - fn unsigned_sub_overflow_condition(&self) -> IntCC { - unimplemented!() - } - #[cfg(feature = "unwind")] fn emit_unwind_info( &self, diff --git a/cranelift/codegen/src/isa/stack.rs 
b/cranelift/codegen/src/isa/stack.rs deleted file mode 100644 index ae093bed28..0000000000 --- a/cranelift/codegen/src/isa/stack.rs +++ /dev/null @@ -1,95 +0,0 @@ -//! Low-level details of stack accesses. -//! -//! The `ir::StackSlots` type deals with stack slots and stack frame layout. The `StackRef` type -//! defined in this module expresses the low-level details of accessing a stack slot from an -//! encoded instruction. - -use crate::ir::stackslot::{StackOffset, StackSlotKind, StackSlots}; -use crate::ir::StackSlot; - -/// A method for referencing a stack slot in the current stack frame. -/// -/// Stack slots are addressed with a constant offset from a base register. The base can be the -/// stack pointer, the frame pointer, or (in the future) a zone register pointing to an inner zone -/// of a large stack frame. -#[derive(Clone, Copy, Debug)] -pub struct StackRef { - /// The base register to use for addressing. - pub base: StackBase, - - /// Immediate offset from the base register to the first byte of the stack slot. - pub offset: StackOffset, -} - -impl StackRef { - /// Get a reference to the stack slot `ss` using one of the base pointers in `mask`. - pub fn masked(ss: StackSlot, mask: StackBaseMask, frame: &StackSlots) -> Option { - // Try an SP-relative reference. - if mask.contains(StackBase::SP) { - return Some(Self::sp(ss, frame)); - } - - // No reference possible with this mask. - None - } - - /// Get a reference to `ss` using the stack pointer as a base. - pub fn sp(ss: StackSlot, frame: &StackSlots) -> Self { - let size = frame - .layout_info - .expect("Stack layout must be computed before referencing stack slots") - .frame_size; - let slot = &frame[ss]; - let offset = if slot.kind == StackSlotKind::OutgoingArg { - // Outgoing argument slots have offsets relative to our stack pointer. - slot.offset.unwrap() - } else { - // All other slots have offsets relative to our caller's stack frame. - // Offset where SP is pointing. 
(All ISAs have stacks growing downwards.) - let sp_offset = -(size as StackOffset); - slot.offset.unwrap() - sp_offset - }; - Self { - base: StackBase::SP, - offset, - } - } -} - -/// Generic base register for referencing stack slots. -/// -/// Most ISAs have a stack pointer and an optional frame pointer, so provide generic names for -/// those two base pointers. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum StackBase { - /// Use the stack pointer. - SP = 0, - - /// Use the frame pointer (if one is present). - FP = 1, - - /// Use an explicit zone pointer in a general-purpose register. - /// - /// This feature is not yet implemented. - Zone = 2, -} - -/// Bit mask of supported stack bases. -/// -/// Many instruction encodings can use different base registers while others only work with the -/// stack pointer, say. A `StackBaseMask` is a bit mask of supported stack bases for a given -/// instruction encoding. -/// -/// This behaves like a set of `StackBase` variants. -/// -/// The internal representation as a `u8` is public because stack base masks are used in constant -/// tables generated from the meta-language encoding definitions. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub struct StackBaseMask(pub u8); - -impl StackBaseMask { - /// Check if this mask contains the `base` variant. 
- pub fn contains(self, base: StackBase) -> bool { - self.0 & (1 << base as usize) != 0 - } -} diff --git a/cranelift/codegen/src/isa/test_utils.rs b/cranelift/codegen/src/isa/test_utils.rs index 01c500d6ca..c57a0a56cb 100644 --- a/cranelift/codegen/src/isa/test_utils.rs +++ b/cranelift/codegen/src/isa/test_utils.rs @@ -2,9 +2,7 @@ #![allow(dead_code)] use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc}; -use crate::ir::Value; -use crate::ir::{ConstantOffset, ExternalName, Function, JumpTable, Opcode, SourceLoc, TrapCode}; -use crate::isa::TargetIsa; +use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode}; use alloc::vec::Vec; use std::string::String; @@ -68,10 +66,6 @@ impl CodeSink for TestCodeSink { ) { } - fn reloc_constant(&mut self, _rel: Reloc, _constant_offset: ConstantOffset) {} - - fn reloc_jt(&mut self, _rel: Reloc, _jt: JumpTable) {} - fn trap(&mut self, _code: TrapCode, _srcloc: SourceLoc) {} fn begin_jumptables(&mut self) {} @@ -80,7 +74,5 @@ impl CodeSink for TestCodeSink { fn end_codegen(&mut self) {} - fn add_stack_map(&mut self, _val_list: &[Value], _func: &Function, _isa: &dyn TargetIsa) {} - fn add_call_site(&mut self, _opcode: Opcode, _srcloc: SourceLoc) {} } diff --git a/cranelift/codegen/src/isa/unwind.rs b/cranelift/codegen/src/isa/unwind.rs index 13397c3266..4dd8ae78dd 100644 --- a/cranelift/codegen/src/isa/unwind.rs +++ b/cranelift/codegen/src/isa/unwind.rs @@ -24,77 +24,6 @@ pub enum UnwindInfo { SystemV(systemv::UnwindInfo), } -/// Intermediate representation for the unwind information -/// generated by a backend. -pub mod input { - use crate::binemit::CodeOffset; - use alloc::vec::Vec; - #[cfg(feature = "enable-serde")] - use serde::{Deserialize, Serialize}; - - /// Elementary operation in the unwind operations. - #[derive(Clone, Debug, PartialEq, Eq)] - #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] - pub enum UnwindCode { - /// Defines that a register is saved at the specified offset. 
- SaveRegister { - /// The saved register. - reg: Reg, - /// The specified offset relative to the stack pointer. - stack_offset: u32, - }, - /// Defines that a register is as defined before call. - RestoreRegister { - /// The restored register. - reg: Reg, - }, - /// The stack pointer was adjusted to allocate the stack. - StackAlloc { - /// Size to allocate. - size: u32, - }, - /// The stack pointer was adjusted to free the stack. - StackDealloc { - /// Size to deallocate. - size: u32, - }, - /// The alternative register was assigned as frame pointer base. - SetFramePointer { - /// The specified register. - reg: Reg, - }, - /// Restores a frame pointer base to default register. - RestoreFramePointer, - /// Saves the state. - RememberState, - /// Restores the state. - RestoreState, - /// On aarch64 ARMv8.3+ devices, enables or disables pointer authentication. - Aarch64SetPointerAuth { - /// Whether return addresses (hold in LR) contain a pointer-authentication code. - return_addresses: bool, - }, - } - - /// Unwind information as generated by a backend. - #[derive(Clone, Debug, PartialEq, Eq)] - #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] - pub struct UnwindInfo { - /// Size of the prologue. - pub prologue_size: CodeOffset, - /// Unwind codes for prologue. - pub prologue_unwind_codes: Vec<(CodeOffset, UnwindCode)>, - /// Unwind codes for epilogues. - pub epilogues_unwind_codes: Vec)>>, - /// Entire function size. - pub function_size: CodeOffset, - /// Platform word size in bytes. - pub word_size: u8, - /// Initial stack pointer offset. - pub initial_sp_offset: u8, - } -} - /// Unwind pseudoinstruction used in VCode backends: represents that /// at the present location, an action has just been taken. 
/// diff --git a/cranelift/codegen/src/isa/unwind/systemv.rs b/cranelift/codegen/src/isa/unwind/systemv.rs index da3bfea869..b914f13a75 100644 --- a/cranelift/codegen/src/isa/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/unwind/systemv.rs @@ -1,7 +1,6 @@ //! System V ABI unwind information. use crate::binemit::CodeOffset; -use crate::isa::unwind::input; use crate::isa::unwind::UnwindInst; use crate::result::{CodegenError, CodegenResult}; use alloc::vec::Vec; @@ -259,66 +258,6 @@ pub(crate) fn create_unwind_info_from_insts>( } impl UnwindInfo { - // TODO: remove `build()` below when old backend is removed. The new backend uses a simpler - // approach in `create_unwind_info_from_insts()` above. - - pub(crate) fn build<'b, Reg: PartialEq + Copy>( - unwind: input::UnwindInfo, - map_reg: &'b dyn RegisterMapper, - ) -> CodegenResult { - use input::UnwindCode; - let mut builder = InstructionBuilder::new(unwind.initial_sp_offset, map_reg); - - for (offset, c) in unwind.prologue_unwind_codes.iter().chain( - unwind - .epilogues_unwind_codes - .iter() - .map(|c| c.iter()) - .flatten(), - ) { - match c { - UnwindCode::SaveRegister { reg, stack_offset } => { - builder - .save_reg(*offset, *reg, *stack_offset) - .map_err(CodegenError::RegisterMappingError)?; - } - UnwindCode::StackAlloc { size } => { - builder.adjust_sp_down_imm(*offset, *size as i64); - } - UnwindCode::StackDealloc { size } => { - builder.adjust_sp_up_imm(*offset, *size as i64); - } - UnwindCode::RestoreRegister { reg } => { - builder - .restore_reg(*offset, *reg) - .map_err(CodegenError::RegisterMappingError)?; - } - UnwindCode::SetFramePointer { reg } => { - builder - .set_cfa_reg(*offset, *reg) - .map_err(CodegenError::RegisterMappingError)?; - } - UnwindCode::RestoreFramePointer => { - builder.restore_cfa(*offset); - } - UnwindCode::RememberState => { - builder.remember_state(*offset); - } - UnwindCode::RestoreState => { - builder.restore_state(*offset); - } - UnwindCode::Aarch64SetPointerAuth { 
return_addresses } => { - builder.set_aarch64_pauth(*offset, *return_addresses); - } - } - } - - let instructions = builder.instructions; - let len = unwind.function_size; - - Ok(Self { instructions, len }) - } - /// Converts the unwind information into a `FrameDescriptionEntry`. pub fn to_fde(&self, address: Address) -> gimli::write::FrameDescriptionEntry { let mut fde = FrameDescriptionEntry::new(address, self.len); @@ -330,145 +269,3 @@ impl UnwindInfo { fde } } - -// TODO: delete the builder below when the old backend is removed. - -struct InstructionBuilder<'a, Reg: PartialEq + Copy> { - sp_offset: i32, - frame_register: Option, - saved_state: Option<(i32, Option)>, - map_reg: &'a dyn RegisterMapper, - instructions: Vec<(u32, CallFrameInstruction)>, -} - -impl<'a, Reg: PartialEq + Copy> InstructionBuilder<'a, Reg> { - fn new(sp_offset: u8, map_reg: &'a (dyn RegisterMapper + 'a)) -> Self { - Self { - sp_offset: sp_offset as i32, // CFA offset starts at the specified offset to account for the return address on stack - saved_state: None, - frame_register: None, - map_reg, - instructions: Vec::new(), - } - } - - fn save_reg( - &mut self, - offset: u32, - reg: Reg, - stack_offset: u32, - ) -> Result<(), RegisterMappingError> { - // Pushes in the prologue are register saves, so record an offset of the save - self.instructions.push(( - offset, - CallFrameInstruction::Offset( - self.map_reg.map(reg)?, - stack_offset as i32 - self.sp_offset, - ), - )); - - Ok(()) - } - - fn adjust_sp_down_imm(&mut self, offset: u32, imm: i64) { - assert!(imm <= core::u32::MAX as i64); - - self.sp_offset += imm as i32; - - // Don't adjust the CFA if we're using a frame pointer - if self.frame_register.is_some() { - return; - } - - self.instructions - .push((offset, CallFrameInstruction::CfaOffset(self.sp_offset))); - } - - fn adjust_sp_up_imm(&mut self, offset: u32, imm: i64) { - assert!(imm <= core::u32::MAX as i64); - - self.sp_offset -= imm as i32; - - // Don't adjust the CFA if 
we're using a frame pointer - if self.frame_register.is_some() { - return; - } - - let cfa_inst_ofs = { - // Scan to find and merge with CFA instruction with the same offset. - let mut it = self.instructions.iter_mut(); - loop { - match it.next_back() { - Some((i_offset, i)) if *i_offset == offset => { - if let CallFrameInstruction::Cfa(_, o) = i { - break Some(o); - } - } - _ => { - break None; - } - } - } - }; - - if let Some(o) = cfa_inst_ofs { - // Update previous CFA instruction. - *o = self.sp_offset; - } else { - // Add just CFA offset instruction. - self.instructions - .push((offset, CallFrameInstruction::CfaOffset(self.sp_offset))); - } - } - - fn set_cfa_reg(&mut self, offset: u32, reg: Reg) -> Result<(), RegisterMappingError> { - self.instructions.push(( - offset, - CallFrameInstruction::CfaRegister(self.map_reg.map(reg)?), - )); - self.frame_register = Some(reg); - Ok(()) - } - - fn restore_cfa(&mut self, offset: u32) { - // Restore SP and its offset. - self.instructions.push(( - offset, - CallFrameInstruction::Cfa(self.map_reg.sp(), self.sp_offset), - )); - self.frame_register = None; - } - - fn restore_reg(&mut self, offset: u32, reg: Reg) -> Result<(), RegisterMappingError> { - // Pops in the epilogue are register restores, so record a "same value" for the register - self.instructions.push(( - offset, - CallFrameInstruction::SameValue(self.map_reg.map(reg)?), - )); - - Ok(()) - } - - fn remember_state(&mut self, offset: u32) { - self.saved_state = Some((self.sp_offset, self.frame_register)); - - self.instructions - .push((offset, CallFrameInstruction::RememberState)); - } - - fn restore_state(&mut self, offset: u32) { - let (sp_offset, frame_register) = self.saved_state.take().unwrap(); - self.sp_offset = sp_offset; - self.frame_register = frame_register; - - self.instructions - .push((offset, CallFrameInstruction::RestoreState)); - } - - fn set_aarch64_pauth(&mut self, offset: u32, return_addresses: bool) { - self.instructions.push(( - offset, - 
CallFrameInstruction::Aarch64SetPointerAuth { return_addresses }, - )); - } -} diff --git a/cranelift/codegen/src/isa/unwind/winx64.rs b/cranelift/codegen/src/isa/unwind/winx64.rs index 1c232f6855..68cb38b758 100644 --- a/cranelift/codegen/src/isa/unwind/winx64.rs +++ b/cranelift/codegen/src/isa/unwind/winx64.rs @@ -1,6 +1,5 @@ //! Windows x64 ABI unwind information. -use crate::isa::unwind::input; use crate::result::{CodegenError, CodegenResult}; use alloc::vec::Vec; use log::warn; @@ -259,76 +258,6 @@ impl UnwindInfo { .iter() .fold(0, |nodes, c| nodes + c.node_count()) } - - // TODO: remove `build()` below when old backend is removed. The new backend uses - // a simpler approach in `create_unwind_info_from_insts()` below. - - pub(crate) fn build>( - unwind: input::UnwindInfo, - ) -> CodegenResult { - use crate::isa::unwind::input::UnwindCode as InputUnwindCode; - - let word_size: u32 = unwind.word_size.into(); - let mut unwind_codes = Vec::new(); - for (offset, c) in unwind.prologue_unwind_codes.iter() { - match c { - InputUnwindCode::SaveRegister { reg, stack_offset } => { - let reg = MR::map(*reg); - let offset = ensure_unwind_offset(*offset)?; - match reg { - MappedRegister::Int(reg) => { - // Attempt to convert sequence of the `InputUnwindCode`: - // `StackAlloc { size = word_size }`, `SaveRegister { stack_offset: 0 }` - // to the shorter `UnwindCode::PushRegister`. 
- let push_reg_sequence = if let Some(UnwindCode::StackAlloc { - instruction_offset: alloc_offset, - size, - }) = unwind_codes.last() - { - *size == word_size && offset == *alloc_offset && *stack_offset == 0 - } else { - false - }; - if push_reg_sequence { - *unwind_codes.last_mut().unwrap() = UnwindCode::PushRegister { - instruction_offset: offset, - reg, - }; - } else { - unwind_codes.push(UnwindCode::SaveReg { - instruction_offset: offset, - reg, - stack_offset: *stack_offset, - }); - } - } - MappedRegister::Xmm(reg) => { - unwind_codes.push(UnwindCode::SaveXmm { - instruction_offset: offset, - reg, - stack_offset: *stack_offset, - }); - } - } - } - InputUnwindCode::StackAlloc { size } => { - unwind_codes.push(UnwindCode::StackAlloc { - instruction_offset: ensure_unwind_offset(*offset)?, - size: *size, - }); - } - _ => {} - } - } - - Ok(Self { - flags: 0, // this assumes cranelift functions have no SEH handlers - prologue_size: ensure_unwind_offset(unwind.prologue_size)?, - frame_register: None, - frame_register_offset: 0, - unwind_codes, - }) - } } const UNWIND_RBP_REG: u8 = 5; diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index c02593eab2..1a81191141 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -8,8 +8,8 @@ //! //! cd to the top of your wasmtime tree, then: //! RUST_BACKTRACE=1 cargo test --features test-programs/test_programs \ -//! --features experimental_x64 --all --exclude peepmatic --exclude lightbeam \ -//! --exclude wasmtime-lightbeam --exclude peepmatic-automata --exclude peepmatic-fuzzing \ +//! --features experimental_x64 --all --exclude peepmatic \ +//! --exclude peepmatic-automata --exclude peepmatic-fuzzing \ //! 
--exclude peepmatic-macro --exclude wasmtime-wasi-nn -- isa::x64::inst::emit_tests::test_x64_emit use super::*; diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs index 9115db0671..7119d5b260 100644 --- a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs @@ -109,7 +109,6 @@ mod tests { use target_lexicon::triple; #[test] - #[cfg_attr(feature = "old-x86-backend", ignore)] fn test_simple_func() { let isa = lookup(triple!("x86_64")) .expect("expect x86 ISA") @@ -152,7 +151,6 @@ mod tests { } #[test] - #[cfg_attr(feature = "old-x86-backend", ignore)] fn test_multi_return_func() { let isa = lookup(triple!("x86_64")) .expect("expect x86 ISA") diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index da29a04314..1a635108d0 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -2,8 +2,9 @@ use crate::data_value::DataValue; use crate::ir::{ - condcodes::FloatCC, condcodes::IntCC, types, AbiParam, ArgumentPurpose, ExternalName, - Inst as IRInst, InstructionData, LibCall, Opcode, Signature, Type, + condcodes::{CondCode, FloatCC, IntCC}, + types, AbiParam, ArgumentPurpose, ExternalName, Inst as IRInst, InstructionData, LibCall, + Opcode, Signature, Type, }; use crate::isa::x64::abi::*; use crate::isa::x64::inst::args::*; @@ -15,7 +16,6 @@ use crate::result::CodegenResult; use crate::settings::{Flags, TlsModel}; use alloc::boxed::Box; use alloc::vec::Vec; -use cranelift_codegen_shared::condcodes::CondCode; use log::trace; use regalloc::{Reg, RegClass, Writable}; use smallvec::{smallvec, SmallVec}; @@ -6865,18 +6865,12 @@ fn lower_insn_to_regs>( Opcode::Spill | Opcode::Fill | Opcode::FillNop - | Opcode::Regmove - | Opcode::CopySpecial - | Opcode::CopyToSsa | Opcode::CopyNop | Opcode::AdjustSpDown | Opcode::AdjustSpUpImm | Opcode::AdjustSpDownImm | Opcode::IfcmpSp 
- | Opcode::Regspill - | Opcode::Regfill - | Opcode::Copy - | Opcode::DummySargT => { + | Opcode::Copy => { panic!("Unused opcode should not be encountered."); } @@ -6900,44 +6894,6 @@ fn lower_insn_to_regs>( panic!("Branch opcode reached non-branch lowering logic!"); } - Opcode::X86Udivmodx - | Opcode::X86Sdivmodx - | Opcode::X86Umulx - | Opcode::X86Smulx - | Opcode::X86Cvtt2si - | Opcode::X86Fmin - | Opcode::X86Fmax - | Opcode::X86Push - | Opcode::X86Pop - | Opcode::X86Bsr - | Opcode::X86Bsf - | Opcode::X86Pblendw - | Opcode::X86Pshufd - | Opcode::X86Pshufb - | Opcode::X86Pextr - | Opcode::X86Pinsr - | Opcode::X86Insertps - | Opcode::X86Movsd - | Opcode::X86Movlhps - | Opcode::X86Palignr - | Opcode::X86Psll - | Opcode::X86Psrl - | Opcode::X86Psra - | Opcode::X86Ptest - | Opcode::X86Pmaxs - | Opcode::X86Pmaxu - | Opcode::X86Pmins - | Opcode::X86Pminu - | Opcode::X86Pmullq - | Opcode::X86Pmuludq - | Opcode::X86Punpckh - | Opcode::X86Punpckl - | Opcode::X86Vcvtudq2ps - | Opcode::X86ElfTlsGetAddr - | Opcode::X86MachoTlsGetAddr => { - panic!("x86-specific opcode in supposedly arch-neutral IR!"); - } - Opcode::Nop => { // Nothing. 
} diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index 43c7fb74a6..47677cc885 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -15,7 +15,6 @@ use crate::machinst::{ use crate::result::CodegenResult; use crate::settings::{self as shared_settings, Flags}; use alloc::{boxed::Box, vec::Vec}; -use core::hash::{Hash, Hasher}; use regalloc::{PrettyPrint, RealRegUniverse, Reg}; use target_lexicon::Triple; @@ -95,11 +94,6 @@ impl MachBackend for X64Backend { self.x64_flags.iter().collect() } - fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) { - self.flags.hash(&mut hasher); - self.x64_flags.hash(&mut hasher); - } - fn name(&self) -> &'static str { "x64" } @@ -118,12 +112,6 @@ impl MachBackend for X64Backend { IntCC::UnsignedLessThan } - fn unsigned_sub_overflow_condition(&self) -> IntCC { - // unsigned `<`; this corresponds to the carry flag set on x86, which - // indicates a sub has underflowed (carry is borrow for subtract). - IntCC::UnsignedLessThan - } - #[cfg(feature = "unwind")] fn emit_unwind_info( &self, diff --git a/cranelift/codegen/src/legalizer/boundary.rs b/cranelift/codegen/src/legalizer/boundary.rs deleted file mode 100644 index 3b51bc5b57..0000000000 --- a/cranelift/codegen/src/legalizer/boundary.rs +++ /dev/null @@ -1,1174 +0,0 @@ -//! Legalize ABI boundaries. -//! -//! This legalizer sub-module contains code for dealing with ABI boundaries: -//! -//! - Function arguments passed to the entry block. -//! - Function arguments passed to call instructions. -//! - Return values from call instructions. -//! - Return values passed to return instructions. -//! -//! The ABI boundary legalization happens in two phases: -//! -//! 1. The `legalize_signatures` function rewrites all the preamble signatures with ABI information -//! and possibly new argument types. It also rewrites the entry block arguments to match. -//! 2. 
The `handle_call_abi` and `handle_return_abi` functions rewrite call and return instructions -//! to match the new ABI signatures. -//! -//! Between the two phases, preamble signatures and call/return arguments don't match. This -//! intermediate state doesn't type check. - -use crate::abi::{legalize_abi_value, ValueConversion}; -use crate::cursor::{Cursor, FuncCursor}; -use crate::flowgraph::ControlFlowGraph; -use crate::ir::instructions::CallInfo; -use crate::ir::{ - AbiParam, ArgumentLoc, ArgumentPurpose, Block, DataFlowGraph, ExtFuncData, ExternalName, - Function, Inst, InstBuilder, LibCall, MemFlags, SigRef, Signature, StackSlotData, - StackSlotKind, Type, Value, ValueLoc, -}; -use crate::isa::TargetIsa; -use crate::legalizer::split::{isplit, vsplit}; -use alloc::borrow::Cow; -use alloc::vec::Vec; -use core::mem; -use cranelift_entity::EntityList; - -/// Legalize all the function signatures in `func`. -/// -/// This changes all signatures to be ABI-compliant with full `ArgumentLoc` annotations. It doesn't -/// change the entry block arguments, calls, or return instructions, so this can leave the function -/// in a state with type discrepancies. -pub fn legalize_signatures(func: &mut Function, isa: &dyn TargetIsa) { - if let Some(new) = legalize_signature(&func.signature, true, isa) { - let old = mem::replace(&mut func.signature, new); - func.old_signature = Some(old); - } - - for (sig_ref, sig_data) in func.dfg.signatures.iter_mut() { - if let Some(new) = legalize_signature(sig_data, false, isa) { - let old = mem::replace(sig_data, new); - func.dfg.old_signatures[sig_ref] = Some(old); - } - } - - if let Some(entry) = func.layout.entry_block() { - legalize_entry_params(func, entry); - spill_entry_params(func, entry); - } -} - -/// Legalize the libcall signature, which we may generate on the fly after -/// `legalize_signatures` has been called. 
-pub fn legalize_libcall_signature(signature: &mut Signature, isa: &dyn TargetIsa) { - if let Some(s) = legalize_signature(signature, false, isa) { - *signature = s; - } -} - -/// Legalize the given signature. -/// -/// `current` is true if this is the signature for the current function. -fn legalize_signature( - signature: &Signature, - current: bool, - isa: &dyn TargetIsa, -) -> Option { - let mut cow = Cow::Borrowed(signature); - isa.legalize_signature(&mut cow, current); - match cow { - Cow::Borrowed(_) => None, - Cow::Owned(s) => Some(s), - } -} - -/// Legalize the entry block parameters after `func`'s signature has been legalized. -/// -/// The legalized signature may contain more parameters than the original signature, and the -/// parameter types have been changed. This function goes through the parameters of the entry block -/// and replaces them with parameters of the right type for the ABI. -/// -/// The original entry block parameters are computed from the new ABI parameters by code inserted at -/// the top of the entry block. -fn legalize_entry_params(func: &mut Function, entry: Block) { - let mut has_sret = false; - let mut has_link = false; - let mut has_vmctx = false; - let mut has_sigid = false; - let mut has_stack_limit = false; - - // Insert position for argument conversion code. - // We want to insert instructions before the first instruction in the entry block. - // If the entry block is empty, append instructions to it instead. - let mut pos = FuncCursor::new(func).at_first_inst(entry); - - // Keep track of the argument types in the ABI-legalized signature. - let mut abi_arg = 0; - - // Process the block parameters one at a time, possibly replacing one argument with multiple new - // ones. We do this by detaching the entry block parameters first. 
- let block_params = pos.func.dfg.detach_block_params(entry); - let mut old_arg = 0; - while let Some(arg) = block_params.get(old_arg, &pos.func.dfg.value_lists) { - old_arg += 1; - - let abi_type = pos.func.signature.params[abi_arg]; - let arg_type = pos.func.dfg.value_type(arg); - if let ArgumentPurpose::StructArgument(size) = abi_type.purpose { - let offset = if let ArgumentLoc::Stack(offset) = abi_type.location { - offset - } else { - unreachable!("StructArgument must already have a Stack ArgumentLoc assigned"); - }; - let ss = pos.func.stack_slots.make_incoming_arg(size, offset); - let struct_arg = pos.ins().stack_addr(arg_type, ss, 0); - pos.func.dfg.change_to_alias(arg, struct_arg); - let dummy = pos - .func - .dfg - .append_block_param(entry, crate::ir::types::SARG_T); - pos.func.locations[dummy] = ValueLoc::Stack(ss); - abi_arg += 1; - continue; - } - - if arg_type == abi_type.value_type { - // No value translation is necessary, this argument matches the ABI type. - // Just use the original block argument value. This is the most common case. 
- pos.func.dfg.attach_block_param(entry, arg); - match abi_type.purpose { - ArgumentPurpose::Normal => {} - ArgumentPurpose::StructArgument(_) => unreachable!("Handled above"), - ArgumentPurpose::FramePointer => {} - ArgumentPurpose::CalleeSaved => {} - ArgumentPurpose::StructReturn => { - debug_assert!(!has_sret, "Multiple sret arguments found"); - has_sret = true; - } - ArgumentPurpose::VMContext => { - debug_assert!(!has_vmctx, "Multiple vmctx arguments found"); - has_vmctx = true; - } - ArgumentPurpose::SignatureId => { - debug_assert!(!has_sigid, "Multiple sigid arguments found"); - has_sigid = true; - } - ArgumentPurpose::StackLimit => { - debug_assert!(!has_stack_limit, "Multiple stack_limit arguments found"); - has_stack_limit = true; - } - ArgumentPurpose::Link => panic!("Unexpected link arg {}", abi_type), - ArgumentPurpose::CallerTLS | ArgumentPurpose::CalleeTLS => {} - } - abi_arg += 1; - } else { - // Compute the value we want for `arg` from the legalized ABI parameters. - let mut get_arg = |func: &mut Function, ty| { - let abi_type = func.signature.params[abi_arg]; - debug_assert_eq!( - abi_type.purpose, - ArgumentPurpose::Normal, - "Can't legalize special-purpose argument" - ); - if ty == abi_type.value_type { - abi_arg += 1; - Ok(func.dfg.append_block_param(entry, ty)) - } else { - Err(abi_type) - } - }; - let converted = convert_from_abi(&mut pos, arg_type, Some(arg), &mut get_arg); - // The old `arg` is no longer an attached block argument, but there are probably still - // uses of the value. - debug_assert_eq!(pos.func.dfg.resolve_aliases(arg), converted); - } - } - - // The legalized signature may contain additional parameters representing special-purpose - // registers. - for &arg in &pos.func.signature.params[abi_arg..] { - match arg.purpose { - // Any normal parameters should have been processed above. 
- ArgumentPurpose::Normal | ArgumentPurpose::StructArgument(_) => { - panic!("Leftover arg: {}", arg); - } - // The callee-save parameters should not appear until after register allocation is - // done. - ArgumentPurpose::FramePointer | ArgumentPurpose::CalleeSaved => { - panic!("Premature callee-saved arg {}", arg); - } - // These can be meaningfully added by `legalize_signature()`. - ArgumentPurpose::Link => { - debug_assert!(!has_link, "Multiple link parameters found"); - has_link = true; - } - ArgumentPurpose::StructReturn => { - debug_assert!(!has_sret, "Multiple sret parameters found"); - has_sret = true; - } - ArgumentPurpose::VMContext => { - debug_assert!(!has_vmctx, "Multiple vmctx parameters found"); - has_vmctx = true; - } - ArgumentPurpose::SignatureId => { - debug_assert!(!has_sigid, "Multiple sigid parameters found"); - has_sigid = true; - } - ArgumentPurpose::StackLimit => { - debug_assert!(!has_stack_limit, "Multiple stack_limit parameters found"); - has_stack_limit = true; - } - ArgumentPurpose::CallerTLS | ArgumentPurpose::CalleeTLS => {} - } - - // Just create entry block values to match here. We will use them in `handle_return_abi()` - // below. - pos.func.dfg.append_block_param(entry, arg.value_type); - } -} - -/// Legalize the results returned from a call instruction to match the ABI signature. -/// -/// The cursor `pos` points to a call instruction with at least one return value. The cursor will -/// be left pointing after the instructions inserted to convert the return values. -/// -/// This function is very similar to the `legalize_entry_params` function above. -/// -/// Returns the possibly new instruction representing the call. 
-fn legalize_inst_results(pos: &mut FuncCursor, mut get_abi_type: ResType) -> Inst -where - ResType: FnMut(&Function, usize) -> AbiParam, -{ - let call = pos - .current_inst() - .expect("Cursor must point to a call instruction"); - - // We theoretically allow for call instructions that return a number of fixed results before - // the call return values. In practice, it doesn't happen. - debug_assert_eq!( - pos.func.dfg[call] - .opcode() - .constraints() - .num_fixed_results(), - 0, - "Fixed results on calls not supported" - ); - - let results = pos.func.dfg.detach_results(call); - let mut next_res = 0; - let mut abi_res = 0; - - // Point immediately after the call. - pos.next_inst(); - - while let Some(res) = results.get(next_res, &pos.func.dfg.value_lists) { - next_res += 1; - - let res_type = pos.func.dfg.value_type(res); - if res_type == get_abi_type(pos.func, abi_res).value_type { - // No value translation is necessary, this result matches the ABI type. - pos.func.dfg.attach_result(call, res); - abi_res += 1; - } else { - let mut get_res = |func: &mut Function, ty| { - let abi_type = get_abi_type(func, abi_res); - if ty == abi_type.value_type { - let last_res = func.dfg.append_result(call, ty); - abi_res += 1; - Ok(last_res) - } else { - Err(abi_type) - } - }; - let v = convert_from_abi(pos, res_type, Some(res), &mut get_res); - debug_assert_eq!(pos.func.dfg.resolve_aliases(res), v); - } - } - - call -} - -fn assert_is_valid_sret_legalization( - old_ret_list: &EntityList, - old_sig: &Signature, - new_sig: &Signature, - pos: &FuncCursor, -) { - debug_assert_eq!( - old_sig.returns.len(), - old_ret_list.len(&pos.func.dfg.value_lists) - ); - - // Assert that the only difference in special parameters is that there - // is an appended struct return pointer parameter. 
- let old_special_params: Vec<_> = old_sig - .params - .iter() - .filter(|r| r.purpose != ArgumentPurpose::Normal) - .collect(); - let new_special_params: Vec<_> = new_sig - .params - .iter() - .filter(|r| r.purpose != ArgumentPurpose::Normal) - .collect(); - debug_assert_eq!(old_special_params.len() + 1, new_special_params.len()); - debug_assert!(old_special_params - .iter() - .zip(&new_special_params) - .all(|(old, new)| old.purpose == new.purpose)); - debug_assert_eq!( - new_special_params.last().unwrap().purpose, - ArgumentPurpose::StructReturn - ); - - // If the special returns have changed at all, then the only change - // should be that the struct return pointer is returned back out of the - // function, so that callers don't have to load its stack address again. - let old_special_returns: Vec<_> = old_sig - .returns - .iter() - .filter(|r| r.purpose != ArgumentPurpose::Normal) - .collect(); - let new_special_returns: Vec<_> = new_sig - .returns - .iter() - .filter(|r| r.purpose != ArgumentPurpose::Normal) - .collect(); - debug_assert!(old_special_returns - .iter() - .zip(&new_special_returns) - .all(|(old, new)| old.purpose == new.purpose)); - debug_assert!( - old_special_returns.len() == new_special_returns.len() - || (old_special_returns.len() + 1 == new_special_returns.len() - && new_special_returns.last().unwrap().purpose == ArgumentPurpose::StructReturn) - ); -} - -fn legalize_sret_call(isa: &dyn TargetIsa, pos: &mut FuncCursor, sig_ref: SigRef, call: Inst) { - let old_ret_list = pos.func.dfg.detach_results(call); - let old_sig = pos.func.dfg.old_signatures[sig_ref] - .take() - .expect("must have an old signature when using an `sret` parameter"); - - // We make a bunch of assumptions about the shape of the old, multi-return - // signature and the new, sret-using signature in this legalization - // function. Assert that these assumptions hold true in debug mode. 
- if cfg!(debug_assertions) { - assert_is_valid_sret_legalization( - &old_ret_list, - &old_sig, - &pos.func.dfg.signatures[sig_ref], - &pos, - ); - } - - // Go through and remove all normal return values from the `call` - // instruction's returns list. These will be stored into the stack slot that - // the sret points to. At the same time, calculate the size of the sret - // stack slot. - let mut sret_slot_size = 0; - for (i, ret) in old_sig.returns.iter().enumerate() { - let v = old_ret_list.get(i, &pos.func.dfg.value_lists).unwrap(); - let ty = pos.func.dfg.value_type(v); - if ret.purpose == ArgumentPurpose::Normal { - debug_assert_eq!(ret.location, ArgumentLoc::Unassigned); - let ty = legalized_type_for_sret(ty); - let size = ty.bytes(); - sret_slot_size = round_up_to_multiple_of_type_align(sret_slot_size, ty) + size; - } else { - let new_v = pos.func.dfg.append_result(call, ty); - pos.func.dfg.change_to_alias(v, new_v); - } - } - - let stack_slot = pos.func.stack_slots.push(StackSlotData { - kind: StackSlotKind::StructReturnSlot, - size: sret_slot_size, - offset: None, - }); - - // Append the sret pointer to the `call` instruction's arguments. - let ptr_type = Type::triple_pointer_type(isa.triple()); - let sret_arg = pos.ins().stack_addr(ptr_type, stack_slot, 0); - pos.func.dfg.append_inst_arg(call, sret_arg); - - // The sret pointer might be returned by the signature as well. If so, we - // need to add it to the `call` instruction's results list. - // - // Additionally, when the sret is explicitly returned in this calling - // convention, then use it when loading the sret returns back into ssa - // values to avoid keeping the original `sret_arg` live and potentially - // having to do spills and fills. - let sret = - if pos.func.dfg.signatures[sig_ref].uses_special_return(ArgumentPurpose::StructReturn) { - pos.func.dfg.append_result(call, ptr_type) - } else { - sret_arg - }; - - // Finally, load each of the call's return values out of the sret stack - // slot. 
- pos.goto_after_inst(call); - let mut offset = 0; - for i in 0..old_ret_list.len(&pos.func.dfg.value_lists) { - if old_sig.returns[i].purpose != ArgumentPurpose::Normal { - continue; - } - - let old_v = old_ret_list.get(i, &pos.func.dfg.value_lists).unwrap(); - let ty = pos.func.dfg.value_type(old_v); - let mut legalized_ty = legalized_type_for_sret(ty); - - offset = round_up_to_multiple_of_type_align(offset, legalized_ty); - - let new_legalized_v = - pos.ins() - .load(legalized_ty, MemFlags::trusted(), sret, offset as i32); - - // "Illegalize" the loaded value from the legalized type back to its - // original `ty`. This is basically the opposite of - // `legalize_type_for_sret_store`. - let mut new_v = new_legalized_v; - if ty.is_bool() { - legalized_ty = legalized_ty.as_bool_pedantic(); - new_v = pos.ins().raw_bitcast(legalized_ty, new_v); - - if ty.bits() < legalized_ty.bits() { - legalized_ty = ty; - new_v = pos.ins().breduce(legalized_ty, new_v); - } - } - - pos.func.dfg.change_to_alias(old_v, new_v); - - offset += legalized_ty.bytes(); - } - - pos.func.dfg.old_signatures[sig_ref] = Some(old_sig); -} - -/// Compute original value of type `ty` from the legalized ABI arguments. -/// -/// The conversion is recursive, controlled by the `get_arg` closure which is called to retrieve an -/// ABI argument. It returns: -/// -/// - `Ok(arg)` if the requested type matches the next ABI argument. -/// - `Err(arg_type)` if further conversions are needed from the ABI argument `arg_type`. -/// -/// If the `into_result` value is provided, the converted result will be written into that value. -fn convert_from_abi( - pos: &mut FuncCursor, - ty: Type, - into_result: Option, - get_arg: &mut GetArg, -) -> Value -where - GetArg: FnMut(&mut Function, Type) -> Result, -{ - // Terminate the recursion when we get the desired type. 
- let arg_type = match get_arg(pos.func, ty) { - Ok(v) => { - debug_assert_eq!(pos.func.dfg.value_type(v), ty); - debug_assert_eq!(into_result, None); - return v; - } - Err(t) => t, - }; - - // Reconstruct how `ty` was legalized into the `arg_type` argument. - let conversion = legalize_abi_value(ty, &arg_type); - - log::trace!("convert_from_abi({}): {:?}", ty, conversion); - - // The conversion describes value to ABI argument. We implement the reverse conversion here. - match conversion { - // Construct a `ty` by concatenating two ABI integers. - ValueConversion::IntSplit => { - let abi_ty = ty.half_width().expect("Invalid type for conversion"); - let lo = convert_from_abi(pos, abi_ty, None, get_arg); - let hi = convert_from_abi(pos, abi_ty, None, get_arg); - log::trace!( - "intsplit {}: {}, {}: {}", - lo, - pos.func.dfg.value_type(lo), - hi, - pos.func.dfg.value_type(hi) - ); - pos.ins().with_results([into_result]).iconcat(lo, hi) - } - // Construct a `ty` by concatenating two halves of a vector. - ValueConversion::VectorSplit => { - let abi_ty = ty.half_vector().expect("Invalid type for conversion"); - let lo = convert_from_abi(pos, abi_ty, None, get_arg); - let hi = convert_from_abi(pos, abi_ty, None, get_arg); - pos.ins().with_results([into_result]).vconcat(lo, hi) - } - // Construct a `ty` by bit-casting from an integer type. - ValueConversion::IntBits => { - debug_assert!(!ty.is_int()); - let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion"); - let arg = convert_from_abi(pos, abi_ty, None, get_arg); - pos.ins().with_results([into_result]).bitcast(ty, arg) - } - // ABI argument is a sign-extended version of the value we want. - ValueConversion::Sext(abi_ty) => { - let arg = convert_from_abi(pos, abi_ty, None, get_arg); - // TODO: Currently, we don't take advantage of the ABI argument being sign-extended. - // We could insert an `assert_sreduce` which would fold with a following `sextend` of - // this value. 
- pos.ins().with_results([into_result]).ireduce(ty, arg) - } - ValueConversion::Uext(abi_ty) => { - let arg = convert_from_abi(pos, abi_ty, None, get_arg); - // TODO: Currently, we don't take advantage of the ABI argument being sign-extended. - // We could insert an `assert_ureduce` which would fold with a following `uextend` of - // this value. - pos.ins().with_results([into_result]).ireduce(ty, arg) - } - // ABI argument is a pointer to the value we want. - ValueConversion::Pointer(abi_ty) => { - let arg = convert_from_abi(pos, abi_ty, None, get_arg); - pos.ins() - .with_results([into_result]) - .load(ty, MemFlags::new(), arg, 0) - } - } -} - -/// Convert `value` to match an ABI signature by inserting instructions at `pos`. -/// -/// This may require expanding the value to multiple ABI arguments. The conversion process is -/// recursive and controlled by the `put_arg` closure. When a candidate argument value is presented -/// to the closure, it will perform one of two actions: -/// -/// 1. If the suggested argument has an acceptable value type, consume it by adding it to the list -/// of arguments and return `Ok(())`. -/// 2. If the suggested argument doesn't have the right value type, don't change anything, but -/// return the `Err(AbiParam)` that is needed. -/// -fn convert_to_abi( - pos: &mut FuncCursor, - cfg: &ControlFlowGraph, - value: Value, - put_arg: &mut PutArg, -) where - PutArg: FnMut(&mut Function, Value) -> Result<(), AbiParam>, -{ - // Start by invoking the closure to either terminate the recursion or get the argument type - // we're trying to match. 
- let arg_type = match put_arg(pos.func, value) { - Ok(_) => return, - Err(t) => t, - }; - - let ty = pos.func.dfg.value_type(value); - match legalize_abi_value(ty, &arg_type) { - ValueConversion::IntSplit => { - let curpos = pos.position(); - let srcloc = pos.srcloc(); - let (lo, hi) = isplit(&mut pos.func, cfg, curpos, srcloc, value); - convert_to_abi(pos, cfg, lo, put_arg); - convert_to_abi(pos, cfg, hi, put_arg); - } - ValueConversion::VectorSplit => { - let curpos = pos.position(); - let srcloc = pos.srcloc(); - let (lo, hi) = vsplit(&mut pos.func, cfg, curpos, srcloc, value); - convert_to_abi(pos, cfg, lo, put_arg); - convert_to_abi(pos, cfg, hi, put_arg); - } - ValueConversion::IntBits => { - debug_assert!(!ty.is_int()); - let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion"); - let arg = pos.ins().bitcast(abi_ty, value); - convert_to_abi(pos, cfg, arg, put_arg); - } - ValueConversion::Sext(abi_ty) => { - let arg = pos.ins().sextend(abi_ty, value); - convert_to_abi(pos, cfg, arg, put_arg); - } - ValueConversion::Uext(abi_ty) => { - let arg = pos.ins().uextend(abi_ty, value); - convert_to_abi(pos, cfg, arg, put_arg); - } - ValueConversion::Pointer(abi_ty) => { - // Note: This conversion can only happen for call arguments, - // so we can allocate the value on stack safely. - let stack_slot = pos.func.create_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - size: ty.bytes(), - offset: None, - }); - let arg = pos.ins().stack_addr(abi_ty, stack_slot, 0); - pos.ins().store(MemFlags::new(), value, arg, 0); - convert_to_abi(pos, cfg, arg, put_arg); - } - } -} - -/// Check if a sequence of arguments match a desired sequence of argument types. 
-fn check_arg_types(dfg: &DataFlowGraph, args: &[Value], types: &[AbiParam]) -> bool { - args.len() == types.len() - && args.iter().zip(types.iter()).all(|(v, at)| { - if let ArgumentPurpose::StructArgument(_) = at.purpose { - true - } else { - dfg.value_type(*v) == at.value_type - } - }) -} - -/// Check if the arguments of the call `inst` match the signature. -/// -/// Returns `Ok(())` if the signature matches and no changes are needed, or `Err(sig_ref)` if the -/// signature doesn't match. -fn check_call_signature(dfg: &DataFlowGraph, inst: Inst) -> Result<(), SigRef> { - // Extract the signature and argument values. - let (sig_ref, args) = match dfg[inst].analyze_call(&dfg.value_lists) { - CallInfo::Direct(func, args) => (dfg.ext_funcs[func].signature, args), - CallInfo::Indirect(sig_ref, args) => (sig_ref, args), - CallInfo::NotACall => panic!("Expected call, got {:?}", dfg[inst]), - }; - let sig = &dfg.signatures[sig_ref]; - - if check_arg_types(dfg, args, &sig.params[..]) - && check_arg_types(dfg, dfg.inst_results(inst), &sig.returns[..]) - { - // All types check out. - Ok(()) - } else { - // Call types need fixing. - Err(sig_ref) - } -} - -/// Check if the arguments of the return `inst` match the signature. -fn check_return_signature(dfg: &DataFlowGraph, inst: Inst, sig: &Signature) -> bool { - check_arg_types(dfg, dfg.inst_variable_args(inst), &sig.returns) -} - -/// Insert ABI conversion code for the arguments to the call or return instruction at `pos`. -/// -/// - `abi_args` is the number of arguments that the ABI signature requires. -/// - `get_abi_type` is a closure that can provide the desired `AbiParam` for a given ABI -/// argument number in `0..abi_args`. 
-/// -fn legalize_inst_arguments( - pos: &mut FuncCursor, - cfg: &ControlFlowGraph, - abi_args: usize, - mut get_abi_type: ArgType, -) where - ArgType: FnMut(&Function, usize) -> AbiParam, -{ - let inst = pos - .current_inst() - .expect("Cursor must point to a call instruction"); - - // Lift the value list out of the call instruction so we modify it. - let mut vlist = pos.func.dfg[inst] - .take_value_list() - .expect("Call must have a value list"); - - // The value list contains all arguments to the instruction, including the callee on an - // indirect call which isn't part of the call arguments that must match the ABI signature. - // Figure out how many fixed values are at the front of the list. We won't touch those. - let num_fixed_values = pos.func.dfg[inst] - .opcode() - .constraints() - .num_fixed_value_arguments(); - let have_args = vlist.len(&pos.func.dfg.value_lists) - num_fixed_values; - if abi_args < have_args { - // This happens with multiple return values after we've legalized the - // signature but haven't legalized the return instruction yet. This - // legalization is handled in `handle_return_abi`. - pos.func.dfg[inst].put_value_list(vlist); - return; - } - - // Grow the value list to the right size and shift all the existing arguments to the right. - // This lets us write the new argument values into the list without overwriting the old - // arguments. 
- // - // Before: - // - // <--> fixed_values - // <-----------> have_args - // [FFFFOOOOOOOOOOOOO] - // - // After grow_at(): - // - // <--> fixed_values - // <-----------> have_args - // <------------------> abi_args - // [FFFF-------OOOOOOOOOOOOO] - // ^ - // old_arg_offset - // - // After writing the new arguments: - // - // <--> fixed_values - // <------------------> abi_args - // [FFFFNNNNNNNNNNNNNNNNNNNN] - // - vlist.grow_at( - num_fixed_values, - abi_args - have_args, - &mut pos.func.dfg.value_lists, - ); - let old_arg_offset = num_fixed_values + abi_args - have_args; - - let mut abi_arg = 0; - for old_arg in 0..have_args { - let old_value = vlist - .get(old_arg_offset + old_arg, &pos.func.dfg.value_lists) - .unwrap(); - let mut put_arg = |func: &mut Function, arg| { - let abi_type = get_abi_type(func, abi_arg); - let struct_argument = if let ArgumentPurpose::StructArgument(_) = abi_type.purpose { - true - } else { - false - }; - if func.dfg.value_type(arg) == abi_type.value_type || struct_argument { - // This is the argument type we need. - vlist.as_mut_slice(&mut func.dfg.value_lists)[num_fixed_values + abi_arg] = arg; - abi_arg += 1; - Ok(()) - } else { - Err(abi_type) - } - }; - convert_to_abi(pos, cfg, old_value, &mut put_arg); - } - - // Put the modified value list back. - pos.func.dfg[inst].put_value_list(vlist); -} - -/// Ensure that the `ty` being returned is a type that can be loaded and stored -/// (potentially after another narrowing legalization) from memory, since it -/// will go into the `sret` space. -fn legalized_type_for_sret(ty: Type) -> Type { - if ty.is_bool() { - let bits = std::cmp::max(8, ty.bits()); - Type::int(bits).unwrap() - } else { - ty - } -} - -/// Insert any legalization code required to ensure that `val` can be stored -/// into the `sret` memory. Returns the (potentially new, potentially -/// unmodified) legalized value and its type. 
-fn legalize_type_for_sret_store(pos: &mut FuncCursor, val: Value, ty: Type) -> (Value, Type) { - if ty.is_bool() { - let bits = std::cmp::max(8, ty.bits()); - let ty = Type::int(bits).unwrap(); - let val = pos.ins().bint(ty, val); - (val, ty) - } else { - (val, ty) - } -} - -/// Insert ABI conversion code before and after the call instruction at `pos`. -/// -/// Instructions inserted before the call will compute the appropriate ABI values for the -/// callee's new ABI-legalized signature. The function call arguments are rewritten in place to -/// match the new signature. -/// -/// Instructions will be inserted after the call to convert returned ABI values back to the -/// original return values. The call's result values will be adapted to match the new signature. -/// -/// Returns `true` if any instructions were inserted. -pub fn handle_call_abi( - isa: &dyn TargetIsa, - mut inst: Inst, - func: &mut Function, - cfg: &ControlFlowGraph, -) -> bool { - let pos = &mut FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // Start by checking if the argument types already match the signature. - let sig_ref = match check_call_signature(&pos.func.dfg, inst) { - Ok(_) => return spill_call_arguments(pos, isa), - Err(s) => s, - }; - - let sig = &pos.func.dfg.signatures[sig_ref]; - let old_sig = &pos.func.dfg.old_signatures[sig_ref]; - - if sig.uses_struct_return_param() - && old_sig - .as_ref() - .map_or(false, |s| !s.uses_struct_return_param()) - { - legalize_sret_call(isa, pos, sig_ref, inst); - } else { - if !pos.func.dfg.signatures[sig_ref].returns.is_empty() { - inst = legalize_inst_results(pos, |func, abi_res| { - func.dfg.signatures[sig_ref].returns[abi_res] - }); - } - } - - // Go back and fix the call arguments to match the ABI signature. 
- pos.goto_inst(inst); - let abi_args = pos.func.dfg.signatures[sig_ref].params.len(); - legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| { - func.dfg.signatures[sig_ref].params[abi_arg] - }); - - debug_assert!( - check_call_signature(&pos.func.dfg, inst).is_ok(), - "Signature still wrong: {}, {}{}", - pos.func.dfg.display_inst(inst, None), - sig_ref, - pos.func.dfg.signatures[sig_ref] - ); - - // Go back and insert spills for any stack arguments. - pos.goto_inst(inst); - spill_call_arguments(pos, isa); - - // Yes, we changed stuff. - true -} - -/// Insert ABI conversion code before and after the return instruction at `inst`. -/// -/// Return `true` if any instructions were inserted. -pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph) -> bool { - // Check if the returned types already match the signature. - if check_return_signature(&func.dfg, inst, &func.signature) { - return false; - } - - // Count the special-purpose return values (`link`, `sret`, and `vmctx`) that were appended to - // the legalized signature. - let special_args = func - .signature - .returns - .iter() - .rev() - .take_while(|&rt| { - rt.purpose == ArgumentPurpose::Link - || rt.purpose == ArgumentPurpose::StructReturn - || rt.purpose == ArgumentPurpose::VMContext - }) - .count(); - let abi_args = func.signature.returns.len() - special_args; - - let pos = &mut FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| { - let arg = func.signature.returns[abi_arg]; - debug_assert!( - !arg.legalized_to_pointer, - "Return value cannot be legalized to pointer" - ); - arg - }); - // Append special return arguments for any `sret`, `link`, and `vmctx` return values added to - // the legalized signature. These values should simply be propagated from the entry block - // arguments. 
- if special_args > 0 { - log::trace!( - "Adding {} special-purpose arguments to {}", - special_args, - pos.func.dfg.display_inst(inst, None) - ); - let mut vlist = pos.func.dfg[inst].take_value_list().unwrap(); - let mut sret = None; - - for arg in &pos.func.signature.returns[abi_args..] { - match arg.purpose { - ArgumentPurpose::Link - | ArgumentPurpose::StructReturn - | ArgumentPurpose::VMContext => {} - ArgumentPurpose::Normal => panic!("unexpected return value {}", arg), - _ => panic!("Unsupported special purpose return value {}", arg), - } - // A `link`/`sret`/`vmctx` return value can only appear in a signature that has a - // unique matching argument. They are appended at the end, so search the signature from - // the end. - let idx = pos - .func - .signature - .params - .iter() - .rposition(|t| t.purpose == arg.purpose) - .expect("No matching special purpose argument."); - // Get the corresponding entry block value and add it to the return instruction's - // arguments. - let val = pos - .func - .dfg - .block_params(pos.func.layout.entry_block().unwrap())[idx]; - debug_assert_eq!(pos.func.dfg.value_type(val), arg.value_type); - vlist.push(val, &mut pos.func.dfg.value_lists); - - if let ArgumentPurpose::StructReturn = arg.purpose { - sret = Some(val); - } - } - - // Store all the regular returns into the retptr space and remove them - // from the `return` instruction's value list. - if let Some(sret) = sret { - let mut offset = 0; - let num_regular_rets = vlist.len(&pos.func.dfg.value_lists) - special_args; - for i in 0..num_regular_rets { - debug_assert_eq!( - pos.func.old_signature.as_ref().unwrap().returns[i].purpose, - ArgumentPurpose::Normal, - ); - - // The next return value to process is always at `0`, since the - // list is emptied as we iterate. 
- let v = vlist.get(0, &pos.func.dfg.value_lists).unwrap(); - let ty = pos.func.dfg.value_type(v); - let (v, ty) = legalize_type_for_sret_store(pos, v, ty); - - let size = ty.bytes(); - offset = round_up_to_multiple_of_type_align(offset, ty); - - pos.ins().store(MemFlags::trusted(), v, sret, offset as i32); - vlist.remove(0, &mut pos.func.dfg.value_lists); - - offset += size; - } - } - pos.func.dfg[inst].put_value_list(vlist); - } - - debug_assert_eq!( - pos.func.dfg.inst_variable_args(inst).len(), - abi_args + special_args - ); - debug_assert!( - check_return_signature(&pos.func.dfg, inst, &pos.func.signature), - "Signature still wrong: {} / signature {}", - pos.func.dfg.display_inst(inst, None), - pos.func.signature - ); - - // Yes, we changed stuff. - true -} - -fn round_up_to_multiple_of_type_align(bytes: u32, ty: Type) -> u32 { - // We don't have a dedicated alignment for types, so assume they are - // size-aligned. - let align = ty.bytes(); - round_up_to_multiple_of_pow2(bytes, align) -} - -/// Round `n` up to the next multiple of `to` that is greater than or equal to -/// `n`. -/// -/// `to` must be a power of two and greater than zero. -/// -/// This is useful for rounding an offset or pointer up to some type's required -/// alignment. -fn round_up_to_multiple_of_pow2(n: u32, to: u32) -> u32 { - debug_assert!(to > 0); - debug_assert!(to.is_power_of_two()); - - // The simple version of this function is - // - // (n + to - 1) / to * to - // - // Consider the numerator: `n + to - 1`. This is ensuring that if there is - // any remainder for `n / to`, then the result of the division is one - // greater than `n / to`, and that otherwise we get exactly the same result - // as `n / to` due to integer division rounding off the remainder. In other - // words, we only round up if `n` is not aligned to `to`. 
- // - // However, we know `to` is a power of two, and therefore `anything / to` is - // equivalent to `anything >> log2(to)` and `anything * to` is equivalent to - // `anything << log2(to)`. We can therefore rewrite our simplified function - // into the following: - // - // (n + to - 1) >> log2(to) << log2(to) - // - // But shifting a value right by some number of bits `b` and then shifting - // it left by that same number of bits `b` is equivalent to clearing the - // bottom `b` bits of the number. We can clear the bottom `b` bits of a - // number by bit-wise and'ing the number with the bit-wise not of `2^b - 1`. - // Plugging this into our function and simplifying, we get: - // - // (n + to - 1) >> log2(to) << log2(to) - // = (n + to - 1) & !(2^log2(to) - 1) - // = (n + to - 1) & !(to - 1) - // - // And now we have the final version of this function! - - (n + to - 1) & !(to - 1) -} - -/// Assign stack slots to incoming function parameters on the stack. -/// -/// Values that are passed into the function on the stack must be assigned to an `IncomingArg` -/// stack slot already during legalization. -fn spill_entry_params(func: &mut Function, entry: Block) { - for (abi, &arg) in func - .signature - .params - .iter() - .zip(func.dfg.block_params(entry)) - { - if let ArgumentPurpose::StructArgument(_) = abi.purpose { - // Location has already been assigned during legalization. - } else if let ArgumentLoc::Stack(offset) = abi.location { - let ss = func - .stack_slots - .make_incoming_arg(abi.value_type.bytes(), offset); - func.locations[arg] = ValueLoc::Stack(ss); - } - } -} - -/// Assign stack slots to outgoing function arguments on the stack. -/// -/// Values that are passed to a called function on the stack must be assigned to a matching -/// `OutgoingArg` stack slot. The assignment must happen immediately before the call. 
-/// -/// TODO: The outgoing stack slots can be written a bit earlier, as long as there are no branches -/// or calls between writing the stack slots and the call instruction. Writing the slots earlier -/// could help reduce register pressure before the call. -fn spill_call_arguments(pos: &mut FuncCursor, isa: &dyn TargetIsa) -> bool { - let inst = pos - .current_inst() - .expect("Cursor must point to a call instruction"); - let sig_ref = pos - .func - .dfg - .call_signature(inst) - .expect("Call instruction expected."); - - // Start by building a list of stack slots and arguments to be replaced. - // This requires borrowing `pos.func.dfg`, so we can't change anything. - let arglist = { - let locations = &pos.func.locations; - let stack_slots = &mut pos.func.stack_slots; - pos.func - .dfg - .inst_variable_args(inst) - .iter() - .zip(&pos.func.dfg.signatures[sig_ref].params) - .enumerate() - .filter_map(|(idx, (&arg, abi))| { - match abi.location { - ArgumentLoc::Stack(offset) => { - // Assign `arg` to a new stack slot, unless it's already in the correct - // slot. The legalization needs to be idempotent, so we should see a - // correct outgoing slot on the second pass. 
- let (ss, size) = match abi.purpose { - ArgumentPurpose::StructArgument(size) => { - (stack_slots.get_outgoing_arg(size, offset), Some(size)) - } - _ => ( - stack_slots.get_outgoing_arg(abi.value_type.bytes(), offset), - None, - ), - }; - if locations[arg] != ValueLoc::Stack(ss) { - Some((idx, arg, ss, size)) - } else { - None - } - } - _ => None, - } - }) - .collect::>() - }; - - if arglist.is_empty() { - return false; - } - - let mut libc_memcpy = None; - let mut import_memcpy = |func: &mut Function, pointer_type| { - if let Some(libc_memcpy) = libc_memcpy { - return libc_memcpy; - } - - let signature = { - let mut s = Signature::new(isa.default_call_conv()); - s.params.push(AbiParam::new(pointer_type)); - s.params.push(AbiParam::new(pointer_type)); - // The last argument of `memcpy` is a `size_t`. This is the same size as a pointer on - // all architectures we are interested in. - s.params.push(AbiParam::new(pointer_type)); - legalize_libcall_signature(&mut s, isa); - func.import_signature(s) - }; - - let func = func.import_function(ExtFuncData { - name: ExternalName::LibCall(LibCall::Memcpy), - signature, - colocated: false, - }); - libc_memcpy = Some(func); - func - }; - - // Insert the spill instructions and rewrite call arguments. - for (idx, arg, ss, size) in arglist { - let stack_val = if let Some(size) = size { - // Struct argument - let pointer_type = pos.func.dfg.value_type(arg); - let src = arg; - let dest = pos.ins().stack_addr(pointer_type, ss, 0); - let size = pos.ins().iconst(pointer_type, i64::from(size)); - - let libc_memcpy = import_memcpy(pos.func, pointer_type); - pos.ins().call(libc_memcpy, &[dest, src, size]); - pos.ins().dummy_sarg_t() - } else { - // Non struct argument - pos.ins().spill(arg) - }; - pos.func.locations[stack_val] = ValueLoc::Stack(ss); - pos.func.dfg.inst_variable_args_mut(inst)[idx] = stack_val; - } - - // We changed stuff. 
- true -} - -#[cfg(test)] -mod tests { - use super::round_up_to_multiple_of_pow2; - - #[test] - fn round_up_to_multiple_of_pow2_works() { - for (n, to, expected) in vec![ - (0, 1, 0), - (1, 1, 1), - (2, 1, 2), - (0, 2, 0), - (1, 2, 2), - (2, 2, 2), - (3, 2, 4), - (0, 4, 0), - (1, 4, 4), - (2, 4, 4), - (3, 4, 4), - (4, 4, 4), - (5, 4, 8), - ] { - let actual = round_up_to_multiple_of_pow2(n, to); - assert_eq!( - actual, expected, - "round_up_to_multiple_of_pow2(n = {}, to = {}) = {} (expected {})", - n, to, actual, expected - ); - } - } -} diff --git a/cranelift/codegen/src/legalizer/call.rs b/cranelift/codegen/src/legalizer/call.rs deleted file mode 100644 index 4321dbb90b..0000000000 --- a/cranelift/codegen/src/legalizer/call.rs +++ /dev/null @@ -1,54 +0,0 @@ -//! Legalization of calls. -//! -//! This module exports the `expand_call` function which transforms a `call` -//! instruction into `func_addr` and `call_indirect` instructions. - -use crate::cursor::{Cursor, FuncCursor}; -use crate::flowgraph::ControlFlowGraph; -use crate::ir::{self, InstBuilder}; -use crate::isa::TargetIsa; - -/// Expand a `call` instruction. This lowers it to a `call_indirect`, which -/// is only done if the ABI doesn't support direct calls. -pub fn expand_call( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - // Unpack the instruction. 
- let (func_ref, old_args) = match func.dfg[inst] { - ir::InstructionData::Call { - opcode, - ref args, - func_ref, - } => { - debug_assert_eq!(opcode, ir::Opcode::Call); - (func_ref, args.clone()) - } - _ => panic!("Wanted call: {}", func.dfg.display_inst(inst, None)), - }; - - let ptr_ty = isa.pointer_type(); - - let sig = func.dfg.ext_funcs[func_ref].signature; - - let callee = { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - pos.ins().func_addr(ptr_ty, func_ref) - }; - - let mut new_args = ir::ValueList::default(); - new_args.push(callee, &mut func.dfg.value_lists); - for i in 0..old_args.len(&func.dfg.value_lists) { - new_args.push( - old_args.as_slice(&func.dfg.value_lists)[i], - &mut func.dfg.value_lists, - ); - } - - func.dfg - .replace(inst) - .CallIndirect(ir::Opcode::CallIndirect, ptr_ty, sig, new_args); -} diff --git a/cranelift/codegen/src/legalizer/globalvalue.rs b/cranelift/codegen/src/legalizer/globalvalue.rs index 5c7a72b45c..28cf966172 100644 --- a/cranelift/codegen/src/legalizer/globalvalue.rs +++ b/cranelift/codegen/src/legalizer/globalvalue.rs @@ -24,7 +24,7 @@ pub fn expand_global_value( debug_assert_eq!(opcode, ir::Opcode::GlobalValue); global_value } - _ => panic!("Wanted global_value: {}", func.dfg.display_inst(inst, None)), + _ => panic!("Wanted global_value: {}", func.dfg.display_inst(inst)), }; match func.global_values[gv] { diff --git a/cranelift/codegen/src/legalizer/heap.rs b/cranelift/codegen/src/legalizer/heap.rs index 503bec40db..5239c67daf 100644 --- a/cranelift/codegen/src/legalizer/heap.rs +++ b/cranelift/codegen/src/legalizer/heap.rs @@ -27,7 +27,7 @@ pub fn expand_heap_addr( debug_assert_eq!(opcode, ir::Opcode::HeapAddr); (heap, arg, u64::from(imm)) } - _ => panic!("Wanted heap_addr: {}", func.dfg.display_inst(inst, None)), + _ => panic!("Wanted heap_addr: {}", func.dfg.display_inst(inst)), }; match func.heaps[heap].style { diff --git a/cranelift/codegen/src/legalizer/libcall.rs 
b/cranelift/codegen/src/legalizer/libcall.rs deleted file mode 100644 index 0973422a24..0000000000 --- a/cranelift/codegen/src/legalizer/libcall.rs +++ /dev/null @@ -1,40 +0,0 @@ -//! Expanding instructions as runtime library calls. - -use crate::ir; -use crate::ir::{libcall::get_libcall_funcref, InstBuilder}; -use crate::isa::{CallConv, TargetIsa}; -use crate::legalizer::boundary::legalize_libcall_signature; -use alloc::vec::Vec; - -/// Try to expand `inst` as a library call, returning true is successful. -pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function, isa: &dyn TargetIsa) -> bool { - // Does the opcode/ctrl_type combo even have a well-known runtime library name. - let libcall = match ir::LibCall::for_inst(func.dfg[inst].opcode(), func.dfg.ctrl_typevar(inst)) - { - Some(lc) => lc, - None => return false, - }; - - // Now we convert `inst` to a call. First save the arguments. - let mut args = Vec::new(); - args.extend_from_slice(func.dfg.inst_args(inst)); - - let call_conv = CallConv::for_libcall(isa.flags(), isa.default_call_conv()); - if call_conv.extends_baldrdash() { - let vmctx = func - .special_param(ir::ArgumentPurpose::VMContext) - .expect("Missing vmctx parameter for baldrdash libcall"); - args.push(vmctx); - } - - // The replace builder will preserve the instruction result values. - let funcref = get_libcall_funcref(libcall, call_conv, func, inst, isa); - func.dfg.replace(inst).call(funcref, &args); - - // Ask the ISA to legalize the signature. - let fn_data = &func.dfg.ext_funcs[funcref]; - let sig_data = &mut func.dfg.signatures[fn_data.signature]; - legalize_libcall_signature(sig_data, isa); - - true -} diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index 3b794a1e25..b09d1c601c 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -13,195 +13,20 @@ //! The legalizer does not deal with register allocation constraints. 
These constraints are derived //! from the encoding recipes, and solved later by the register allocator. -#[cfg(any(feature = "x86", feature = "riscv"))] -use crate::bitset::BitSet; use crate::cursor::{Cursor, FuncCursor}; use crate::flowgraph::ControlFlowGraph; -use crate::ir::types::{I32, I64}; +use crate::ir::types::I32; use crate::ir::{self, InstBuilder, MemFlags}; use crate::isa::TargetIsa; -#[cfg(any(feature = "x86", feature = "riscv"))] -use crate::predicates; -#[cfg(any(feature = "x86", feature = "riscv"))] -use alloc::vec::Vec; - -use crate::timing; -use alloc::collections::BTreeSet; - -mod boundary; -mod call; mod globalvalue; mod heap; -mod libcall; -mod split; mod table; -#[cfg(any(feature = "x86", feature = "riscv"))] -use self::call::expand_call; use self::globalvalue::expand_global_value; use self::heap::expand_heap_addr; -pub(crate) use self::libcall::expand_as_libcall; use self::table::expand_table_addr; -enum LegalizeInstResult { - Done, - Legalized, - SplitLegalizePending, -} - -/// Legalize `inst` for `isa`. -fn legalize_inst( - inst: ir::Inst, - pos: &mut FuncCursor, - cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) -> LegalizeInstResult { - let opcode = pos.func.dfg[inst].opcode(); - - // Check for ABI boundaries that need to be converted to the legalized signature. - if opcode.is_call() { - if boundary::handle_call_abi(isa, inst, pos.func, cfg) { - return LegalizeInstResult::Legalized; - } - } else if opcode.is_return() { - if boundary::handle_return_abi(inst, pos.func, cfg) { - return LegalizeInstResult::Legalized; - } - } else if opcode.is_branch() { - split::simplify_branch_arguments(&mut pos.func.dfg, inst); - } else if opcode == ir::Opcode::Isplit { - pos.use_srcloc(inst); - - let arg = match pos.func.dfg[inst] { - ir::InstructionData::Unary { arg, .. 
} => pos.func.dfg.resolve_aliases(arg), - _ => panic!("Expected isplit: {}", pos.func.dfg.display_inst(inst, None)), - }; - - match pos.func.dfg.value_def(arg) { - ir::ValueDef::Result(inst, _num) => { - if let ir::InstructionData::Binary { - opcode: ir::Opcode::Iconcat, - .. - } = pos.func.dfg[inst] - { - // `arg` was created by an `iconcat` instruction. - } else { - // `arg` was not created by an `iconcat` instruction. Don't try to resolve it, - // as otherwise `split::isplit` will re-insert the original `isplit`, causing - // an endless loop. - return LegalizeInstResult::SplitLegalizePending; - } - } - ir::ValueDef::Param(_block, _num) => {} - } - - let res = pos.func.dfg.inst_results(inst).to_vec(); - assert_eq!(res.len(), 2); - let (resl, resh) = (res[0], res[1]); // Prevent borrowck error - - // Remove old isplit - pos.func.dfg.clear_results(inst); - pos.remove_inst(); - - let curpos = pos.position(); - let srcloc = pos.srcloc(); - let (xl, xh) = split::isplit(pos.func, cfg, curpos, srcloc, arg); - - pos.func.dfg.change_to_alias(resl, xl); - pos.func.dfg.change_to_alias(resh, xh); - - return LegalizeInstResult::Legalized; - } - - match pos.func.update_encoding(inst, isa) { - Ok(()) => LegalizeInstResult::Done, - Err(action) => { - // We should transform the instruction into legal equivalents. - // If the current instruction was replaced, we need to double back and revisit - // the expanded sequence. This is both to assign encodings and possible to - // expand further. - // There's a risk of infinite looping here if the legalization patterns are - // unsound. Should we attempt to detect that? - if action(inst, pos.func, cfg, isa) { - return LegalizeInstResult::Legalized; - } - - // We don't have any pattern expansion for this instruction either. - // Try converting it to a library call as a last resort. 
- if expand_as_libcall(inst, pos.func, isa) { - LegalizeInstResult::Legalized - } else { - LegalizeInstResult::Done - } - } - } -} - -/// Legalize `func` for `isa`. -/// -/// - Transform any instructions that don't have a legal representation in `isa`. -/// - Fill out `func.encodings`. -/// -pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) { - let _tt = timing::legalize(); - debug_assert!(cfg.is_valid()); - - boundary::legalize_signatures(func, isa); - - func.encodings.resize(func.dfg.num_insts()); - - let mut pos = FuncCursor::new(func); - let func_begin = pos.position(); - - // Split block params before trying to legalize instructions, so that the newly introduced - // isplit instructions get legalized. - while let Some(block) = pos.next_block() { - split::split_block_params(pos.func, cfg, block); - } - - pos.set_position(func_begin); - - // This must be a set to prevent trying to legalize `isplit` and `vsplit` twice in certain cases. - let mut pending_splits = BTreeSet::new(); - - // Process blocks in layout order. Some legalization actions may split the current block or append - // new ones to the end. We need to make sure we visit those new blocks too. - while let Some(_block) = pos.next_block() { - // Keep track of the cursor position before the instruction being processed, so we can - // double back when replacing instructions. - let mut prev_pos = pos.position(); - - while let Some(inst) = pos.next_inst() { - match legalize_inst(inst, &mut pos, cfg, isa) { - // Remember this position in case we need to double back. - LegalizeInstResult::Done => prev_pos = pos.position(), - - // Go back and legalize the inserted return value conversion instructions. - LegalizeInstResult::Legalized => pos.set_position(prev_pos), - - // The argument of a `isplit` or `vsplit` instruction didn't resolve to a - // `iconcat` or `vconcat` instruction. Try again after legalizing the rest of - // the instructions. 
- LegalizeInstResult::SplitLegalizePending => { - pending_splits.insert(inst); - } - } - } - } - - // Try legalizing `isplit` and `vsplit` instructions, which could not previously be legalized. - for inst in pending_splits { - pos.goto_inst(inst); - legalize_inst(inst, &mut pos, cfg, isa); - } - - // Now that we've lowered all br_tables, we don't need the jump tables anymore. - if !isa.flags().enable_jump_tables() { - pos.func.jump_tables.clear(); - } -} - /// Perform a simple legalization by expansion of the function, without /// platform-specific transforms. pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) { @@ -215,7 +40,7 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: } => (arg, imm), _ => panic!( concat!("Expected ", stringify!($from), ": {}"), - $pos.func.dfg.display_inst($inst, None) + $pos.func.dfg.display_inst($inst) ), }; let ty = $pos.func.dfg.value_type(arg); @@ -232,7 +57,7 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: } => (arg, imm), _ => panic!( concat!("Expected ", stringify!($from), ": {}"), - $pos.func.dfg.display_inst($inst, None) + $pos.func.dfg.display_inst($inst) ), }; let imm = $pos.ins().iconst($ty, imm); @@ -281,10 +106,7 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: arg, imm, } => (arg, imm), - _ => panic!( - "Expected irsub_imm: {}", - pos.func.dfg.display_inst(inst, None) - ), + _ => panic!("Expected irsub_imm: {}", pos.func.dfg.display_inst(inst)), }; let ty = pos.func.dfg.value_type(arg); let imm = pos.ins().iconst(ty, imm); @@ -306,10 +128,7 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: arg, imm, } => (cond, arg, imm), - _ => panic!( - "Expected ircmp_imm: {}", - pos.func.dfg.display_inst(inst, None) - ), + _ => panic!("Expected ircmp_imm: {}", pos.func.dfg.display_inst(inst)), }; let ty = pos.func.dfg.value_type(x); let y = 
pos.ins().iconst(ty, y); @@ -329,12 +148,6 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: } } -// Include legalization patterns that were generated by `gen_legalizer.rs` from the -// `TransformGroup` in `cranelift-codegen/meta/shared/legalize.rs`. -// -// Concretely, this defines private functions `narrow()`, and `expand()`. -include!(concat!(env!("OUT_DIR"), "/legalizer.rs")); - /// Custom expansion for conditional trap instructions. /// TODO: Add CFG support to the Rust DSL patterns so we won't have to do this. fn expand_cond_trap( @@ -351,11 +164,11 @@ fn expand_cond_trap( trapz = match opcode { ir::Opcode::Trapz => true, ir::Opcode::Trapnz | ir::Opcode::ResumableTrapnz => false, - _ => panic!("Expected cond trap: {}", func.dfg.display_inst(inst, None)), + _ => panic!("Expected cond trap: {}", func.dfg.display_inst(inst)), }; (arg, code, opcode) } - _ => panic!("Expected cond trap: {}", func.dfg.display_inst(inst, None)), + _ => panic!("Expected cond trap: {}", func.dfg.display_inst(inst)), }; // Split the block after `inst`: @@ -412,189 +225,6 @@ fn expand_cond_trap( cfg.recompute_block(pos.func, new_block_trap); } -/// Jump tables. -fn expand_br_table( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - if isa.flags().enable_jump_tables() { - expand_br_table_jt(inst, func, cfg, isa); - } else { - expand_br_table_conds(inst, func, cfg, isa); - } -} - -/// Expand br_table to jump table. 
-fn expand_br_table_jt( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - use crate::ir::condcodes::IntCC; - - let (arg, default_block, table) = match func.dfg[inst] { - ir::InstructionData::BranchTable { - opcode: ir::Opcode::BrTable, - arg, - destination, - table, - } => (arg, destination, table), - _ => panic!("Expected br_table: {}", func.dfg.display_inst(inst, None)), - }; - - // Rewrite: - // - // br_table $idx, default_block, $jt - // - // To: - // - // $oob = ifcmp_imm $idx, len($jt) - // brif uge $oob, default_block - // jump fallthrough_block - // - // fallthrough_block: - // $base = jump_table_base.i64 $jt - // $rel_addr = jump_table_entry.i64 $idx, $base, 4, $jt - // $addr = iadd $base, $rel_addr - // indirect_jump_table_br $addr, $jt - - let block = func.layout.pp_block(inst); - let jump_table_block = func.dfg.make_block(); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // Bounds check. - let table_size = pos.func.jump_tables[table].len() as i64; - let oob = pos - .ins() - .icmp_imm(IntCC::UnsignedGreaterThanOrEqual, arg, table_size); - - pos.ins().brnz(oob, default_block, &[]); - pos.ins().jump(jump_table_block, &[]); - pos.insert_block(jump_table_block); - - let addr_ty = isa.pointer_type(); - - let arg = if pos.func.dfg.value_type(arg) == addr_ty { - arg - } else { - pos.ins().uextend(addr_ty, arg) - }; - - let base_addr = pos.ins().jump_table_base(addr_ty, table); - let entry = pos - .ins() - .jump_table_entry(arg, base_addr, I32.bytes() as u8, table); - - let addr = pos.ins().iadd(base_addr, entry); - pos.ins().indirect_jump_table_br(addr, table); - - pos.remove_inst(); - cfg.recompute_block(pos.func, block); - cfg.recompute_block(pos.func, jump_table_block); -} - -/// Expand br_table to series of conditionals. 
-fn expand_br_table_conds( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::condcodes::IntCC; - - let (arg, default_block, table) = match func.dfg[inst] { - ir::InstructionData::BranchTable { - opcode: ir::Opcode::BrTable, - arg, - destination, - table, - } => (arg, destination, table), - _ => panic!("Expected br_table: {}", func.dfg.display_inst(inst, None)), - }; - - let block = func.layout.pp_block(inst); - - // This is a poor man's jump table using just a sequence of conditional branches. - let table_size = func.jump_tables[table].len(); - let mut cond_failed_block = vec![]; - if table_size >= 1 { - cond_failed_block = alloc::vec::Vec::with_capacity(table_size - 1); - for _ in 0..table_size - 1 { - cond_failed_block.push(func.dfg.make_block()); - } - } - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // Ignore the lint for this loop as the range needs to be 0 to table_size - #[allow(clippy::needless_range_loop)] - for i in 0..table_size { - let dest = pos.func.jump_tables[table].as_slice()[i]; - let t = pos.ins().icmp_imm(IntCC::Equal, arg, i as i64); - pos.ins().brnz(t, dest, &[]); - // Jump to the next case. - if i < table_size - 1 { - let block = cond_failed_block[i]; - pos.ins().jump(block, &[]); - pos.insert_block(block); - } - } - - // `br_table` jumps to the default destination if nothing matches - pos.ins().jump(default_block, &[]); - - pos.remove_inst(); - cfg.recompute_block(pos.func, block); - for failed_block in cond_failed_block.into_iter() { - cfg.recompute_block(pos.func, failed_block); - } -} - -/// Expand the select instruction. -/// -/// Conditional moves are available in some ISAs for some register classes. The remaining selects -/// are handled by a branch. 
-fn expand_select( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let (ctrl, tval, fval) = match func.dfg[inst] { - ir::InstructionData::Ternary { - opcode: ir::Opcode::Select, - args, - } => (args[0], args[1], args[2]), - _ => panic!("Expected select: {}", func.dfg.display_inst(inst, None)), - }; - - // Replace `result = select ctrl, tval, fval` with: - // - // brnz ctrl, new_block(tval) - // jump new_block(fval) - // new_block(result): - let old_block = func.layout.pp_block(inst); - let result = func.dfg.first_result(inst); - func.dfg.clear_results(inst); - let new_block = func.dfg.make_block(); - func.dfg.attach_block_param(new_block, result); - - func.dfg.replace(inst).brnz(ctrl, new_block, &[tval]); - let mut pos = FuncCursor::new(func).after_inst(inst); - pos.use_srcloc(inst); - pos.ins().jump(new_block, &[fval]); - pos.insert_block(new_block); - - cfg.recompute_block(pos.func, new_block); - cfg.recompute_block(pos.func, old_block); -} - fn expand_br_icmp( inst: ir::Inst, func: &mut ir::Function, @@ -614,7 +244,7 @@ fn expand_br_icmp( destination, args.as_slice(&func.dfg.value_lists)[2..].to_vec(), ), - _ => panic!("Expected br_icmp {}", func.dfg.display_inst(inst, None)), + _ => panic!("Expected br_icmp {}", func.dfg.display_inst(inst)), }; let old_block = func.layout.pp_block(inst); @@ -629,34 +259,6 @@ fn expand_br_icmp( cfg.recompute_block(pos.func, old_block); } -/// Expand illegal `f32const` and `f64const` instructions. -fn expand_fconst( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let ty = func.dfg.value_type(func.dfg.first_result(inst)); - debug_assert!(!ty.is_vector(), "Only scalar fconst supported: {}", ty); - - // In the future, we may want to generate constant pool entries for these constants, but for - // now use an `iconst` and a bit cast. 
- let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - let ival = match pos.func.dfg[inst] { - ir::InstructionData::UnaryIeee32 { - opcode: ir::Opcode::F32const, - imm, - } => pos.ins().iconst(ir::types::I32, i64::from(imm.bits())), - ir::InstructionData::UnaryIeee64 { - opcode: ir::Opcode::F64const, - imm, - } => pos.ins().iconst(ir::types::I64, imm.bits() as i64), - _ => panic!("Expected fconst: {}", pos.func.dfg.display_inst(inst, None)), - }; - pos.func.dfg.replace(inst).bitcast(ty, ival); -} - /// Expand illegal `stack_load` instructions. fn expand_stack_load( inst: ir::Inst, @@ -676,10 +278,7 @@ fn expand_stack_load( stack_slot, offset, } => (stack_slot, offset), - _ => panic!( - "Expected stack_load: {}", - pos.func.dfg.display_inst(inst, None) - ), + _ => panic!("Expected stack_load: {}", pos.func.dfg.display_inst(inst)), }; let addr = pos.ins().stack_addr(addr_ty, stack_slot, offset); @@ -708,10 +307,7 @@ fn expand_stack_store( stack_slot, offset, } => (arg, stack_slot, offset), - _ => panic!( - "Expected stack_store: {}", - pos.func.dfg.display_inst(inst, None) - ), + _ => panic!("Expected stack_store: {}", pos.func.dfg.display_inst(inst)), }; let addr = pos.ins().stack_addr(addr_ty, stack_slot, offset); @@ -722,171 +318,3 @@ fn expand_stack_store( mflags.set_aligned(); pos.func.dfg.replace(inst).store(mflags, val, addr, 0); } - -/// Split a load into two parts before `iconcat`ing the result together. 
-fn narrow_load( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - let (ptr, offset, flags) = match pos.func.dfg[inst] { - ir::InstructionData::Load { - opcode: ir::Opcode::Load, - arg, - offset, - flags, - } => (arg, offset, flags), - _ => panic!("Expected load: {}", pos.func.dfg.display_inst(inst, None)), - }; - - let res_ty = pos.func.dfg.ctrl_typevar(inst); - let small_ty = res_ty.half_width().expect("Can't narrow load"); - - let al = pos.ins().load(small_ty, flags, ptr, offset); - let ah = pos.ins().load( - small_ty, - flags, - ptr, - offset.try_add_i64(8).expect("load offset overflow"), - ); - let (al, ah) = match flags.endianness(isa.endianness()) { - ir::Endianness::Little => (al, ah), - ir::Endianness::Big => (ah, al), - }; - pos.func.dfg.replace(inst).iconcat(al, ah); -} - -/// Split a store into two parts after `isplit`ing the value. -fn narrow_store( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - let (val, ptr, offset, flags) = match pos.func.dfg[inst] { - ir::InstructionData::Store { - opcode: ir::Opcode::Store, - args, - offset, - flags, - } => (args[0], args[1], offset, flags), - _ => panic!("Expected store: {}", pos.func.dfg.display_inst(inst, None)), - }; - - let (al, ah) = pos.ins().isplit(val); - let (al, ah) = match flags.endianness(isa.endianness()) { - ir::Endianness::Little => (al, ah), - ir::Endianness::Big => (ah, al), - }; - pos.ins().store(flags, al, ptr, offset); - pos.ins().store( - flags, - ah, - ptr, - offset.try_add_i64(8).expect("store offset overflow"), - ); - pos.remove_inst(); -} - -/// Expands an illegal iconst value by splitting it into two. 
-fn narrow_iconst( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let imm: i64 = if let ir::InstructionData::UnaryImm { - opcode: ir::Opcode::Iconst, - imm, - } = &func.dfg[inst] - { - (*imm).into() - } else { - panic!("unexpected instruction in narrow_iconst"); - }; - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - let ty = pos.func.dfg.ctrl_typevar(inst); - if isa.pointer_bits() == 32 && ty == I64 { - let low = pos.ins().iconst(I32, imm & 0xffffffff); - let high = pos.ins().iconst(I32, imm >> 32); - // The instruction has as many results as iconcat, so no need to replace them. - pos.func.dfg.replace(inst).iconcat(low, high); - return; - } - - unimplemented!("missing encoding or legalization for iconst.{:?}", ty); -} - -fn narrow_icmp_imm( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::condcodes::{CondCode, IntCC}; - - let (arg, cond, imm): (ir::Value, IntCC, i64) = match func.dfg[inst] { - ir::InstructionData::IntCompareImm { - opcode: ir::Opcode::IcmpImm, - arg, - cond, - imm, - } => (arg, cond, imm.into()), - _ => panic!("unexpected instruction in narrow_icmp_imm"), - }; - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - let ty = pos.func.dfg.ctrl_typevar(inst); - let ty_half = ty.half_width().unwrap(); - - let mask = ((1u128 << ty_half.bits()) - 1) as i64; - let imm_low = pos.ins().iconst(ty_half, imm & mask); - let imm_high = pos.ins().iconst( - ty_half, - imm.checked_shr(ty_half.bits().into()).unwrap_or(0) & mask, - ); - let (arg_low, arg_high) = pos.ins().isplit(arg); - - match cond { - IntCC::Equal => { - let res_low = pos.ins().icmp(cond, arg_low, imm_low); - let res_high = pos.ins().icmp(cond, arg_high, imm_high); - pos.func.dfg.replace(inst).band(res_low, res_high); - } - IntCC::NotEqual => { - let res_low = pos.ins().icmp(cond, arg_low, imm_low); - let 
res_high = pos.ins().icmp(cond, arg_high, imm_high); - pos.func.dfg.replace(inst).bor(res_low, res_high); - } - IntCC::SignedGreaterThan - | IntCC::SignedGreaterThanOrEqual - | IntCC::SignedLessThan - | IntCC::SignedLessThanOrEqual - | IntCC::UnsignedGreaterThan - | IntCC::UnsignedGreaterThanOrEqual - | IntCC::UnsignedLessThan - | IntCC::UnsignedLessThanOrEqual => { - let b1 = pos.ins().icmp(cond.without_equal(), arg_high, imm_high); - let b2 = pos - .ins() - .icmp(cond.inverse().without_equal(), arg_high, imm_high); - let b3 = pos.ins().icmp(cond.unsigned(), arg_low, imm_low); - let c1 = pos.ins().bnot(b2); - let c2 = pos.ins().band(c1, b3); - pos.func.dfg.replace(inst).bor(b1, c2); - } - _ => unimplemented!("missing legalization for condition {:?}", cond), - } -} diff --git a/cranelift/codegen/src/legalizer/split.rs b/cranelift/codegen/src/legalizer/split.rs deleted file mode 100644 index 7576926142..0000000000 --- a/cranelift/codegen/src/legalizer/split.rs +++ /dev/null @@ -1,405 +0,0 @@ -//! Value splitting. -//! -//! Some value types are too large to fit in registers, so they need to be split into smaller parts -//! that the ISA can operate on. There's two dimensions of splitting, represented by two -//! complementary instruction pairs: -//! -//! - `isplit` and `iconcat` for splitting integer types into smaller integers. -//! - `vsplit` and `vconcat` for splitting vector types into smaller vector types with the same -//! lane types. -//! -//! There is no floating point splitting. If an ISA doesn't support `f64` values, they probably -//! have to be bit-cast to `i64` and possibly split into two `i32` values that fit in registers. -//! This breakdown is handled by the ABI lowering. -//! -//! When legalizing a single instruction, it is wrapped in splits and concatenations: -//! -//! ```clif -//! v1 = bxor.i64 v2, v3 -//! ``` -//! -//! becomes: -//! -//! ```clif -//! v20, v21 = isplit v2 -//! v30, v31 = isplit v3 -//! v10 = bxor.i32 v20, v30 -//! 
v11 = bxor.i32 v21, v31 -//! v1 = iconcat v10, v11 -//! ``` -//! -//! This local expansion approach still leaves the original `i64` values in the code as operands on -//! the `split` and `concat` instructions. It also creates a lot of redundant code to clean up as -//! values are constantly split and concatenated. -//! -//! # Optimized splitting -//! -//! We can eliminate a lot of the splitting code quite easily. Whenever we need to split a value, -//! first check if the value is defined by the corresponding concatenation. If so, then just use -//! the two concatenation inputs directly: -//! -//! ```clif -//! v4 = iadd_imm.i64 v1, 1 -//! ``` -//! -//! becomes, using the expanded code from above: -//! -//! ```clif -//! v40, v5 = iadd_imm_cout.i32 v10, 1 -//! v6 = bint.i32 -//! v41 = iadd.i32 v11, v6 -//! v4 = iconcat v40, v41 -//! ``` -//! -//! This means that the `iconcat` instructions defining `v1` and `v4` end up with no uses, so they -//! can be trivially deleted by a dead code elimination pass. -//! -//! # block arguments -//! -//! If all instructions that produce an `i64` value are legalized as above, we will eventually end -//! up with no `i64` values anywhere, except for block arguments. We can work around this by -//! iteratively splitting block arguments too. That should leave us with no illegal value types -//! anywhere. -//! -//! It is possible to have circular dependencies of block arguments that are never used by any real -//! instructions. These loops will remain in the program. - -use crate::cursor::{Cursor, CursorPosition, FuncCursor}; -use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; -use crate::ir::{self, Block, Inst, InstBuilder, InstructionData, Opcode, Type, Value, ValueDef}; -use alloc::vec::Vec; -use core::iter; -use smallvec::SmallVec; - -/// Split `value` into two values using the `isplit` semantics. Do this by reusing existing values -/// if possible. 
-pub fn isplit( - func: &mut ir::Function, - cfg: &ControlFlowGraph, - pos: CursorPosition, - srcloc: ir::SourceLoc, - value: Value, -) -> (Value, Value) { - split_any(func, cfg, pos, srcloc, value, Opcode::Iconcat) -} - -/// Split `value` into halves using the `vsplit` semantics. Do this by reusing existing values if -/// possible. -pub fn vsplit( - func: &mut ir::Function, - cfg: &ControlFlowGraph, - pos: CursorPosition, - srcloc: ir::SourceLoc, - value: Value, -) -> (Value, Value) { - split_any(func, cfg, pos, srcloc, value, Opcode::Vconcat) -} - -/// After splitting a block argument, we need to go back and fix up all of the predecessor -/// instructions. This is potentially a recursive operation, but we don't implement it recursively -/// since that could use up too muck stack. -/// -/// Instead, the repairs are deferred and placed on a work list in stack form. -struct Repair { - concat: Opcode, - // The argument type after splitting. - split_type: Type, - // The destination block whose arguments have been split. - block: Block, - // Number of the original block argument which has been replaced by the low part. - num: usize, - // Number of the new block argument which represents the high part after the split. - hi_num: usize, -} - -/// Generic version of `isplit` and `vsplit` controlled by the `concat` opcode. -fn split_any( - func: &mut ir::Function, - cfg: &ControlFlowGraph, - pos: CursorPosition, - srcloc: ir::SourceLoc, - value: Value, - concat: Opcode, -) -> (Value, Value) { - let mut repairs = Vec::new(); - let pos = &mut FuncCursor::new(func).at_position(pos).with_srcloc(srcloc); - let result = split_value(pos, value, concat, &mut repairs); - - perform_repairs(pos, cfg, repairs); - - result -} - -pub fn split_block_params(func: &mut ir::Function, cfg: &ControlFlowGraph, block: Block) { - let pos = &mut FuncCursor::new(func).at_top(block); - let block_params = pos.func.dfg.block_params(block); - - // Add further splittable types here. 
- fn type_requires_splitting(ty: Type) -> bool { - ty == ir::types::I128 - } - - // A shortcut. If none of the param types require splitting, exit now. This helps because - // the loop below necessarily has to copy the block params into a new vector, so it's better to - // avoid doing so when possible. - if !block_params - .iter() - .any(|block_param| type_requires_splitting(pos.func.dfg.value_type(*block_param))) - { - return; - } - - let mut repairs = Vec::new(); - for (num, block_param) in block_params.to_vec().into_iter().enumerate() { - if !type_requires_splitting(pos.func.dfg.value_type(block_param)) { - continue; - } - - split_block_param(pos, block, num, block_param, Opcode::Iconcat, &mut repairs); - } - - perform_repairs(pos, cfg, repairs); -} - -fn perform_repairs(pos: &mut FuncCursor, cfg: &ControlFlowGraph, mut repairs: Vec) { - // We have split the value requested, and now we may need to fix some block predecessors. - while let Some(repair) = repairs.pop() { - for BlockPredecessor { inst, .. } in cfg.pred_iter(repair.block) { - let branch_opc = pos.func.dfg[inst].opcode(); - debug_assert!( - branch_opc.is_branch(), - "Predecessor not a branch: {}", - pos.func.dfg.display_inst(inst, None) - ); - let num_fixed_args = branch_opc.constraints().num_fixed_value_arguments(); - let mut args = pos.func.dfg[inst] - .take_value_list() - .expect("Branches must have value lists."); - let num_args = args.len(&pos.func.dfg.value_lists); - // Get the old value passed to the block argument we're repairing. - let old_arg = args - .get(num_fixed_args + repair.num, &pos.func.dfg.value_lists) - .expect("Too few branch arguments"); - - // It's possible that the CFG's predecessor list has duplicates. Detect them here. - if pos.func.dfg.value_type(old_arg) == repair.split_type { - pos.func.dfg[inst].put_value_list(args); - continue; - } - - // Split the old argument, possibly causing more repairs to be scheduled. 
- pos.goto_inst(inst); - - let inst_block = pos.func.layout.inst_block(inst).expect("inst in block"); - - // Insert split values prior to the terminal branch group. - let canonical = pos - .func - .layout - .canonical_branch_inst(&pos.func.dfg, inst_block); - if let Some(first_branch) = canonical { - pos.goto_inst(first_branch); - } - - let (lo, hi) = split_value(pos, old_arg, repair.concat, &mut repairs); - - // The `lo` part replaces the original argument. - *args - .get_mut(num_fixed_args + repair.num, &mut pos.func.dfg.value_lists) - .unwrap() = lo; - - // The `hi` part goes at the end. Since multiple repairs may have been scheduled to the - // same block, there could be multiple arguments missing. - if num_args > num_fixed_args + repair.hi_num { - *args - .get_mut( - num_fixed_args + repair.hi_num, - &mut pos.func.dfg.value_lists, - ) - .unwrap() = hi; - } else { - // We need to append one or more arguments. If we're adding more than one argument, - // there must be pending repairs on the stack that will fill in the correct values - // instead of `hi`. - args.extend( - iter::repeat(hi).take(1 + num_fixed_args + repair.hi_num - num_args), - &mut pos.func.dfg.value_lists, - ); - } - - // Put the value list back after manipulating it. - pos.func.dfg[inst].put_value_list(args); - } - } -} - -/// Split a single value using the integer or vector semantics given by the `concat` opcode. -/// -/// If the value is defined by a `concat` instruction, just reuse the operand values of that -/// instruction. -/// -/// Return the two new values representing the parts of `value`. -fn split_value( - pos: &mut FuncCursor, - value: Value, - concat: Opcode, - repairs: &mut Vec, -) -> (Value, Value) { - let value = pos.func.dfg.resolve_aliases(value); - let mut reuse = None; - - match pos.func.dfg.value_def(value) { - ValueDef::Result(inst, num) => { - // This is an instruction result. See if the value was created by a `concat` - // instruction. 
- if let InstructionData::Binary { opcode, args, .. } = pos.func.dfg[inst] { - debug_assert_eq!(num, 0); - if opcode == concat { - reuse = Some((args[0], args[1])); - } - } - } - ValueDef::Param(block, num) => { - // This is a block parameter. - // We can split the parameter value unless this is the entry block. - if pos.func.layout.entry_block() != Some(block) { - reuse = Some(split_block_param(pos, block, num, value, concat, repairs)); - } - } - } - - // Did the code above succeed in finding values we can reuse? - if let Some(pair) = reuse { - pair - } else { - // No, we'll just have to insert the requested split instruction at `pos`. Note that `pos` - // has not been moved by the block argument code above when `reuse` is `None`. - match concat { - Opcode::Iconcat => pos.ins().isplit(value), - Opcode::Vconcat => pos.ins().vsplit(value), - _ => panic!("Unhandled concat opcode: {}", concat), - } - } -} - -fn split_block_param( - pos: &mut FuncCursor, - block: Block, - param_num: usize, - value: Value, - concat: Opcode, - repairs: &mut Vec, -) -> (Value, Value) { - // We are going to replace the parameter at `num` with two new arguments. - // Determine the new value types. - let ty = pos.func.dfg.value_type(value); - let split_type = match concat { - Opcode::Iconcat => ty.half_width().expect("Invalid type for isplit"), - Opcode::Vconcat => ty.half_vector().expect("Invalid type for vsplit"), - _ => panic!("Unhandled concat opcode: {}", concat), - }; - - // Since the `repairs` stack potentially contains other parameter numbers for - // `block`, avoid shifting and renumbering block parameters. It could invalidate other - // `repairs` entries. - // - // Replace the original `value` with the low part, and append the high part at the - // end of the argument list. 
- let lo = pos.func.dfg.replace_block_param(value, split_type); - let hi_num = pos.func.dfg.num_block_params(block); - let hi = pos.func.dfg.append_block_param(block, split_type); - - // Now the original value is dangling. Insert a concatenation instruction that can - // compute it from the two new parameters. This also serves as a record of what we - // did so a future call to this function doesn't have to redo the work. - // - // Note that it is safe to move `pos` here since `reuse` was set above, so we don't - // need to insert a split instruction before returning. - pos.goto_first_inst(block); - pos.ins() - .with_result(value) - .Binary(concat, split_type, lo, hi); - - // Finally, splitting the block parameter is not enough. We also have to repair all - // of the predecessor instructions that branch here. - add_repair(concat, split_type, block, param_num, hi_num, repairs); - - (lo, hi) -} - -// Add a repair entry to the work list. -fn add_repair( - concat: Opcode, - split_type: Type, - block: Block, - num: usize, - hi_num: usize, - repairs: &mut Vec, -) { - repairs.push(Repair { - concat, - split_type, - block, - num, - hi_num, - }); -} - -/// Strip concat-split chains. Return a simpler way of computing the same value. -/// -/// Given this input: -/// -/// ```clif -/// v10 = iconcat v1, v2 -/// v11, v12 = isplit v10 -/// ``` -/// -/// This function resolves `v11` to `v1` and `v12` to `v2`. -fn resolve_splits(dfg: &ir::DataFlowGraph, value: Value) -> Value { - let value = dfg.resolve_aliases(value); - - // Deconstruct a split instruction. - let split_res; - let concat_opc; - let split_arg; - if let ValueDef::Result(inst, num) = dfg.value_def(value) { - split_res = num; - concat_opc = match dfg[inst].opcode() { - Opcode::Isplit => Opcode::Iconcat, - Opcode::Vsplit => Opcode::Vconcat, - _ => return value, - }; - split_arg = dfg.inst_args(inst)[0]; - } else { - return value; - } - - // See if split_arg is defined by a concatenation instruction. 
- if let ValueDef::Result(inst, _) = dfg.value_def(split_arg) { - if dfg[inst].opcode() == concat_opc { - return dfg.inst_args(inst)[split_res]; - } - } - - value -} - -/// Simplify the arguments to a branch *after* the instructions leading up to the branch have been -/// legalized. -/// -/// The branch argument repairs performed by `split_any()` above may be performed on branches that -/// have not yet been legalized. The repaired arguments can be defined by actual split -/// instructions in that case. -/// -/// After legalizing the instructions computing the value that was split, it is likely that we can -/// avoid depending on the split instruction. Its input probably comes from a concatenation. -pub fn simplify_branch_arguments(dfg: &mut ir::DataFlowGraph, branch: Inst) { - let mut new_args = SmallVec::<[Value; 32]>::new(); - - for &arg in dfg.inst_args(branch) { - let new_arg = resolve_splits(dfg, arg); - new_args.push(new_arg); - } - - dfg.inst_args_mut(branch).copy_from_slice(&new_args); -} diff --git a/cranelift/codegen/src/legalizer/table.rs b/cranelift/codegen/src/legalizer/table.rs index 0c4385e96b..14e653e944 100644 --- a/cranelift/codegen/src/legalizer/table.rs +++ b/cranelift/codegen/src/legalizer/table.rs @@ -28,7 +28,7 @@ pub fn expand_table_addr( debug_assert_eq!(opcode, ir::Opcode::TableAddr); (table, arg, offset) } - _ => panic!("Wanted table_addr: {}", func.dfg.display_inst(inst, None)), + _ => panic!("Wanted table_addr: {}", func.dfg.display_inst(inst)), }; dynamic_addr(inst, table, index, element_offset, func); diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 25f1e6902d..eca11115f3 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -59,7 +59,6 @@ use hashbrown::{hash_map, HashMap, HashSet}; use std::collections::{hash_map, HashMap, HashSet}; pub use crate::context::Context; -pub use crate::legalizer::legalize_function; pub use crate::value_label::{ValueLabelsRanges, ValueLocRange}; pub 
use crate::verifier::verify_function; pub use crate::write::write_function; @@ -87,7 +86,6 @@ pub use crate::entity::packed_option; pub use crate::machinst::buffer::MachSrcLoc; pub use crate::machinst::TextSectionBuilder; -mod abi; mod bitset; mod constant_hash; mod context; @@ -101,18 +99,11 @@ mod licm; mod log; mod machinst; mod nan_canonicalization; -mod partition_slice; -mod postopt; -mod predicates; -mod redundant_reload_remover; -mod regalloc; mod remove_constant_phis; mod result; mod scoped_hash_map; mod simple_gvn; mod simple_preopt; -mod stack_layout; -mod topo_order; mod unreachable_code; mod value_label; diff --git a/cranelift/codegen/src/licm.rs b/cranelift/codegen/src/licm.rs index 5e9e0c1262..3375a49a37 100644 --- a/cranelift/codegen/src/licm.rs +++ b/cranelift/codegen/src/licm.rs @@ -1,6 +1,6 @@ //! A Loop Invariant Code Motion optimization pass -use crate::cursor::{Cursor, EncCursor, FuncCursor}; +use crate::cursor::{Cursor, FuncCursor}; use crate::dominator_tree::DominatorTree; use crate::entity::{EntityList, ListPool}; use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; @@ -8,7 +8,6 @@ use crate::fx::FxHashSet; use crate::ir::{ Block, DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Layout, Opcode, Type, Value, }; -use crate::isa::TargetIsa; use crate::loop_analysis::{Loop, LoopAnalysis}; use crate::timing; use alloc::vec::Vec; @@ -17,7 +16,6 @@ use alloc::vec::Vec; /// loop-invariant instructions out of them. /// Changes the CFG and domtree in-place during the operation. 
pub fn do_licm( - isa: &dyn TargetIsa, func: &mut Function, cfg: &mut ControlFlowGraph, domtree: &mut DominatorTree, @@ -40,7 +38,7 @@ pub fn do_licm( match has_pre_header(&func.layout, cfg, domtree, loop_analysis.loop_header(lp)) { None => { let pre_header = - create_pre_header(isa, loop_analysis.loop_header(lp), func, cfg, domtree); + create_pre_header(loop_analysis.loop_header(lp), func, cfg, domtree); pos = FuncCursor::new(func).at_last_inst(pre_header); } // If there is a natural pre-header we insert new instructions just before the @@ -64,7 +62,6 @@ pub fn do_licm( /// Insert a pre-header before the header, modifying the function layout and CFG to reflect it. /// A jump instruction to the header is placed at the end of the pre-header. fn create_pre_header( - isa: &dyn TargetIsa, header: Block, func: &mut Function, cfg: &mut ControlFlowGraph, @@ -93,7 +90,7 @@ fn create_pre_header( } // Inserts the pre-header at the right place in the layout. - let mut pos = EncCursor::new(func, isa).at_top(header); + let mut pos = FuncCursor::new(func).at_top(header); pos.insert_block(pre_header); pos.next_inst(); pos.ins().jump(header, pre_header_args_value.as_slice(pool)); diff --git a/cranelift/codegen/src/machinst/adapter.rs b/cranelift/codegen/src/machinst/adapter.rs index c0bc76417f..ee797c466a 100644 --- a/cranelift/codegen/src/machinst/adapter.rs +++ b/cranelift/codegen/src/machinst/adapter.rs @@ -1,22 +1,13 @@ //! Adapter for a `MachBackend` to implement the `TargetIsa` trait. 
-use crate::binemit; use crate::ir; -use crate::isa::{ - BackendVariant, EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa, -}; +use crate::isa::TargetIsa; use crate::machinst::*; -use crate::regalloc::RegisterSet; use crate::settings::{self, Flags}; -#[cfg(feature = "testing_hooks")] -use crate::regalloc::RegDiversions; - #[cfg(feature = "unwind")] use crate::isa::unwind::systemv::RegisterMappingError; -use core::any::Any; -use std::borrow::Cow; use std::fmt; use target_lexicon::Triple; @@ -64,76 +55,6 @@ impl TargetIsa for TargetIsaAdapter { self.backend.isa_flags() } - fn variant(&self) -> BackendVariant { - BackendVariant::MachInst - } - - fn hash_all_flags(&self, hasher: &mut dyn Hasher) { - self.backend.hash_all_flags(hasher); - } - - fn register_info(&self) -> RegInfo { - // Called from function's Display impl, so we need a stub here. - RegInfo { - banks: &[], - classes: &[], - } - } - - fn legal_encodings<'a>( - &'a self, - _func: &'a ir::Function, - _inst: &'a ir::InstructionData, - _ctrl_typevar: ir::Type, - ) -> Encodings<'a> { - panic!("Should not be called when new-style backend is available!") - } - - fn encode( - &self, - _func: &ir::Function, - _inst: &ir::InstructionData, - _ctrl_typevar: ir::Type, - ) -> Result { - panic!("Should not be called when new-style backend is available!") - } - - fn encoding_info(&self) -> EncInfo { - panic!("Should not be called when new-style backend is available!") - } - - fn legalize_signature(&self, _sig: &mut Cow, _current: bool) { - panic!("Should not be called when new-style backend is available!") - } - - fn regclass_for_abi_type(&self, _ty: ir::Type) -> RegClass { - panic!("Should not be called when new-style backend is available!") - } - - fn allocatable_registers(&self, _func: &ir::Function) -> RegisterSet { - panic!("Should not be called when new-style backend is available!") - } - - fn prologue_epilogue(&self, _func: &mut ir::Function) -> CodegenResult<()> { - panic!("Should not be called 
when new-style backend is available!") - } - - #[cfg(feature = "testing_hooks")] - fn emit_inst( - &self, - _func: &ir::Function, - _inst: ir::Inst, - _divert: &mut RegDiversions, - _sink: &mut dyn binemit::CodeSink, - ) { - panic!("Should not be called when new-style backend is available!") - } - - /// Emit a whole function into memory. - fn emit_function_to_memory(&self, _func: &ir::Function, _sink: &mut binemit::MemoryCodeSink) { - panic!("Should not be called when new-style backend is available!") - } - fn get_mach_backend(&self) -> Option<&dyn MachBackend> { Some(&*self.backend) } @@ -142,10 +63,6 @@ impl TargetIsa for TargetIsaAdapter { self.backend.unsigned_add_overflow_condition() } - fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC { - self.backend.unsigned_sub_overflow_condition() - } - #[cfg(feature = "unwind")] fn create_systemv_cie(&self) -> Option { self.backend.create_systemv_cie() @@ -155,8 +72,4 @@ impl TargetIsa for TargetIsaAdapter { fn map_regalloc_reg_to_dwarf(&self, r: Reg) -> Result { self.backend.map_reg_to_dwarf(r) } - - fn as_any(&self) -> &dyn Any { - self as &dyn Any - } } diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index c16528474b..0702ee37b5 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -1641,10 +1641,8 @@ impl TextSectionBuilder for MachTextSectionBuilder { #[cfg(all(test, feature = "arm64"))] mod test { use super::*; - use crate::ir::{ConstantOffset, Function, JumpTable, Value}; use crate::isa::aarch64::inst::xreg; use crate::isa::aarch64::inst::{BranchTarget, CondBrKind, EmitInfo, Inst}; - use crate::isa::TargetIsa; use crate::machinst::MachInstEmit; use crate::settings; use std::default::Default; @@ -2068,15 +2066,12 @@ mod test { fn reloc_external(&mut self, _: SourceLoc, r: Reloc, _: &ExternalName, _: Addend) { self.relocs.push((self.offset, r)); } - fn reloc_constant(&mut self, _: Reloc, _: 
ConstantOffset) {} - fn reloc_jt(&mut self, _: Reloc, _: JumpTable) {} fn trap(&mut self, t: TrapCode, _: SourceLoc) { self.traps.push((self.offset, t)); } fn begin_jumptables(&mut self) {} fn begin_rodata(&mut self) {} fn end_codegen(&mut self) {} - fn add_stack_map(&mut self, _: &[Value], _: &Function, _: &dyn TargetIsa) {} fn add_call_site(&mut self, op: Opcode, _: SourceLoc) { self.callsites.push((self.offset, op)); } diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 554e31ad4e..b0122dfe9d 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -69,7 +69,6 @@ use crate::value_label::ValueLabelsRanges; use alloc::boxed::Box; use alloc::vec::Vec; use core::fmt::Debug; -use core::hash::Hasher; use cranelift_entity::PrimaryMap; use regalloc::RegUsageCollector; use regalloc::{ @@ -389,9 +388,6 @@ pub trait MachBackend { /// Get the ISA-dependent flag values that were used to make this trait object. fn isa_flags(&self) -> Vec; - /// Hashes all flags, both ISA-independent and ISA-dependent, into the specified hasher. - fn hash_all_flags(&self, hasher: &mut dyn Hasher); - /// Return triple for this backend. fn triple(&self) -> Triple; @@ -405,10 +401,6 @@ pub trait MachBackend { /// Condition that will be true when an IaddIfcout overflows. fn unsigned_add_overflow_condition(&self) -> IntCC; - /// Machine-specific condcode info needed by TargetIsa. - /// Condition that will be true when an IsubIfcout overflows. - fn unsigned_sub_overflow_condition(&self) -> IntCC; - /// Produces unwind info based on backend results. #[cfg(feature = "unwind")] fn emit_unwind_info( diff --git a/cranelift/codegen/src/partition_slice.rs b/cranelift/codegen/src/partition_slice.rs deleted file mode 100644 index 959f8c1102..0000000000 --- a/cranelift/codegen/src/partition_slice.rs +++ /dev/null @@ -1,97 +0,0 @@ -//! Rearrange the elements in a slice according to a predicate. 
- -use core::mem; - -/// Rearrange the elements of the mutable slice `s` such that elements where `p(t)` is true precede -/// the elements where `p(t)` is false. -/// -/// The order of elements is not preserved, unless the slice is already partitioned. -/// -/// Returns the number of elements where `p(t)` is true. -pub fn partition_slice(s: &mut [T], mut p: F) -> usize -where - F: FnMut(&T) -> bool, -{ - // The iterator works like a deque which we can pop from both ends. - let mut i = s.iter_mut(); - - // Number of elements for which the predicate is known to be true. - let mut pos = 0; - - loop { - // Find the first element for which the predicate fails. - let head = loop { - match i.next() { - Some(head) => { - if !p(&head) { - break head; - } - } - None => return pos, - } - pos += 1; - }; - - // Find the last element for which the predicate succeeds. - let tail = loop { - match i.next_back() { - Some(tail) => { - if p(&tail) { - break tail; - } - } - None => return pos, - } - }; - - // Swap the two elements into the right order. - mem::swap(head, tail); - pos += 1; - } -} - -#[cfg(test)] -mod tests { - use super::partition_slice; - use alloc::vec::Vec; - - fn check(x: &[u32], want: &[u32]) { - assert_eq!(x.len(), want.len()); - let want_count = want.iter().cloned().filter(|&x| x % 10 == 0).count(); - let mut v = Vec::new(); - v.extend(x.iter().cloned()); - let count = partition_slice(&mut v[..], |&x| x % 10 == 0); - assert_eq!(v, want); - assert_eq!(count, want_count); - } - - #[test] - fn empty() { - check(&[], &[]); - } - - #[test] - fn singles() { - check(&[0], &[0]); - check(&[1], &[1]); - check(&[10], &[10]); - } - - #[test] - fn doubles() { - check(&[0, 0], &[0, 0]); - check(&[0, 5], &[0, 5]); - check(&[5, 0], &[0, 5]); - check(&[5, 4], &[5, 4]); - } - - #[test] - fn longer() { - check(&[1, 2, 3], &[1, 2, 3]); - check(&[1, 2, 10], &[10, 2, 1]); // Note: 2, 1 order not required. - check(&[1, 10, 2], &[10, 1, 2]); // Note: 1, 2 order not required. 
- check(&[1, 20, 10], &[10, 20, 1]); // Note: 10, 20 order not required. - check(&[1, 20, 3, 10], &[10, 20, 3, 1]); - check(&[20, 3, 10, 1], &[20, 10, 3, 1]); - } -} diff --git a/cranelift/codegen/src/peepmatic.rs b/cranelift/codegen/src/peepmatic.rs index d676dbad93..a7e37c9ba0 100644 --- a/cranelift/codegen/src/peepmatic.rs +++ b/cranelift/codegen/src/peepmatic.rs @@ -2,6 +2,7 @@ use crate::cursor::{Cursor, FuncCursor}; use crate::ir::{ + condcodes::IntCC, dfg::DataFlowGraph, entities::{Inst, Value}, immediates::{Imm64, Uimm64}, @@ -9,7 +10,6 @@ use crate::ir::{ types, InstBuilder, }; use crate::isa::TargetIsa; -use cranelift_codegen_shared::condcodes::IntCC; use peepmatic_runtime::{ cc::ConditionCode, instruction_set::InstructionSet, diff --git a/cranelift/codegen/src/postopt.rs b/cranelift/codegen/src/postopt.rs deleted file mode 100644 index ada14e1ff8..0000000000 --- a/cranelift/codegen/src/postopt.rs +++ /dev/null @@ -1,427 +0,0 @@ -//! A post-legalization rewriting pass. - -#![allow(non_snake_case)] - -use crate::cursor::{Cursor, EncCursor}; -use crate::ir::condcodes::{CondCode, FloatCC, IntCC}; -use crate::ir::dfg::ValueDef; -use crate::ir::immediates::{Imm64, Offset32}; -use crate::ir::instructions::{Opcode, ValueList}; -use crate::ir::{Block, Function, Inst, InstBuilder, InstructionData, MemFlags, Type, Value}; -use crate::isa::TargetIsa; -use crate::timing; - -/// Information collected about a compare+branch sequence. -struct CmpBrInfo { - /// The branch instruction. - br_inst: Inst, - /// The icmp, icmp_imm, or fcmp instruction. - cmp_inst: Inst, - /// The destination of the branch. - destination: Block, - /// The arguments of the branch. - args: ValueList, - /// The first argument to the comparison. The second is in the `kind` field. - cmp_arg: Value, - /// If the branch is `brz` rather than `brnz`, we need to invert the condition - /// before the branch. - invert_branch_cond: bool, - /// The kind of comparison, and the second argument. 
- kind: CmpBrKind, -} - -enum CmpBrKind { - Icmp { cond: IntCC, arg: Value }, - IcmpImm { cond: IntCC, imm: Imm64 }, - Fcmp { cond: FloatCC, arg: Value }, -} - -/// Optimize comparisons to use flags values, to avoid materializing conditions -/// in integer registers. -/// -/// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff -/// sequences. -fn optimize_cpu_flags( - pos: &mut EncCursor, - inst: Inst, - last_flags_clobber: Option, - isa: &dyn TargetIsa, -) { - // Look for compare and branch patterns. - // This code could be considerably simplified with non-lexical lifetimes. - let info = match pos.func.dfg[inst] { - InstructionData::Branch { - opcode, - destination, - ref args, - } => { - let first_arg = args.first(&pos.func.dfg.value_lists).unwrap(); - let invert_branch_cond = match opcode { - Opcode::Brz => true, - Opcode::Brnz => false, - _ => panic!(), - }; - if let ValueDef::Result(cond_inst, _) = pos.func.dfg.value_def(first_arg) { - match pos.func.dfg[cond_inst] { - InstructionData::IntCompare { - cond, - args: cmp_args, - .. - } => CmpBrInfo { - br_inst: inst, - cmp_inst: cond_inst, - destination, - args: args.clone(), - cmp_arg: cmp_args[0], - invert_branch_cond, - kind: CmpBrKind::Icmp { - cond, - arg: cmp_args[1], - }, - }, - InstructionData::IntCompareImm { - cond, - arg: cmp_arg, - imm: cmp_imm, - .. - } => CmpBrInfo { - br_inst: inst, - cmp_inst: cond_inst, - destination, - args: args.clone(), - cmp_arg, - invert_branch_cond, - kind: CmpBrKind::IcmpImm { cond, imm: cmp_imm }, - }, - InstructionData::FloatCompare { - cond, - args: cmp_args, - .. - } => CmpBrInfo { - br_inst: inst, - cmp_inst: cond_inst, - destination, - args: args.clone(), - cmp_arg: cmp_args[0], - invert_branch_cond, - kind: CmpBrKind::Fcmp { - cond, - arg: cmp_args[1], - }, - }, - _ => return, - } - } else { - return; - } - } - // TODO: trapif, trueif, selectif, and their ff counterparts. 
- _ => return, - }; - - // If any instructions clobber the flags between the comparison and the branch, - // don't optimize them. - if last_flags_clobber != Some(info.cmp_inst) { - return; - } - - // We found a compare+branch pattern. Transform it to use flags. - let args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec(); - pos.goto_inst(info.cmp_inst); - pos.use_srcloc(info.cmp_inst); - match info.kind { - CmpBrKind::Icmp { mut cond, arg } => { - let flags = pos.ins().ifcmp(info.cmp_arg, arg); - pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags); - if info.invert_branch_cond { - cond = cond.inverse(); - } - pos.func - .dfg - .replace(info.br_inst) - .brif(cond, flags, info.destination, &args); - } - CmpBrKind::IcmpImm { mut cond, imm } => { - let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm); - pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags); - if info.invert_branch_cond { - cond = cond.inverse(); - } - pos.func - .dfg - .replace(info.br_inst) - .brif(cond, flags, info.destination, &args); - } - CmpBrKind::Fcmp { mut cond, arg } => { - let flags = pos.ins().ffcmp(info.cmp_arg, arg); - pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags); - if info.invert_branch_cond { - cond = cond.inverse(); - } - pos.func - .dfg - .replace(info.br_inst) - .brff(cond, flags, info.destination, &args); - } - } - let ok = pos.func.update_encoding(info.cmp_inst, isa).is_ok(); - debug_assert!(ok); - let ok = pos.func.update_encoding(info.br_inst, isa).is_ok(); - debug_assert!(ok); -} - -struct MemOpInfo { - opcode: Opcode, - itype: Type, - arg: Value, - st_arg: Option, - flags: MemFlags, - offset: Offset32, -} - -fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetIsa) { - // Look for simple loads and stores we can optimize. 
- let info = match pos.func.dfg[inst] { - InstructionData::Load { - opcode, - arg, - flags, - offset, - } => MemOpInfo { - opcode, - itype: pos.func.dfg.ctrl_typevar(inst), - arg, - st_arg: None, - flags, - offset, - }, - InstructionData::Store { - opcode, - args, - flags, - offset, - } => MemOpInfo { - opcode, - itype: pos.func.dfg.ctrl_typevar(inst), - arg: args[1], - st_arg: Some(args[0]), - flags, - offset, - }, - _ => return, - }; - - // Examine the instruction that defines the address operand. - if let ValueDef::Result(result_inst, _) = pos.func.dfg.value_def(info.arg) { - match pos.func.dfg[result_inst] { - InstructionData::Binary { - opcode: Opcode::Iadd, - args, - } => match info.opcode { - // Operand is an iadd. Fold it into a memory address with a complex address mode. - Opcode::Load => { - pos.func.dfg.replace(inst).load_complex( - info.itype, - info.flags, - &args, - info.offset, - ); - } - Opcode::Uload8 => { - pos.func.dfg.replace(inst).uload8_complex( - info.itype, - info.flags, - &args, - info.offset, - ); - } - Opcode::Sload8 => { - pos.func.dfg.replace(inst).sload8_complex( - info.itype, - info.flags, - &args, - info.offset, - ); - } - Opcode::Uload16 => { - pos.func.dfg.replace(inst).uload16_complex( - info.itype, - info.flags, - &args, - info.offset, - ); - } - Opcode::Sload16 => { - pos.func.dfg.replace(inst).sload16_complex( - info.itype, - info.flags, - &args, - info.offset, - ); - } - Opcode::Uload32 => { - pos.func - .dfg - .replace(inst) - .uload32_complex(info.flags, &args, info.offset); - } - Opcode::Sload32 => { - pos.func - .dfg - .replace(inst) - .sload32_complex(info.flags, &args, info.offset); - } - Opcode::Uload8x8 => { - pos.func - .dfg - .replace(inst) - .uload8x8_complex(info.flags, &args, info.offset); - } - Opcode::Sload8x8 => { - pos.func - .dfg - .replace(inst) - .sload8x8_complex(info.flags, &args, info.offset); - } - Opcode::Uload16x4 => { - pos.func - .dfg - .replace(inst) - .uload16x4_complex(info.flags, &args, 
info.offset); - } - Opcode::Sload16x4 => { - pos.func - .dfg - .replace(inst) - .sload16x4_complex(info.flags, &args, info.offset); - } - Opcode::Uload32x2 => { - pos.func - .dfg - .replace(inst) - .uload32x2_complex(info.flags, &args, info.offset); - } - Opcode::Sload32x2 => { - pos.func - .dfg - .replace(inst) - .sload32x2_complex(info.flags, &args, info.offset); - } - Opcode::Store => { - pos.func.dfg.replace(inst).store_complex( - info.flags, - info.st_arg.unwrap(), - &args, - info.offset, - ); - } - Opcode::Istore8 => { - pos.func.dfg.replace(inst).istore8_complex( - info.flags, - info.st_arg.unwrap(), - &args, - info.offset, - ); - } - Opcode::Istore16 => { - pos.func.dfg.replace(inst).istore16_complex( - info.flags, - info.st_arg.unwrap(), - &args, - info.offset, - ); - } - Opcode::Istore32 => { - pos.func.dfg.replace(inst).istore32_complex( - info.flags, - info.st_arg.unwrap(), - &args, - info.offset, - ); - } - _ => panic!("Unsupported load or store opcode"), - }, - InstructionData::BinaryImm64 { - opcode: Opcode::IaddImm, - arg, - imm, - } => match pos.func.dfg[inst] { - // Operand is an iadd_imm. Fold the immediate into the offset if possible. - InstructionData::Load { - arg: ref mut load_arg, - ref mut offset, - .. - } => { - if let Some(imm) = offset.try_add_i64(imm.into()) { - *load_arg = arg; - *offset = imm; - } else { - // Overflow. - return; - } - } - InstructionData::Store { - args: ref mut store_args, - ref mut offset, - .. - } => { - if let Some(imm) = offset.try_add_i64(imm.into()) { - store_args[1] = arg; - *offset = imm; - } else { - // Overflow. - return; - } - } - _ => panic!(), - }, - _ => { - // Address value is defined by some other kind of instruction. - return; - } - } - } else { - // Address value is not the result of an instruction. 
- return; - } - - let ok = pos.func.update_encoding(inst, isa).is_ok(); - debug_assert!( - ok, - "failed to update encoding for `{}`", - pos.func.dfg.display_inst(inst, isa) - ); -} - -//---------------------------------------------------------------------- -// -// The main post-opt pass. - -pub fn do_postopt(func: &mut Function, isa: &dyn TargetIsa) { - let _tt = timing::postopt(); - let mut pos = EncCursor::new(func, isa); - let is_mach_backend = isa.get_mach_backend().is_some(); - while let Some(_block) = pos.next_block() { - let mut last_flags_clobber = None; - while let Some(inst) = pos.next_inst() { - if !is_mach_backend && isa.uses_cpu_flags() { - // Optimize instructions to make use of flags. - optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa); - - // Track the most recent seen instruction that clobbers the flags. - if let Some(constraints) = isa - .encoding_info() - .operand_constraints(pos.func.encodings[inst]) - { - if constraints.clobbers_flags { - last_flags_clobber = Some(inst) - } - } - } - - if isa.uses_complex_addresses() { - optimize_complex_addresses(&mut pos, inst, isa); - } - } - } -} diff --git a/cranelift/codegen/src/predicates.rs b/cranelift/codegen/src/predicates.rs deleted file mode 100644 index 5812163e09..0000000000 --- a/cranelift/codegen/src/predicates.rs +++ /dev/null @@ -1,140 +0,0 @@ -//! Predicate functions for testing instruction fields. -//! -//! This module defines functions that are used by the instruction predicates defined by -//! `cranelift-codegen/meta/src/cdsl/instructions.rs` classes. -//! -//! The predicates the operate on integer fields use `Into` as a shared trait bound. This -//! bound is implemented by all the native integer types as well as `Imm64`. -//! -//! Some of these predicates may be unused in certain ISA configurations, so we suppress the -//! dead code warning. - -use crate::ir; -use crate::ir::ConstantData; - -/// Check that an integer value is zero. 
-#[allow(dead_code)] -pub fn is_zero_int>(x: T) -> bool { - x.into() == 0 -} - -/// Check that a 64-bit floating point value is zero. -#[allow(dead_code)] -pub fn is_zero_64_bit_float>(x: T) -> bool { - let x64 = x.into(); - x64.bits() == 0 -} - -/// Check that a 32-bit floating point value is zero. -#[allow(dead_code)] -pub fn is_zero_32_bit_float>(x: T) -> bool { - let x32 = x.into(); - x32.bits() == 0 -} - -/// Check that a constant contains all zeroes. -#[allow(dead_code)] -pub fn is_all_zeroes(x: &ConstantData) -> bool { - x.iter().all(|&f| f == 0) -} - -/// Check that a constant contains all ones. -#[allow(dead_code)] -pub fn is_all_ones(x: &ConstantData) -> bool { - x.iter().all(|&f| f == 0xff) -} - -/// Check that `x` is the same as `y`. -#[allow(dead_code)] -pub fn is_equal + Copy>(x: T, y: O) -> bool { - x == y.into() -} - -/// Check that `x` can be represented as a `wd`-bit signed integer with `sc` low zero bits. -#[allow(dead_code)] -pub fn is_signed_int>(x: T, wd: u8, sc: u8) -> bool { - let s = x.into(); - s == (s >> sc << (64 - wd + sc) >> (64 - wd)) -} - -/// Check that `x` can be represented as a `wd`-bit unsigned integer with `sc` low zero bits. -#[allow(dead_code)] -pub fn is_unsigned_int>(x: T, wd: u8, sc: u8) -> bool { - let u = x.into() as u64; - // Bit-mask of the permitted bits. - let m = (1 << wd) - (1 << sc); - u == (u & m) -} - -#[allow(dead_code)] -pub fn is_colocated_func(func_ref: ir::FuncRef, func: &ir::Function) -> bool { - func.dfg.ext_funcs[func_ref].colocated -} - -#[allow(dead_code)] -pub fn is_colocated_data(global_value: ir::GlobalValue, func: &ir::Function) -> bool { - match func.global_values[global_value] { - ir::GlobalValueData::Symbol { colocated, .. 
} => colocated, - _ => panic!("is_colocated_data only makes sense for data with symbolic addresses"), - } -} - -#[allow(dead_code)] -pub fn has_length_of(value_list: &ir::ValueList, num: usize, func: &ir::Function) -> bool { - value_list.len(&func.dfg.value_lists) == num -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn cvt_u32() { - let x1 = 0u32; - let x2 = 1u32; - let x3 = 0xffff_fff0u32; - - assert!(is_signed_int(x1, 1, 0)); - assert!(is_signed_int(x1, 2, 1)); - assert!(is_signed_int(x2, 2, 0)); - assert!(!is_signed_int(x2, 2, 1)); - - // `u32` doesn't sign-extend when converted to `i64`. - assert!(!is_signed_int(x3, 8, 0)); - - assert!(is_unsigned_int(x1, 1, 0)); - assert!(is_unsigned_int(x1, 8, 4)); - assert!(is_unsigned_int(x2, 1, 0)); - assert!(!is_unsigned_int(x2, 8, 4)); - assert!(!is_unsigned_int(x3, 1, 0)); - assert!(is_unsigned_int(x3, 32, 4)); - } - - #[test] - fn cvt_imm64() { - use crate::ir::immediates::Imm64; - - let x1 = Imm64::new(-8); - let x2 = Imm64::new(8); - - assert!(is_signed_int(x1, 16, 2)); - assert!(is_signed_int(x2, 16, 2)); - assert!(!is_signed_int(x1, 16, 4)); - assert!(!is_signed_int(x2, 16, 4)); - } - - #[test] - fn check_is_all_zeroes() { - assert!(is_all_zeroes(&[0; 16].as_ref().into())); - assert!(is_all_zeroes( - &vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0].into() - )); - assert!(!is_all_zeroes(&[1; 16].as_ref().into())); - } - - #[test] - fn check_is_all_ones() { - assert!(!is_all_ones(&[0; 16].as_ref().into())); - assert!(is_all_ones(&[0xff; 16].as_ref().into())); - } -} diff --git a/cranelift/codegen/src/print_errors.rs b/cranelift/codegen/src/print_errors.rs index e4f6234ebd..e3fe736616 100644 --- a/cranelift/codegen/src/print_errors.rs +++ b/cranelift/codegen/src/print_errors.rs @@ -4,7 +4,6 @@ use crate::entity::SecondaryMap; use crate::ir; use crate::ir::entities::{AnyEntity, Block, Inst, Value}; use crate::ir::function::Function; -use crate::isa::TargetIsa; use crate::result::CodegenError; use 
crate::verifier::{VerifierError, VerifierErrors}; use crate::write::{decorate_function, FuncWriter, PlainWriter}; @@ -17,7 +16,6 @@ use core::fmt::Write; /// Pretty-print a verifier error. pub fn pretty_verifier_error<'a>( func: &ir::Function, - isa: Option<&dyn TargetIsa>, func_w: Option>, errors: VerifierErrors, ) -> String { @@ -29,7 +27,6 @@ pub fn pretty_verifier_error<'a>( &mut PrettyVerifierError(func_w.unwrap_or_else(|| Box::new(PlainWriter)), &mut errors), &mut w, func, - &isa.into(), ) .unwrap(); @@ -51,11 +48,10 @@ impl<'a> FuncWriter for PrettyVerifierError<'a> { &mut self, w: &mut dyn Write, func: &Function, - isa: Option<&dyn TargetIsa>, block: Block, indent: usize, ) -> fmt::Result { - pretty_block_header_error(w, func, isa, block, indent, &mut *self.0, self.1) + pretty_block_header_error(w, func, block, indent, &mut *self.0, self.1) } fn write_instruction( @@ -63,11 +59,10 @@ impl<'a> FuncWriter for PrettyVerifierError<'a> { w: &mut dyn Write, func: &Function, aliases: &SecondaryMap>, - isa: Option<&dyn TargetIsa>, inst: Inst, indent: usize, ) -> fmt::Result { - pretty_instruction_error(w, func, aliases, isa, inst, indent, &mut *self.0, self.1) + pretty_instruction_error(w, func, aliases, inst, indent, &mut *self.0, self.1) } fn write_entity_definition( @@ -85,14 +80,13 @@ impl<'a> FuncWriter for PrettyVerifierError<'a> { fn pretty_block_header_error( w: &mut dyn Write, func: &Function, - isa: Option<&dyn TargetIsa>, cur_block: Block, indent: usize, func_w: &mut dyn FuncWriter, errors: &mut Vec, ) -> fmt::Result { let mut s = String::new(); - func_w.write_block_header(&mut s, func, isa, cur_block, indent)?; + func_w.write_block_header(&mut s, func, cur_block, indent)?; write!(w, "{}", s)?; // TODO: Use drain_filter here when it gets stabilized @@ -124,14 +118,13 @@ fn pretty_instruction_error( w: &mut dyn Write, func: &Function, aliases: &SecondaryMap>, - isa: Option<&dyn TargetIsa>, cur_inst: Inst, indent: usize, func_w: &mut dyn FuncWriter, 
errors: &mut Vec, ) -> fmt::Result { let mut s = String::new(); - func_w.write_instruction(&mut s, func, aliases, isa, cur_inst, indent)?; + func_w.write_instruction(&mut s, func, aliases, cur_inst, indent)?; write!(w, "{}", s)?; // TODO: Use drain_filter here when it gets stabilized @@ -218,9 +211,9 @@ fn print_error(w: &mut dyn Write, err: VerifierError) -> fmt::Result { } /// Pretty-print a Cranelift error. -pub fn pretty_error(func: &ir::Function, isa: Option<&dyn TargetIsa>, err: CodegenError) -> String { +pub fn pretty_error(func: &ir::Function, err: CodegenError) -> String { if let CodegenError::Verifier(e) = err { - pretty_verifier_error(func, isa, None, e) + pretty_verifier_error(func, None, e) } else { err.to_string() } diff --git a/cranelift/codegen/src/redundant_reload_remover.rs b/cranelift/codegen/src/redundant_reload_remover.rs deleted file mode 100644 index 501c67ab6b..0000000000 --- a/cranelift/codegen/src/redundant_reload_remover.rs +++ /dev/null @@ -1,904 +0,0 @@ -//! This module implements a late-stage redundant-reload remover, which runs after registers have -//! been allocated and stack slots have been given specific offsets. - -use crate::cursor::{Cursor, CursorPosition, EncCursor, FuncCursor}; -use crate::entity::EntitySet; -use crate::flowgraph::ControlFlowGraph; -use crate::ir::dfg::DataFlowGraph; -use crate::ir::instructions::BranchInfo; -use crate::ir::stackslot::{StackSlotKind, StackSlots}; -use crate::ir::{ - Block, Function, Inst, InstBuilder, InstructionData, Opcode, StackSlotData, Type, Value, - ValueLoc, -}; -use crate::isa::{RegInfo, RegUnit, TargetIsa}; -use crate::regalloc::RegDiversions; -use alloc::vec::Vec; -use core::convert::TryInto; -use cranelift_entity::{PrimaryMap, SecondaryMap}; - -// ============================================================================================= -// A description of the redundant-fill-removal algorithm -// -// -// The algorithm works forwards through each Block. 
It carries along and updates a table, -// AvailEnv, with which it tracks registers that are known to have the same value as some stack -// slot. The actions on encountering an instruction depend on the instruction, as follows: -// -// ss1 = spill r0: update the AvailEnv so as to note that slot `ss1` and register `r0` -// have the same value. -// -// r1 = fill ss0: look in the AvailEnv. If it tells us that register `r1` and slot `ss0` -// have the same value, then delete the instruction by converting it to a -// `fill_nop`. -// -// If it tells us that some other register `r2` has the same value as -// slot `ss0`, convert the instruction into a copy from `r2` to `r1`. -// -// any other insn: remove from the AvailEnv, any bindings associated with registers -// written by this instruction, since they will be invalidated by it. -// -// Tracking the effects of `copy` instructions in AvailEnv for the case when both source and -// destination are registers does not cause any more fills to be removed or converted to copies. -// It's not clear why. -// -// There are various other instruction-handling cases in `visit_inst`, which are documented -// in-line, and do not change the core algorithm, so are not described here. -// -// The registers tracked by AvailEnv are the post-diversion registers that are really used by the -// code; they are not the pre-diversion names associated with each SSA `Value`. The second -// `fill` case above opportunistically copies values from registers that may have been diversion -// targets in some predecessor block, and so are no longer associated with any specific SSA-level -// name at the point the copy is made. Hence those copies (from `r2` to `r1`) cannot be done -// with an ordinary `copy` instruction. Instead they have to be done using a new `copy_to_ssa` -// instruction, which copies from an arbitrary register to a register-resident `Value` (that is, -// "back to" SSA-world). -// -// That completes the description of the core algorithm. 
-// -// In the case where a block `A` jumps to `B` and `A` is the only predecessor of `B`, the -// AvailEnv at the end of `A` will still be valid at the entry to `B`. In such a case, we can -// profitably transform `B` using the AvailEnv "inherited" from `A`. In order to take full -// advantage of this, this module partitions the function's CFG into tree-shaped groups of -// blocks, and processes each tree as described above. So the AvailEnv is only initialised to -// empty at the start of blocks that form the root of each tree; that is, for blocks which have -// two or more predecessors. - -// ============================================================================================= -// Top level algorithm structure -// -// The overall algorithm, for a function, starts like this: -// -// * (once per function): finds Blocks that have two or more predecessors, since they will be the -// roots of Block trees. Also, the entry node for the function is considered to be a root. -// -// It then continues with a loop that first finds a tree of Blocks ("discovery") and then removes -// redundant fills as described above ("processing"): -// -// * (discovery; once per tree): for each root, performs a depth first search to find all the Blocks -// in the tree, guided by RedundantReloadRemover::discovery_stack. -// -// * (processing; once per tree): the just-discovered tree is then processed as described above, -// guided by RedundantReloadRemover::processing_stack. -// -// In this way, all Blocks reachable from the function's entry point are eventually processed. Note -// that each tree is processed as soon as it has been discovered, so the algorithm never creates a -// list of trees for the function. -// -// The running state is stored in `RedundantReloadRemover`. This is allocated once and can be -// reused for multiple functions so as to minimise heap turnover. 
The fields are, roughly: -// -// num_regunits -- constant for the whole function; used by the tree processing phase -// num_preds_per_block -- constant for the whole function; used by the tree discovery process -// -// discovery_stack -- used to guide the tree discovery process -// nodes_in_tree -- the discovered nodes are recorded here -// -// processing_stack -- used to guide the tree processing process -// nodes_already_visited -- used to ensure the tree processing logic terminates in the case -// where a tree has a branch back to its root node. -// -// There is further documentation in line below, as appropriate. - -// ============================================================================================= -// A side note on register choice heuristics - -// The core algorithm opportunistically replaces fill instructions when it knows of a register -// that already holds the required value. How effective this is largely depends on how long -// reloaded values happen to stay alive before the relevant register is overwritten. And that -// depends on the register allocator's register choice heuristics. The worst case is, when the -// register allocator reuses registers as soon as possible after they become free. Unfortunately -// that was indeed the selection scheme, prior to development of this pass. -// -// As part of this work, the register selection scheme has been changed as follows: for registers -// written by any instruction other than a fill, use the lowest numbered available register. But -// for registers written by a fill instruction, use the highest numbered available register. The -// aim is to try and keep reload- and non-reload registers disjoint to the extent possible. -// Several other schemes were tried, but this one is simple and can be worth an extra 2% of -// performance in some cases. -// -// The relevant change is more or less a one-line change in the solver. 
- -// ============================================================================================= -// Data structures used for discovery of trees - -// `ZeroOneOrMany` is used to record the number of predecessors a Block block has. The `Zero` case -// is included so as to cleanly handle the case where the incoming graph has unreachable Blocks. - -#[derive(Clone, PartialEq)] -enum ZeroOneOrMany { - Zero, - One, - Many, -} - -// ============================================================================================= -// Data structures used for processing of trees - -// `SlotInfo` describes a spill slot in the obvious way. Note that it doesn't indicate which -// register(s) are currently associated with the slot. That job is done by `AvailEnv` instead. -// -// In the CL framework, stack slots are partitioned into disjoint sets, one for each -// `StackSlotKind`. The offset and size only give a unique identity within any particular -// `StackSlotKind`. So, to uniquely identify a stack slot, all three fields are necessary. - -#[derive(Clone, Copy)] -struct SlotInfo { - kind: StackSlotKind, - offset: i32, - size: u32, -} - -// `AvailEnv` maps each possible register to a stack slot that holds the same value. The index -// space of `AvailEnv::map` is exactly the set of registers available on the current target. If -// (as is mostly the case) a register is not known to have the same value as a stack slot, then -// its entry is `None` rather than `Some(..)`. -// -// Invariants for AvailEnv: -// -// AvailEnv may have multiple different registers bound to the same stack slot -- that is, `(kind, -// offset, size)` triple. That's OK, and reflects the reality that those two registers contain -// the same value. This could happen, for example, in the case -// -// ss1 = spill r0 -// .. -// r2 = fill ss1 -// -// Then both `r0` and `r2` will have the same value as `ss1`, provided that ".." doesn't write to -// `r1`. 
-// -// To say that two different registers may be bound to the same stack slot is the same as saying -// that it is allowed to have two different entries in AvailEnv with the same `(kind, offset, -// size)` triple. What is *not* allowed is to have partial overlaps. That is, if two SlotInfos -// have the same `kind` field and have `offset` and `size` fields that overlap, then their -// `offset` and `size` fields must be identical. This is so as to make the algorithm safe against -// situations where, for example, a 64 bit register is spilled, but then only the bottom 32 bits -// are reloaded from the slot. -// -// Although in such a case it seems likely that the Cranelift IR would be ill-typed, and so this -// case could probably not occur in practice. - -#[derive(Clone)] -struct AvailEnv { - map: Vec>, -} - -// `ProcessingStackElem` combines AvailEnv with contextual information needed to "navigate" within -// a Block. -// -// A ProcessingStackElem conceptually has the lifetime of exactly one Block: once the current Block is -// completed, the ProcessingStackElem will be abandoned. In practice the top level state, -// RedundantReloadRemover, caches them, so as to avoid heap turnover. -// -// Note that ProcessingStackElem must contain a CursorPosition. The CursorPosition, which -// indicates where we are in the current Block, cannot be implicitly maintained by looping over all -// the instructions in a Block in turn, because we may choose to suspend processing the current Block -// at a side exit, continue by processing the subtree reached via the side exit, and only later -// resume the current Block. - -struct ProcessingStackElem { - /// Indicates the AvailEnv at the current point in the Block. - avail_env: AvailEnv, - - /// Shows where we currently are inside the Block. - cursor: CursorPosition, - - /// Indicates the currently active register diversions at the current point. 
- diversions: RegDiversions, -} - -// ============================================================================================= -// The top level data structure - -// `RedundantReloadRemover` contains data structures for the two passes: discovery of tree shaped -// regions, and processing of them. These are allocated once and stay alive for the entire -// function, even though they are cleared out for each new tree shaped region. It also caches -// `num_regunits` and `num_preds_per_block`, which are computed at the start of each function and -// then remain constant. - -/// The redundant reload remover's state. -pub struct RedundantReloadRemover { - /// The total number of RegUnits available on this architecture. This is unknown when the - /// RedundantReloadRemover is created. It becomes known at the beginning of processing of a - /// function. - num_regunits: Option, - - /// This stores, for each Block, a characterisation of the number of predecessors it has. - num_preds_per_block: PrimaryMap, - - /// The stack used for the first phase (discovery). There is one element on the discovery - /// stack for each currently unexplored Block in the tree being searched. - discovery_stack: Vec, - - /// The nodes in the discovered tree are inserted here. - nodes_in_tree: EntitySet, - - /// The stack used during the second phase (transformation). There is one element on the - /// processing stack for each currently-open node in the tree being transformed. - processing_stack: Vec, - - /// Used in the second phase to avoid visiting nodes more than once. - nodes_already_visited: EntitySet, -} - -// ============================================================================================= -// Miscellaneous small helper functions - -// Is this a kind of stack slot that is safe to track in AvailEnv? This is probably overly -// conservative, but tracking only the SpillSlot and IncomingArgument kinds catches almost all -// available redundancy in practice. 
-fn is_slot_kind_tracked(kind: StackSlotKind) -> bool { - match kind { - StackSlotKind::SpillSlot | StackSlotKind::IncomingArg => true, - _ => false, - } -} - -// Find out if the range `[offset, +size)` overlaps with the range in `si`. -fn overlaps(si: &SlotInfo, offset: i32, size: u32) -> bool { - let a_offset = si.offset as i64; - let a_size = si.size as i64; - let b_offset = offset as i64; - let b_size = size as i64; - let no_overlap = a_offset + a_size <= b_offset || b_offset + b_size <= a_offset; - !no_overlap -} - -// Find, in `reginfo`, the register bank that `reg` lives in, and return the lower limit and size -// of the bank. This is so the caller can conveniently iterate over all RegUnits in the bank that -// `reg` lives in. -fn find_bank_limits(reginfo: &RegInfo, reg: RegUnit) -> (RegUnit, u16) { - if let Some(bank) = reginfo.bank_containing_regunit(reg) { - return (bank.first_unit, bank.units); - } - // We should never get here, since `reg` must come from *some* RegBank. - panic!("find_regclass_limits: reg not found"); -} - -// Returns the register that `v` is allocated to. Assumes that `v` actually resides in a -// register. -fn reg_of_value(locations: &SecondaryMap, v: Value) -> RegUnit { - match locations[v] { - ValueLoc::Reg(ru) => ru, - _ => panic!("reg_of_value: value isn't in a reg"), - } -} - -// Returns the stack slot that `v` is allocated to. Assumes that `v` actually resides in a stack -// slot. -fn slot_of_value<'s>( - locations: &SecondaryMap, - stack_slots: &'s StackSlots, - v: Value, -) -> &'s StackSlotData { - match locations[v] { - ValueLoc::Stack(slot) => &stack_slots[slot], - _ => panic!("slot_of_value: value isn't in a stack slot"), - } -} - -// ============================================================================================= -// Top level: discovery of tree shaped regions - -impl RedundantReloadRemover { - // A helper for `add_nodes_to_tree` below. 
- fn discovery_stack_push_successors_of(&mut self, cfg: &ControlFlowGraph, node: Block) { - for successor in cfg.succ_iter(node) { - self.discovery_stack.push(successor); - } - } - - // Visit the tree of Blocks rooted at `starting_point` and add them to `self.nodes_in_tree`. - // `self.num_preds_per_block` guides the process, ensuring we don't leave the tree-ish region - // and indirectly ensuring that the process will terminate in the presence of cycles in the - // graph. `self.discovery_stack` holds the search state in this function. - fn add_nodes_to_tree(&mut self, cfg: &ControlFlowGraph, starting_point: Block) { - // One might well ask why this doesn't loop forever when it encounters cycles in the - // control flow graph. The reason is that any cycle in the graph that is reachable from - // anywhere outside the cycle -- in particular, that is reachable from the function's - // entry node -- must have at least one node that has two or more predecessors. So the - // logic below won't follow into it, because it regards any such node as the root of some - // other tree. - debug_assert!(self.discovery_stack.is_empty()); - debug_assert!(self.nodes_in_tree.is_empty()); - - self.nodes_in_tree.insert(starting_point); - self.discovery_stack_push_successors_of(cfg, starting_point); - - while let Some(node) = self.discovery_stack.pop() { - match self.num_preds_per_block[node] { - // We arrived at a node with multiple predecessors, so it's a new root. Ignore it. - ZeroOneOrMany::Many => {} - // This node has just one predecessor, so we should incorporate it in the tree and - // immediately transition into searching from it instead. - ZeroOneOrMany::One => { - self.nodes_in_tree.insert(node); - self.discovery_stack_push_successors_of(cfg, node); - } - // This is meaningless. We arrived at a node that doesn't point back at where we - // came from. 
- ZeroOneOrMany::Zero => panic!("add_nodes_to_tree: inconsistent graph"), - } - } - } -} - -// ============================================================================================= -// Operations relating to `AvailEnv` - -impl AvailEnv { - // Create a new one. - fn new(size: usize) -> Self { - let mut env = Self { - map: Vec::>::new(), - }; - env.map.resize(size, None); - env - } - - // Debug only: checks (some of) the required AvailEnv invariants. - #[cfg(debug_assertions)] - fn check_invariants(&self) -> bool { - // Check that any overlapping entries overlap exactly. This is super lame (quadratic), - // but it's only used in debug builds. - for i in 0..self.map.len() { - if let Some(si) = self.map[i] { - for j in i + 1..self.map.len() { - if let Some(sj) = self.map[j] { - // "si and sj overlap, but not exactly" - if si.kind == sj.kind - && overlaps(&si, sj.offset, sj.size) - && !(si.offset == sj.offset && si.size == sj.size) - { - return false; - } - } - } - } - } - true - } - - // Invalidates the binding associated with `reg`. Note that by construction of AvailEnv, - // `reg` can only be associated with one binding at once. - fn invalidate_by_reg(&mut self, reg: RegUnit) { - self.map[reg as usize] = None; - } - - // Invalidates any binding that has any overlap with `(kind, offset, size)`. - fn invalidate_by_offset(&mut self, kind: StackSlotKind, offset: i32, size: u32) { - debug_assert!(is_slot_kind_tracked(kind)); - for i in 0..self.map.len() { - if let Some(si) = &self.map[i] { - if si.kind == kind && overlaps(&si, offset, size) { - self.map[i] = None; - } - } - } - } - - // Invalidates all bindings. - fn invalidate_all(&mut self) { - for i in 0..self.map.len() { - self.map[i] = None; - } - } - - // Updates AvailEnv to track the effect of a `regmove` instruction. 
- fn copy_reg(&mut self, src: RegUnit, dst: RegUnit) { - self.map[dst as usize] = self.map[src as usize]; - } - - // Does `env` have the exact binding characterised by `(reg, kind, offset, size)` ? - fn has_exact_binding(&self, reg: RegUnit, kind: StackSlotKind, offset: i32, size: u32) -> bool { - debug_assert!(is_slot_kind_tracked(kind)); - if let Some(si) = &self.map[reg as usize] { - return si.kind == kind && si.offset == offset && si.size == size; - } - // No such binding. - false - } - - // Does `env` have a binding characterised by `(kind, offset, size)` but to a register, let's - // call it `other_reg`, that isn't `reg`? If so, return `other_reg`. Note that `other_reg` - // will have the same bank as `reg`. It is a checked error to call this function with a - // binding matching all four of `(reg, kind, offset, size)`. - fn has_inexact_binding( - &self, - reginfo: &RegInfo, - reg: RegUnit, - kind: StackSlotKind, - offset: i32, - size: u32, - ) -> Option { - debug_assert!(is_slot_kind_tracked(kind)); - // Find the range of RegUnit numbers for the bank that contains `reg`, and use that as our - // search space. This is so as to guarantee that any match is restricted to the same bank - // as `reg`. - let (first_unit, num_units) = find_bank_limits(reginfo, reg); - for other_reg in first_unit..first_unit + num_units { - if let Some(si) = &self.map[other_reg as usize] { - if si.kind == kind && si.offset == offset && si.size == size { - if other_reg == reg { - panic!("has_inexact_binding: binding *is* exact!"); - } - return Some(other_reg); - } - } - } - // No such binding. - None - } - - // Create the binding `(reg, kind, offset, size)` in `env`, and throw away any previous - // binding associated with either `reg` or the `(kind, offset, size)` triple. 
- fn bind(&mut self, reg: RegUnit, kind: StackSlotKind, offset: i32, size: u32) { - debug_assert!(is_slot_kind_tracked(kind)); - self.invalidate_by_offset(kind, offset, size); - self.map[reg as usize] = Some(SlotInfo { kind, offset, size }); - } -} - -// Invalidates in `avail_env`, any binding associated with a regunit that is written by `inst`. -fn invalidate_regs_written_by_inst( - locations: &SecondaryMap, - diversions: &RegDiversions, - dfg: &DataFlowGraph, - avail_env: &mut AvailEnv, - inst: Inst, -) { - for v in dfg.inst_results(inst).iter() { - if let ValueLoc::Reg(ru) = locations[*v] { - // This must be true. It would be meaningless for an SSA value to be diverted before - // the point where it is defined. - debug_assert!(diversions.reg(*v, locations) == ru); - avail_env.invalidate_by_reg(ru); - } - } -} - -// ============================================================================================= -// Processing of individual instructions - -impl RedundantReloadRemover { - // Process `inst`, possibly changing it into a different instruction, and possibly changing - // `self.avail_env` and `func.dfg`. - fn visit_inst( - &mut self, - func: &mut Function, - reginfo: &RegInfo, - isa: &dyn TargetIsa, - inst: Inst, - ) { - // Get hold of the top-of-stack work item. This is the state that we will mutate during - // processing of this instruction. - debug_assert!(!self.processing_stack.is_empty()); - let ProcessingStackElem { - avail_env, - diversions, - .. - } = self.processing_stack.last_mut().unwrap(); - - #[cfg(debug_assertions)] - debug_assert!( - avail_env.check_invariants(), - "visit_inst: env invariants not ok" - ); - - let dfg = &mut func.dfg; - let locations = &func.locations; - let stack_slots = &func.stack_slots; - - // To avoid difficulties with the borrow checker, do this in two stages. First, examine - // the instruction to see if it can be deleted or modified, and park the relevant - // information in `transform`. Update `self.avail_env` too. 
Later, use `transform` to - // actually do the transformation if necessary. - enum Transform { - NoChange, - ChangeToNopFill(Value), // delete this insn entirely - ChangeToCopyToSSA(Type, RegUnit), // change it into a copy from the specified reg - } - let mut transform = Transform::NoChange; - - // In this match { .. } statement, either we must treat the instruction specially, or we - // must call `invalidate_regs_written_by_inst` on it. - match &dfg[inst] { - InstructionData::Unary { - opcode: Opcode::Spill, - arg: src_value, - } => { - // Extract: (src_reg, kind, offset, size) - // Invalidate: (kind, offset, size) - // Add new binding: {src_reg -> (kind, offset, size)} - // Don't forget that src_value might be diverted, so we have to deref it. - let slot = slot_of_value(locations, stack_slots, dfg.inst_results(inst)[0]); - let src_reg = diversions.reg(*src_value, locations); - let kind = slot.kind; - if is_slot_kind_tracked(kind) { - let offset = slot.offset.expect("visit_inst: spill with no offset"); - let size = slot.size; - avail_env.bind(src_reg, kind, offset, size); - } else { - // We don't expect this insn to write any regs. But to be consistent with the - // rule above, do this anyway. - invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst); - } - } - InstructionData::Unary { - opcode: Opcode::Fill, - arg: src_value, - } => { - // Extract: (dst_reg, kind, offset, size) - // Invalidate: (kind, offset, size) - // Add new: {dst_reg -> (dst_value, kind, offset, size)} - let slot = slot_of_value(locations, stack_slots, *src_value); - let dst_value = dfg.inst_results(inst)[0]; - let dst_reg = reg_of_value(locations, dst_value); - // This must be true. It would be meaningless for an SSA value to be diverted - // before it was defined. 
- debug_assert!(dst_reg == diversions.reg(dst_value, locations)); - let kind = slot.kind; - if is_slot_kind_tracked(kind) { - let offset = slot.offset.expect("visit_inst: fill with no offset"); - let size = slot.size; - if avail_env.has_exact_binding(dst_reg, kind, offset, size) { - // This instruction is an exact copy of a fill we saw earlier, and the - // loaded value is still valid. So we'll schedule this instruction for - // deletion (below). No need to make any changes to `avail_env`. - transform = Transform::ChangeToNopFill(*src_value); - } else if let Some(other_reg) = - avail_env.has_inexact_binding(reginfo, dst_reg, kind, offset, size) - { - // This fill is from the required slot, but into a different register - // `other_reg`. So replace it with a copy from `other_reg` to `dst_reg` - // and update `dst_reg`s binding to make it the same as `other_reg`'s, so - // as to maximise the chances of future matches after this instruction. - debug_assert!(other_reg != dst_reg); - transform = - Transform::ChangeToCopyToSSA(dfg.value_type(dst_value), other_reg); - avail_env.copy_reg(other_reg, dst_reg); - } else { - // This fill creates some new binding we don't know about. Update - // `avail_env` to track it. - avail_env.bind(dst_reg, kind, offset, size); - } - } else { - // Else it's "just another instruction that writes a reg", so we'd better - // treat it as such, just as we do below for instructions that we don't handle - // specially. - invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst); - } - } - InstructionData::RegMove { src, dst, .. } => { - // These happen relatively rarely, but just frequently enough that it's worth - // tracking the copy (at the machine level, it's really a copy) in `avail_env`. - avail_env.copy_reg(*src, *dst); - } - InstructionData::RegSpill { .. } - | InstructionData::RegFill { .. } - | InstructionData::Call { .. } - | InstructionData::CallIndirect { .. } - | InstructionData::StackLoad { .. 
} - | InstructionData::StackStore { .. } - | InstructionData::Unary { - opcode: Opcode::AdjustSpDown, - .. - } - | InstructionData::UnaryImm { - opcode: Opcode::AdjustSpUpImm, - .. - } - | InstructionData::UnaryImm { - opcode: Opcode::AdjustSpDownImm, - .. - } => { - // All of these change, or might change, the memory-register bindings tracked in - // `avail_env` in some way we don't know about, or at least, we might be able to - // track, but for which the effort-to-benefit ratio seems too low to bother. So - // play safe: forget everything we know. - // - // For Call/CallIndirect, we could do better when compiling for calling - // conventions that have callee-saved registers, since bindings for them would - // remain valid across the call. - avail_env.invalidate_all(); - } - _ => { - // Invalidate: any `avail_env` entry associated with a reg written by `inst`. - invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst); - } - } - - // Actually do the transformation. - match transform { - Transform::NoChange => {} - Transform::ChangeToNopFill(arg) => { - // Load is completely redundant. Convert it to a no-op. - dfg.replace(inst).fill_nop(arg); - let ok = func.update_encoding(inst, isa).is_ok(); - debug_assert!( - ok, - "fill_nop encoding missing for this type: `{}`", - func.dfg.display_inst(inst, isa) - ); - } - Transform::ChangeToCopyToSSA(ty, reg) => { - // We already have the relevant value in some other register. Convert the - // load into a reg-reg copy. - dfg.replace(inst).copy_to_ssa(ty, reg); - let ok = func.update_encoding(inst, isa).is_ok(); - debug_assert!(ok, "copy_to_ssa encoding missing for type {}", ty); - } - } - } -} - -// ============================================================================================= -// Top level: processing of tree shaped regions - -impl RedundantReloadRemover { - // Push a clone of the top-of-stack ProcessingStackElem. This will be used to process exactly - // one Block. 
The diversions are created new, rather than cloned, to reflect the fact - // that diversions are local to each Block. - fn processing_stack_push(&mut self, cursor: CursorPosition) { - let avail_env = if let Some(stack_top) = self.processing_stack.last() { - stack_top.avail_env.clone() - } else { - AvailEnv::new( - self.num_regunits - .expect("processing_stack_push: num_regunits unknown!") - as usize, - ) - }; - self.processing_stack.push(ProcessingStackElem { - avail_env, - cursor, - diversions: RegDiversions::new(), - }); - } - - // This pushes the node `dst` onto the processing stack, and sets up the new - // ProcessingStackElem accordingly. But it does all that only if `dst` is part of the current - // tree *and* we haven't yet visited it. - fn processing_stack_maybe_push(&mut self, dst: Block) { - if self.nodes_in_tree.contains(dst) && !self.nodes_already_visited.contains(dst) { - if !self.processing_stack.is_empty() { - // If this isn't the outermost node in the tree (that is, the root), then it must - // have exactly one predecessor. Nodes with no predecessors are dead and not - // incorporated in any tree. Nodes with two or more predecessors are the root of - // some other tree, and visiting them as if they were part of the current tree - // would be a serious error. - debug_assert!(self.num_preds_per_block[dst] == ZeroOneOrMany::One); - } - self.processing_stack_push(CursorPosition::Before(dst)); - self.nodes_already_visited.insert(dst); - } - } - - // Perform redundant-reload removal on the tree shaped region of graph defined by `root` and - // `self.nodes_in_tree`. The following state is modified: `self.processing_stack`, - // `self.nodes_already_visited`, and `func.dfg`. 
- fn process_tree( - &mut self, - func: &mut Function, - reginfo: &RegInfo, - isa: &dyn TargetIsa, - root: Block, - ) { - debug_assert!(self.nodes_in_tree.contains(root)); - debug_assert!(self.processing_stack.is_empty()); - debug_assert!(self.nodes_already_visited.is_empty()); - - // Create the initial work item - self.processing_stack_maybe_push(root); - - while !self.processing_stack.is_empty() { - // It seems somewhat ridiculous to construct a whole new FuncCursor just so we can do - // next_inst() on it once, and then copy the resulting position back out. But use of - // a function-global FuncCursor, or of the EncCursor in struct Context, leads to - // borrow checker problems, as does including FuncCursor directly in - // ProcessingStackElem. In any case this is not as bad as it looks, since profiling - // shows that the build-insert-step-extract work is reduced to just 8 machine - // instructions in an optimised x86_64 build, presumably because rustc can inline and - // then optimise out almost all the work. - let tos = self.processing_stack.len() - 1; - let mut pos = FuncCursor::new(func).at_position(self.processing_stack[tos].cursor); - let maybe_inst = pos.next_inst(); - self.processing_stack[tos].cursor = pos.position(); - - if let Some(inst) = maybe_inst { - // Deal with this insn, possibly changing it, possibly updating the top item of - // `self.processing_stack`. - self.visit_inst(func, reginfo, isa, inst); - - // Update diversions after the insn. - self.processing_stack[tos].diversions.apply(&func.dfg[inst]); - - // If the insn can branch outside this Block, push work items on the stack for all - // target Blocks that are part of the same tree and that we haven't yet visited. - // The next iteration of this instruction-processing loop will immediately start - // work on the most recently pushed Block, and will eventually continue in this Block - // when those new items have been removed from the stack. 
- match func.dfg.analyze_branch(inst) { - BranchInfo::NotABranch => (), - BranchInfo::SingleDest(dst, _) => { - self.processing_stack_maybe_push(dst); - } - BranchInfo::Table(jt, default) => { - func.jump_tables[jt] - .iter() - .for_each(|dst| self.processing_stack_maybe_push(*dst)); - if let Some(dst) = default { - self.processing_stack_maybe_push(dst); - } - } - } - } else { - // We've come to the end of the current work-item (Block). We'll already have - // processed the fallthrough/continuation/whatever for it using the logic above. - // Pop it off the stack and resume work on its parent. - self.processing_stack.pop(); - } - } - } -} - -// ============================================================================================= -// Top level: perform redundant fill removal for a complete function - -impl RedundantReloadRemover { - /// Create a new remover state. - pub fn new() -> Self { - Self { - num_regunits: None, - num_preds_per_block: PrimaryMap::::with_capacity(8), - discovery_stack: Vec::::with_capacity(16), - nodes_in_tree: EntitySet::::new(), - processing_stack: Vec::::with_capacity(8), - nodes_already_visited: EntitySet::::new(), - } - } - - /// Clear the state of the remover. - pub fn clear(&mut self) { - self.clear_for_new_function(); - } - - fn clear_for_new_function(&mut self) { - self.num_preds_per_block.clear(); - self.clear_for_new_tree(); - } - - fn clear_for_new_tree(&mut self) { - self.discovery_stack.clear(); - self.nodes_in_tree.clear(); - self.processing_stack.clear(); - self.nodes_already_visited.clear(); - } - - #[inline(never)] - fn do_redundant_fill_removal_on_function( - &mut self, - func: &mut Function, - reginfo: &RegInfo, - isa: &dyn TargetIsa, - cfg: &ControlFlowGraph, - ) { - // Fail in an obvious way if there are more than (2^32)-1 Blocks in this function. - let num_blocks: u32 = func.dfg.num_blocks().try_into().unwrap(); - - // Clear out per-tree state. 
- self.clear_for_new_function(); - - // Create a PrimaryMap that summarises the number of predecessors for each block, as 0, 1 - // or "many", and that also claims the entry block as having "many" predecessors. - self.num_preds_per_block.clear(); - self.num_preds_per_block.reserve(num_blocks as usize); - - for i in 0..num_blocks { - let mut pi = cfg.pred_iter(Block::from_u32(i)); - let mut n_pi = ZeroOneOrMany::Zero; - if pi.next().is_some() { - n_pi = ZeroOneOrMany::One; - if pi.next().is_some() { - n_pi = ZeroOneOrMany::Many; - // We don't care if there are more than two preds, so stop counting now. - } - } - self.num_preds_per_block.push(n_pi); - } - debug_assert!(self.num_preds_per_block.len() == num_blocks as usize); - - // The entry block must be the root of some tree, so set up the state to reflect that. - let entry_block = func - .layout - .entry_block() - .expect("do_redundant_fill_removal_on_function: entry block unknown"); - debug_assert!(self.num_preds_per_block[entry_block] == ZeroOneOrMany::Zero); - self.num_preds_per_block[entry_block] = ZeroOneOrMany::Many; - - // Now build and process trees. - for root_ix in 0..self.num_preds_per_block.len() { - let root = Block::from_u32(root_ix as u32); - - // Build a tree for each node that has two or more preds, and ignore all other nodes. - if self.num_preds_per_block[root] != ZeroOneOrMany::Many { - continue; - } - - // Clear out per-tree state. - self.clear_for_new_tree(); - - // Discovery phase: build the tree, as `root` and `self.nodes_in_tree`. - self.add_nodes_to_tree(cfg, root); - debug_assert!(self.nodes_in_tree.cardinality() > 0); - debug_assert!(self.num_preds_per_block[root] == ZeroOneOrMany::Many); - - // Processing phase: do redundant-reload-removal. 
- self.process_tree(func, reginfo, isa, root); - debug_assert!( - self.nodes_in_tree.cardinality() == self.nodes_already_visited.cardinality() - ); - } - } -} - -// ============================================================================================= -// Top level: the external interface - -struct Context<'a> { - // Current instruction as well as reference to function and ISA. - cur: EncCursor<'a>, - - // Cached ISA information. We save it here to avoid frequent virtual function calls on the - // `TargetIsa` trait object. - reginfo: RegInfo, - - // References to contextual data structures we need. - cfg: &'a ControlFlowGraph, - - // The running state. - state: &'a mut RedundantReloadRemover, -} - -impl RedundantReloadRemover { - /// Run the remover. - pub fn run(&mut self, isa: &dyn TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) { - let ctx = Context { - cur: EncCursor::new(func, isa), - reginfo: isa.register_info(), - cfg, - state: self, - }; - let mut total_regunits = 0; - for rb in isa.register_info().banks { - total_regunits += rb.units; - } - ctx.state.num_regunits = Some(total_regunits); - ctx.state.do_redundant_fill_removal_on_function( - ctx.cur.func, - &ctx.reginfo, - ctx.cur.isa, - &ctx.cfg, - ); - } -} diff --git a/cranelift/codegen/src/regalloc/affinity.rs b/cranelift/codegen/src/regalloc/affinity.rs deleted file mode 100644 index efcc4dabfa..0000000000 --- a/cranelift/codegen/src/regalloc/affinity.rs +++ /dev/null @@ -1,126 +0,0 @@ -//! Value affinity for register allocation. -//! -//! An SSA value's affinity is a hint used to guide the register allocator. It specifies the class -//! of allocation that is likely to cause the least amount of fixup moves in order to satisfy -//! instruction operand constraints. -//! -//! For values that want to be in registers, the affinity hint includes a register class or -//! subclass. This is just a hint, and the register allocator is allowed to pick a register from a -//! 
larger register class instead. - -use crate::ir::{AbiParam, ArgumentLoc}; -use crate::isa::{ConstraintKind, OperandConstraint, RegClassIndex, RegInfo, TargetIsa}; -use core::fmt; - -/// Preferred register allocation for an SSA value. -#[derive(Clone, Copy, Debug)] -pub enum Affinity { - /// No affinity. - /// - /// This indicates a value that is not defined or used by any real instructions. It is a ghost - /// value that won't appear in the final program. - Unassigned, - - /// This value should be placed in a spill slot on the stack. - Stack, - - /// This value prefers a register from the given register class. - Reg(RegClassIndex), -} - -impl Default for Affinity { - fn default() -> Self { - Self::Unassigned - } -} - -impl Affinity { - /// Create an affinity that satisfies a single constraint. - /// - /// This will never create an `Affinity::Unassigned`. - /// Use the `Default` implementation for that. - pub fn new(constraint: &OperandConstraint) -> Self { - if constraint.kind == ConstraintKind::Stack { - Self::Stack - } else { - Self::Reg(constraint.regclass.into()) - } - } - - /// Create an affinity that matches an ABI argument for `isa`. - pub fn abi(arg: &AbiParam, isa: &dyn TargetIsa) -> Self { - match arg.location { - ArgumentLoc::Unassigned => Self::Unassigned, - ArgumentLoc::Reg(_) => Self::Reg(isa.regclass_for_abi_type(arg.value_type).into()), - ArgumentLoc::Stack(_) => Self::Stack, - } - } - - /// Is this the `Unassigned` affinity? - pub fn is_unassigned(self) -> bool { - match self { - Self::Unassigned => true, - _ => false, - } - } - - /// Is this the `Reg` affinity? - pub fn is_reg(self) -> bool { - match self { - Self::Reg(_) => true, - _ => false, - } - } - - /// Is this the `Stack` affinity? - pub fn is_stack(self) -> bool { - match self { - Self::Stack => true, - _ => false, - } - } - - /// Merge an operand constraint into this affinity. 
- /// - /// Note that this does not guarantee that the register allocator will pick a register that - /// satisfies the constraint. - pub fn merge(&mut self, constraint: &OperandConstraint, reginfo: &RegInfo) { - match *self { - Self::Unassigned => *self = Self::new(constraint), - Self::Reg(rc) => { - // If the preferred register class is a subclass of the constraint, there's no need - // to change anything. - if constraint.kind != ConstraintKind::Stack && !constraint.regclass.has_subclass(rc) - { - // If the register classes overlap, try to shrink our preferred register class. - if let Some(subclass) = constraint.regclass.intersect_index(reginfo.rc(rc)) { - *self = Self::Reg(subclass); - } - } - } - Self::Stack => {} - } - } - - /// Return an object that can display this value affinity, using the register info from the - /// target ISA. - pub fn display<'a, R: Into>>(self, regs: R) -> DisplayAffinity<'a> { - DisplayAffinity(self, regs.into()) - } -} - -/// Displaying an `Affinity` correctly requires the associated `RegInfo` from the target ISA. -pub struct DisplayAffinity<'a>(Affinity, Option<&'a RegInfo>); - -impl<'a> fmt::Display for DisplayAffinity<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.0 { - Affinity::Unassigned => write!(f, "unassigned"), - Affinity::Stack => write!(f, "stack"), - Affinity::Reg(rci) => match self.1 { - Some(regs) => write!(f, "{}", regs.rc(rci)), - None => write!(f, "{}", rci), - }, - } - } -} diff --git a/cranelift/codegen/src/regalloc/branch_splitting.rs b/cranelift/codegen/src/regalloc/branch_splitting.rs deleted file mode 100644 index 4e9a159f3e..0000000000 --- a/cranelift/codegen/src/regalloc/branch_splitting.rs +++ /dev/null @@ -1,169 +0,0 @@ -//! Split the outgoing edges of conditional branches that pass parameters. -//! -//! One of the reason for splitting edges is to be able to insert `copy` and `regmove` instructions -//! between a conditional branch and the following terminator. 
-use alloc::vec::Vec; - -use crate::cursor::{Cursor, EncCursor}; -use crate::dominator_tree::DominatorTree; -use crate::flowgraph::ControlFlowGraph; -use crate::ir::{Block, Function, Inst, InstBuilder, InstructionData, Opcode, ValueList}; -use crate::isa::TargetIsa; -use crate::topo_order::TopoOrder; - -pub fn run( - isa: &dyn TargetIsa, - func: &mut Function, - cfg: &mut ControlFlowGraph, - domtree: &mut DominatorTree, - topo: &mut TopoOrder, -) { - let mut ctx = Context { - has_new_blocks: false, - cur: EncCursor::new(func, isa), - domtree, - topo, - cfg, - }; - ctx.run() -} - -struct Context<'a> { - /// True if new blocks were inserted. - has_new_blocks: bool, - - /// Current instruction as well as reference to function and ISA. - cur: EncCursor<'a>, - - /// References to contextual data structures we need. - domtree: &'a mut DominatorTree, - topo: &'a mut TopoOrder, - cfg: &'a mut ControlFlowGraph, -} - -impl<'a> Context<'a> { - fn run(&mut self) { - // Any block order will do. - self.topo.reset(self.cur.func.layout.blocks()); - while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) { - // Branches can only be at the last or second to last position in an extended basic - // block. - self.cur.goto_last_inst(block); - let terminator_inst = self.cur.current_inst().expect("terminator"); - if let Some(inst) = self.cur.prev_inst() { - let opcode = self.cur.func.dfg[inst].opcode(); - if opcode.is_branch() { - self.visit_conditional_branch(inst, opcode); - self.cur.goto_inst(terminator_inst); - self.visit_terminator_branch(terminator_inst); - } - } - } - - // If blocks were added the cfg and domtree are inconsistent and must be recomputed. 
- if self.has_new_blocks { - self.cfg.compute(&self.cur.func); - self.domtree.compute(&self.cur.func, self.cfg); - } - } - - fn visit_conditional_branch(&mut self, branch: Inst, opcode: Opcode) { - // TODO: target = dfg[branch].branch_destination().expect("conditional branch"); - let target = match self.cur.func.dfg[branch] { - InstructionData::Branch { destination, .. } - | InstructionData::BranchIcmp { destination, .. } - | InstructionData::BranchInt { destination, .. } - | InstructionData::BranchFloat { destination, .. } => destination, - _ => panic!("Unexpected instruction in visit_conditional_branch"), - }; - - // If there are any parameters, split the edge. - if self.should_split_edge(target) { - // Create the block the branch will jump to. - let new_block = self.cur.func.dfg.make_block(); - - // Insert the new block before the destination, such that it can fallthrough in the - // target block. - assert_ne!(Some(target), self.cur.layout().entry_block()); - self.cur.layout_mut().insert_block(new_block, target); - self.has_new_blocks = true; - - // Extract the arguments of the branch instruction, split the Block parameters and the - // branch arguments - let num_fixed = opcode.constraints().num_fixed_value_arguments(); - let dfg = &mut self.cur.func.dfg; - let old_args: Vec<_> = { - let args = dfg[branch].take_value_list().expect("block parameters"); - args.as_slice(&dfg.value_lists).iter().copied().collect() - }; - let (branch_args, block_params) = old_args.split_at(num_fixed); - - // Replace the branch destination by the new Block created with no parameters, and restore - // the branch arguments, without the original Block parameters. 
- { - let branch_args = ValueList::from_slice(branch_args, &mut dfg.value_lists); - let data = &mut dfg[branch]; - *data.branch_destination_mut().expect("branch") = new_block; - data.put_value_list(branch_args); - } - let ok = self.cur.func.update_encoding(branch, self.cur.isa).is_ok(); - debug_assert!(ok); - - // Insert a jump to the original target with its arguments into the new block. - self.cur.goto_first_insertion_point(new_block); - self.cur.ins().jump(target, block_params); - - // Reset the cursor to point to the branch. - self.cur.goto_inst(branch); - } - } - - fn visit_terminator_branch(&mut self, inst: Inst) { - let inst_data = &self.cur.func.dfg[inst]; - let opcode = inst_data.opcode(); - if opcode != Opcode::Jump && opcode != Opcode::Fallthrough { - // This opcode is ignored as it does not have any block parameters. - if opcode != Opcode::IndirectJumpTableBr { - debug_assert!(!opcode.is_branch()) - } - return; - } - - let target = match inst_data { - InstructionData::Jump { destination, .. } => destination, - _ => panic!( - "Unexpected instruction {} in visit_terminator_branch", - self.cur.display_inst(inst) - ), - }; - debug_assert!(self.cur.func.dfg[inst].opcode().is_terminator()); - - // If there are any parameters, split the edge. - if self.should_split_edge(*target) { - // Create the block the branch will jump to. - let new_block = self.cur.func.dfg.make_block(); - self.has_new_blocks = true; - - // Split the current block before its terminator, and insert a new jump instruction to - // jump to it. - let jump = self.cur.ins().jump(new_block, &[]); - self.cur.insert_block(new_block); - - // Reset the cursor to point to new terminator of the old block. - self.cur.goto_inst(jump); - } - } - - /// Returns whether we should introduce a new branch. - fn should_split_edge(&self, target: Block) -> bool { - // We should split the edge if the target has any parameters. 
- if !self.cur.func.dfg.block_params(target).is_empty() { - return true; - }; - - // Or, if the target has more than one block reaching it. - debug_assert!(self.cfg.pred_iter(target).next() != None); - - self.cfg.pred_iter(target).nth(1).is_some() - } -} diff --git a/cranelift/codegen/src/regalloc/coalescing.rs b/cranelift/codegen/src/regalloc/coalescing.rs deleted file mode 100644 index 512d77da0b..0000000000 --- a/cranelift/codegen/src/regalloc/coalescing.rs +++ /dev/null @@ -1,1106 +0,0 @@ -//! Constructing Conventional SSA form. -//! -//! Conventional SSA (CSSA) form is a subset of SSA form where any (transitively) phi-related -//! values do not interfere. We construct CSSA by building virtual registers that are as large as -//! possible and inserting copies where necessary such that all argument values passed to a block -//! parameter will belong to the same virtual register as the block parameter value itself. - -use crate::cursor::{Cursor, EncCursor}; -use crate::dbg::DisplayList; -use crate::dominator_tree::{DominatorTree, DominatorTreePreorder}; -use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; -use crate::fx::FxHashMap; -use crate::ir::{self, InstBuilder, ProgramOrder}; -use crate::ir::{Block, ExpandedProgramPoint, Function, Inst, Value}; -use crate::isa::{EncInfo, TargetIsa}; -use crate::regalloc::affinity::Affinity; -use crate::regalloc::liveness::Liveness; -use crate::regalloc::virtregs::{VirtReg, VirtRegs}; -use crate::timing; -use alloc::vec::Vec; -use core::cmp; -use core::fmt; -use core::iter; -use core::slice; - -// # Implementation -// -// The coalescing algorithm implemented follows this paper fairly closely: -// -// Budimlic, Z., Cooper, K. D., Harvey, T. J., et al. (2002). Fast copy coalescing and -// live-range identification (Vol. 37, pp. 25–32). ACM. 
https://doi.org/10.1145/543552.512534 -// -// We use a more efficient dominator forest representation (a linear stack) described here: -// -// Boissinot, B., Darte, A., & Rastello, F. (2009). Revisiting out-of-SSA translation for -// correctness, code quality and efficiency. -// -// The algorithm has two main phases: -// -// Phase 1: Union-find. -// -// We use the union-find support in `VirtRegs` to build virtual registers such that block parameter -// values always belong to the same virtual register as their corresponding block arguments at the -// predecessor branches. Trivial interferences between parameter and argument value live ranges are -// detected and resolved before unioning congruence classes, but non-trivial interferences between -// values that end up in the same congruence class are possible. -// -// Phase 2: Dominator forests. -// -// The virtual registers formed in phase 1 can contain interferences that we need to detect and -// eliminate. By ordering the values in a virtual register according to a dominator tree pre-order, -// we can identify all interferences in the virtual register in linear time. -// -// Interfering values are isolated and virtual registers rebuilt. - -/// Data structures to be used by the coalescing pass. -pub struct Coalescing { - preorder: DominatorTreePreorder, - forest: DomForest, - vcopies: VirtualCopies, - values: Vec, - predecessors: Vec, - backedges: Vec, -} - -/// One-shot context created once per invocation. -struct Context<'a> { - isa: &'a dyn TargetIsa, - encinfo: EncInfo, - - func: &'a mut Function, - cfg: &'a ControlFlowGraph, - domtree: &'a DominatorTree, - preorder: &'a DominatorTreePreorder, - liveness: &'a mut Liveness, - virtregs: &'a mut VirtRegs, - - forest: &'a mut DomForest, - vcopies: &'a mut VirtualCopies, - values: &'a mut Vec, - predecessors: &'a mut Vec, - backedges: &'a mut Vec, -} - -impl Coalescing { - /// Create a new coalescing pass. 
- pub fn new() -> Self { - Self { - forest: DomForest::new(), - preorder: DominatorTreePreorder::new(), - vcopies: VirtualCopies::new(), - values: Vec::new(), - predecessors: Vec::new(), - backedges: Vec::new(), - } - } - - /// Clear all data structures in this coalescing pass. - pub fn clear(&mut self) { - self.forest.clear(); - self.vcopies.clear(); - self.values.clear(); - self.predecessors.clear(); - self.backedges.clear(); - } - - /// Convert `func` to Conventional SSA form and build virtual registers in the process. - pub fn conventional_ssa( - &mut self, - isa: &dyn TargetIsa, - func: &mut Function, - cfg: &ControlFlowGraph, - domtree: &DominatorTree, - liveness: &mut Liveness, - virtregs: &mut VirtRegs, - ) { - let _tt = timing::ra_cssa(); - log::trace!("Coalescing for:\n{}", func.display(isa)); - self.preorder.compute(domtree, &func.layout); - let mut context = Context { - isa, - encinfo: isa.encoding_info(), - func, - cfg, - domtree, - preorder: &self.preorder, - liveness, - virtregs, - forest: &mut self.forest, - vcopies: &mut self.vcopies, - values: &mut self.values, - predecessors: &mut self.predecessors, - backedges: &mut self.backedges, - }; - - // Run phase 1 (union-find) of the coalescing algorithm on the current function. - for &block in domtree.cfg_postorder() { - context.union_find_block(block); - } - context.finish_union_find(); - - // Run phase 2 (dominator forests) on the current function. - context.process_vregs(); - } -} - -/// Phase 1: Union-find. -/// -/// The two entry points for phase 1 are `union_find_block()` and `finish_union_find`. -impl<'a> Context<'a> { - /// Run the union-find algorithm on the parameter values on `block`. - /// - /// This ensure that all block parameters will belong to the same virtual register as their - /// corresponding arguments at all predecessor branches. 
- pub fn union_find_block(&mut self, block: Block) { - let num_params = self.func.dfg.num_block_params(block); - if num_params == 0 { - return; - } - - self.isolate_conflicting_params(block, num_params); - - for i in 0..num_params { - self.union_pred_args(block, i); - } - } - - // Identify block parameter values that are live at one of the predecessor branches. - // - // Such a parameter value will conflict with any argument value at the predecessor branch, so - // it must be isolated by inserting a copy. - fn isolate_conflicting_params(&mut self, block: Block, num_params: usize) { - debug_assert_eq!(num_params, self.func.dfg.num_block_params(block)); - // The only way a parameter value can interfere with a predecessor branch is if the block is - // dominating the predecessor branch. That is, we are looking for loop back-edges. - for BlockPredecessor { - block: pred_block, - inst: pred_inst, - } in self.cfg.pred_iter(block) - { - // The quick pre-order dominance check is accurate because the block parameter is defined - // at the top of the block before any branches. - if !self.preorder.dominates(block, pred_block) { - continue; - } - - log::trace!( - " - checking {} params at back-edge {}: {}", - num_params, - pred_block, - self.func.dfg.display_inst(pred_inst, self.isa) - ); - - // Now `pred_inst` is known to be a back-edge, so it is possible for parameter values - // to be live at the use. - for i in 0..num_params { - let param = self.func.dfg.block_params(block)[i]; - if self.liveness[param].reaches_use(pred_inst, pred_block, &self.func.layout) { - self.isolate_param(block, param); - } - } - } - } - - // Union block parameter value `num` with the corresponding block arguments on the predecessor - // branches. - // - // Detect cases where the argument value is live-in to `block` so it conflicts with any block - // parameter. Isolate the argument in those cases before unioning it with the parameter value. 
- fn union_pred_args(&mut self, block: Block, argnum: usize) { - let param = self.func.dfg.block_params(block)[argnum]; - - for BlockPredecessor { - block: pred_block, - inst: pred_inst, - } in self.cfg.pred_iter(block) - { - let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum]; - - // Never coalesce incoming function parameters on the stack. These parameters are - // pre-spilled, and the rest of the virtual register would be forced to spill to the - // `incoming_arg` stack slot too. - if let ir::ValueDef::Param(def_block, def_num) = self.func.dfg.value_def(arg) { - if Some(def_block) == self.func.layout.entry_block() - && self.func.signature.params[def_num].location.is_stack() - { - log::trace!("-> isolating function stack parameter {}", arg); - let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg); - self.virtregs.union(param, new_arg); - continue; - } - } - - // Check for basic interference: If `arg` overlaps a value defined at the entry to - // `block`, it can never be used as a block argument. - let interference = { - let lr = &self.liveness[arg]; - - // There are two ways the argument value can interfere with `block`: - // - // 1. It is defined in a dominating block and live-in to `block`. - // 2. If is itself a parameter value for `block`. This case should already have been - // eliminated by `isolate_conflicting_params()`. - debug_assert!( - lr.def() != block.into(), - "{} parameter {} was missed by isolate_conflicting_params()", - block, - arg - ); - - // The only other possibility is that `arg` is live-in to `block`. - lr.is_livein(block, &self.func.layout) - }; - - if interference { - let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg); - self.virtregs.union(param, new_arg); - } else { - self.virtregs.union(param, arg); - } - } - } - - // Isolate block parameter value `param` on `block`. 
- // - // When `param=v10`: - // - // block1(v10: i32): - // foo - // - // becomes: - // - // block1(v11: i32): - // v10 = copy v11 - // foo - // - // This function inserts the copy and updates the live ranges of the old and new parameter - // values. Returns the new parameter value. - fn isolate_param(&mut self, block: Block, param: Value) -> Value { - debug_assert_eq!( - self.func.dfg.value_def(param).pp(), - ExpandedProgramPoint::Block(block) - ); - let ty = self.func.dfg.value_type(param); - let new_val = self.func.dfg.replace_block_param(param, ty); - - // Insert a copy instruction at the top of `block`. - let mut pos = EncCursor::new(self.func, self.isa).at_first_inst(block); - if let Some(inst) = pos.current_inst() { - pos.use_srcloc(inst); - } - pos.ins().with_result(param).copy(new_val); - let inst = pos.built_inst(); - self.liveness.move_def_locally(param, inst); - - log::trace!( - "-> inserted {}, following {}({}: {})", - pos.display_inst(inst), - block, - new_val, - ty - ); - - // Create a live range for the new value. - // TODO: Should we handle ghost values? - let affinity = Affinity::new( - &self - .encinfo - .operand_constraints(pos.func.encodings[inst]) - .expect("Bad copy encoding") - .outs[0], - ); - self.liveness.create_dead(new_val, block, affinity); - self.liveness - .extend_locally(new_val, block, inst, &pos.func.layout); - - new_val - } - - // Isolate the block argument `pred_val` from the predecessor `(pred_block, pred_inst)`. - // - // It is assumed that `pred_inst` is a branch instruction in `pred_block` whose `argnum`'th block - // argument is `pred_val`. Since the argument value interferes with the corresponding block - // parameter at the destination, a copy is used instead: - // - // brnz v1, block2(v10) - // - // Becomes: - // - // v11 = copy v10 - // brnz v1, block2(v11) - // - // This way the interference with the block parameter is avoided. 
- // - // A live range for the new value is created while the live range for `pred_val` is left - // unaltered. - // - // The new argument value is returned. - fn isolate_arg( - &mut self, - pred_block: Block, - pred_inst: Inst, - argnum: usize, - pred_val: Value, - ) -> Value { - let mut pos = EncCursor::new(self.func, self.isa).at_inst(pred_inst); - pos.use_srcloc(pred_inst); - let copy = pos.ins().copy(pred_val); - let inst = pos.built_inst(); - - // Create a live range for the new value. - // TODO: Handle affinity for ghost values. - let affinity = Affinity::new( - &self - .encinfo - .operand_constraints(pos.func.encodings[inst]) - .expect("Bad copy encoding") - .outs[0], - ); - self.liveness.create_dead(copy, inst, affinity); - self.liveness - .extend_locally(copy, pred_block, pred_inst, &pos.func.layout); - - pos.func.dfg.inst_variable_args_mut(pred_inst)[argnum] = copy; - - log::trace!( - "-> inserted {}, before {}: {}", - pos.display_inst(inst), - pred_block, - pos.display_inst(pred_inst) - ); - - copy - } - - /// Finish the union-find part of the coalescing algorithm. - /// - /// This builds the initial set of virtual registers as the transitive/reflexive/symmetric - /// closure of the relation formed by block parameter-argument pairs found by `union_find_block()`. - fn finish_union_find(&mut self) { - self.virtregs.finish_union_find(None); - log::trace!("After union-find phase:{}", self.virtregs); - } -} - -/// Phase 2: Dominator forests. -/// -/// The main entry point is `process_vregs()`. -impl<'a> Context<'a> { - /// Check al virtual registers for interference and fix conflicts. - pub fn process_vregs(&mut self) { - for vreg in self.virtregs.all_virtregs() { - self.process_vreg(vreg); - } - } - - // Check `vreg` for interferences and fix conflicts. - fn process_vreg(&mut self, vreg: VirtReg) { - if !self.check_vreg(vreg) { - self.synthesize_vreg(vreg); - } - } - - // Check `vreg` for interferences. 
- // - // We use a Budimlic dominator forest to check for interferences between the values in `vreg` - // and identify values that should be isolated. - // - // Returns true if `vreg` is free of interference. - fn check_vreg(&mut self, vreg: VirtReg) -> bool { - // Order the values according to the dominator pre-order of their definition. - let values = self.virtregs.sort_values(vreg, self.func, self.preorder); - log::trace!("Checking {} = {}", vreg, DisplayList(values)); - - // Now push the values in order to the dominator forest. - // This gives us the closest dominating value def for each of the values. - self.forest.clear(); - for &value in values { - let node = Node::value(value, 0, self.func); - - // Push this value and get the nearest dominating def back. - let parent = match self - .forest - .push_node(node, self.func, self.domtree, self.preorder) - { - None => continue, - Some(n) => n, - }; - - // Check for interference between `parent` and `value`. Since `parent` dominates - // `value`, we only have to check if it overlaps the definition. - if self.liveness[parent.value].overlaps_def(node.def, node.block, &self.func.layout) { - // The two values are interfering, so they can't be in the same virtual register. - log::trace!("-> interference: {} overlaps def of {}", parent, value); - return false; - } - } - - // No interference found. - true - } - - /// Destroy and rebuild `vreg` by iterative coalescing. - /// - /// When detecting that a virtual register formed in phase 1 contains interference, we have to - /// start over in a more careful way. We'll split the vreg into individual values and then - /// reassemble virtual registers using an iterative algorithm of pairwise merging. - /// - /// It is possible to recover multiple large virtual registers this way while still avoiding - /// a lot of copies. 
- fn synthesize_vreg(&mut self, vreg: VirtReg) { - self.vcopies.initialize( - self.virtregs.values(vreg), - self.func, - self.cfg, - self.preorder, - ); - log::trace!( - "Synthesizing {} from {} branches and params {}", - vreg, - self.vcopies.branches.len(), - DisplayList(&self.vcopies.params) - ); - self.virtregs.remove(vreg); - - while let Some(param) = self.vcopies.next_param() { - self.merge_param(param); - self.vcopies.merged_param(param, self.func); - } - } - - /// Merge block parameter value `param` with virtual registers at its predecessors. - fn merge_param(&mut self, param: Value) { - let (block, argnum) = match self.func.dfg.value_def(param) { - ir::ValueDef::Param(e, n) => (e, n), - ir::ValueDef::Result(_, _) => panic!("Expected parameter"), - }; - - // Collect all the predecessors and rearrange them. - // - // The order we process the predecessors matters because once one predecessor's virtual - // register is merged, it can cause interference with following merges. This means that the - // first predecessors processed are more likely to be copy-free. We want an ordering that - // is a) good for performance and b) as stable as possible. The pred_iter() iterator uses - // instruction numbers which is not great for reproducible test cases. - // - // First merge loop back-edges in layout order, on the theory that shorter back-edges are - // more sensitive to inserted copies. - // - // Second everything else in reverse layout order. Again, short forward branches get merged - // first. There can also be backwards branches mixed in here, though, as long as they are - // not loop backedges. 
- debug_assert!(self.predecessors.is_empty()); - debug_assert!(self.backedges.is_empty()); - for BlockPredecessor { - block: pred_block, - inst: pred_inst, - } in self.cfg.pred_iter(block) - { - if self.preorder.dominates(block, pred_block) { - self.backedges.push(pred_inst); - } else { - self.predecessors.push(pred_inst); - } - } - // Order instructions in reverse order so we can pop them off the back. - { - let l = &self.func.layout; - self.backedges.sort_unstable_by(|&a, &b| l.cmp(b, a)); - self.predecessors.sort_unstable_by(|&a, &b| l.cmp(a, b)); - self.predecessors.extend_from_slice(&self.backedges); - self.backedges.clear(); - } - - while let Some(pred_inst) = self.predecessors.pop() { - let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum]; - - // We want to merge the vreg containing `param` with the vreg containing `arg`. - if self.try_merge_vregs(param, arg) { - continue; - } - - // Can't merge because of interference. Insert a copy instead. - let pred_block = self.func.layout.pp_block(pred_inst); - let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg); - self.virtregs - .insert_single(param, new_arg, self.func, self.preorder); - } - } - - /// Merge the virtual registers containing `param` and `arg` if possible. - /// - /// Use self.vcopies to check for virtual copy interference too. - /// - /// Returns true if the virtual registers are successfully merged. - fn try_merge_vregs(&mut self, param: Value, arg: Value) -> bool { - if self.virtregs.same_class(param, arg) { - return true; - } - - if !self.can_merge_vregs(param, arg) { - return false; - } - - let _vreg = self.virtregs.unify(self.values); - log::trace!("-> merged into {} = {}", _vreg, DisplayList(self.values)); - true - } - - /// Check if it is possible to merge two virtual registers. - /// - /// Also leave `self.values` with the ordered list of values in the merged vreg. 
- fn can_merge_vregs(&mut self, param: Value, arg: Value) -> bool { - // We only need an immutable function reference. - let func = &*self.func; - let domtree = self.domtree; - let preorder = self.preorder; - - // Restrict the virtual copy nodes we look at and key the `set_id` and `value` properties - // of the nodes. Set_id 0 will be `param` and set_id 1 will be `arg`. - self.vcopies - .set_filter([param, arg], func, self.virtregs, preorder); - - // Now create an ordered sequence of dom-forest nodes from three sources: The two virtual - // registers and the filtered virtual copies. - let v0 = self.virtregs.congruence_class(¶m); - let v1 = self.virtregs.congruence_class(&arg); - log::trace!( - " - set 0: {}\n - set 1: {}", - DisplayList(v0), - DisplayList(v1) - ); - let nodes = MergeNodes::new( - func, - preorder, - MergeNodes::new( - func, - preorder, - v0.iter().map(|&value| Node::value(value, 0, func)), - v1.iter().map(|&value| Node::value(value, 1, func)), - ), - self.vcopies.iter(func), - ); - - // Now push the values in order to the dominator forest. - // This gives us the closest dominating value def for each of the values. - self.forest.clear(); - self.values.clear(); - for node in nodes { - // Accumulate ordered values for the new vreg. - if node.is_value() { - self.values.push(node.value); - } - - // Push this value and get the nearest dominating def back. - let parent = match self.forest.push_node(node, func, domtree, preorder) { - None => { - if node.is_vcopy { - self.forest.pop_last(); - } - continue; - } - Some(n) => n, - }; - - if node.is_vcopy { - // Vcopy nodes don't represent interference if they are copies of the parent value. - // In that case, the node must be removed because the parent value can still be - // live belong the vcopy. - if parent.is_vcopy || node.value == parent.value { - self.forest.pop_last(); - continue; - } - - // Check if the parent value interferes with the virtual copy. 
- let inst = node.def.unwrap_inst(); - if node.set_id != parent.set_id - && self.liveness[parent.value].reaches_use(inst, node.block, &self.func.layout) - { - log::trace!( - " - interference: {} overlaps vcopy at {}:{}", - parent, - node.block, - self.func.dfg.display_inst(inst, self.isa) - ); - return false; - } - - // Keep this vcopy on the stack. It will save us a few interference checks. - continue; - } - - // Parent vcopies never represent any interference. We only keep them on the stack to - // avoid an interference check against a value higher up. - if parent.is_vcopy { - continue; - } - - // Both node and parent are values, so check for interference. - debug_assert!(node.is_value() && parent.is_value()); - if node.set_id != parent.set_id - && self.liveness[parent.value].overlaps_def(node.def, node.block, &self.func.layout) - { - // The two values are interfering. - log::trace!(" - interference: {} overlaps def of {}", parent, node.value); - return false; - } - } - - // The values vector should receive all values. - debug_assert_eq!(v0.len() + v1.len(), self.values.len()); - - // No interference found. - true - } -} - -/// Dominator forest. -/// -/// This is a utility type used for detecting interference in virtual registers, where each virtual -/// register is a list of values ordered according to the dominator tree pre-order. -/// -/// The idea of a dominator forest was introduced on the Budimlic paper and the linear stack -/// representation in the Boissinot paper. Our version of the linear stack is slightly modified -/// because we have a pre-order of the dominator tree at the block granularity, not basic block -/// granularity. -/// -/// Values are pushed in dominator tree pre-order of their definitions, and for each value pushed, -/// `push_node` will return the nearest previously pushed value that dominates the definition. 
-#[allow(dead_code)] -struct DomForest { - // Stack representing the rightmost edge of the dominator forest so far, ending in the last - // element of `values`. - // - // At all times, the block of each element in the stack dominates the block of the next one. - stack: Vec, -} - -/// A node in the dominator forest. -#[derive(Clone, Copy, Debug)] -#[allow(dead_code)] -struct Node { - /// The program point where the live range is defined. - def: ExpandedProgramPoint, - /// block containing `def`. - block: Block, - /// Is this a virtual copy or a value? - is_vcopy: bool, - /// Set identifier. - set_id: u8, - /// For a value node: The value defined at `def`. - /// For a vcopy node: The relevant branch argument at `def`. - value: Value, -} - -impl Node { - /// Create a node representing `value`. - pub fn value(value: Value, set_id: u8, func: &Function) -> Self { - let def = func.dfg.value_def(value).pp(); - let block = func.layout.pp_block(def); - Self { - def, - block, - is_vcopy: false, - set_id, - value, - } - } - - /// Create a node representing a virtual copy. - pub fn vcopy(branch: Inst, value: Value, set_id: u8, func: &Function) -> Self { - let def = branch.into(); - let block = func.layout.pp_block(def); - Self { - def, - block, - is_vcopy: true, - set_id, - value, - } - } - - /// IF this a value node? - pub fn is_value(&self) -> bool { - !self.is_vcopy - } -} - -impl fmt::Display for Node { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if self.is_vcopy { - write!(f, "{}:vcopy({})@{}", self.set_id, self.value, self.block) - } else { - write!(f, "{}:{}@{}", self.set_id, self.value, self.block) - } - } -} - -impl DomForest { - /// Create a new empty dominator forest. - pub fn new() -> Self { - Self { stack: Vec::new() } - } - - /// Clear all data structures in this dominator forest. - pub fn clear(&mut self) { - self.stack.clear(); - } - - /// Add a single node to the forest. - /// - /// Update the stack so its dominance invariants are preserved. 
Detect a parent node on the - /// stack which is the closest one dominating the new node and return it. - fn push_node( - &mut self, - node: Node, - func: &Function, - domtree: &DominatorTree, - preorder: &DominatorTreePreorder, - ) -> Option { - // The stack contains the current sequence of dominating defs. Pop elements until we - // find one whose block dominates `node.block`. - while let Some(top) = self.stack.pop() { - if preorder.dominates(top.block, node.block) { - // This is the right insertion spot for `node`. - self.stack.push(top); - self.stack.push(node); - - // We know here that `top.block` dominates `node.block`, and thus `node.def`. This does - // not necessarily mean that `top.def` dominates `node.def`, though. The `top.def` - // program point may be below the last branch in `top.block` that dominates - // `node.def`. - // - // We do know, though, that if there is a nearest value dominating `node.def`, it - // will be on the stack. We just need to find the last stack entry that actually - // dominates. - let mut last_dom = node.def; - for &n in self.stack.iter().rev().skip(1) { - // If the node is defined at the block header, it does in fact dominate - // everything else pushed on the stack. - let def_inst = match n.def { - ExpandedProgramPoint::Block(_) => return Some(n), - ExpandedProgramPoint::Inst(i) => i, - }; - - // We need to find the last program point in `n.block` to dominate `node.def`. - last_dom = match domtree.last_dominator(n.block, last_dom, &func.layout) { - None => n.block.into(), - Some(inst) => { - if func.layout.cmp(def_inst, inst) != cmp::Ordering::Greater { - return Some(n); - } - inst.into() - } - }; - } - - // No real dominator found on the stack. - return None; - } - } - - // No dominators, start a new tree in the forest. - self.stack.push(node); - None - } - - pub fn pop_last(&mut self) { - self.stack.pop().expect("Stack is empty"); - } -} - -/// Virtual copies. 
-/// -/// When building a full virtual register at once, like phase 1 does with union-find, it is good -/// enough to check for interference between the values in the full virtual register like -/// `check_vreg()` does. However, in phase 2 we are doing pairwise merges of partial virtual -/// registers that don't represent the full transitive closure of the block argument-parameter -/// relation. This means that just checking for interference between values is inadequate. -/// -/// Example: -/// -/// v1 = iconst.i32 1 -/// brnz v10, block1(v1) -/// v2 = iconst.i32 2 -/// brnz v11, block1(v2) -/// return v1 -/// -/// block1(v3: i32): -/// v4 = iadd v3, v1 -/// -/// With just value interference checking, we could build the virtual register [v3, v1] since those -/// two values don't interfere. We can't merge v2 into this virtual register because v1 and v2 -/// interfere. However, we can't resolve that interference either by inserting a copy: -/// -/// v1 = iconst.i32 1 -/// brnz v10, block1(v1) -/// v2 = iconst.i32 2 -/// v20 = copy v2 <-- new value -/// brnz v11, block1(v20) -/// return v1 -/// -/// block1(v3: i32): -/// v4 = iadd v3, v1 -/// -/// The new value v20 still interferes with v1 because v1 is live across the "brnz v11" branch. We -/// shouldn't have placed v1 and v3 in the same virtual register to begin with. -/// -/// LLVM detects this form of interference by inserting copies in the predecessors of all phi -/// instructions, then attempting to delete the copies. This is quite expensive because it involves -/// creating a large number of copies and value. -/// -/// We'll detect this form of interference with *virtual copies*: Each block parameter value that -/// hasn't yet been fully merged with its block argument values is given a set of virtual copies at -/// the predecessors. Any candidate value to be merged is checked for interference against both the -/// virtual register and the virtual copies. 
-/// -/// In the general case, we're checking if two virtual registers can be merged, and both can -/// contain incomplete block parameter values with associated virtual copies. -/// -/// The `VirtualCopies` struct represents a set of incomplete parameters and their associated -/// virtual copies. Given two virtual registers, it can produce an ordered sequence of nodes -/// representing the virtual copies in both vregs. -struct VirtualCopies { - // Incomplete block parameters. These don't need to belong to the same virtual register. - params: Vec, - - // Set of `(branch, destination)` pairs. These are all the predecessor branches for the blocks - // whose parameters can be found in `params`. - // - // Ordered by dominator tree pre-order of the branch instructions. - branches: Vec<(Inst, Block)>, - - // Filter for the currently active node iterator. - // - // A block => (set_id, num) entry means that branches to `block` are active in `set_id` with - // branch argument number `num`. - filter: FxHashMap, -} - -impl VirtualCopies { - /// Create an empty VirtualCopies struct. - pub fn new() -> Self { - Self { - params: Vec::new(), - branches: Vec::new(), - filter: FxHashMap(), - } - } - - /// Clear all state. - pub fn clear(&mut self) { - self.params.clear(); - self.branches.clear(); - self.filter.clear(); - } - - /// Initialize virtual copies from the (interfering) values in a union-find virtual register - /// that is going to be broken up and reassembled iteratively. - /// - /// The values are assumed to be in domtree pre-order. - /// - /// This will extract the block parameter values and associate virtual copies all of them. 
- pub fn initialize( - &mut self, - values: &[Value], - func: &Function, - cfg: &ControlFlowGraph, - preorder: &DominatorTreePreorder, - ) { - self.clear(); - - let mut last_block = None; - for &val in values { - if let ir::ValueDef::Param(block, _) = func.dfg.value_def(val) { - self.params.push(val); - - // We may have multiple parameters from the same block, but we only need to collect - // predecessors once. Also verify the ordering of values. - if let Some(last) = last_block { - match preorder.pre_cmp_block(last, block) { - cmp::Ordering::Less => {} - cmp::Ordering::Equal => continue, - cmp::Ordering::Greater => panic!("values in wrong order"), - } - } - - // This block hasn't been seen before. - for BlockPredecessor { - inst: pred_inst, .. - } in cfg.pred_iter(block) - { - self.branches.push((pred_inst, block)); - } - last_block = Some(block); - } - } - - // Reorder the predecessor branches as required by the dominator forest. - self.branches - .sort_unstable_by(|&(a, _), &(b, _)| preorder.pre_cmp(a, b, &func.layout)); - } - - /// Get the next unmerged parameter value. - pub fn next_param(&self) -> Option { - self.params.last().cloned() - } - - /// Indicate that `param` is now fully merged. - pub fn merged_param(&mut self, param: Value, func: &Function) { - let popped = self.params.pop(); - debug_assert_eq!(popped, Some(param)); - - // The domtree pre-order in `self.params` guarantees that all parameters defined at the - // same block will be adjacent. This means we can see when all parameters at a block have been - // merged. - // - // We don't care about the last parameter - when that is merged we are done. - let last = match self.params.last() { - None => return, - Some(x) => *x, - }; - let block = func.dfg.value_def(param).unwrap_block(); - if func.dfg.value_def(last).unwrap_block() == block { - // We're not done with `block` parameters yet. - return; - } - - // Alright, we know there are no remaining `block` parameters in `self.params`. 
This means we - // can get rid of the `block` predecessors in `self.branches`. We don't have to, the - // `VCopyIter` will just skip them, but this reduces its workload. - self.branches.retain(|&(_, dest)| dest != block); - } - - /// Set a filter for the virtual copy nodes we're generating. - /// - /// Only generate nodes for parameter values that are in the same congruence class as `reprs`. - /// Assign a set_id to each node corresponding to the index into `reprs` of the parameter's - /// congruence class. - pub fn set_filter( - &mut self, - reprs: [Value; 2], - func: &Function, - virtregs: &VirtRegs, - preorder: &DominatorTreePreorder, - ) { - self.filter.clear(); - - // Parameters in `self.params` are ordered according to the domtree per-order, and they are - // removed from the back once they are fully merged. This means we can stop looking for - // parameters once we're beyond the last one. - let last_param = *self.params.last().expect("No more parameters"); - let limit = func.dfg.value_def(last_param).unwrap_block(); - - for (set_id, repr) in reprs.iter().enumerate() { - let set_id = set_id as u8; - for &value in virtregs.congruence_class(repr) { - if let ir::ValueDef::Param(block, num) = func.dfg.value_def(value) { - if preorder.pre_cmp_block(block, limit) == cmp::Ordering::Greater { - // Stop once we're outside the bounds of `self.params`. - break; - } - self.filter.insert(block, (set_id, num)); - } - } - } - } - - /// Look up the set_id and argument number for `block` in the current filter. - /// - /// Returns `None` if none of the currently active parameters are defined at `block`. Otherwise - /// returns `(set_id, argnum)` for an active parameter defined at `block`. - fn lookup(&self, block: Block) -> Option<(u8, usize)> { - self.filter.get(&block).cloned() - } - - /// Get an iterator of dom-forest nodes corresponding to the current filter. 
- pub fn iter<'a>(&'a self, func: &'a Function) -> VCopyIter { - VCopyIter { - func, - vcopies: self, - branches: self.branches.iter(), - } - } -} - -/// Virtual copy iterator. -/// -/// This iterator produces dom-forest nodes corresponding to the current filter in the virtual -/// copies container. -struct VCopyIter<'a> { - func: &'a Function, - vcopies: &'a VirtualCopies, - branches: slice::Iter<'a, (Inst, Block)>, -} - -impl<'a> Iterator for VCopyIter<'a> { - type Item = Node; - - fn next(&mut self) -> Option { - while let Some(&(branch, dest)) = self.branches.next() { - if let Some((set_id, argnum)) = self.vcopies.lookup(dest) { - let arg = self.func.dfg.inst_variable_args(branch)[argnum]; - return Some(Node::vcopy(branch, arg, set_id, self.func)); - } - } - None - } -} - -/// Node-merging iterator. -/// -/// Given two ordered sequences of nodes, yield an ordered sequence containing all of them. -struct MergeNodes<'a, IA, IB> -where - IA: Iterator, - IB: Iterator, -{ - a: iter::Peekable, - b: iter::Peekable, - layout: &'a ir::Layout, - preorder: &'a DominatorTreePreorder, -} - -impl<'a, IA, IB> MergeNodes<'a, IA, IB> -where - IA: Iterator, - IB: Iterator, -{ - pub fn new(func: &'a Function, preorder: &'a DominatorTreePreorder, a: IA, b: IB) -> Self { - MergeNodes { - a: a.peekable(), - b: b.peekable(), - layout: &func.layout, - preorder, - } - } -} - -impl<'a, IA, IB> Iterator for MergeNodes<'a, IA, IB> -where - IA: Iterator, - IB: Iterator, -{ - type Item = Node; - - fn next(&mut self) -> Option { - let ord = match (self.a.peek(), self.b.peek()) { - (Some(a), Some(b)) => { - let layout = self.layout; - self.preorder - .pre_cmp_block(a.block, b.block) - .then_with(|| layout.cmp(a.def, b.def)) - } - (Some(_), None) => cmp::Ordering::Less, - (None, Some(_)) => cmp::Ordering::Greater, - (None, None) => return None, - }; - // When the nodes compare equal, prefer the `a` side. 
- if ord != cmp::Ordering::Greater { - self.a.next() - } else { - self.b.next() - } - } -} diff --git a/cranelift/codegen/src/regalloc/coloring.rs b/cranelift/codegen/src/regalloc/coloring.rs deleted file mode 100644 index 2226784b25..0000000000 --- a/cranelift/codegen/src/regalloc/coloring.rs +++ /dev/null @@ -1,1323 +0,0 @@ -//! Register allocator coloring pass. -//! -//! The coloring pass assigns a physical register to every SSA value with a register affinity, -//! under the assumption that the register pressure has been lowered sufficiently by spilling and -//! splitting. -//! -//! # Preconditions -//! -//! The coloring pass doesn't work on arbitrary code. Certain preconditions must be satisfied: -//! -//! 1. All instructions must be legalized and assigned an encoding. The encoding recipe guides the -//! register assignments and provides exact constraints. -//! -//! 2. Instructions with tied operands must be in a coloring-friendly state. Specifically, the -//! values used by the tied operands must be killed by the instruction. This can be achieved by -//! inserting a `copy` to a new value immediately before the two-address instruction. -//! -//! 3. If a value is bound to more than one operand on the same instruction, the operand -//! constraints must be compatible. This can also be achieved by inserting copies so the -//! incompatible operands get different values. -//! -//! 4. The register pressure must be lowered sufficiently by inserting spill code. Register -//! operands are allowed to read spilled values, but each such instance must be counted as using -//! a register. -//! -//! 5. The code must be in Conventional SSA form. Among other things, this means that values passed -//! as arguments when branching to a block must belong to the same virtual register as the -//! corresponding block argument value. -//! -//! # Iteration order -//! -//! The SSA property guarantees that whenever the live range of two values overlap, one of the -//! 
values will be live at the definition point of the other value. If we visit the instructions in -//! a topological order relative to the dominance relation, we can assign colors to the values -//! defined by the instruction and only consider the colors of other values that are live at the -//! instruction. -//! -//! The first time we see a branch to a block, the block's argument values are colored to match the -//! registers currently holding branch argument values passed to the predecessor branch. By -//! visiting blocks in a CFG topological order, we guarantee that at least one predecessor branch has -//! been visited before the destination block. Therefore, the block's arguments are already colored. -//! -//! The exception is the entry block whose arguments are colored from the ABI requirements. - -use crate::cursor::{Cursor, EncCursor}; -use crate::dominator_tree::DominatorTree; -use crate::flowgraph::ControlFlowGraph; -use crate::ir::{ArgumentLoc, InstBuilder, ValueDef}; -use crate::ir::{Block, Function, Inst, InstructionData, Layout, Opcode, SigRef, Value, ValueLoc}; -use crate::isa::{regs_overlap, RegClass, RegInfo, RegUnit}; -use crate::isa::{ConstraintKind, EncInfo, OperandConstraint, RecipeConstraints, TargetIsa}; -use crate::packed_option::PackedOption; -use crate::regalloc::affinity::Affinity; -use crate::regalloc::diversion::RegDiversions; -use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker}; -use crate::regalloc::liveness::Liveness; -use crate::regalloc::liverange::LiveRange; -use crate::regalloc::register_set::RegisterSet; -use crate::regalloc::solver::{Solver, SolverError}; -use crate::timing; -use core::mem; - -/// Data structures for the coloring pass. -/// -/// These are scratch space data structures that can be reused between invocations. -pub struct Coloring { - divert: RegDiversions, - solver: Solver, -} - -/// Kinds of ABI parameters. 
-enum AbiParams { - Parameters(SigRef), - Returns, -} - -/// Bundle of references that the coloring algorithm needs. -/// -/// Some of the needed mutable references are passed around as explicit function arguments so we -/// can avoid many fights with the borrow checker over mutable borrows of `self`. This includes the -/// `Function` and `LiveValueTracker` references. -/// -/// Immutable context information and mutable references that don't need to be borrowed across -/// method calls should go in this struct. -struct Context<'a> { - // Current instruction as well as reference to function and ISA. - cur: EncCursor<'a>, - - // Cached ISA information. - // We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object. - reginfo: RegInfo, - encinfo: EncInfo, - - // References to contextual data structures we need. - cfg: &'a ControlFlowGraph, - domtree: &'a DominatorTree, - liveness: &'a mut Liveness, - - // References to working set data structures. - // If we need to borrow out of a data structure across a method call, it must be passed as a - // function argument instead, see the `LiveValueTracker` arguments. - divert: &'a mut RegDiversions, - solver: &'a mut Solver, - - // Pristine set of registers that the allocator can use. - // This set remains immutable, we make clones. - usable_regs: RegisterSet, - - uses_pinned_reg: bool, -} - -impl Coloring { - /// Allocate scratch space data structures for the coloring pass. - pub fn new() -> Self { - Self { - divert: RegDiversions::new(), - solver: Solver::new(), - } - } - - /// Clear all data structures in this coloring pass. - pub fn clear(&mut self) { - self.divert.clear(); - self.solver.clear(); - } - - /// Run the coloring algorithm over `func`. 
- pub fn run( - &mut self, - isa: &dyn TargetIsa, - func: &mut Function, - cfg: &ControlFlowGraph, - domtree: &DominatorTree, - liveness: &mut Liveness, - tracker: &mut LiveValueTracker, - ) { - let _tt = timing::ra_coloring(); - log::trace!("Coloring for:\n{}", func.display(isa)); - let mut ctx = Context { - usable_regs: isa.allocatable_registers(func), - uses_pinned_reg: isa.flags().enable_pinned_reg(), - cur: EncCursor::new(func, isa), - reginfo: isa.register_info(), - encinfo: isa.encoding_info(), - cfg, - domtree, - liveness, - divert: &mut self.divert, - solver: &mut self.solver, - }; - ctx.run(tracker) - } -} - -impl<'a> Context<'a> { - /// Is the pinned register usage enabled, and is this register the pinned register? - #[inline] - fn is_pinned_reg(&self, rc: RegClass, reg: RegUnit) -> bool { - rc.is_pinned_reg(self.uses_pinned_reg, reg) - } - - /// Run the coloring algorithm. - fn run(&mut self, tracker: &mut LiveValueTracker) { - self.cur - .func - .locations - .resize(self.cur.func.dfg.num_values()); - - // Visit blocks in reverse post-order. We need to ensure that at least one predecessor has - // been visited before each block. That guarantees that the block arguments have been colored. - for &block in self.domtree.cfg_postorder().iter().rev() { - self.visit_block(block, tracker); - } - } - - /// Visit `block`, assuming that the immediate dominator has already been visited. - fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) { - log::trace!("Coloring {}:", block); - let mut regs = self.visit_block_header(block, tracker); - tracker.drop_dead_params(); - - // Now go through the instructions in `block` and color the values they define. - self.cur.goto_top(block); - while let Some(inst) = self.cur.next_inst() { - self.cur.use_srcloc(inst); - let opcode = self.cur.func.dfg[inst].opcode(); - if !opcode.is_ghost() { - // This is an instruction which either has an encoding or carries ABI-related - // register allocation constraints. 
- let enc = self.cur.func.encodings[inst]; - let constraints = self.encinfo.operand_constraints(enc); - if self.visit_inst(inst, constraints, tracker, &mut regs) { - self.replace_global_defines(inst, tracker); - // Restore cursor location after `replace_global_defines` moves it. - // We want to revisit the copy instructions it inserted. - self.cur.goto_inst(inst); - } - } else { - // This is a ghost instruction with no encoding and no extra constraints. - let (_throughs, kills) = tracker.process_ghost(inst); - self.process_ghost_kills(kills, &mut regs); - } - tracker.drop_dead(inst); - - // We are not able to insert any regmove for diversion or un-diversion after the first - // branch. Instead, we record the diversion to be restored at the entry of the next block, - // which should have a single predecessor. - if opcode.is_branch() { - // The next instruction is necessarily an unconditional branch. - if let Some(branch) = self.cur.next_inst() { - log::trace!( - "Skip coloring {}\n from {}\n with diversions {}", - self.cur.display_inst(branch), - regs.input.display(&self.reginfo), - self.divert.display(&self.reginfo) - ); - use crate::ir::instructions::BranchInfo::*; - let target = match self.cur.func.dfg.analyze_branch(branch) { - NotABranch | Table(_, _) => panic!( - "unexpected instruction {} after a conditional branch", - self.cur.display_inst(branch) - ), - SingleDest(block, _) => block, - }; - - // We have a single branch with a single target, and a block with a single - // predecessor. Thus we can forward the diversion set to the next block. - if self.cfg.pred_iter(target).count() == 1 { - // Transfer the diversion to the next block. - self.divert - .save_for_block(&mut self.cur.func.entry_diversions, target); - log::trace!( - "Set entry-diversion for {} to\n {}", - target, - self.divert.display(&self.reginfo) - ); - } else { - debug_assert!( - self.divert.is_empty(), - "Divert set is non-empty after the terminator." 
- ); - } - assert_eq!( - self.cur.next_inst(), - None, - "Unexpected instruction after a branch group." - ); - } else { - assert!(opcode.is_terminator()); - } - } - } - } - - /// Visit the `block` header. - /// - /// Initialize the set of live registers and color the arguments to `block`. - fn visit_block_header( - &mut self, - block: Block, - tracker: &mut LiveValueTracker, - ) -> AvailableRegs { - // Reposition the live value tracker and deal with the block arguments. - tracker.block_top( - block, - &self.cur.func.dfg, - self.liveness, - &self.cur.func.layout, - self.domtree, - ); - - // Copy the content of the registered diversions to be reused at the - // entry of this basic block. - self.divert.at_block(&self.cur.func.entry_diversions, block); - log::trace!( - "Start {} with entry-diversion set to\n {}", - block, - self.divert.display(&self.reginfo) - ); - - if self.cur.func.layout.entry_block() == Some(block) { - // Parameters on the entry block have ABI constraints. - self.color_entry_params(tracker.live()) - } else { - // The live-ins and parameters of a non-entry block have already been assigned a register. - // Reconstruct the allocatable set. - self.livein_regs(tracker.live()) - } - } - - /// Initialize a set of allocatable registers from the values that are live-in to a block. - /// These values must already be colored when the dominating blocks were processed. - /// - /// Also process the block arguments which were colored when the first predecessor branch was - /// encountered. - fn livein_regs(&self, live: &[LiveValue]) -> AvailableRegs { - // Start from the registers that are actually usable. We don't want to include any reserved - // registers in the set. 
- let mut regs = AvailableRegs::new(&self.usable_regs); - - for lv in live.iter().filter(|lv| !lv.is_dead) { - log::trace!( - "Live-in: {}:{} in {}", - lv.value, - lv.affinity.display(&self.reginfo), - self.divert - .get(lv.value, &self.cur.func.locations) - .display(&self.reginfo) - ); - if let Affinity::Reg(rci) = lv.affinity { - let rc = self.reginfo.rc(rci); - let loc = self.cur.func.locations[lv.value]; - let reg = match loc { - ValueLoc::Reg(reg) => reg, - ValueLoc::Unassigned => panic!("Live-in {} wasn't assigned", lv.value), - ValueLoc::Stack(ss) => { - panic!("Live-in {} is in {}, should be register", lv.value, ss) - } - }; - if lv.is_local { - regs.take(rc, reg, lv.is_local); - } else { - let loc = self.divert.get(lv.value, &self.cur.func.locations); - let reg_divert = match loc { - ValueLoc::Reg(reg) => reg, - ValueLoc::Unassigned => { - panic!("Diversion: Live-in {} wasn't assigned", lv.value) - } - ValueLoc::Stack(ss) => panic!( - "Diversion: Live-in {} is in {}, should be register", - lv.value, ss - ), - }; - regs.take_divert(rc, reg, reg_divert); - } - } - } - - regs - } - - /// Color the parameters on the entry block. - /// - /// These are function parameters that should already have assigned register units in the - /// function signature. - /// - /// Return the set of remaining allocatable registers after filtering out the dead arguments. - fn color_entry_params(&mut self, args: &[LiveValue]) -> AvailableRegs { - let sig = &self.cur.func.signature; - debug_assert_eq!(sig.params.len(), args.len()); - - let mut regs = AvailableRegs::new(&self.usable_regs); - - for (lv, abi) in args.iter().zip(&sig.params) { - match lv.affinity { - Affinity::Reg(rci) => { - let rc = self.reginfo.rc(rci); - if let ArgumentLoc::Reg(reg) = abi.location { - if !lv.is_dead { - regs.take(rc, reg, lv.is_local); - } - self.cur.func.locations[lv.value] = ValueLoc::Reg(reg); - } else { - // This should have been fixed by the reload pass. 
- panic!( - "Entry arg {} has {} affinity, but ABI {}", - lv.value, - lv.affinity.display(&self.reginfo), - abi.display(&self.reginfo) - ); - } - } - // The spiller will have assigned an incoming stack slot already. - Affinity::Stack => debug_assert!(abi.location.is_stack()), - // This is a ghost value, unused in the function. Don't assign it to a location - // either. - Affinity::Unassigned => {} - } - } - - regs - } - - /// Program the input-side ABI constraints for `inst` into the constraint solver. - /// - /// ABI constraints are the fixed register assignments useds for calls and returns. - fn program_input_abi(&mut self, inst: Inst, abi_params: AbiParams) { - let abi_types = match abi_params { - AbiParams::Parameters(sig) => &self.cur.func.dfg.signatures[sig].params, - AbiParams::Returns => &self.cur.func.signature.returns, - }; - - for (abi, &value) in abi_types - .iter() - .zip(self.cur.func.dfg.inst_variable_args(inst)) - { - if let ArgumentLoc::Reg(reg) = abi.location { - if let Affinity::Reg(rci) = self - .liveness - .get(value) - .expect("ABI register must have live range") - .affinity - { - let rc = self.reginfo.rc(rci); - let cur_reg = self.divert.reg(value, &self.cur.func.locations); - self.solver.reassign_in(value, rc, cur_reg, reg); - } else { - panic!("ABI argument {} should be in a register", value); - } - } - } - } - - /// Color the values defined by `inst` and insert any necessary shuffle code to satisfy - /// instruction constraints. - /// - /// Update `regs` to reflect the allocated registers after `inst`, including removing any dead - /// or killed values from the set. - /// - /// Returns true when the global values defined by `inst` must be replaced by local values. 
- fn visit_inst( - &mut self, - inst: Inst, - constraints: Option<&RecipeConstraints>, - tracker: &mut LiveValueTracker, - regs: &mut AvailableRegs, - ) -> bool { - log::trace!( - "Coloring {}\n from {}", - self.cur.display_inst(inst), - regs.input.display(&self.reginfo), - ); - - // block whose arguments should be colored to match the current branch instruction's - // arguments. - let mut color_dest_args = None; - - // Program the solver with register constraints for the input side. - self.solver.reset(®s.input); - - if let Some(constraints) = constraints { - self.program_input_constraints(inst, constraints.ins); - } - - let call_sig = self.cur.func.dfg.call_signature(inst); - if let Some(sig) = call_sig { - self.program_input_abi(inst, AbiParams::Parameters(sig)); - } else if self.cur.func.dfg[inst].opcode().is_return() { - self.program_input_abi(inst, AbiParams::Returns); - } else if self.cur.func.dfg[inst].opcode().is_branch() { - // This is a branch, so we need to make sure that globally live values are in their - // global registers. For blocks that take arguments, we also need to place the argument - // values in the expected registers. - if let Some(dest) = self.cur.func.dfg[inst].branch_destination() { - if self.program_block_arguments(inst, dest) { - color_dest_args = Some(dest); - } - } else { - // This is a multi-way branch like `br_table`. We only support arguments on - // single-destination branches. - debug_assert_eq!( - self.cur.func.dfg.inst_variable_args(inst).len(), - 0, - "Can't handle block arguments: {}", - self.cur.display_inst(inst) - ); - self.undivert_regs(|lr, _| !lr.is_local()); - } - } - - if self.solver.has_fixed_input_conflicts() { - self.divert_fixed_input_conflicts(tracker.live()); - } - - self.solver.inputs_done(); - - // Update the live value tracker with this instruction. - let (throughs, kills, defs) = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness); - - // Get rid of the killed values. 
- for lv in kills { - if let Affinity::Reg(rci) = lv.affinity { - let rc = self.reginfo.rc(rci); - let reg = self.divert.reg(lv.value, &self.cur.func.locations); - - if self.is_pinned_reg(rc, reg) { - // Don't kill the pinned reg, either in the local or global register sets. - debug_assert!(lv.is_local, "pinned register SSA value can't be global"); - continue; - } - - log::trace!( - " kill {} in {} ({} {})", - lv.value, - self.reginfo.display_regunit(reg), - if lv.is_local { "local" } else { "global" }, - rc - ); - self.solver.add_kill(lv.value, rc, reg); - - // Update the global register set which has no diversions. - if !lv.is_local { - regs.global - .free(rc, self.cur.func.locations[lv.value].unwrap_reg()); - } - } - } - - // This aligns with the " from" line at the top of the function. - log::trace!(" glob {}", regs.global.display(&self.reginfo)); - - // This flag is set when the solver failed to find a solution for the global defines that - // doesn't interfere with `regs.global`. We need to rewrite all of `inst`s global defines - // as local defines followed by copies. - let mut replace_global_defines = false; - - // Program the fixed output constraints before the general defines. This allows us to - // detect conflicts between fixed outputs and tied operands where the input value hasn't - // been converted to a solver variable. - if let Some(constraints) = constraints { - if constraints.fixed_outs { - self.program_fixed_outputs( - constraints.outs, - defs, - throughs, - &mut replace_global_defines, - ®s.global, - ); - } - } - - if let Some(sig) = call_sig { - self.program_output_abi( - sig, - defs, - throughs, - &mut replace_global_defines, - ®s.global, - ); - } - - if let Some(constraints) = constraints { - self.program_output_constraints( - inst, - constraints.outs, - defs, - &mut replace_global_defines, - ®s.global, - ); - } - - // Finally, we've fully programmed the constraint solver. - // We expect a quick solution in most cases. 
- let is_reload = match &self.cur.func.dfg[inst] { - InstructionData::Unary { - opcode: Opcode::Fill, - .. - } => true, - _ => false, - }; - - let output_regs = self - .solver - .quick_solve(®s.global, is_reload) - .unwrap_or_else(|_| { - log::trace!("quick_solve failed for {}", self.solver); - self.iterate_solution( - throughs, - ®s.global, - &mut replace_global_defines, - is_reload, - ) - }); - - // The solution and/or fixed input constraints may require us to shuffle the set of live - // registers around. - self.shuffle_inputs(&mut regs.input); - - // If this is the first time we branch to `dest`, color its arguments to match the current - // register state. - if let Some(dest) = color_dest_args { - self.color_block_params(inst, dest); - } - - // Apply the solution to the defs. - for v in self.solver.vars().iter().filter(|&v| v.is_define()) { - self.cur.func.locations[v.value] = ValueLoc::Reg(v.solution); - } - - // Tied defs are not part of the solution above. - // Copy register assignments from tied inputs to tied outputs. - if let Some(constraints) = constraints { - if constraints.tied_ops { - for (constraint, lv) in constraints.outs.iter().zip(defs) { - if let ConstraintKind::Tied(num) = constraint.kind { - let arg = self.cur.func.dfg.inst_args(inst)[num as usize]; - let reg = self.divert.reg(arg, &self.cur.func.locations); - self.cur.func.locations[lv.value] = ValueLoc::Reg(reg); - } - } - } - } - - // Update `regs` for the next instruction. 
- regs.input = output_regs; - for lv in defs { - let loc = self.cur.func.locations[lv.value]; - log::trace!( - " color {} -> {}{}", - lv.value, - loc.display(&self.reginfo), - if lv.is_local { - "" - } else if replace_global_defines { - " (global to be replaced)" - } else { - " (global)" - } - ); - - if let Affinity::Reg(rci) = lv.affinity { - let rc = self.reginfo.rc(rci); - let reg = loc.unwrap_reg(); - - debug_assert!( - !self.is_pinned_reg(rc, reg) - || self.cur.func.dfg[inst].opcode() == Opcode::GetPinnedReg, - "pinned register may not be part of outputs for '{}'.", - self.cur.func.dfg[inst].opcode() - ); - - if self.is_pinned_reg(rc, reg) { - continue; - } - - // Remove the dead defs. - if lv.endpoint == inst { - regs.input.free(rc, reg); - debug_assert!(lv.is_local); - } - - // Track globals in their undiverted locations. - if !lv.is_local && !replace_global_defines { - regs.global.take(rc, reg); - } - } - } - - self.forget_diverted(kills); - - replace_global_defines - } - - /// Program the input-side constraints for `inst` into the constraint solver. - fn program_input_constraints(&mut self, inst: Inst, constraints: &[OperandConstraint]) { - for (constraint, &arg_val) in constraints - .iter() - .zip(self.cur.func.dfg.inst_args(inst)) - .filter(|&(constraint, _)| constraint.kind != ConstraintKind::Stack) - { - // Reload pass is supposed to ensure that all arguments to register operands are - // already in a register. - let cur_reg = self.divert.reg(arg_val, &self.cur.func.locations); - match constraint.kind { - ConstraintKind::FixedReg(regunit) => { - // Add the fixed constraint even if `cur_reg == regunit`. - // It is possible that we will want to convert the value to a variable later, - // and this identity assignment prevents that from happening. - self.solver - .reassign_in(arg_val, constraint.regclass, cur_reg, regunit); - } - ConstraintKind::FixedTied(regunit) => { - // The pinned register may not be part of a fixed tied requirement. 
If this - // becomes the case, then it must be changed to a different register. - debug_assert!( - !self.is_pinned_reg(constraint.regclass, regunit), - "see comment above" - ); - // See comment right above. - self.solver - .reassign_in(arg_val, constraint.regclass, cur_reg, regunit); - } - ConstraintKind::Tied(_) => { - if self.is_pinned_reg(constraint.regclass, cur_reg) { - // Divert the pinned register; it shouldn't be reused for a tied input. - if self.solver.can_add_var(constraint.regclass, cur_reg) { - self.solver.add_var(arg_val, constraint.regclass, cur_reg); - } - } else if !constraint.regclass.contains(cur_reg) { - self.solver.add_var(arg_val, constraint.regclass, cur_reg); - } - } - ConstraintKind::Reg => { - if !constraint.regclass.contains(cur_reg) { - self.solver.add_var(arg_val, constraint.regclass, cur_reg); - } - } - ConstraintKind::Stack => unreachable!(), - } - } - } - - /// Program the complete set of input constraints into the solver. - /// - /// The `program_input_constraints()` function above will not tell the solver about any values - /// that are already assigned to appropriate registers. This is normally fine, but if we want - /// to add additional variables to help the solver, we need to make sure that they are - /// constrained properly. - /// - /// This function completes the work of `program_input_constraints()` by calling `add_var` for - /// all values used by the instruction. 
- fn program_complete_input_constraints(&mut self) { - let inst = self.cur.current_inst().expect("Not on an instruction"); - let constraints = self - .encinfo - .operand_constraints(self.cur.func.encodings[inst]) - .expect("Current instruction not encoded") - .ins; - - for (constraint, &arg_val) in constraints.iter().zip(self.cur.func.dfg.inst_args(inst)) { - match constraint.kind { - ConstraintKind::Reg | ConstraintKind::Tied(_) => { - let cur_reg = self.divert.reg(arg_val, &self.cur.func.locations); - - // This is the opposite condition of `program_input_constraints()`. The pinned - // register mustn't be added back as a variable. - if constraint.regclass.contains(cur_reg) - && !self.is_pinned_reg(constraint.regclass, cur_reg) - { - // This code runs after calling `solver.inputs_done()` so we must identify - // the new variable as killed or live-through. - let layout = &self.cur.func.layout; - if self.liveness[arg_val].killed_at(inst, layout.pp_block(inst), layout) { - self.solver - .add_killed_var(arg_val, constraint.regclass, cur_reg); - } else { - self.solver - .add_through_var(arg_val, constraint.regclass, cur_reg); - } - } - } - ConstraintKind::FixedReg(_) - | ConstraintKind::FixedTied(_) - | ConstraintKind::Stack => {} - } - } - } - - /// Prepare for a branch to `dest`. - /// - /// 1. Any values that are live-in to `dest` must be un-diverted so they live in their globally - /// assigned register. - /// 2. If the `dest` block takes arguments, reassign the branch argument values to the matching - /// registers. - /// - /// Returns true if this is the first time a branch to `dest` is seen, so the `dest` argument - /// values should be colored after `shuffle_inputs`. - fn program_block_arguments(&mut self, inst: Inst, dest: Block) -> bool { - // Find diverted registers that are live-in to `dest` and reassign them to their global - // home. 
- // - // Values with a global live range that are not live in to `dest` could appear as branch - // arguments, so they can't always be un-diverted. - self.undivert_regs(|lr, layout| lr.is_livein(dest, layout)); - - // Now handle the block arguments. - let br_args = self.cur.func.dfg.inst_variable_args(inst); - let dest_args = self.cur.func.dfg.block_params(dest); - debug_assert_eq!(br_args.len(), dest_args.len()); - for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) { - // The first time we encounter a branch to `dest`, we get to pick the location. The - // following times we see a branch to `dest`, we must follow suit. - match self.cur.func.locations[dest_arg] { - ValueLoc::Unassigned => { - // This is the first branch to `dest`, so we should color `dest_arg` instead of - // `br_arg`. However, we don't know where `br_arg` will end up until - // after `shuffle_inputs`. See `color_block_params` below. - // - // It is possible for `dest_arg` to have no affinity, and then it should simply - // be ignored. - if self.liveness[dest_arg].affinity.is_reg() { - return true; - } - } - ValueLoc::Reg(dest_reg) => { - // We've branched to `dest` before. Make sure we use the correct argument - // registers by reassigning `br_arg`. - if let Affinity::Reg(rci) = self.liveness[br_arg].affinity { - let rc = self.reginfo.rc(rci); - let br_reg = self.divert.reg(br_arg, &self.cur.func.locations); - self.solver.reassign_in(br_arg, rc, br_reg, dest_reg); - } else { - panic!("Branch argument {} is not in a register", br_arg); - } - } - ValueLoc::Stack(ss) => { - // The spiller should already have given us identical stack slots. - debug_assert_eq!(ValueLoc::Stack(ss), self.cur.func.locations[br_arg]); - } - } - } - - // No `dest` arguments need coloring. - false - } - - /// Knowing that we've never seen a branch to `dest` before, color its parameters to match our - /// register state. - /// - /// This function is only called when `program_block_arguments()` returned `true`. 
- fn color_block_params(&mut self, inst: Inst, dest: Block) { - let br_args = self.cur.func.dfg.inst_variable_args(inst); - let dest_args = self.cur.func.dfg.block_params(dest); - debug_assert_eq!(br_args.len(), dest_args.len()); - for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) { - match self.cur.func.locations[dest_arg] { - ValueLoc::Unassigned => { - if self.liveness[dest_arg].affinity.is_reg() { - let br_reg = self.divert.reg(br_arg, &self.cur.func.locations); - self.cur.func.locations[dest_arg] = ValueLoc::Reg(br_reg); - } - } - ValueLoc::Reg(_) => panic!("{} arg {} already colored", dest, dest_arg), - // Spilled value consistency is verified by `program_block_arguments()` above. - ValueLoc::Stack(_) => {} - } - } - } - - /// Find all diverted registers where `pred` returns `true` and undo their diversion so they - /// are reallocated to their global register assignments. - fn undivert_regs(&mut self, mut pred: Pred) - where - Pred: FnMut(&LiveRange, &Layout) -> bool, - { - for (&value, rdiv) in self.divert.iter() { - let lr = self - .liveness - .get(value) - .expect("Missing live range for diverted register"); - if pred(lr, &self.cur.func.layout) { - if let Affinity::Reg(rci) = lr.affinity { - let rc = self.reginfo.rc(rci); - // Stack diversions should not be possible here. They only live transiently - // during `shuffle_inputs()`. - self.solver.reassign_in( - value, - rc, - rdiv.to.unwrap_reg(), - rdiv.from.unwrap_reg(), - ); - } else { - panic!( - "Diverted register {} with {} affinity", - value, - lr.affinity.display(&self.reginfo) - ); - } - } - } - } - - /// Find existing live values that conflict with the fixed input register constraints programmed - /// into the constraint solver. Convert them to solver variables so they can be diverted. 
- fn divert_fixed_input_conflicts(&mut self, live: &[LiveValue]) { - for lv in live { - if let Affinity::Reg(rci) = lv.affinity { - let toprc = self.reginfo.toprc(rci); - let reg = self.divert.reg(lv.value, &self.cur.func.locations); - if self.solver.is_fixed_input_conflict(toprc, reg) { - log::trace!( - "adding var to divert fixed input conflict for {}", - toprc.info.display_regunit(reg) - ); - self.solver.add_var(lv.value, toprc, reg); - } - } - } - } - - /// Program any fixed-register output constraints into the solver. This may also detect - /// conflicts between live-through registers and fixed output registers. These live-through - /// values need to be turned into solver variables so they can be reassigned. - fn program_fixed_outputs( - &mut self, - constraints: &[OperandConstraint], - defs: &[LiveValue], - throughs: &[LiveValue], - replace_global_defines: &mut bool, - global_regs: &RegisterSet, - ) { - for (constraint, lv) in constraints.iter().zip(defs) { - match constraint.kind { - ConstraintKind::FixedReg(reg) | ConstraintKind::FixedTied(reg) => { - self.add_fixed_output(lv.value, constraint.regclass, reg, throughs); - if !lv.is_local && !global_regs.is_avail(constraint.regclass, reg) { - log::trace!( - "Fixed output {} in {}:{} is not available in global regs", - lv.value, - constraint.regclass, - self.reginfo.display_regunit(reg) - ); - *replace_global_defines = true; - } - } - ConstraintKind::Reg | ConstraintKind::Tied(_) | ConstraintKind::Stack => {} - } - } - } - - /// Program the output-side ABI constraints for `inst` into the constraint solver. - /// - /// That means return values for a call instruction. - fn program_output_abi( - &mut self, - sig: SigRef, - defs: &[LiveValue], - throughs: &[LiveValue], - replace_global_defines: &mut bool, - global_regs: &RegisterSet, - ) { - // It's technically possible for a call instruction to have fixed results before the - // variable list of results, but we have no known instances of that. 
- // Just assume all results are variable return values. - debug_assert_eq!(defs.len(), self.cur.func.dfg.signatures[sig].returns.len()); - for (i, lv) in defs.iter().enumerate() { - let abi = self.cur.func.dfg.signatures[sig].returns[i]; - if let ArgumentLoc::Reg(reg) = abi.location { - if let Affinity::Reg(rci) = lv.affinity { - let rc = self.reginfo.rc(rci); - self.add_fixed_output(lv.value, rc, reg, throughs); - if !lv.is_local && !global_regs.is_avail(rc, reg) { - log::trace!( - "ABI output {} in {}:{} is not available in global regs", - lv.value, - rc, - self.reginfo.display_regunit(reg) - ); - *replace_global_defines = true; - } - } else { - panic!("ABI argument {} should be in a register", lv.value); - } - } - } - } - - /// Add a single fixed output value to the solver. - fn add_fixed_output( - &mut self, - value: Value, - rc: RegClass, - reg: RegUnit, - throughs: &[LiveValue], - ) { - // Pinned register is already unavailable in the solver, since it is copied in the - // available registers on entry. - if !self.is_pinned_reg(rc, reg) && !self.solver.add_fixed_output(rc, reg) { - // The fixed output conflicts with some of the live-through registers. - for lv in throughs { - if let Affinity::Reg(rci) = lv.affinity { - let toprc2 = self.reginfo.toprc(rci); - let reg2 = self.divert.reg(lv.value, &self.cur.func.locations); - if regs_overlap(rc, reg, toprc2, reg2) { - // This live-through value is interfering with the fixed output assignment. - // Convert it to a solver variable. - self.solver.add_through_var(lv.value, toprc2, reg2); - } - } - } - - let ok = self.solver.add_fixed_output(rc, reg); - debug_assert!(ok, "Couldn't clear fixed output interference for {}", value); - } - self.cur.func.locations[value] = ValueLoc::Reg(reg); - } - - /// Program the output-side constraints for `inst` into the constraint solver. - /// - /// It is assumed that all fixed outputs have already been handled. 
- fn program_output_constraints( - &mut self, - inst: Inst, - constraints: &[OperandConstraint], - defs: &[LiveValue], - replace_global_defines: &mut bool, - global_regs: &RegisterSet, - ) { - for (constraint, lv) in constraints.iter().zip(defs) { - match constraint.kind { - ConstraintKind::FixedReg(_) - | ConstraintKind::FixedTied(_) - | ConstraintKind::Stack => continue, - ConstraintKind::Reg => { - self.solver - .add_def(lv.value, constraint.regclass, !lv.is_local); - } - ConstraintKind::Tied(num) => { - // Find the input operand we're tied to. - // The solver doesn't care about the output value. - let arg = self.cur.func.dfg.inst_args(inst)[num as usize]; - let reg = self.divert.reg(arg, &self.cur.func.locations); - - if let Some(reg) = - self.solver - .add_tied_input(arg, constraint.regclass, reg, !lv.is_local) - { - // The value we're tied to has been assigned to a fixed register. - // We need to make sure that fixed output register is compatible with the - // global register set. - if !lv.is_local && !global_regs.is_avail(constraint.regclass, reg) { - log::trace!( - "Tied output {} in {}:{} is not available in global regs", - lv.value, - constraint.regclass, - self.reginfo.display_regunit(reg) - ); - *replace_global_defines = true; - } - } - } - } - } - } - - /// Try harder to find a solution to the constraint problem since `quick_solve()` failed. - /// - /// We may need to move more registers around before a solution is possible. Use an iterative - /// algorithm that adds one more variable until a solution can be found. - fn iterate_solution( - &mut self, - throughs: &[LiveValue], - global_regs: &RegisterSet, - replace_global_defines: &mut bool, - is_reload: bool, - ) -> RegisterSet { - // Make sure `try_add_var()` below doesn't create a variable with too loose constraints. 
- self.program_complete_input_constraints(); - - loop { - match self.solver.real_solve(global_regs, is_reload) { - Ok(regs) => return regs, - Err(SolverError::Divert(rc)) => { - // Do we have any live-through `rc` registers that are not already variables? - let added = self.try_add_var(rc, throughs); - debug_assert!(added, "Ran out of registers in {}", rc); - } - Err(SolverError::Global(_value)) => { - log::trace!( - "Not enough global registers for {}, trying as local", - _value - ); - // We'll clear the `is_global` flag on all solver variables and instead make a - // note to replace all global defines with local defines followed by a copy. - *replace_global_defines = true; - self.solver.clear_all_global_flags(); - } - }; - } - } - - /// Try to add an `rc` variable to the solver from the `throughs` set. - fn try_add_var(&mut self, rc: RegClass, throughs: &[LiveValue]) -> bool { - log::trace!("Trying to add a {} reg from {} values", rc, throughs.len()); - - for lv in throughs { - if let Affinity::Reg(rci) = lv.affinity { - // The new variable gets to roam the whole top-level register class because it is - // not actually constrained by the instruction. We just want it out of the way. - let toprc2 = self.reginfo.toprc(rci); - let reg2 = self.divert.reg(lv.value, &self.cur.func.locations); - if rc.contains(reg2) - && self.solver.can_add_var(toprc2, reg2) - && !self.is_live_on_outgoing_edge(lv.value) - { - self.solver.add_through_var(lv.value, toprc2, reg2); - return true; - } - } - } - - false - } - - /// Determine if `value` is live on a CFG edge from the current instruction. - /// - /// This means that the current instruction is a branch and `value` is live in to one of the - /// branch destinations. Branch arguments and block parameters are not considered live on the - /// edge. 
- fn is_live_on_outgoing_edge(&self, value: Value) -> bool { - use crate::ir::instructions::BranchInfo::*; - - let inst = self.cur.current_inst().expect("Not on an instruction"); - let layout = &self.cur.func.layout; - match self.cur.func.dfg.analyze_branch(inst) { - NotABranch => false, - SingleDest(block, _) => { - let lr = &self.liveness[value]; - lr.is_livein(block, layout) - } - Table(jt, block) => { - let lr = &self.liveness[value]; - !lr.is_local() - && (block.map_or(false, |block| lr.is_livein(block, layout)) - || self.cur.func.jump_tables[jt] - .iter() - .any(|block| lr.is_livein(*block, layout))) - } - } - } - - /// Emit `regmove` instructions as needed to move the live registers into place before the - /// instruction. Also update `self.divert` accordingly. - /// - /// The `self.cur` cursor is expected to point at the instruction. The register moves are - /// inserted before. - /// - /// The solver needs to be reminded of the available registers before any moves are inserted. - fn shuffle_inputs(&mut self, regs: &mut RegisterSet) { - use crate::regalloc::solver::Move::*; - - let spills = self.solver.schedule_moves(regs); - - // The move operations returned by `schedule_moves` refer to emergency spill slots by - // consecutive indexes starting from 0. Map these to real stack slots. - // It is very unlikely (impossible?) that we would need more than one spill per top-level - // register class, so avoid allocation by using a fixed array here. - let mut slot = [PackedOption::default(); 8]; - debug_assert!(spills <= slot.len(), "Too many spills ({})", spills); - - for m in self.solver.moves() { - match *m { - Reg { - value, - from, - to, - rc, - } => { - debug_assert!( - !self.is_pinned_reg(rc, to), - "pinned register used in a regmove" - ); - self.divert.regmove(value, from, to); - self.cur.ins().regmove(value, from, to); - } - Spill { - value, - from, - to_slot, - .. 
- } => { - debug_assert_eq!(slot[to_slot].expand(), None, "Overwriting slot in use"); - let ss = self - .cur - .func - .stack_slots - .get_emergency_slot(self.cur.func.dfg.value_type(value), &slot[0..spills]); - slot[to_slot] = ss.into(); - self.divert.regspill(value, from, ss); - self.cur.ins().regspill(value, from, ss); - } - Fill { - value, - from_slot, - to, - rc, - } => { - debug_assert!( - !self.is_pinned_reg(rc, to), - "pinned register used in a regfill" - ); - // These slots are single use, so mark `ss` as available again. - let ss = slot[from_slot].take().expect("Using unallocated slot"); - self.divert.regfill(value, ss, to); - self.cur.ins().regfill(value, ss, to); - } - } - } - } - - /// Forget about any register diversions in `kills`. - fn forget_diverted(&mut self, kills: &[LiveValue]) { - if self.divert.is_empty() { - return; - } - - for lv in kills { - if lv.affinity.is_reg() { - self.divert.remove(lv.value); - } - } - } - - /// Replace all global values defined by `inst` with local values that are then copied into the - /// global value: - /// - /// v1 = foo - /// - /// becomes: - /// - /// v20 = foo - /// v1 = copy v20 - /// - /// This is sometimes necessary when there are no global registers available that can satisfy - /// the constraints on the instruction operands. - /// - fn replace_global_defines(&mut self, inst: Inst, tracker: &mut LiveValueTracker) { - log::trace!("Replacing global defs on {}", self.cur.display_inst(inst)); - - // We'll insert copies *after `inst`. Our caller will move the cursor back. - self.cur.next_inst(); - - // The tracker keeps the defs from `inst` at the end. Any dead defs have already been - // removed, so it's not obvious how many defs to process - for lv in tracker.live_mut().iter_mut().rev() { - // Keep going until we reach a value that is not defined by `inst`. 
- if match self.cur.func.dfg.value_def(lv.value) { - ValueDef::Result(i, _) => i != inst, - _ => true, - } { - break; - } - if lv.is_local || !lv.affinity.is_reg() { - continue; - } - - // Now `lv.value` is globally live and defined by `inst`. Replace it with a local live - // range that is copied after `inst`. - let ty = self.cur.func.dfg.value_type(lv.value); - let local = self.cur.func.dfg.replace_result(lv.value, ty); - self.cur.ins().with_result(lv.value).copy(local); - let copy = self.cur.built_inst(); - - // Create a live range for `local: inst -> copy`. - self.liveness.create_dead(local, inst, lv.affinity); - self.liveness.extend_locally( - local, - self.cur.func.layout.pp_block(inst), - copy, - &self.cur.func.layout, - ); - - // Move the definition of the global `lv.value`. - self.liveness.move_def_locally(lv.value, copy); - - // Transfer the register coloring to `local`. - let loc = mem::replace(&mut self.cur.func.locations[lv.value], ValueLoc::default()); - self.cur.func.locations[local] = loc; - - // Update `lv` to reflect the new `local` live range. - lv.value = local; - lv.endpoint = copy; - lv.is_local = true; - - log::trace!( - " + {} with {} in {}", - self.cur.display_inst(copy), - local, - loc.display(&self.reginfo) - ); - } - log::trace!("Done: {}", self.cur.display_inst(inst)); - } - - /// Process kills on a ghost instruction. - /// - Forget diversions. - /// - Free killed registers. 
- fn process_ghost_kills(&mut self, kills: &[LiveValue], regs: &mut AvailableRegs) { - for lv in kills { - if let Affinity::Reg(rci) = lv.affinity { - let rc = self.reginfo.rc(rci); - let loc = match self.divert.remove(lv.value) { - Some(loc) => loc, - None => self.cur.func.locations[lv.value], - }; - regs.input.free(rc, loc.unwrap_reg()); - if !lv.is_local { - regs.global - .free(rc, self.cur.func.locations[lv.value].unwrap_reg()); - } - } - } - } -} - -/// Keep track of the set of available registers in two interference domains: all registers -/// considering diversions and global registers not considering diversions. -struct AvailableRegs { - /// The exact set of registers available on the input side of the current instruction. This - /// takes into account register diversions, and it includes both local and global live ranges. - input: RegisterSet, - - /// Registers available for allocating globally live values. This set ignores any local values, - /// and it does not account for register diversions. - /// - /// Global values must be allocated out of this set because conflicts with other global values - /// can't be resolved with local diversions. - global: RegisterSet, -} - -impl AvailableRegs { - /// Initialize both the input and global sets from `regs`. - pub fn new(regs: &RegisterSet) -> Self { - Self { - input: regs.clone(), - global: regs.clone(), - } - } - - /// Take an un-diverted register from one or both sets. - pub fn take(&mut self, rc: RegClass, reg: RegUnit, is_local: bool) { - self.input.take(rc, reg); - if !is_local { - self.global.take(rc, reg); - } - } - - /// Take a diverted register from both sets for a non-local allocation. 
- pub fn take_divert(&mut self, rc: RegClass, reg: RegUnit, reg_divert: RegUnit) { - self.input.take(rc, reg_divert); - self.global.take(rc, reg); - } -} diff --git a/cranelift/codegen/src/regalloc/context.rs b/cranelift/codegen/src/regalloc/context.rs deleted file mode 100644 index 505b1d127a..0000000000 --- a/cranelift/codegen/src/regalloc/context.rs +++ /dev/null @@ -1,252 +0,0 @@ -//! Register allocator context. -//! -//! The `Context` struct contains data structures that should be preserved across invocations of -//! the register allocator algorithm. This doesn't preserve any data between functions, but it -//! avoids allocating data structures independently for each function begin compiled. - -use crate::dominator_tree::DominatorTree; -use crate::flowgraph::ControlFlowGraph; -use crate::ir::Function; -use crate::isa::TargetIsa; -use crate::regalloc::branch_splitting; -use crate::regalloc::coalescing::Coalescing; -use crate::regalloc::coloring::Coloring; -use crate::regalloc::live_value_tracker::LiveValueTracker; -use crate::regalloc::liveness::Liveness; -use crate::regalloc::reload::Reload; -use crate::regalloc::safepoint::emit_stack_maps; -use crate::regalloc::spilling::Spilling; -use crate::regalloc::virtregs::VirtRegs; -use crate::result::CodegenResult; -use crate::timing; -use crate::topo_order::TopoOrder; -use crate::verifier::{ - verify_context, verify_cssa, verify_liveness, verify_locations, VerifierErrors, -}; - -/// Persistent memory allocations for register allocation. -pub struct Context { - liveness: Liveness, - virtregs: VirtRegs, - coalescing: Coalescing, - topo: TopoOrder, - tracker: LiveValueTracker, - spilling: Spilling, - reload: Reload, - coloring: Coloring, -} - -impl Context { - /// Create a new context for register allocation. - /// - /// This context should be reused for multiple functions in order to avoid repeated memory - /// allocations. 
- pub fn new() -> Self { - Self { - liveness: Liveness::new(), - virtregs: VirtRegs::new(), - coalescing: Coalescing::new(), - topo: TopoOrder::new(), - tracker: LiveValueTracker::new(), - spilling: Spilling::new(), - reload: Reload::new(), - coloring: Coloring::new(), - } - } - - /// Clear all data structures in this context. - pub fn clear(&mut self) { - self.liveness.clear(); - self.virtregs.clear(); - self.coalescing.clear(); - self.topo.clear(); - self.tracker.clear(); - self.spilling.clear(); - self.reload.clear(); - self.coloring.clear(); - } - - /// Current values liveness state. - pub fn liveness(&self) -> &Liveness { - &self.liveness - } - - /// Allocate registers in `func`. - /// - /// After register allocation, all values in `func` have been assigned to a register or stack - /// location that is consistent with instruction encoding constraints. - pub fn run( - &mut self, - isa: &dyn TargetIsa, - func: &mut Function, - cfg: &mut ControlFlowGraph, - domtree: &mut DominatorTree, - ) -> CodegenResult<()> { - let _tt = timing::regalloc(); - debug_assert!(domtree.is_valid()); - - let mut errors = VerifierErrors::default(); - - // `Liveness` and `Coloring` are self-clearing. - self.virtregs.clear(); - - // Tracker state (dominator live sets) is actually reused between the spilling and coloring - // phases. - self.tracker.clear(); - - // Pass: Split branches, add space where to add copy & regmove instructions. - branch_splitting::run(isa, func, cfg, domtree, &mut self.topo); - - // Pass: Liveness analysis. - self.liveness.compute(isa, func, cfg); - - if isa.flags().enable_verifier() { - let ok = verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok(); - - if !ok { - return Err(errors.into()); - } - } - - // Pass: Coalesce and create Conventional SSA form. 
- self.coalescing.conventional_ssa( - isa, - func, - cfg, - domtree, - &mut self.liveness, - &mut self.virtregs, - ); - - if isa.flags().enable_verifier() { - let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok() - && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok() - && verify_cssa( - func, - cfg, - domtree, - &self.liveness, - &self.virtregs, - &mut errors, - ) - .is_ok(); - - if !ok { - return Err(errors.into()); - } - } - - // Pass: Spilling. - self.spilling.run( - isa, - func, - domtree, - &mut self.liveness, - &self.virtregs, - &mut self.topo, - &mut self.tracker, - ); - - if isa.flags().enable_verifier() { - let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok() - && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok() - && verify_cssa( - func, - cfg, - domtree, - &self.liveness, - &self.virtregs, - &mut errors, - ) - .is_ok(); - - if !ok { - return Err(errors.into()); - } - } - - // Pass: Reload. - self.reload.run( - isa, - func, - domtree, - &mut self.liveness, - &mut self.topo, - &mut self.tracker, - ); - - if isa.flags().enable_verifier() { - let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok() - && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok() - && verify_cssa( - func, - cfg, - domtree, - &self.liveness, - &self.virtregs, - &mut errors, - ) - .is_ok(); - - if !ok { - return Err(errors.into()); - } - } - - // Pass: Coloring. - self.coloring.run( - isa, - func, - cfg, - domtree, - &mut self.liveness, - &mut self.tracker, - ); - - // If there are any reference types used, encode safepoints and emit - // stack maps. - // - // This function runs after register allocation has taken place, meaning - // values have locations assigned already, which is necessary for - // creating the stack maps. 
- let safepoints_enabled = isa.flags().enable_safepoints(); - for val in func.dfg.values() { - let ty = func.dfg.value_type(val); - if ty.lane_type().is_ref() { - assert!( - safepoints_enabled, - "reference types were found but safepoints were not enabled" - ); - emit_stack_maps(func, domtree, &self.liveness, &mut self.tracker, isa); - break; - } - } - - if isa.flags().enable_verifier() { - let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok() - && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok() - && verify_locations(isa, func, cfg, Some(&self.liveness), &mut errors).is_ok() - && verify_cssa( - func, - cfg, - domtree, - &self.liveness, - &self.virtregs, - &mut errors, - ) - .is_ok(); - - if !ok { - return Err(errors.into()); - } - } - - // Even if we arrive here, (non-fatal) errors might have been reported, so we - // must make sure absolutely nothing is wrong - if errors.is_empty() { - Ok(()) - } else { - Err(errors.into()) - } - } -} diff --git a/cranelift/codegen/src/regalloc/diversion.rs b/cranelift/codegen/src/regalloc/diversion.rs deleted file mode 100644 index ba91df14a5..0000000000 --- a/cranelift/codegen/src/regalloc/diversion.rs +++ /dev/null @@ -1,322 +0,0 @@ -//! Register diversions. -//! -//! Normally, a value is assigned to a single register or stack location by the register allocator. -//! Sometimes, it is necessary to move register values to a different register in order to satisfy -//! instruction constraints. -//! -//! These register diversions are local to a block. No values can be diverted when entering a new -//! block. 
- -use crate::fx::FxHashMap; -use crate::hash_map::{Entry, Iter}; -use crate::ir::{Block, StackSlot, Value, ValueLoc, ValueLocations}; -use crate::ir::{InstructionData, Opcode}; -use crate::isa::{RegInfo, RegUnit}; -use core::fmt; -use cranelift_entity::{SparseMap, SparseMapValue}; - -#[cfg(feature = "enable-serde")] -use serde::{Deserialize, Serialize}; - -/// A diversion of a value from its original location to a new register or stack location. -/// -/// In IR, a diversion is represented by a `regmove` instruction, possibly a chain of them for the -/// same value. -/// -/// When tracking diversions, the `from` field is the original assigned value location, and `to` is -/// the current one. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] -pub struct Diversion { - /// The original value location. - pub from: ValueLoc, - /// The current value location. - pub to: ValueLoc, -} - -impl Diversion { - /// Make a new diversion. - pub fn new(from: ValueLoc, to: ValueLoc) -> Self { - debug_assert!(from.is_assigned() && to.is_assigned()); - Self { from, to } - } -} - -/// Keep track of diversions in a block. -#[derive(Clone)] -#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] -pub struct RegDiversions { - current: FxHashMap, -} - -/// Keep track of diversions at the entry of block. -#[derive(Clone)] -#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] -struct EntryRegDiversionsValue { - key: Block, - divert: RegDiversions, -} - -/// Map block to their matching RegDiversions at basic blocks entry. -#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] -pub struct EntryRegDiversions { - map: SparseMap, -} - -impl RegDiversions { - /// Create a new empty diversion tracker. - pub fn new() -> Self { - Self { - current: FxHashMap::default(), - } - } - - /// Clear the content of the diversions, to reset the state of the compiler. 
- pub fn clear(&mut self) { - self.current.clear() - } - - /// Are there any diversions? - pub fn is_empty(&self) -> bool { - self.current.is_empty() - } - - /// Get the current diversion of `value`, if any. - pub fn diversion(&self, value: Value) -> Option<&Diversion> { - self.current.get(&value) - } - - /// Get all current diversions. - pub fn iter(&self) -> Iter<'_, Value, Diversion> { - self.current.iter() - } - - /// Get the current location for `value`. Fall back to the assignment map for non-diverted - /// values - pub fn get(&self, value: Value, locations: &ValueLocations) -> ValueLoc { - match self.diversion(value) { - Some(d) => d.to, - None => locations[value], - } - } - - /// Get the current register location for `value`, or panic if `value` isn't in a register. - pub fn reg(&self, value: Value, locations: &ValueLocations) -> RegUnit { - self.get(value, locations).unwrap_reg() - } - - /// Get the current stack location for `value`, or panic if `value` isn't in a stack slot. - pub fn stack(&self, value: Value, locations: &ValueLocations) -> StackSlot { - self.get(value, locations).unwrap_stack() - } - - /// Record any kind of move. - /// - /// The `from` location must match an existing `to` location, if any. - fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) { - debug_assert!(from.is_assigned() && to.is_assigned()); - match self.current.entry(value) { - Entry::Occupied(mut e) => { - // TODO: non-lexical lifetimes should allow removal of the scope and early return. - { - let d = e.get_mut(); - debug_assert_eq!(d.to, from, "Bad regmove chain for {}", value); - if d.from != to { - d.to = to; - return; - } - } - e.remove(); - } - Entry::Vacant(e) => { - e.insert(Diversion::new(from, to)); - } - } - } - - /// Record a register -> register move. - pub fn regmove(&mut self, value: Value, from: RegUnit, to: RegUnit) { - self.divert(value, ValueLoc::Reg(from), ValueLoc::Reg(to)); - } - - /// Record a register -> stack move. 
- pub fn regspill(&mut self, value: Value, from: RegUnit, to: StackSlot) { - self.divert(value, ValueLoc::Reg(from), ValueLoc::Stack(to)); - } - - /// Record a stack -> register move. - pub fn regfill(&mut self, value: Value, from: StackSlot, to: RegUnit) { - self.divert(value, ValueLoc::Stack(from), ValueLoc::Reg(to)); - } - - /// Apply the effect of `inst`. - /// - /// If `inst` is a `regmove`, `regfill`, or `regspill` instruction, update the diversions to - /// match. - pub fn apply(&mut self, inst: &InstructionData) { - match *inst { - InstructionData::RegMove { - opcode: Opcode::Regmove, - arg, - src, - dst, - } => self.regmove(arg, src, dst), - InstructionData::RegSpill { - opcode: Opcode::Regspill, - arg, - src, - dst, - } => self.regspill(arg, src, dst), - InstructionData::RegFill { - opcode: Opcode::Regfill, - arg, - src, - dst, - } => self.regfill(arg, src, dst), - _ => {} - } - } - - /// Drop any recorded move for `value`. - /// - /// Returns the `to` location of the removed diversion. - pub fn remove(&mut self, value: Value) -> Option { - self.current.remove(&value).map(|d| d.to) - } - - /// Resets the state of the current diversions to the recorded diversions at the entry of the - /// given `block`. The recoded diversions is available after coloring on `func.entry_diversions` - /// field. - pub fn at_block(&mut self, entry_diversions: &EntryRegDiversions, block: Block) { - self.clear(); - if let Some(entry_divert) = entry_diversions.map.get(block) { - let iter = entry_divert.divert.current.iter(); - self.current.extend(iter); - } - } - - /// Copy the current state of the diversions, and save it for the entry of the `block` given as - /// argument. - /// - /// Note: This function can only be called once on a `Block` with a given `entry_diversions` - /// argument, otherwise it would panic. - pub fn save_for_block(&mut self, entry_diversions: &mut EntryRegDiversions, target: Block) { - // No need to save anything if there is no diversions to be recorded. 
- if self.is_empty() { - return; - } - debug_assert!(!entry_diversions.map.contains_key(target)); - let iter = self.current.iter(); - let mut entry_divert = Self::new(); - entry_divert.current.extend(iter); - entry_diversions.map.insert(EntryRegDiversionsValue { - key: target, - divert: entry_divert, - }); - } - - /// Check that the recorded entry for a given `block` matches what is recorded in the - /// `entry_diversions`. - pub fn check_block_entry(&self, entry_diversions: &EntryRegDiversions, target: Block) -> bool { - let entry_divert = match entry_diversions.map.get(target) { - Some(entry_divert) => entry_divert, - None => return self.is_empty(), - }; - - if entry_divert.divert.current.len() != self.current.len() { - return false; - } - - for (val, _) in entry_divert.divert.current.iter() { - if !self.current.contains_key(val) { - return false; - } - } - true - } - - /// Return an object that can display the diversions. - pub fn display<'a, R: Into>>(&'a self, regs: R) -> DisplayDiversions<'a> { - DisplayDiversions(&self, regs.into()) - } -} - -impl EntryRegDiversions { - /// Create a new empty entry diversion, to associate diversions to each block entry. - pub fn new() -> Self { - Self { - map: SparseMap::new(), - } - } - - pub fn clear(&mut self) { - self.map.clear(); - } -} - -impl Clone for EntryRegDiversions { - /// The Clone trait is required by `ir::Function`. - fn clone(&self) -> Self { - let mut tmp = Self::new(); - for v in self.map.values() { - tmp.map.insert(v.clone()); - } - tmp - } -} - -/// Implement `SparseMapValue`, as required to make use of a `SparseMap` for mapping the entry -/// diversions for each block. -impl SparseMapValue for EntryRegDiversionsValue { - fn key(&self) -> Block { - self.key - } -} - -/// Object that displays register diversions. 
-pub struct DisplayDiversions<'a>(&'a RegDiversions, Option<&'a RegInfo>); - -impl<'a> fmt::Display for DisplayDiversions<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{{")?; - for (value, div) in self.0.current.iter() { - write!( - f, - " {}: {} -> {}", - value, - div.from.display(self.1), - div.to.display(self.1) - )? - } - write!(f, " }}") - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::entity::EntityRef; - use crate::ir::Value; - - #[test] - fn inserts() { - let mut divs = RegDiversions::new(); - let v1 = Value::new(1); - let v2 = Value::new(2); - - divs.regmove(v1, 10, 12); - assert_eq!( - divs.diversion(v1), - Some(&Diversion { - from: ValueLoc::Reg(10), - to: ValueLoc::Reg(12), - }) - ); - assert_eq!(divs.diversion(v2), None); - - divs.regmove(v1, 12, 11); - assert_eq!(divs.diversion(v1).unwrap().to, ValueLoc::Reg(11)); - divs.regmove(v1, 11, 10); - assert_eq!(divs.diversion(v1), None); - } -} diff --git a/cranelift/codegen/src/regalloc/live_value_tracker.rs b/cranelift/codegen/src/regalloc/live_value_tracker.rs deleted file mode 100644 index ae33a15f4d..0000000000 --- a/cranelift/codegen/src/regalloc/live_value_tracker.rs +++ /dev/null @@ -1,344 +0,0 @@ -//! Track which values are live in a block with instruction granularity. -//! -//! The `LiveValueTracker` keeps track of the set of live SSA values at each instruction in a block. -//! The sets of live values are computed on the fly as the tracker is moved from instruction to -//! instruction, starting at the block header. 
- -use crate::dominator_tree::DominatorTree; -use crate::entity::{EntityList, ListPool}; -use crate::fx::FxHashMap; -use crate::ir::{Block, DataFlowGraph, ExpandedProgramPoint, Inst, Layout, Value}; -use crate::partition_slice::partition_slice; -use crate::regalloc::affinity::Affinity; -use crate::regalloc::liveness::Liveness; -use crate::regalloc::liverange::LiveRange; -use alloc::vec::Vec; - -type ValueList = EntityList; - -/// Compute and track live values throughout a block. -pub struct LiveValueTracker { - /// The set of values that are live at the current program point. - live: LiveValueVec, - - /// Saved set of live values for every jump and branch that can potentially be an immediate - /// dominator of a block. - /// - /// This is the set of values that are live *before* the branch. - idom_sets: FxHashMap, - - /// Memory pool for the live sets. - idom_pool: ListPool, -} - -/// Information about a value that is live at the current program point. -#[derive(Debug)] -pub struct LiveValue { - /// The live value. - pub value: Value, - - /// The local ending point of the live range in the current block, as returned by - /// `LiveRange::def_local_end()` or `LiveRange::livein_local_end()`. - pub endpoint: Inst, - - /// The affinity of the value as represented in its `LiveRange`. - /// - /// This value is simply a copy of the affinity stored in the live range. We copy it because - /// almost all users of `LiveValue` need to look at it. - pub affinity: Affinity, - - /// The live range for this value never leaves its block. - pub is_local: bool, - - /// This value is dead - the live range ends immediately. - pub is_dead: bool, -} - -struct LiveValueVec { - /// The set of values that are live at the current program point. - values: Vec, - - /// How many values at the front of `values` are known to be live after `inst`? - /// - /// This is used to pass a much smaller slice to `partition_slice` when its called a second - /// time for the same instruction. 
- live_prefix: Option<(Inst, usize)>, -} - -impl LiveValueVec { - fn new() -> Self { - Self { - values: Vec::new(), - live_prefix: None, - } - } - - /// Add a new live value to `values`. Copy some properties from `lr`. - fn push(&mut self, value: Value, endpoint: Inst, lr: &LiveRange) { - self.values.push(LiveValue { - value, - endpoint, - affinity: lr.affinity, - is_local: lr.is_local(), - is_dead: lr.is_dead(), - }); - } - - /// Remove all elements. - fn clear(&mut self) { - self.values.clear(); - self.live_prefix = None; - } - - /// Make sure that the values killed by `next_inst` are moved to the end of the `values` - /// vector. - /// - /// Returns the number of values that will be live after `next_inst`. - fn live_after(&mut self, next_inst: Inst) -> usize { - // How many values at the front of the vector are already known to survive `next_inst`? - // We don't need to pass this prefix to `partition_slice()` - let keep = match self.live_prefix { - Some((i, prefix)) if i == next_inst => prefix, - _ => 0, - }; - - // Move the remaining surviving values to the front partition of the vector. - let prefix = keep + partition_slice(&mut self.values[keep..], |v| v.endpoint != next_inst); - - // Remember the new prefix length in case we get called again for the same `next_inst`. - self.live_prefix = Some((next_inst, prefix)); - prefix - } - - /// Remove the values killed by `next_inst`. - fn remove_kill_values(&mut self, next_inst: Inst) { - let keep = self.live_after(next_inst); - self.values.truncate(keep); - } - - /// Remove any dead values. - fn remove_dead_values(&mut self) { - self.values.retain(|v| !v.is_dead); - self.live_prefix = None; - } -} - -impl LiveValueTracker { - /// Create a new blank tracker. - pub fn new() -> Self { - Self { - live: LiveValueVec::new(), - idom_sets: FxHashMap(), - idom_pool: ListPool::new(), - } - } - - /// Clear all cached information. 
- pub fn clear(&mut self) { - self.live.clear(); - self.idom_sets.clear(); - self.idom_pool.clear(); - } - - /// Get the set of currently live values. - /// - /// Between calls to `process_inst()` and `drop_dead()`, this includes both values killed and - /// defined by the current instruction. - pub fn live(&self) -> &[LiveValue] { - &self.live.values - } - - /// Get a mutable set of currently live values. - /// - /// Use with care and don't move entries around. - pub fn live_mut(&mut self) -> &mut [LiveValue] { - &mut self.live.values - } - - /// Move the current position to the top of `block`. - /// - /// This depends on the stored live value set at `block`'s immediate dominator, so that must have - /// been visited first. - /// - /// Returns `(liveins, args)` as a pair of slices. The first slice is the set of live-in values - /// from the immediate dominator. The second slice is the set of `block` parameters. - /// - /// Dead parameters with no uses are included in `args`. Call `drop_dead_args()` to remove them. - pub fn block_top( - &mut self, - block: Block, - dfg: &DataFlowGraph, - liveness: &Liveness, - layout: &Layout, - domtree: &DominatorTree, - ) -> (&[LiveValue], &[LiveValue]) { - // Start over, compute the set of live values at the top of the block from two sources: - // - // 1. Values that were live before `block`'s immediate dominator, filtered for those that are - // actually live-in. - // 2. Arguments to `block` that are not dead. - // - self.live.clear(); - - // Compute the live-in values. Start by filtering the set of values that were live before - // the immediate dominator. Just use the empty set if there's no immediate dominator (i.e., - // the entry block or an unreachable block). - if let Some(idom) = domtree.idom(block) { - // If the immediate dominator exits, we must have a stored list for it. This is a - // requirement to the order blocks are visited: All dominators must have been processed - // before the current block. 
- let idom_live_list = self - .idom_sets - .get(&idom) - .expect("No stored live set for dominator"); - // Get just the values that are live-in to `block`. - for &value in idom_live_list.as_slice(&self.idom_pool) { - let lr = liveness - .get(value) - .expect("Immediate dominator value has no live range"); - - // Check if this value is live-in here. - if let Some(endpoint) = lr.livein_local_end(block, layout) { - self.live.push(value, endpoint, lr); - } - } - } - - // Now add all the live parameters to `block`. - let first_arg = self.live.values.len(); - for &value in dfg.block_params(block) { - let lr = &liveness[value]; - debug_assert_eq!(lr.def(), block.into()); - match lr.def_local_end().into() { - ExpandedProgramPoint::Inst(endpoint) => { - self.live.push(value, endpoint, lr); - } - ExpandedProgramPoint::Block(local_block) => { - // This is a dead block parameter which is not even live into the first - // instruction in the block. - debug_assert_eq!( - local_block, block, - "block parameter live range ends at wrong block header" - ); - // Give this value a fake endpoint that is the first instruction in the block. - // We expect it to be removed by calling `drop_dead_args()`. - self.live - .push(value, layout.first_inst(block).expect("Empty block"), lr); - } - } - } - - self.live.values.split_at(first_arg) - } - - /// Prepare to move past `inst`. - /// - /// Determine the set of already live values that are killed by `inst`, and add the new defined - /// values to the tracked set. - /// - /// Returns `(throughs, kills, defs)` as a tuple of slices: - /// - /// 1. The `throughs` slice is the set of live-through values that are neither defined nor - /// killed by the instruction. - /// 2. The `kills` slice is the set of values that were live before the instruction and are - /// killed at the instruction. This does not include dead defs. - /// 3. The `defs` slice is guaranteed to be in the same order as `inst`'s results, and includes - /// dead defines. 
- /// - /// The order of `throughs` and `kills` is arbitrary. - /// - /// The `drop_dead()` method must be called next to actually remove the dead values from the - /// tracked set after the two returned slices are no longer needed. - pub fn process_inst( - &mut self, - inst: Inst, - dfg: &DataFlowGraph, - liveness: &Liveness, - ) -> (&[LiveValue], &[LiveValue], &[LiveValue]) { - // Save a copy of the live values before any branches or jumps that could be somebody's - // immediate dominator. - if dfg[inst].opcode().is_branch() { - self.save_idom_live_set(inst); - } - - // Move killed values to the end of the vector. - // Don't remove them yet, `drop_dead()` will do that. - let first_kill = self.live.live_after(inst); - - // Add the values defined by `inst`. - let first_def = self.live.values.len(); - for &value in dfg.inst_results(inst) { - let lr = &liveness[value]; - debug_assert_eq!(lr.def(), inst.into()); - match lr.def_local_end().into() { - ExpandedProgramPoint::Inst(endpoint) => { - self.live.push(value, endpoint, lr); - } - ExpandedProgramPoint::Block(block) => { - panic!("Instruction result live range can't end at {}", block); - } - } - } - - ( - &self.live.values[0..first_kill], - &self.live.values[first_kill..first_def], - &self.live.values[first_def..], - ) - } - - /// Prepare to move past a ghost instruction. - /// - /// This is like `process_inst`, except any defs are ignored. - /// - /// Returns `(throughs, kills)`. - pub fn process_ghost(&mut self, inst: Inst) -> (&[LiveValue], &[LiveValue]) { - let first_kill = self.live.live_after(inst); - self.live.values.as_slice().split_at(first_kill) - } - - /// Drop the values that are now dead after moving past `inst`. - /// - /// This removes both live values that were killed by `inst` and dead defines on `inst` itself. - /// - /// This must be called after `process_inst(inst)` and before proceeding to the next - /// instruction. 
- pub fn drop_dead(&mut self, inst: Inst) { - // Remove both live values that were killed by `inst` and dead defines from `inst`. - self.live.remove_kill_values(inst); - } - - /// Drop any values that are marked as `is_dead`. - /// - /// Use this after calling `block_top` to clean out dead block parameters. - pub fn drop_dead_params(&mut self) { - self.live.remove_dead_values(); - } - - /// Process new spills. - /// - /// Any values where `f` returns true are spilled and will be treated as if their affinity was - /// `Stack`. - pub fn process_spills(&mut self, mut f: F) - where - F: FnMut(Value) -> bool, - { - for lv in &mut self.live.values { - if f(lv.value) { - lv.affinity = Affinity::Stack; - } - } - } - - /// Save the current set of live values so it is associated with `idom`. - fn save_idom_live_set(&mut self, idom: Inst) { - let values = self.live.values.iter().map(|lv| lv.value); - let pool = &mut self.idom_pool; - // If there already is a set saved for `idom`, just keep it. - self.idom_sets.entry(idom).or_insert_with(|| { - let mut list = ValueList::default(); - list.extend(values, pool); - list - }); - } -} diff --git a/cranelift/codegen/src/regalloc/liveness.rs b/cranelift/codegen/src/regalloc/liveness.rs deleted file mode 100644 index 2e9c5015bd..0000000000 --- a/cranelift/codegen/src/regalloc/liveness.rs +++ /dev/null @@ -1,443 +0,0 @@ -//! Liveness analysis for SSA values. -//! -//! This module computes the live range of all the SSA values in a function and produces a -//! `LiveRange` instance for each. -//! -//! -//! # Liveness consumers -//! -//! The primary consumer of the liveness analysis is the SSA coloring pass which goes through each -//! block and assigns a register to the defined values. This algorithm needs to maintain a set of the -//! currently live values as it is iterating down the instructions in the block. It asks the -//! following questions: -//! -//! - What is the set of live values at the entry to the block? -//! 
- When moving past a use of a value, is that value still alive in the block, or was that the last -//! use? -//! - When moving past a branch, which of the live values are still live below the branch? -//! -//! The set of `LiveRange` instances can answer these questions through their `def_local_end` and -//! `livein_local_end` queries. The coloring algorithm visits blocks in a topological order of the -//! dominator tree, so it can compute the set of live values at the beginning of a block by starting -//! from the set of live values at the dominating branch instruction and filtering it with -//! `livein_local_end`. These sets do not need to be stored in the liveness analysis. -//! -//! The secondary consumer of the liveness analysis is the spilling pass which needs to count the -//! number of live values at every program point and insert spill code until the number of -//! registers needed is small enough. -//! -//! -//! # Alternative algorithms -//! -//! A number of different liveness analysis algorithms exist, so it is worthwhile to look at a few -//! alternatives. -//! -//! ## Data-flow equations -//! -//! The classic *live variables analysis* that you will find in all compiler books from the -//! previous century does not depend on SSA form. It is typically implemented by iteratively -//! solving data-flow equations on bit-vectors of variables. The result is a live-out bit-vector of -//! variables for every basic block in the program. -//! -//! This algorithm has some disadvantages that makes us look elsewhere: -//! -//! - Quadratic memory use. We need a bit per variable per basic block in the function. -//! - Dense representation of sparse data. In practice, the majority of SSA values never leave -//! their basic block, and those that do spa basic blocks rarely span a large number of basic -//! blocks. This makes the data stored in the bitvectors quite sparse. -//! 
- Traditionally, the data-flow equations were solved for real program *variables* which does -//! not include temporaries used in evaluating expressions. We have an SSA form program which -//! blurs the distinction between temporaries and variables. This makes the quadratic memory -//! problem worse because there are many more SSA values than there was variables in the original -//! program, and we don't know a priori which SSA values leave their basic block. -//! - Missing last-use information. For values that are not live-out of a basic block, we would -//! need to store information about the last use in the block somewhere. LLVM stores this -//! information as a 'kill bit' on the last use in the IR. Maintaining these kill bits has been a -//! source of problems for LLVM's register allocator. -//! -//! Data-flow equations can detect when a variable is used uninitialized, and they can handle -//! multiple definitions of the same variable. We don't need this generality since we already have -//! a program in SSA form. -//! -//! ## LLVM's liveness analysis -//! -//! LLVM's register allocator computes liveness per *virtual register*, where a virtual register is -//! a disjoint union of related SSA values that should be assigned to the same physical register. -//! It uses a compact data structure very similar to our `LiveRange`. The important difference is -//! that Cranelift's `LiveRange` only describes a single SSA value, while LLVM's `LiveInterval` -//! describes the live range of a virtual register *and* which one of the related SSA values is -//! live at any given program point. -//! -//! LLVM computes the live range of each virtual register independently by using the use-def chains -//! that are baked into its IR. The algorithm for a single virtual register is: -//! -//! 1. Initialize the live range with a single-instruction snippet of liveness at each def, using -//! the def-chain. This does not include any phi-values. -//! 2. 
Go through the virtual register's use chain and perform the following steps at each use: -//! 3. Perform an exhaustive depth-first traversal up the CFG from the use. Look for basic blocks -//! that already contain some liveness and extend the last live SSA value in the block to be -//! live-out. Also build a list of new basic blocks where the register needs to be live-in. -//! 4. Iteratively propagate live-out SSA values to the new live-in blocks. This may require new -//! PHI values to be created when different SSA values can reach the same block. -//! -//! The iterative SSA form reconstruction can be skipped if the depth-first search only encountered -//! one SSA value. -//! -//! This algorithm has some advantages compared to the data-flow equations: -//! -//! - The live ranges of local virtual registers are computed very quickly without ever traversing -//! the CFG. The memory needed to store these live ranges is independent of the number of basic -//! blocks in the program. -//! - The time to compute the live range of a global virtual register is proportional to the number -//! of basic blocks covered. Many virtual registers only cover a few blocks, even in very large -//! functions. -//! - A single live range can be recomputed after making modifications to the IR. No global -//! algorithm is necessary. This feature depends on having use-def chains for virtual registers -//! which Cranelift doesn't. -//! -//! Cranelift uses a very similar data structures and algorithms to LLVM, with the important -//! difference that live ranges are computed per SSA value instead of per virtual register, and the -//! uses in Cranelift IR refers to SSA values instead of virtual registers. This means that -//! Cranelift can skip the last step of reconstructing SSA form for the virtual register uses. -//! -//! ## Fast Liveness Checking for SSA-Form Programs -//! -//! A liveness analysis that is often brought up in the context of SSA-based register allocation -//! 
was presented at CGO 2008: -//! -//! > Boissinot, B., Hack, S., Grund, D., de Dinechin, B. D., & Rastello, F. (2008). *Fast Liveness -//! Checking for SSA-Form Programs.* CGO. -//! -//! This analysis uses a global pre-computation that only depends on the CFG of the function. It -//! then allows liveness queries for any (value, program point) pair. Each query traverses the use -//! chain of the value and performs lookups in the precomputed bit-vectors. -//! -//! I did not seriously consider this analysis for Cranelift because: -//! -//! - It depends critically on use chains which Cranelift doesn't have. -//! - Popular variables like the `this` pointer in a C++ method can have very large use chains. -//! Traversing such a long use chain on every liveness lookup has the potential for some nasty -//! quadratic behavior in unfortunate cases. -//! - It says "fast" in the title, but the paper only claims to be 16% faster than a data-flow -//! based approach, which isn't that impressive. -//! -//! Nevertheless, the property of only depending in the CFG structure is very useful. If Cranelift -//! gains use chains, this approach would be worth a proper evaluation. -//! -//! -//! # Cranelift's liveness analysis -//! -//! The algorithm implemented in this module is similar to LLVM's with these differences: -//! -//! - The `LiveRange` data structure describes the liveness of a single SSA value, not a virtual -//! register. -//! - Instructions in Cranelift IR contains references to SSA values, not virtual registers. -//! - All live ranges are computed in one traversal of the program. Cranelift doesn't have use -//! chains, so it is not possible to compute the live range for a single SSA value independently. -//! -//! The liveness computation visits all instructions in the program. The order is not important for -//! the algorithm to be correct. At each instruction, the used values are examined. -//! -//! 
- The first time a value is encountered, its live range is constructed as a dead live range -//! containing only the defining program point. -//! - The local interval of the value's live range is extended so it reaches the use. This may -//! require creating a new live-in local interval for the block. -//! - If the live range became live-in to the block, add the block to a work-list. -//! - While the work-list is non-empty pop a live-in block and repeat the two steps above, using each -//! of the live-in block's CFG predecessor instructions as a 'use'. -//! -//! The effect of this algorithm is to extend the live range of each to reach uses as they are -//! visited. No data about each value beyond the live range is needed between visiting uses, so -//! nothing is lost by computing the live range of all values simultaneously. -//! -//! ## Cache efficiency of Cranelift vs LLVM -//! -//! Since LLVM computes the complete live range of a virtual register in one go, it can keep the -//! whole `LiveInterval` for the register in L1 cache. Since it is visiting the instructions in use -//! chain order, some cache thrashing can occur as a result of pulling instructions into cache -//! somewhat chaotically. -//! -//! Cranelift uses a transposed algorithm, visiting instructions in order. This means that each -//! instruction is brought into cache only once, and it is likely that the other instructions on -//! the same cache line will be visited before the line is evicted. -//! -//! Cranelift's problem is that the `LiveRange` structs are visited many times and not always -//! regularly. We should strive to make the `LiveRange` struct as small as possible such that -//! multiple related values can live on the same cache line. -//! -//! - Local values should fit in a 16-byte `LiveRange` struct or smaller. The current -//! implementation contains a 24-byte `Vec` object and a redundant `value` member pushing the -//! size to 32 bytes. -//! 
- Related values should be stored on the same cache line. The current sparse set implementation -//! does a decent job of that. -//! - For global values, the list of live-in intervals is very likely to fit on a single cache -//! line. These lists are very likely to be found in L2 cache at least. -//! -//! There is some room for improvement. - -use crate::entity::SparseMap; -use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; -use crate::ir::dfg::ValueDef; -use crate::ir::{Block, Function, Inst, Layout, ProgramPoint, Value}; -use crate::isa::{EncInfo, OperandConstraint, TargetIsa}; -use crate::regalloc::affinity::Affinity; -use crate::regalloc::liverange::LiveRange; -use crate::timing; -use alloc::vec::Vec; -use core::mem; -use core::ops::Index; - -/// A set of live ranges, indexed by value number. -type LiveRangeSet = SparseMap; - -/// Get a mutable reference to the live range for `value`. -/// Create it if necessary. -fn get_or_create<'a>( - lrset: &'a mut LiveRangeSet, - value: Value, - isa: &dyn TargetIsa, - func: &Function, - encinfo: &EncInfo, -) -> &'a mut LiveRange { - // It would be better to use `get_mut()` here, but that leads to borrow checker fighting - // which can probably only be resolved by non-lexical lifetimes. - // https://github.com/rust-lang/rfcs/issues/811 - if lrset.get(value).is_none() { - // Create a live range for value. We need the program point that defines it. - let def; - let affinity; - match func.dfg.value_def(value) { - ValueDef::Result(inst, rnum) => { - def = inst.into(); - // Initialize the affinity from the defining instruction's result constraints. - // Don't do this for call return values which are always tied to a single register. - affinity = encinfo - .operand_constraints(func.encodings[inst]) - .and_then(|rc| rc.outs.get(rnum)) - .map(Affinity::new) - .or_else(|| { - // If this is a call, get the return value affinity. 
- func.dfg - .call_signature(inst) - .map(|sig| Affinity::abi(&func.dfg.signatures[sig].returns[rnum], isa)) - }) - .unwrap_or_default(); - } - ValueDef::Param(block, num) => { - def = block.into(); - if func.layout.entry_block() == Some(block) { - // The affinity for entry block parameters can be inferred from the function - // signature. - affinity = Affinity::abi(&func.signature.params[num], isa); - } else { - // Give normal block parameters a register affinity matching their type. - let rc = isa.regclass_for_abi_type(func.dfg.value_type(value)); - affinity = Affinity::Reg(rc.into()); - } - } - }; - lrset.insert(LiveRange::new(value, def, affinity)); - } - lrset.get_mut(value).unwrap() -} - -/// Extend the live range for `value` so it reaches `to` which must live in `block`. -fn extend_to_use( - lr: &mut LiveRange, - block: Block, - to: Inst, - worklist: &mut Vec, - func: &Function, - cfg: &ControlFlowGraph, -) { - // This is our scratch working space, and we'll leave it empty when we return. - debug_assert!(worklist.is_empty()); - - // Extend the range locally in `block`. - // If there already was a live interval in that block, we're done. - if lr.extend_in_block(block, to, &func.layout) { - worklist.push(block); - } - - // The work list contains those blocks where we have learned that the value needs to be - // live-in. - // - // This algorithm becomes a depth-first traversal up the CFG, enumerating all paths through the - // CFG from the existing live range to `block`. - // - // Extend the live range as we go. The live range itself also serves as a visited set since - // `extend_in_block` will never return true twice for the same block. - // - while let Some(livein) = worklist.pop() { - // We've learned that the value needs to be live-in to the `livein` block. - // Make sure it is also live at all predecessor branches to `livein`. 
- for BlockPredecessor { - block: pred, - inst: branch, - } in cfg.pred_iter(livein) - { - if lr.extend_in_block(pred, branch, &func.layout) { - // This predecessor block also became live-in. We need to process it later. - worklist.push(pred); - } - } - } -} - -/// Liveness analysis for a function. -/// -/// Compute a live range for every SSA value used in the function. -pub struct Liveness { - /// The live ranges that have been computed so far. - ranges: LiveRangeSet, - - /// Working space for the `extend_to_use` algorithm. - /// This vector is always empty, except for inside that function. - /// It lives here to avoid repeated allocation of scratch memory. - worklist: Vec, -} - -impl Liveness { - /// Create a new empty liveness analysis. - /// - /// The memory allocated for this analysis can be reused for multiple functions. Use the - /// `compute` method to actually runs the analysis for a function. - pub fn new() -> Self { - Self { - ranges: LiveRangeSet::new(), - worklist: Vec::new(), - } - } - - /// Current live ranges. - pub fn ranges(&self) -> &LiveRangeSet { - &self.ranges - } - - /// Clear all data structures in this liveness analysis. - pub fn clear(&mut self) { - self.ranges.clear(); - self.worklist.clear(); - } - - /// Get the live range for `value`, if it exists. - pub fn get(&self, value: Value) -> Option<&LiveRange> { - self.ranges.get(value) - } - - /// Create a new live range for `value`. - /// - /// The new live range will be defined at `def` with no extent, like a dead value. - /// - /// This asserts that `value` does not have an existing live range. - pub fn create_dead(&mut self, value: Value, def: PP, affinity: Affinity) - where - PP: Into, - { - let old = self - .ranges - .insert(LiveRange::new(value, def.into(), affinity)); - debug_assert!(old.is_none(), "{} already has a live range", value); - } - - /// Move the definition of `value` to `def`. 
- /// - /// The old and new def points must be in the same block, and before the end of the live range. - pub fn move_def_locally(&mut self, value: Value, def: PP) - where - PP: Into, - { - let lr = self.ranges.get_mut(value).expect("Value has no live range"); - lr.move_def_locally(def.into()); - } - - /// Locally extend the live range for `value` to reach `user`. - /// - /// It is assumed the `value` is already live before `user` in `block`. - /// - /// Returns a mutable reference to the value's affinity in case that also needs to be updated. - pub fn extend_locally( - &mut self, - value: Value, - block: Block, - user: Inst, - layout: &Layout, - ) -> &mut Affinity { - debug_assert_eq!(Some(block), layout.inst_block(user)); - let lr = self.ranges.get_mut(value).expect("Value has no live range"); - let livein = lr.extend_in_block(block, user, layout); - debug_assert!(!livein, "{} should already be live in {}", value, block); - &mut lr.affinity - } - - /// Change the affinity of `value` to `Stack` and return the previous affinity. - pub fn spill(&mut self, value: Value) -> Affinity { - let lr = self.ranges.get_mut(value).expect("Value has no live range"); - mem::replace(&mut lr.affinity, Affinity::Stack) - } - - /// Compute the live ranges of all SSA values used in `func`. - /// This clears out any existing analysis stored in this data structure. - pub fn compute(&mut self, isa: &dyn TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) { - let _tt = timing::ra_liveness(); - self.ranges.clear(); - - // Get ISA data structures used for computing live range affinities. - let encinfo = isa.encoding_info(); - let reginfo = isa.register_info(); - - // The liveness computation needs to visit all uses, but the order doesn't matter. - // TODO: Perhaps this traversal of the function could be combined with a dead code - // elimination pass if we visit a post-order of the dominator tree? 
- for block in func.layout.blocks() { - // Make sure we have created live ranges for dead block parameters. - // TODO: If these parameters are really dead, we could remove them, except for the - // entry block which must match the function signature. - for &arg in func.dfg.block_params(block) { - get_or_create(&mut self.ranges, arg, isa, func, &encinfo); - } - - for inst in func.layout.block_insts(block) { - // Eliminate all value aliases, they would confuse the register allocator. - func.dfg.resolve_aliases_in_arguments(inst); - - // Make sure we have created live ranges for dead defs. - // TODO: When we implement DCE, we can use the absence of a live range to indicate - // an unused value. - for &def in func.dfg.inst_results(inst) { - get_or_create(&mut self.ranges, def, isa, func, &encinfo); - } - - // Iterator of constraints, one per value operand. - let encoding = func.encodings[inst]; - let operand_constraint_slice: &[OperandConstraint] = - encinfo.operand_constraints(encoding).map_or(&[], |c| c.ins); - let mut operand_constraints = operand_constraint_slice.iter(); - - for &arg in func.dfg.inst_args(inst) { - // Get the live range, create it as a dead range if necessary. - let lr = get_or_create(&mut self.ranges, arg, isa, func, &encinfo); - - // Extend the live range to reach this use. - extend_to_use(lr, block, inst, &mut self.worklist, func, cfg); - - // Apply operand constraint, ignoring any variable arguments after the fixed - // operands described by `operand_constraints`. Variable arguments are either - // block arguments or call/return ABI arguments. 
- if let Some(constraint) = operand_constraints.next() { - lr.affinity.merge(constraint, ®info); - } - } - } - } - } -} - -impl Index for Liveness { - type Output = LiveRange; - fn index(&self, index: Value) -> &LiveRange { - self.ranges - .get(index) - .unwrap_or_else(|| panic!("{} has no live range", index)) - } -} diff --git a/cranelift/codegen/src/regalloc/liverange.rs b/cranelift/codegen/src/regalloc/liverange.rs deleted file mode 100644 index 91cff53b03..0000000000 --- a/cranelift/codegen/src/regalloc/liverange.rs +++ /dev/null @@ -1,720 +0,0 @@ -//! Data structure representing the live range of an SSA value. -//! -//! Live ranges are tracked per SSA value, not per variable or virtual register. The live range of -//! an SSA value begins where it is defined and extends to all program points where the value is -//! still needed. -//! -//! # Local Live Ranges -//! -//! Inside a single basic block, the live range of a value is always an interval between -//! two program points (if the value is live in the block at all). The starting point is either: -//! -//! 1. The instruction that defines the value, or -//! 2. The block header, because the value is an argument to the block, or -//! 3. The block header, because the value is defined in another block and live-in to this one. -//! -//! The ending point of the local live range is the last of the following program points in the -//! block: -//! -//! 1. The last use in the block, where a *use* is an instruction that has the value as an argument. -//! 2. The last branch or jump instruction in the block that can reach a use. -//! 3. If the value has no uses anywhere (a *dead value*), the program point that defines it. -//! -//! Note that 2. includes loop back-edges to the same block. In general, if a value is defined -//! outside a loop and used inside the loop, it will be live in the entire loop. -//! -//! # Global Live Ranges -//! -//! 
Values that appear in more than one block have a *global live range* which can be seen as the -//! disjoint union of the per-block local intervals for all of the blocks where the value is live. -//! Together with a `ProgramOrder` which provides a linear ordering of the blocks, the global live -//! range becomes a linear sequence of disjoint intervals, at most one per block. -//! -//! In the special case of a dead value, the global live range is a single interval where the start -//! and end points are the same. The global live range of a value is never completely empty. -//! -//! # Register interference -//! -//! The register allocator uses live ranges to determine if values *interfere*, which means that -//! they can't be stored in the same register. Two live ranges interfere if and only if any of -//! their intervals overlap. -//! -//! If one live range ends at an instruction that defines another live range, those two live ranges -//! are not considered to interfere. This is because most ISAs allow instructions to reuse an input -//! register for an output value. If Cranelift gets support for inline assembly, we will need to -//! handle *early clobbers* which are output registers that are not allowed to alias any input -//! registers. -//! -//! If `i1 < i2 < i3` are program points, we have: -//! -//! - `i1-i2` and `i1-i3` interfere because the intervals overlap. -//! - `i1-i2` and `i2-i3` don't interfere. -//! - `i1-i3` and `i2-i2` do interfere because the dead def would clobber the register. -//! - `i1-i2` and `i2-i2` don't interfere. -//! - `i2-i3` and `i2-i2` do interfere. -//! -//! Because of this behavior around interval end points, live range interference is not completely -//! equivalent to mathematical intersection of open or half-open intervals. -//! -//! # Implementation notes -//! -//! A few notes about the implementation of the live intervals field `liveins`. This should not -//! concern someone only looking to use the public interface. -//! -//! 
## Current representation -//! -//! Our current implementation uses a sorted array of compressed intervals, represented by their -//! boundaries (Block, Inst), sorted by Block. This is a simple data structure, enables coalescing of -//! intervals easily, and shows some nice performance behavior. See -//! for benchmarks against using a -//! bforest::Map. -//! -//! ## block ordering -//! -//! The relative order of blocks is used to maintain a sorted list of live-in intervals and to -//! coalesce adjacent live-in intervals when the prior interval covers the whole block. This doesn't -//! depend on any property of the program order, so alternative orderings are possible: -//! -//! 1. The block layout order. This is what we currently use. -//! 2. A topological order of the dominator tree. All the live-in intervals would come after the -//! def interval. -//! 3. A numerical order by block number. Performant because it doesn't need to indirect through the -//! `ProgramOrder` for comparisons. -//! -//! These orderings will cause small differences in coalescing opportunities, but all of them would -//! do a decent job of compressing a long live range. The numerical order might be preferable -//! because: -//! -//! - It has better performance because block numbers can be compared directly without any table -//! lookups. -//! - If block numbers are not reused, it is safe to allocate new blocks without getting spurious -//! live-in intervals from any coalesced representations that happen to cross a new block. -//! -//! For comparing instructions, the layout order is always what we want. -//! -//! ## Alternative representation -//! -//! Since a local live-in interval always begins at its block header, it is uniquely described by its -//! end point instruction alone. We can use the layout to look up the block containing the end point. -//! This means that a sorted `Vec` would be enough to represent the set of live-in intervals. -//! -//! 
Coalescing is an important compression technique because some live ranges can span thousands of -//! blocks. We can represent that by switching to a sorted `Vec` representation where -//! an `[Block, Inst]` pair represents a coalesced range, while an `Inst` entry without a preceding -//! `Block` entry represents a single live-in interval. -//! -//! This representation is more compact for a live range with many uncoalesced live-in intervals. -//! It is more complicated to work with, though, so it is probably not worth it. The performance -//! benefits of switching to a numerical block order only appears if the binary search is doing -//! block-block comparisons. -//! -//! A `BTreeMap` could have been used for the live-in intervals, but it doesn't provide -//! the necessary API to make coalescing easy, nor does it optimize for our types' sizes. -//! -//! Even the specialized `bforest::Map` implementation is slower than a plain sorted -//! array, see for details. - -use crate::entity::SparseMapValue; -use crate::ir::{Block, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value}; -use crate::regalloc::affinity::Affinity; -use core::cmp::Ordering; -use core::marker::PhantomData; -use smallvec::SmallVec; - -/// Global live range of a single SSA value. -/// -/// As [explained in the module documentation](index.html#local-live-ranges), the live range of an -/// SSA value is the disjoint union of a set of intervals, each local to a single block, and with at -/// most one interval per block. We further distinguish between: -/// -/// 1. The *def interval* is the local interval in the block where the value is defined, and -/// 2. The *live-in intervals* are the local intervals in the remaining blocks. -/// -/// A live-in interval always begins at the block header, while the def interval can begin at the -/// defining instruction, or at the block header for a block argument value. 
-/// -/// All values have a def interval, but a large proportion of values don't have any live-in -/// intervals. These are called *local live ranges*. -/// -/// # Program order requirements -/// -/// The internal representation of a `LiveRange` depends on a consistent `ProgramOrder` both for -/// ordering instructions inside a block *and* for ordering blocks. The methods that depend on the -/// ordering take an explicit `ProgramOrder` object, and it is the caller's responsibility to -/// ensure that the provided ordering is consistent between calls. -/// -/// In particular, changing the order of blocks or inserting new blocks will invalidate live ranges. -/// -/// Inserting new instructions in the layout is safe, but removing instructions is not. Besides the -/// instructions using or defining their value, `LiveRange` structs can contain references to -/// branch and jump instructions. -pub type LiveRange = GenericLiveRange; - -// See comment of liveins below. -pub struct Interval { - begin: Block, - end: Inst, -} - -/// Generic live range implementation. -/// -/// The intended generic parameter is `PO=Layout`, but tests are simpler with a mock order. -/// Use `LiveRange` instead of using this generic directly. -pub struct GenericLiveRange { - /// The value described by this live range. - /// This member can't be modified in case the live range is stored in a `SparseMap`. - value: Value, - - /// The preferred register allocation for this value. - pub affinity: Affinity, - - /// The instruction or block header where this value is defined. - def_begin: ProgramPoint, - - /// The end point of the def interval. This must always belong to the same block as `def_begin`. - /// - /// We always have `def_begin <= def_end` with equality implying a dead def live range with no - /// uses. - def_end: ProgramPoint, - - /// Additional live-in intervals sorted in program order. - /// - /// This vector is empty for most values which are only used in one block. 
- /// - /// An entry `block -> inst` means that the live range is live-in to `block`, continuing up to - /// `inst` which may belong to a later block in the program order. - /// - /// The entries are non-overlapping, and none of them overlap the block where the value is - /// defined. - liveins: SmallVec<[Interval; 2]>, - - po: PhantomData<*const PO>, -} - -/// A simple helper macro to make comparisons more natural to read. -macro_rules! cmp { - ($order:ident, $a:ident > $b:expr) => { - $order.cmp($a, $b) == Ordering::Greater - }; - ($order:ident, $a:ident >= $b:expr) => { - $order.cmp($a, $b) != Ordering::Less - }; - ($order:ident, $a:ident < $b:expr) => { - $order.cmp($a, $b) == Ordering::Less - }; - ($order:ident, $a:ident <= $b:expr) => { - $order.cmp($a, $b) != Ordering::Greater - }; -} - -impl GenericLiveRange { - /// Create a new live range for `value` defined at `def`. - /// - /// The live range will be created as dead, but it can be extended with `extend_in_block()`. - pub fn new(value: Value, def: ProgramPoint, affinity: Affinity) -> Self { - Self { - value, - affinity, - def_begin: def, - def_end: def, - liveins: SmallVec::new(), - po: PhantomData, - } - } - - /// Finds an entry in the compressed set of live-in intervals that contains `block`, or return - /// the position where to insert such a new entry. - fn lookup_entry_containing_block(&self, block: Block, order: &PO) -> Result { - self.liveins - .binary_search_by(|interval| order.cmp(interval.begin, block)) - .or_else(|n| { - // The previous interval's end might cover the searched block. - if n > 0 && cmp!(order, block <= self.liveins[n - 1].end) { - Ok(n - 1) - } else { - Err(n) - } - }) - } - - /// Extend the local interval for `block` so it reaches `to` which must belong to `block`. - /// Create a live-in interval if necessary. - /// - /// If the live range already has a local interval in `block`, extend its end point so it - /// includes `to`, and return false. 
- /// - /// If the live range did not previously have a local interval in `block`, add one so the value - /// is live-in to `block`, extending to `to`. Return true. - /// - /// The return value can be used to detect if we just learned that the value is live-in to - /// `block`. This can trigger recursive extensions in `block`'s CFG predecessor blocks. - pub fn extend_in_block(&mut self, block: Block, inst: Inst, order: &PO) -> bool { - // First check if we're extending the def interval. - // - // We're assuming here that `inst` never precedes `def_begin` in the same block, but we can't - // check it without a method for getting `inst`'s block. - if cmp!(order, block <= self.def_end) && cmp!(order, inst >= self.def_begin) { - let inst_pp = inst.into(); - debug_assert_ne!( - inst_pp, self.def_begin, - "Can't use value in the defining instruction." - ); - if cmp!(order, inst > self.def_end) { - self.def_end = inst_pp; - } - return false; - } - - // Now check if we're extending any of the existing live-in intervals. - match self.lookup_entry_containing_block(block, order) { - Ok(n) => { - // We found one interval and might need to extend it. - if cmp!(order, inst <= self.liveins[n].end) { - // Both interval parts are already included in a compressed interval. - return false; - } - - // If the instruction at the end is the last instruction before the next block, - // coalesce the two intervals: - // [ival.begin; ival.end] + [next.begin; next.end] = [ival.begin; next.end] - if let Some(next) = &self.liveins.get(n + 1) { - if order.is_block_gap(inst, next.begin) { - // At this point we can choose to remove the current interval or the next - // one; remove the next one to avoid one memory move. - let next_end = next.end; - debug_assert!(cmp!(order, next_end > self.liveins[n].end)); - self.liveins[n].end = next_end; - self.liveins.remove(n + 1); - return false; - } - } - - // We can't coalesce, just extend the interval. 
- self.liveins[n].end = inst; - false - } - - Err(n) => { - // No interval was found containing the current block: we need to insert a new one, - // unless there's a coalescing opportunity with the previous or next one. - let coalesce_next = self - .liveins - .get(n) - .filter(|next| order.is_block_gap(inst, next.begin)) - .is_some(); - let coalesce_prev = self - .liveins - .get(n.wrapping_sub(1)) - .filter(|prev| order.is_block_gap(prev.end, block)) - .is_some(); - - match (coalesce_prev, coalesce_next) { - // The new interval is the missing hole between prev and next: we can merge - // them all together. - (true, true) => { - let prev_end = self.liveins[n - 1].end; - debug_assert!(cmp!(order, prev_end <= self.liveins[n].end)); - self.liveins[n - 1].end = self.liveins[n].end; - self.liveins.remove(n); - } - - // Coalesce only with the previous or next one. - (true, false) => { - debug_assert!(cmp!(order, inst >= self.liveins[n - 1].end)); - self.liveins[n - 1].end = inst; - } - (false, true) => { - debug_assert!(cmp!(order, block <= self.liveins[n].begin)); - self.liveins[n].begin = block; - } - - (false, false) => { - // No coalescing opportunity, we have to insert. - self.liveins.insert( - n, - Interval { - begin: block, - end: inst, - }, - ); - } - } - - true - } - } - } - - /// Is this the live range of a dead value? - /// - /// A dead value has no uses, and its live range ends at the same program point where it is - /// defined. - pub fn is_dead(&self) -> bool { - self.def_begin == self.def_end - } - - /// Is this a local live range? - /// - /// A local live range is only used in the same block where it was defined. It is allowed to span - /// multiple basic blocks within that block. - pub fn is_local(&self) -> bool { - self.liveins.is_empty() - } - - /// Get the program point where this live range is defined. - /// - /// This will be a block header when the value is a block argument, otherwise it is the defining - /// instruction. 
- pub fn def(&self) -> ProgramPoint { - self.def_begin - } - - /// Move the definition of this value to a new program point. - /// - /// It is only valid to move the definition within the same block, and it can't be moved beyond - /// `def_local_end()`. - pub fn move_def_locally(&mut self, def: ProgramPoint) { - self.def_begin = def; - } - - /// Get the local end-point of this live range in the block where it is defined. - /// - /// This can be the block header itself in the case of a dead block argument. - /// Otherwise, it will be the last local use or branch/jump that can reach a use. - pub fn def_local_end(&self) -> ProgramPoint { - self.def_end - } - - /// Get the local end-point of this live range in a block where it is live-in. - /// - /// If this live range is not live-in to `block`, return `None`. Otherwise, return the end-point - /// of this live range's local interval in `block`. - /// - /// If the live range is live through all of `block`, the terminator of `block` is a correct - /// answer, but it is also possible that an even later program point is returned. So don't - /// depend on the returned `Inst` to belong to `block`. - pub fn livein_local_end(&self, block: Block, order: &PO) -> Option { - self.lookup_entry_containing_block(block, order) - .and_then(|i| { - let inst = self.liveins[i].end; - if cmp!(order, block < inst) { - Ok(inst) - } else { - // Can be any error type, really, since it's discarded by ok(). - Err(i) - } - }) - .ok() - } - - /// Is this value live-in to `block`? - /// - /// A block argument is not considered to be live in. - pub fn is_livein(&self, block: Block, order: &PO) -> bool { - self.livein_local_end(block, order).is_some() - } - - /// Get all the live-in intervals. - /// - /// Note that the intervals are stored in a compressed form so each entry may span multiple - /// blocks where the value is live in. 
- pub fn liveins<'a>(&'a self) -> impl Iterator + 'a { - self.liveins - .iter() - .map(|interval| (interval.begin, interval.end)) - } - - /// Check if this live range overlaps a definition in `block`. - pub fn overlaps_def(&self, def: ExpandedProgramPoint, block: Block, order: &PO) -> bool { - // Two defs at the same program point always overlap, even if one is dead. - if def == self.def_begin.into() { - return true; - } - - // Check for an overlap with the local range. - if cmp!(order, def >= self.def_begin) && cmp!(order, def < self.def_end) { - return true; - } - - // Check for an overlap with a live-in range. - match self.livein_local_end(block, order) { - Some(inst) => cmp!(order, def < inst), - None => false, - } - } - - /// Check if this live range reaches a use at `user` in `block`. - pub fn reaches_use(&self, user: Inst, block: Block, order: &PO) -> bool { - // Check for an overlap with the local range. - if cmp!(order, user > self.def_begin) && cmp!(order, user <= self.def_end) { - return true; - } - - // Check for an overlap with a live-in range. - match self.livein_local_end(block, order) { - Some(inst) => cmp!(order, user <= inst), - None => false, - } - } - - /// Check if this live range is killed at `user` in `block`. - pub fn killed_at(&self, user: Inst, block: Block, order: &PO) -> bool { - self.def_local_end() == user.into() || self.livein_local_end(block, order) == Some(user) - } -} - -/// Allow a `LiveRange` to be stored in a `SparseMap` indexed by values. -impl SparseMapValue for GenericLiveRange { - fn key(&self) -> Value { - self.value - } -} - -#[cfg(test)] -mod tests { - use super::{GenericLiveRange, Interval}; - use crate::entity::EntityRef; - use crate::ir::{Block, Inst, Value}; - use crate::ir::{ExpandedProgramPoint, ProgramOrder}; - use alloc::vec::Vec; - use core::cmp::Ordering; - - // Dummy program order which simply compares indexes. 
- // It is assumed that blocks have indexes that are multiples of 10, and instructions have indexes - // in between. `is_block_gap` assumes that terminator instructions have indexes of the form - // block * 10 + 1. This is used in the coalesce test. - struct ProgOrder {} - - impl ProgramOrder for ProgOrder { - fn cmp(&self, a: A, b: B) -> Ordering - where - A: Into, - B: Into, - { - fn idx(pp: ExpandedProgramPoint) -> usize { - match pp { - ExpandedProgramPoint::Inst(i) => i.index(), - ExpandedProgramPoint::Block(e) => e.index(), - } - } - - let ia = idx(a.into()); - let ib = idx(b.into()); - ia.cmp(&ib) - } - - fn is_block_gap(&self, inst: Inst, block: Block) -> bool { - inst.index() % 10 == 1 && block.index() / 10 == inst.index() / 10 + 1 - } - } - - impl ProgOrder { - // Get the block corresponding to `inst`. - fn inst_block(&self, inst: Inst) -> Block { - let i = inst.index(); - Block::new(i - i % 10) - } - - // Get the block of a program point. - fn pp_block>(&self, pp: PP) -> Block { - match pp.into() { - ExpandedProgramPoint::Inst(i) => self.inst_block(i), - ExpandedProgramPoint::Block(e) => e, - } - } - - // Validate the live range invariants. - fn validate(&self, lr: &GenericLiveRange) { - // The def interval must cover a single block. - let def_block = self.pp_block(lr.def_begin); - assert_eq!(def_block, self.pp_block(lr.def_end)); - - // Check that the def interval isn't backwards. - match self.cmp(lr.def_begin, lr.def_end) { - Ordering::Equal => assert!(lr.liveins.is_empty()), - Ordering::Greater => { - panic!("Backwards def interval: {}-{}", lr.def_begin, lr.def_end) - } - Ordering::Less => {} - } - - // Check the live-in intervals. 
- let mut prev_end = None; - for Interval { begin, end } in lr.liveins.iter() { - let begin = *begin; - let end = *end; - - assert_eq!(self.cmp(begin, end), Ordering::Less); - if let Some(e) = prev_end { - assert_eq!(self.cmp(e, begin), Ordering::Less); - } - - assert!( - self.cmp(lr.def_end, begin) == Ordering::Less - || self.cmp(lr.def_begin, end) == Ordering::Greater, - "Interval can't overlap the def block" - ); - - // Save for next round. - prev_end = Some(end); - } - } - } - - // Singleton `ProgramOrder` for tests below. - const PO: &'static ProgOrder = &ProgOrder {}; - - #[test] - fn dead_def_range() { - let v0 = Value::new(0); - let e0 = Block::new(0); - let i1 = Inst::new(1); - let i2 = Inst::new(2); - let e2 = Block::new(2); - let lr = GenericLiveRange::new(v0, i1.into(), Default::default()); - assert!(lr.is_dead()); - assert!(lr.is_local()); - assert_eq!(lr.def(), i1.into()); - assert_eq!(lr.def_local_end(), i1.into()); - assert_eq!(lr.livein_local_end(e2, PO), None); - PO.validate(&lr); - - // A dead live range overlaps its own def program point. - assert!(lr.overlaps_def(i1.into(), e0, PO)); - assert!(!lr.overlaps_def(i2.into(), e0, PO)); - assert!(!lr.overlaps_def(e0.into(), e0, PO)); - } - - #[test] - fn dead_arg_range() { - let v0 = Value::new(0); - let e2 = Block::new(2); - let lr = GenericLiveRange::new(v0, e2.into(), Default::default()); - assert!(lr.is_dead()); - assert!(lr.is_local()); - assert_eq!(lr.def(), e2.into()); - assert_eq!(lr.def_local_end(), e2.into()); - // The def interval of a block argument does not count as live-in. 
- assert_eq!(lr.livein_local_end(e2, PO), None); - PO.validate(&lr); - } - - #[test] - fn local_def() { - let v0 = Value::new(0); - let e10 = Block::new(10); - let i11 = Inst::new(11); - let i12 = Inst::new(12); - let i13 = Inst::new(13); - let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default()); - - assert_eq!(lr.extend_in_block(e10, i13, PO), false); - PO.validate(&lr); - assert!(!lr.is_dead()); - assert!(lr.is_local()); - assert_eq!(lr.def(), i11.into()); - assert_eq!(lr.def_local_end(), i13.into()); - - // Extending to an already covered inst should not change anything. - assert_eq!(lr.extend_in_block(e10, i12, PO), false); - PO.validate(&lr); - assert_eq!(lr.def(), i11.into()); - assert_eq!(lr.def_local_end(), i13.into()); - } - - #[test] - fn local_arg() { - let v0 = Value::new(0); - let e10 = Block::new(10); - let i11 = Inst::new(11); - let i12 = Inst::new(12); - let i13 = Inst::new(13); - let mut lr = GenericLiveRange::new(v0, e10.into(), Default::default()); - - // Extending a dead block argument in its own block should not indicate that a live-in - // interval was created. - assert_eq!(lr.extend_in_block(e10, i12, PO), false); - PO.validate(&lr); - assert!(!lr.is_dead()); - assert!(lr.is_local()); - assert_eq!(lr.def(), e10.into()); - assert_eq!(lr.def_local_end(), i12.into()); - - // Extending to an already covered inst should not change anything. - assert_eq!(lr.extend_in_block(e10, i11, PO), false); - PO.validate(&lr); - assert_eq!(lr.def(), e10.into()); - assert_eq!(lr.def_local_end(), i12.into()); - - // Extending further. 
- assert_eq!(lr.extend_in_block(e10, i13, PO), false); - PO.validate(&lr); - assert_eq!(lr.def(), e10.into()); - assert_eq!(lr.def_local_end(), i13.into()); - } - - #[test] - fn global_def() { - let v0 = Value::new(0); - let e10 = Block::new(10); - let i11 = Inst::new(11); - let i12 = Inst::new(12); - let e20 = Block::new(20); - let i21 = Inst::new(21); - let i22 = Inst::new(22); - let i23 = Inst::new(23); - let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default()); - - assert_eq!(lr.extend_in_block(e10, i12, PO), false); - - // Adding a live-in interval. - assert_eq!(lr.extend_in_block(e20, i22, PO), true); - PO.validate(&lr); - assert_eq!(lr.livein_local_end(e20, PO), Some(i22)); - - // Non-extending the live-in. - assert_eq!(lr.extend_in_block(e20, i21, PO), false); - assert_eq!(lr.livein_local_end(e20, PO), Some(i22)); - - // Extending the existing live-in. - assert_eq!(lr.extend_in_block(e20, i23, PO), false); - PO.validate(&lr); - assert_eq!(lr.livein_local_end(e20, PO), Some(i23)); - } - - #[test] - fn coalesce() { - let v0 = Value::new(0); - let i11 = Inst::new(11); - let e20 = Block::new(20); - let i21 = Inst::new(21); - let e30 = Block::new(30); - let i31 = Inst::new(31); - let e40 = Block::new(40); - let i41 = Inst::new(41); - let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default()); - - assert_eq!(lr.extend_in_block(e30, i31, PO,), true); - assert_eq!(lr.liveins().collect::>(), [(e30, i31)]); - - // Coalesce to previous - assert_eq!(lr.extend_in_block(e40, i41, PO,), true); - assert_eq!(lr.liveins().collect::>(), [(e30, i41)]); - - // Coalesce to next - assert_eq!(lr.extend_in_block(e20, i21, PO,), true); - assert_eq!(lr.liveins().collect::>(), [(e20, i41)]); - - let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default()); - - assert_eq!(lr.extend_in_block(e40, i41, PO,), true); - assert_eq!(lr.liveins().collect::>(), [(e40, i41)]); - - assert_eq!(lr.extend_in_block(e20, i21, PO,), true); - 
assert_eq!(lr.liveins().collect::>(), [(e20, i21), (e40, i41)]); - - // Coalesce to previous and next - assert_eq!(lr.extend_in_block(e30, i31, PO,), true); - assert_eq!(lr.liveins().collect::>(), [(e20, i41)]); - } -} diff --git a/cranelift/codegen/src/regalloc/mod.rs b/cranelift/codegen/src/regalloc/mod.rs deleted file mode 100644 index 581acc408e..0000000000 --- a/cranelift/codegen/src/regalloc/mod.rs +++ /dev/null @@ -1,26 +0,0 @@ -//! Register allocation. -//! -//! This module contains data structures and algorithms used for register allocation. - -pub mod coloring; -pub mod live_value_tracker; -pub mod liveness; -pub mod liverange; -pub mod register_set; -pub mod virtregs; - -mod affinity; -mod branch_splitting; -mod coalescing; -mod context; -mod diversion; -mod pressure; -mod reload; -mod safepoint; -mod solver; -mod spilling; - -pub use self::context::Context; -pub use self::diversion::{EntryRegDiversions, RegDiversions}; -pub use self::register_set::RegisterSet; -pub use self::safepoint::emit_stack_maps; diff --git a/cranelift/codegen/src/regalloc/pressure.rs b/cranelift/codegen/src/regalloc/pressure.rs deleted file mode 100644 index aa83037041..0000000000 --- a/cranelift/codegen/src/regalloc/pressure.rs +++ /dev/null @@ -1,371 +0,0 @@ -//! Register pressure tracking. -//! -//! SSA-based register allocation depends on a spilling phase that "lowers register pressure -//! sufficiently". This module defines the data structures needed to measure register pressure -//! accurately enough to guarantee that the coloring phase will not run out of registers. -//! -//! Ideally, measuring register pressure amounts to simply counting the number of live registers at -//! any given program point. This simplistic method has two problems: -//! -//! 1. Registers are not interchangeable. Most ISAs have separate integer and floating-point -//! register banks, so we need to at least count the number of live registers in each register -//! bank separately. -//! -//! 2. 
Some ISAs have complicated register aliasing properties. In particular, the 32-bit ARM -//! ISA has a floating-point register bank where two 32-bit registers alias one 64-bit register. -//! This makes it difficult to accurately measure register pressure. -//! -//! This module deals with the problems via *register banks* and *top-level register classes*. -//! Register classes in different register banks are completely independent, so we can count -//! registers in one bank without worrying about the other bank at all. -//! -//! All register classes have a unique top-level register class, and we will count registers for -//! each top-level register class individually. However, a register bank can have multiple -//! top-level register classes that interfere with each other, so all top-level counts need to -//! be considered when determining how many more registers can be allocated. -//! -//! Currently, the only register bank with multiple top-level registers is the `arm32` -//! floating-point register bank which has `S`, `D`, and `Q` top-level classes. -//! -//! # Base and transient counts -//! -//! We maintain two separate register counts per top-level register class: base counts and -//! transient counts. The base counts are adjusted with the `take` and `free` functions. The -//! transient counts are adjusted with `take_transient` and `free_transient`. - -// Remove once we're using the pressure tracker. -#![allow(dead_code)] - -use crate::isa::registers::{RegClass, RegClassMask, RegInfo}; -use crate::regalloc::RegisterSet; -use core::cmp::min; -use core::fmt; -use core::iter::ExactSizeIterator; -use cranelift_codegen_shared::constants::MAX_TRACKED_TOP_RCS; - -/// Information per top-level register class. -/// -/// Everything but the counts is static information computed from the constructor arguments. -#[derive(Default)] -struct TopRC { - /// Number of registers currently used from this register class. 
- base_count: u32, - transient_count: u32, - - /// Max number of registers that can be allocated. - limit: u32, - - /// Register units per register. - width: u8, - - /// The first aliasing top-level RC. - first_toprc: u8, - - /// The number of aliasing top-level RCs. - num_toprcs: u8, -} - -impl TopRC { - fn total_count(&self) -> u32 { - self.base_count + self.transient_count - } -} - -pub struct Pressure { - /// Bit mask of top-level register classes that are aliased by other top-level register classes. - /// Unaliased register classes can use a simpler interference algorithm. - aliased: RegClassMask, - - /// Current register counts per top-level register class. - toprc: [TopRC; MAX_TRACKED_TOP_RCS], -} - -impl Pressure { - /// Create a new register pressure tracker. - pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Self { - let mut p = Self { - aliased: 0, - toprc: Default::default(), - }; - - // Get the layout of aliasing top-level register classes from the register banks. - for bank in reginfo.banks { - let first = bank.first_toprc; - let num = bank.num_toprcs; - - if bank.pressure_tracking { - for rc in &mut p.toprc[first..first + num] { - rc.first_toprc = first as u8; - rc.num_toprcs = num as u8; - } - - // Flag the top-level register classes with aliases. - if num > 1 { - p.aliased |= ((1 << num) - 1) << first; - } - } else { - // This bank has no pressure tracking, so its top-level register classes may exceed - // `MAX_TRACKED_TOPRCS`. Fill in dummy entries. - for rc in &mut p.toprc[first..min(first + num, MAX_TRACKED_TOP_RCS)] { - // These aren't used if we don't set the `aliased` bit. - rc.first_toprc = !0; - rc.limit = !0; - } - } - } - - // Compute per-class limits from `usable`. - for (toprc, rc) in p - .toprc - .iter_mut() - .take_while(|t| t.num_toprcs > 0) - .zip(reginfo.classes) - { - toprc.limit = usable.iter(rc).len() as u32; - toprc.width = rc.width; - } - - p - } - - /// Check for an available register in the register class `rc`. 
- /// - /// If it is possible to allocate one more register from `rc`'s top-level register class, - /// returns 0. - /// - /// If not, returns a bit-mask of top-level register classes that are interfering. Register - /// pressure should be eased in one of the returned top-level register classes before calling - /// `can_take()` to check again. - fn check_avail(&self, rc: RegClass) -> RegClassMask { - let entry = match self.toprc.get(rc.toprc as usize) { - None => return 0, // Not a pressure tracked bank. - Some(e) => e, - }; - let mask = 1 << rc.toprc; - if (self.aliased & mask) == 0 { - // This is a simple unaliased top-level register class. - if entry.total_count() < entry.limit { - 0 - } else { - mask - } - } else { - // This is the more complicated case. The top-level register class has aliases. - self.check_avail_aliased(entry) - } - } - - /// Check for an available register in a top-level register class that may have aliases. - /// - /// This is the out-of-line slow path for `check_avail()`. - fn check_avail_aliased(&self, entry: &TopRC) -> RegClassMask { - let first = usize::from(entry.first_toprc); - let num = usize::from(entry.num_toprcs); - let width = u32::from(entry.width); - let ulimit = entry.limit * width; - - // Count up the number of available register units. - let mut units = 0; - for (rc, rci) in self.toprc[first..first + num].iter().zip(first..) { - let rcw = u32::from(rc.width); - // If `rc.width` is smaller than `width`, each register in `rc` could potentially block - // one of ours. This is assuming that none of the smaller registers are straddling the - // bigger ones. - // - // If `rc.width` is larger than `width`, we are also assuming that the registers are - // aligned and `rc.width` is a multiple of `width`. - let u = if rcw < width { - // We can't take more than the total number of register units in the class. - // This matters for arm32 S-registers which can only ever lock out 16 D-registers. 
- min(rc.total_count() * width, rc.limit * rcw) - } else { - rc.total_count() * rcw - }; - - // If this top-level RC on its own is responsible for exceeding our limit, return it - // early to guarantee that registers here are spilled before spilling other registers - // unnecessarily. - if u >= ulimit { - return 1 << rci; - } - - units += u; - } - - // We've counted up the worst-case number of register units claimed by all aliasing - // classes. Compare to the unit limit in this class. - if units < ulimit { - 0 - } else { - // Registers need to be spilled from any one of the aliasing classes. - ((1 << num) - 1) << first - } - } - - /// Take a register from `rc`. - /// - /// This does not check if there are enough registers available. - pub fn take(&mut self, rc: RegClass) { - if let Some(t) = self.toprc.get_mut(rc.toprc as usize) { - t.base_count += 1; - } - } - - /// Free a register in `rc`. - pub fn free(&mut self, rc: RegClass) { - if let Some(t) = self.toprc.get_mut(rc.toprc as usize) { - t.base_count -= 1; - } - } - - /// Reset all counts to 0, both base and transient. - pub fn reset(&mut self) { - for e in &mut self.toprc { - e.base_count = 0; - e.transient_count = 0; - } - } - - /// Try to increment a transient counter. - /// - /// This will fail if there are not enough registers available. - pub fn take_transient(&mut self, rc: RegClass) -> Result<(), RegClassMask> { - let mask = self.check_avail(rc); - if mask == 0 { - if let Some(t) = self.toprc.get_mut(rc.toprc as usize) { - t.transient_count += 1; - } - - Ok(()) - } else { - Err(mask) - } - } - - /// Reset all transient counts to 0. - pub fn reset_transient(&mut self) { - for e in &mut self.toprc { - e.transient_count = 0; - } - } - - /// Preserve the transient counts by transferring them to the base counts. 
- pub fn preserve_transient(&mut self) { - for e in &mut self.toprc { - e.base_count += e.transient_count; - e.transient_count = 0; - } - } -} - -impl fmt::Display for Pressure { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Pressure[")?; - for rc in &self.toprc { - if rc.limit > 0 && rc.limit < !0 { - write!(f, " {}+{}/{}", rc.base_count, rc.transient_count, rc.limit)?; - } - } - write!(f, " ]") - } -} - -#[cfg(test)] -#[cfg(feature = "arm32")] -mod tests { - use super::Pressure; - use crate::isa::registers::{RegBank, RegClassData}; - use crate::isa::{RegClass, RegInfo, RegUnit}; - use crate::regalloc::RegisterSet; - use core::borrow::Borrow; - - // Arm32 `TargetIsa` is now `TargetIsaAdapter`, which does not hold any info - // about registers, so we directly access `INFO` from registers-arm32.rs. - include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs")); - - // Get a register class by name. - fn rc_by_name(reginfo: &RegInfo, name: &str) -> RegClass { - reginfo - .classes - .iter() - .find(|rc| rc.name == name) - .expect("Can't find named register class.") - } - - #[test] - fn basic_counting() { - let reginfo = INFO.borrow(); - let gpr = rc_by_name(®info, "GPR"); - let s = rc_by_name(®info, "S"); - - let regs = RegisterSet::new(); - - let mut pressure = Pressure::new(®info, ®s); - let mut count = 0; - while pressure.check_avail(gpr) == 0 { - pressure.take(gpr); - count += 1; - } - assert_eq!(count, 16); - assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc); - assert_eq!(pressure.check_avail(s), 0); - pressure.free(gpr); - assert_eq!(pressure.check_avail(gpr), 0); - pressure.take(gpr); - assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc); - assert_eq!(pressure.check_avail(s), 0); - pressure.reset(); - assert_eq!(pressure.check_avail(gpr), 0); - assert_eq!(pressure.check_avail(s), 0); - } - - #[test] - fn arm_float_bank() { - let reginfo = INFO.borrow(); - let s = rc_by_name(®info, "S"); - let d = rc_by_name(®info, "D"); - let q = 
rc_by_name(®info, "Q"); - let regs = RegisterSet::new(); - - let mut pressure = Pressure::new(®info, ®s); - assert_eq!(pressure.check_avail(s), 0); - assert_eq!(pressure.check_avail(d), 0); - assert_eq!(pressure.check_avail(q), 0); - - // Allocating a single S-register should not affect availability. - pressure.take(s); - assert_eq!(pressure.check_avail(s), 0); - assert_eq!(pressure.check_avail(d), 0); - assert_eq!(pressure.check_avail(q), 0); - - pressure.take(d); - assert_eq!(pressure.check_avail(s), 0); - assert_eq!(pressure.check_avail(d), 0); - assert_eq!(pressure.check_avail(q), 0); - - pressure.take(q); - assert_eq!(pressure.check_avail(s), 0); - assert_eq!(pressure.check_avail(d), 0); - assert_eq!(pressure.check_avail(q), 0); - - // Take a total of 16 S-regs. - for _ in 1..16 { - pressure.take(s); - } - assert_eq!(pressure.check_avail(s), 0); - assert_eq!(pressure.check_avail(d), 0); - assert_eq!(pressure.check_avail(q), 0); - - // We've taken 16 S, 1 D, and 1 Q. There should be 6 more Qs. - for _ in 0..6 { - assert_eq!(pressure.check_avail(d), 0); - assert_eq!(pressure.check_avail(q), 0); - pressure.take(q); - } - - // We've taken 16 S, 1 D, and 7 Qs. - assert!(pressure.check_avail(s) != 0); - assert_eq!(pressure.check_avail(d), 0); - assert!(pressure.check_avail(q) != 0); - } -} diff --git a/cranelift/codegen/src/regalloc/register_set.rs b/cranelift/codegen/src/regalloc/register_set.rs deleted file mode 100644 index 52b8a6fa0a..0000000000 --- a/cranelift/codegen/src/regalloc/register_set.rs +++ /dev/null @@ -1,391 +0,0 @@ -//! Set of allocatable registers as a bit vector of register units. -//! -//! While allocating registers, we need to keep track of which registers are available and which -//! registers are in use. Since registers can alias in different ways, we track this via the -//! "register unit" abstraction. Every register contains one or more register units. Registers that -//! share a register unit can't be in use at the same time. 
- -use crate::isa::registers::{RegClass, RegInfo, RegUnit, RegUnitMask}; -use core::char; -use core::fmt; -use core::iter::ExactSizeIterator; -use core::mem::size_of_val; - -/// Set of registers available for allocation. -#[derive(Clone)] -pub struct RegisterSet { - avail: RegUnitMask, -} - -// Given a register class and a register unit in the class, compute a word index and a bit mask of -// register units representing that register. -// -// Note that a register is not allowed to straddle words. -fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) { - // Bit mask representing the register. It is `rc.width` consecutive units. - let width_bits = (1 << rc.width) - 1; - // Index into avail[] of the word containing `reg`. - let word_index = (reg / 32) as usize; - // The actual bits in the word that cover `reg`. - let reg_bits = width_bits << (reg % 32); - - (word_index, reg_bits) -} - -impl RegisterSet { - /// Create a new register set with all registers available. - /// - /// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of - /// allocatable registers where reserved registers have been filtered out. - pub fn new() -> Self { - Self { avail: [!0; 3] } - } - - /// Create a new register set with no registers available. - pub fn empty() -> Self { - Self { avail: [0; 3] } - } - - /// Returns `true` if the specified register is available. - pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool { - let (idx, bits) = bitmask(rc, reg); - (self.avail[idx] & bits) == bits - } - - /// Allocate `reg` from `rc` so it is no longer available. - /// - /// It is an error to take a register that doesn't have all of its register units available. 
- pub fn take(&mut self, rc: RegClass, reg: RegUnit) { - let (idx, bits) = bitmask(rc, reg); - debug_assert!( - (self.avail[idx] & bits) == bits, - "{}:{} not available in {}", - rc, - rc.info.display_regunit(reg), - self.display(rc.info) - ); - self.avail[idx] &= !bits; - } - - /// Return `reg` and all of its register units to the set of available registers. - pub fn free(&mut self, rc: RegClass, reg: RegUnit) { - let (idx, bits) = bitmask(rc, reg); - debug_assert!( - (self.avail[idx] & bits) == 0, - "{}:{} is already free in {}", - rc, - rc.info.display_regunit(reg), - self.display(rc.info) - ); - self.avail[idx] |= bits; - } - - /// Return an iterator over all available registers belonging to the register class `rc`. - /// - /// This doesn't allocate anything from the set; use `take()` for that. - pub fn iter(&self, rc: RegClass) -> RegSetIter { - // Start by copying the RC mask. It is a single set bit for each register in the class. - let mut rsi = RegSetIter { regs: rc.mask }; - - // Mask out the unavailable units. - for idx in 0..self.avail.len() { - // If a single unit in a register is unavailable, the whole register can't be used. If - // a register straddles a word boundary, it will be marked as unavailable. There's an - // assertion in `cranelift-codegen/meta/src/cdsl/regs.rs` to check for that. - for i in 0..rc.width { - rsi.regs[idx] &= self.avail[idx] >> i; - } - } - rsi - } - - /// Check if any register units allocated out of this set interferes with units allocated out - /// of `other`. - /// - /// This assumes that unused bits are 1. - pub fn interferes_with(&self, other: &Self) -> bool { - self.avail - .iter() - .zip(&other.avail) - .any(|(&x, &y)| (x | y) != !0) - } - - /// Intersect this set of registers with `other`. This has the effect of removing any register - /// units from this set that are not in `other`. 
- pub fn intersect(&mut self, other: &Self) { - for (x, &y) in self.avail.iter_mut().zip(&other.avail) { - *x &= y; - } - } - - /// Return an object that can display this register set, using the register info from the - /// target ISA. - pub fn display<'a, R: Into>>(&self, regs: R) -> DisplayRegisterSet<'a> { - DisplayRegisterSet(self.clone(), regs.into()) - } -} - -/// Iterator over available registers in a register class. -#[derive(Clone)] -pub struct RegSetIter { - regs: RegUnitMask, -} - -impl Iterator for RegSetIter { - type Item = RegUnit; - - fn next(&mut self) -> Option { - let mut unit_offset = 0; - - // Find the first set bit in `self.regs`. - for word in &mut self.regs { - if *word != 0 { - // Compute the register unit number from the lowest set bit in the word. - let unit = unit_offset + word.trailing_zeros() as RegUnit; - - // Clear that lowest bit so we won't find it again. - *word &= *word - 1; - - return Some(unit); - } - // How many register units was there in the word? This is a constant 32 for `u32` etc. - unit_offset += 8 * size_of_val(word) as RegUnit; - } - - // All of `self.regs` is 0. - None - } - - fn size_hint(&self) -> (usize, Option) { - let bits = self.regs.iter().map(|&w| w.count_ones() as usize).sum(); - (bits, Some(bits)) - } -} - -impl RegSetIter { - pub fn rnext(&mut self) -> Option { - let num_words = self.regs.len(); - let bits_per_word = 8 * size_of_val(&self.regs[0]); - - // Find the last set bit in `self.regs`. - for i in 0..num_words { - let word_ix = num_words - 1 - i; - - let word = &mut self.regs[word_ix]; - if *word != 0 { - let lzeroes = word.leading_zeros() as usize; - - // Clear that highest bit so we won't find it again. - *word &= !(1 << (bits_per_word - 1 - lzeroes)); - - return Some((word_ix * bits_per_word + bits_per_word - 1 - lzeroes) as RegUnit); - } - } - - // All of `self.regs` is 0. 
- None - } -} - -impl ExactSizeIterator for RegSetIter {} - -/// Displaying an `RegisterSet` correctly requires the associated `RegInfo` from the target ISA. -pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>); - -impl<'a> fmt::Display for DisplayRegisterSet<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "[")?; - match self.1 { - None => { - for w in &self.0.avail { - write!(f, " #{:08x}", w)?; - } - } - Some(reginfo) => { - let toprcs = reginfo - .banks - .iter() - .map(|b| b.first_toprc + b.num_toprcs) - .max() - .expect("No register banks"); - for rc in ®info.classes[0..toprcs] { - if rc.width == 1 { - let bank = ®info.banks[rc.bank as usize]; - write!(f, " {}: ", rc)?; - for offset in 0..bank.units { - let reg = bank.first_unit + offset; - if !rc.contains(reg) { - continue; - } - if !self.0.is_avail(rc, reg) { - write!(f, "-")?; - continue; - } - // Display individual registers as either the second letter of their - // name or the last digit of their number. - // This works for x86 (rax, rbx, ...) and for numbered regs. - write!( - f, - "{}", - bank.names - .get(offset as usize) - .and_then(|name| name.chars().nth(1)) - .unwrap_or_else(|| char::from_digit( - u32::from(offset % 10), - 10 - ) - .unwrap()) - )?; - } - } - } - } - } - write!(f, " ]") - } -} - -impl fmt::Display for RegisterSet { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.display(None).fmt(f) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::isa::registers::{RegClass, RegClassData}; - use alloc::vec::Vec; - - // Register classes for testing. 
- const GPR: RegClass = &RegClassData { - name: "GPR", - index: 0, - width: 1, - bank: 0, - toprc: 0, - first: 28, - subclasses: 0, - mask: [0xf0000000, 0x0000000f, 0], - info: &INFO, - pinned_reg: None, - }; - - const DPR: RegClass = &RegClassData { - name: "DPR", - index: 0, - width: 2, - bank: 0, - toprc: 0, - first: 28, - subclasses: 0, - mask: [0x50000000, 0x0000000a, 0], - info: &INFO, - pinned_reg: None, - }; - - const INFO: RegInfo = RegInfo { - banks: &[], - classes: &[], - }; - - const RSI_1: RegSetIter = RegSetIter { - regs: [0x31415927, 0x27182818, 0x14141356], - }; - - const RSI_2: RegSetIter = RegSetIter { - regs: [0x00000000, 0x00000000, 0x00000000], - }; - - const RSI_3: RegSetIter = RegSetIter { - regs: [0xffffffff, 0xffffffff, 0xffffffff], - }; - - fn reverse_regset_iteration_work(rsi: &RegSetIter) { - // Check the reverse iterator by comparing its output with the forward iterator. - let rsi_f = (*rsi).clone(); - let results_f = rsi_f.collect::>(); - - let mut rsi_r = (*rsi).clone(); - let mut results_r = Vec::::new(); - while let Some(r) = rsi_r.rnext() { - results_r.push(r); - } - - let len_f = results_f.len(); - let len_r = results_r.len(); - assert_eq!(len_f, len_r); - - for i in 0..len_f { - assert_eq!(results_f[i], results_r[len_f - 1 - i]); - } - } - - #[test] - fn reverse_regset_iteration() { - reverse_regset_iteration_work(&RSI_1); - reverse_regset_iteration_work(&RSI_2); - reverse_regset_iteration_work(&RSI_3); - } - - #[test] - fn put_and_take() { - let mut regs = RegisterSet::new(); - - // `GPR` has units 28-36. 
- assert_eq!(regs.iter(GPR).len(), 8); - assert_eq!(regs.iter(GPR).count(), 8); - assert_eq!(regs.iter(DPR).collect::>(), [28, 30, 33, 35]); - - assert!(regs.is_avail(GPR, 29)); - regs.take(&GPR, 29); - assert!(!regs.is_avail(GPR, 29)); - - assert_eq!(regs.iter(GPR).count(), 7); - assert_eq!(regs.iter(DPR).collect::>(), [30, 33, 35]); - - assert!(regs.is_avail(GPR, 30)); - regs.take(&GPR, 30); - assert!(!regs.is_avail(GPR, 30)); - - assert_eq!(regs.iter(GPR).count(), 6); - assert_eq!(regs.iter(DPR).collect::>(), [33, 35]); - - assert!(regs.is_avail(GPR, 32)); - regs.take(&GPR, 32); - assert!(!regs.is_avail(GPR, 32)); - - assert_eq!(regs.iter(GPR).count(), 5); - assert_eq!(regs.iter(DPR).collect::>(), [33, 35]); - - regs.free(&GPR, 30); - assert!(regs.is_avail(GPR, 30)); - assert!(!regs.is_avail(GPR, 29)); - assert!(!regs.is_avail(GPR, 32)); - - assert_eq!(regs.iter(GPR).count(), 6); - assert_eq!(regs.iter(DPR).collect::>(), [30, 33, 35]); - - regs.free(&GPR, 32); - assert!(regs.is_avail(GPR, 31)); - assert!(!regs.is_avail(GPR, 29)); - assert!(regs.is_avail(GPR, 32)); - - assert_eq!(regs.iter(GPR).count(), 7); - assert_eq!(regs.iter(DPR).collect::>(), [30, 33, 35]); - } - - #[test] - fn interference() { - let mut regs1 = RegisterSet::new(); - let mut regs2 = RegisterSet::new(); - - assert!(!regs1.interferes_with(®s2)); - regs1.take(&GPR, 32); - assert!(!regs1.interferes_with(®s2)); - regs2.take(&GPR, 31); - assert!(!regs1.interferes_with(®s2)); - regs1.intersect(®s2); - assert!(regs1.interferes_with(®s2)); - } -} diff --git a/cranelift/codegen/src/regalloc/reload.rs b/cranelift/codegen/src/regalloc/reload.rs deleted file mode 100644 index d853ab5b18..0000000000 --- a/cranelift/codegen/src/regalloc/reload.rs +++ /dev/null @@ -1,484 +0,0 @@ -//! Reload pass -//! -//! The reload pass runs between the spilling and coloring passes. Its primary responsibility is to -//! insert `spill` and `fill` instructions such that instruction operands expecting a register will -//! 
get a value with register affinity, and operands expecting a stack slot will get a value with -//! stack affinity. -//! -//! The secondary responsibility of the reload pass is to reuse values in registers as much as -//! possible to minimize the number of `fill` instructions needed. This must not cause the register -//! pressure limits to be exceeded. - -use crate::cursor::{Cursor, EncCursor}; -use crate::dominator_tree::DominatorTree; -use crate::entity::{SparseMap, SparseMapValue}; -use crate::ir::{AbiParam, ArgumentLoc, InstBuilder}; -use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueLoc}; -use crate::isa::RegClass; -use crate::isa::{ConstraintKind, EncInfo, Encoding, RecipeConstraints, TargetIsa}; -use crate::regalloc::affinity::Affinity; -use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker}; -use crate::regalloc::liveness::Liveness; -use crate::timing; -use crate::topo_order::TopoOrder; -use alloc::vec::Vec; - -/// Reusable data structures for the reload pass. -pub struct Reload { - candidates: Vec, - reloads: SparseMap, -} - -/// Context data structure that gets instantiated once per pass. -struct Context<'a> { - cur: EncCursor<'a>, - - // Cached ISA information. - // We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object. - encinfo: EncInfo, - - // References to contextual data structures we need. - domtree: &'a DominatorTree, - liveness: &'a mut Liveness, - topo: &'a mut TopoOrder, - - candidates: &'a mut Vec, - reloads: &'a mut SparseMap, -} - -impl Reload { - /// Create a new blank reload pass. - pub fn new() -> Self { - Self { - candidates: Vec::new(), - reloads: SparseMap::new(), - } - } - - /// Clear all data structures in this reload pass. - pub fn clear(&mut self) { - self.candidates.clear(); - self.reloads.clear(); - } - - /// Run the reload algorithm over `func`. 
- pub fn run( - &mut self, - isa: &dyn TargetIsa, - func: &mut Function, - domtree: &DominatorTree, - liveness: &mut Liveness, - topo: &mut TopoOrder, - tracker: &mut LiveValueTracker, - ) { - let _tt = timing::ra_reload(); - log::trace!("Reload for:\n{}", func.display(isa)); - let mut ctx = Context { - cur: EncCursor::new(func, isa), - encinfo: isa.encoding_info(), - domtree, - liveness, - topo, - candidates: &mut self.candidates, - reloads: &mut self.reloads, - }; - ctx.run(tracker) - } -} - -/// A reload candidate. -/// -/// This represents a stack value that is used by the current instruction where a register is -/// needed. -struct ReloadCandidate { - argidx: usize, - value: Value, - regclass: RegClass, -} - -/// A Reloaded value. -/// -/// This represents a value that has been reloaded into a register value from the stack. -struct ReloadedValue { - stack: Value, - reg: Value, -} - -impl SparseMapValue for ReloadedValue { - fn key(&self) -> Value { - self.stack - } -} - -impl<'a> Context<'a> { - fn run(&mut self, tracker: &mut LiveValueTracker) { - self.topo.reset(self.cur.func.layout.blocks()); - while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) { - self.visit_block(block, tracker); - } - } - - fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) { - log::trace!("Reloading {}:", block); - self.visit_block_header(block, tracker); - tracker.drop_dead_params(); - - // visit_block_header() places us at the first interesting instruction in the block. - while let Some(inst) = self.cur.current_inst() { - if !self.cur.func.dfg[inst].opcode().is_ghost() { - // This instruction either has an encoding or has ABI constraints, so visit it to - // insert spills and fills as needed. - let encoding = self.cur.func.encodings[inst]; - self.visit_inst(block, inst, encoding, tracker); - tracker.drop_dead(inst); - } else { - // This is a ghost instruction with no encoding and no extra constraints, so we can - // just skip over it. 
- self.cur.next_inst(); - } - } - } - - /// Process the block parameters. Move to the next instruction in the block to be processed - fn visit_block_header(&mut self, block: Block, tracker: &mut LiveValueTracker) { - let (liveins, args) = tracker.block_top( - block, - &self.cur.func.dfg, - self.liveness, - &self.cur.func.layout, - self.domtree, - ); - - if self.cur.func.layout.entry_block() == Some(block) { - debug_assert_eq!(liveins.len(), 0); - self.visit_entry_params(block, args); - } else { - self.visit_block_params(block, args); - } - } - - /// Visit the parameters on the entry block. - /// These values have ABI constraints from the function signature. - fn visit_entry_params(&mut self, block: Block, args: &[LiveValue]) { - debug_assert_eq!(self.cur.func.signature.params.len(), args.len()); - self.cur.goto_first_inst(block); - - for (arg_idx, arg) in args.iter().enumerate() { - let abi = self.cur.func.signature.params[arg_idx]; - match abi.location { - ArgumentLoc::Reg(_) => { - if arg.affinity.is_stack() { - // An incoming register parameter was spilled. Replace the parameter value - // with a temporary register value that is immediately spilled. - let reg = self - .cur - .func - .dfg - .replace_block_param(arg.value, abi.value_type); - let affinity = Affinity::abi(&abi, self.cur.isa); - self.liveness.create_dead(reg, block, affinity); - self.insert_spill(block, arg.value, reg); - } - } - ArgumentLoc::Stack(_) => { - debug_assert!(arg.affinity.is_stack()); - } - ArgumentLoc::Unassigned => panic!("Unexpected ABI location"), - } - } - } - - fn visit_block_params(&mut self, block: Block, _args: &[LiveValue]) { - self.cur.goto_first_inst(block); - } - - /// Process the instruction pointed to by `pos`, and advance the cursor to the next instruction - /// that needs processing. 
- fn visit_inst( - &mut self, - block: Block, - inst: Inst, - encoding: Encoding, - tracker: &mut LiveValueTracker, - ) { - self.cur.use_srcloc(inst); - - // Get the operand constraints for `inst` that we are trying to satisfy. - let constraints = self.encinfo.operand_constraints(encoding); - - // Identify reload candidates. - debug_assert!(self.candidates.is_empty()); - self.find_candidates(inst, constraints); - - // If we find a copy from a stack slot to the same stack slot, replace - // it with a `copy_nop` but otherwise ignore it. In particular, don't - // generate a reload immediately followed by a spill. The `copy_nop` - // has a zero-length encoding, so will disappear at emission time. - if let InstructionData::Unary { - opcode: Opcode::Copy, - arg, - } = self.cur.func.dfg[inst] - { - let dst_vals = self.cur.func.dfg.inst_results(inst); - if dst_vals.len() == 1 { - let dst_val = dst_vals[0]; - let can_transform = match ( - self.cur.func.locations[arg], - self.cur.func.locations[dst_val], - ) { - (ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => { - src_slot == dst_slot && { - let src_ty = self.cur.func.dfg.value_type(arg); - let dst_ty = self.cur.func.dfg.value_type(dst_val); - debug_assert!(src_ty == dst_ty); - // This limits the transformation to copies of the - // types: I128 I64 I32 I16 I8 F64 and F32, since that's - // the set of `copy_nop` encodings available. - src_ty.is_int() || src_ty.is_float() - } - } - _ => false, - }; - if can_transform { - // Convert the instruction into a `copy_nop`. - self.cur.func.dfg.replace(inst).copy_nop(arg); - let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok(); - debug_assert!(ok, "copy_nop encoding missing for this type"); - - // And move on to the next insn. 
- self.reloads.clear(); - let _ = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness); - self.cur.next_inst(); - self.candidates.clear(); - return; - } - } - } - - // Deal with all instructions not special-cased by the immediately - // preceding fragment. - if let InstructionData::Unary { - opcode: Opcode::Copy, - .. - } = self.cur.func.dfg[inst] - { - self.reload_copy_candidates(inst); - } else { - self.reload_inst_candidates(block, inst); - } - - // TODO: Reuse reloads for future instructions. - self.reloads.clear(); - - let (_throughs, _kills, defs) = - tracker.process_inst(inst, &self.cur.func.dfg, self.liveness); - - // Advance to the next instruction so we can insert any spills after the instruction. - self.cur.next_inst(); - - // Rewrite register defs that need to be spilled. - // - // Change: - // - // v2 = inst ... - // - // Into: - // - // v7 = inst ... - // v2 = spill v7 - // - // That way, we don't need to rewrite all future uses of v2. - if let Some(constraints) = constraints { - for (lv, op) in defs.iter().zip(constraints.outs) { - if lv.affinity.is_stack() && op.kind != ConstraintKind::Stack { - if let InstructionData::Unary { - opcode: Opcode::Copy, - arg, - } = self.cur.func.dfg[inst] - { - self.cur.func.dfg.replace(inst).spill(arg); - let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok(); - debug_assert!(ok); - } else { - let value_type = self.cur.func.dfg.value_type(lv.value); - let reg = self.cur.func.dfg.replace_result(lv.value, value_type); - self.liveness.create_dead(reg, inst, Affinity::new(op)); - self.insert_spill(block, lv.value, reg); - } - } - } - } - - // Same thing for spilled call return values. 
- let retvals = &defs[self.cur.func.dfg[inst] - .opcode() - .constraints() - .num_fixed_results()..]; - if !retvals.is_empty() { - let sig = self - .cur - .func - .dfg - .call_signature(inst) - .expect("Extra results on non-call instruction"); - for (i, lv) in retvals.iter().enumerate() { - let abi = self.cur.func.dfg.signatures[sig].returns[i]; - debug_assert!( - abi.location.is_reg(), - "expected reg; got {:?}", - abi.location - ); - if lv.affinity.is_stack() { - let reg = self.cur.func.dfg.replace_result(lv.value, abi.value_type); - self.liveness - .create_dead(reg, inst, Affinity::abi(&abi, self.cur.isa)); - self.insert_spill(block, lv.value, reg); - } - } - } - } - - // Reload the current candidates for the given `inst`. - fn reload_inst_candidates(&mut self, block: Block, inst: Inst) { - // Insert fill instructions before `inst` and replace `cand.value` with the filled value. - for cand in self.candidates.iter_mut() { - if let Some(reload) = self.reloads.get(cand.value) { - cand.value = reload.reg; - continue; - } - - let reg = self.cur.ins().fill(cand.value); - let fill = self.cur.built_inst(); - - self.reloads.insert(ReloadedValue { - stack: cand.value, - reg, - }); - cand.value = reg; - - // Create a live range for the new reload. - let affinity = Affinity::Reg(cand.regclass.into()); - self.liveness.create_dead(reg, fill, affinity); - self.liveness - .extend_locally(reg, block, inst, &self.cur.func.layout); - } - - // Rewrite instruction arguments. - // - // Only rewrite those arguments that were identified as candidates. This leaves block - // arguments on branches as-is without rewriting them. A spilled block argument needs to stay - // spilled because the matching block parameter is going to be in the same virtual register - // and therefore the same stack slot as the block argument value. 
- if !self.candidates.is_empty() { - let args = self.cur.func.dfg.inst_args_mut(inst); - while let Some(cand) = self.candidates.pop() { - args[cand.argidx] = cand.value; - } - } - } - - // Reload the current candidates for the given copy `inst`. - // - // As an optimization, replace a copy instruction where the argument has been spilled with - // a fill instruction. - fn reload_copy_candidates(&mut self, inst: Inst) { - // Copy instructions can only have one argument. - debug_assert!(self.candidates.is_empty() || self.candidates.len() == 1); - - if let Some(cand) = self.candidates.pop() { - self.cur.func.dfg.replace(inst).fill(cand.value); - let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok(); - debug_assert!(ok); - } - } - - // Find reload candidates for `inst` and add them to `self.candidates`. - // - // These are uses of spilled values where the operand constraint requires a register. - fn find_candidates(&mut self, inst: Inst, constraints: Option<&RecipeConstraints>) { - let args = self.cur.func.dfg.inst_args(inst); - - if let Some(constraints) = constraints { - for (argidx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() { - if op.kind != ConstraintKind::Stack && self.liveness[arg].affinity.is_stack() { - self.candidates.push(ReloadCandidate { - argidx, - value: arg, - regclass: op.regclass, - }) - } - } - } - - // If we only have the fixed arguments, we're done now. - let offset = self.cur.func.dfg[inst] - .opcode() - .constraints() - .num_fixed_value_arguments(); - if args.len() == offset { - return; - } - let var_args = &args[offset..]; - - // Handle ABI arguments. 
- if let Some(sig) = self.cur.func.dfg.call_signature(inst) { - handle_abi_args( - self.candidates, - &self.cur.func.dfg.signatures[sig].params, - var_args, - offset, - self.cur.isa, - self.liveness, - ); - } else if self.cur.func.dfg[inst].opcode().is_return() { - handle_abi_args( - self.candidates, - &self.cur.func.signature.returns, - var_args, - offset, - self.cur.isa, - self.liveness, - ); - } - } - - /// Insert a spill at `pos` and update data structures. - /// - /// - Insert `stack = spill reg` at `pos`, and assign an encoding. - /// - Move the `stack` live range starting point to the new instruction. - /// - Extend the `reg` live range to reach the new instruction. - fn insert_spill(&mut self, block: Block, stack: Value, reg: Value) { - self.cur.ins().with_result(stack).spill(reg); - let inst = self.cur.built_inst(); - - // Update live ranges. - self.liveness.move_def_locally(stack, inst); - self.liveness - .extend_locally(reg, block, inst, &self.cur.func.layout); - } -} - -/// Find reload candidates in the instruction's ABI variable arguments. This handles both -/// return values and call arguments. -fn handle_abi_args( - candidates: &mut Vec, - abi_types: &[AbiParam], - var_args: &[Value], - offset: usize, - isa: &dyn TargetIsa, - liveness: &Liveness, -) { - debug_assert_eq!(abi_types.len(), var_args.len()); - for ((abi, &arg), argidx) in abi_types.iter().zip(var_args).zip(offset..) 
{ - if abi.location.is_reg() { - let lv = liveness.get(arg).expect("Missing live range for ABI arg"); - if lv.affinity.is_stack() { - candidates.push(ReloadCandidate { - argidx, - value: arg, - regclass: isa.regclass_for_abi_type(abi.value_type), - }); - } - } - } -} diff --git a/cranelift/codegen/src/regalloc/safepoint.rs b/cranelift/codegen/src/regalloc/safepoint.rs deleted file mode 100644 index 2686c57277..0000000000 --- a/cranelift/codegen/src/regalloc/safepoint.rs +++ /dev/null @@ -1,65 +0,0 @@ -use crate::cursor::{Cursor, FuncCursor}; -use crate::dominator_tree::DominatorTree; -use crate::inst_predicates::is_safepoint; -use crate::ir::{Function, InstBuilder}; -use crate::isa::TargetIsa; -use crate::regalloc::live_value_tracker::LiveValueTracker; -use crate::regalloc::liveness::Liveness; -use alloc::vec::Vec; - -fn insert_and_encode_safepoint<'f>( - pos: &mut FuncCursor<'f>, - tracker: &LiveValueTracker, - isa: &dyn TargetIsa, -) { - // Iterate through all live values, collect only the references. - let live_ref_values = tracker - .live() - .iter() - .filter(|live_value| pos.func.dfg.value_type(live_value.value).is_ref()) - .map(|live_val| live_val.value) - .collect::>(); - - if !live_ref_values.is_empty() { - pos.ins().safepoint(&live_ref_values); - // Move cursor to the new safepoint instruction to encode it. - if let Some(inst) = pos.prev_inst() { - let ok = pos.func.update_encoding(inst, isa).is_ok(); - debug_assert!(ok); - } - // Restore cursor position. - pos.next_inst(); - } -} - -// The emit_stack_maps() function analyzes each instruction to retrieve the liveness of -// the defs and operands by traversing a function's blocks in layout order. 
-pub fn emit_stack_maps( - func: &mut Function, - domtree: &DominatorTree, - liveness: &Liveness, - tracker: &mut LiveValueTracker, - isa: &dyn TargetIsa, -) { - let mut curr = func.layout.entry_block(); - - while let Some(block) = curr { - tracker.block_top(block, &func.dfg, liveness, &func.layout, domtree); - tracker.drop_dead_params(); - let mut pos = FuncCursor::new(func); - - // From the top of the block, step through the instructions. - pos.goto_top(block); - - while let Some(inst) = pos.next_inst() { - if is_safepoint(&pos.func, inst) { - insert_and_encode_safepoint(&mut pos, tracker, isa); - } - - // Process the instruction and get rid of dead values. - tracker.process_inst(inst, &pos.func.dfg, liveness); - tracker.drop_dead(inst); - } - curr = func.layout.next_block(block); - } -} diff --git a/cranelift/codegen/src/regalloc/solver.rs b/cranelift/codegen/src/regalloc/solver.rs deleted file mode 100644 index 3971ff4c55..0000000000 --- a/cranelift/codegen/src/regalloc/solver.rs +++ /dev/null @@ -1,1382 +0,0 @@ -//! Constraint solver for register coloring. -//! -//! The coloring phase of SSA-based register allocation is very simple in theory, but in practice -//! it is complicated by the various constraints imposed by individual instructions: -//! -//! - Call and return instructions have to satisfy ABI requirements for arguments and return -//! values. -//! - Values live across a call must be in a callee-saved register. -//! - Some instructions have operand constraints such as register sub-classes, fixed registers, or -//! tied operands. -//! -//! # The instruction register coloring problem -//! -//! The constraint solver addresses the problem of satisfying the constraints of a single -//! instruction. We have: -//! -//! - A set of values that are live in registers before the instruction, with current register -//! assignments. Some are used by the instruction, some are not. -//! - A subset of the live register values that are killed by the instruction. -//! 
- A set of new register values that are defined by the instruction. -//! -//! We are not concerned with stack values at all. The reload pass ensures that all values required -//! to be in a register by the instruction are already in a register. -//! -//! A solution to the register coloring problem consists of: -//! -//! - Register reassignment prescriptions for a subset of the live register values. -//! - Register assignments for the instruction's defined values. -//! -//! The solution ensures that when live registers are reassigned as prescribed before the -//! instruction, all its operand constraints are satisfied, and the definition assignments won't -//! conflict. -//! -//! # Register diversions and global interference -//! -//! We can divert register values temporarily to satisfy constraints, but we need to put the -//! values back into their originally assigned register locations before leaving the block. -//! Otherwise, values won't be in the right register at the entry point of other blocks. -//! -//! Some values are *local*, and we don't need to worry about putting those values back since they -//! are not used in any other blocks. -//! -//! When we assign register locations to defines, we are assigning both the register used locally -//! immediately after the instruction and the register used globally when the defined value is used -//! in a different block. We need to avoid interference both locally at the instruction and globally. -//! -//! We have multiple mappings of values to registers: -//! -//! 1. The initial local mapping before the instruction. This includes any diversions from previous -//! instructions in the block, but not diversions for the current instruction. -//! 2. The local mapping after applying the additional reassignments required to satisfy the -//! constraints of the current instruction. -//! 3. The local mapping after the instruction. This excludes values killed by the instruction and -//! 
includes values defined by the instruction. -//! 4. The global mapping after the instruction. This mapping only contains values with global live -//! ranges, and it does not include any diversions. -//! -//! All four mappings must be kept free of interference. -//! -//! # Problems handled by previous passes. -//! -//! The constraint solver can only reassign registers, it can't create spill code, so some -//! constraints are handled by earlier passes: -//! -//! - There will be enough free registers available for the defines. Ensuring this is the primary -//! purpose of the spilling phase. -//! - When the same value is used for multiple operands, the intersection of operand constraints is -//! non-empty. The spilling phase will insert copies to handle mutually incompatible constraints, -//! such as when the same value is bound to two different function arguments. -//! - Values bound to tied operands must be killed by the instruction. Also enforced by the -//! spiller. -//! - Values used by register operands are in registers, and values used by stack operands are in -//! stack slots. This is enforced by the reload pass. -//! -//! # Solver algorithm -//! -//! The goal of the solver is to satisfy the instruction constraints with a minimal number of -//! register assignments before the instruction. -//! -//! 1. Compute the set of values used by operands with a fixed register constraint that isn't -//! already satisfied. These are mandatory predetermined reassignments. -//! 2. Compute the set of values that don't satisfy their register class constraint. These are -//! mandatory reassignments that we need to solve. -//! 3. Add the set of defines to the set of variables computed in 2. Exclude defines tied to an -//! input operand since their value is pre-determined. -//! -//! The set of values computed in 2. and 3. are the *variables* for the solver. Given a set of -//! variables, we can also compute a set of allocatable registers by removing the variables from -//! 
the set of assigned registers before the instruction. -//! -//! 1. For each variable, compute its domain as the intersection of the allocatable registers and -//! its register class constraint. -//! 2. Sort the variables in order of increasing domain size. -//! 3. Search for a solution that assigns each variable a register from its domain without -//! interference between variables. -//! -//! If the search fails to find a solution, we may need to reassign more registers. Find an -//! appropriate candidate among the set of live register values, add it as a variable and start -//! over. - -use super::RegisterSet; -use crate::dbg::DisplayList; -use crate::entity::{SparseMap, SparseMapValue}; -use crate::ir::Value; -use crate::isa::{RegClass, RegUnit}; -use crate::regalloc::register_set::RegSetIter; -use alloc::vec::Vec; -use core::cmp; -use core::fmt; -use core::mem; -use core::u16; - -/// A variable in the constraint problem. -/// -/// Variables represent register values that can be assigned to any register unit within the -/// constraint register class. This includes live register values that can be reassigned to a new -/// register and values defined by the instruction which must be assigned to a register. -/// -/// Besides satisfying the register class constraint, variables must also be mutually -/// non-interfering in up to three contexts: -/// -/// 1. Input side live registers, after applying all the reassignments. -/// 2. Output side live registers, considering all the local register diversions. -/// 3. Global live register, not considering any local diversions. -/// -pub struct Variable { - /// The value whose register assignment we're looking for. - pub value: Value, - - /// Original register unit holding this live value before the instruction, or `None` for a - /// value that is defined by the instruction. - from: Option, - - /// Avoid interference on the input side. - is_input: bool, - - /// Avoid interference on the output side. 
- is_output: bool, - - /// Avoid interference with the global registers. - is_global: bool, - - /// Number of registers available in the domain of this variable. - domain: u16, - - /// The assigned register unit after a full solution was found. - pub solution: RegUnit, - - /// Any solution must belong to the constraint register class. - constraint: RegClass, -} - -impl Variable { - fn new_live(value: Value, constraint: RegClass, from: RegUnit, is_output: bool) -> Self { - Self { - value, - constraint, - from: Some(from), - is_input: true, - is_output, - is_global: false, - domain: 0, - solution: !0, - } - } - - fn new_def(value: Value, constraint: RegClass, is_global: bool) -> Self { - Self { - value, - constraint, - from: None, - is_input: false, - is_output: true, - is_global, - domain: 0, - solution: !0, - } - } - - /// Does this variable represent a value defined by the current instruction? - pub fn is_define(&self) -> bool { - self.from.is_none() - } - - /// Get an iterator over possible register choices, given the available registers on the input - /// and output sides as well as the available global register set. 
- fn iter(&self, iregs: &RegisterSet, oregs: &RegisterSet, gregs: &RegisterSet) -> RegSetIter { - if !self.is_output { - debug_assert!(!self.is_global, "Global implies output"); - debug_assert!(self.is_input, "Missing interference set"); - return iregs.iter(self.constraint); - } - - let mut r = oregs.clone(); - if self.is_input { - r.intersect(iregs); - } - if self.is_global { - r.intersect(gregs); - } - r.iter(self.constraint) - } -} - -impl fmt::Display for Variable { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}({}", self.value, self.constraint)?; - if let Some(reg) = self.from { - write!(f, ", from {}", self.constraint.info.display_regunit(reg))?; - } - if self.is_input { - write!(f, ", in")?; - } - if self.is_output { - write!(f, ", out")?; - } - if self.is_global { - write!(f, ", global")?; - } - if self.is_define() { - write!(f, ", def")?; - } - if self.domain > 0 { - write!(f, ", {}", self.domain)?; - } - write!(f, ")") - } -} - -#[derive(Clone, Debug)] -pub struct Assignment { - pub value: Value, - pub from: RegUnit, - pub to: RegUnit, - pub rc: RegClass, -} - -impl SparseMapValue for Assignment { - fn key(&self) -> Value { - self.value - } -} - -impl fmt::Display for Assignment { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let ri = self.rc.info; - write!( - f, - "{}:{}({} -> {})", - self.value, - self.rc, - ri.display_regunit(self.from), - ri.display_regunit(self.to) - ) - } -} - -/// A move operation between two registers or between a register and an emergency spill slot. -#[derive(Clone, PartialEq)] -pub enum Move { - Reg { - value: Value, - rc: RegClass, - from: RegUnit, - to: RegUnit, - }, - #[allow(dead_code)] // rustc doesn't see it isn't dead. - Spill { - value: Value, - rc: RegClass, - from: RegUnit, - to_slot: usize, - }, - Fill { - value: Value, - rc: RegClass, - from_slot: usize, - to: RegUnit, - }, -} - -impl Move { - /// Create a register move from an assignment, but not for identity assignments. 
- fn with_assignment(a: &Assignment) -> Option { - if a.from != a.to { - Some(Self::Reg { - value: a.value, - from: a.from, - to: a.to, - rc: a.rc, - }) - } else { - None - } - } - - /// Get the "from" register and register class, if possible. - #[cfg_attr(feature = "cargo-clippy", allow(clippy::wrong_self_convention))] - fn from_reg(&self) -> Option<(RegClass, RegUnit)> { - match *self { - Self::Reg { rc, from, .. } | Self::Spill { rc, from, .. } => Some((rc, from)), - Self::Fill { .. } => None, - } - } - - /// Get the "to" register and register class, if possible. - fn to_reg(&self) -> Option<(RegClass, RegUnit)> { - match *self { - Self::Reg { rc, to, .. } | Self::Fill { rc, to, .. } => Some((rc, to)), - Self::Spill { .. } => None, - } - } - - /// Replace the "to" register with `new` and return the old value. - fn replace_to_reg(&mut self, new: RegUnit) -> RegUnit { - mem::replace( - match *self { - Self::Reg { ref mut to, .. } | Self::Fill { ref mut to, .. } => to, - Self::Spill { .. } => panic!("No to register in a spill {}", self), - }, - new, - ) - } - - /// Convert this `Reg` move to a spill to `slot` and return the old "to" register. - fn change_to_spill(&mut self, slot: usize) -> RegUnit { - match self.clone() { - Self::Reg { - value, - rc, - from, - to, - } => { - *self = Self::Spill { - value, - rc, - from, - to_slot: slot, - }; - to - } - _ => panic!("Expected reg move: {}", self), - } - } - - /// Get the value being moved. - fn value(&self) -> Value { - match *self { - Self::Reg { value, .. } | Self::Fill { value, .. } | Self::Spill { value, .. } => value, - } - } - - /// Get the associated register class. - fn rc(&self) -> RegClass { - match *self { - Self::Reg { rc, .. } | Self::Fill { rc, .. } | Self::Spill { rc, .. 
} => rc, - } - } -} - -impl fmt::Display for Move { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Self::Reg { - value, - from, - to, - rc, - } => write!( - f, - "{}:{}({} -> {})", - value, - rc, - rc.info.display_regunit(from), - rc.info.display_regunit(to) - ), - Self::Spill { - value, - from, - to_slot, - rc, - } => write!( - f, - "{}:{}({} -> slot {})", - value, - rc, - rc.info.display_regunit(from), - to_slot - ), - Self::Fill { - value, - from_slot, - to, - rc, - } => write!( - f, - "{}:{}(slot {} -> {})", - value, - rc, - from_slot, - rc.info.display_regunit(to) - ), - } - } -} - -impl fmt::Debug for Move { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let as_display: &dyn fmt::Display = self; - as_display.fmt(f) - } -} - -/// Constraint solver for register allocation around a single instruction. -/// -/// Start by programming in the instruction constraints. -/// -/// 1. Initialize the solver by calling `reset()` with the set of allocatable registers before the -/// instruction. -/// 2. Program the input side constraints: Call `reassign_in()` for all fixed register constraints, -/// and `add_var()` for any input operands whose constraints are not already satisfied. -/// 3. Check for conflicts between fixed input assignments and existing live values by calling -/// `has_fixed_input_conflicts()`. Resolve any conflicts by calling `add_var()` with the -/// conflicting values. -/// 4. Prepare for adding output side constraints by calling `inputs_done()`. -/// 5. Add any killed register values that no longer cause interference on the output side by -/// calling `add_kill()`. -/// 6. Program the output side constraints: Call `add_fixed_output()` for all fixed register -/// constraints and `add_def()` for free defines. Resolve fixed output conflicts by calling -/// `add_through_var()`. -/// -pub struct Solver { - /// Register reassignments that are required or decided as part of a full solution. 
- /// This includes identity assignments for values that are already in the correct fixed - /// register. - assignments: SparseMap, - - /// Variables are the values that should be reassigned as part of a solution. - /// Values with fixed register constraints are not considered variables. They are represented - /// in the `assignments` vector if necessary. - vars: Vec, - - /// Are we finished adding input-side constraints? This changes the meaning of the `regs_in` - /// and `regs_out` register sets. - inputs_done: bool, - - /// Available registers on the input side of the instruction. - /// - /// While we're adding input constraints (`!inputs_done`): - /// - /// - Live values on the input side are marked as unavailable. - /// - The 'from' registers of fixed input reassignments are marked as available as they are - /// added. - /// - Input-side variables are marked as available. - /// - /// After finishing input constraints (`inputs_done`): - /// - /// - Live values on the input side are marked as unavailable. - /// - The 'to' registers of fixed input reassignments are marked as unavailable. - /// - Input-side variables are marked as available. - /// - regs_in: RegisterSet, - - /// Available registers on the output side of the instruction / fixed input scratch space. - /// - /// While we're adding input constraints (`!inputs_done`): - /// - /// - The 'to' registers of fixed input reassignments are marked as unavailable. - /// - /// After finishing input constraints (`inputs_done`): - /// - /// - Live-through values are marked as unavailable. - /// - Fixed output assignments are marked as unavailable. - /// - Live-through variables are marked as available. - /// - regs_out: RegisterSet, - - /// List of register moves scheduled to avoid conflicts. - /// - /// This is used as working space by the `schedule_moves()` function. - moves: Vec, - - /// List of pending fill moves. This is only used during `schedule_moves()`. 
- fills: Vec, -} - -/// Interface for programming the constraints into the solver. -impl Solver { - /// Create a new empty solver. - pub fn new() -> Self { - Self { - assignments: SparseMap::new(), - vars: Vec::new(), - inputs_done: false, - regs_in: RegisterSet::new(), - regs_out: RegisterSet::new(), - moves: Vec::new(), - fills: Vec::new(), - } - } - - /// Clear all data structures in this coloring pass. - pub fn clear(&mut self) { - self.assignments.clear(); - self.vars.clear(); - self.inputs_done = false; - self.regs_in = RegisterSet::new(); - self.regs_out = RegisterSet::new(); - self.moves.clear(); - self.fills.clear(); - } - - /// Reset the solver state and prepare solving for a new instruction with an initial set of - /// allocatable registers. - /// - /// The `regs` set is the allocatable registers before any reassignments are applied. - pub fn reset(&mut self, regs: &RegisterSet) { - self.assignments.clear(); - self.vars.clear(); - self.inputs_done = false; - self.regs_in = regs.clone(); - // Used for tracking fixed input assignments while `!inputs_done`: - self.regs_out = RegisterSet::new(); - self.moves.clear(); - self.fills.clear(); - } - - /// Add a fixed input reassignment of `value`. - /// - /// This means that `value` must be assigned to `to` and can't become a variable. Call with - /// `from == to` to ensure that `value` is not reassigned from its existing register location. - /// - /// In either case, `to` will not be available for variables on the input side of the - /// instruction. - pub fn reassign_in(&mut self, value: Value, rc: RegClass, from: RegUnit, to: RegUnit) { - log::trace!( - "reassign_in({}:{}, {} -> {})", - value, - rc, - rc.info.display_regunit(from), - rc.info.display_regunit(to) - ); - debug_assert!(!self.inputs_done); - if self.regs_in.is_avail(rc, from) { - // It looks like `value` was already removed from the register set. It must have been - // added as a variable previously. 
A fixed constraint beats a variable, so convert it. - if let Some(idx) = self.vars.iter().position(|v| v.value == value) { - let v = self.vars.remove(idx); - log::trace!("-> converting variable {} to a fixed constraint", v); - // The spiller is responsible for ensuring that all constraints on the uses of a - // value are compatible. - debug_assert!( - v.constraint.contains(to), - "Incompatible constraints for {}", - value - ); - } else { - panic!("Invalid from register for fixed {} constraint", value); - } - } - self.regs_in.free(rc, from); - self.regs_out.take(rc, to); - self.assignments.insert(Assignment { - value, - rc, - from, - to, - }); - } - - /// Add a variable representing an input side value with an existing register assignment. - /// - /// A variable is a value that should be reassigned to something in the `constraint` register - /// class. - /// - /// It is assumed initially that the value is also live on the output side of the instruction. - /// This can be changed by calling to `add_kill()`. - /// - /// This function can only be used before calling `inputs_done()`. Afterwards, more input-side - /// variables can be added by calling `add_killed_var()` and `add_through_var()` - pub fn add_var(&mut self, value: Value, constraint: RegClass, from: RegUnit) { - log::trace!( - "add_var({}:{}, from={})", - value, - constraint, - constraint.info.display_regunit(from) - ); - debug_assert!(!self.inputs_done); - self.add_live_var(value, constraint, from, true); - } - - /// Add an extra input-side variable representing a value that is killed by the current - /// instruction. - /// - /// This function should be called after `inputs_done()` only. Use `add_var()` before. 
- pub fn add_killed_var(&mut self, value: Value, rc: RegClass, from: RegUnit) { - log::trace!( - "add_killed_var({}:{}, from={})", - value, - rc, - rc.info.display_regunit(from) - ); - debug_assert!(self.inputs_done); - self.add_live_var(value, rc, from, false); - } - - /// Add an extra input-side variable representing a value that is live through the current - /// instruction. - /// - /// This function should be called after `inputs_done()` only. Use `add_var()` before. - pub fn add_through_var(&mut self, value: Value, rc: RegClass, from: RegUnit) { - log::trace!( - "add_through_var({}:{}, from={})", - value, - rc, - rc.info.display_regunit(from) - ); - debug_assert!(self.inputs_done); - self.add_live_var(value, rc, from, true); - } - - /// Shared code for `add_var`, `add_killed_var`, and `add_through_var`. - /// - /// Add a variable that is live before the instruction, and possibly live through. Merge - /// constraints if the value has already been added as a variable or fixed assignment. - fn add_live_var(&mut self, value: Value, rc: RegClass, from: RegUnit, live_through: bool) { - // Check for existing entries for this value. - if !self.can_add_var(rc, from) { - // There could be an existing variable entry. - if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) { - // We have an existing variable entry for `value`. Combine the constraints. - if let Some(rc) = v.constraint.intersect(rc) { - log::trace!("-> combining constraint with {} yields {}", v, rc); - v.constraint = rc; - return; - } else { - // The spiller should have made sure the same value is not used with disjoint - // constraints. - panic!("Incompatible constraints: {} + {}", rc, v) - } - } - - // No variable, then it must be a fixed reassignment. 
- if let Some(a) = self.assignments.get(value) { - log::trace!("-> already fixed assignment {}", a); - debug_assert!(rc.contains(a.to), "Incompatible constraints for {}", value); - return; - } - - log::trace!("{}", self); - panic!("Wrong from register for {}", value); - } - - let new_var = Variable::new_live(value, rc, from, live_through); - log::trace!("-> new var: {}", new_var); - - self.regs_in.free(rc, from); - if self.inputs_done && live_through { - self.regs_out.free(rc, from); - } - self.vars.push(new_var); - } - - /// Check for conflicts between fixed input assignments and existing live values. - /// - /// Returns true if one of the live values conflicts with a fixed input assignment. Such a - /// conflicting value must be turned into a variable. - pub fn has_fixed_input_conflicts(&self) -> bool { - debug_assert!(!self.inputs_done); - // The `from` side of the fixed input diversions are taken from `regs_out`. - self.regs_out.interferes_with(&self.regs_in) - } - - /// Check if `rc, reg` specifically conflicts with the fixed input assignments. - pub fn is_fixed_input_conflict(&self, rc: RegClass, reg: RegUnit) -> bool { - debug_assert!(!self.inputs_done); - !self.regs_out.is_avail(rc, reg) - } - - /// Finish adding input side constraints. - /// - /// Call this method to indicate that there will be no more fixed input reassignments added - /// and prepare for the output side constraints. - pub fn inputs_done(&mut self) { - debug_assert!(!self.has_fixed_input_conflicts()); - - // At this point, `regs_out` contains the `to` side of the input reassignments, and the - // `from` side has already been marked as available in `regs_in`. - // - // Remove the `to` assignments from `regs_in` so it now indicates the registers available - // to variables at the input side. - self.regs_in.intersect(&self.regs_out); - - // The meaning of `regs_out` now changes completely to indicate the registers available to - // variables on the output side. 
- // The initial mask will be modified by `add_kill()` and `add_fixed_output()`. - self.regs_out = self.regs_in.clone(); - - // Now we can't add more fixed input assignments, but `add_var()` is still allowed. - self.inputs_done = true; - } - - /// Record that an input register value is killed by the instruction. - /// - /// Even if a fixed reassignment has been added for the value, the `reg` argument should be the - /// original location before the reassignments. - /// - /// This means that the register is available on the output side. - pub fn add_kill(&mut self, value: Value, rc: RegClass, reg: RegUnit) { - debug_assert!(self.inputs_done); - - // If a fixed assignment is killed, the `to` register becomes available on the output side. - if let Some(a) = self.assignments.get(value) { - debug_assert_eq!(a.from, reg); - self.regs_out.free(a.rc, a.to); - return; - } - - // It's also possible that a variable is killed. That means it doesn't need to satisfy - // interference constraints on the output side. - // Variables representing tied operands will get their `is_output` flag set again later. - if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) { - debug_assert!(v.is_input); - v.is_output = false; - return; - } - - // Alright, this is just a boring value being killed by the instruction. Just reclaim - // the assigned register. - self.regs_out.free(rc, reg); - } - - /// Record that an input register is tied to an output register. - /// - /// It is assumed that `add_kill` was called previously with the same arguments. - /// - /// The output value that must have the same register as the input value is not recorded in the - /// solver. - /// - /// If the value has already been assigned to a fixed register, return that. 
- pub fn add_tied_input( - &mut self, - value: Value, - rc: RegClass, - reg: RegUnit, - is_global: bool, - ) -> Option { - debug_assert!(self.inputs_done); - - // If a fixed assignment is tied, the `to` register is not available on the output side. - if let Some(a) = self.assignments.get(value) { - debug_assert_eq!(a.from, reg); - self.regs_out.take(a.rc, a.to); - return Some(a.to); - } - - // Check if a variable was created. - if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) { - debug_assert!(v.is_input); - v.is_output = true; - v.is_global = is_global; - return None; - } - - // No variable exists for `value` because its constraints are already satisfied. - // However, if the tied output value has a global live range, we must create a variable to - // avoid global interference too. - if is_global { - let mut new_var = Variable::new_live(value, rc, reg, true); - new_var.is_global = true; - log::trace!("add_tied_input: new tied-global value: {}", new_var); - self.vars.push(new_var); - self.regs_in.free(rc, reg); - } else { - self.regs_out.take(rc, reg); - } - - None - } - - /// Add a fixed output assignment. - /// - /// This means that `to` will not be available for variables on the output side of the - /// instruction. - /// - /// Returns `false` if a live value conflicts with `to`, so it couldn't be added. Find the - /// conflicting live-through value and turn it into a variable before calling this method - /// again. - #[allow(dead_code)] - pub fn add_fixed_output(&mut self, rc: RegClass, reg: RegUnit) -> bool { - debug_assert!(self.inputs_done); - if self.regs_out.is_avail(rc, reg) { - self.regs_out.take(rc, reg); - true - } else { - false - } - } - - /// Add a defined output value. - /// - /// This is similar to `add_var`, except the value doesn't have a prior register assignment. 
- pub fn add_def(&mut self, value: Value, constraint: RegClass, is_global: bool) { - debug_assert!(self.inputs_done); - self.vars - .push(Variable::new_def(value, constraint, is_global)); - } - - /// Clear the `is_global` flag on all solver variables. - /// - /// This is used when there are not enough global registers available, and global defines have - /// to be replaced with local defines followed by a copy. - pub fn clear_all_global_flags(&mut self) { - for v in &mut self.vars { - v.is_global = false; - } - } -} - -/// Error reported when the solver fails to find a solution with the current constraints. -/// -/// When no solution can be found, the error indicates how constraints could be loosened to help. -pub enum SolverError { - /// There are not available registers in the given register class. - /// - /// This should be resolved by turning live-through values into variables so they can be moved - /// out of the way. - Divert(RegClass), - - /// There are insufficient available registers in the global set to assign an `is_global` - /// variable with the given value. - /// - /// This should be resolved by converting the variable to a local one. - Global(Value), -} - -/// Interface for searching for a solution. -impl Solver { - /// Try a quick-and-dirty solution. - /// - /// This is expected to succeed for most instructions since the constraint problem is almost - /// always trivial. - /// - /// Returns `Ok(regs)` if a solution was found. - pub fn quick_solve( - &mut self, - global_regs: &RegisterSet, - is_reload: bool, - ) -> Result { - self.find_solution(global_regs, is_reload) - } - - /// Try harder to find a solution. - /// - /// Call this method after `quick_solve()` fails. - /// - /// This may return an error with a register class that has run out of registers. If registers - /// can be freed up in the starving class, this method can be called again after adding - /// variables for the freed registers. 
- pub fn real_solve( - &mut self, - global_regs: &RegisterSet, - is_reload: bool, - ) -> Result { - // Compute domain sizes for all the variables given the current register sets. - for v in &mut self.vars { - let d = v.iter(&self.regs_in, &self.regs_out, global_regs).len(); - v.domain = cmp::min(d, u16::MAX as usize) as u16; - } - - // Solve for vars with small domains first to increase the chance of finding a solution. - // - // Also consider this case: - // - // v0: out, global - // v1: in - // v2: in+out - // - // If only %r0 and %r1 are available, the global constraint may cause us to assign: - // - // v0 -> %r1 - // v1 -> %r0 - // v2 -> ! - // - // Usually in+out variables will have a smaller domain, but in the above case the domain - // size is the same, so we also prioritize in+out variables. - // - // Include the reversed previous solution for this variable partly as a stable tie breaker, - // partly to shake things up on a second attempt. - // - // Use the `from` register and value number as a tie breaker to get a stable sort. - self.vars.sort_unstable_by_key(|v| { - ( - v.domain, - !(v.is_input && v.is_output), - !v.solution, - v.from.unwrap_or(0), - v.value, - ) - }); - - log::trace!("real_solve for {}", self); - self.find_solution(global_regs, is_reload) - } - - /// Search for a solution with the current list of variables. - /// - /// If a solution was found, returns `Ok(regs)` with the set of available registers on the - /// output side after the solution. If no solution could be found, returns `Err(rc)` with the - /// constraint register class that needs more available registers. - fn find_solution( - &mut self, - global_regs: &RegisterSet, - is_reload: bool, - ) -> Result { - // Available registers on the input and output sides respectively. 
- let mut iregs = self.regs_in.clone(); - let mut oregs = self.regs_out.clone(); - let mut gregs = global_regs.clone(); - - for v in &mut self.vars { - let rc = v.constraint; - - // Decide which register to assign. In order to try and keep registers holding - // reloaded values separate from all other registers to the extent possible, we choose - // the first available register in the normal case, but the last available one in the - // case of a reload. See "A side note on register choice heuristics" in - // src/redundant_reload_remover.rs for further details. - let mut reg_set_iter = v.iter(&iregs, &oregs, &gregs); - let maybe_reg = if is_reload { - reg_set_iter.rnext() - } else { - reg_set_iter.next() - }; - - let reg = match maybe_reg { - Some(reg) => reg, - None => { - // If `v` must avoid global interference, there is not point in requesting - // live registers be diverted. We need to make it a non-global value. - if v.is_global && gregs.iter(rc).next().is_none() { - return Err(SolverError::Global(v.value)); - } - return Err(SolverError::Divert(rc)); - } - }; - - v.solution = reg; - if v.is_input { - iregs.take(rc, reg); - } - if v.is_output { - oregs.take(rc, reg); - } - if v.is_global { - gregs.take(rc, reg); - } - } - - Ok(oregs) - } - - /// Get all the variables. - pub fn vars(&self) -> &[Variable] { - &self.vars - } - - /// Check if `value` can be added as a variable to help find a solution. - pub fn can_add_var(&mut self, constraint: RegClass, from: RegUnit) -> bool { - !self.regs_in.is_avail(constraint, from) - && !self.vars.iter().any(|var| var.from == Some(from)) - } -} - -/// Interface for working with parallel copies once a solution has been found. -impl Solver { - /// Collect all the register moves we need to execute. - fn collect_moves(&mut self) { - self.moves.clear(); - - // Collect moves from the chosen solution for all non-define variables. 
- for v in &self.vars { - if let Some(from) = v.from { - // Omit variable solutions that don't require the value to be moved. - if from != v.solution { - self.moves.push(Move::Reg { - value: v.value, - from, - to: v.solution, - rc: v.constraint, - }); - } - } - } - - // Convert all of the fixed register assignments into moves, but omit the ones that are - // already in the right register. - self.moves - .extend(self.assignments.values().filter_map(Move::with_assignment)); - - if !self.moves.is_empty() { - log::trace!("collect_moves: {}", DisplayList(&self.moves)); - } - } - - /// Try to schedule a sequence of `regmove` instructions that will shuffle registers into - /// place. - /// - /// This may require the use of additional available registers, and it can fail if no - /// additional registers are available. - /// - /// TODO: Handle failure by generating a sequence of register swaps, or by temporarily spilling - /// a register. - /// - /// Returns the number of spills that had to be emitted. - pub fn schedule_moves(&mut self, regs: &RegisterSet) -> usize { - self.collect_moves(); - debug_assert!(self.fills.is_empty()); - - let mut num_spill_slots = 0; - let mut avail = regs.clone(); - let mut i = 0; - while i < self.moves.len() + self.fills.len() { - // Don't even look at the fills until we've spent all the moves. Deferring these lets - // us potentially reuse the claimed registers to resolve multiple cycles. - if i >= self.moves.len() { - self.moves.append(&mut self.fills); - } - - // Find the first move that can be executed now. - if let Some(j) = self.moves[i..].iter().position(|m| match m.to_reg() { - Some((rc, reg)) => avail.is_avail(rc, reg), - None => true, - }) { - // This move can be executed now. 
- self.moves.swap(i, i + j); - let m = &self.moves[i]; - if let Some((rc, reg)) = m.to_reg() { - avail.take(rc, reg); - } - if let Some((rc, reg)) = m.from_reg() { - avail.free(rc, reg); - } - log::trace!("move #{}: {}", i, m); - i += 1; - continue; - } - - // When we get here, none of the `moves[i..]` can be executed. This means there are - // only cycles remaining. The cycles can be broken in a few ways: - // - // 1. Grab an available register and use it to break a cycle. - // 2. Move a value temporarily into a stack slot instead of a register. - // 3. Use swap instructions. - // - // TODO: So far we only implement 1 and 2. - - // Pick an assignment with the largest possible width. This is more likely to break up - // a cycle than an assignment with fewer register units. For example, it may be - // necessary to move two arm32 S-registers out of the way before a D-register can move - // into place. - // - // We use `min_by_key` and `!` instead of `max_by_key` because it preserves the - // existing order of moves with the same width. - let j = self.moves[i..] - .iter() - .enumerate() - .min_by_key(|&(_, m)| !m.rc().width) - .unwrap() - .0; - self.moves.swap(i, i + j); - - // Check the top-level register class for an available register. It is an axiom of the - // register allocator that we can move between all registers in the top-level RC. - let m = self.moves[i].clone(); - let toprc = m.rc().toprc(); - if let Some(reg) = avail.iter(toprc).next() { - log::trace!( - "breaking cycle at {} with available {} register {}", - m, - toprc, - toprc.info.display_regunit(reg) - ); - - // Alter the move so it is guaranteed to be picked up when we loop. It is important - // that this move is scheduled immediately, otherwise we would have multiple moves - // of the same value, and they would not be commutable. - let old_to_reg = self.moves[i].replace_to_reg(reg); - // Append a fixup move so we end up in the right place. This move will be scheduled - // later. 
That's ok because it is the single remaining move of `m.value` after the - // next iteration. - self.moves.push(Move::Reg { - value: m.value(), - rc: toprc, - from: reg, - to: old_to_reg, - }); - // TODO: What if allocating an extra register is not enough to break a cycle? This - // can happen when there are registers of different widths in a cycle. For ARM, we - // may have to move two S-registers out of the way before we can resolve a cycle - // involving a D-register. - continue; - } - - // It was impossible to free up a register in toprc, so use an emergency spill slot as - // a last resort. - let slot = num_spill_slots; - num_spill_slots += 1; - log::trace!("breaking cycle at {} with slot {}", m, slot); - let old_to_reg = self.moves[i].change_to_spill(slot); - self.fills.push(Move::Fill { - value: m.value(), - rc: toprc, - from_slot: slot, - to: old_to_reg, - }); - } - - num_spill_slots - } - - /// Borrow the scheduled set of register moves that was computed by `schedule_moves()`. - pub fn moves(&self) -> &[Move] { - &self.moves - } -} - -impl fmt::Display for Solver { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let reginfo = self.vars.first().map(|v| v.constraint.info); - writeln!(f, "Solver {{ inputs_done: {},", self.inputs_done)?; - writeln!(f, " in: {}", self.regs_in.display(reginfo))?; - writeln!(f, " out: {}", self.regs_out.display(reginfo))?; - writeln!( - f, - " assignments: {}", - DisplayList(self.assignments.as_slice()) - )?; - writeln!(f, " vars: {}", DisplayList(&self.vars))?; - writeln!(f, " moves: {}", DisplayList(&self.moves))?; - writeln!(f, "}}") - } -} - -#[cfg(test)] -#[cfg(feature = "arm32")] -mod tests { - use super::{Move, Solver}; - use crate::entity::EntityRef; - use crate::ir::Value; - use crate::isa::registers::{RegBank, RegClassData}; - use crate::isa::{RegClass, RegInfo, RegUnit}; - use crate::regalloc::RegisterSet; - use core::borrow::Borrow; - - // Arm32 `TargetIsa` is now `TargetIsaAdapter`, which does not hold any 
info - // about registers, so we directly access `INFO` from registers-arm32.rs. - include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs")); - - // Get a register class by name. - fn rc_by_name(reginfo: &RegInfo, name: &str) -> RegClass { - reginfo - .classes - .iter() - .find(|rc| rc.name == name) - .expect("Can't find named register class.") - } - - // Construct a register move. - fn mov(value: Value, rc: RegClass, from: RegUnit, to: RegUnit) -> Move { - Move::Reg { - value, - rc, - from, - to, - } - } - - fn spill(value: Value, rc: RegClass, from: RegUnit, to_slot: usize) -> Move { - Move::Spill { - value, - rc, - from, - to_slot, - } - } - - fn fill(value: Value, rc: RegClass, from_slot: usize, to: RegUnit) -> Move { - Move::Fill { - value, - rc, - from_slot, - to, - } - } - - #[test] - fn simple_moves() { - let reginfo = INFO.borrow(); - let gpr = rc_by_name(®info, "GPR"); - let r0 = gpr.unit(0); - let r1 = gpr.unit(1); - let r2 = gpr.unit(2); - let gregs = RegisterSet::new(); - let mut regs = RegisterSet::new(); - let mut solver = Solver::new(); - let v10 = Value::new(10); - let v11 = Value::new(11); - - // As simple as it gets: Value is in r1, we want r0. - regs.take(gpr, r1); - solver.reset(®s); - solver.reassign_in(v10, gpr, r1, r0); - solver.inputs_done(); - assert!(solver.quick_solve(&gregs, false).is_ok()); - assert_eq!(solver.schedule_moves(®s), 0); - assert_eq!(solver.moves(), &[mov(v10, gpr, r1, r0)]); - - // A bit harder: r0, r1 need to go in r1, r2. - regs.take(gpr, r0); - solver.reset(®s); - solver.reassign_in(v10, gpr, r0, r1); - solver.reassign_in(v11, gpr, r1, r2); - solver.inputs_done(); - assert!(solver.quick_solve(&gregs, false).is_ok()); - assert_eq!(solver.schedule_moves(®s), 0); - assert_eq!( - solver.moves(), - &[mov(v11, gpr, r1, r2), mov(v10, gpr, r0, r1)] - ); - - // Swap r0 and r1 in three moves using r2 as a scratch. 
- solver.reset(®s); - solver.reassign_in(v10, gpr, r0, r1); - solver.reassign_in(v11, gpr, r1, r0); - solver.inputs_done(); - assert!(solver.quick_solve(&gregs, false).is_ok()); - assert_eq!(solver.schedule_moves(®s), 0); - assert_eq!( - solver.moves(), - &[ - mov(v10, gpr, r0, r2), - mov(v11, gpr, r1, r0), - mov(v10, gpr, r2, r1), - ] - ); - } - - #[test] - fn harder_move_cycles() { - let reginfo = INFO.borrow(); - let s = rc_by_name(®info, "S"); - let d = rc_by_name(®info, "D"); - let d0 = d.unit(0); - let d1 = d.unit(1); - let d2 = d.unit(2); - let s0 = s.unit(0); - let s1 = s.unit(1); - let s2 = s.unit(2); - let s3 = s.unit(3); - let gregs = RegisterSet::new(); - let mut regs = RegisterSet::new(); - let mut solver = Solver::new(); - let v10 = Value::new(10); - let v11 = Value::new(11); - let v12 = Value::new(12); - - // Not a simple cycle: Swap d0 <-> (s2, s3) - regs.take(d, d0); - regs.take(d, d1); - solver.reset(®s); - solver.reassign_in(v10, d, d0, d1); - solver.reassign_in(v11, s, s2, s0); - solver.reassign_in(v12, s, s3, s1); - solver.inputs_done(); - assert!(solver.quick_solve(&gregs, false).is_ok()); - assert_eq!(solver.schedule_moves(®s), 0); - assert_eq!( - solver.moves(), - &[ - mov(v10, d, d0, d2), - mov(v11, s, s2, s0), - mov(v12, s, s3, s1), - mov(v10, d, d2, d1), - ] - ); - - // Same problem in the other direction: Swap (s0, s1) <-> d1. - // - // If we divert the moves in order, we will need to allocate *two* temporary S registers. A - // trivial algorithm might assume that allocating a single temp is enough. 
- solver.reset(®s); - solver.reassign_in(v11, s, s0, s2); - solver.reassign_in(v12, s, s1, s3); - solver.reassign_in(v10, d, d1, d0); - solver.inputs_done(); - assert!(solver.quick_solve(&gregs, false).is_ok()); - assert_eq!(solver.schedule_moves(®s), 0); - assert_eq!( - solver.moves(), - &[ - mov(v10, d, d1, d2), - mov(v12, s, s1, s3), - mov(v11, s, s0, s2), - mov(v10, d, d2, d0), - ] - ); - } - - #[test] - fn emergency_spill() { - let reginfo = INFO.borrow(); - let gpr = rc_by_name(®info, "GPR"); - let r0 = gpr.unit(0); - let r1 = gpr.unit(1); - let r2 = gpr.unit(2); - let r3 = gpr.unit(3); - let r4 = gpr.unit(4); - let r5 = gpr.unit(5); - let gregs = RegisterSet::new(); - let mut regs = RegisterSet::new(); - let mut solver = Solver::new(); - let v10 = Value::new(10); - let v11 = Value::new(11); - let v12 = Value::new(12); - let v13 = Value::new(13); - let v14 = Value::new(14); - let v15 = Value::new(15); - - // Claim r0--r2 and r3--r15 for other values. - for i in 0..16 { - regs.take(gpr, gpr.unit(i)); - } - - // Request a permutation cycle. - solver.reset(®s); - solver.reassign_in(v10, gpr, r0, r1); - solver.reassign_in(v11, gpr, r1, r2); - solver.reassign_in(v12, gpr, r2, r0); - solver.inputs_done(); - assert!(solver.quick_solve(&gregs, false).is_ok()); - assert_eq!(solver.schedule_moves(®s), 1); - assert_eq!( - solver.moves(), - &[ - spill(v10, gpr, r0, 0), - mov(v12, gpr, r2, r0), - mov(v11, gpr, r1, r2), - fill(v10, gpr, 0, r1), - ] - ); - - // Two cycles should only require a single spill. - solver.reset(®s); - // Cycle 1. - solver.reassign_in(v10, gpr, r0, r1); - solver.reassign_in(v11, gpr, r1, r2); - solver.reassign_in(v12, gpr, r2, r0); - // Cycle 2. - solver.reassign_in(v13, gpr, r3, r4); - solver.reassign_in(v14, gpr, r4, r5); - solver.reassign_in(v15, gpr, r5, r3); - - solver.inputs_done(); - assert!(solver.quick_solve(&gregs, false).is_ok()); - // We resolve two cycles with one spill. 
- assert_eq!(solver.schedule_moves(®s), 1); - assert_eq!( - solver.moves(), - &[ - spill(v10, gpr, r0, 0), - mov(v12, gpr, r2, r0), - mov(v11, gpr, r1, r2), - mov(v13, gpr, r3, r1), // Use available r1 to break cycle 2. - mov(v15, gpr, r5, r3), - mov(v14, gpr, r4, r5), - mov(v13, gpr, r1, r4), - fill(v10, gpr, 0, r1), // Finally complete cycle 1. - ] - ); - } -} diff --git a/cranelift/codegen/src/regalloc/spilling.rs b/cranelift/codegen/src/regalloc/spilling.rs deleted file mode 100644 index e44502f0a6..0000000000 --- a/cranelift/codegen/src/regalloc/spilling.rs +++ /dev/null @@ -1,638 +0,0 @@ -//! Spilling pass. -//! -//! The spilling pass is the first to run after the liveness analysis. Its primary function is to -//! ensure that the register pressure never exceeds the number of available registers by moving -//! some SSA values to spill slots on the stack. This is encoded in the affinity of the value's -//! live range. -//! -//! Some instruction operand constraints may require additional registers to resolve. Since this -//! can cause spilling, the spilling pass is also responsible for resolving those constraints by -//! inserting copies. The extra constraints are: -//! -//! 1. A value used by a tied operand must be killed by the instruction. This is resolved by -//! inserting a copy to a temporary value when necessary. -//! 2. When the same value is used more than once by an instruction, the operand constraints must -//! be compatible. Otherwise, the value must be copied into a new register for some of the -//! operands. 
- -use crate::cursor::{Cursor, EncCursor}; -use crate::dominator_tree::DominatorTree; -use crate::ir::{ArgumentLoc, Block, Function, Inst, InstBuilder, SigRef, Value, ValueLoc}; -use crate::isa::registers::{RegClass, RegClassIndex, RegClassMask, RegUnit}; -use crate::isa::{ConstraintKind, EncInfo, RecipeConstraints, RegInfo, TargetIsa}; -use crate::regalloc::affinity::Affinity; -use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker}; -use crate::regalloc::liveness::Liveness; -use crate::regalloc::pressure::Pressure; -use crate::regalloc::virtregs::VirtRegs; -use crate::timing; -use crate::topo_order::TopoOrder; -use alloc::vec::Vec; -use core::fmt; - -/// Return a top-level register class which contains `unit`. -fn toprc_containing_regunit(unit: RegUnit, reginfo: &RegInfo) -> RegClass { - let bank = reginfo.bank_containing_regunit(unit).unwrap(); - reginfo.classes[bank.first_toprc..(bank.first_toprc + bank.num_toprcs)] - .iter() - .find(|&rc| rc.contains(unit)) - .expect("reg unit should be in a toprc") -} - -/// Persistent data structures for the spilling pass. -pub struct Spilling { - spills: Vec, - reg_uses: Vec, -} - -/// Context data structure that gets instantiated once per pass. -struct Context<'a> { - // Current instruction as well as reference to function and ISA. - cur: EncCursor<'a>, - - // Cached ISA information. - reginfo: RegInfo, - encinfo: EncInfo, - - // References to contextual data structures we need. - domtree: &'a DominatorTree, - liveness: &'a mut Liveness, - virtregs: &'a VirtRegs, - topo: &'a mut TopoOrder, - - // Current register pressure. - pressure: Pressure, - - // Values spilled for the current instruction. These values have already been removed from the - // pressure tracker, but they are still present in the live value tracker and their affinity - // hasn't been changed yet. - spills: &'a mut Vec, - - // Uses of register values in the current instruction. 
- reg_uses: &'a mut Vec, -} - -impl Spilling { - /// Create a new spilling data structure. - pub fn new() -> Self { - Self { - spills: Vec::new(), - reg_uses: Vec::new(), - } - } - - /// Clear all data structures in this spilling pass. - pub fn clear(&mut self) { - self.spills.clear(); - self.reg_uses.clear(); - } - - /// Run the spilling algorithm over `func`. - pub fn run( - &mut self, - isa: &dyn TargetIsa, - func: &mut Function, - domtree: &DominatorTree, - liveness: &mut Liveness, - virtregs: &VirtRegs, - topo: &mut TopoOrder, - tracker: &mut LiveValueTracker, - ) { - let _tt = timing::ra_spilling(); - log::trace!("Spilling for:\n{}", func.display(isa)); - let reginfo = isa.register_info(); - let usable_regs = isa.allocatable_registers(func); - let mut ctx = Context { - cur: EncCursor::new(func, isa), - reginfo: isa.register_info(), - encinfo: isa.encoding_info(), - domtree, - liveness, - virtregs, - topo, - pressure: Pressure::new(®info, &usable_regs), - spills: &mut self.spills, - reg_uses: &mut self.reg_uses, - }; - ctx.run(tracker) - } -} - -impl<'a> Context<'a> { - fn run(&mut self, tracker: &mut LiveValueTracker) { - self.topo.reset(self.cur.func.layout.blocks()); - while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) { - self.visit_block(block, tracker); - } - } - - fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) { - log::trace!("Spilling {}:", block); - self.cur.goto_top(block); - self.visit_block_header(block, tracker); - tracker.drop_dead_params(); - self.process_spills(tracker); - - while let Some(inst) = self.cur.next_inst() { - if !self.cur.func.dfg[inst].opcode().is_ghost() { - self.visit_inst(inst, block, tracker); - } else { - let (_throughs, kills) = tracker.process_ghost(inst); - self.free_regs(kills); - } - tracker.drop_dead(inst); - self.process_spills(tracker); - } - } - - // Take all live registers in `regs` from the pressure set. 
- // This doesn't cause any spilling, it is assumed there are enough registers. - fn take_live_regs(&mut self, regs: &[LiveValue]) { - for lv in regs { - if !lv.is_dead { - if let Affinity::Reg(rci) = lv.affinity { - let rc = self.reginfo.rc(rci); - self.pressure.take(rc); - } - } - } - } - - // Free all registers in `kills` from the pressure set. - fn free_regs(&mut self, kills: &[LiveValue]) { - for lv in kills { - if let Affinity::Reg(rci) = lv.affinity { - if !self.spills.contains(&lv.value) { - let rc = self.reginfo.rc(rci); - self.pressure.free(rc); - } - } - } - } - - // Free all dead registers in `regs` from the pressure set. - fn free_dead_regs(&mut self, regs: &[LiveValue]) { - for lv in regs { - if lv.is_dead { - if let Affinity::Reg(rci) = lv.affinity { - if !self.spills.contains(&lv.value) { - let rc = self.reginfo.rc(rci); - self.pressure.free(rc); - } - } - } - } - } - - fn visit_block_header(&mut self, block: Block, tracker: &mut LiveValueTracker) { - let (liveins, params) = tracker.block_top( - block, - &self.cur.func.dfg, - self.liveness, - &self.cur.func.layout, - self.domtree, - ); - - // Count the live-in registers. These should already fit in registers; they did at the - // dominator. - self.pressure.reset(); - self.take_live_regs(liveins); - - // A block can have an arbitrary (up to 2^16...) number of parameters, so they are not - // guaranteed to fit in registers. - for lv in params { - if let Affinity::Reg(rci) = lv.affinity { - let rc = self.reginfo.rc(rci); - 'try_take: while let Err(mask) = self.pressure.take_transient(rc) { - log::trace!("Need {} reg for block param {}", rc, lv.value); - match self.spill_candidate(mask, liveins) { - Some(cand) => { - log::trace!( - "Spilling live-in {} to make room for {} block param {}", - cand, - rc, - lv.value - ); - self.spill_reg(cand); - } - None => { - // We can't spill any of the live-in registers, so we have to spill an - // block argument. 
Since the current spill metric would consider all the - // block arguments equal, just spill the present register. - log::trace!("Spilling {} block argument {}", rc, lv.value); - - // Since `spill_reg` will free a register, add the current one here. - self.pressure.take(rc); - self.spill_reg(lv.value); - break 'try_take; - } - } - } - } - } - - // The transient pressure counts for the block arguments are accurate. Just preserve them. - self.pressure.preserve_transient(); - self.free_dead_regs(params); - } - - fn visit_inst(&mut self, inst: Inst, block: Block, tracker: &mut LiveValueTracker) { - log::trace!("Inst {}, {}", self.cur.display_inst(inst), self.pressure); - debug_assert_eq!(self.cur.current_inst(), Some(inst)); - debug_assert_eq!(self.cur.current_block(), Some(block)); - - let constraints = self - .encinfo - .operand_constraints(self.cur.func.encodings[inst]); - - // We may need to resolve register constraints if there are any noteworthy uses. - debug_assert!(self.reg_uses.is_empty()); - self.collect_reg_uses(inst, block, constraints); - - // Calls usually have fixed register uses. - let call_sig = self.cur.func.dfg.call_signature(inst); - if let Some(sig) = call_sig { - self.collect_abi_reg_uses(inst, sig); - } - - if !self.reg_uses.is_empty() { - self.process_reg_uses(inst, tracker); - } - - // Update the live value tracker with this instruction. - let (throughs, kills, defs) = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness); - - // Remove kills from the pressure tracker. - self.free_regs(kills); - - // If inst is a call, spill all register values that are live across the call. - // This means that we don't currently take advantage of callee-saved registers. - // TODO: Be more sophisticated. 
- let opcode = self.cur.func.dfg[inst].opcode(); - if call_sig.is_some() || opcode.clobbers_all_regs() { - for lv in throughs { - if lv.affinity.is_reg() && !self.spills.contains(&lv.value) { - self.spill_reg(lv.value); - } - } - } - - // Make sure we have enough registers for the register defs. - // Dead defs are included here. They need a register too. - // No need to process call return values, they are in fixed registers. - if let Some(constraints) = constraints { - for op in constraints.outs { - if op.kind != ConstraintKind::Stack { - // Add register def to pressure, spill if needed. - while let Err(mask) = self.pressure.take_transient(op.regclass) { - log::trace!("Need {} reg from {} throughs", op.regclass, throughs.len()); - match self.spill_candidate(mask, throughs) { - Some(cand) => self.spill_reg(cand), - None => panic!( - "Ran out of {} registers for {}", - op.regclass, - self.cur.display_inst(inst) - ), - } - } - } - } - self.pressure.reset_transient(); - } - - // Restore pressure state, compute pressure with affinities from `defs`. - // Exclude dead defs. Includes call return values. - // This won't cause spilling. - self.take_live_regs(defs); - } - - // Collect register uses that are noteworthy in one of the following ways: - // - // 1. It's a fixed register constraint. - // 2. It's a use of a spilled value. - // 3. It's a tied register constraint and the value isn't killed. - // - // We are assuming here that if a value is used both by a fixed register operand and a register - // class operand, they two are compatible. We are also assuming that two register class - // operands are always compatible. 
- fn collect_reg_uses( - &mut self, - inst: Inst, - block: Block, - constraints: Option<&RecipeConstraints>, - ) { - let args = self.cur.func.dfg.inst_args(inst); - let num_fixed_ins = if let Some(constraints) = constraints { - for (idx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() { - let mut reguse = RegUse::new(arg, idx, op.regclass.into()); - let lr = &self.liveness[arg]; - match op.kind { - ConstraintKind::Stack => continue, - ConstraintKind::FixedReg(_) => reguse.fixed = true, - ConstraintKind::Tied(_) => { - // A tied operand must kill the used value. - reguse.tied = !lr.killed_at(inst, block, &self.cur.func.layout); - } - ConstraintKind::FixedTied(_) => { - reguse.fixed = true; - reguse.tied = !lr.killed_at(inst, block, &self.cur.func.layout); - } - ConstraintKind::Reg => {} - } - if lr.affinity.is_stack() { - reguse.spilled = true; - } - - // Only collect the interesting register uses. - if reguse.fixed || reguse.tied || reguse.spilled { - log::trace!(" reguse: {}", reguse); - self.reg_uses.push(reguse); - } - } - constraints.ins.len() - } else { - // A non-ghost instruction with no constraints can't have any - // fixed operands. - 0 - }; - - // Similarly, for return instructions, collect uses of ABI-defined - // return values. - if self.cur.func.dfg[inst].opcode().is_return() { - debug_assert_eq!( - self.cur.func.dfg.inst_variable_args(inst).len(), - self.cur.func.signature.returns.len(), - "The non-fixed arguments in a return should follow the function's signature." 
- ); - for (ret_idx, (ret, &arg)) in - self.cur.func.signature.returns.iter().zip(args).enumerate() - { - let idx = num_fixed_ins + ret_idx; - let unit = match ret.location { - ArgumentLoc::Unassigned => { - panic!("function return signature should be legalized") - } - ArgumentLoc::Reg(unit) => unit, - ArgumentLoc::Stack(_) => continue, - }; - let toprc = toprc_containing_regunit(unit, &self.reginfo); - let mut reguse = RegUse::new(arg, idx, toprc.into()); - reguse.fixed = true; - - log::trace!(" reguse: {}", reguse); - self.reg_uses.push(reguse); - } - } - } - - // Collect register uses from the ABI input constraints. - fn collect_abi_reg_uses(&mut self, inst: Inst, sig: SigRef) { - let num_fixed_args = self.cur.func.dfg[inst] - .opcode() - .constraints() - .num_fixed_value_arguments(); - let args = self.cur.func.dfg.inst_variable_args(inst); - for (idx, (abi, &arg)) in self.cur.func.dfg.signatures[sig] - .params - .iter() - .zip(args) - .enumerate() - { - if abi.location.is_reg() { - let (rci, spilled) = match self.liveness[arg].affinity { - Affinity::Reg(rci) => (rci, false), - Affinity::Stack => ( - self.cur.isa.regclass_for_abi_type(abi.value_type).into(), - true, - ), - Affinity::Unassigned => panic!("Missing affinity for {}", arg), - }; - let mut reguse = RegUse::new(arg, num_fixed_args + idx, rci); - reguse.fixed = true; - reguse.spilled = spilled; - self.reg_uses.push(reguse); - } - } - } - - // Process multiple register uses to resolve potential conflicts. - // - // Look for multiple uses of the same value in `self.reg_uses` and insert copies as necessary. - // Trigger spilling if any of the temporaries cause the register pressure to become too high. - // - // Leave `self.reg_uses` empty. - fn process_reg_uses(&mut self, inst: Inst, tracker: &LiveValueTracker) { - // We're looking for multiple uses of the same value, so start by sorting by value. The - // secondary `opidx` key makes it possible to use an unstable (non-allocating) sort. 
- self.reg_uses.sort_unstable_by_key(|u| (u.value, u.opidx)); - - self.cur.use_srcloc(inst); - for i in 0..self.reg_uses.len() { - let ru = self.reg_uses[i]; - - // Do we need to insert a copy for this use? - let need_copy = if ru.tied { - true - } else if ru.fixed { - // This is a fixed register use which doesn't necessarily require a copy. - // Make a copy only if this is not the first use of the value. - self.reg_uses - .get(i.wrapping_sub(1)) - .map_or(false, |ru2| ru2.value == ru.value) - } else { - false - }; - - if need_copy { - let copy = self.insert_copy(ru.value, ru.rci); - self.cur.func.dfg.inst_args_mut(inst)[ru.opidx as usize] = copy; - } - - // Even if we don't insert a copy, we may need to account for register pressure for the - // reload pass. - if need_copy || ru.spilled { - let rc = self.reginfo.rc(ru.rci); - while let Err(mask) = self.pressure.take_transient(rc) { - log::trace!("Copy of {} reg causes spill", rc); - // Spill a live register that is *not* used by the current instruction. - // Spilling a use wouldn't help. - // - // Do allow spilling of block arguments on branches. This is safe since we spill - // the whole virtual register which includes the matching block parameter value - // at the branch destination. It is also necessary since there can be - // arbitrarily many block arguments. - match { - let args = if self.cur.func.dfg[inst].opcode().is_branch() { - self.cur.func.dfg.inst_fixed_args(inst) - } else { - self.cur.func.dfg.inst_args(inst) - }; - self.spill_candidate( - mask, - tracker.live().iter().filter(|lv| !args.contains(&lv.value)), - ) - } { - Some(cand) => self.spill_reg(cand), - None => panic!( - "Ran out of {} registers when inserting copy before {}", - rc, - self.cur.display_inst(inst) - ), - } - } - } - } - self.pressure.reset_transient(); - self.reg_uses.clear() - } - - // Find a spill candidate from `candidates` whose top-level register class is in `mask`. 
- fn spill_candidate<'ii, II>(&self, mask: RegClassMask, candidates: II) -> Option - where - II: IntoIterator, - { - // Find the best viable spill candidate. - // - // The very simple strategy implemented here is to spill the value with the earliest def in - // the reverse post-order. This strategy depends on a good reload pass to generate good - // code. - // - // We know that all candidate defs dominate the current instruction, so one of them will - // dominate the others. That is the earliest def. - candidates - .into_iter() - .filter_map(|lv| { - // Viable candidates are registers in one of the `mask` classes, and not already in - // the spill set. - if let Affinity::Reg(rci) = lv.affinity { - let rc = self.reginfo.rc(rci); - if (mask & (1 << rc.toprc)) != 0 && !self.spills.contains(&lv.value) { - // Here, `lv` is a viable spill candidate. - return Some(lv.value); - } - } - None - }) - .min_by(|&a, &b| { - // Find the minimum candidate according to the RPO of their defs. - self.domtree.rpo_cmp( - self.cur.func.dfg.value_def(a), - self.cur.func.dfg.value_def(b), - &self.cur.func.layout, - ) - }) - } - - /// Spill `value` immediately by - /// - /// 1. Changing its affinity to `Stack` which marks the spill. - /// 2. Removing the value from the pressure tracker. - /// 3. Adding the value to `self.spills` for later reference by `process_spills`. - /// - /// Note that this does not update the cached affinity in the live value tracker. Call - /// `process_spills` to do that. - fn spill_reg(&mut self, value: Value) { - if let Affinity::Reg(rci) = self.liveness.spill(value) { - let rc = self.reginfo.rc(rci); - self.pressure.free(rc); - self.spills.push(value); - log::trace!("Spilled {}:{} -> {}", value, rc, self.pressure); - } else { - panic!("Cannot spill {} that was already on the stack", value); - } - - // Assign a spill slot for the whole virtual register. 
- let ss = self - .cur - .func - .stack_slots - .make_spill_slot(self.cur.func.dfg.value_type(value)); - for &v in self.virtregs.congruence_class(&value) { - self.liveness.spill(v); - self.cur.func.locations[v] = ValueLoc::Stack(ss); - } - } - - /// Process any pending spills in the `self.spills` vector. - /// - /// It is assumed that spills are removed from the pressure tracker immediately, see - /// `spill_reg` above. - /// - /// We also need to update the live range affinity and remove spilled values from the live - /// value tracker. - fn process_spills(&mut self, tracker: &mut LiveValueTracker) { - if !self.spills.is_empty() { - tracker.process_spills(|v| self.spills.contains(&v)); - self.spills.clear() - } - } - - /// Insert a `copy value` before the current instruction and give it a live range extending to - /// the current instruction. - /// - /// Returns the new local value created. - fn insert_copy(&mut self, value: Value, rci: RegClassIndex) -> Value { - let copy = self.cur.ins().copy(value); - let inst = self.cur.built_inst(); - - // Update live ranges. - self.liveness.create_dead(copy, inst, Affinity::Reg(rci)); - self.liveness.extend_locally( - copy, - self.cur.func.layout.pp_block(inst), - self.cur.current_inst().expect("must be at an instruction"), - &self.cur.func.layout, - ); - - copy - } -} - -/// Struct representing a register use of a value. -/// Used to detect multiple uses of the same value with incompatible register constraints. -#[derive(Clone, Copy)] -struct RegUse { - value: Value, - opidx: u16, - - // Register class required by the use. - rci: RegClassIndex, - - // A use with a fixed register constraint. - fixed: bool, - - // A register use of a spilled value. - spilled: bool, - - // A use with a tied register constraint *and* the used value is not killed. 
- tied: bool, -} - -impl RegUse { - fn new(value: Value, idx: usize, rci: RegClassIndex) -> Self { - Self { - value, - opidx: idx as u16, - rci, - fixed: false, - spilled: false, - tied: false, - } - } -} - -impl fmt::Display for RegUse { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}@op{}", self.value, self.opidx)?; - if self.fixed { - write!(f, "/fixed")?; - } - if self.spilled { - write!(f, "/spilled")?; - } - if self.tied { - write!(f, "/tied")?; - } - Ok(()) - } -} diff --git a/cranelift/codegen/src/regalloc/virtregs.rs b/cranelift/codegen/src/regalloc/virtregs.rs deleted file mode 100644 index ee2ec9bcd9..0000000000 --- a/cranelift/codegen/src/regalloc/virtregs.rs +++ /dev/null @@ -1,505 +0,0 @@ -//! Virtual registers. -//! -//! A virtual register is a set of related SSA values whose live ranges don't interfere. If all the -//! values in a virtual register are assigned to the same location, fewer copies will result in the -//! output. -//! -//! A virtual register is typically built by merging together SSA values that are "phi-related" - -//! that is, one value is passed as a block argument to a branch and the other is the block parameter -//! value itself. -//! -//! If any values in a virtual register are spilled, they will use the same stack slot. This avoids -//! memory-to-memory copies when a spilled value is passed as a block argument. - -use crate::dbg::DisplayList; -use crate::dominator_tree::DominatorTreePreorder; -use crate::entity::entity_impl; -use crate::entity::{EntityList, ListPool}; -use crate::entity::{Keys, PrimaryMap, SecondaryMap}; -use crate::ir::{Function, Value}; -use crate::packed_option::PackedOption; -use alloc::vec::Vec; -use core::cmp::Ordering; -use core::fmt; -use core::slice; -use smallvec::SmallVec; - -/// A virtual register reference. 
-#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct VirtReg(u32); -entity_impl!(VirtReg, "vreg"); - -type ValueList = EntityList; - -/// Collection of virtual registers. -/// -/// Each virtual register is a list of values. Also maintain a map from values to their unique -/// virtual register, if any. -pub struct VirtRegs { - /// Memory pool for the value lists. - pool: ListPool, - - /// The primary table of virtual registers. - vregs: PrimaryMap, - - /// Allocated virtual register numbers that are no longer in use. - unused_vregs: Vec, - - /// Each value belongs to at most one virtual register. - value_vregs: SecondaryMap>, - - /// Table used during the union-find phase while `vregs` is empty. - union_find: SecondaryMap, - - /// Values that have been activated in the `union_find` table, but not yet added to any virtual - /// registers by the `finish_union_find()` function. - pending_values: Vec, -} - -impl VirtRegs { - /// Create a new virtual register collection. - pub fn new() -> Self { - Self { - pool: ListPool::new(), - vregs: PrimaryMap::new(), - unused_vregs: Vec::new(), - value_vregs: SecondaryMap::new(), - union_find: SecondaryMap::new(), - pending_values: Vec::new(), - } - } - - /// Clear all virtual registers. - pub fn clear(&mut self) { - self.vregs.clear(); - self.unused_vregs.clear(); - self.value_vregs.clear(); - self.pool.clear(); - self.union_find.clear(); - self.pending_values.clear(); - } - - /// Get the virtual register containing `value`, if any. - pub fn get(&self, value: Value) -> Option { - self.value_vregs[value].into() - } - - /// Get the list of values in `vreg`. - pub fn values(&self, vreg: VirtReg) -> &[Value] { - self.vregs[vreg].as_slice(&self.pool) - } - - /// Get an iterator over all virtual registers. - pub fn all_virtregs(&self) -> Keys { - self.vregs.keys() - } - - /// Get the congruence class of `value`. 
- /// - /// If `value` belongs to a virtual register, the congruence class is the values of the virtual - /// register. Otherwise it is just the value itself. - #[cfg_attr(feature = "cargo-clippy", allow(clippy::trivially_copy_pass_by_ref))] - pub fn congruence_class<'a, 'b>(&'a self, value: &'b Value) -> &'b [Value] - where - 'a: 'b, - { - self.get(*value) - .map_or_else(|| slice::from_ref(value), |vr| self.values(vr)) - } - - /// Check if `a` and `b` belong to the same congruence class. - pub fn same_class(&self, a: Value, b: Value) -> bool { - match (self.get(a), self.get(b)) { - (Some(va), Some(vb)) => va == vb, - _ => a == b, - } - } - - /// Sort the values in `vreg` according to the dominator tree pre-order. - /// - /// Returns the slice of sorted values which `values(vreg)` will also return from now on. - pub fn sort_values( - &mut self, - vreg: VirtReg, - func: &Function, - preorder: &DominatorTreePreorder, - ) -> &[Value] { - let s = self.vregs[vreg].as_mut_slice(&mut self.pool); - s.sort_unstable_by(|&a, &b| preorder.pre_cmp_def(a, b, func)); - s - } - - /// Insert a single value into a sorted virtual register. - /// - /// It is assumed that the virtual register containing `big` is already sorted by - /// `sort_values()`, and that `single` does not already belong to a virtual register. - /// - /// If `big` is not part of a virtual register, one will be created. - pub fn insert_single( - &mut self, - big: Value, - single: Value, - func: &Function, - preorder: &DominatorTreePreorder, - ) -> VirtReg { - debug_assert_eq!(self.get(single), None, "Expected singleton {}", single); - - // Make sure `big` has a vreg. - let vreg = self.get(big).unwrap_or_else(|| { - let vr = self.alloc(); - self.vregs[vr].push(big, &mut self.pool); - self.value_vregs[big] = vr.into(); - vr - }); - - // Determine the insertion position for `single`. 
- let index = match self - .values(vreg) - .binary_search_by(|&v| preorder.pre_cmp_def(v, single, func)) - { - Ok(_) => panic!("{} already in {}", single, vreg), - Err(i) => i, - }; - self.vregs[vreg].insert(index, single, &mut self.pool); - self.value_vregs[single] = vreg.into(); - vreg - } - - /// Remove a virtual register. - /// - /// The values in `vreg` become singletons, and the virtual register number may be reused in - /// the future. - pub fn remove(&mut self, vreg: VirtReg) { - // Start by reassigning all the values. - for &v in self.vregs[vreg].as_slice(&self.pool) { - let old = self.value_vregs[v].take(); - debug_assert_eq!(old, Some(vreg)); - } - - self.vregs[vreg].clear(&mut self.pool); - self.unused_vregs.push(vreg); - } - - /// Allocate a new empty virtual register. - fn alloc(&mut self) -> VirtReg { - self.unused_vregs - .pop() - .unwrap_or_else(|| self.vregs.push(Default::default())) - } - - /// Unify `values` into a single virtual register. - /// - /// The values in the slice can be singletons or they can belong to a virtual register already. - /// If a value belongs to a virtual register, all of the values in that register must be - /// present. - /// - /// The values are assumed to already be in topological order. - pub fn unify(&mut self, values: &[Value]) -> VirtReg { - // Start by clearing all virtual registers involved. 
- let mut singletons = 0; - let mut cleared = 0; - for &val in values { - match self.get(val) { - None => singletons += 1, - Some(vreg) => { - if !self.vregs[vreg].is_empty() { - cleared += self.vregs[vreg].len(&self.pool); - self.vregs[vreg].clear(&mut self.pool); - self.unused_vregs.push(vreg); - } - } - } - } - - debug_assert_eq!( - values.len(), - singletons + cleared, - "Can't unify partial virtual registers" - ); - - let vreg = self.alloc(); - self.vregs[vreg].extend(values.iter().cloned(), &mut self.pool); - for &v in values { - self.value_vregs[v] = vreg.into(); - } - - vreg - } -} - -impl fmt::Display for VirtRegs { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for vreg in self.all_virtregs() { - write!(f, "\n{} = {}", vreg, DisplayList(self.values(vreg)))?; - } - Ok(()) - } -} - -/// Expanded version of a union-find table entry. -enum UFEntry { - /// This value is a a set leader. The embedded number is the set's rank. - Rank(u32), - - /// This value belongs to the same set as the linked value. - Link(Value), -} - -/// The `union_find` table contains `i32` entries that are interpreted as follows: -/// -/// x = 0: The value belongs to its own singleton set. -/// x > 0: The value is the leader of a set with rank x. -/// x < 0: The value belongs to the same set as the value numbered !x. -/// -/// The rank of a set is an upper bound on the number of links that must be followed from a member -/// of the set to the set leader. -/// -/// A singleton set is the same as a set with rank 0. It contains only the leader value. -impl UFEntry { - /// Decode a table entry. - fn decode(x: i32) -> Self { - if x < 0 { - Self::Link(Value::from_u32((!x) as u32)) - } else { - Self::Rank(x as u32) - } - } - - /// Encode a link entry. - fn encode_link(v: Value) -> i32 { - !(v.as_u32() as i32) - } -} - -/// Union-find algorithm for building virtual registers. 
-/// -/// Before values are added to virtual registers, it is possible to use a union-find algorithm to -/// construct virtual registers efficiently. This support implemented here is used as follows: -/// -/// 1. Repeatedly call the `union(a, b)` method to request that `a` and `b` are placed in the same -/// virtual register. -/// 2. When done, call `finish_union_find()` to construct the virtual register sets based on the -/// `union()` calls. -/// -/// The values that were passed to `union(a, b)` must not belong to any existing virtual registers -/// by the time `finish_union_find()` is called. -/// -/// For more information on the algorithm implemented here, see Chapter 21 "Data Structures for -/// Disjoint Sets" of Cormen, Leiserson, Rivest, Stein, "Introduction to algorithms", 3rd Ed. -/// -/// The [Wikipedia entry on disjoint-set data -/// structures](https://en.wikipedia.org/wiki/Disjoint-set_data_structure) is also good. -impl VirtRegs { - /// Find the leader value and rank of the set containing `v`. - /// Compress the path if needed. - fn find(&mut self, mut val: Value) -> (Value, u32) { - let mut val_stack = SmallVec::<[Value; 8]>::new(); - let found = loop { - match UFEntry::decode(self.union_find[val]) { - UFEntry::Rank(rank) => break (val, rank), - UFEntry::Link(parent) => { - val_stack.push(val); - val = parent; - } - } - }; - // Compress the path - while let Some(val) = val_stack.pop() { - self.union_find[val] = UFEntry::encode_link(found.0); - } - found - } - - /// Union the two sets containing `a` and `b`. - /// - /// This ensures that `a` and `b` will belong to the same virtual register after calling - /// `finish_union_find()`. - pub fn union(&mut self, a: Value, b: Value) { - let (leader_a, rank_a) = self.find(a); - let (leader_b, rank_b) = self.find(b); - - if leader_a == leader_b { - return; - } - - // The first time we see a value, its rank will be 0. Add it to the list of pending values. 
- if rank_a == 0 { - debug_assert_eq!(a, leader_a); - self.pending_values.push(a); - } - if rank_b == 0 { - debug_assert_eq!(b, leader_b); - self.pending_values.push(b); - } - - // Merge into the set with the greater rank. This preserves the invariant that the rank is - // an upper bound on the number of links to the leader. - match rank_a.cmp(&rank_b) { - Ordering::Less => { - self.union_find[leader_a] = UFEntry::encode_link(leader_b); - } - Ordering::Greater => { - self.union_find[leader_b] = UFEntry::encode_link(leader_a); - } - Ordering::Equal => { - // When the two sets have the same rank, we arbitrarily pick the a-set to preserve. - // We need to increase the rank by one since the elements in the b-set are now one - // link further away from the leader. - self.union_find[leader_a] += 1; - self.union_find[leader_b] = UFEntry::encode_link(leader_a); - } - } - } - - /// Compute virtual registers based on previous calls to `union(a, b)`. - /// - /// This terminates the union-find algorithm, so the next time `union()` is called, it is for a - /// new independent batch of values. - /// - /// The values in each virtual register will be ordered according to when they were first - /// passed to `union()`, but backwards. It is expected that `sort_values()` will be used to - /// create a more sensible value order. - /// - /// The new virtual registers will be appended to `new_vregs`, if present. - pub fn finish_union_find(&mut self, mut new_vregs: Option<&mut Vec>) { - debug_assert_eq!( - self.pending_values.iter().find(|&&v| self.get(v).is_some()), - None, - "Values participating in union-find must not belong to existing virtual registers" - ); - - while let Some(val) = self.pending_values.pop() { - let (leader, _) = self.find(val); - - // Get the vreg for `leader`, or create it. - let vreg = self.get(leader).unwrap_or_else(|| { - // Allocate a vreg for `leader`, but leave it empty. 
- let vr = self.alloc(); - if let Some(ref mut vec) = new_vregs { - vec.push(vr); - } - self.value_vregs[leader] = vr.into(); - vr - }); - - // Push values in `pending_values` order, including when `v == leader`. - self.vregs[vreg].push(val, &mut self.pool); - self.value_vregs[val] = vreg.into(); - - // Clear the entry in the union-find table. The `find(val)` call may still look at this - // entry in a future iteration, but that it ok. It will return a rank 0 leader that has - // already been assigned to the correct virtual register. - self.union_find[val] = 0; - } - - // We do *not* call `union_find.clear()` table here because re-initializing the table for - // sparse use takes time linear in the number of values in the function. Instead we reset - // the entries that are known to be non-zero in the loop above. - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::entity::EntityRef; - use crate::ir::Value; - - #[test] - fn empty_union_find() { - let mut vregs = VirtRegs::new(); - vregs.finish_union_find(None); - assert_eq!(vregs.all_virtregs().count(), 0); - } - - #[test] - fn union_self() { - let mut vregs = VirtRegs::new(); - let v1 = Value::new(1); - vregs.union(v1, v1); - vregs.finish_union_find(None); - assert_eq!(vregs.get(v1), None); - assert_eq!(vregs.all_virtregs().count(), 0); - } - - #[test] - fn union_pair() { - let mut vregs = VirtRegs::new(); - let v1 = Value::new(1); - let v2 = Value::new(2); - vregs.union(v1, v2); - vregs.finish_union_find(None); - assert_eq!(vregs.congruence_class(&v1), &[v2, v1]); - assert_eq!(vregs.congruence_class(&v2), &[v2, v1]); - assert_eq!(vregs.all_virtregs().count(), 1); - } - - #[test] - fn union_pair_backwards() { - let mut vregs = VirtRegs::new(); - let v1 = Value::new(1); - let v2 = Value::new(2); - vregs.union(v2, v1); - vregs.finish_union_find(None); - assert_eq!(vregs.congruence_class(&v1), &[v1, v2]); - assert_eq!(vregs.congruence_class(&v2), &[v1, v2]); - assert_eq!(vregs.all_virtregs().count(), 1); - 
} - - #[test] - fn union_tree() { - let mut vregs = VirtRegs::new(); - let v1 = Value::new(1); - let v2 = Value::new(2); - let v3 = Value::new(3); - let v4 = Value::new(4); - - vregs.union(v2, v4); - vregs.union(v3, v1); - // Leaders: v2, v3 - vregs.union(v4, v1); - vregs.finish_union_find(None); - assert_eq!(vregs.congruence_class(&v1), &[v1, v3, v4, v2]); - assert_eq!(vregs.congruence_class(&v2), &[v1, v3, v4, v2]); - assert_eq!(vregs.congruence_class(&v3), &[v1, v3, v4, v2]); - assert_eq!(vregs.congruence_class(&v4), &[v1, v3, v4, v2]); - assert_eq!(vregs.all_virtregs().count(), 1); - } - - #[test] - fn union_two() { - let mut vregs = VirtRegs::new(); - let v1 = Value::new(1); - let v2 = Value::new(2); - let v3 = Value::new(3); - let v4 = Value::new(4); - - vregs.union(v2, v4); - vregs.union(v3, v1); - // Leaders: v2, v3 - vregs.finish_union_find(None); - assert_eq!(vregs.congruence_class(&v1), &[v1, v3]); - assert_eq!(vregs.congruence_class(&v2), &[v4, v2]); - assert_eq!(vregs.congruence_class(&v3), &[v1, v3]); - assert_eq!(vregs.congruence_class(&v4), &[v4, v2]); - assert_eq!(vregs.all_virtregs().count(), 2); - } - - #[test] - fn union_uneven() { - let mut vregs = VirtRegs::new(); - let v1 = Value::new(1); - let v2 = Value::new(2); - let v3 = Value::new(3); - let v4 = Value::new(4); - - vregs.union(v2, v4); // Rank 0-0 - vregs.union(v3, v2); // Rank 0-1 - vregs.union(v2, v1); // Rank 1-0 - vregs.finish_union_find(None); - assert_eq!(vregs.congruence_class(&v1), &[v1, v3, v4, v2]); - assert_eq!(vregs.congruence_class(&v2), &[v1, v3, v4, v2]); - assert_eq!(vregs.congruence_class(&v3), &[v1, v3, v4, v2]); - assert_eq!(vregs.congruence_class(&v4), &[v1, v3, v4, v2]); - assert_eq!(vregs.all_virtregs().count(), 1); - } -} diff --git a/cranelift/codegen/src/stack_layout.rs b/cranelift/codegen/src/stack_layout.rs deleted file mode 100644 index 2430e8a643..0000000000 --- a/cranelift/codegen/src/stack_layout.rs +++ /dev/null @@ -1,241 +0,0 @@ -//! 
Computing stack layout. - -use crate::ir::stackslot::{StackOffset, StackSize, StackSlotKind}; -use crate::ir::{StackLayoutInfo, StackSlots}; -use crate::result::{CodegenError, CodegenResult}; -use core::cmp::{max, min}; - -/// Compute the stack frame layout. -/// -/// Determine the total size of this stack frame and assign offsets to all `Spill` and `Explicit` -/// stack slots. -/// -/// The total frame size will be a multiple of `alignment` which must be a power of two, unless the -/// function doesn't perform any call. -/// -/// Returns the total stack frame size which is also saved in `frame.frame_size`. -/// -/// If the stack frame is too big, returns an `ImplLimitExceeded` error. -pub fn layout_stack( - frame: &mut StackSlots, - is_leaf: bool, - alignment: StackSize, -) -> CodegenResult { - // Each object and the whole stack frame must fit in 2 GB such that any relative offset within - // the frame fits in a `StackOffset`. - let max_size = StackOffset::max_value() as StackSize; - debug_assert!(alignment.is_power_of_two() && alignment <= max_size); - - // We assume a stack that grows toward lower addresses as implemented by modern ISAs. The - // stack layout from high to low addresses will be: - // - // 1. incoming arguments. - // 2. spills + explicits + struct returns. - // 3. outgoing arguments. - // - // The incoming arguments can have both positive and negative offsets. A negative offset - // incoming arguments is usually the x86 return address pushed by the call instruction, but - // it can also be fixed stack slots pushed by an externally generated prologue. - // - // Both incoming and outgoing argument slots have fixed offsets that are treated as - // reserved zones by the layout algorithm. - // - // If a function only has incoming arguments and does not perform any calls, then it doesn't - // require the stack to be aligned. 
- - let mut incoming_min = 0; - let mut incoming_max = 0; - let mut outgoing_max = 0; - let mut min_align = alignment; - let mut must_align = !is_leaf; - - for slot in frame.values() { - if slot.size > max_size { - return Err(CodegenError::ImplLimitExceeded); - } - - match slot.kind { - StackSlotKind::IncomingArg => { - incoming_min = min(incoming_min, slot.offset.unwrap()); - incoming_max = max(incoming_max, slot.offset.unwrap() + slot.size as i32); - } - StackSlotKind::OutgoingArg => { - let offset = slot - .offset - .unwrap() - .checked_add(slot.size as StackOffset) - .ok_or(CodegenError::ImplLimitExceeded)?; - outgoing_max = max(outgoing_max, offset); - must_align = true; - } - StackSlotKind::StructReturnSlot - | StackSlotKind::SpillSlot - | StackSlotKind::ExplicitSlot - | StackSlotKind::EmergencySlot => { - // Determine the smallest alignment of any explicit or spill slot. - min_align = slot.alignment(min_align); - must_align = true; - } - } - } - - // Lay out spill slots, struct return slots, and explicit slots below the - // incoming arguments. The offset is negative, growing downwards. Start with - // the smallest alignments for better packing. - let mut offset = incoming_min; - debug_assert!(min_align.is_power_of_two()); - while min_align <= alignment { - for slot in frame.values_mut() { - // Pick out explicit and spill slots with exact alignment `min_align`. - match slot.kind { - StackSlotKind::SpillSlot - | StackSlotKind::StructReturnSlot - | StackSlotKind::ExplicitSlot - | StackSlotKind::EmergencySlot => { - if slot.alignment(alignment) != min_align { - continue; - } - } - StackSlotKind::IncomingArg | StackSlotKind::OutgoingArg => continue, - } - - offset = offset - .checked_sub(slot.size as StackOffset) - .ok_or(CodegenError::ImplLimitExceeded)?; - - // Aligning the negative offset can never cause overflow. We're only clearing bits. - offset &= -(min_align as StackOffset); - slot.offset = Some(offset); - } - - // Move on to the next higher alignment. 
- min_align *= 2; - } - - // Finally, make room for the outgoing arguments. - offset = offset - .checked_sub(outgoing_max) - .ok_or(CodegenError::ImplLimitExceeded)?; - - if must_align { - offset &= -(alignment as StackOffset); - } - - // Set the computed layout information for the frame - let frame_size = (offset as StackSize).wrapping_neg(); - let inbound_args_size = incoming_max as u32; - frame.layout_info = Some(StackLayoutInfo { - frame_size, - inbound_args_size, - }); - - Ok(frame_size) -} - -#[cfg(test)] -mod tests { - use super::layout_stack; - use crate::ir::stackslot::StackOffset; - use crate::ir::types; - use crate::ir::{StackSlotData, StackSlotKind, StackSlots}; - use crate::result::CodegenError; - - #[test] - fn layout() { - let sss = &mut StackSlots::new(); - - // For all these test cases, assume it will call. - let is_leaf = false; - - // An empty layout should have 0-sized stack frame. - assert_eq!(layout_stack(sss, is_leaf, 1), Ok(0)); - assert_eq!(layout_stack(sss, is_leaf, 16), Ok(0)); - - // Same for incoming arguments with non-negative offsets. - let in0 = sss.make_incoming_arg(8, 0); - let in1 = sss.make_incoming_arg(8, 8); - - assert_eq!(layout_stack(sss, is_leaf, 1), Ok(0)); - assert_eq!(layout_stack(sss, is_leaf, 16), Ok(0)); - assert_eq!(sss[in0].offset, Some(0)); - assert_eq!(sss[in1].offset, Some(8)); - - // Add some spill slots. 
- let ss0 = sss.make_spill_slot(types::I64); - let ss1 = sss.make_spill_slot(types::I32); - - assert_eq!(layout_stack(sss, is_leaf, 1), Ok(12)); - assert_eq!(sss[in0].offset, Some(0)); - assert_eq!(sss[in1].offset, Some(8)); - assert_eq!(sss[ss0].offset, Some(-8)); - assert_eq!(sss[ss1].offset, Some(-12)); - - assert_eq!(layout_stack(sss, is_leaf, 16), Ok(16)); - assert_eq!(sss[in0].offset, Some(0)); - assert_eq!(sss[in1].offset, Some(8)); - assert_eq!(sss[ss0].offset, Some(-16)); - assert_eq!(sss[ss1].offset, Some(-4)); - - // An incoming argument with negative offset counts towards the total frame size, but it - // should still pack nicely with the spill slots. - let in2 = sss.make_incoming_arg(4, -4); - - assert_eq!(layout_stack(sss, is_leaf, 1), Ok(16)); - assert_eq!(sss[in0].offset, Some(0)); - assert_eq!(sss[in1].offset, Some(8)); - assert_eq!(sss[in2].offset, Some(-4)); - assert_eq!(sss[ss0].offset, Some(-12)); - assert_eq!(sss[ss1].offset, Some(-16)); - - assert_eq!(layout_stack(sss, is_leaf, 16), Ok(16)); - assert_eq!(sss[in0].offset, Some(0)); - assert_eq!(sss[in1].offset, Some(8)); - assert_eq!(sss[in2].offset, Some(-4)); - assert_eq!(sss[ss0].offset, Some(-16)); - assert_eq!(sss[ss1].offset, Some(-8)); - - // Finally, make sure there is room for the outgoing args. - let out0 = sss.get_outgoing_arg(4, 0); - - assert_eq!(layout_stack(sss, is_leaf, 1), Ok(20)); - assert_eq!(sss[in0].offset, Some(0)); - assert_eq!(sss[in1].offset, Some(8)); - assert_eq!(sss[in2].offset, Some(-4)); - assert_eq!(sss[ss0].offset, Some(-12)); - assert_eq!(sss[ss1].offset, Some(-16)); - assert_eq!(sss[out0].offset, Some(0)); - - assert_eq!(layout_stack(sss, is_leaf, 16), Ok(32)); - assert_eq!(sss[in0].offset, Some(0)); - assert_eq!(sss[in1].offset, Some(8)); - assert_eq!(sss[in2].offset, Some(-4)); - assert_eq!(sss[ss0].offset, Some(-16)); - assert_eq!(sss[ss1].offset, Some(-8)); - assert_eq!(sss[out0].offset, Some(0)); - - // Also test that an unsupported offset is rejected. 
- sss.get_outgoing_arg(1, StackOffset::max_value() - 1); - assert_eq!( - layout_stack(sss, is_leaf, 1), - Err(CodegenError::ImplLimitExceeded) - ); - } - - #[test] - fn slot_kinds() { - let sss = &mut StackSlots::new(); - - // Add some slots of various kinds. - let ss0 = sss.make_spill_slot(types::I32); - let ss1 = sss.push(StackSlotData::new( - StackSlotKind::ExplicitSlot, - types::I32.bytes(), - )); - let ss2 = sss.get_emergency_slot(types::I32, &[]); - - assert_eq!(layout_stack(sss, true, 1), Ok(12)); - assert_eq!(sss[ss0].offset, Some(-4)); - assert_eq!(sss[ss1].offset, Some(-8)); - assert_eq!(sss[ss2].offset, Some(-12)); - } -} diff --git a/cranelift/codegen/src/timing.rs b/cranelift/codegen/src/timing.rs index 16bee01a7b..2a360a7d34 100644 --- a/cranelift/codegen/src/timing.rs +++ b/cranelift/codegen/src/timing.rs @@ -46,19 +46,14 @@ define_passes! { wasm_translate_function: "Translate WASM function", verifier: "Verify Cranelift IR", - verify_cssa: "Verify CSSA", - verify_liveness: "Verify live ranges", - verify_locations: "Verify value locations", verify_flags: "Verify CPU flags", compile: "Compilation passes", flowgraph: "Control flow graph", domtree: "Dominator tree", loop_analysis: "Loop analysis", - postopt: "Post-legalization rewriting", preopt: "Pre-legalization rewriting", dce: "Dead code elimination", - legalize: "Legalization", gvn: "Global value numbering", licm: "Loop invariant code motion", unreachable_code: "Remove unreachable blocks", @@ -70,15 +65,6 @@ define_passes! 
{ vcode_emit_finish: "VCode emission finalization", regalloc: "Register allocation", - ra_liveness: "RA liveness analysis", - ra_cssa: "RA coalescing CSSA", - ra_spilling: "RA spilling", - ra_reload: "RA reloading", - ra_coloring: "RA coloring", - - prologue_epilogue: "Prologue/epilogue insertion", - shrink_instructions: "Instruction encoding shrinking", - relax_branches: "Branch relaxation", binemit: "Binary machine code emission", layout_renumber: "Layout full renumbering", diff --git a/cranelift/codegen/src/topo_order.rs b/cranelift/codegen/src/topo_order.rs deleted file mode 100644 index 8d38e4f324..0000000000 --- a/cranelift/codegen/src/topo_order.rs +++ /dev/null @@ -1,138 +0,0 @@ -//! Topological order of blocks, according to the dominator tree. - -use crate::dominator_tree::DominatorTree; -use crate::entity::EntitySet; -use crate::ir::{Block, Layout}; -use alloc::vec::Vec; - -/// Present blocks in a topological order such that all dominating blocks are guaranteed to be visited -/// before the current block. -/// -/// There are many topological orders of the blocks in a function, so it is possible to provide a -/// preferred order, and the `TopoOrder` will present blocks in an order that is as close as possible -/// to the preferred order. -pub struct TopoOrder { - /// Preferred order of blocks to visit. - preferred: Vec, - - /// Next entry to get from `preferred`. - next: usize, - - /// Set of visited blocks. - visited: EntitySet, - - /// Stack of blocks to be visited next, already in `visited`. - stack: Vec, -} - -impl TopoOrder { - /// Create a new empty topological order. - pub fn new() -> Self { - Self { - preferred: Vec::new(), - next: 0, - visited: EntitySet::new(), - stack: Vec::new(), - } - } - - /// Clear all data structures in this topological order. - pub fn clear(&mut self) { - self.preferred.clear(); - self.next = 0; - self.visited.clear(); - self.stack.clear(); - } - - /// Reset and initialize with a preferred sequence of blocks. 
The resulting topological order is - /// guaranteed to contain all of the blocks in `preferred` as well as any dominators. - pub fn reset(&mut self, preferred: Blocks) - where - Blocks: IntoIterator, - { - self.preferred.clear(); - self.preferred.extend(preferred); - self.next = 0; - self.visited.clear(); - self.stack.clear(); - } - - /// Get the next block in the topological order. - /// - /// Two things are guaranteed about the blocks returned by this function: - /// - /// - All blocks in the `preferred` iterator given to `reset` will be returned. - /// - All dominators are visited before the block returned. - pub fn next(&mut self, layout: &Layout, domtree: &DominatorTree) -> Option { - self.visited.resize(layout.block_capacity()); - // Any entries in `stack` should be returned immediately. They have already been added to - // `visited`. - while self.stack.is_empty() { - match self.preferred.get(self.next).cloned() { - None => return None, - Some(mut block) => { - // We have the next block in the preferred order. - self.next += 1; - // Push it along with any non-visited dominators. 
- while self.visited.insert(block) { - self.stack.push(block); - match domtree.idom(block) { - Some(idom) => { - block = layout.inst_block(idom).expect("idom not in layout") - } - None => break, - } - } - } - } - } - self.stack.pop() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::cursor::{Cursor, FuncCursor}; - use crate::dominator_tree::DominatorTree; - use crate::flowgraph::ControlFlowGraph; - use crate::ir::{Function, InstBuilder}; - use core::iter; - - #[test] - fn empty() { - let func = Function::new(); - let cfg = ControlFlowGraph::with_function(&func); - let domtree = DominatorTree::with_function(&func, &cfg); - let mut topo = TopoOrder::new(); - - assert_eq!(topo.next(&func.layout, &domtree), None); - topo.reset(func.layout.blocks()); - assert_eq!(topo.next(&func.layout, &domtree), None); - } - - #[test] - fn simple() { - let mut func = Function::new(); - let block0 = func.dfg.make_block(); - let block1 = func.dfg.make_block(); - - { - let mut cur = FuncCursor::new(&mut func); - - cur.insert_block(block0); - cur.ins().jump(block1, &[]); - cur.insert_block(block1); - cur.ins().jump(block1, &[]); - } - - let cfg = ControlFlowGraph::with_function(&func); - let domtree = DominatorTree::with_function(&func, &cfg); - let mut topo = TopoOrder::new(); - - topo.reset(iter::once(block1)); - assert_eq!(topo.next(&func.layout, &domtree), Some(block0)); - assert_eq!(topo.next(&func.layout, &domtree), Some(block1)); - assert_eq!(topo.next(&func.layout, &domtree), None); - } -} diff --git a/cranelift/codegen/src/unreachable_code.rs b/cranelift/codegen/src/unreachable_code.rs index 327e1af3a3..de1af1ac91 100644 --- a/cranelift/codegen/src/unreachable_code.rs +++ b/cranelift/codegen/src/unreachable_code.rs @@ -31,7 +31,7 @@ pub fn eliminate_unreachable_code( // Remove all instructions from `block`. 
while let Some(inst) = pos.func.layout.first_inst(block) { - log::trace!(" - {}", pos.func.dfg.display_inst(inst, None)); + log::trace!(" - {}", pos.func.dfg.display_inst(inst)); pos.func.layout.remove_inst(inst); } diff --git a/cranelift/codegen/src/value_label.rs b/cranelift/codegen/src/value_label.rs index 82bfd3e30c..459fa62d72 100644 --- a/cranelift/codegen/src/value_label.rs +++ b/cranelift/codegen/src/value_label.rs @@ -1,14 +1,8 @@ -use crate::ir::{Function, SourceLoc, Value, ValueLabel, ValueLabelAssignments, ValueLoc}; -use crate::isa::TargetIsa; -use crate::machinst::MachCompileResult; -use crate::regalloc::{Context, RegDiversions}; +use crate::ir::{SourceLoc, ValueLabel}; use crate::HashMap; -use alloc::collections::BTreeMap; use alloc::vec::Vec; use core::cmp::Ordering; use core::convert::From; -use core::iter::Iterator; -use core::ops::Bound::*; use core::ops::Deref; use regalloc::Reg; @@ -31,241 +25,15 @@ pub struct ValueLocRange { #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum LabelValueLoc { - /// Old-backend location: RegUnit, StackSlot, or Unassigned. - ValueLoc(ValueLoc), /// New-backend Reg. Reg(Reg), /// New-backend offset from stack pointer. SPOffset(i64), } -impl From for LabelValueLoc { - fn from(v: ValueLoc) -> Self { - LabelValueLoc::ValueLoc(v) - } -} - /// Resulting map of Value labels and their ranges/locations. 
pub type ValueLabelsRanges = HashMap>; -fn build_value_labels_index(func: &Function) -> BTreeMap -where - T: From + Deref + Ord + Copy, -{ - if func.dfg.values_labels.is_none() { - return BTreeMap::new(); - } - let values_labels = func.dfg.values_labels.as_ref().unwrap(); - - // Index values_labels by srcloc/from - let mut sorted = BTreeMap::new(); - for (val, assigns) in values_labels { - match assigns { - ValueLabelAssignments::Starts(labels) => { - for label in labels { - if label.from.is_default() { - continue; - } - let srcloc = T::from(label.from); - let label = label.label; - sorted.insert(srcloc, (*val, label)); - } - } - ValueLabelAssignments::Alias { from, value } => { - if from.is_default() { - continue; - } - let mut aliased_value = *value; - while let Some(ValueLabelAssignments::Alias { value, .. }) = - values_labels.get(&aliased_value) - { - // TODO check/limit recursion? - aliased_value = *value; - } - let from = T::from(*from); - if let Some(ValueLabelAssignments::Starts(labels)) = - values_labels.get(&aliased_value) - { - for label in labels { - let srcloc = if label.from.is_default() { - from - } else { - from.max(T::from(label.from)) - }; - let label = label.label; - sorted.insert(srcloc, (*val, label)); - } - } - } - } - } - sorted -} - -/// Builds ranges and location for specified value labels. -/// The labels specified at DataFlowGraph's values_labels collection. 
-pub fn build_value_labels_ranges( - func: &Function, - regalloc: &Context, - mach_compile_result: Option<&MachCompileResult>, - isa: &dyn TargetIsa, -) -> ValueLabelsRanges -where - T: From + Deref + Ord + Copy, -{ - if let Some(mach_compile_result) = mach_compile_result { - return mach_compile_result.value_labels_ranges.clone(); - } - - let values_labels = build_value_labels_index::(func); - - let mut blocks = func.layout.blocks().collect::>(); - blocks.sort_by_key(|block| func.offsets[*block]); // Ensure inst offsets always increase - let encinfo = isa.encoding_info(); - let values_locations = &func.locations; - let liveness_ranges = regalloc.liveness().ranges(); - - let mut ranges = HashMap::new(); - let mut add_range = |label, range: (u32, u32), loc: ValueLoc| { - if range.0 >= range.1 || !loc.is_assigned() { - return; - } - ranges - .entry(label) - .or_insert_with(Vec::new) - .push(ValueLocRange { - loc: loc.into(), - start: range.0, - end: range.1, - }); - }; - - let mut end_offset = 0; - let mut tracked_values: Vec<(Value, ValueLabel, u32, ValueLoc)> = Vec::new(); - let mut divert = RegDiversions::new(); - for block in blocks { - divert.at_block(&func.entry_diversions, block); - let mut last_srcloc: Option = None; - for (offset, inst, size) in func.inst_offsets(block, &encinfo) { - divert.apply(&func.dfg[inst]); - end_offset = offset + size; - // Remove killed values. - tracked_values.retain(|(x, label, start_offset, last_loc)| { - let range = liveness_ranges.get(*x); - if range.expect("value").killed_at(inst, block, &func.layout) { - add_range(*label, (*start_offset, end_offset), *last_loc); - return false; - } - true - }); - - let srcloc = func.srclocs[inst]; - if srcloc.is_default() { - // Don't process instructions without srcloc. - continue; - } - let srcloc = T::from(srcloc); - - // Record and restart ranges if Value location was changed. 
- for (val, label, start_offset, last_loc) in &mut tracked_values { - let new_loc = divert.get(*val, values_locations); - if new_loc == *last_loc { - continue; - } - add_range(*label, (*start_offset, end_offset), *last_loc); - *start_offset = end_offset; - *last_loc = new_loc; - } - - // New source locations range started: abandon all tracked values. - if last_srcloc.is_some() && last_srcloc.unwrap() > srcloc { - for (_, label, start_offset, last_loc) in &tracked_values { - add_range(*label, (*start_offset, end_offset), *last_loc); - } - tracked_values.clear(); - last_srcloc = None; - } - - // Get non-processed Values based on srcloc - let range = ( - match last_srcloc { - Some(a) => Excluded(a), - None => Unbounded, - }, - Included(srcloc), - ); - let active_values = values_labels.range(range); - let active_values = active_values.filter(|(_, (v, _))| { - // Ignore dead/inactive Values. - let range = liveness_ranges.get(*v); - match range { - Some(r) => r.reaches_use(inst, block, &func.layout), - None => false, - } - }); - // Append new Values to the tracked_values. - for (_, (val, label)) in active_values { - let loc = divert.get(*val, values_locations); - tracked_values.push((*val, *label, end_offset, loc)); - } - - last_srcloc = Some(srcloc); - } - // Finish all started ranges. 
- for (_, label, start_offset, last_loc) in &tracked_values { - add_range(*label, (*start_offset, end_offset), *last_loc); - } - } - - // Optimize ranges in-place - for (_, label_ranges) in ranges.iter_mut() { - assert!(!label_ranges.is_empty()); - label_ranges.sort_by(|a, b| a.start.cmp(&b.start).then_with(|| a.end.cmp(&b.end))); - - // Merge ranges - let mut i = 1; - let mut j = 0; - while i < label_ranges.len() { - assert!(label_ranges[j].start <= label_ranges[i].end); - if label_ranges[j].loc != label_ranges[i].loc { - // Different location - if label_ranges[j].end >= label_ranges[i].end { - // Consumed by previous range, skipping - i += 1; - continue; - } - j += 1; - label_ranges[j] = label_ranges[i]; - i += 1; - continue; - } - if label_ranges[j].end < label_ranges[i].start { - // Gap in the range location - j += 1; - label_ranges[j] = label_ranges[i]; - i += 1; - continue; - } - // Merge i-th and j-th ranges - if label_ranges[j].end < label_ranges[i].end { - label_ranges[j].end = label_ranges[i].end; - } - i += 1; - } - label_ranges.truncate(j + 1); - - // Cut/move start position of next range, if two neighbor ranges intersect. - for i in 0..j { - if label_ranges[i].end > label_ranges[i + 1].start { - label_ranges[i + 1].start = label_ranges[i].end; - assert!(label_ranges[i + 1].start < label_ranges[i + 1].end); - } - assert!(label_ranges[i].end <= label_ranges[i + 1].start); - } - } - ranges -} - #[derive(Eq, Clone, Copy)] pub struct ComparableSourceLoc(SourceLoc); diff --git a/cranelift/codegen/src/verifier/cssa.rs b/cranelift/codegen/src/verifier/cssa.rs deleted file mode 100644 index d3e95bf51b..0000000000 --- a/cranelift/codegen/src/verifier/cssa.rs +++ /dev/null @@ -1,172 +0,0 @@ -//! Verify conventional SSA form. 
- -use crate::dbg::DisplayList; -use crate::dominator_tree::{DominatorTree, DominatorTreePreorder}; -use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; -use crate::ir::{ExpandedProgramPoint, Function}; -use crate::regalloc::liveness::Liveness; -use crate::regalloc::virtregs::VirtRegs; -use crate::timing; -use crate::verifier::{VerifierErrors, VerifierStepResult}; - -/// Verify conventional SSA form for `func`. -/// -/// Conventional SSA form is represented in Cranelift with the help of virtual registers: -/// -/// - Two values are said to be *PHI-related* if one is a block argument and the other is passed as -/// a branch argument in a location that matches the first value. -/// - PHI-related values must belong to the same virtual register. -/// - Two values in the same virtual register must not have overlapping live ranges. -/// -/// Additionally, we verify this property of virtual registers: -/// -/// - The values in a virtual register are topologically ordered w.r.t. dominance. -/// -/// We don't verify that virtual registers are minimal. Minimal CSSA is not required. 
-pub fn verify_cssa( - func: &Function, - cfg: &ControlFlowGraph, - domtree: &DominatorTree, - liveness: &Liveness, - virtregs: &VirtRegs, - errors: &mut VerifierErrors, -) -> VerifierStepResult<()> { - let _tt = timing::verify_cssa(); - - let mut preorder = DominatorTreePreorder::new(); - preorder.compute(domtree, &func.layout); - - let verifier = CssaVerifier { - func, - cfg, - domtree, - virtregs, - liveness, - preorder, - }; - verifier.check_virtregs(errors)?; - verifier.check_cssa(errors)?; - Ok(()) -} - -struct CssaVerifier<'a> { - func: &'a Function, - cfg: &'a ControlFlowGraph, - domtree: &'a DominatorTree, - virtregs: &'a VirtRegs, - liveness: &'a Liveness, - preorder: DominatorTreePreorder, -} - -impl<'a> CssaVerifier<'a> { - fn check_virtregs(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { - for vreg in self.virtregs.all_virtregs() { - let values = self.virtregs.values(vreg); - - for (idx, &val) in values.iter().enumerate() { - if !self.func.dfg.value_is_valid(val) { - return errors.fatal((val, format!("Invalid value in {}", vreg))); - } - if !self.func.dfg.value_is_attached(val) { - return errors.fatal((val, format!("Detached value in {}", vreg))); - } - if self.liveness.get(val).is_none() { - return errors.fatal((val, format!("Value in {} has no live range", vreg))); - }; - - // Check topological ordering with the previous values in the virtual register. - let def: ExpandedProgramPoint = self.func.dfg.value_def(val).into(); - let def_block = self.func.layout.pp_block(def); - for &prev_val in &values[0..idx] { - let prev_def: ExpandedProgramPoint = self.func.dfg.value_def(prev_val).into(); - let prev_block = self.func.layout.pp_block(prev_def); - - if prev_def == def { - return errors.fatal(( - val, - format!( - "Values {} and {} in {} = {} defined at the same program point", - prev_val, - val, - vreg, - DisplayList(values) - ), - )); - } - - // Enforce topological ordering of defs in the virtual register. 
- if self.preorder.dominates(def_block, prev_block) - && self.domtree.dominates(def, prev_def, &self.func.layout) - { - return errors.fatal(( - val, - format!( - "Value in {} = {} def dominates previous {}", - vreg, - DisplayList(values), - prev_val - ), - )); - } - } - - // Knowing that values are in topo order, we can check for interference this - // way. - // We only have to check against the nearest dominating value. - for &prev_val in values[0..idx].iter().rev() { - let prev_def: ExpandedProgramPoint = self.func.dfg.value_def(prev_val).into(); - let prev_block = self.func.layout.pp_block(prev_def); - - if self.preorder.dominates(prev_block, def_block) - && self.domtree.dominates(prev_def, def, &self.func.layout) - { - if self.liveness[prev_val].overlaps_def(def, def_block, &self.func.layout) { - return errors.fatal(( - val, - format!( - "Value def in {} = {} interferes with {}", - vreg, - DisplayList(values), - prev_val - ), - )); - } else { - break; - } - } - } - } - } - - Ok(()) - } - - fn check_cssa(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { - for block in self.func.layout.blocks() { - let block_params = self.func.dfg.block_params(block); - for BlockPredecessor { inst: pred, .. } in self.cfg.pred_iter(block) { - let pred_args = self.func.dfg.inst_variable_args(pred); - // This should have been caught by an earlier verifier pass. - assert_eq!( - block_params.len(), - pred_args.len(), - "Wrong arguments on branch." 
- ); - - for (&block_param, &pred_arg) in block_params.iter().zip(pred_args) { - if !self.virtregs.same_class(block_param, pred_arg) { - return errors.fatal(( - pred, - format!( - "{} and {} must be in the same virtual register", - block_param, pred_arg - ), - )); - } - } - } - } - - Ok(()) - } -} diff --git a/cranelift/codegen/src/verifier/flags.rs b/cranelift/codegen/src/verifier/flags.rs index e4cfc80462..5e67e3ae77 100644 --- a/cranelift/codegen/src/verifier/flags.rs +++ b/cranelift/codegen/src/verifier/flags.rs @@ -4,7 +4,6 @@ use crate::entity::{EntitySet, SecondaryMap}; use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; use crate::ir; use crate::ir::instructions::BranchInfo; -use crate::isa; use crate::packed_option::PackedOption; use crate::timing; use crate::verifier::{VerifierErrors, VerifierStepResult}; @@ -24,19 +23,12 @@ use crate::verifier::{VerifierErrors, VerifierStepResult}; pub fn verify_flags( func: &ir::Function, cfg: &ControlFlowGraph, - isa: Option<&dyn isa::TargetIsa>, errors: &mut VerifierErrors, ) -> VerifierStepResult<()> { let _tt = timing::verify_flags(); - let encinfo = if isa.is_none() || isa.unwrap().get_mach_backend().is_some() { - None - } else { - Some(isa.unwrap().encoding_info()) - }; let mut verifier = FlagsVerifier { func, cfg, - encinfo, livein: SecondaryMap::new(), }; verifier.check(errors) @@ -45,7 +37,6 @@ pub fn verify_flags( struct FlagsVerifier<'a> { func: &'a ir::Function, cfg: &'a ControlFlowGraph, - encinfo: Option, /// The single live-in flags value (if any) for each block. livein: SecondaryMap>, @@ -111,21 +102,6 @@ impl<'a> FlagsVerifier<'a> { return Err(()); } } - - // Does the instruction have an encoding that clobbers the CPU flags? 
- if self - .encinfo - .as_ref() - .and_then(|ei| ei.operand_constraints(self.func.encodings[inst])) - .map_or(false, |c| c.clobbers_flags) - && live_val.is_some() - { - errors.report(( - inst, - format!("encoding clobbers live CPU flags in {}", live), - )); - return Err(()); - } } // Now look for live ranges of CPU flags that end here. diff --git a/cranelift/codegen/src/verifier/liveness.rs b/cranelift/codegen/src/verifier/liveness.rs deleted file mode 100644 index ac5ee62c42..0000000000 --- a/cranelift/codegen/src/verifier/liveness.rs +++ /dev/null @@ -1,235 +0,0 @@ -//! Liveness verifier. - -use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; -use crate::ir::entities::AnyEntity; -use crate::ir::{ExpandedProgramPoint, Function, ProgramPoint, Value}; -use crate::isa::TargetIsa; -use crate::regalloc::liveness::Liveness; -use crate::regalloc::liverange::LiveRange; -use crate::timing; -use crate::verifier::{VerifierErrors, VerifierStepResult}; - -/// Verify liveness information for `func`. -/// -/// The provided control flow graph is assumed to be sound. -/// -/// - All values in the program must have a live range. -/// - The live range def point must match where the value is defined. -/// - The live range must reach all uses. -/// - When a live range is live-in to a block, it must be live at all the predecessors. -/// - The live range affinity must be compatible with encoding constraints. -/// -/// We don't verify that live ranges are minimal. This would require recomputing live ranges for -/// all values. 
-pub fn verify_liveness( - isa: &dyn TargetIsa, - func: &Function, - cfg: &ControlFlowGraph, - liveness: &Liveness, - errors: &mut VerifierErrors, -) -> VerifierStepResult<()> { - let _tt = timing::verify_liveness(); - let verifier = LivenessVerifier { - isa, - func, - cfg, - liveness, - }; - verifier.check_blocks(errors)?; - verifier.check_insts(errors)?; - Ok(()) -} - -struct LivenessVerifier<'a> { - isa: &'a dyn TargetIsa, - func: &'a Function, - cfg: &'a ControlFlowGraph, - liveness: &'a Liveness, -} - -impl<'a> LivenessVerifier<'a> { - /// Check all block arguments. - fn check_blocks(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { - for block in self.func.layout.blocks() { - for &val in self.func.dfg.block_params(block) { - let lr = match self.liveness.get(val) { - Some(lr) => lr, - None => { - return errors - .fatal((block, format!("block arg {} has no live range", val))) - } - }; - self.check_lr(block.into(), val, lr, errors)?; - } - } - Ok(()) - } - - /// Check all instructions. - fn check_insts(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { - for block in self.func.layout.blocks() { - for inst in self.func.layout.block_insts(block) { - let encoding = self.func.encodings[inst]; - - // Check the defs. - for &val in self.func.dfg.inst_results(inst) { - let lr = match self.liveness.get(val) { - Some(lr) => lr, - None => return errors.fatal((inst, format!("{} has no live range", val))), - }; - self.check_lr(inst.into(), val, lr, errors)?; - - if encoding.is_legal() { - // A legal instruction is not allowed to define ghost values. - if lr.affinity.is_unassigned() { - return errors.fatal(( - inst, - format!( - "{} is a ghost value defined by a real [{}] instruction", - val, - self.isa.encoding_info().display(encoding) - ), - )); - } - } else if !lr.affinity.is_unassigned() { - // A non-encoded instruction can only define ghost values. 
- return errors.fatal(( - inst, - format!( - "{} is a real {} value defined by a ghost instruction", - val, - lr.affinity.display(&self.isa.register_info()) - ), - )); - } - } - - // Check the uses. - for &val in self.func.dfg.inst_args(inst) { - let lr = match self.liveness.get(val) { - Some(lr) => lr, - None => return errors.fatal((inst, format!("{} has no live range", val))), - }; - - debug_assert!(self.func.layout.inst_block(inst).unwrap() == block); - if !lr.reaches_use(inst, block, &self.func.layout) { - return errors.fatal((inst, format!("{} is not live at this use", val))); - } - - // A legal instruction is not allowed to depend on ghost values. - if encoding.is_legal() && lr.affinity.is_unassigned() { - return errors.fatal(( - inst, - format!( - "{} is a ghost value used by a real [{}] instruction", - val, - self.isa.encoding_info().display(encoding), - ), - )); - } - } - } - } - Ok(()) - } - - /// Check the integrity of the live range `lr`. - fn check_lr( - &self, - def: ProgramPoint, - val: Value, - lr: &LiveRange, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - let l = &self.func.layout; - - let loc: AnyEntity = match def.into() { - ExpandedProgramPoint::Block(e) => e.into(), - ExpandedProgramPoint::Inst(i) => i.into(), - }; - if lr.def() != def { - return errors.fatal(( - loc, - format!("Wrong live range def ({}) for {}", lr.def(), val), - )); - } - if lr.is_dead() { - if !lr.is_local() { - return errors.fatal((loc, format!("Dead live range {} should be local", val))); - } else { - return Ok(()); - } - } - let def_block = match def.into() { - ExpandedProgramPoint::Block(e) => e, - ExpandedProgramPoint::Inst(i) => l.inst_block(i).unwrap(), - }; - match lr.def_local_end().into() { - ExpandedProgramPoint::Block(e) => { - return errors.fatal(( - loc, - format!("Def local range for {} can't end at {}", val, e), - )); - } - ExpandedProgramPoint::Inst(i) => { - if self.func.layout.inst_block(i) != Some(def_block) { - return errors - 
.fatal((loc, format!("Def local end for {} in wrong block", val))); - } - } - } - - // Now check the live-in intervals against the CFG. - for (mut block, end) in lr.liveins() { - if !l.is_block_inserted(block) { - return errors.fatal(( - loc, - format!("{} livein at {} which is not in the layout", val, block), - )); - } - let end_block = match l.inst_block(end) { - Some(e) => e, - None => { - return errors.fatal(( - loc, - format!( - "{} livein for {} ends at {} which is not in the layout", - val, block, end - ), - )); - } - }; - - // Check all the blocks in the interval independently. - loop { - // If `val` is live-in at `block`, it must be live at all the predecessors. - for BlockPredecessor { inst: pred, block } in self.cfg.pred_iter(block) { - if !lr.reaches_use(pred, block, &self.func.layout) { - return errors.fatal(( - pred, - format!( - "{} is live in to {} but not live at predecessor", - val, block - ), - )); - } - } - - if block == end_block { - break; - } - block = match l.next_block(block) { - Some(e) => e, - None => { - return errors.fatal(( - loc, - format!("end of {} livein ({}) never reached", val, end_block), - )); - } - }; - } - } - - Ok(()) - } -} diff --git a/cranelift/codegen/src/verifier/locations.rs b/cranelift/codegen/src/verifier/locations.rs deleted file mode 100644 index 7d04e9889b..0000000000 --- a/cranelift/codegen/src/verifier/locations.rs +++ /dev/null @@ -1,399 +0,0 @@ -//! Verify value locations. - -use crate::flowgraph::ControlFlowGraph; -use crate::ir; -use crate::isa; -use crate::regalloc::liveness::Liveness; -use crate::regalloc::RegDiversions; -use crate::timing; -use crate::verifier::{VerifierErrors, VerifierStepResult}; - -/// Verify value locations for `func`. -/// -/// After register allocation, every value must be assigned to a location - either a register or a -/// stack slot. These locations must be compatible with the constraints described by the -/// instruction encoding recipes. 
-/// -/// Values can be temporarily diverted to a different location by using the `regmove`, `regspill`, -/// and `regfill` instructions, but only inside a block. -/// -/// If a liveness analysis is provided, it is used to verify that there are no active register -/// diversions across control flow edges. -pub fn verify_locations( - isa: &dyn isa::TargetIsa, - func: &ir::Function, - cfg: &ControlFlowGraph, - liveness: Option<&Liveness>, - errors: &mut VerifierErrors, -) -> VerifierStepResult<()> { - let _tt = timing::verify_locations(); - let verifier = LocationVerifier { - isa, - func, - reginfo: isa.register_info(), - encinfo: isa.encoding_info(), - cfg, - liveness, - }; - verifier.check_constraints(errors)?; - Ok(()) -} - -struct LocationVerifier<'a> { - isa: &'a dyn isa::TargetIsa, - func: &'a ir::Function, - reginfo: isa::RegInfo, - encinfo: isa::EncInfo, - cfg: &'a ControlFlowGraph, - liveness: Option<&'a Liveness>, -} - -impl<'a> LocationVerifier<'a> { - /// Check that the assigned value locations match the operand constraints of their uses. - fn check_constraints(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { - let dfg = &self.func.dfg; - let mut divert = RegDiversions::new(); - - for block in self.func.layout.blocks() { - divert.at_block(&self.func.entry_diversions, block); - - let mut is_after_branch = false; - for inst in self.func.layout.block_insts(block) { - let enc = self.func.encodings[inst]; - - if enc.is_legal() { - self.check_enc_constraints(inst, enc, &divert, errors)? 
- } else { - self.check_ghost_results(inst, errors)?; - } - - if let Some(sig) = dfg.call_signature(inst) { - self.check_call_abi(inst, sig, &divert, errors)?; - } - - let opcode = dfg[inst].opcode(); - if opcode.is_return() { - self.check_return_abi(inst, &divert, errors)?; - } else if opcode.is_branch() && !divert.is_empty() { - self.check_cfg_edges(inst, &mut divert, is_after_branch, errors)?; - } - - self.update_diversions(inst, &mut divert, errors)?; - is_after_branch = opcode.is_branch(); - } - } - - Ok(()) - } - - /// Check encoding constraints against the current value locations. - fn check_enc_constraints( - &self, - inst: ir::Inst, - enc: isa::Encoding, - divert: &RegDiversions, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - let constraints = self - .encinfo - .operand_constraints(enc) - .expect("check_enc_constraints requires a legal encoding"); - - if constraints.satisfied(inst, divert, self.func) { - return Ok(()); - } - - // TODO: We could give a better error message here. - errors.fatal(( - inst, - format!( - "{} constraints not satisfied in: {}\n{}", - self.encinfo.display(enc), - self.func.dfg.display_inst(inst, self.isa), - self.func.display(self.isa), - ), - )) - } - - /// Check that the result values produced by a ghost instruction are not assigned a value - /// location. - fn check_ghost_results( - &self, - inst: ir::Inst, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - let results = self.func.dfg.inst_results(inst); - - for &res in results { - let loc = self.func.locations[res]; - if loc.is_assigned() { - return errors.fatal(( - inst, - format!( - "ghost result {} value must not have a location ({}).", - res, - loc.display(&self.reginfo) - ), - )); - } - } - - Ok(()) - } - - /// Check the ABI argument and result locations for a call. 
- fn check_call_abi( - &self, - inst: ir::Inst, - sig: ir::SigRef, - divert: &RegDiversions, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - let sig = &self.func.dfg.signatures[sig]; - let varargs = self.func.dfg.inst_variable_args(inst); - let results = self.func.dfg.inst_results(inst); - - for (abi, &value) in sig.params.iter().zip(varargs) { - self.check_abi_location( - inst, - value, - abi, - divert.get(value, &self.func.locations), - ir::StackSlotKind::OutgoingArg, - errors, - )?; - } - - for (abi, &value) in sig.returns.iter().zip(results) { - self.check_abi_location( - inst, - value, - abi, - self.func.locations[value], - ir::StackSlotKind::OutgoingArg, - errors, - )?; - } - - Ok(()) - } - - /// Check the ABI argument locations for a return. - fn check_return_abi( - &self, - inst: ir::Inst, - divert: &RegDiversions, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - let sig = &self.func.signature; - let varargs = self.func.dfg.inst_variable_args(inst); - - for (abi, &value) in sig.returns.iter().zip(varargs) { - self.check_abi_location( - inst, - value, - abi, - divert.get(value, &self.func.locations), - ir::StackSlotKind::IncomingArg, - errors, - )?; - } - - Ok(()) - } - - /// Check a single ABI location. 
- fn check_abi_location( - &self, - inst: ir::Inst, - value: ir::Value, - abi: &ir::AbiParam, - loc: ir::ValueLoc, - want_kind: ir::StackSlotKind, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - match abi.location { - ir::ArgumentLoc::Unassigned => {} - ir::ArgumentLoc::Reg(reg) => { - if loc != ir::ValueLoc::Reg(reg) { - return errors.fatal(( - inst, - format!( - "ABI expects {} in {}, got {}", - value, - abi.location.display(&self.reginfo), - loc.display(&self.reginfo), - ), - )); - } - } - ir::ArgumentLoc::Stack(offset) => { - if let ir::ValueLoc::Stack(ss) = loc { - let slot = &self.func.stack_slots[ss]; - if slot.kind != want_kind { - return errors.fatal(( - inst, - format!( - "call argument {} should be in a {} slot, but {} is {}", - value, want_kind, ss, slot.kind - ), - )); - } - if slot.offset.unwrap() != offset { - return errors.fatal(( - inst, - format!( - "ABI expects {} at stack offset {}, but {} is at {}", - value, - offset, - ss, - slot.offset.unwrap() - ), - )); - } - } else { - return errors.fatal(( - inst, - format!( - "ABI expects {} at stack offset {}, got {}", - value, - offset, - loc.display(&self.reginfo) - ), - )); - } - } - } - - Ok(()) - } - - /// Update diversions to reflect the current instruction and check their consistency. - fn update_diversions( - &self, - inst: ir::Inst, - divert: &mut RegDiversions, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - let (arg, src) = match self.func.dfg[inst] { - ir::InstructionData::RegMove { arg, src, .. } - | ir::InstructionData::RegSpill { arg, src, .. } => (arg, ir::ValueLoc::Reg(src)), - ir::InstructionData::RegFill { arg, src, .. 
} => (arg, ir::ValueLoc::Stack(src)), - _ => return Ok(()), - }; - - if let Some(d) = divert.diversion(arg) { - if d.to != src { - return errors.fatal(( - inst, - format!( - "inconsistent with current diversion to {}", - d.to.display(&self.reginfo) - ), - )); - } - } else if self.func.locations[arg] != src { - return errors.fatal(( - inst, - format!( - "inconsistent with global location {} ({})", - self.func.locations[arg].display(&self.reginfo), - self.func.dfg.display_inst(inst, None) - ), - )); - } - - divert.apply(&self.func.dfg[inst]); - - Ok(()) - } - - /// We have active diversions before a branch. Make sure none of the diverted values are live - /// on the outgoing CFG edges. - fn check_cfg_edges( - &self, - inst: ir::Inst, - divert: &mut RegDiversions, - is_after_branch: bool, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - use crate::ir::instructions::BranchInfo::*; - let dfg = &self.func.dfg; - let branch_kind = dfg.analyze_branch(inst); - - // We can only check CFG edges if we have a liveness analysis. - let liveness = match self.liveness { - Some(l) => l, - None => return Ok(()), - }; - - match branch_kind { - NotABranch => panic!( - "No branch information for {}", - dfg.display_inst(inst, self.isa) - ), - SingleDest(block, _) => { - let unique_predecessor = self.cfg.pred_iter(block).count() == 1; - let mut val_to_remove = vec![]; - for (&value, d) in divert.iter() { - let lr = &liveness[value]; - if is_after_branch && unique_predecessor { - // Forward diversions based on the targeted branch. 
- if !lr.is_livein(block, &self.func.layout) { - val_to_remove.push(value) - } - } else if lr.is_livein(block, &self.func.layout) { - return errors.fatal(( - inst, - format!( - "SingleDest: {} is diverted to {} and live in to {}", - value, - d.to.display(&self.reginfo), - block, - ), - )); - } - } - if is_after_branch && unique_predecessor { - for val in val_to_remove.into_iter() { - divert.remove(val); - } - debug_assert!(divert.check_block_entry(&self.func.entry_diversions, block)); - } - } - Table(jt, block) => { - for (&value, d) in divert.iter() { - let lr = &liveness[value]; - if let Some(block) = block { - if lr.is_livein(block, &self.func.layout) { - return errors.fatal(( - inst, - format!( - "Table.default: {} is diverted to {} and live in to {}", - value, - d.to.display(&self.reginfo), - block, - ), - )); - } - } - for block in self.func.jump_tables[jt].iter() { - if lr.is_livein(*block, &self.func.layout) { - return errors.fatal(( - inst, - format!( - "Table.case: {} is diverted to {} and live in to {}", - value, - d.to.display(&self.reginfo), - block, - ), - )); - } - } - } - } - } - - Ok(()) - } -} diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs index c1721a38fd..36e7286348 100644 --- a/cranelift/codegen/src/verifier/mod.rs +++ b/cranelift/codegen/src/verifier/mod.rs @@ -65,9 +65,8 @@ use crate::ir; use crate::ir::entities::AnyEntity; use crate::ir::instructions::{BranchInfo, CallInfo, InstructionFormat, ResolvedConstraint}; use crate::ir::{ - types, ArgumentLoc, ArgumentPurpose, Block, Constant, FuncRef, Function, GlobalValue, Inst, - InstructionData, JumpTable, Opcode, SigRef, StackSlot, StackSlotKind, Type, Value, ValueDef, - ValueList, ValueLoc, + types, ArgumentPurpose, Block, Constant, FuncRef, Function, GlobalValue, Inst, InstructionData, + JumpTable, Opcode, SigRef, StackSlot, Type, Value, ValueDef, ValueList, }; use crate::isa::TargetIsa; use crate::iterators::IteratorExtras; @@ -78,16 +77,9 @@ use 
alloc::collections::BTreeSet; use alloc::string::{String, ToString}; use alloc::vec::Vec; use core::cmp::Ordering; -use core::fmt::{self, Display, Formatter, Write}; +use core::fmt::{self, Display, Formatter}; -pub use self::cssa::verify_cssa; -pub use self::liveness::verify_liveness; -pub use self::locations::verify_locations; - -mod cssa; mod flags; -mod liveness; -mod locations; /// A verifier error. #[derive(Debug, PartialEq, Eq, Clone)] @@ -322,7 +314,7 @@ impl<'a> Verifier<'a> { /// Determine a contextual error string for an instruction. #[inline] fn context(&self, inst: Inst) -> String { - self.func.dfg.display_inst(inst, self.isa).to_string() + self.func.dfg.display_inst(inst).to_string() } // Check for: @@ -703,12 +695,6 @@ impl<'a> Verifier<'a> { TableAddr { table, .. } => { self.verify_table(inst, table, errors)?; } - RegSpill { dst, .. } => { - self.verify_stack_slot(inst, dst, errors)?; - } - RegFill { src, .. } => { - self.verify_stack_slot(inst, src, errors)?; - } LoadComplex { ref args, .. } => { self.verify_value_list(inst, args, errors)?; } @@ -778,9 +764,6 @@ impl<'a> Verifier<'a> { | IntSelect { .. } | Load { .. } | Store { .. } - | RegMove { .. } - | CopySpecial { .. } - | CopyToSsa { .. } | Trap { .. } | CondTrap { .. } | IntCondTrap { .. 
} @@ -1380,7 +1363,6 @@ impl<'a> Verifier<'a> { .iter() .map(|a| a.value_type); self.typecheck_variable_args_iterator(inst, arg_types, errors)?; - self.check_outgoing_args(inst, sig_ref, errors)?; } CallInfo::Indirect(sig_ref, _) => { let arg_types = self.func.dfg.signatures[sig_ref] @@ -1388,7 +1370,6 @@ impl<'a> Verifier<'a> { .iter() .map(|a| a.value_type); self.typecheck_variable_args_iterator(inst, arg_types, errors)?; - self.check_outgoing_args(inst, sig_ref, errors)?; } CallInfo::NotACall => {} } @@ -1430,7 +1411,7 @@ impl<'a> Verifier<'a> { self.context(inst), format!( "mismatched argument count for `{}`: got {}, expected {}", - self.func.dfg.display_inst(inst, None), + self.func.dfg.display_inst(inst), variable_args.len(), i, ), @@ -1439,77 +1420,6 @@ impl<'a> Verifier<'a> { Ok(()) } - /// Check the locations assigned to outgoing call arguments. - /// - /// When a signature has been legalized, all values passed as outgoing arguments on the stack - /// must be assigned to a matching `OutgoingArg` stack slot. - fn check_outgoing_args( - &self, - inst: Inst, - sig_ref: SigRef, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - let sig = &self.func.dfg.signatures[sig_ref]; - - let args = self.func.dfg.inst_variable_args(inst); - let expected_args = &sig.params[..]; - - for (&arg, &abi) in args.iter().zip(expected_args) { - // Value types have already been checked by `typecheck_variable_args_iterator()`. - if let ArgumentLoc::Stack(offset) = abi.location { - let arg_loc = self.func.locations[arg]; - if let ValueLoc::Stack(ss) = arg_loc { - // Argument value is assigned to a stack slot as expected. 
- self.verify_stack_slot(inst, ss, errors)?; - let slot = &self.func.stack_slots[ss]; - if slot.kind != StackSlotKind::OutgoingArg { - return errors.fatal(( - inst, - self.context(inst), - format!( - "Outgoing stack argument {} in wrong stack slot: {} = {}", - arg, ss, slot, - ), - )); - } - if slot.offset != Some(offset) { - return errors.fatal(( - inst, - self.context(inst), - format!( - "Outgoing stack argument {} should have offset {}: {} = {}", - arg, offset, ss, slot, - ), - )); - } - if abi.purpose == ArgumentPurpose::StructArgument(slot.size) { - } else if slot.size != abi.value_type.bytes() { - return errors.fatal(( - inst, - self.context(inst), - format!( - "Outgoing stack argument {} wrong size for {}: {} = {}", - arg, abi.value_type, ss, slot, - ), - )); - } - } else { - let reginfo = self.isa.map(|i| i.register_info()); - return errors.fatal(( - inst, - self.context(inst), - format!( - "Outgoing stack argument {} in wrong location: {}", - arg, - arg_loc.display(reginfo.as_ref()) - ), - )); - } - } - } - Ok(()) - } - fn typecheck_return(&self, inst: Inst, errors: &mut VerifierErrors) -> VerifierStepResult<()> { if self.func.dfg[inst].opcode().is_return() { let args = self.func.dfg.inst_variable_args(inst); @@ -1671,22 +1581,6 @@ impl<'a> Verifier<'a> { "copy_nop src and dst types must be the same", )); } - let src_loc = self.func.locations[arg]; - let dst_loc = self.func.locations[dst_val]; - let locs_ok = match (src_loc, dst_loc) { - (ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => src_slot == dst_slot, - _ => false, - }; - if !locs_ok { - return errors.fatal(( - inst, - self.context(inst), - format!( - "copy_nop must refer to identical stack slots, but found {:?} vs {:?}", - src_loc, dst_loc, - ), - )); - } } Ok(()) } @@ -1763,145 +1657,6 @@ impl<'a> Verifier<'a> { errors.as_result() } - /// If the verifier has been set up with an ISA, make sure that the recorded encoding for the - /// instruction (if any) matches how the ISA would encode it. 
- fn verify_encoding(&self, inst: Inst, errors: &mut VerifierErrors) -> VerifierStepResult<()> { - // When the encodings table is empty, we don't require any instructions to be encoded. - // - // Once some instructions are encoded, we require all side-effecting instructions to have a - // legal encoding. - if self.func.encodings.is_empty() { - return Ok(()); - } - - let isa = match self.isa { - Some(isa) => isa, - None => return Ok(()), - }; - - let encoding = self.func.encodings[inst]; - if encoding.is_legal() { - if self.func.dfg[inst].opcode().is_ghost() { - return errors.nonfatal(( - inst, - self.context(inst), - format!( - "Ghost instruction has an encoding: {}", - isa.encoding_info().display(encoding), - ), - )); - } - - let mut encodings = isa - .legal_encodings( - &self.func, - &self.func.dfg[inst], - self.func.dfg.ctrl_typevar(inst), - ) - .peekable(); - - if encodings.peek().is_none() { - return errors.nonfatal(( - inst, - self.context(inst), - format!( - "Instruction failed to re-encode {}", - isa.encoding_info().display(encoding), - ), - )); - } - - let has_valid_encoding = encodings.any(|possible_enc| encoding == possible_enc); - - if !has_valid_encoding { - let mut possible_encodings = String::new(); - let mut multiple_encodings = false; - - for enc in isa.legal_encodings( - &self.func, - &self.func.dfg[inst], - self.func.dfg.ctrl_typevar(inst), - ) { - if !possible_encodings.is_empty() { - possible_encodings.push_str(", "); - multiple_encodings = true; - } - possible_encodings - .write_fmt(format_args!("{}", isa.encoding_info().display(enc))) - .unwrap(); - } - - return errors.nonfatal(( - inst, - self.context(inst), - format!( - "encoding {} should be {}{}", - isa.encoding_info().display(encoding), - if multiple_encodings { "one of: " } else { "" }, - possible_encodings, - ), - )); - } - return Ok(()); - } - - // Instruction is not encoded, so it is a ghost instruction. - // Instructions with side effects are not allowed to be ghost instructions. 
- let opcode = self.func.dfg[inst].opcode(); - - // The `fallthrough`, `fallthrough_return`, and `safepoint` instructions are not required - // to have an encoding. - if opcode == Opcode::Fallthrough - || opcode == Opcode::FallthroughReturn - || opcode == Opcode::Safepoint - { - return Ok(()); - } - - // Check if this opcode must be encoded. - let mut needs_enc = None; - if opcode.is_branch() { - needs_enc = Some("Branch"); - } else if opcode.is_call() { - needs_enc = Some("Call"); - } else if opcode.is_return() { - needs_enc = Some("Return"); - } else if opcode.can_store() { - needs_enc = Some("Store"); - } else if opcode.can_trap() { - needs_enc = Some("Trapping instruction"); - } else if opcode.other_side_effects() { - needs_enc = Some("Instruction with side effects"); - } - - if let Some(text) = needs_enc { - // This instruction needs an encoding, so generate an error. - // Provide the ISA default encoding as a hint. - match self.func.encode(inst, isa) { - Ok(enc) => { - return errors.nonfatal(( - inst, - self.context(inst), - format!( - "{} must have an encoding (e.g., {})))", - text, - isa.encoding_info().display(enc), - ), - )); - } - Err(_) => { - return errors.nonfatal(( - inst, - self.context(inst), - format!("{} must have an encoding", text), - )) - } - } - } - - Ok(()) - } - fn immediate_constraints( &self, inst: Inst, @@ -2034,19 +1789,18 @@ impl<'a> Verifier<'a> { self.instruction_integrity(inst, errors)?; self.verify_safepoint_unused(inst, errors)?; self.typecheck(inst, errors)?; - self.verify_encoding(inst, errors)?; self.immediate_constraints(inst, errors)?; } self.encodable_as_bb(block, errors)?; } - verify_flags(self.func, &self.expected_cfg, self.isa, errors)?; + verify_flags(self.func, &self.expected_cfg, errors)?; if !errors.is_empty() { log::warn!( "Found verifier errors in function:\n{}", - pretty_verifier_error(self.func, None, None, errors.clone()) + pretty_verifier_error(self.func, None, errors.clone()) ); } diff --git 
a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs index d7528beef4..3c262a5f6d 100644 --- a/cranelift/codegen/src/write.rs +++ b/cranelift/codegen/src/write.rs @@ -5,14 +5,8 @@ use crate::entity::SecondaryMap; use crate::ir::entities::AnyEntity; -use crate::ir::{ - Block, DataFlowGraph, DisplayFunctionAnnotations, Function, Inst, SigRef, Type, Value, - ValueDef, ValueLoc, -}; -use crate::isa::{RegInfo, TargetIsa}; +use crate::ir::{Block, DataFlowGraph, Function, Inst, SigRef, Type, Value, ValueDef}; use crate::packed_option::ReservedValue; -use crate::value_label::{LabelValueLoc, ValueLabelsRanges}; -use crate::HashSet; use alloc::string::String; use alloc::vec::Vec; use core::fmt::{self, Write}; @@ -24,7 +18,6 @@ pub trait FuncWriter { &mut self, w: &mut dyn Write, func: &Function, - isa: Option<&dyn TargetIsa>, block: Block, indent: usize, ) -> fmt::Result; @@ -35,28 +28,17 @@ pub trait FuncWriter { w: &mut dyn Write, func: &Function, aliases: &SecondaryMap>, - isa: Option<&dyn TargetIsa>, inst: Inst, indent: usize, ) -> fmt::Result; /// Write the preamble to `w`. By default, this uses `write_entity_definition`. - fn write_preamble( - &mut self, - w: &mut dyn Write, - func: &Function, - regs: Option<&RegInfo>, - ) -> Result { - self.super_preamble(w, func, regs) + fn write_preamble(&mut self, w: &mut dyn Write, func: &Function) -> Result { + self.super_preamble(w, func) } /// Default impl of `write_preamble` - fn super_preamble( - &mut self, - w: &mut dyn Write, - func: &Function, - regs: Option<&RegInfo>, - ) -> Result { + fn super_preamble(&mut self, w: &mut dyn Write, func: &Function) -> Result { let mut any = false; for (ss, slot) in func.stack_slots.iter() { @@ -87,7 +69,7 @@ pub trait FuncWriter { // signatures. 
for (sig, sig_data) in &func.dfg.signatures { any = true; - self.write_entity_definition(w, func, sig.into(), &sig_data.display(regs))?; + self.write_entity_definition(w, func, sig.into(), &sig_data)?; } for (fnref, ext_func) in &func.dfg.ext_funcs { @@ -148,33 +130,27 @@ impl FuncWriter for PlainWriter { w: &mut dyn Write, func: &Function, aliases: &SecondaryMap>, - isa: Option<&dyn TargetIsa>, inst: Inst, indent: usize, ) -> fmt::Result { - write_instruction(w, func, aliases, isa, inst, indent) + write_instruction(w, func, aliases, inst, indent) } fn write_block_header( &mut self, w: &mut dyn Write, func: &Function, - isa: Option<&dyn TargetIsa>, block: Block, indent: usize, ) -> fmt::Result { - write_block_header(w, func, isa, block, indent) + write_block_header(w, func, block, indent) } } /// Write `func` to `w` as equivalent text. /// Use `isa` to emit ISA-dependent annotations. -pub fn write_function( - w: &mut dyn Write, - func: &Function, - annotations: &DisplayFunctionAnnotations, -) -> fmt::Result { - decorate_function(&mut PlainWriter, w, func, annotations) +pub fn write_function(w: &mut dyn Write, func: &Function) -> fmt::Result { + decorate_function(&mut PlainWriter, w, func) } /// Create a reverse-alias map from a value to all aliases having that value as a direct target @@ -196,21 +172,17 @@ pub fn decorate_function( func_w: &mut FW, w: &mut dyn Write, func: &Function, - annotations: &DisplayFunctionAnnotations, ) -> fmt::Result { - let regs = annotations.isa.map(TargetIsa::register_info); - let regs = regs.as_ref(); - write!(w, "function ")?; - write_spec(w, func, regs)?; + write_spec(w, func)?; writeln!(w, " {{")?; let aliases = alias_map(func); - let mut any = func_w.write_preamble(w, func, regs)?; + let mut any = func_w.write_preamble(w, func)?; for block in &func.layout { if any { writeln!(w)?; } - decorate_block(func_w, w, func, &aliases, annotations, block)?; + decorate_block(func_w, w, func, &aliases, block)?; any = true; } writeln!(w, "}}") 
@@ -220,27 +192,16 @@ pub fn decorate_function( // // Function spec. -fn write_spec(w: &mut dyn Write, func: &Function, regs: Option<&RegInfo>) -> fmt::Result { - write!(w, "{}{}", func.name, func.signature.display(regs)) +fn write_spec(w: &mut dyn Write, func: &Function) -> fmt::Result { + write!(w, "{}{}", func.name, func.signature) } //---------------------------------------------------------------------- // // Basic blocks -fn write_arg( - w: &mut dyn Write, - func: &Function, - regs: Option<&RegInfo>, - arg: Value, -) -> fmt::Result { - write!(w, "{}: {}", arg, func.dfg.value_type(arg))?; - let loc = func.locations[arg]; - if loc.is_assigned() { - write!(w, " [{}]", loc.display(regs))? - } - - Ok(()) +fn write_arg(w: &mut dyn Write, func: &Function, arg: Value) -> fmt::Result { + write!(w, "{}: {}", arg, func.dfg.value_type(arg)) } /// Write out the basic block header, outdented: @@ -252,114 +213,45 @@ fn write_arg( pub fn write_block_header( w: &mut dyn Write, func: &Function, - isa: Option<&dyn TargetIsa>, block: Block, indent: usize, ) -> fmt::Result { // The `indent` is the instruction indentation. block headers are 4 spaces out from that. write!(w, "{1:0$}{2}", indent - 4, "", block)?; - let regs = isa.map(TargetIsa::register_info); - let regs = regs.as_ref(); - let mut args = func.dfg.block_params(block).iter().cloned(); match args.next() { None => return writeln!(w, ":"), Some(arg) => { write!(w, "(")?; - write_arg(w, func, regs, arg)?; + write_arg(w, func, arg)?; } } // Remaining arguments. 
for arg in args { write!(w, ", ")?; - write_arg(w, func, regs, arg)?; + write_arg(w, func, arg)?; } writeln!(w, "):") } -fn write_valueloc(w: &mut dyn Write, loc: LabelValueLoc, regs: &RegInfo) -> fmt::Result { - match loc { - LabelValueLoc::ValueLoc(ValueLoc::Reg(r)) => write!(w, "{}", regs.display_regunit(r)), - LabelValueLoc::ValueLoc(ValueLoc::Stack(ss)) => write!(w, "{}", ss), - LabelValueLoc::ValueLoc(ValueLoc::Unassigned) => write!(w, "?"), - LabelValueLoc::Reg(r) => write!(w, "{:?}", r), - LabelValueLoc::SPOffset(off) => write!(w, "[sp+{}]", off), - } -} - -fn write_value_range_markers( - w: &mut dyn Write, - val_ranges: &ValueLabelsRanges, - regs: &RegInfo, - offset: u32, - indent: usize, -) -> fmt::Result { - let mut result = String::new(); - let mut shown = HashSet::new(); - for (val, rng) in val_ranges { - for i in (0..rng.len()).rev() { - if rng[i].start == offset { - write!(&mut result, " {}@", val)?; - write_valueloc(&mut result, rng[i].loc, regs)?; - shown.insert(val); - break; - } - } - } - for (val, rng) in val_ranges { - for i in (0..rng.len()).rev() { - if rng[i].end == offset && !shown.contains(val) { - write!(&mut result, " {}\u{2620}", val)?; - break; - } - } - } - if !result.is_empty() { - writeln!(w, ";{1:0$}; {2}", indent + 24, "", result)?; - } - Ok(()) -} - fn decorate_block( func_w: &mut FW, w: &mut dyn Write, func: &Function, aliases: &SecondaryMap>, - annotations: &DisplayFunctionAnnotations, block: Block, ) -> fmt::Result { - // Indent all instructions if any encodings are present. - let indent = if func.encodings.is_empty() && func.srclocs.is_empty() { - 4 - } else { - 36 - }; - let isa = annotations.isa; + // Indent all instructions if any srclocs are present. 
+ let indent = if func.srclocs.is_empty() { 4 } else { 36 }; - func_w.write_block_header(w, func, isa, block, indent)?; + func_w.write_block_header(w, func, block, indent)?; for a in func.dfg.block_params(block).iter().cloned() { write_value_aliases(w, aliases, a, indent)?; } - if let Some(isa) = isa { - if !func.offsets.is_empty() { - let encinfo = isa.encoding_info(); - let regs = &isa.register_info(); - for (offset, inst, size) in func.inst_offsets(block, &encinfo) { - func_w.write_instruction(w, func, aliases, Some(isa), inst, indent)?; - if size > 0 { - if let Some(val_ranges) = annotations.value_ranges { - write_value_range_markers(w, val_ranges, regs, offset + size, indent)?; - } - } - } - return Ok(()); - } - } - for inst in func.layout.block_insts(block) { - func_w.write_instruction(w, func, aliases, isa, inst, indent)?; + func_w.write_instruction(w, func, aliases, inst, indent)?; } Ok(()) @@ -425,7 +317,6 @@ fn write_instruction( w: &mut dyn Write, func: &Function, aliases: &SecondaryMap>, - isa: Option<&dyn TargetIsa>, inst: Inst, indent: usize, ) -> fmt::Result { @@ -438,23 +329,6 @@ fn write_instruction( write!(s, "{} ", srcloc)?; } - // Write out encoding info. - if let Some(enc) = func.encodings.get(inst).cloned() { - if let Some(isa) = isa { - write!(s, "[{}", isa.encoding_info().display(enc))?; - // Write value locations, if we have them. - if !func.locations.is_empty() { - let regs = isa.register_info(); - for &r in func.dfg.inst_results(inst) { - write!(s, ",{}", func.locations[r].display(&regs))? - } - } - write!(s, "] ")?; - } else { - write!(s, "[{}] ", enc)?; - } - } - // Write out prefix and indent the instruction. write!(w, "{1:0$}", indent, s)?; @@ -480,7 +354,7 @@ fn write_instruction( None => write!(w, "{}", opcode)?, } - write_operands(w, &func.dfg, isa, inst)?; + write_operands(w, &func.dfg, inst)?; writeln!(w)?; // Value aliases come out on lines after the instruction defining the referent.
@@ -491,12 +365,7 @@ fn write_instruction( } /// Write the operands of `inst` to `w` with a prepended space. -pub fn write_operands( - w: &mut dyn Write, - dfg: &DataFlowGraph, - isa: Option<&dyn TargetIsa>, - inst: Inst, -) -> fmt::Result { +pub fn write_operands(w: &mut dyn Write, dfg: &DataFlowGraph, inst: Inst) -> fmt::Result { let pool = &dfg.value_lists; use crate::ir::instructions::InstructionData::*; match dfg[inst] { @@ -665,57 +534,6 @@ pub fn write_operands( offset ) } - RegMove { arg, src, dst, .. } => { - if let Some(isa) = isa { - let regs = isa.register_info(); - write!( - w, - " {}, {} -> {}", - arg, - regs.display_regunit(src), - regs.display_regunit(dst) - ) - } else { - write!(w, " {}, %{} -> %{}", arg, src, dst) - } - } - CopySpecial { src, dst, .. } => { - if let Some(isa) = isa { - let regs = isa.register_info(); - write!( - w, - " {} -> {}", - regs.display_regunit(src), - regs.display_regunit(dst) - ) - } else { - write!(w, " %{} -> %{}", src, dst) - } - } - CopyToSsa { src, .. } => { - if let Some(isa) = isa { - let regs = isa.register_info(); - write!(w, " {}", regs.display_regunit(src)) - } else { - write!(w, " %{}", src) - } - } - RegSpill { arg, src, dst, .. } => { - if let Some(isa) = isa { - let regs = isa.register_info(); - write!(w, " {}, {} -> {}", arg, regs.display_regunit(src), dst) - } else { - write!(w, " {}, %{} -> {}", arg, src, dst) - } - } - RegFill { arg, src, dst, .. } => { - if let Some(isa) = isa { - let regs = isa.register_info(); - write!(w, " {}, {} -> {}", arg, src, regs.display_regunit(dst)) - } else { - write!(w, " {}, {} -> %{}", arg, src, dst) - } - } Trap { code, .. } => write!(w, " {}", code), CondTrap { arg, code, .. } => write!(w, " {}, {}", arg, code), IntCondTrap { diff --git a/cranelift/docs/ir.md b/cranelift/docs/ir.md index d4625621f2..31d0e83867 100644 --- a/cranelift/docs/ir.md +++ b/cranelift/docs/ir.md @@ -884,10 +884,6 @@ are assigned to registers or stack slots. 
This approach permits SSA form to be preserved throughout the register allocation pass and beyond. -Register values can be temporarily diverted to other registers by the -`regmove` instruction, and to and from stack slots by `regspill` -and `regfill`. - ## Instruction groups All of the shared instructions are part of the `base` instruction diff --git a/cranelift/docs/testing.md b/cranelift/docs/testing.md index a005ffd982..4dc6054808 100644 --- a/cranelift/docs/testing.md +++ b/cranelift/docs/testing.md @@ -299,13 +299,6 @@ Test the preopt pass. The preopt pass is run on each function, and then results are run through filecheck. -### `test postopt` - -Test the postopt pass. - -The postopt pass is run on each function, and then results are run -through filecheck. - ### `test compile` Test the whole code generation pipeline. @@ -321,16 +314,16 @@ Cranelift IR right before binary machine code emission. Compile and execute a function. This test command allows several directives: - - to print the result of running a function to stdout, add a `print` + - to print the result of running a function to stdout, add a `print` directive and call the preceding function with arguments (see `%foo` in - the example below); remember to enable `--nocapture` if running these + the example below); remember to enable `--nocapture` if running these tests through Cargo - to check the result of a function, add a `run` directive and call the preceding function with a comparison (`==` or `!=`) (see `%bar` below) - for backwards compatibility, to check the result of a function with a - `() -> b*` signature, only the `run` directive is required, with no - invocation or comparison (see `%baz` below); a `true` value is - interpreted as a successful test execution, whereas a `false` value is + `() -> b*` signature, only the `run` directive is required, with no + invocation or comparison (see `%baz` below); a `true` value is + interpreted as a successful test execution, whereas a `false` value is 
interpreted as a failed test. Currently a `target` is required but is only used to indicate whether the host @@ -417,19 +410,19 @@ See the diagram below, on how the `vmctx` struct ends up if with multiple heaps: ``` ┌─────────────────────┐ vmctx+0 - │heap0: start address │ + │heap0: start address │ ├─────────────────────┤ vmctx+8 │heap0: end address │ ├─────────────────────┤ vmctx+16 │heap1: start address │ ├─────────────────────┤ vmctx+24 - │heap1: end address │ + │heap1: end address │ ├─────────────────────┤ vmctx+32 │etc... │ └─────────────────────┘ ``` -With this setup, you can now use the global values to load heaps, and load / store to them. +With this setup, you can now use the global values to load heaps, and load / store to them. Example: diff --git a/cranelift/entity/src/lib.rs b/cranelift/entity/src/lib.rs index 09054719e0..6dac449083 100644 --- a/cranelift/entity/src/lib.rs +++ b/cranelift/entity/src/lib.rs @@ -129,6 +129,68 @@ macro_rules! entity_impl { } } }; + + // Alternate form for tuples we can't directly construct; providing "to" and "from" expressions + // to turn an index *into* an entity, or get an index *from* an entity. + ($entity:ident, $display_prefix:expr, $arg:ident, $to_expr:expr, $from_expr:expr) => { + impl $crate::EntityRef for $entity { + #[inline] + fn new(index: usize) -> Self { + debug_assert!(index < ($crate::__core::u32::MAX as usize)); + let $arg = index as u32; + $to_expr + } + + #[inline] + fn index(self) -> usize { + let $arg = self; + $from_expr as usize + } + } + + impl $crate::packed_option::ReservedValue for $entity { + #[inline] + fn reserved_value() -> $entity { + $entity::from_u32($crate::__core::u32::MAX) + } + + #[inline] + fn is_reserved_value(&self) -> bool { + self.as_u32() == $crate::__core::u32::MAX + } + } + + impl $entity { + /// Create a new instance from a `u32`. 
+ #[allow(dead_code)] + #[inline] + pub fn from_u32(x: u32) -> Self { + debug_assert!(x < $crate::__core::u32::MAX); + let $arg = x; + $to_expr + } + + /// Return the underlying index value as a `u32`. + #[allow(dead_code)] + #[inline] + pub fn as_u32(self) -> u32 { + let $arg = self; + $from_expr + } + } + + impl $crate::__core::fmt::Display for $entity { + fn fmt(&self, f: &mut $crate::__core::fmt::Formatter) -> $crate::__core::fmt::Result { + write!(f, concat!($display_prefix, "{}"), self.as_u32()) + } + } + + impl $crate::__core::fmt::Debug for $entity { + fn fmt(&self, f: &mut $crate::__core::fmt::Formatter) -> $crate::__core::fmt::Result { + (self as &dyn $crate::__core::fmt::Display).fmt(f) + } + } + }; } pub mod packed_option; @@ -150,3 +212,103 @@ pub use self::map::SecondaryMap; pub use self::primary::PrimaryMap; pub use self::set::EntitySet; pub use self::sparse::{SparseMap, SparseMapValue, SparseSet}; + +/// A collection of tests to ensure that use of the different `entity_impl!` forms will generate +/// `EntityRef` implementations that behave the same way. +#[cfg(test)] +mod tests { + /// A macro used to emit some basic tests to show that entities behave as we expect. + macro_rules! 
entity_test { + ($entity:ident) => { + #[test] + fn from_usize_to_u32() { + let e = $entity::new(42); + assert_eq!(e.as_u32(), 42_u32); + } + + #[test] + fn from_u32_to_usize() { + let e = $entity::from_u32(42); + assert_eq!(e.index(), 42_usize); + } + + #[test] + fn comparisons_work() { + let a = $entity::from_u32(42); + let b = $entity::new(42); + assert_eq!(a, b); + } + + #[should_panic] + #[test] + fn cannot_construct_from_reserved_u32() { + use crate::packed_option::ReservedValue; + let reserved = $entity::reserved_value().as_u32(); + let _ = $entity::from_u32(reserved); // panic + } + + #[should_panic] + #[test] + fn cannot_construct_from_reserved_usize() { + use crate::packed_option::ReservedValue; + let reserved = $entity::reserved_value().index(); + let _ = $entity::new(reserved); // panic + } + }; + } + + /// Test cases for a plain ol' `EntityRef` implementation. + mod basic_entity { + use crate::EntityRef; + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + struct BasicEntity(u32); + entity_impl!(BasicEntity); + entity_test!(BasicEntity); + } + + /// Test cases for an `EntityRef` implementation that includes a display prefix. + mod prefix_entity { + use crate::EntityRef; + #[derive(Clone, Copy, PartialEq, Eq)] + struct PrefixEntity(u32); + entity_impl!(PrefixEntity, "prefix-"); + entity_test!(PrefixEntity); + + #[test] + fn display_prefix_works() { + let e = PrefixEntity::new(0); + assert_eq!(alloc::format!("{}", e), "prefix-0"); + } + } + + /// Test cases for an `EntityRef` implementation for a type we can only construct through + /// other means, such as calls to `core::convert::From`. 
+ mod other_entity { + mod inner { + #[derive(Clone, Copy, PartialEq, Eq)] + pub struct InnerEntity(u32); + + impl From for InnerEntity { + fn from(x: u32) -> Self { + Self(x) + } + } + + impl From for u32 { + fn from(x: InnerEntity) -> Self { + x.0 + } + } + } + + use {self::inner::InnerEntity, crate::EntityRef}; + entity_impl!(InnerEntity, "inner-", i, InnerEntity::from(i), u32::from(i)); + entity_test!(InnerEntity); + + #[test] + fn display_prefix_works() { + let e = InnerEntity::new(0); + assert_eq!(alloc::format!("{}", e), "inner-0"); + } + } +} diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif new file mode 100644 index 0000000000..9157c99977 --- /dev/null +++ b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif @@ -0,0 +1,114 @@ +test compile +target aarch64 has_lse + +function %atomic_rmw_add_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 add v0, v1 + return +} +; check: ldaddal x1, x0, [x0] + +function %atomic_rmw_add_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = atomic_rmw.i32 add v0, v1 + return +} +; check: ldaddal w1, w0, [x0] + +function %atomic_rmw_and_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 and v0, v1 + return +} +; check: ldclral x1, x0, [x0] + +function %atomic_rmw_and_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = atomic_rmw.i32 and v0, v1 + return +} +; check: ldclral w1, w0, [x0] + +function %atomic_rmw_or_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 or v0, v1 + return +} +; check: ldsetal x1, x0, [x0] + +function %atomic_rmw_or_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = atomic_rmw.i32 or v0, v1 + return +} +; check: ldsetal w1, w0, [x0] + +function %atomic_rmw_xor_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 xor v0, v1 + return +} +; check: ldeoral x1, x0, [x0] + +function %atomic_rmw_xor_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = 
atomic_rmw.i32 xor v0, v1 + return +} +; check: ldeoral w1, w0, [x0] + +function %atomic_rmw_smax_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 smax v0, v1 + return +} +; check: ldsmaxal x1, x0, [x0] + +function %atomic_rmw_smax_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = atomic_rmw.i32 smax v0, v1 + return +} +; check: ldsmaxal w1, w0, [x0] + +function %atomic_rmw_umax_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 umax v0, v1 + return +} +; check: ldumaxal x1, x0, [x0] + +function %atomic_rmw_umax_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = atomic_rmw.i32 umax v0, v1 + return +} +; check: ldumaxal w1, w0, [x0] + +function %atomic_rmw_smin_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 smin v0, v1 + return +} +; check: ldsminal x1, x0, [x0] + +function %atomic_rmw_smin_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = atomic_rmw.i32 smin v0, v1 + return +} +; check: ldsminal w1, w0, [x0] + +function %atomic_rmw_umin_i64(i64, i64) { +block0(v0: i64, v1: i64): + v2 = atomic_rmw.i64 umin v0, v1 + return +} +; check: lduminal x1, x0, [x0] + +function %atomic_rmw_umin_i32(i32, i32) { +block0(v0: i32, v1: i32): + v2 = atomic_rmw.i32 umin v0, v1 + return +} +; check: lduminal w1, w0, [x0] diff --git a/cranelift/filetests/filetests/isa/riscv/abi-e.clif b/cranelift/filetests/filetests/isa/riscv/abi-e.clif deleted file mode 100644 index fcd762ee81..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/abi-e.clif +++ /dev/null @@ -1,14 +0,0 @@ -; Test the legalization of function signatures for RV32E. -test legalizer -target riscv32 enable_e - -; regex: V=v\d+ - -function %f() { - ; Spilling into the stack args after %x15 since %16 and up are not - ; available in RV32E. 
- sig0 = (i64, i64, i64, i64) -> i64 system_v - ; check: sig0 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [0], i32 [4]) -> i32 [%x10], i32 [%x11] system_v -block0: - return -} diff --git a/cranelift/filetests/filetests/isa/riscv/abi.clif b/cranelift/filetests/filetests/isa/riscv/abi.clif deleted file mode 100644 index d9469f490e..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/abi.clif +++ /dev/null @@ -1,32 +0,0 @@ -; Test the legalization of function signatures. -test legalizer -target riscv32 - -; regex: V=v\d+ - -function %f() { - sig0 = (i32) -> i32 system_v - ; check: sig0 = (i32 [%x10]) -> i32 [%x10] system_v - - sig1 = (i64) -> b1 system_v - ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] system_v - - ; The i64 argument must go in an even-odd register pair. - sig2 = (f32, i64) -> f64 system_v - ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] system_v - - ; Spilling into the stack args. - sig3 = (f64, f64, f64, f64, f64, f64, f64, i64) -> f64 system_v - ; check: sig3 = (f64 [%f10], f64 [%f11], f64 [%f12], f64 [%f13], f64 [%f14], f64 [%f15], f64 [%f16], i32 [0], i32 [4]) -> f64 [%f10] system_v - - ; Splitting vectors. - sig4 = (i32x4) system_v - ; check: sig4 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13]) system_v - - ; Splitting vectors, then splitting ints. - sig5 = (i64x4) system_v - ; check: sig5 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [%x16], i32 [%x17]) system_v - -block0: - return -} diff --git a/cranelift/filetests/filetests/isa/riscv/binary32.clif b/cranelift/filetests/filetests/isa/riscv/binary32.clif deleted file mode 100644 index 5a69c4289b..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/binary32.clif +++ /dev/null @@ -1,189 +0,0 @@ -; Binary emission of 32-bit code. 
-test binemit -target riscv32 - -function %RV32I(i32 link [%x1]) -> i32 link [%x1] { - sig0 = () - fn0 = %foo() - -block0(v9999: i32): - [-,%x10] v1 = iconst.i32 1 - [-,%x21] v2 = iconst.i32 2 - - ; Integer Register-Register Operations. - ; add - [-,%x7] v10 = iadd v1, v2 ; bin: 015503b3 - [-,%x16] v11 = iadd v2, v1 ; bin: 00aa8833 - ; sub - [-,%x7] v12 = isub v1, v2 ; bin: 415503b3 - [-,%x16] v13 = isub v2, v1 ; bin: 40aa8833 - ; and - [-,%x7] v20 = band v1, v2 ; bin: 015573b3 - [-,%x16] v21 = band v2, v1 ; bin: 00aaf833 - ; or - [-,%x7] v22 = bor v1, v2 ; bin: 015563b3 - [-,%x16] v23 = bor v2, v1 ; bin: 00aae833 - ; xor - [-,%x7] v24 = bxor v1, v2 ; bin: 015543b3 - [-,%x16] v25 = bxor v2, v1 ; bin: 00aac833 - ; sll - [-,%x7] v30 = ishl v1, v2 ; bin: 015513b3 - [-,%x16] v31 = ishl v2, v1 ; bin: 00aa9833 - ; srl - [-,%x7] v32 = ushr v1, v2 ; bin: 015553b3 - [-,%x16] v33 = ushr v2, v1 ; bin: 00aad833 - ; sra - [-,%x7] v34 = sshr v1, v2 ; bin: 415553b3 - [-,%x16] v35 = sshr v2, v1 ; bin: 40aad833 - ; slt - [-,%x7] v42 = icmp slt v1, v2 ; bin: 015523b3 - [-,%x16] v43 = icmp slt v2, v1 ; bin: 00aaa833 - ; sltu - [-,%x7] v44 = icmp ult v1, v2 ; bin: 015533b3 - [-,%x16] v45 = icmp ult v2, v1 ; bin: 00aab833 - - ; Integer Register-Immediate Instructions - - ; addi - [-,%x7] v100 = iadd_imm v1, 1000 ; bin: 3e850393 - [-,%x16] v101 = iadd_imm v2, -905 ; bin: c77a8813 - ; andi - [-,%x7] v110 = band_imm v1, 1000 ; bin: 3e857393 - [-,%x16] v111 = band_imm v2, -905 ; bin: c77af813 - ; ori - [-,%x7] v112 = bor_imm v1, 1000 ; bin: 3e856393 - [-,%x16] v113 = bor_imm v2, -905 ; bin: c77ae813 - ; xori - [-,%x7] v114 = bxor_imm v1, 1000 ; bin: 3e854393 - [-,%x16] v115 = bxor_imm v2, -905 ; bin: c77ac813 - - ; slli - [-,%x7] v120 = ishl_imm v1, 31 ; bin: 01f51393 - [-,%x16] v121 = ishl_imm v2, 8 ; bin: 008a9813 - ; srli - [-,%x7] v122 = ushr_imm v1, 31 ; bin: 01f55393 - [-,%x16] v123 = ushr_imm v2, 8 ; bin: 008ad813 - ; srai - [-,%x7] v124 = sshr_imm v1, 31 ; bin: 41f55393 - [-,%x16] 
v125 = sshr_imm v2, 8 ; bin: 408ad813 - - ; slti - [-,%x7] v130 = icmp_imm slt v1, 1000 ; bin: 3e852393 - [-,%x16] v131 = icmp_imm slt v2, -905 ; bin: c77aa813 - ; sltiu - [-,%x7] v132 = icmp_imm ult v1, 1000 ; bin: 3e853393 - [-,%x16] v133 = icmp_imm ult v2, -905 ; bin: c77ab813 - - ; lui - [-,%x7] v140 = iconst.i32 0x12345000 ; bin: 123453b7 - [-,%x16] v141 = iconst.i32 0xffffffff_fedcb000 ; bin: fedcb837 - ; addi - [-,%x7] v142 = iconst.i32 1000 ; bin: 3e800393 - [-,%x16] v143 = iconst.i32 -905 ; bin: c7700813 - - ; Copies alias to iadd_imm. - [-,%x7] v150 = copy v1 ; bin: 00050393 - [-,%x16] v151 = copy v2 ; bin: 000a8813 - - ; Control Transfer Instructions - - ; jal %x1, fn0 - call fn0() ; bin: Call(%foo) 000000ef - - ; jalr %x1, %x10 - call_indirect sig0, v1() ; bin: 000500e7 - call_indirect sig0, v2() ; bin: 000a80e7 - - brz v1, block3 - fallthrough block4 - -block4: - brnz v1, block1 - fallthrough block5 - -block5: - ; jalr %x0, %x1, 0 - return v9999 ; bin: 00008067 - -block1: - ; beq 0x000 - br_icmp eq v1, v2, block1 ; bin: 01550063 - fallthrough block100 - -block100: - ; bne 0xffc - br_icmp ne v1, v2, block1 ; bin: ff551ee3 - fallthrough block101 - -block101: - ; blt 0xff8 - br_icmp slt v1, v2, block1 ; bin: ff554ce3 - fallthrough block102 - -block102: - ; bge 0xff4 - br_icmp sge v1, v2, block1 ; bin: ff555ae3 - fallthrough block103 - -block103: - ; bltu 0xff0 - br_icmp ult v1, v2, block1 ; bin: ff5568e3 - fallthrough block104 - -block104: - ; bgeu 0xfec - br_icmp uge v1, v2, block1 ; bin: ff5576e3 - fallthrough block105 - -block105: - - ; Forward branches. 
- fallthrough block106 - -block106: - ; beq 0x018 - br_icmp eq v2, v1, block2 ; bin: 00aa8c63 - fallthrough block107 - -block107: - ; bne 0x014 - br_icmp ne v2, v1, block2 ; bin: 00aa9a63 - fallthrough block108 - -block108: - ; blt 0x010 - br_icmp slt v2, v1, block2 ; bin: 00aac863 - fallthrough block109 - -block109: - ; bge 0x00c - br_icmp sge v2, v1, block2 ; bin: 00aad663 - fallthrough block110 - -block110: - ; bltu 0x008 - br_icmp ult v2, v1, block2 ; bin: 00aae463 - fallthrough block111 - -block111: - ; bgeu 0x004 - br_icmp uge v2, v1, block2 ; bin: 00aaf263 - - fallthrough block2 - -block2: - ; jal %x0, 0x00000 - jump block2 ; bin: 0000006f - -block3: - ; beq x, %x0 - brz v1, block3 ; bin: 00050063 - fallthrough block6 - -block6: - ; bne x, %x0 - brnz v1, block3 ; bin: fe051ee3 - - ; jal %x0, 0x1ffff4 - jump block2 ; bin: ff5ff06f -} diff --git a/cranelift/filetests/filetests/isa/riscv/encoding.clif b/cranelift/filetests/filetests/isa/riscv/encoding.clif deleted file mode 100644 index b8c991f52e..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/encoding.clif +++ /dev/null @@ -1,21 +0,0 @@ -test legalizer -target riscv32 supports_m=1 - -function %int32(i32, i32) { -block0(v1: i32, v2: i32): - v10 = iadd v1, v2 - ; check: [R#0c] - ; sameln: v10 = iadd - - v11 = isub v1, v2 - ; check: [R#200c] - ; sameln: v11 = isub - - v12 = imul v1, v2 - ; check: [R#10c] - ; sameln: v12 = imul - - return - ; check: [Iret#19] - ; sameln: return -} diff --git a/cranelift/filetests/filetests/isa/riscv/expand-i32.clif b/cranelift/filetests/filetests/isa/riscv/expand-i32.clif deleted file mode 100644 index ee62bc093f..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/expand-i32.clif +++ /dev/null @@ -1,37 +0,0 @@ -; Test the legalization of i32 instructions that don't have RISC-V versions. 
-test legalizer - -target riscv32 supports_m=1 - -target riscv64 supports_m=1 - -; regex: V=v\d+ - -function %carry_out(i32, i32) -> i32, b1 { -block0(v1: i32, v2: i32): - v3, v4 = iadd_cout v1, v2 - return v3, v4 -} -; check: v3 = iadd v1, v2 -; check: v4 = icmp ult v3, v1 -; check: return v3, v4 - -; Expanding illegal immediate constants. -; Note that at some point we'll probably expand the iconst as well. -function %large_imm(i32) -> i32 { -block0(v0: i32): - v1 = iadd_imm v0, 1000000000 - return v1 -} -; check: $(cst=$V) = iconst.i32 0x3b9a_ca00 -; check: v1 = iadd v0, $cst -; check: return v1 - -function %bitclear(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = band_not v0, v1 - ; check: iconst.i32 -1 - ; check: bxor - ; check: band - return v2 -} diff --git a/cranelift/filetests/filetests/isa/riscv/legalize-abi.clif b/cranelift/filetests/filetests/isa/riscv/legalize-abi.clif deleted file mode 100644 index 5ea4cd002e..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/legalize-abi.clif +++ /dev/null @@ -1,134 +0,0 @@ -; Test legalizer's handling of ABI boundaries. 
-test legalizer -target riscv32 - -; regex: V=v\d+ -; regex: SS=ss\d+ -; regex: WS=\s+ - -function %int_split_args(i64) -> i64 { -block0(v0: i64): - ; check: block0($(v0l=$V): i32, $(v0h=$V): i32, $(link=$V): i32): - ; check: v0 = iconcat $v0l, $v0h - v1 = iadd_imm v0, 1 - ; check: $(v1l=$V), $(v1h=$V) = isplit v1 - ; check: return $v1l, $v1h, $link - return v1 -} - -function %split_call_arg(i32) { - fn1 = %foo(i64) - fn2 = %foo(i32, i64) -block0(v0: i32): - v1 = uextend.i64 v0 - call fn1(v1) - ; check: $(v1h=$V) = iconst.i32 0 - ; check: call fn1(v0, $v1h) - call fn2(v0, v1) - ; check: call fn2(v0, $V, $V) - return -} - -function %split_ret_val() { - fn1 = %foo() -> i64 -block0: - v1 = call fn1() - ; check: block0($(link=$V): i32): - ; nextln: $(v1l=$V), $(v1h=$V) = call fn1() - ; check: v1 = iconcat $v1l, $v1h - jump block1(v1) - ; check: jump block1(v1) - -block1(v10: i64): - jump block1(v10) -} - -; First return value is fine, second one is expanded. -function %split_ret_val2() { - fn1 = %foo() -> i32, i64 -block0: - v1, v2 = call fn1() - ; check: block0($(link=$V): i32): - ; nextln: v1, $(v2l=$V), $(v2h=$V) = call fn1() - ; check: v2 = iconcat $v2l, $v2h - jump block1(v1, v2) - ; check: jump block1(v1, v2) - -block1(v9: i32, v10: i64): - jump block1(v9, v10) -} - -function %int_ext(i8, i8 sext, i8 uext) -> i8 uext { -block0(v1: i8, v2: i8, v3: i8): - ; check: block0(v1: i8, $(v2x=$V): i32, $(v3x=$V): i32, $(link=$V): i32): - ; check: v2 = ireduce.i8 $v2x - ; check: v3 = ireduce.i8 $v3x - ; check: $(v1x=$V) = uextend.i32 v1 - ; check: return $v1x, $link - return v1 -} - -; Function produces single return value, still need to copy. 
-function %ext_ret_val() { - fn1 = %foo() -> i8 sext -block0: - v1 = call fn1() - ; check: block0($V: i32): - ; nextln: $(rv=$V) = call fn1() - ; check: v1 = ireduce.i8 $rv - jump block1(v1) - ; check: jump block1(v1) - -block1(v10: i8): - jump block1(v10) -} - -function %vector_split_args(i64x4) -> i64x4 { -block0(v0: i64x4): - ; check: block0($(v0al=$V): i32, $(v0ah=$V): i32, $(v0bl=$V): i32, $(v0bh=$V): i32, $(v0cl=$V): i32, $(v0ch=$V): i32, $(v0dl=$V): i32, $(v0dh=$V): i32, $(link=$V): i32): - ; check: $(v0a=$V) = iconcat $v0al, $v0ah - ; check: $(v0b=$V) = iconcat $v0bl, $v0bh - ; check: $(v0ab=$V) = vconcat $v0a, $v0b - ; check: $(v0c=$V) = iconcat $v0cl, $v0ch - ; check: $(v0d=$V) = iconcat $v0dl, $v0dh - ; check: $(v0cd=$V) = vconcat $v0c, $v0d - ; check: v0 = vconcat $v0ab, $v0cd - v1 = bxor v0, v0 - ; check: $(v1ab=$V), $(v1cd=$V) = vsplit v1 - ; check: $(v1a=$V), $(v1b=$V) = vsplit $v1ab - ; check: $(v1al=$V), $(v1ah=$V) = isplit $v1a - ; check: $(v1bl=$V), $(v1bh=$V) = isplit $v1b - ; check: $(v1c=$V), $(v1d=$V) = vsplit $v1cd - ; check: $(v1cl=$V), $(v1ch=$V) = isplit $v1c - ; check: $(v1dl=$V), $(v1dh=$V) = isplit $v1d - ; check: return $v1al, $v1ah, $v1bl, $v1bh, $v1cl, $v1ch, $v1dl, $v1dh, $link - return v1 -} - -function %indirect(i32) { - sig1 = () system_v -block0(v0: i32): - call_indirect sig1, v0() - return -} - -; The first argument to call_indirect doesn't get altered. -function %indirect_arg(i32, f32x2) { - sig1 = (f32x2) system_v -block0(v0: i32, v1: f32x2): - call_indirect sig1, v0(v1) - ; check: call_indirect sig1, v0($V, $V) - return -} - -; Call a function that takes arguments on the stack. 
-function %stack_args(i32) { - ; check: $(ss0=$SS) = outgoing_arg 4 - fn1 = %foo(i64, i64, i64, i64, i32) -block0(v0: i32): - v1 = iconst.i64 1 - call fn1(v1, v1, v1, v1, v0) - ; check: [GPsp#48,$ss0]$WS $(v0s=$V) = spill v0 - ; check: call fn1($(=.*), $v0s) - return -} diff --git a/cranelift/filetests/filetests/isa/riscv/legalize-i64.clif b/cranelift/filetests/filetests/isa/riscv/legalize-i64.clif deleted file mode 100644 index 11b31218be..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/legalize-i64.clif +++ /dev/null @@ -1,64 +0,0 @@ -; Test the legalization of i64 arithmetic instructions. -test legalizer -target riscv32 supports_m=1 - -; regex: V=v\d+ - -function %bitwise_and(i64, i64) -> i64 { -block0(v1: i64, v2: i64): - v3 = band v1, v2 - return v3 -} -; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): -; check: [R#ec -; sameln: $(v3l=$V) = band $v1l, $v2l -; check: [R#ec -; sameln: $(v3h=$V) = band $v1h, $v2h -; check: v3 = iconcat $v3l, $v3h -; check: return $v3l, $v3h, $link - -function %bitwise_or(i64, i64) -> i64 { -block0(v1: i64, v2: i64): - v3 = bor v1, v2 - return v3 -} -; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): -; check: [R#cc -; sameln: $(v3l=$V) = bor $v1l, $v2l -; check: [R#cc -; sameln: $(v3h=$V) = bor $v1h, $v2h -; check: v3 = iconcat $v3l, $v3h -; check: return $v3l, $v3h, $link - -function %bitwise_xor(i64, i64) -> i64 { -block0(v1: i64, v2: i64): - v3 = bxor v1, v2 - return v3 -} -; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): -; check: [R#8c -; sameln: $(v3l=$V) = bxor $v1l, $v2l -; check: [R#8c -; sameln: $(v3h=$V) = bxor $v1h, $v2h -; check: v3 = iconcat $v3l, $v3h -; check: return $v3l, $v3h, $link - -function %arith_add(i64, i64) -> i64 { -; Legalizing iadd.i64 requires two steps: -; 1. Narrow to iadd_cout.i32, then -; 2. Expand iadd_cout.i32 since RISC-V has no carry flag. 
-block0(v1: i64, v2: i64): - v3 = iadd v1, v2 - return v3 -} -; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): -; check: [R#0c -; sameln: $(v3l=$V) = iadd $v1l, $v2l -; check: $(c=$V) = icmp ult $v3l, $v1l -; check: [R#0c -; sameln: $(v3h1=$V) = iadd $v1h, $v2h -; check: $(c_int=$V) = bint.i32 $c -; check: [R#0c -; sameln: $(v3h=$V) = iadd $v3h1, $c_int -; check: v3 = iconcat $v3l, $v3h -; check: return $v3l, $v3h, $link diff --git a/cranelift/filetests/filetests/isa/riscv/legalize-icmp_imm-i64.clif b/cranelift/filetests/filetests/isa/riscv/legalize-icmp_imm-i64.clif deleted file mode 100644 index d7250cb3af..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/legalize-icmp_imm-i64.clif +++ /dev/null @@ -1,55 +0,0 @@ -test legalizer -target riscv32 - -; regex: V=v\d+ - -function %icmp_imm_eq(i64) -> b1 { -block0(v0: i64): - v1 = icmp_imm eq v0, 0x20202020_10101010 - return v1 -} -; check: block0($(v0l=$V): i32, $(v0h=$V): i32, $(link=$V): i32): -; nextln: $(v2l=$V) -> $(v0l) -; nextln: $(v2h=$V) -> $(v0h) -; nextln: v0 = iconcat $(v0l), $(v0h) -; nextln: $(imm_low=$V) = iconst.i32 0x1010_1010 -; nextln: $(imm_high=$V) = iconst.i32 0x2020_2020 -; nextln: $(v3=$V) = icmp eq $(v2l), $(imm_low) -; nextln: $(v4=$V) = icmp eq $(v2h), $(imm_high) -; nextln: v1 = band $(v3), $(v4) -; nextln: return v1, $(link) - -function %icmp_imm_ne(i64) -> b1 { -block0(v0: i64): - v1 = icmp_imm ne v0, 0x33333333_44444444 - return v1 -} -; check: block0($(v0l=$V): i32, $(v0h=$V): i32, $(link=$V): i32): -; nextln: $(v2l=$V) -> $(v0l) -; nextln: $(v2h=$V) -> $(v0h) -; nextln: v0 = iconcat $(v0l), $(v0h) -; nextln: $(imm_low=$V) = iconst.i32 0x4444_4444 -; nextln: $(imm_high=$V) = iconst.i32 0x3333_3333 -; nextln: $(v3=$V) = icmp ne $(v2l), $(imm_low) -; nextln: $(v4=$V) = icmp ne $(v2h), $(imm_high) -; nextln: v1 = bor $(v3), $(v4) -; nextln: return v1, $(link) - -function %icmp_imm_sge(i64) -> b1 { -block0(v0: i64): - v1 = icmp_imm sge 
v0, 0x01020304_05060708 - return v1 -} -; check: block0($(v0l=$V): i32, $(v0h=$V): i32, $(link=$V): i32): -; nextln: $(v2l=$V) -> $(v0l) -; nextln: $(v2h=$V) -> $(v0h) -; nextln: v0 = iconcat $(v0l), $(v0h) -; nextln: $(imm_low=$V) = iconst.i32 0x0506_0708 -; nextln: $(imm_high=$V) = iconst.i32 0x0102_0304 -; nextln: $(v3=$V) = icmp sgt $(v2h), $(imm_high) -; nextln: $(v4=$V) = icmp slt $(v2h), $(imm_high) -; nextln: $(v5=$V) = icmp uge $(v2l), $(imm_low) -; nextln: $(v6=$V) = bnot $v4 -; nextln: $(v7=$V) = band $v6, $v5 -; nextln: v1 = bor $(v3), $(v7) -; nextln: return v1, $(link) diff --git a/cranelift/filetests/filetests/isa/riscv/parse-encoding.clif b/cranelift/filetests/filetests/isa/riscv/parse-encoding.clif deleted file mode 100644 index 21cd828b8a..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/parse-encoding.clif +++ /dev/null @@ -1,36 +0,0 @@ -; Test the parser's support for encoding annotations. -test legalizer -target riscv32 - -function %parse_encoding(i32 [%x5]) -> i32 [%x10] { - ; check: function %parse_encoding(i32 [%x5], i32 link [%x1]) -> i32 [%x10], i32 link [%x1] fast { - - sig0 = (i32 [%x10]) -> i32 [%x10] system_v - ; check: sig0 = (i32 [%x10]) -> i32 [%x10] system_v - - sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] system_v - ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] system_v - - sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] system_v - ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] system_v - - ; Arguments on stack where not necessary - sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] system_v - ; check: sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] system_v - - ; Stack argument before register argument - sig4 = (f32 [72], i32 [%x10]) system_v - ; check: sig4 = (f32 [72], i32 [%x10]) system_v - - ; Return value on stack - sig5 = () -> f32 [0] system_v - ; check: sig5 = () -> f32 [0] system_v - - ; function + signature - fn0 = %bar(i32 [%x10]) -> b1 [%x10] system_v - ; check: sig6 
= (i32 [%x10]) -> b1 [%x10] system_v - ; nextln: fn0 = %bar sig6 - -block0(v0: i32): - return v0 -} diff --git a/cranelift/filetests/filetests/isa/riscv/regmove.clif b/cranelift/filetests/filetests/isa/riscv/regmove.clif deleted file mode 100644 index f1509e8178..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/regmove.clif +++ /dev/null @@ -1,15 +0,0 @@ -; Test tracking of register moves. -test binemit -target riscv32 - -function %regmoves(i32 link [%x1]) -> i32 link [%x1] { -block0(v9999: i32): - [-,%x10] v1 = iconst.i32 1 - [-,%x7] v2 = iadd_imm v1, 1000 ; bin: 3e850393 - regmove v1, %x10 -> %x11 ; bin: 00050593 - [-,%x7] v3 = iadd_imm v1, 1000 ; bin: 3e858393 - regmove v1, %x11 -> %x10 ; bin: 00058513 - [-,%x7] v4 = iadd_imm v1, 1000 ; bin: 3e850393 - - return v9999 -} diff --git a/cranelift/filetests/filetests/isa/riscv/split-args.clif b/cranelift/filetests/filetests/isa/riscv/split-args.clif deleted file mode 100644 index 9f4b3e9268..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/split-args.clif +++ /dev/null @@ -1,55 +0,0 @@ -; Test the legalization of block arguments that are split. 
-test legalizer -target riscv32 - -; regex: V=v\d+ - -function %simple(i64, i64) -> i64 { -block0(v1: i64, v2: i64): -; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): - jump block1(v1) - ; check: jump block1($v1l, $v1h) - -block1(v3: i64): -; check: block1($(v3l=$V): i32, $(v3h=$V): i32): - v4 = band v3, v2 - ; check: $(v4l=$V) = band $v3l, $v2l - ; check: $(v4h=$V) = band $v3h, $v2h - return v4 - ; check: return $v4l, $v4h, $link -} - -function %multi(i64) -> i64 { -block1(v1: i64): -; check: block1($(v1l=$V): i32, $(v1h=$V): i32, $(link=$V): i32): - jump block2(v1, v1) - ; check: jump block2($v1l, $v1l, $v1h, $v1h) - -block2(v2: i64, v3: i64): -; check: block2($(v2l=$V): i32, $(v3l=$V): i32, $(v2h=$V): i32, $(v3h=$V): i32): - jump block3(v2) - ; check: jump block3($v2l, $v2h) - -block3(v4: i64): -; check: block3($(v4l=$V): i32, $(v4h=$V): i32): - v5 = band v4, v3 - ; check: $(v5l=$V) = band $v4l, $v3l - ; check: $(v5h=$V) = band $v4h, $v3h - return v5 - ; check: return $v5l, $v5h, $link -} - -function %loop(i64, i64) -> i64 { -block0(v1: i64, v2: i64): -; check: block0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32): - jump block1(v1) - ; check: jump block1($v1l, $v1h) - -block1(v3: i64): -; check: block1($(v3l=$V): i32, $(v3h=$V): i32): - v4 = band v3, v2 - ; check: $(v4l=$V) = band $v3l, $v2l - ; check: $(v4h=$V) = band $v3h, $v2h - jump block1(v4) - ; check: jump block1($v4l, $v4h) -} diff --git a/cranelift/filetests/filetests/isa/riscv/verify-encoding.clif b/cranelift/filetests/filetests/isa/riscv/verify-encoding.clif deleted file mode 100644 index 1d29b86da9..0000000000 --- a/cranelift/filetests/filetests/isa/riscv/verify-encoding.clif +++ /dev/null @@ -1,21 +0,0 @@ -test verifier -target riscv32 - -function %RV32I(i32 link [%x1]) -> i32 link [%x1] { - fn0 = %foo() - -block0(v9999: i32): - ; iconst.i32 needs legalizing, so it should throw a - [R#0,-] v1 = iconst.i32 
0xf0f0f0f0f0 ; error: Instruction failed to re-encode - [Iret#19] return v9999 -} - -function %RV32I(i32 link [%x1]) -> i32 link [%x1] { - fn0 = %foo() - -block0(v9999: i32): - v1 = iconst.i32 1 - v2 = iconst.i32 2 - [R#0,-] v3 = iadd v1, v2 ; error: encoding R#00 should be R#0c - [Iret#19] return v9999 -} diff --git a/cranelift/filetests/filetests/isa/s390x/arithmetic.clif b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif index 479268ec0c..98883f6ae7 100644 --- a/cranelift/filetests/filetests/isa/s390x/arithmetic.clif +++ b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif @@ -202,6 +202,98 @@ block0(v0: i8, v1: i64): ; nextln: ar %r2, %r3 ; nextln: br %r14 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; IADD_IFCOUT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %iadd_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2, v3 = iadd_ifcout.i64 v0, v1 + return v2 +} + +; check: algr %r2, %r3 +; nextln: br %r14 + +function %iadd_i64_ext32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = uextend.i64 v1 + v3, v4 = iadd_ifcout.i64 v0, v2 + return v3 +} + +; check: algfr %r2, %r3 +; nextln: br %r14 + +function %iadd_i64_imm32(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 32768 + v2, v3 = iadd_ifcout.i64 v0, v1 + return v2 +} + +; check: algfi %r2, 32768 +; nextln: br %r14 + +function %iadd_i64_mem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3, v4 = iadd_ifcout.i64 v0, v2 + return v3 +} + +; check: alg %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i64_mem_ext32(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = uload32.i64 v1 + v3, v4 = iadd_ifcout.i64 v0, v2 + return v3 +} + +; check: algf %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2, v3 = iadd_ifcout.i32 v0, v1 + return v2 +} + +; check: alr %r2, %r3 +; nextln: br %r14 + +function %iadd_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 32768 + v2, v3 = iadd_ifcout.i32 v0, v1 + return v2 +} + 
+; check: alfi %r2, 32768 +; nextln: br %r14 + +function %iadd_i32_mem(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3, v4 = iadd_ifcout.i32 v0, v2 + return v3 +} + +; check: al %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i32_memoff(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3, v4 = iadd_ifcout.i32 v0, v2 + return v3 +} + +; check: aly %r2, 4096(%r3) +; nextln: br %r14 + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ISUB ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/filetests/filetests/isa/x64/amode-opt.clif b/cranelift/filetests/filetests/isa/x64/amode-opt.clif index f5789b67ee..902df05a08 100644 --- a/cranelift/filetests/filetests/isa/x64/amode-opt.clif +++ b/cranelift/filetests/filetests/isa/x64/amode-opt.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %amode_add(i64, i64) -> i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/isa/x64/b1.clif b/cranelift/filetests/filetests/isa/x64/b1.clif index cbd265a9ea..64049860dd 100644 --- a/cranelift/filetests/filetests/isa/x64/b1.clif +++ b/cranelift/filetests/filetests/isa/x64/b1.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f0(b1, i32, i32) -> i32 { ; check: pushq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/basic.clif b/cranelift/filetests/filetests/isa/x64/basic.clif index 8b43d70c7c..88b605d3d7 100644 --- a/cranelift/filetests/filetests/isa/x64/basic.clif +++ b/cranelift/filetests/filetests/isa/x64/basic.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f(i32, i32) -> i32 { block0(v0: i32, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x64/bextend.clif b/cranelift/filetests/filetests/isa/x64/bextend.clif index 6b53f3c3bd..8678e7f66a 100644 --- a/cranelift/filetests/filetests/isa/x64/bextend.clif +++ b/cranelift/filetests/filetests/isa/x64/bextend.clif @@ -1,5 +1,5 @@ test compile -target 
x86_64 machinst +target x86_64 function %f0(b8) -> b64 { block0(v0: b8): diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif index 99aec088ac..edcf36ec1a 100644 --- a/cranelift/filetests/filetests/isa/x64/branches.clif +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f0(i32, i32) -> i32 { block0(v0: i32, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x64/call-conv.clif b/cranelift/filetests/filetests/isa/x64/call-conv.clif index 8f619e2aa7..e2f0d8e81a 100644 --- a/cranelift/filetests/filetests/isa/x64/call-conv.clif +++ b/cranelift/filetests/filetests/isa/x64/call-conv.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 ;; system_v has first param in %rdi, fascall in %rcx function %one_arg(i32) system_v { diff --git a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif index f36caed88a..627e4700bb 100644 --- a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst has_lzcnt +target x86_64 has_lzcnt function %clz(i64) -> i64 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif index 16c788ed84..853fd0d5f9 100644 --- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif +++ b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f0(i64, i64) -> i64, i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif index 5931451e11..42f82c653f 100644 --- a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif +++ b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif @@ 
-1,5 +1,5 @@ test compile -target x86_64 machinst has_bmi1 +target x86_64 has_bmi1 function %ctz(i64) -> i64 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/div-checks.clif b/cranelift/filetests/filetests/isa/x64/div-checks.clif index 3984aba42f..5a366914a2 100644 --- a/cranelift/filetests/filetests/isa/x64/div-checks.clif +++ b/cranelift/filetests/filetests/isa/x64/div-checks.clif @@ -1,6 +1,6 @@ test compile set avoid_div_traps=false -target x86_64 machinst +target x86_64 ;; We should get the checked-div/rem sequence (`srem` pseudoinst below) even ;; when `avoid_div_traps` above is false (i.e. even when the host is normally diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif index fc8d3a801d..99397044d9 100644 --- a/cranelift/filetests/filetests/isa/x64/fastcall.clif +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -1,7 +1,7 @@ test compile set enable_llvm_abi_extensions=true set unwind_info=true -target x86_64 machinst +target x86_64 function %f0(i64, i64, i64, i64) -> i64 windows_fastcall { block0(v0: i64, v1: i64, v2: i64, v3: i64): @@ -206,7 +206,7 @@ block0(v0: i64): v18 = load.f64 v0+136 v19 = load.f64 v0+144 v20 = load.f64 v0+152 - + v21 = fadd.f64 v1, v2 v22 = fadd.f64 v3, v4 v23 = fadd.f64 v5, v6 diff --git a/cranelift/filetests/filetests/isa/x64/floating-point.clif b/cranelift/filetests/filetests/isa/x64/floating-point.clif index c1e30a3b19..2f2552aec1 100644 --- a/cranelift/filetests/filetests/isa/x64/floating-point.clif +++ b/cranelift/filetests/filetests/isa/x64/floating-point.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f(f64) -> f64 { block0(v0: f64): diff --git a/cranelift/filetests/filetests/isa/x64/heap.clif b/cranelift/filetests/filetests/isa/x64/heap.clif index 2c77bc7ec2..b8c27f422e 100644 --- a/cranelift/filetests/filetests/isa/x64/heap.clif +++ b/cranelift/filetests/filetests/isa/x64/heap.clif @@ -1,5 
+1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f(i32, i64 vmctx) -> i64 { gv0 = vmctx diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 61783e366d..75013a8170 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -1,6 +1,6 @@ test compile set enable_llvm_abi_extensions=true -target x86_64 machinst +target x86_64 function %f0(i128, i128) -> i128 { ; check: pushq %rbp @@ -190,7 +190,7 @@ block0(v0: i128, v1: i128): ; nextln: orq %rax, %r8 ; nextln: andq $$1, %r8 ; nextln: setnz %r8b - + v4 = icmp slt v0, v1 ; check: cmpq %rcx, %rsi ; nextln: setl %r9b @@ -201,7 +201,7 @@ block0(v0: i128, v1: i128): ; nextln: orq %r9, %r10 ; nextln: andq $$1, %r10 ; nextln: setnz %r9b - + v5 = icmp sle v0, v1 ; check: cmpq %rcx, %rsi ; nextln: setl %r10b @@ -212,7 +212,7 @@ block0(v0: i128, v1: i128): ; nextln: orq %r10, %r11 ; nextln: andq $$1, %r11 ; nextln: setnz %r10b - + v6 = icmp sgt v0, v1 ; check: cmpq %rcx, %rsi ; nextln: setnle %r11b @@ -307,7 +307,7 @@ block0(v0: i128): ; nextln: setz %sil ; nextln: andb %dil, %sil ; nextln: jnz label1; j label2 - + jump block2 block1: @@ -725,7 +725,7 @@ block2(v6: i128): ; nextln: movq %rbp, %rsp ; nextln: popq %rbp ; nextln: ret - + } function %f24(i128, i128, i64, i128, i128, i128) -> i128 { @@ -1106,4 +1106,4 @@ block0(v0: i128, v1: i128): ; nextln: movq %rcx, %rdx ; nextln: movq %rbp, %rsp ; nextln: popq %rbp -; nextln: ret \ No newline at end of file +; nextln: ret diff --git a/cranelift/filetests/filetests/isa/x64/load-op.clif b/cranelift/filetests/filetests/isa/x64/load-op.clif index a4069b20ca..85c26dec3e 100644 --- a/cranelift/filetests/filetests/isa/x64/load-op.clif +++ b/cranelift/filetests/filetests/isa/x64/load-op.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %add_from_mem_u32_1(i64, i32) -> i32 { block0(v0: i64, v1: i32): diff --git 
a/cranelift/filetests/filetests/isa/x64/move-elision.clif b/cranelift/filetests/filetests/isa/x64/move-elision.clif index 5b23afb8d3..533eb5341b 100644 --- a/cranelift/filetests/filetests/isa/x64/move-elision.clif +++ b/cranelift/filetests/filetests/isa/x64/move-elision.clif @@ -1,6 +1,6 @@ test compile set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %move_registers(i32x4) -> b8x16 { block0(v0: i32x4): diff --git a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif index 2049f53962..8326e681b0 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst has_popcnt has_sse42 +target x86_64 has_popcnt has_sse42 function %popcnt(i64) -> i64 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt.clif index df68f6b4b7..e3f8d0c49d 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %popcnt64(i64) -> i64 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/probestack.clif b/cranelift/filetests/filetests/isa/x64/probestack.clif index 8e8b424c70..40944a797c 100644 --- a/cranelift/filetests/filetests/isa/x64/probestack.clif +++ b/cranelift/filetests/filetests/isa/x64/probestack.clif @@ -1,6 +1,6 @@ test compile set enable_probestack=true -target x86_64 machinst +target x86_64 function %f1() -> i64 { ss0 = explicit_slot 100000 diff --git a/cranelift/filetests/filetests/isa/x64/select-i128.clif b/cranelift/filetests/filetests/isa/x64/select-i128.clif index af6996f85f..41c8a67fb1 100644 --- a/cranelift/filetests/filetests/isa/x64/select-i128.clif +++ b/cranelift/filetests/filetests/isa/x64/select-i128.clif @@ -1,6 
+1,6 @@ test compile set enable_llvm_abi_extensions=true -target x86_64 machinst +target x86_64 function %f0(i32, i128, i128) -> i128 { ; check: pushq %rbp @@ -24,6 +24,6 @@ block0(v0: i32, v1: i128, v2: i128): ; nextln: movq %rbp, %rsp ; nextln: popq %rbp ; nextln: ret - + } diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index 52761b1ed0..b7251f9fe1 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -1,6 +1,6 @@ test compile set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %bitselect_i16x8() -> i16x8 { block0: diff --git a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif index b50ff6328d..7e3dee77f0 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif @@ -1,6 +1,6 @@ test compile set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %icmp_ne_32x4(i32x4, i32x4) -> b32x4 { block0(v0: i32x4, v1: i32x4): diff --git a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif index 2f6a8c7dfd..38894f6086 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif @@ -1,6 +1,6 @@ test compile set enable_simd -target x86_64 machinst has_ssse3 has_sse41 +target x86_64 has_ssse3 has_sse41 ;; shuffle diff --git a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif index 72249faaef..29f4b2cdb0 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif +++ 
b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif @@ -1,6 +1,6 @@ test compile set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %bnot_b32x4(b32x4) -> b32x4 { block0(v0: b32x4): diff --git a/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif b/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif index 31edd7bdca..c20f816fc2 100644 --- a/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif +++ b/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 ;; The goal of this test is to ensure that stack spills of an integer value, ;; which width is less than the machine word's size, cause the full word to be diff --git a/cranelift/filetests/filetests/isa/x64/struct-arg.clif b/cranelift/filetests/filetests/isa/x64/struct-arg.clif index e9001c5393..23fbb731b8 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-arg.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-arg.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function u0:0(i64 sarg(64)) -> i8 system_v { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/struct-ret.clif b/cranelift/filetests/filetests/isa/x64/struct-ret.clif index ee59ff4963..90a6d6fbe4 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-ret.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-ret.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f0(i64 sret) { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/tls_elf.clif b/cranelift/filetests/filetests/isa/x64/tls_elf.clif index 37a4698619..71bf7dada9 100644 --- a/cranelift/filetests/filetests/isa/x64/tls_elf.clif +++ b/cranelift/filetests/filetests/isa/x64/tls_elf.clif @@ -1,6 +1,6 @@ test compile set tls_model=elf_gd -target x86_64 machinst +target x86_64 function u0:0(i32) -> i64 { gv0 = symbol colocated tls 
u1:0 diff --git a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif index ef43c3dd03..32c856c419 100644 --- a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif +++ b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %elide_uextend_add(i32, i32) -> i64 { block0(v0: i32, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif index 5ddd4b20d3..6548930328 100644 --- a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif +++ b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 ;; From: https://github.com/bytecodealliance/wasmtime/issues/2670 diff --git a/cranelift/filetests/filetests/isa/x86/abcd.clif b/cranelift/filetests/filetests/isa/x86/abcd.clif deleted file mode 100644 index bfea325055..0000000000 --- a/cranelift/filetests/filetests/isa/x86/abcd.clif +++ /dev/null @@ -1,13 +0,0 @@ -test regalloc -target i686 legacy - -; %rdi can't be used in a movsbl instruction, so test that the register -; allocator can move it to a register that can be. 
- -function %test(i32 [%rdi]) -> i32 system_v { -block0(v0: i32 [%rdi]): - v1 = ireduce.i8 v0 - v2 = sextend.i32 v1 - return v2 -} -; check: regmove v1, %rdi -> %rax diff --git a/cranelift/filetests/filetests/isa/x86/abi-bool.clif b/cranelift/filetests/filetests/isa/x86/abi-bool.clif deleted file mode 100644 index 5286de3c18..0000000000 --- a/cranelift/filetests/filetests/isa/x86/abi-bool.clif +++ /dev/null @@ -1,19 +0,0 @@ -test compile -target x86_64 legacy haswell - -function %foo(i64, i64, i64, i32) -> b1 system_v { -block3(v0: i64, v1: i64, v2: i64, v3: i32): - v5 = icmp ne v2, v2 - v8 = iconst.i64 0 - jump block2(v8, v3, v5) - -block2(v10: i64, v30: i32, v37: b1): - v18 = load.i32 notrap aligned v2 - v27 = iadd.i64 v10, v10 - v31 = icmp eq v30, v30 - brz v31, block2(v27, v30, v37) - jump block0(v37) - -block0(v35: b1): - return v35 -} diff --git a/cranelift/filetests/filetests/isa/x86/abi32.clif b/cranelift/filetests/filetests/isa/x86/abi32.clif deleted file mode 100644 index 8ca530a695..0000000000 --- a/cranelift/filetests/filetests/isa/x86/abi32.clif +++ /dev/null @@ -1,20 +0,0 @@ -; Test the legalization of function signatures. -test legalizer -target i686 legacy - -; regex: V=v\d+ - -function %f() { - sig0 = (i32) -> i32 system_v - ; check: sig0 = (i32 [0]) -> i32 [%rax] system_v - - sig1 = (i64) -> b1 system_v - ; check: sig1 = (i32 [0], i32 [4]) -> b1 [%rax] system_v - - sig2 = (f32, i64) -> f64 system_v - ; check: sig2 = (f32 [0], i32 [4], i32 [8]) -> f64 [%xmm0] system_v - -block0: - return -} - diff --git a/cranelift/filetests/filetests/isa/x86/abi64.clif b/cranelift/filetests/filetests/isa/x86/abi64.clif deleted file mode 100644 index 0da2aad424..0000000000 --- a/cranelift/filetests/filetests/isa/x86/abi64.clif +++ /dev/null @@ -1,37 +0,0 @@ -; Test the legalization of function signatures. 
-test legalizer -target x86_64 legacy - -; regex: V=v\d+ - -function %f() { - sig0 = (i32) -> i32 system_v - ; check: sig0 = (i32 [%rdi]) -> i32 [%rax] system_v - - sig1 = (i64) -> b1 system_v - ; check: sig1 = (i64 [%rdi]) -> b1 [%rax] system_v - - sig2 = (f32, i64) -> f64 system_v - ; check: sig2 = (f32 [%xmm0], i64 [%rdi]) -> f64 [%xmm0] system_v - - sig3 = () -> i128 system_v - ; check: sig3 = () -> i64 [%rax], i64 [%rdx] system_v - - sig4 = (i128) -> i128 system_v - ; check: sig4 = (i64 [%rdi], i64 [%rsi]) -> i64 [%rax], i64 [%rdx] system_v - -block0: - return -} - -function %pass_stack_int64(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 vmctx) baldrdash_system_v { - sig0 = (i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 vmctx) baldrdash_system_v - fn0 = u0:0 sig0 - -block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64, v9: i64, v10: i64, v11: i64, v12: i64, v13: i64, v14: i64, v15: i64, v16: i64, v17: i64, v18: i64, v19: i64, v20: i64): - call fn0(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) - jump block1 - -block1: - return -} diff --git a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif deleted file mode 100644 index ca0ace1342..0000000000 --- a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif +++ /dev/null @@ -1,25 +0,0 @@ -; binary emission of 32-bit code. -test binemit -set opt_level=speed_and_size -set emit_all_ones_funcaddrs -target i686 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/allones_funcaddrs32.clif | llvm-mc -show-encoding -triple=i386 -; - -; Tests from binary32.clif affected by emit_all_ones_funcaddrs. 
-function %I32() { - sig0 = () - fn0 = %foo() - -block0: - - ; asm: movl $-1, %ecx - [-,%rcx] v400 = func_addr.i32 fn0 ; bin: b9 Abs4(%foo) ffffffff - ; asm: movl $-1, %esi - [-,%rsi] v401 = func_addr.i32 fn0 ; bin: be Abs4(%foo) ffffffff - - return ; bin: c3 -} diff --git a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif deleted file mode 100644 index 7fbb670df2..0000000000 --- a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif +++ /dev/null @@ -1,27 +0,0 @@ -; binary emission of 64-bit code. -test binemit -set opt_level=speed_and_size -set emit_all_ones_funcaddrs -target x86_64 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/allones_funcaddrs64.clif | llvm-mc -show-encoding -triple=x86_64 -; - -; Tests from binary64.clif affected by emit_all_ones_funcaddrs. -function %I64() { - sig0 = () - fn0 = %foo() - -block0: - - ; asm: movabsq $-1, %rcx - [-,%rcx] v400 = func_addr.i64 fn0 ; bin: 48 b9 Abs8(%foo) ffffffffffffffff - ; asm: movabsq $-1, %rsi - [-,%rsi] v401 = func_addr.i64 fn0 ; bin: 48 be Abs8(%foo) ffffffffffffffff - ; asm: movabsq $-1, %r10 - [-,%r10] v402 = func_addr.i64 fn0 ; bin: 49 ba Abs8(%foo) ffffffffffffffff - - return ; bin: c3 -} diff --git a/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif b/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif deleted file mode 100644 index d2713829cd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif +++ /dev/null @@ -1,14 +0,0 @@ -test compile -set enable_probestack=false -target i686 legacy - -function u0:0(i32 vmctx) baldrdash_system_v { - sig0 = (i32 vmctx, i32 sigid) baldrdash_system_v - -block0(v0: i32): - v2 = iconst.i32 0 - v8 = iconst.i32 0 - v9 = iconst.i32 0 - call_indirect sig0, v8(v9, v2) - trap user0 -} diff --git 
a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif deleted file mode 100644 index 9099315878..0000000000 --- a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif +++ /dev/null @@ -1,92 +0,0 @@ -test compile -target x86_64 legacy baseline - - -; clz/ctz on 64 bit operands - -function %i64_clz(i64) -> i64 { -block0(v10: i64): - v11 = clz v10 - ; check: x86_bsr - ; check: selectif.i64 - return v11 -} - -function %i64_ctz(i64) -> i64 { -block1(v20: i64): - v21 = ctz v20 - ; check: x86_bsf - ; check: selectif.i64 - return v21 -} - - -; clz/ctz on 32 bit operands - -function %i32_clz(i32) -> i32 { -block0(v10: i32): - v11 = clz v10 - ; check: x86_bsr - ; check: selectif.i32 - return v11 -} - -function %i32_ctz(i32) -> i32 { -block1(v20: i32): - v21 = ctz v20 - ; check: x86_bsf - ; check: selectif.i32 - return v21 -} - - -; popcount on 64 bit operands - -function %i64_popcount(i64) -> i64 { -block0(v30: i64): - v31 = popcnt v30; - ; check: ushr_imm - ; check: iconst.i64 - ; check: band - ; check: isub - ; check: ushr_imm - ; check: band - ; check: isub - ; check: ushr_imm - ; check: band - ; check: isub - ; check: ushr_imm - ; check: iadd - ; check: iconst.i64 - ; check: band - ; check: iconst.i64 - ; check: imul - ; check: ushr_imm - return v31; -} - - -; popcount on 32 bit operands - -function %i32_popcount(i32) -> i32 { -block0(v40: i32): - v41 = popcnt v40; - ; check: ushr_imm - ; check: iconst.i32 - ; check: band - ; check: isub - ; check: ushr_imm - ; check: band - ; check: isub - ; check: ushr_imm - ; check: band - ; check: isub - ; check: ushr_imm - ; check: iadd - ; check: iconst.i32 - ; check: band - ; check: iconst.i32 - ; check: imul - ; check: ushr_imm - return v41; -} diff --git a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif deleted file 
mode 100644 index b2f36ff148..0000000000 --- a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif +++ /dev/null @@ -1,87 +0,0 @@ -test binemit -set opt_level=speed_and_size -target x86_64 legacy baseline - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif | llvm-mc -show-encoding -triple=x86_64 -; - -function %Foo() { -block0: - ; 64-bit wide bsf - - [-,%r11] v10 = iconst.i64 0x1234 - ; asm: bsfq %r11, %rcx - [-,%rcx,%rflags] v11, v12 = x86_bsf v10 ; bin: 49 0f bc cb - - [-,%rdx] v14 = iconst.i64 0x5678 - ; asm: bsfq %rdx, %r12 - [-,%r12,%rflags] v15, v16 = x86_bsf v14 ; bin: 4c 0f bc e2 - - ; asm: bsfq %rdx, %rdi - [-,%rdi,%rflags] v17, v18 = x86_bsf v14 ; bin: 48 0f bc fa - - - ; 32-bit wide bsf - - [-,%r11] v20 = iconst.i32 0x1234 - ; asm: bsfl %r11d, %ecx - [-,%rcx,%rflags] v21, v22 = x86_bsf v20 ; bin: 41 0f bc cb - - [-,%rdx] v24 = iconst.i32 0x5678 - ; asm: bsfl %edx, %r12d - [-,%r12,%rflags] v25, v26 = x86_bsf v24 ; bin: 44 0f bc e2 - - ; asm: bsfl %edx, %esi - [-,%rsi,%rflags] v27, v28 = x86_bsf v24 ; bin: 0f bc f2 - - - ; 64-bit wide bsr - - [-,%r11] v30 = iconst.i64 0x1234 - ; asm: bsrq %r11, %rcx - [-,%rcx,%rflags] v31, v32 = x86_bsr v30 ; bin: 49 0f bd cb - - [-,%rdx] v34 = iconst.i64 0x5678 - ; asm: bsrq %rdx, %r12 - [-,%r12,%rflags] v35, v36 = x86_bsr v34 ; bin: 4c 0f bd e2 - - ; asm: bsrq %rdx, %rdi - [-,%rdi,%rflags] v37, v38 = x86_bsr v34 ; bin: 48 0f bd fa - - - ; 32-bit wide bsr - - [-,%r11] v40 = iconst.i32 0x1234 - ; asm: bsrl %r11d, %ecx - [-,%rcx,%rflags] v41, v42 = x86_bsr v40 ; bin: 41 0f bd cb - - [-,%rdx] v44 = iconst.i32 0x5678 - ; asm: bsrl %edx, %r12d - [-,%r12,%rflags] v45, v46 = x86_bsr v44 ; bin: 44 0f bd e2 - - ; asm: bsrl %edx, %esi - [-,%rsi,%rflags] v47, v48 = x86_bsr v44 ; bin: 0f bd f2 - - - ; 64-bit wide cmov - - ; asm: cmoveq %r11, %rdx - [-,%rdx] v51 = selectif.i64 eq v48, v30, v34 ; bin: 49 0f 44 
d3 - - ; asm: cmoveq %rdi, %rdx - [-,%rdx] v52 = selectif.i64 eq v48, v37, v34 ; bin: 48 0f 44 d7 - - - ; 32-bit wide cmov - - ; asm: cmovnel %r11d, %edx - [-,%rdx] v60 = selectif.i32 ne v48, v40, v44 ; bin: 41 0f 45 d3 - - ; asm: cmovlel %esi, %edx - [-,%rdx] v61 = selectif.i32 sle v48, v27, v44 ; bin: 0f 4e d6 - - - trap user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/binary32-float.clif b/cranelift/filetests/filetests/isa/x86/binary32-float.clif deleted file mode 100644 index cfac85f7b2..0000000000 --- a/cranelift/filetests/filetests/isa/x86/binary32-float.clif +++ /dev/null @@ -1,557 +0,0 @@ -; Binary emission of 32-bit floating point code. -test binemit -target i686 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary32-float.clif | llvm-mc -show-encoding -triple=i386 -; - -function %F32() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - [-,%rcx] v0 = iconst.i32 1 - [-,%rsi] v1 = iconst.i32 2 - - ; asm: cvtsi2ss %ecx, %xmm5 - [-,%xmm5] v10 = fcvt_from_sint.f32 v0 ; bin: f3 0f 2a e9 - ; asm: cvtsi2ss %esi, %xmm2 - [-,%xmm2] v11 = fcvt_from_sint.f32 v1 ; bin: f3 0f 2a d6 - - ; asm: cvtss2sd %xmm2, %xmm5 - [-,%xmm5] v12 = fpromote.f64 v11 ; bin: f3 0f 5a ea - ; asm: cvtss2sd %xmm5, %xmm2 - [-,%xmm2] v13 = fpromote.f64 v10 ; bin: f3 0f 5a d5 - - ; asm: movd %ecx, %xmm5 - [-,%xmm5] v14 = bitcast.f32 v0 ; bin: 66 0f 6e e9 - ; asm: movd %esi, %xmm2 - [-,%xmm2] v15 = bitcast.f32 v1 ; bin: 66 0f 6e d6 - - ; asm: movd %xmm5, %ecx - [-,%rcx] v16 = bitcast.i32 v10 ; bin: 66 0f 7e e9 - ; asm: movd %xmm2, %esi - [-,%rsi] v17 = bitcast.i32 v11 ; bin: 66 0f 7e d6 - - ; asm: movaps %xmm2, %xmm5 - [-,%xmm5] v18 = copy v11 ; bin: 0f 28 ea - ; asm: movaps %xmm5, %xmm2 - [-,%xmm2] v19 = copy v10 ; bin: 0f 28 d5 - - ; asm: movaps %xmm2, %xmm5 - regmove v19, %xmm2 -> %xmm5 ; 
bin: 0f 28 ea - ; asm: movaps %xmm5, %xmm2 - regmove v19, %xmm5 -> %xmm2 ; bin: 0f 28 d5 - - ; Binary arithmetic. - - ; asm: addss %xmm2, %xmm5 - [-,%xmm5] v20 = fadd v10, v11 ; bin: f3 0f 58 ea - ; asm: addss %xmm5, %xmm2 - [-,%xmm2] v21 = fadd v11, v10 ; bin: f3 0f 58 d5 - - ; asm: subss %xmm2, %xmm5 - [-,%xmm5] v22 = fsub v10, v11 ; bin: f3 0f 5c ea - ; asm: subss %xmm5, %xmm2 - [-,%xmm2] v23 = fsub v11, v10 ; bin: f3 0f 5c d5 - - ; asm: mulss %xmm2, %xmm5 - [-,%xmm5] v24 = fmul v10, v11 ; bin: f3 0f 59 ea - ; asm: mulss %xmm5, %xmm2 - [-,%xmm2] v25 = fmul v11, v10 ; bin: f3 0f 59 d5 - - ; asm: divss %xmm2, %xmm5 - [-,%xmm5] v26 = fdiv v10, v11 ; bin: f3 0f 5e ea - ; asm: divss %xmm5, %xmm2 - [-,%xmm2] v27 = fdiv v11, v10 ; bin: f3 0f 5e d5 - - ; Bitwise ops. - ; We use the *ps SSE instructions for everything because they are smaller. - - ; asm: andps %xmm2, %xmm5 - [-,%xmm5] v30 = band v10, v11 ; bin: 0f 54 ea - ; asm: andps %xmm5, %xmm2 - [-,%xmm2] v31 = band v11, v10 ; bin: 0f 54 d5 - - ; asm: andnps %xmm2, %xmm5 - [-,%xmm5] v32 = band_not v11, v10 ; bin: 0f 55 ea - ; asm: andnps %xmm5, %xmm2 - [-,%xmm2] v33 = band_not v10, v11 ; bin: 0f 55 d5 - - ; asm: orps %xmm2, %xmm5 - [-,%xmm5] v34 = bor v10, v11 ; bin: 0f 56 ea - ; asm: orps %xmm5, %xmm2 - [-,%xmm2] v35 = bor v11, v10 ; bin: 0f 56 d5 - - ; asm: xorps %xmm2, %xmm5 - [-,%xmm5] v36 = bxor v10, v11 ; bin: 0f 57 ea - ; asm: xorps %xmm5, %xmm2 - [-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5 - - ; Convert float to int. (No i64 dest on i386). - - ; asm: cvttss2si %xmm5, %ecx - [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd - ; asm: cvttss2si %xmm2, %esi - [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 0f 2c f2 - - ; Min/max. 
- - ; asm: minss %xmm2, %xmm5 - [-,%xmm5] v42 = x86_fmin v10, v11 ; bin: f3 0f 5d ea - ; asm: minss %xmm5, %xmm2 - [-,%xmm2] v43 = x86_fmin v11, v10 ; bin: f3 0f 5d d5 - ; asm: maxss %xmm2, %xmm5 - [-,%xmm5] v44 = x86_fmax v10, v11 ; bin: f3 0f 5f ea - ; asm: maxss %xmm5, %xmm2 - [-,%xmm2] v45 = x86_fmax v11, v10 ; bin: f3 0f 5f d5 - - ; Unary arithmetic. - - ; asm: sqrtss %xmm5, %xmm2 - [-,%xmm2] v50 = sqrt v10 ; bin: f3 0f 51 d5 - ; asm: sqrtss %xmm2, %xmm5 - [-,%xmm5] v51 = sqrt v11 ; bin: f3 0f 51 ea - - ; asm: roundss $0, %xmm5, %xmm4 - [-,%xmm4] v52 = nearest v10 ; bin: 66 0f 3a 0a e5 00 - ; asm: roundss $0, %xmm2, %xmm5 - [-,%xmm5] v53 = nearest v11 ; bin: 66 0f 3a 0a ea 00 - ; asm: roundss $0, %xmm5, %xmm2 - [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0a d5 00 - - ; asm: roundss $1, %xmm5, %xmm4 - [-,%xmm4] v55 = floor v10 ; bin: 66 0f 3a 0a e5 01 - ; asm: roundss $1, %xmm2, %xmm5 - [-,%xmm5] v56 = floor v11 ; bin: 66 0f 3a 0a ea 01 - ; asm: roundss $1, %xmm5, %xmm2 - [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0a d5 01 - - ; asm: roundss $2, %xmm5, %xmm4 - [-,%xmm4] v58 = ceil v10 ; bin: 66 0f 3a 0a e5 02 - ; asm: roundss $2, %xmm2, %xmm5 - [-,%xmm5] v59 = ceil v11 ; bin: 66 0f 3a 0a ea 02 - ; asm: roundss $2, %xmm5, %xmm2 - [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0a d5 02 - - ; asm: roundss $3, %xmm5, %xmm4 - [-,%xmm4] v61 = trunc v10 ; bin: 66 0f 3a 0a e5 03 - ; asm: roundss $3, %xmm2, %xmm5 - [-,%xmm5] v62 = trunc v11 ; bin: 66 0f 3a 0a ea 03 - ; asm: roundss $3, %xmm5, %xmm2 - [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0a d5 03 - - ; Load/Store - - ; asm: movss (%ecx), %xmm5 - [-,%xmm5] v100 = load.f32 v0 ; bin: heap_oob f3 0f 10 29 - ; asm: movss (%esi), %xmm2 - [-,%xmm2] v101 = load.f32 v1 ; bin: heap_oob f3 0f 10 16 - ; asm: movss 50(%ecx), %xmm5 - [-,%xmm5] v110 = load.f32 v0+50 ; bin: heap_oob f3 0f 10 69 32 - ; asm: movss -50(%esi), %xmm2 - [-,%xmm2] v111 = load.f32 v1-50 ; bin: heap_oob f3 0f 10 56 ce - ; asm: movss 10000(%ecx), %xmm5 - [-,%xmm5] 
v120 = load.f32 v0+10000 ; bin: heap_oob f3 0f 10 a9 00002710 - ; asm: movss -10000(%esi), %xmm2 - [-,%xmm2] v121 = load.f32 v1-10000 ; bin: heap_oob f3 0f 10 96 ffffd8f0 - - ; asm: movss %xmm5, (%ecx) - [-] store.f32 v100, v0 ; bin: heap_oob f3 0f 11 29 - ; asm: movss %xmm2, (%esi) - [-] store.f32 v101, v1 ; bin: heap_oob f3 0f 11 16 - ; asm: movss %xmm5, 50(%ecx) - [-] store.f32 v100, v0+50 ; bin: heap_oob f3 0f 11 69 32 - ; asm: movss %xmm2, -50(%esi) - [-] store.f32 v101, v1-50 ; bin: heap_oob f3 0f 11 56 ce - ; asm: movss %xmm5, 10000(%ecx) - [-] store.f32 v100, v0+10000 ; bin: heap_oob f3 0f 11 a9 00002710 - ; asm: movss %xmm2, -10000(%esi) - [-] store.f32 v101, v1-10000 ; bin: heap_oob f3 0f 11 96 ffffd8f0 - - ; Spill / Fill. - - ; asm: movss %xmm5, 1032(%esp) - [-,ss1] v200 = spill v100 ; bin: stk_ovf f3 0f 11 ac 24 00000408 - ; asm: movss %xmm2, 1032(%esp) - [-,ss1] v201 = spill v101 ; bin: stk_ovf f3 0f 11 94 24 00000408 - - ; asm: movss 1032(%esp), %xmm5 - [-,%xmm5] v210 = fill v200 ; bin: f3 0f 10 ac 24 00000408 - ; asm: movss 1032(%esp), %xmm2 - [-,%xmm2] v211 = fill v201 ; bin: f3 0f 10 94 24 00000408 - - ; asm: movss %xmm5, 1032(%esp) - regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f3 0f 11 ac 24 00000408 - ; asm: movss 1032(%esp), %xmm5 - regfill v100, ss1 -> %xmm5 ; bin: f3 0f 10 ac 24 00000408 - - ; Comparisons. - ; - ; Only `supported_floatccs` are tested here. Others are handled by - ; legalization patterns. 
- - ; asm: ucomiss %xmm2, %xmm5 - ; asm: setnp %bl - [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 0f 2e ea 0f 9b c3 - ; asm: ucomiss %xmm5, %xmm2 - ; asm: setp %bl - [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 0f 2e d5 0f 9a c3 - ; asm: ucomiss %xmm2, %xmm5 - ; asm: setne %dl - [-,%rdx] v302 = fcmp one v10, v11 ; bin: 0f 2e ea 0f 95 c2 - ; asm: ucomiss %xmm5, %xmm2 - ; asm: sete %dl - [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 0f 2e d5 0f 94 c2 - ; asm: ucomiss %xmm2, %xmm5 - ; asm: seta %bl - [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 0f 2e ea 0f 97 c3 - ; asm: ucomiss %xmm5, %xmm2 - ; asm: setae %bl - [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 0f 2e d5 0f 93 c3 - ; asm: ucomiss %xmm2, %xmm5 - ; asm: setb %dl - [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 0f 2e ea 0f 92 c2 - ; asm: ucomiss %xmm5, %xmm2 - ; asm: setbe %dl - [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 0f 2e d5 0f 96 c2 - - ; asm: ucomiss %xmm2, %xmm5 - [-,%rflags] v310 = ffcmp v10, v11 ; bin: 0f 2e ea - ; asm: ucomiss %xmm2, %xmm5 - [-,%rflags] v311 = ffcmp v11, v10 ; bin: 0f 2e d5 - ; asm: ucomiss %xmm5, %xmm5 - [-,%rflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed - - ; Load/Store Complex - - [-,%rax] v350 = iconst.i32 1 - [-,%rbx] v351 = iconst.i32 2 - - ; asm: movss (%rax,%rbx,1),%xmm5 - [-,%xmm5] v352 = load_complex.f32 v350+v351 ; bin: heap_oob f3 0f 10 2c 18 - ; asm: movss 0x32(%rax,%rbx,1),%xmm5 - [-,%xmm5] v353 = load_complex.f32 v350+v351+50 ; bin: heap_oob f3 0f 10 6c 18 32 - ; asm: movss -0x32(%rax,%rbx,1),%xmm5 - [-,%xmm5] v354 = load_complex.f32 v350+v351-50 ; bin: heap_oob f3 0f 10 6c 18 ce - ; asm: movss 0x2710(%rax,%rbx,1),%xmm5 - [-,%xmm5] v355 = load_complex.f32 v350+v351+10000 ; bin: heap_oob f3 0f 10 ac 18 00002710 - ; asm: movss -0x2710(%rax,%rbx,1),%xmm5 - [-,%xmm5] v356 = load_complex.f32 v350+v351-10000 ; bin: heap_oob f3 0f 10 ac 18 ffffd8f0 - ; asm: movss %xmm5,(%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351 ; bin: heap_oob f3 0f 11 2c 18 - ; asm: movss %xmm5,0x32(%rax,%rbx,1) - [-] 
store_complex.f32 v100, v350+v351+50 ; bin: heap_oob f3 0f 11 6c 18 32 - ; asm: movss %xmm2,-0x32(%rax,%rbx,1) - [-] store_complex.f32 v101, v350+v351-50 ; bin: heap_oob f3 0f 11 54 18 ce - ; asm: movss %xmm5,0x2710(%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351+10000 ; bin: heap_oob f3 0f 11 ac 18 00002710 - ; asm: movss %xmm2,-0x2710(%rax,%rbx,1) - [-] store_complex.f32 v101, v350+v351-10000 ; bin: heap_oob f3 0f 11 94 18 ffffd8f0 - - return -} - -function %F64() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - [-,%rcx] v0 = iconst.i32 1 - [-,%rsi] v1 = iconst.i32 2 - - ; asm: cvtsi2sd %ecx, %xmm5 - [-,%xmm5] v10 = fcvt_from_sint.f64 v0 ; bin: f2 0f 2a e9 - ; asm: cvtsi2sd %esi, %xmm2 - [-,%xmm2] v11 = fcvt_from_sint.f64 v1 ; bin: f2 0f 2a d6 - - ; asm: cvtsd2ss %xmm2, %xmm5 - [-,%xmm5] v12 = fdemote.f32 v11 ; bin: f2 0f 5a ea - ; asm: cvtsd2ss %xmm5, %xmm2 - [-,%xmm2] v13 = fdemote.f32 v10 ; bin: f2 0f 5a d5 - - ; No i64 <-> f64 bitcasts in 32-bit mode. - - ; asm: movaps %xmm2, %xmm5 - [-,%xmm5] v18 = copy v11 ; bin: 0f 28 ea - ; asm: movaps %xmm5, %xmm2 - [-,%xmm2] v19 = copy v10 ; bin: 0f 28 d5 - - ; asm: movaps %xmm2, %xmm5 - regmove v19, %xmm2 -> %xmm5 ; bin: 0f 28 ea - ; asm: movaps %xmm5, %xmm2 - regmove v19, %xmm5 -> %xmm2 ; bin: 0f 28 d5 - - ; Binary arithmetic. 
- - ; asm: addsd %xmm2, %xmm5 - [-,%xmm5] v20 = fadd v10, v11 ; bin: f2 0f 58 ea - ; asm: addsd %xmm5, %xmm2 - [-,%xmm2] v21 = fadd v11, v10 ; bin: f2 0f 58 d5 - - ; asm: subsd %xmm2, %xmm5 - [-,%xmm5] v22 = fsub v10, v11 ; bin: f2 0f 5c ea - ; asm: subsd %xmm5, %xmm2 - [-,%xmm2] v23 = fsub v11, v10 ; bin: f2 0f 5c d5 - - ; asm: mulsd %xmm2, %xmm5 - [-,%xmm5] v24 = fmul v10, v11 ; bin: f2 0f 59 ea - ; asm: mulsd %xmm5, %xmm2 - [-,%xmm2] v25 = fmul v11, v10 ; bin: f2 0f 59 d5 - - ; asm: divsd %xmm2, %xmm5 - [-,%xmm5] v26 = fdiv v10, v11 ; bin: f2 0f 5e ea - ; asm: divsd %xmm5, %xmm2 - [-,%xmm2] v27 = fdiv v11, v10 ; bin: f2 0f 5e d5 - - ; Bitwise ops. - ; We use the *ps SSE instructions for everything because they are smaller. - - ; asm: andps %xmm2, %xmm5 - [-,%xmm5] v30 = band v10, v11 ; bin: 0f 54 ea - ; asm: andps %xmm5, %xmm2 - [-,%xmm2] v31 = band v11, v10 ; bin: 0f 54 d5 - - ; asm: andnps %xmm2, %xmm5 - [-,%xmm5] v32 = band_not v11, v10 ; bin: 0f 55 ea - ; asm: andnps %xmm5, %xmm2 - [-,%xmm2] v33 = band_not v10, v11 ; bin: 0f 55 d5 - - ; asm: orps %xmm2, %xmm5 - [-,%xmm5] v34 = bor v10, v11 ; bin: 0f 56 ea - ; asm: orps %xmm5, %xmm2 - [-,%xmm2] v35 = bor v11, v10 ; bin: 0f 56 d5 - - ; asm: xorps %xmm2, %xmm5 - [-,%xmm5] v36 = bxor v10, v11 ; bin: 0f 57 ea - ; asm: xorps %xmm5, %xmm2 - [-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5 - - ; Convert float to int. (No i64 dest on i386). - - ; asm: cvttsd2si %xmm5, %ecx - [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd - ; asm: cvttsd2si %xmm2, %esi - [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 0f 2c f2 - - ; Min/max. - - ; asm: minsd %xmm2, %xmm5 - [-,%xmm5] v42 = x86_fmin v10, v11 ; bin: f2 0f 5d ea - ; asm: minsd %xmm5, %xmm2 - [-,%xmm2] v43 = x86_fmin v11, v10 ; bin: f2 0f 5d d5 - ; asm: maxsd %xmm2, %xmm5 - [-,%xmm5] v44 = x86_fmax v10, v11 ; bin: f2 0f 5f ea - ; asm: maxsd %xmm5, %xmm2 - [-,%xmm2] v45 = x86_fmax v11, v10 ; bin: f2 0f 5f d5 - - ; Unary arithmetic. 
- - ; asm: sqrtsd %xmm5, %xmm2 - [-,%xmm2] v50 = sqrt v10 ; bin: f2 0f 51 d5 - ; asm: sqrtsd %xmm2, %xmm5 - [-,%xmm5] v51 = sqrt v11 ; bin: f2 0f 51 ea - - ; asm: roundsd $0, %xmm5, %xmm4 - [-,%xmm4] v52 = nearest v10 ; bin: 66 0f 3a 0b e5 00 - ; asm: roundsd $0, %xmm2, %xmm5 - [-,%xmm5] v53 = nearest v11 ; bin: 66 0f 3a 0b ea 00 - ; asm: roundsd $0, %xmm5, %xmm2 - [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0b d5 00 - - ; asm: roundsd $1, %xmm5, %xmm4 - [-,%xmm4] v55 = floor v10 ; bin: 66 0f 3a 0b e5 01 - ; asm: roundsd $1, %xmm2, %xmm5 - [-,%xmm5] v56 = floor v11 ; bin: 66 0f 3a 0b ea 01 - ; asm: roundsd $1, %xmm5, %xmm2 - [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0b d5 01 - - ; asm: roundsd $2, %xmm5, %xmm4 - [-,%xmm4] v58 = ceil v10 ; bin: 66 0f 3a 0b e5 02 - ; asm: roundsd $2, %xmm2, %xmm5 - [-,%xmm5] v59 = ceil v11 ; bin: 66 0f 3a 0b ea 02 - ; asm: roundsd $2, %xmm5, %xmm2 - [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0b d5 02 - - ; asm: roundsd $3, %xmm5, %xmm4 - [-,%xmm4] v61 = trunc v10 ; bin: 66 0f 3a 0b e5 03 - ; asm: roundsd $3, %xmm2, %xmm5 - [-,%xmm5] v62 = trunc v11 ; bin: 66 0f 3a 0b ea 03 - ; asm: roundsd $3, %xmm5, %xmm2 - [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0b d5 03 - - ; Load/Store - - ; asm: movsd (%ecx), %xmm5 - [-,%xmm5] v100 = load.f64 v0 ; bin: heap_oob f2 0f 10 29 - ; asm: movsd (%esi), %xmm2 - [-,%xmm2] v101 = load.f64 v1 ; bin: heap_oob f2 0f 10 16 - ; asm: movsd 50(%ecx), %xmm5 - [-,%xmm5] v110 = load.f64 v0+50 ; bin: heap_oob f2 0f 10 69 32 - ; asm: movsd -50(%esi), %xmm2 - [-,%xmm2] v111 = load.f64 v1-50 ; bin: heap_oob f2 0f 10 56 ce - ; asm: movsd 10000(%ecx), %xmm5 - [-,%xmm5] v120 = load.f64 v0+10000 ; bin: heap_oob f2 0f 10 a9 00002710 - ; asm: movsd -10000(%esi), %xmm2 - [-,%xmm2] v121 = load.f64 v1-10000 ; bin: heap_oob f2 0f 10 96 ffffd8f0 - - ; asm: movsd %xmm5, (%ecx) - [-] store.f64 v100, v0 ; bin: heap_oob f2 0f 11 29 - ; asm: movsd %xmm2, (%esi) - [-] store.f64 v101, v1 ; bin: heap_oob f2 0f 11 16 - ; asm: movsd %xmm5, 
50(%ecx) - [-] store.f64 v100, v0+50 ; bin: heap_oob f2 0f 11 69 32 - ; asm: movsd %xmm2, -50(%esi) - [-] store.f64 v101, v1-50 ; bin: heap_oob f2 0f 11 56 ce - ; asm: movsd %xmm5, 10000(%ecx) - [-] store.f64 v100, v0+10000 ; bin: heap_oob f2 0f 11 a9 00002710 - ; asm: movsd %xmm2, -10000(%esi) - [-] store.f64 v101, v1-10000 ; bin: heap_oob f2 0f 11 96 ffffd8f0 - - ; Spill / Fill. - - ; asm: movsd %xmm5, 1032(%esp) - [-,ss1] v200 = spill v100 ; bin: stk_ovf f2 0f 11 ac 24 00000408 - ; asm: movsd %xmm2, 1032(%esp) - [-,ss1] v201 = spill v101 ; bin: stk_ovf f2 0f 11 94 24 00000408 - - ; asm: movsd 1032(%esp), %xmm5 - [-,%xmm5] v210 = fill v200 ; bin: f2 0f 10 ac 24 00000408 - ; asm: movsd 1032(%esp), %xmm2 - [-,%xmm2] v211 = fill v201 ; bin: f2 0f 10 94 24 00000408 - - ; asm: movsd %xmm5, 1032(%esp) - regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f2 0f 11 ac 24 00000408 - ; asm: movsd 1032(%esp), %xmm5 - regfill v100, ss1 -> %xmm5 ; bin: f2 0f 10 ac 24 00000408 - - ; Comparisons. - ; - ; Only `supported_floatccs` are tested here. Others are handled by - ; legalization patterns. 
- - ; asm: ucomisd %xmm2, %xmm5 - ; asm: setnp %bl - [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 66 0f 2e ea 0f 9b c3 - ; asm: ucomisd %xmm5, %xmm2 - ; asm: setp %bl - [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 66 0f 2e d5 0f 9a c3 - ; asm: ucomisd %xmm2, %xmm5 - ; asm: setne %dl - [-,%rdx] v302 = fcmp one v10, v11 ; bin: 66 0f 2e ea 0f 95 c2 - ; asm: ucomisd %xmm5, %xmm2 - ; asm: sete %dl - [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 66 0f 2e d5 0f 94 c2 - ; asm: ucomisd %xmm2, %xmm5 - ; asm: seta %bl - [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 66 0f 2e ea 0f 97 c3 - ; asm: ucomisd %xmm5, %xmm2 - ; asm: setae %bl - [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 66 0f 2e d5 0f 93 c3 - ; asm: ucomisd %xmm2, %xmm5 - ; asm: setb %dl - [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 66 0f 2e ea 0f 92 c2 - ; asm: ucomisd %xmm5, %xmm2 - ; asm: setbe %dl - [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 0f 2e d5 0f 96 c2 - - ; asm: ucomisd %xmm2, %xmm5 - [-,%rflags] v310 = ffcmp v10, v11 ; bin: 66 0f 2e ea - ; asm: ucomisd %xmm2, %xmm5 - [-,%rflags] v311 = ffcmp v11, v10 ; bin: 66 0f 2e d5 - ; asm: ucomisd %xmm5, %xmm5 - [-,%rflags] v312 = ffcmp v10, v10 ; bin: 66 0f 2e ed - - return -} - -function %cpuflags_float(f32 [%xmm0]) { -block0(v0: f32 [%xmm0]): - ; asm: ucomiss %xmm0, %xmm0 - [-,%rflags] v1 = ffcmp v0, v0 ; bin: 0f 2e c0 - - jump block1 - -block1: - ; asm: jnp block1 - brff ord v1, block1 ; bin: 7b fe - jump block2 - -block2: - ; asm: jp block1 - brff uno v1, block1 ; bin: 7a fc - jump block3 - -block3: - ; asm: jne block1 - brff one v1, block1 ; bin: 75 fa - jump block4 - -block4: - ; asm: je block1 - brff ueq v1, block1 ; bin: 74 f8 - jump block5 - -block5: - ; asm: ja block1 - brff gt v1, block1 ; bin: 77 f6 - jump block6 - -block6: - ; asm: jae block1 - brff ge v1, block1 ; bin: 73 f4 - jump block7 - -block7: - ; asm: jb block1 - brff ult v1, block1 ; bin: 72 f2 - jump block8 - -block8: - ; asm: jbe block1 - brff ule v1, block1 ; bin: 76 f0 - jump block9 - -block9: - ; asm: jp .+4; ud2 
- trapff ord v1, user0 ; bin: 7a 02 user0 0f 0b - ; asm: jnp .+4; ud2 - trapff uno v1, user0 ; bin: 7b 02 user0 0f 0b - ; asm: je .+4; ud2 - trapff one v1, user0 ; bin: 74 02 user0 0f 0b - ; asm: jne .+4; ud2 - trapff ueq v1, user0 ; bin: 75 02 user0 0f 0b - ; asm: jna .+4; ud2 - trapff gt v1, user0 ; bin: 76 02 user0 0f 0b - ; asm: jnae .+4; ud2 - trapff ge v1, user0 ; bin: 72 02 user0 0f 0b - ; asm: jnb .+4; ud2 - trapff ult v1, user0 ; bin: 73 02 user0 0f 0b - ; asm: jnbe .+4; ud2 - trapff ule v1, user0 ; bin: 77 02 user0 0f 0b - - ; asm: setnp %bl - [-,%rbx] v10 = trueff ord v1 ; bin: 0f 9b c3 - ; asm: setp %bl - [-,%rbx] v11 = trueff uno v1 ; bin: 0f 9a c3 - ; asm: setne %dl - [-,%rdx] v12 = trueff one v1 ; bin: 0f 95 c2 - ; asm: sete %dl - [-,%rdx] v13 = trueff ueq v1 ; bin: 0f 94 c2 - ; asm: seta %al - [-,%rax] v14 = trueff gt v1 ; bin: 0f 97 c0 - ; asm: setae %al - [-,%rax] v15 = trueff ge v1 ; bin: 0f 93 c0 - ; asm: setb %cl - [-,%rcx] v16 = trueff ult v1 ; bin: 0f 92 c1 - ; asm: setbe %cl - [-,%rcx] v17 = trueff ule v1 ; bin: 0f 96 c1 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/binary32.clif b/cranelift/filetests/filetests/isa/x86/binary32.clif deleted file mode 100644 index 11268d5c4e..0000000000 --- a/cranelift/filetests/filetests/isa/x86/binary32.clif +++ /dev/null @@ -1,721 +0,0 @@ -; binary emission of x86-32 code. 
-test binemit -set opt_level=speed_and_size -target i686 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary32.clif | llvm-mc -show-encoding -triple=i386 -; - -function %I32() { - sig0 = () - fn0 = %foo() - - gv0 = symbol %some_gv - - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - ; asm: movl $1, %ecx - [-,%rcx] v1 = iconst.i32 1 ; bin: b9 00000001 - ; asm: movl $2, %esi - [-,%rsi] v2 = iconst.i32 2 ; bin: be 00000002 - - ; asm: movb $1, %cl - [-,%rcx] v9007 = bconst.b1 true ; bin: b9 00000001 - - ; Integer Register-Register Operations. - - ; asm: addl %esi, %ecx - [-,%rcx] v10 = iadd v1, v2 ; bin: 01 f1 - ; asm: addl %ecx, %esi - [-,%rsi] v11 = iadd v2, v1 ; bin: 01 ce - ; asm: subl %esi, %ecx - [-,%rcx] v12 = isub v1, v2 ; bin: 29 f1 - ; asm: subl %ecx, %esi - [-,%rsi] v13 = isub v2, v1 ; bin: 29 ce - - ; asm: andl %esi, %ecx - [-,%rcx] v14 = band v1, v2 ; bin: 21 f1 - ; asm: andl %ecx, %esi - [-,%rsi] v15 = band v2, v1 ; bin: 21 ce - ; asm: orl %esi, %ecx - [-,%rcx] v16 = bor v1, v2 ; bin: 09 f1 - ; asm: orl %ecx, %esi - [-,%rsi] v17 = bor v2, v1 ; bin: 09 ce - ; asm: xorl %esi, %ecx - [-,%rcx] v18 = bxor v1, v2 ; bin: 31 f1 - ; asm: xorl %ecx, %esi - [-,%rsi] v19 = bxor v2, v1 ; bin: 31 ce - - ; Dynamic shifts take the shift amount in %rcx. 
- - ; asm: shll %cl, %esi - [-,%rsi] v20 = ishl v2, v1 ; bin: d3 e6 - ; asm: shll %cl, %ecx - [-,%rcx] v21 = ishl v1, v1 ; bin: d3 e1 - ; asm: shrl %cl, %esi - [-,%rsi] v22 = ushr v2, v1 ; bin: d3 ee - ; asm: shrl %cl, %ecx - [-,%rcx] v23 = ushr v1, v1 ; bin: d3 e9 - ; asm: sarl %cl, %esi - [-,%rsi] v24 = sshr v2, v1 ; bin: d3 fe - ; asm: sarl %cl, %ecx - [-,%rcx] v25 = sshr v1, v1 ; bin: d3 f9 - ; asm: roll %cl, %esi - [-,%rsi] v26 = rotl v2, v1 ; bin: d3 c6 - ; asm: roll %cl, %ecx - [-,%rcx] v27 = rotl v1, v1 ; bin: d3 c1 - ; asm: rorl %cl, %esi - [-,%rsi] v28 = rotr v2, v1 ; bin: d3 ce - ; asm: rorl %cl, %ecx - [-,%rcx] v29 = rotr v1, v1 ; bin: d3 c9 - - ; Integer Register - Immediate 8-bit operations. - ; The 8-bit immediate is sign-extended. - - ; asm: addl $-128, %ecx - [-,%rcx] v30 = iadd_imm v1, -128 ; bin: 83 c1 80 - ; asm: addl $10, %esi - [-,%rsi] v31 = iadd_imm v2, 10 ; bin: 83 c6 0a - - ; asm: andl $-128, %ecx - [-,%rcx] v32 = band_imm v1, -128 ; bin: 83 e1 80 - ; asm: andl $10, %esi - [-,%rsi] v33 = band_imm v2, 10 ; bin: 83 e6 0a - ; asm: orl $-128, %ecx - [-,%rcx] v34 = bor_imm v1, -128 ; bin: 83 c9 80 - ; asm: orl $10, %esi - [-,%rsi] v35 = bor_imm v2, 10 ; bin: 83 ce 0a - ; asm: xorl $-128, %ecx - [-,%rcx] v36 = bxor_imm v1, -128 ; bin: 83 f1 80 - ; asm: xorl $10, %esi - [-,%rsi] v37 = bxor_imm v2, 10 ; bin: 83 f6 0a - - ; Integer Register - Immediate 32-bit operations. 
- - ; asm: addl $-128000, %ecx - [-,%rcx] v40 = iadd_imm v1, -128000 ; bin: 81 c1 fffe0c00 - ; asm: addl $1000000, %esi - [-,%rsi] v41 = iadd_imm v2, 1000000 ; bin: 81 c6 000f4240 - - ; asm: andl $-128000, %ecx - [-,%rcx] v42 = band_imm v1, -128000 ; bin: 81 e1 fffe0c00 - ; asm: andl $1000000, %esi - [-,%rsi] v43 = band_imm v2, 1000000 ; bin: 81 e6 000f4240 - ; asm: orl $-128000, %ecx - [-,%rcx] v44 = bor_imm v1, -128000 ; bin: 81 c9 fffe0c00 - ; asm: orl $1000000, %esi - [-,%rsi] v45 = bor_imm v2, 1000000 ; bin: 81 ce 000f4240 - ; asm: xorl $-128000, %ecx - [-,%rcx] v46 = bxor_imm v1, -128000 ; bin: 81 f1 fffe0c00 - ; asm: xorl $1000000, %esi - [-,%rsi] v47 = bxor_imm v2, 1000000 ; bin: 81 f6 000f4240 - - ; More arithmetic. - - ; asm: imull %esi, %ecx - [-,%rcx] v50 = imul v1, v2 ; bin: 0f af ce - ; asm: imull %ecx, %esi - [-,%rsi] v51 = imul v2, v1 ; bin: 0f af f1 - - ; asm: movl $1, %eax - [-,%rax] v52 = iconst.i32 1 ; bin: b8 00000001 - ; asm: movl $2, %edx - [-,%rdx] v53 = iconst.i32 2 ; bin: ba 00000002 - ; asm: idivl %ecx - [-,%rax,%rdx] v54, v55 = x86_sdivmodx v52, v53, v1 ; bin: int_divz f7 f9 - ; asm: idivl %esi - [-,%rax,%rdx] v56, v57 = x86_sdivmodx v52, v53, v2 ; bin: int_divz f7 fe - ; asm: divl %ecx - [-,%rax,%rdx] v58, v59 = x86_udivmodx v52, v53, v1 ; bin: int_divz f7 f1 - ; asm: divl %esi - [-,%rax,%rdx] v60, v61 = x86_udivmodx v52, v53, v2 ; bin: int_divz f7 f6 - - ; Register copies. - - ; asm: movl %esi, %ecx - [-,%rcx] v80 = copy v2 ; bin: 89 f1 - ; asm: movl %ecx, %esi - [-,%rsi] v81 = copy v1 ; bin: 89 ce - - ; Copy Special - ; asm: movl %esp, %ebp - copy_special %rsp -> %rbp ; bin: 89 e5 - ; asm: movl %ebp, %esp - copy_special %rbp -> %rsp ; bin: 89 ec - - - ; Load/Store instructions. - - ; Register indirect addressing with no displacement. 
- - ; asm: movl %ecx, (%esi) - store v1, v2 ; bin: heap_oob 89 0e - ; asm: movl %esi, (%ecx) - store v2, v1 ; bin: heap_oob 89 31 - ; asm: movw %cx, (%esi) - istore16 v1, v2 ; bin: heap_oob 66 89 0e - ; asm: movw %si, (%ecx) - istore16 v2, v1 ; bin: heap_oob 66 89 31 - ; asm: movb %cl, (%esi) - istore8 v1, v2 ; bin: heap_oob 88 0e - ; Can't store %sil in 32-bit mode (needs REX prefix). - - ; asm: movl (%ecx), %edi - [-,%rdi] v100 = load.i32 v1 ; bin: heap_oob 8b 39 - ; asm: movl (%esi), %edx - [-,%rdx] v101 = load.i32 v2 ; bin: heap_oob 8b 16 - ; asm: movzwl (%ecx), %edi - [-,%rdi] v102 = uload16.i32 v1 ; bin: heap_oob 0f b7 39 - ; asm: movzwl (%esi), %edx - [-,%rdx] v103 = uload16.i32 v2 ; bin: heap_oob 0f b7 16 - ; asm: movswl (%ecx), %edi - [-,%rdi] v104 = sload16.i32 v1 ; bin: heap_oob 0f bf 39 - ; asm: movswl (%esi), %edx - [-,%rdx] v105 = sload16.i32 v2 ; bin: heap_oob 0f bf 16 - ; asm: movzbl (%ecx), %edi - [-,%rdi] v106 = uload8.i32 v1 ; bin: heap_oob 0f b6 39 - ; asm: movzbl (%esi), %edx - [-,%rdx] v107 = uload8.i32 v2 ; bin: heap_oob 0f b6 16 - ; asm: movsbl (%ecx), %edi - [-,%rdi] v108 = sload8.i32 v1 ; bin: heap_oob 0f be 39 - ; asm: movsbl (%esi), %edx - [-,%rdx] v109 = sload8.i32 v2 ; bin: heap_oob 0f be 16 - - ; Register-indirect with 8-bit signed displacement. 
- - ; asm: movl %ecx, 100(%esi) - store v1, v2+100 ; bin: heap_oob 89 4e 64 - ; asm: movl %esi, -100(%ecx) - store v2, v1-100 ; bin: heap_oob 89 71 9c - ; asm: movw %cx, 100(%esi) - istore16 v1, v2+100 ; bin: heap_oob 66 89 4e 64 - ; asm: movw %si, -100(%ecx) - istore16 v2, v1-100 ; bin: heap_oob 66 89 71 9c - ; asm: movb %cl, 100(%esi) - istore8 v1, v2+100 ; bin: heap_oob 88 4e 64 - - ; asm: movl 50(%ecx), %edi - [-,%rdi] v110 = load.i32 v1+50 ; bin: heap_oob 8b 79 32 - ; asm: movl -50(%esi), %edx - [-,%rdx] v111 = load.i32 v2-50 ; bin: heap_oob 8b 56 ce - ; asm: movzwl 50(%ecx), %edi - [-,%rdi] v112 = uload16.i32 v1+50 ; bin: heap_oob 0f b7 79 32 - ; asm: movzwl -50(%esi), %edx - [-,%rdx] v113 = uload16.i32 v2-50 ; bin: heap_oob 0f b7 56 ce - ; asm: movswl 50(%ecx), %edi - [-,%rdi] v114 = sload16.i32 v1+50 ; bin: heap_oob 0f bf 79 32 - ; asm: movswl -50(%esi), %edx - [-,%rdx] v115 = sload16.i32 v2-50 ; bin: heap_oob 0f bf 56 ce - ; asm: movzbl 50(%ecx), %edi - [-,%rdi] v116 = uload8.i32 v1+50 ; bin: heap_oob 0f b6 79 32 - ; asm: movzbl -50(%esi), %edx - [-,%rdx] v117 = uload8.i32 v2-50 ; bin: heap_oob 0f b6 56 ce - ; asm: movsbl 50(%ecx), %edi - [-,%rdi] v118 = sload8.i32 v1+50 ; bin: heap_oob 0f be 79 32 - ; asm: movsbl -50(%esi), %edx - [-,%rdx] v119 = sload8.i32 v2-50 ; bin: heap_oob 0f be 56 ce - - ; Register-indirect with 32-bit signed displacement. 
- - ; asm: movl %ecx, 10000(%esi) - store v1, v2+10000 ; bin: heap_oob 89 8e 00002710 - ; asm: movl %esi, -10000(%ecx) - store v2, v1-10000 ; bin: heap_oob 89 b1 ffffd8f0 - ; asm: movw %cx, 10000(%esi) - istore16 v1, v2+10000 ; bin: heap_oob 66 89 8e 00002710 - ; asm: movw %si, -10000(%ecx) - istore16 v2, v1-10000 ; bin: heap_oob 66 89 b1 ffffd8f0 - ; asm: movb %cl, 10000(%esi) - istore8 v1, v2+10000 ; bin: heap_oob 88 8e 00002710 - - ; asm: movl 50000(%ecx), %edi - [-,%rdi] v120 = load.i32 v1+50000 ; bin: heap_oob 8b b9 0000c350 - ; asm: movl -50000(%esi), %edx - [-,%rdx] v121 = load.i32 v2-50000 ; bin: heap_oob 8b 96 ffff3cb0 - ; asm: movzwl 50000(%ecx), %edi - [-,%rdi] v122 = uload16.i32 v1+50000 ; bin: heap_oob 0f b7 b9 0000c350 - ; asm: movzwl -50000(%esi), %edx - [-,%rdx] v123 = uload16.i32 v2-50000 ; bin: heap_oob 0f b7 96 ffff3cb0 - ; asm: movswl 50000(%ecx), %edi - [-,%rdi] v124 = sload16.i32 v1+50000 ; bin: heap_oob 0f bf b9 0000c350 - ; asm: movswl -50000(%esi), %edx - [-,%rdx] v125 = sload16.i32 v2-50000 ; bin: heap_oob 0f bf 96 ffff3cb0 - ; asm: movzbl 50000(%ecx), %edi - [-,%rdi] v126 = uload8.i32 v1+50000 ; bin: heap_oob 0f b6 b9 0000c350 - ; asm: movzbl -50000(%esi), %edx - [-,%rdx] v127 = uload8.i32 v2-50000 ; bin: heap_oob 0f b6 96 ffff3cb0 - ; asm: movsbl 50000(%ecx), %edi - [-,%rdi] v128 = sload8.i32 v1+50000 ; bin: heap_oob 0f be b9 0000c350 - ; asm: movsbl -50000(%esi), %edx - [-,%rdx] v129 = sload8.i32 v2-50000 ; bin: heap_oob 0f be 96 ffff3cb0 - - ; Bit-counting instructions. - - ; asm: popcntl %esi, %ecx - [-,%rcx] v200 = popcnt v2 ; bin: f3 0f b8 ce - ; asm: popcntl %ecx, %esi - [-,%rsi] v201 = popcnt v1 ; bin: f3 0f b8 f1 - - ; asm: lzcntl %esi, %ecx - [-,%rcx] v202 = clz v2 ; bin: f3 0f bd ce - ; asm: lzcntl %ecx, %esi - [-,%rsi] v203 = clz v1 ; bin: f3 0f bd f1 - - ; asm: tzcntl %esi, %ecx - [-,%rcx] v204 = ctz v2 ; bin: f3 0f bc ce - ; asm: tzcntl %ecx, %esi - [-,%rsi] v205 = ctz v1 ; bin: f3 0f bc f1 - - ; Integer comparisons. 
- - ; asm: cmpl %esi, %ecx - ; asm: sete %bl - [-,%rbx] v300 = icmp eq v1, v2 ; bin: 39 f1 0f 94 c3 - ; asm: cmpl %ecx, %esi - ; asm: sete %dl - [-,%rdx] v301 = icmp eq v2, v1 ; bin: 39 ce 0f 94 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setne %bl - [-,%rbx] v302 = icmp ne v1, v2 ; bin: 39 f1 0f 95 c3 - ; asm: cmpl %ecx, %esi - ; asm: setne %dl - [-,%rdx] v303 = icmp ne v2, v1 ; bin: 39 ce 0f 95 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setl %bl - [-,%rbx] v304 = icmp slt v1, v2 ; bin: 39 f1 0f 9c c3 - ; asm: cmpl %ecx, %esi - ; asm: setl %dl - [-,%rdx] v305 = icmp slt v2, v1 ; bin: 39 ce 0f 9c c2 - - ; asm: cmpl %esi, %ecx - ; asm: setge %bl - [-,%rbx] v306 = icmp sge v1, v2 ; bin: 39 f1 0f 9d c3 - ; asm: cmpl %ecx, %esi - ; asm: setge %dl - [-,%rdx] v307 = icmp sge v2, v1 ; bin: 39 ce 0f 9d c2 - - ; asm: cmpl %esi, %ecx - ; asm: setg %bl - [-,%rbx] v308 = icmp sgt v1, v2 ; bin: 39 f1 0f 9f c3 - ; asm: cmpl %ecx, %esi - ; asm: setg %dl - [-,%rdx] v309 = icmp sgt v2, v1 ; bin: 39 ce 0f 9f c2 - - ; asm: cmpl %esi, %ecx - ; asm: setle %bl - [-,%rbx] v310 = icmp sle v1, v2 ; bin: 39 f1 0f 9e c3 - ; asm: cmpl %ecx, %esi - ; asm: setle %dl - [-,%rdx] v311 = icmp sle v2, v1 ; bin: 39 ce 0f 9e c2 - - ; asm: cmpl %esi, %ecx - ; asm: setb %bl - [-,%rbx] v312 = icmp ult v1, v2 ; bin: 39 f1 0f 92 c3 - ; asm: cmpl %ecx, %esi - ; asm: setb %dl - [-,%rdx] v313 = icmp ult v2, v1 ; bin: 39 ce 0f 92 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setae %bl - [-,%rbx] v314 = icmp uge v1, v2 ; bin: 39 f1 0f 93 c3 - ; asm: cmpl %ecx, %esi - ; asm: setae %dl - [-,%rdx] v315 = icmp uge v2, v1 ; bin: 39 ce 0f 93 c2 - - ; asm: cmpl %esi, %ecx - ; asm: seta %bl - [-,%rbx] v316 = icmp ugt v1, v2 ; bin: 39 f1 0f 97 c3 - ; asm: cmpl %ecx, %esi - ; asm: seta %dl - [-,%rdx] v317 = icmp ugt v2, v1 ; bin: 39 ce 0f 97 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setbe %bl - [-,%rbx] v318 = icmp ule v1, v2 ; bin: 39 f1 0f 96 c3 - ; asm: cmpl %ecx, %esi - ; asm: setbe %dl - [-,%rdx] v319 = icmp ule v2, v1 ; bin: 39 ce 0f 
96 c2 - - ; Bool-to-int conversions. - - ; asm: movzbl %bl, %ecx - [-,%rcx] v350 = bint.i32 v300 ; bin: 0f b6 cb - ; asm: movzbl %dl, %esi - [-,%rsi] v351 = bint.i32 v301 ; bin: 0f b6 f2 - - ; asm: call foo - call fn0() ; bin: stk_ovf e8 CallPCRel4(%foo-4) 00000000 - - ; asm: movl $0, %ecx - [-,%rcx] v400 = func_addr.i32 fn0 ; bin: b9 Abs4(%foo) 00000000 - ; asm: movl $0, %esi - [-,%rsi] v401 = func_addr.i32 fn0 ; bin: be Abs4(%foo) 00000000 - - ; asm: call *%ecx - call_indirect sig0, v400() ; bin: stk_ovf ff d1 - ; asm: call *%esi - call_indirect sig0, v401() ; bin: stk_ovf ff d6 - - ; asm: movl $0, %ecx - [-,%rcx] v450 = symbol_value.i32 gv0 ; bin: b9 Abs4(%some_gv) 00000000 - ; asm: movl $0, %esi - [-,%rsi] v451 = symbol_value.i32 gv0 ; bin: be Abs4(%some_gv) 00000000 - - ; Spill / Fill. - - ; asm: movl %ecx, 1032(%esp) - [-,ss1] v500 = spill v1 ; bin: stk_ovf 89 8c 24 00000408 - ; asm: movl %esi, 1032(%esp) - [-,ss1] v501 = spill v2 ; bin: stk_ovf 89 b4 24 00000408 - - ; asm: movl 1032(%esp), %ecx - [-,%rcx] v510 = fill v500 ; bin: 8b 8c 24 00000408 - ; asm: movl 1032(%esp), %esi - [-,%rsi] v511 = fill v501 ; bin: 8b b4 24 00000408 - - ; asm: movl %ecx, 1032(%esp) - regspill v1, %rcx -> ss1 ; bin: stk_ovf 89 8c 24 00000408 - ; asm: movl 1032(%esp), %ecx - regfill v1, ss1 -> %rcx ; bin: 8b 8c 24 00000408 - - ; Push and Pop - ; asm: pushl %ecx - x86_push v1 ; bin: stk_ovf 51 - ; asm: popl %ecx - [-,%rcx] v512 = x86_pop.i32 ; bin: 59 - - ; Adjust Stack Pointer Up - ; asm: addl $64, %esp - adjust_sp_up_imm 64 ; bin: 83 c4 40 - ; asm: addl $-64, %esp - adjust_sp_up_imm -64 ; bin: 83 c4 c0 - ; asm: addl $1024, %esp - adjust_sp_up_imm 1024 ; bin: 81 c4 00000400 - ; asm: addl $-1024, %esp - adjust_sp_up_imm -1024 ; bin: 81 c4 fffffc00 - ; asm: addl $2147483647, %esp - adjust_sp_up_imm 2147483647 ; bin: 81 c4 7fffffff - ; asm: addl $-2147483648, %esp - adjust_sp_up_imm -2147483648 ; bin: 81 c4 80000000 - - ; Adjust Stack Pointer Down - ; asm: subl %ecx, %esp - 
adjust_sp_down v1 ; bin: 29 cc - ; asm: subl %esi, %esp - adjust_sp_down v2 ; bin: 29 f4 - ; asm: addl $64, %esp - adjust_sp_down_imm 64 ; bin: 83 ec 40 - ; asm: addl $-64, %esp - adjust_sp_down_imm -64 ; bin: 83 ec c0 - ; asm: addl $1024, %esp - adjust_sp_down_imm 1024 ; bin: 81 ec 00000400 - ; asm: addl $-1024, %esp - adjust_sp_down_imm -1024 ; bin: 81 ec fffffc00 - ; asm: addl $2147483647, %esp - adjust_sp_down_imm 2147483647 ; bin: 81 ec 7fffffff - ; asm: addl $-2147483648, %esp - adjust_sp_down_imm -2147483648 ; bin: 81 ec 80000000 - - ; Shift immediates - ; asm: shll $2, %esi - [-,%rsi] v513 = ishl_imm v2, 2 ; bin: c1 e6 02 - ; asm: sarl $5, %esi - [-,%rsi] v514 = sshr_imm v2, 5 ; bin: c1 fe 05 - ; asm: shrl $8, %esi - [-,%rsi] v515 = ushr_imm v2, 8 ; bin: c1 ee 08 - - ; Rotate immediates - ; asm: rolq $12, %esi - [-,%rsi] v5101 = rotl_imm v2, 12 ; bin: c1 c6 0c - ; asm: rorq $5, %esi - [-,%rsi] v5103 = rotr_imm v2, 5 ; bin: c1 ce 05 - - ; Load Complex - [-,%rax] v521 = iconst.i32 1 - [-,%rbx] v522 = iconst.i32 1 - ; asm: movl (%eax,%ebx,1), %ecx - [-,%rcx] v526 = load_complex.i32 v521+v522 ; bin: heap_oob 8b 0c 18 - ; asm: movl 1(%eax,%ebx,1), %ecx - [-,%rcx] v528 = load_complex.i32 v521+v522+1 ; bin: heap_oob 8b 4c 18 01 - ; asm: mov 0x100000(%eax,%ebx,1),%ecx - [-,%rcx] v530 = load_complex.i32 v521+v522+0x1000 ; bin: heap_oob 8b 8c 18 00001000 - ; asm: movzbl (%eax,%ebx,1),%ecx - [-,%rcx] v532 = uload8_complex.i32 v521+v522 ; bin: heap_oob 0f b6 0c 18 - ; asm: movsbl (%eax,%ebx,1),%ecx - [-,%rcx] v534 = sload8_complex.i32 v521+v522 ; bin: heap_oob 0f be 0c 18 - ; asm: movzwl (%eax,%ebx,1),%ecx - [-,%rcx] v536 = uload16_complex.i32 v521+v522 ; bin: heap_oob 0f b7 0c 18 - ; asm: movswl (%eax,%ebx,1),%ecx - [-,%rcx] v538 = sload16_complex.i32 v521+v522 ; bin: heap_oob 0f bf 0c 18 - - ; Store Complex - [-,%rcx] v601 = iconst.i32 1 - ; asm: mov %ecx,(%eax,%ebx,1) - store_complex v601, v521+v522 ; bin: heap_oob 89 0c 18 - ; asm: mov %ecx,0x1(%eax,%ebx,1) - 
store_complex v601, v521+v522+1 ; bin: heap_oob 89 4c 18 01 - ; asm: mov %ecx,0x100000(%eax,%ebx,1) - store_complex v601, v521+v522+0x1000 ; bin: heap_oob 89 8c 18 00001000 - ; asm: mov %cx,(%eax,%ebx,1) - istore16_complex v601, v521+v522 ; bin: heap_oob 66 89 0c 18 - ; asm: mov %cl,(%eax,%ebx,1) - istore8_complex v601, v521+v522 ; bin: heap_oob 88 0c 18 - - ; Carry Addition - ; asm: addl %esi, %ecx - [-,%rcx,%rflags] v701, v702 = iadd_ifcout v1, v2 ; bin: 01 f1 - ; asm: adcl %esi, %ecx - [-,%rcx] v703 = iadd_ifcin v1, v2, v702 ; bin: 11 f1 - ; asm: adcl %esi, %ecx - [-,%rcx,%rflags] v704, v705 = iadd_ifcarry v1, v2, v702 ; bin: 11 f1 - - ; Borrow Subtraction - ; asm: subl %esi, %ecx - [-,%rcx,%rflags] v706, v707 = isub_ifbout v1, v2 ; bin: 29 f1 - ; asm: sbbl %esi, %ecx - [-,%rcx] v708 = isub_ifbin v1, v2, v707 ; bin: 19 f1 - ; asm: sbbl %esi, %ecx - [-,%rcx,%rflags] v709, v710 = isub_ifborrow v1, v2, v707 ; bin: 19 f1 - - ; asm: testl %ecx, %ecx - ; asm: je block1 - brz v1, block1 ; bin: 85 c9 74 0e - fallthrough block3 - -block3: - ; asm: testl %esi, %esi - ; asm: je block1 - brz v2, block1 ; bin: 85 f6 74 0a - fallthrough block4 - -block4: - ; asm: testl %ecx, %ecx - ; asm: jne block1 - brnz v1, block1 ; bin: 85 c9 75 06 - fallthrough block5 - -block5: - ; asm: testl %esi, %esi - ; asm: jne block1 - brnz v2, block1 ; bin: 85 f6 75 02 - - ; asm: jmp block2 - jump block2 ; bin: eb 01 - - ; asm: block1: -block1: - ; asm: ret - return ; bin: c3 - - ; asm: block2: -block2: - trap user0 ; bin: user0 0f 0b -} - -; Special branch encodings only for I32 mode. 
-function %special_branches() { -block0: - [-,%rcx] v1 = iconst.i32 1 - [-,%rsi] v2 = iconst.i32 2 - [-,%rdi] v3 = icmp eq v1, v2 - [-,%rbx] v4 = icmp ugt v1, v2 - - ; asm: testl $0xff, %edi - ; asm: je block1 - brz v3, block1 ; bin: f7 c7 000000ff 0f 84 00000015 - fallthrough block2 - -block2: - ; asm: testb %bl, %bl - ; asm: je block1 - brz v4, block1 ; bin: 84 db 74 11 - fallthrough block3 - -block3: - ; asm: testl $0xff, %edi - ; asm: jne block1 - brnz v3, block1 ; bin: f7 c7 000000ff 0f 85 00000005 - fallthrough block4 - -block4: - ; asm: testb %bl, %bl - ; asm: jne block1 - brnz v4, block1 ; bin: 84 db 75 01 - fallthrough block5 - -block5: - return - -block1: - return -} - -; CPU flag instructions. -function %cpu_flags() { -block0: - [-,%rcx] v1 = iconst.i32 1 - [-,%rsi] v2 = iconst.i32 2 - jump block1 - -block1: - ; asm: cmpl %esi, %ecx - [-,%rflags] v10 = ifcmp v1, v2 ; bin: 39 f1 - ; asm: cmpl %ecx, %esi - [-,%rflags] v11 = ifcmp v2, v1 ; bin: 39 ce - - ; asm: je block1 - brif eq v11, block1 ; bin: 74 fa - jump block2 - -block2: - ; asm: jne block1 - brif ne v11, block1 ; bin: 75 f8 - jump block3 - -block3: - ; asm: jl block1 - brif slt v11, block1 ; bin: 7c f6 - jump block4 - -block4: - ; asm: jge block1 - brif sge v11, block1 ; bin: 7d f4 - jump block5 - -block5: - ; asm: jg block1 - brif sgt v11, block1 ; bin: 7f f2 - jump block6 - -block6: - ; asm: jle block1 - brif sle v11, block1 ; bin: 7e f0 - jump block7 - -block7: - ; asm: jb block1 - brif ult v11, block1 ; bin: 72 ee - jump block8 - -block8: - ; asm: jae block1 - brif uge v11, block1 ; bin: 73 ec - jump block9 - -block9: - ; asm: ja block1 - brif ugt v11, block1 ; bin: 77 ea - jump block10 - -block10: - ; asm: jbe block1 - brif ule v11, block1 ; bin: 76 e8 - jump block11 - -block11: - - ; asm: sete %bl - [-,%rbx] v20 = trueif eq v11 ; bin: 0f 94 c3 - ; asm: setne %bl - [-,%rbx] v21 = trueif ne v11 ; bin: 0f 95 c3 - ; asm: setl %dl - [-,%rdx] v22 = trueif slt v11 ; bin: 0f 9c c2 - ; asm: setge %dl 
- [-,%rdx] v23 = trueif sge v11 ; bin: 0f 9d c2 - ; asm: setg %bl - [-,%rbx] v24 = trueif sgt v11 ; bin: 0f 9f c3 - ; asm: setle %bl - [-,%rbx] v25 = trueif sle v11 ; bin: 0f 9e c3 - ; asm: setb %dl - [-,%rdx] v26 = trueif ult v11 ; bin: 0f 92 c2 - ; asm: setae %dl - [-,%rdx] v27 = trueif uge v11 ; bin: 0f 93 c2 - ; asm: seta %bl - [-,%rbx] v28 = trueif ugt v11 ; bin: 0f 97 c3 - ; asm: setbe %bl - [-,%rbx] v29 = trueif ule v11 ; bin: 0f 96 c3 - - ; The trapif instructions are encoded as macros: a conditional jump over a ud2. - ; asm: jne .+4; ud2 - trapif eq v11, user0 ; bin: 75 02 user0 0f 0b - ; asm: je .+4; ud2 - trapif ne v11, user0 ; bin: 74 02 user0 0f 0b - ; asm: jnl .+4; ud2 - trapif slt v11, user0 ; bin: 7d 02 user0 0f 0b - ; asm: jnge .+4; ud2 - trapif sge v11, user0 ; bin: 7c 02 user0 0f 0b - ; asm: jng .+4; ud2 - trapif sgt v11, user0 ; bin: 7e 02 user0 0f 0b - ; asm: jnle .+4; ud2 - trapif sle v11, user0 ; bin: 7f 02 user0 0f 0b - ; asm: jnb .+4; ud2 - trapif ult v11, user0 ; bin: 73 02 user0 0f 0b - ; asm: jnae .+4; ud2 - trapif uge v11, user0 ; bin: 72 02 user0 0f 0b - ; asm: jna .+4; ud2 - trapif ugt v11, user0 ; bin: 76 02 user0 0f 0b - ; asm: jnbe .+4; ud2 - trapif ule v11, user0 ; bin: 77 02 user0 0f 0b - ; asm: jo .+4; ud2 - trapif of v11, user0 ; bin: 71 02 user0 0f 0b - ; asm: jno .+4; ud2 - trapif nof v11, user0 ; bin: 70 02 user0 0f 0b - - ; Stack check. - ; asm: cmpl %esp, %ecx - [-,%rflags] v40 = ifcmp_sp v1 ; bin: 39 e1 - ; asm: cmpl %esp, %esi - [-,%rflags] v41 = ifcmp_sp v2 ; bin: 39 e6 - - ; asm: cmpl $-100, %ecx - [-,%rflags] v42 = ifcmp_imm v1, -100 ; bin: 83 f9 9c - ; asm: cmpl $100, %esi - [-,%rflags] v43 = ifcmp_imm v2, 100 ; bin: 83 fe 64 - - ; asm: cmpl $-10000, %ecx - [-,%rflags] v44 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0 - ; asm: cmpl $10000, %esi - [-,%rflags] v45 = ifcmp_imm v2, 10000 ; bin: 81 fe 00002710 - - return -} - -; Tests for i32/i8 conversion instructions. 
-function %I32_I8() { -block0: - [-,%rcx] v1 = iconst.i32 1 - - [-,%rcx] v11 = ireduce.i8 v1 ; bin: - - ; asm: movsbl %cl, %esi - [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f be f1 - - ; asm: movzbl %cl, %esi - [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b6 f1 - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i32/i16 conversion instructions. -function %I32_I16() { -block0: - [-,%rcx] v1 = iconst.i32 1 - - [-,%rcx] v11 = ireduce.i16 v1 ; bin: - - ; asm: movswl %cx, %esi - [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f bf f1 - - ; asm: movzwl %cx, %esi - [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b7 f1 - - trap user0 ; bin: user0 0f 0b -} diff --git a/cranelift/filetests/filetests/isa/x86/binary64-float.clif b/cranelift/filetests/filetests/isa/x86/binary64-float.clif deleted file mode 100644 index 6bf6f325b1..0000000000 --- a/cranelift/filetests/filetests/isa/x86/binary64-float.clif +++ /dev/null @@ -1,638 +0,0 @@ -; Binary emission of 64-bit floating point code. -test binemit -set opt_level=speed_and_size -target x86_64 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary64-float.clif | llvm-mc -show-encoding -triple=x86_64 -; - -function %F32() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - [-,%r11] v0 = iconst.i32 1 - [-,%rsi] v1 = iconst.i32 2 - [-,%rax] v2 = iconst.i64 11 - [-,%r14] v3 = iconst.i64 12 - [-,%r13] v4 = iconst.i64 13 - - ; asm: cvtsi2ssl %r11d, %xmm5 - [-,%xmm5] v10 = fcvt_from_sint.f32 v0 ; bin: f3 41 0f 2a eb - ; asm: cvtsi2ssl %esi, %xmm10 - [-,%xmm10] v11 = fcvt_from_sint.f32 v1 ; bin: f3 44 0f 2a d6 - - ; asm: cvtsi2ssq %rax, %xmm5 - [-,%xmm5] v12 = fcvt_from_sint.f32 v2 ; bin: f3 48 0f 2a e8 - ; asm: cvtsi2ssq %r14, %xmm10 - [-,%xmm10] v13 = fcvt_from_sint.f32 v3 ; bin: f3 4d 0f 2a d6 - - ; asm: cvtss2sd %xmm10, %xmm5 - [-,%xmm5] v14 = fpromote.f64 v11 ; bin: 
f3 41 0f 5a ea - ; asm: cvtss2sd %xmm5, %xmm10 - [-,%xmm10] v15 = fpromote.f64 v10 ; bin: f3 44 0f 5a d5 - - ; asm: movd %r11d, %xmm5 - [-,%xmm5] v16 = bitcast.f32 v0 ; bin: 66 41 0f 6e eb - ; asm: movd %esi, %xmm10 - [-,%xmm10] v17 = bitcast.f32 v1 ; bin: 66 44 0f 6e d6 - - ; asm: movd %xmm5, %ecx - [-,%rcx] v18 = bitcast.i32 v10 ; bin: 66 0f 7e e9 - ; asm: movd %xmm10, %esi - [-,%rsi] v19 = bitcast.i32 v11 ; bin: 66 44 0f 7e d6 - - ; Binary arithmetic. - - ; asm: addss %xmm10, %xmm5 - [-,%xmm5] v20 = fadd v10, v11 ; bin: f3 41 0f 58 ea - ; asm: addss %xmm5, %xmm10 - [-,%xmm10] v21 = fadd v11, v10 ; bin: f3 44 0f 58 d5 - - ; asm: subss %xmm10, %xmm5 - [-,%xmm5] v22 = fsub v10, v11 ; bin: f3 41 0f 5c ea - ; asm: subss %xmm5, %xmm10 - [-,%xmm10] v23 = fsub v11, v10 ; bin: f3 44 0f 5c d5 - - ; asm: mulss %xmm10, %xmm5 - [-,%xmm5] v24 = fmul v10, v11 ; bin: f3 41 0f 59 ea - ; asm: mulss %xmm5, %xmm10 - [-,%xmm10] v25 = fmul v11, v10 ; bin: f3 44 0f 59 d5 - - ; asm: divss %xmm10, %xmm5 - [-,%xmm5] v26 = fdiv v10, v11 ; bin: f3 41 0f 5e ea - ; asm: divss %xmm5, %xmm10 - [-,%xmm10] v27 = fdiv v11, v10 ; bin: f3 44 0f 5e d5 - - ; Bitwise ops. - ; We use the *ps SSE instructions for everything because they are smaller. 
- - ; asm: andps %xmm10, %xmm5 - [-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea - ; asm: andps %xmm5, %xmm10 - [-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5 - - ; asm: andnps %xmm10, %xmm5 - [-,%xmm5] v32 = band_not v11, v10 ; bin: 41 0f 55 ea - ; asm: andnps %xmm5, %xmm10 - [-,%xmm10] v33 = band_not v10, v11 ; bin: 44 0f 55 d5 - - ; asm: orps %xmm10, %xmm5 - [-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea - ; asm: orps %xmm5, %xmm10 - [-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5 - - ; asm: xorps %xmm10, %xmm5 - [-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea - ; asm: xorps %xmm5, %xmm10 - [-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5 - - ; asm: movaps %xmm10, %xmm5 - [-,%xmm5] v38 = copy v11 ; bin: 41 0f 28 ea - ; asm: movaps %xmm5, %xmm10 - [-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5 - - ; Copy to SSA - - ; asm: movsd %xmm0, %xmm15 - [-,%xmm15] v400 = copy_to_ssa.f64 %xmm0 ; bin: f2 44 0f 10 f8 - ; asm: movsd %xmm15, %xmm0 - [-,%xmm0] v401 = copy_to_ssa.f64 %xmm15 ; bin: f2 41 0f 10 c7 - ; asm: movsd %xmm7, %xmm6. Unfortunately we get a redundant REX prefix. - [-,%xmm6] v402 = copy_to_ssa.f64 %xmm7 ; bin: f2 40 0f 10 f7 - ; asm: movsd %xmm11, %xmm14 - [-,%xmm14] v403 = copy_to_ssa.f64 %xmm11 ; bin: f2 45 0f 10 f3 - - ; asm: movss %xmm0, %xmm15 - [-,%xmm15] v404 = copy_to_ssa.f32 %xmm0 ; bin: f3 44 0f 10 f8 - ; asm: movss %xmm15, %xmm0 - [-,%xmm0] v405 = copy_to_ssa.f32 %xmm15 ; bin: f3 41 0f 10 c7 - ; asm: movss %xmm7, %xmm6. Unfortunately we get a redundant REX prefix. - [-,%xmm6] v406 = copy_to_ssa.f32 %xmm7 ; bin: f3 40 0f 10 f7 - ; asm: movss %xmm11, %xmm14 - [-,%xmm14] v407 = copy_to_ssa.f32 %xmm11 ; bin: f3 45 0f 10 f3 - - ; Convert float to int. 
- - ; asm: cvttss2si %xmm5, %ecx - [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd - ; asm: cvttss2si %xmm10, %esi - [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 41 0f 2c f2 - - ; asm: cvttss2si %xmm5, %rcx - [-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f3 48 0f 2c cd - ; asm: cvttss2si %xmm10, %rsi - [-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f3 49 0f 2c f2 - - ; Min/max. - - ; asm: minss %xmm10, %xmm5 - [-,%xmm5] v44 = x86_fmin v10, v11 ; bin: f3 41 0f 5d ea - ; asm: minss %xmm5, %xmm10 - [-,%xmm10] v45 = x86_fmin v11, v10 ; bin: f3 44 0f 5d d5 - ; asm: maxss %xmm10, %xmm5 - [-,%xmm5] v46 = x86_fmax v10, v11 ; bin: f3 41 0f 5f ea - ; asm: maxss %xmm5, %xmm10 - [-,%xmm10] v47 = x86_fmax v11, v10 ; bin: f3 44 0f 5f d5 - - ; Unary arithmetic. - - ; asm: sqrtss %xmm5, %xmm10 - [-,%xmm10] v50 = sqrt v10 ; bin: f3 44 0f 51 d5 - ; asm: sqrtss %xmm10, %xmm5 - [-,%xmm5] v51 = sqrt v11 ; bin: f3 41 0f 51 ea - - ; asm: roundss $0, %xmm5, %xmm10 - [-,%xmm10] v52 = nearest v10 ; bin: 66 44 0f 3a 0a d5 00 - ; asm: roundss $0, %xmm10, %xmm5 - [-,%xmm5] v53 = nearest v11 ; bin: 66 41 0f 3a 0a ea 00 - ; asm: roundss $0, %xmm5, %xmm2 - [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0a d5 00 - - ; asm: roundss $1, %xmm5, %xmm10 - [-,%xmm10] v55 = floor v10 ; bin: 66 44 0f 3a 0a d5 01 - ; asm: roundss $1, %xmm10, %xmm5 - [-,%xmm5] v56 = floor v11 ; bin: 66 41 0f 3a 0a ea 01 - ; asm: roundss $1, %xmm5, %xmm2 - [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0a d5 01 - - ; asm: roundss $2, %xmm5, %xmm10 - [-,%xmm10] v58 = ceil v10 ; bin: 66 44 0f 3a 0a d5 02 - ; asm: roundss $2, %xmm10, %xmm5 - [-,%xmm5] v59 = ceil v11 ; bin: 66 41 0f 3a 0a ea 02 - ; asm: roundss $2, %xmm5, %xmm2 - [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0a d5 02 - - ; asm: roundss $3, %xmm5, %xmm10 - [-,%xmm10] v61 = trunc v10 ; bin: 66 44 0f 3a 0a d5 03 - ; asm: roundss $3, %xmm10, %xmm5 - [-,%xmm5] v62 = trunc v11 ; bin: 66 41 0f 3a 0a ea 03 - ; asm: roundss $3, %xmm5, %xmm2 - [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0a 
d5 03 - - ; Load/Store - - ; asm: movss (%r14), %xmm5 - [-,%xmm5] v100 = load.f32 v3 ; bin: heap_oob f3 41 0f 10 2e - ; asm: movss (%rax), %xmm10 - [-,%xmm10] v101 = load.f32 v2 ; bin: heap_oob f3 44 0f 10 10 - ; asm: movss 50(%r14), %xmm5 - [-,%xmm5] v110 = load.f32 v3+50 ; bin: heap_oob f3 41 0f 10 6e 32 - ; asm: movss -50(%rax), %xmm10 - [-,%xmm10] v111 = load.f32 v2-50 ; bin: heap_oob f3 44 0f 10 50 ce - ; asm: movss 10000(%r14), %xmm5 - [-,%xmm5] v120 = load.f32 v3+10000 ; bin: heap_oob f3 41 0f 10 ae 00002710 - ; asm: movss -10000(%rax), %xmm10 - [-,%xmm10] v121 = load.f32 v2-10000 ; bin: heap_oob f3 44 0f 10 90 ffffd8f0 - - ; asm: movss %xmm5, (%r14) - [-] store.f32 v100, v3 ; bin: heap_oob f3 41 0f 11 2e - ; asm: movss %xmm10, (%rax) - [-] store.f32 v101, v2 ; bin: heap_oob f3 44 0f 11 10 - ; asm: movss %xmm5, (%r13) - [-] store.f32 v100, v4 ; bin: heap_oob f3 41 0f 11 6d 00 - ; asm: movss %xmm10, (%r13) - [-] store.f32 v101, v4 ; bin: heap_oob f3 45 0f 11 55 00 - ; asm: movss %xmm5, 50(%r14) - [-] store.f32 v100, v3+50 ; bin: heap_oob f3 41 0f 11 6e 32 - ; asm: movss %xmm10, -50(%rax) - [-] store.f32 v101, v2-50 ; bin: heap_oob f3 44 0f 11 50 ce - ; asm: movss %xmm5, 10000(%r14) - [-] store.f32 v100, v3+10000 ; bin: heap_oob f3 41 0f 11 ae 00002710 - ; asm: movss %xmm10, -10000(%rax) - [-] store.f32 v101, v2-10000 ; bin: heap_oob f3 44 0f 11 90 ffffd8f0 - - ; Spill / Fill. 
- - ; asm: movss %xmm5, 1032(%rsp) - [-,ss1] v200 = spill v100 ; bin: stk_ovf f3 0f 11 ac 24 00000408 - ; asm: movss %xmm10, 1032(%rsp) - [-,ss1] v201 = spill v101 ; bin: stk_ovf f3 44 0f 11 94 24 00000408 - - ; asm: movss 1032(%rsp), %xmm5 - [-,%xmm5] v210 = fill v200 ; bin: f3 0f 10 ac 24 00000408 - ; asm: movss 1032(%rsp), %xmm10 - [-,%xmm10] v211 = fill v201 ; bin: f3 44 0f 10 94 24 00000408 - - ; asm: movss %xmm5, 1032(%rsp) - regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f3 0f 11 ac 24 00000408 - ; asm: movss 1032(%rsp), %xmm5 - regfill v100, ss1 -> %xmm5 ; bin: f3 0f 10 ac 24 00000408 - - ; Comparisons. - ; - ; Only `supported_floatccs` are tested here. Others are handled by - ; legalization patterns. - - ; asm: ucomiss %xmm10, %xmm5 - ; asm: setnp %bl - [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 41 0f 2e ea 0f 9b c3 - ; asm: ucomiss %xmm5, %xmm10 - ; asm: setp %bl - [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 44 0f 2e d5 0f 9a c3 - ; asm: ucomiss %xmm10, %xmm5 - ; asm: setne %dl - [-,%rdx] v302 = fcmp one v10, v11 ; bin: 41 0f 2e ea 0f 95 c2 - ; asm: ucomiss %xmm5, %xmm10 - ; asm: sete %dl - [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 44 0f 2e d5 0f 94 c2 - ; asm: ucomiss %xmm10, %xmm5 - ; asm: seta %bl - [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 41 0f 2e ea 0f 97 c3 - ; asm: ucomiss %xmm5, %xmm10 - ; asm: setae %bl - [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 44 0f 2e d5 0f 93 c3 - ; asm: ucomiss %xmm10, %xmm5 - ; asm: setb %dl - [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 41 0f 2e ea 0f 92 c2 - ; asm: ucomiss %xmm5, %xmm10 - ; asm: setbe %dl - [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 44 0f 2e d5 0f 96 c2 - - ; asm: ucomiss %xmm10, %xmm5 - [-,%rflags] v310 = ffcmp v10, v11 ; bin: 41 0f 2e ea - ; asm: ucomiss %xmm10, %xmm5 - [-,%rflags] v311 = ffcmp v11, v10 ; bin: 44 0f 2e d5 - ; asm: ucomiss %xmm5, %xmm5 - [-,%rflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed - - - ; Load/Store Complex - - [-,%rax] v350 = iconst.i64 1 - [-,%rbx] v351 = iconst.i64 2 - - ; asm: movss 
(%rax,%rbx,1),%xmm5 - [-,%xmm5] v352 = load_complex.f32 v350+v351 ; bin: heap_oob f3 0f 10 2c 18 - ; asm: movss 0x32(%rax,%rbx,1),%xmm5 - [-,%xmm5] v353 = load_complex.f32 v350+v351+50 ; bin: heap_oob f3 0f 10 6c 18 32 - ; asm: movss -0x32(%rax,%rbx,1),%xmm10 - [-,%xmm10] v354 = load_complex.f32 v350+v351-50 ; bin: heap_oob f3 44 0f 10 54 18 ce - ; asm: 0x2710(%rax,%rbx,1),%xmm5 - [-,%xmm5] v355 = load_complex.f32 v350+v351+10000 ; bin: heap_oob f3 0f 10 ac 18 00002710 - ; asm: -0x2710(%rax,%rbx,1),%xmm10 - [-,%xmm10] v356 = load_complex.f32 v350+v351-10000 ; bin: heap_oob f3 44 0f 10 94 18 ffffd8f0 - - ; asm: movsd %xmm5, (%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351 ; bin: heap_oob f3 0f 11 2c 18 - ; asm: movsd %xmm5, 50(%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351+50 ; bin: heap_oob f3 0f 11 6c 18 32 - ; asm: movsd %xmm10, -50(%rax,%rbx,1) - [-] store_complex.f32 v101, v350+v351-50 ; bin: heap_oob f3 44 0f 11 54 18 ce - ; asm: movsd %xmm5, 10000(%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351+10000 ; bin: heap_oob f3 0f 11 ac 18 00002710 - ; asm: movsd %xmm10, -10000(%rax,%rbx,1) - [-] store_complex.f32 v101, v350+v351-10000 ; bin: heap_oob f3 44 0f 11 94 18 ffffd8f0 - - return -} - -function %F64() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - [-,%r11] v0 = iconst.i32 1 - [-,%rsi] v1 = iconst.i32 2 - [-,%rax] v2 = iconst.i64 11 - [-,%r14] v3 = iconst.i64 12 - [-,%r13] v4 = iconst.i64 13 - - ; asm: cvtsi2sdl %r11d, %xmm5 - [-,%xmm5] v10 = fcvt_from_sint.f64 v0 ; bin: f2 41 0f 2a eb - ; asm: cvtsi2sdl %esi, %xmm10 - [-,%xmm10] v11 = fcvt_from_sint.f64 v1 ; bin: f2 44 0f 2a d6 - - ; asm: cvtsi2sdq %rax, %xmm5 - [-,%xmm5] v12 = fcvt_from_sint.f64 v2 ; bin: f2 48 0f 2a e8 - ; asm: cvtsi2sdq %r14, %xmm10 - [-,%xmm10] v13 = fcvt_from_sint.f64 v3 ; bin: f2 4d 0f 2a d6 - - ; asm: cvtsd2ss %xmm10, %xmm5 - [-,%xmm5] v14 = fdemote.f32 
v11 ; bin: f2 41 0f 5a ea - ; asm: cvtsd2ss %xmm5, %xmm10 - [-,%xmm10] v15 = fdemote.f32 v10 ; bin: f2 44 0f 5a d5 - - ; asm: movq %rax, %xmm5 - [-,%xmm5] v16 = bitcast.f64 v2 ; bin: 66 48 0f 6e e8 - ; asm: movq %r14, %xmm10 - [-,%xmm10] v17 = bitcast.f64 v3 ; bin: 66 4d 0f 6e d6 - - ; asm: movq %xmm5, %rcx - [-,%rcx] v18 = bitcast.i64 v10 ; bin: 66 48 0f 7e e9 - ; asm: movq %xmm10, %rsi - [-,%rsi] v19 = bitcast.i64 v11 ; bin: 66 4c 0f 7e d6 - - ; Binary arithmetic. - - ; asm: addsd %xmm10, %xmm5 - [-,%xmm5] v20 = fadd v10, v11 ; bin: f2 41 0f 58 ea - ; asm: addsd %xmm5, %xmm10 - [-,%xmm10] v21 = fadd v11, v10 ; bin: f2 44 0f 58 d5 - - ; asm: subsd %xmm10, %xmm5 - [-,%xmm5] v22 = fsub v10, v11 ; bin: f2 41 0f 5c ea - ; asm: subsd %xmm5, %xmm10 - [-,%xmm10] v23 = fsub v11, v10 ; bin: f2 44 0f 5c d5 - - ; asm: mulsd %xmm10, %xmm5 - [-,%xmm5] v24 = fmul v10, v11 ; bin: f2 41 0f 59 ea - ; asm: mulsd %xmm5, %xmm10 - [-,%xmm10] v25 = fmul v11, v10 ; bin: f2 44 0f 59 d5 - - ; asm: divsd %xmm10, %xmm5 - [-,%xmm5] v26 = fdiv v10, v11 ; bin: f2 41 0f 5e ea - ; asm: divsd %xmm5, %xmm10 - [-,%xmm10] v27 = fdiv v11, v10 ; bin: f2 44 0f 5e d5 - - ; Bitwise ops. - ; We use the *ps SSE instructions for everything because they are smaller. 
- - ; asm: andps %xmm10, %xmm5 - [-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea - ; asm: andps %xmm5, %xmm10 - [-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5 - - ; asm: andnps %xmm10, %xmm5 - [-,%xmm5] v32 = band_not v11, v10 ; bin: 41 0f 55 ea - ; asm: andnps %xmm5, %xmm10 - [-,%xmm10] v33 = band_not v10, v11 ; bin: 44 0f 55 d5 - - ; asm: orps %xmm10, %xmm5 - [-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea - ; asm: orps %xmm5, %xmm10 - [-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5 - - ; asm: xorps %xmm10, %xmm5 - [-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea - ; asm: xorps %xmm5, %xmm10 - [-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5 - - ; asm: movaps %xmm10, %xmm5 - [-,%xmm5] v38 = copy v11 ; bin: 41 0f 28 ea - ; asm: movaps %xmm5, %xmm10 - [-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5 - - ; Convert float to int. - - ; asm: cvttsd2si %xmm5, %ecx - [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd - ; asm: cvttsd2si %xmm10, %esi - [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 41 0f 2c f2 - - ; asm: cvttsd2si %xmm5, %rcx - [-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f2 48 0f 2c cd - ; asm: cvttsd2si %xmm10, %rsi - [-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f2 49 0f 2c f2 - - ; Min/max. - - ; asm: minsd %xmm10, %xmm5 - [-,%xmm5] v44 = x86_fmin v10, v11 ; bin: f2 41 0f 5d ea - ; asm: minsd %xmm5, %xmm10 - [-,%xmm10] v45 = x86_fmin v11, v10 ; bin: f2 44 0f 5d d5 - ; asm: maxsd %xmm10, %xmm5 - [-,%xmm5] v46 = x86_fmax v10, v11 ; bin: f2 41 0f 5f ea - ; asm: maxsd %xmm5, %xmm10 - [-,%xmm10] v47 = x86_fmax v11, v10 ; bin: f2 44 0f 5f d5 - - ; Unary arithmetic. 
- - ; asm: sqrtsd %xmm5, %xmm10 - [-,%xmm10] v50 = sqrt v10 ; bin: f2 44 0f 51 d5 - ; asm: sqrtsd %xmm10, %xmm5 - [-,%xmm5] v51 = sqrt v11 ; bin: f2 41 0f 51 ea - - ; asm: roundsd $0, %xmm5, %xmm10 - [-,%xmm10] v52 = nearest v10 ; bin: 66 44 0f 3a 0b d5 00 - ; asm: roundsd $0, %xmm10, %xmm5 - [-,%xmm5] v53 = nearest v11 ; bin: 66 41 0f 3a 0b ea 00 - ; asm: roundsd $0, %xmm5, %xmm2 - [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0b d5 00 - - ; asm: roundsd $1, %xmm5, %xmm10 - [-,%xmm10] v55 = floor v10 ; bin: 66 44 0f 3a 0b d5 01 - ; asm: roundsd $1, %xmm10, %xmm5 - [-,%xmm5] v56 = floor v11 ; bin: 66 41 0f 3a 0b ea 01 - ; asm: roundsd $1, %xmm5, %xmm2 - [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0b d5 01 - - ; asm: roundsd $2, %xmm5, %xmm10 - [-,%xmm10] v58 = ceil v10 ; bin: 66 44 0f 3a 0b d5 02 - ; asm: roundsd $2, %xmm10, %xmm5 - [-,%xmm5] v59 = ceil v11 ; bin: 66 41 0f 3a 0b ea 02 - ; asm: roundsd $2, %xmm5, %xmm2 - [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0b d5 02 - - ; asm: roundsd $3, %xmm5, %xmm10 - [-,%xmm10] v61 = trunc v10 ; bin: 66 44 0f 3a 0b d5 03 - ; asm: roundsd $3, %xmm10, %xmm5 - [-,%xmm5] v62 = trunc v11 ; bin: 66 41 0f 3a 0b ea 03 - ; asm: roundsd $3, %xmm5, %xmm2 - [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0b d5 03 - - ; Load/Store - - ; asm: movsd (%r14), %xmm5 - [-,%xmm5] v100 = load.f64 v3 ; bin: heap_oob f2 41 0f 10 2e - ; asm: movsd (%rax), %xmm10 - [-,%xmm10] v101 = load.f64 v2 ; bin: heap_oob f2 44 0f 10 10 - ; asm: movsd 50(%r14), %xmm5 - [-,%xmm5] v110 = load.f64 v3+50 ; bin: heap_oob f2 41 0f 10 6e 32 - ; asm: movsd -50(%rax), %xmm10 - [-,%xmm10] v111 = load.f64 v2-50 ; bin: heap_oob f2 44 0f 10 50 ce - ; asm: movsd 10000(%r14), %xmm5 - [-,%xmm5] v120 = load.f64 v3+10000 ; bin: heap_oob f2 41 0f 10 ae 00002710 - ; asm: movsd -10000(%rax), %xmm10 - [-,%xmm10] v121 = load.f64 v2-10000 ; bin: heap_oob f2 44 0f 10 90 ffffd8f0 - - ; asm: movsd %xmm5, (%r14) - [-] store.f64 v100, v3 ; bin: heap_oob f2 41 0f 11 2e - ; asm: movsd %xmm10, (%rax) - 
[-] store.f64 v101, v2 ; bin: heap_oob f2 44 0f 11 10 - ; asm: movsd %xmm5, (%r13) - [-] store.f64 v100, v4 ; bin: heap_oob f2 41 0f 11 6d 00 - ; asm: movsd %xmm10, (%r13) - [-] store.f64 v101, v4 ; bin: heap_oob f2 45 0f 11 55 00 - ; asm: movsd %xmm5, 50(%r14) - [-] store.f64 v100, v3+50 ; bin: heap_oob f2 41 0f 11 6e 32 - ; asm: movsd %xmm10, -50(%rax) - [-] store.f64 v101, v2-50 ; bin: heap_oob f2 44 0f 11 50 ce - ; asm: movsd %xmm5, 10000(%r14) - [-] store.f64 v100, v3+10000 ; bin: heap_oob f2 41 0f 11 ae 00002710 - ; asm: movsd %xmm10, -10000(%rax) - [-] store.f64 v101, v2-10000 ; bin: heap_oob f2 44 0f 11 90 ffffd8f0 - - ; Spill / Fill. - - ; asm: movsd %xmm5, 1032(%rsp) - [-,ss1] v200 = spill v100 ; bin: stk_ovf f2 0f 11 ac 24 00000408 - ; asm: movsd %xmm10, 1032(%rsp) - [-,ss1] v201 = spill v101 ; bin: stk_ovf f2 44 0f 11 94 24 00000408 - - ; asm: movsd 1032(%rsp), %xmm5 - [-,%xmm5] v210 = fill v200 ; bin: f2 0f 10 ac 24 00000408 - ; asm: movsd 1032(%rsp), %xmm10 - [-,%xmm10] v211 = fill v201 ; bin: f2 44 0f 10 94 24 00000408 - - ; asm: movsd %xmm5, 1032(%rsp) - regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f2 0f 11 ac 24 00000408 - ; asm: movsd 1032(%rsp), %xmm5 - regfill v100, ss1 -> %xmm5 ; bin: f2 0f 10 ac 24 00000408 - - ; Comparisons. - ; - ; Only `supported_floatccs` are tested here. Others are handled by - ; legalization patterns. 
- - ; asm: ucomisd %xmm10, %xmm5 - ; asm: setnp %bl - [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 66 41 0f 2e ea 0f 9b c3 - ; asm: ucomisd %xmm5, %xmm10 - ; asm: setp %bl - [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 66 44 0f 2e d5 0f 9a c3 - ; asm: ucomisd %xmm10, %xmm5 - ; asm: setne %dl - [-,%rdx] v302 = fcmp one v10, v11 ; bin: 66 41 0f 2e ea 0f 95 c2 - ; asm: ucomisd %xmm5, %xmm10 - ; asm: sete %dl - [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 66 44 0f 2e d5 0f 94 c2 - ; asm: ucomisd %xmm10, %xmm5 - ; asm: seta %bl - [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 66 41 0f 2e ea 0f 97 c3 - ; asm: ucomisd %xmm5, %xmm10 - ; asm: setae %bl - [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 66 44 0f 2e d5 0f 93 c3 - ; asm: ucomisd %xmm10, %xmm5 - ; asm: setb %dl - [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 66 41 0f 2e ea 0f 92 c2 - ; asm: ucomisd %xmm5, %xmm10 - ; asm: setbe %dl - [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 44 0f 2e d5 0f 96 c2 - - ; asm: ucomisd %xmm10, %xmm5 - [-,%rflags] v310 = ffcmp v10, v11 ; bin: 66 41 0f 2e ea - ; asm: ucomisd %xmm10, %xmm5 - [-,%rflags] v311 = ffcmp v11, v10 ; bin: 66 44 0f 2e d5 - ; asm: ucomisd %xmm5, %xmm5 - [-,%rflags] v312 = ffcmp v10, v10 ; bin: 66 0f 2e ed - - ; Load/Store Complex - - [-,%rax] v350 = iconst.i64 1 - [-,%rbx] v351 = iconst.i64 2 - ; asm: movsd (%rax,%rbx,1),%xmm5 - [-,%xmm5] v352 = load_complex.f64 v350+v351 ; bin: heap_oob f2 0f 10 2c 18 - ; asm: movsd 0x32(%rax,%rbx,1),%xmm5 - [-,%xmm5] v353 = load_complex.f64 v350+v351+50 ; bin: heap_oob f2 0f 10 6c 18 32 - ; asm: movsd -0x32(%rax,%rbx,1),%xmm10 - [-,%xmm10] v354 = load_complex.f64 v350+v351-50 ; bin: heap_oob f2 44 0f 10 54 18 ce - ; asm: movsd 0x2710(%rax,%rbx,1),%xmm5 - [-,%xmm5] v355 = load_complex.f64 v350+v351+10000 ; bin: heap_oob f2 0f 10 ac 18 00002710 - ; asm: movsd -0x2710(%rax,%rbx,1),%xmm10 - [-,%xmm10] v356 = load_complex.f64 v350+v351-10000 ; bin: heap_oob f2 44 0f 10 94 18 ffffd8f0 - - ; asm: movsd %xmm5, (%rax,%rbx,1) - [-] store_complex.f64 v100, v350+v351 ; 
bin: heap_oob f2 0f 11 2c 18 - ; asm: movsd %xmm5, 50(%rax,%rbx,1) - [-] store_complex.f64 v100, v350+v351+50 ; bin: heap_oob f2 0f 11 6c 18 32 - ; asm: movsd %xmm10, -50(%rax,%rbx,1) - [-] store_complex.f64 v101, v350+v351-50 ; bin: heap_oob f2 44 0f 11 54 18 ce - ; asm: movsd %xmm5, 10000(%rax,%rbx,1) - [-] store_complex.f64 v100, v350+v351+10000 ; bin: heap_oob f2 0f 11 ac 18 00002710 - ; asm: movsd %xmm10, -10000(%rax,%rbx,1) - [-] store_complex.f64 v101, v350+v351-10000 ; bin: heap_oob f2 44 0f 11 94 18 ffffd8f0 - - return -} - -function %cpuflags_float(f32 [%xmm0]) { -block0(v0: f32 [%xmm0]): - ; asm: ucomiss %xmm0, %xmm0 - [-,%rflags] v1 = ffcmp v0, v0 ; bin: 0f 2e c0 - - jump block1 - -block1: - ; asm: jnp block1 - brff ord v1, block1 ; bin: 7b fe - jump block2 - -block2: - ; asm: jp block1 - brff uno v1, block1 ; bin: 7a fc - jump block3 - -block3: - ; asm: jne block1 - brff one v1, block1 ; bin: 75 fa - jump block4 - -block4: - ; asm: je block1 - brff ueq v1, block1 ; bin: 74 f8 - jump block5 - -block5: - ; asm: ja block1 - brff gt v1, block1 ; bin: 77 f6 - jump block6 - -block6: - ; asm: jae block1 - brff ge v1, block1 ; bin: 73 f4 - jump block7 - -block7: - ; asm: jb block1 - brff ult v1, block1 ; bin: 72 f2 - jump block8 - -block8: - ; asm: jbe block1 - brff ule v1, block1 ; bin: 76 f0 - jump block9 - -block9: - ; asm: jp .+4; ud2 - trapff ord v1, user0 ; bin: 7a 02 user0 0f 0b - ; asm: jnp .+4; ud2 - trapff uno v1, user0 ; bin: 7b 02 user0 0f 0b - ; asm: je .+4; ud2 - trapff one v1, user0 ; bin: 74 02 user0 0f 0b - ; asm: jne .+4; ud2 - trapff ueq v1, user0 ; bin: 75 02 user0 0f 0b - ; asm: jna .+4; ud2 - trapff gt v1, user0 ; bin: 76 02 user0 0f 0b - ; asm: jnae .+4; ud2 - trapff ge v1, user0 ; bin: 72 02 user0 0f 0b - ; asm: jnb .+4; ud2 - trapff ult v1, user0 ; bin: 73 02 user0 0f 0b - ; asm: jnbe .+4; ud2 - trapff ule v1, user0 ; bin: 77 02 user0 0f 0b - - ; asm: setnp %bl - [-,%rbx] v10 = trueff ord v1 ; bin: 0f 9b c3 - ; asm: setp %bl - [-,%rbx] 
v11 = trueff uno v1 ; bin: 0f 9a c3 - ; asm: setne %dl - [-,%rdx] v12 = trueff one v1 ; bin: 0f 95 c2 - ; asm: sete %dl - [-,%rdx] v13 = trueff ueq v1 ; bin: 0f 94 c2 - ; asm: seta %r10b - [-,%r10] v14 = trueff gt v1 ; bin: 41 0f 97 c2 - ; asm: setae %r10b - [-,%r10] v15 = trueff ge v1 ; bin: 41 0f 93 c2 - ; asm: setb %r14b - [-,%r14] v16 = trueff ult v1 ; bin: 41 0f 92 c6 - ; asm: setbe %r14b - [-,%r14] v17 = trueff ule v1 ; bin: 41 0f 96 c6 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/binary64-pic.clif b/cranelift/filetests/filetests/isa/x86/binary64-pic.clif deleted file mode 100644 index 4f2c650592..0000000000 --- a/cranelift/filetests/filetests/isa/x86/binary64-pic.clif +++ /dev/null @@ -1,83 +0,0 @@ -; binary emission of 64-bit code. -test binemit -set opt_level=speed_and_size -set is_pic -target x86_64 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary64-pic.clif | llvm-mc -show-encoding -triple=x86_64 -; - -; Tests for i64 instructions. -function %I64() { - sig0 = () - fn0 = %foo() - fn1 = colocated %bar() - - gv0 = symbol %some_gv - gv1 = symbol colocated %some_gv - - ; Use incoming_arg stack slots because they won't be relocated by the frame - ; layout. - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - - ; Colocated functions. 
- - ; asm: call foo - call fn1() ; bin: stk_ovf e8 CallPCRel4(%bar-4) 00000000 - - ; asm: lea 0x0(%rip), %rax - [-,%rax] v0 = func_addr.i64 fn1 ; bin: 48 8d 05 PCRel4(%bar-4) 00000000 - ; asm: lea 0x0(%rip), %rsi - [-,%rsi] v1 = func_addr.i64 fn1 ; bin: 48 8d 35 PCRel4(%bar-4) 00000000 - ; asm: lea 0x0(%rip), %r10 - [-,%r10] v2 = func_addr.i64 fn1 ; bin: 4c 8d 15 PCRel4(%bar-4) 00000000 - - ; asm: call *%rax - call_indirect sig0, v0() ; bin: stk_ovf ff d0 - ; asm: call *%rsi - call_indirect sig0, v1() ; bin: stk_ovf ff d6 - ; asm: call *%r10 - call_indirect sig0, v2() ; bin: stk_ovf 41 ff d2 - - ; Non-colocated functions. - - ; asm: call foo@PLT - call fn0() ; bin: stk_ovf e8 CallPLTRel4(%foo-4) 00000000 - - ; asm: mov 0x0(%rip), %rax - [-,%rax] v100 = func_addr.i64 fn0 ; bin: 48 8b 05 GOTPCRel4(%foo-4) 00000000 - ; asm: mov 0x0(%rip), %rsi - [-,%rsi] v101 = func_addr.i64 fn0 ; bin: 48 8b 35 GOTPCRel4(%foo-4) 00000000 - ; asm: mov 0x0(%rip), %r10 - [-,%r10] v102 = func_addr.i64 fn0 ; bin: 4c 8b 15 GOTPCRel4(%foo-4) 00000000 - - ; asm: call *%rax - call_indirect sig0, v100() ; bin: stk_ovf ff d0 - ; asm: call *%rsi - call_indirect sig0, v101() ; bin: stk_ovf ff d6 - ; asm: call *%r10 - call_indirect sig0, v102() ; bin: stk_ovf 41 ff d2 - - ; asm: mov 0x0(%rip), %rcx - [-,%rcx] v3 = symbol_value.i64 gv0 ; bin: 48 8b 0d GOTPCRel4(%some_gv-4) 00000000 - ; asm: mov 0x0(%rip), %rsi - [-,%rsi] v4 = symbol_value.i64 gv0 ; bin: 48 8b 35 GOTPCRel4(%some_gv-4) 00000000 - ; asm: mov 0x0(%rip), %r10 - [-,%r10] v5 = symbol_value.i64 gv0 ; bin: 4c 8b 15 GOTPCRel4(%some_gv-4) 00000000 - - ; asm: lea 0x0(%rip), %rcx - [-,%rcx] v6 = symbol_value.i64 gv1 ; bin: 48 8d 0d PCRel4(%some_gv-4) 00000000 - ; asm: lea 0x0(%rip), %rsi - [-,%rsi] v7 = symbol_value.i64 gv1 ; bin: 48 8d 35 PCRel4(%some_gv-4) 00000000 - ; asm: lea 0x0(%rip), %r10 - [-,%r10] v8 = symbol_value.i64 gv1 ; bin: 4c 8d 15 PCRel4(%some_gv-4) 00000000 - - return -} diff --git 
a/cranelift/filetests/filetests/isa/x86/binary64.clif b/cranelift/filetests/filetests/isa/x86/binary64.clif deleted file mode 100644 index c5e1cf5099..0000000000 --- a/cranelift/filetests/filetests/isa/x86/binary64.clif +++ /dev/null @@ -1,1692 +0,0 @@ -; binary emission of x86-64 code. -test binemit -set opt_level=speed_and_size -target x86_64 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary64.clif | llvm-mc -show-encoding -triple=x86_64 -; - -; Tests for i64 instructions. -function %I64() { - sig0 = () - fn0 = %foo() - fn1 = colocated %bar() - - gv0 = symbol %some_gv - - ; Use incoming_arg stack slots because they won't be relocated by the frame - ; layout. - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - - ; Integer Constants. - - ; asm: movq $0x01020304f1f2f3f4, %rcx - [-,%rcx] v1 = iconst.i64 0x0102_0304_f1f2_f3f4 ; bin: 48 b9 01020304f1f2f3f4 - ; asm: movq $0x11020304f1f2f3f4, %rsi - [-,%rsi] v2 = iconst.i64 0x1102_0304_f1f2_f3f4 ; bin: 48 be 11020304f1f2f3f4 - ; asm: movq $0x21020304f1f2f3f4, %r10 - [-,%r10] v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4 - ; asm: movl $0xff001122, %r8d # 32-bit zero-extended constant. - [-,%r8] v4 = iconst.i64 0xff00_1122 ; bin: 41 b8 ff001122 - ; asm: movq $0xffffffff88001122, %r14 # 32-bit sign-extended constant. - [-,%r14] v5 = iconst.i64 0xffff_ffff_8800_1122 ; bin: 49 c7 c6 88001122 - - ; asm: movb $1, %cl - [-,%rcx] v9007 = bconst.b1 true ; bin: b9 00000001 - ; asm: movb $1, %sil - [-,%r10] v9008 = bconst.b1 true ; bin: 41 ba 00000001 - - ; Integer Register Operations. - - ; asm: notq %rcx - [-,%rcx] v4000 = bnot v1 ; bin: 48 f7 d1 - ; asm: notq %rsi - [-,%rsi] v4001 = bnot v2 ; bin: 48 f7 d6 - ; asm: notq %r10 - [-,%r10] v4002 = bnot v3 ; bin: 49 f7 d2 - - ; Integer Register-Register Operations. 
- - ; asm: addq %rsi, %rcx - [-,%rcx] v10 = iadd v1, v2 ; bin: 48 01 f1 - ; asm: addq %r10, %rsi - [-,%rsi] v11 = iadd v2, v3 ; bin: 4c 01 d6 - ; asm: addq %rcx, %r10 - [-,%r10] v12 = iadd v3, v1 ; bin: 49 01 ca - - ; asm: subq %rsi, %rcx - [-,%rcx] v20 = isub v1, v2 ; bin: 48 29 f1 - ; asm: subq %r10, %rsi - [-,%rsi] v21 = isub v2, v3 ; bin: 4c 29 d6 - ; asm: subq %rcx, %r10 - [-,%r10] v22 = isub v3, v1 ; bin: 49 29 ca - - ; asm: andq %rsi, %rcx - [-,%rcx] v30 = band v1, v2 ; bin: 48 21 f1 - ; asm: andq %r10, %rsi - [-,%rsi] v31 = band v2, v3 ; bin: 4c 21 d6 - ; asm: andq %rcx, %r10 - [-,%r10] v32 = band v3, v1 ; bin: 49 21 ca - - ; asm: orq %rsi, %rcx - [-,%rcx] v40 = bor v1, v2 ; bin: 48 09 f1 - ; asm: orq %r10, %rsi - [-,%rsi] v41 = bor v2, v3 ; bin: 4c 09 d6 - ; asm: orq %rcx, %r10 - [-,%r10] v42 = bor v3, v1 ; bin: 49 09 ca - - ; asm: xorq %rsi, %rcx - [-,%rcx] v50 = bxor v1, v2 ; bin: 48 31 f1 - ; asm: xorq %r10, %rsi - [-,%rsi] v51 = bxor v2, v3 ; bin: 4c 31 d6 - ; asm: xorq %rcx, %r10 - [-,%r10] v52 = bxor v3, v1 ; bin: 49 31 ca - - ; asm: shlq %cl, %rsi - [-,%rsi] v60 = ishl v2, v1 ; bin: 48 d3 e6 - ; asm: shlq %cl, %r10 - [-,%r10] v61 = ishl v3, v1 ; bin: 49 d3 e2 - ; asm: sarq %cl, %rsi - [-,%rsi] v62 = sshr v2, v1 ; bin: 48 d3 fe - ; asm: sarq %cl, %r10 - [-,%r10] v63 = sshr v3, v1 ; bin: 49 d3 fa - ; asm: shrq %cl, %rsi - [-,%rsi] v64 = ushr v2, v1 ; bin: 48 d3 ee - ; asm: shrq %cl, %r10 - [-,%r10] v65 = ushr v3, v1 ; bin: 49 d3 ea - - ; asm: rolq %cl, %rsi - [-,%rsi] v66 = rotl v2, v1 ; bin: 48 d3 c6 - ; asm: rolq %cl, %r10 - [-,%r10] v67 = rotl v3, v1 ; bin: 49 d3 c2 - ; asm: rorq %cl, %rsi - [-,%rsi] v68 = rotr v2, v1 ; bin: 48 d3 ce - ; asm: rorq %cl, %r10 - [-,%r10] v69 = rotr v3, v1 ; bin: 49 d3 ca - - ; Integer Register-Immediate Operations. - ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits. - ; Some take 8-bit immediates that are sign-extended to 64 bits. 
- - ; asm: addq $-100000, %rcx - [-,%rcx] v70 = iadd_imm v1, -100000 ; bin: 48 81 c1 fffe7960 - ; asm: addq $100000, %rsi - [-,%rsi] v71 = iadd_imm v2, 100000 ; bin: 48 81 c6 000186a0 - ; asm: addq $0x7fffffff, %r10 - [-,%r10] v72 = iadd_imm v3, 0x7fff_ffff ; bin: 49 81 c2 7fffffff - ; asm: addq $100, %r8 - [-,%r8] v73 = iadd_imm v4, 100 ; bin: 49 83 c0 64 - ; asm: addq $-100, %r14 - [-,%r14] v74 = iadd_imm v5, -100 ; bin: 49 83 c6 9c - - ; asm: andq $-100000, %rcx - [-,%rcx] v80 = band_imm v1, -100000 ; bin: 48 81 e1 fffe7960 - ; asm: andq $100000, %rsi - [-,%rsi] v81 = band_imm v2, 100000 ; bin: 48 81 e6 000186a0 - ; asm: andq $0x7fffffff, %r10 - [-,%r10] v82 = band_imm v3, 0x7fff_ffff ; bin: 49 81 e2 7fffffff - ; asm: andq $100, %r8 - [-,%r8] v83 = band_imm v4, 100 ; bin: 49 83 e0 64 - ; asm: andq $-100, %r14 - [-,%r14] v84 = band_imm v5, -100 ; bin: 49 83 e6 9c - - ; asm: orq $-100000, %rcx - [-,%rcx] v90 = bor_imm v1, -100000 ; bin: 48 81 c9 fffe7960 - ; asm: orq $100000, %rsi - [-,%rsi] v91 = bor_imm v2, 100000 ; bin: 48 81 ce 000186a0 - ; asm: orq $0x7fffffff, %r10 - [-,%r10] v92 = bor_imm v3, 0x7fff_ffff ; bin: 49 81 ca 7fffffff - ; asm: orq $100, %r8 - [-,%r8] v93 = bor_imm v4, 100 ; bin: 49 83 c8 64 - ; asm: orq $-100, %r14 - [-,%r14] v94 = bor_imm v5, -100 ; bin: 49 83 ce 9c - ; asm: ret - - ; asm: xorq $-100000, %rcx - [-,%rcx] v100 = bxor_imm v1, -100000 ; bin: 48 81 f1 fffe7960 - ; asm: xorq $100000, %rsi - [-,%rsi] v101 = bxor_imm v2, 100000 ; bin: 48 81 f6 000186a0 - ; asm: xorq $0x7fffffff, %r10 - [-,%r10] v102 = bxor_imm v3, 0x7fff_ffff ; bin: 49 81 f2 7fffffff - ; asm: xorq $100, %r8 - [-,%r8] v103 = bxor_imm v4, 100 ; bin: 49 83 f0 64 - ; asm: xorq $-100, %r14 - [-,%r14] v104 = bxor_imm v5, -100 ; bin: 49 83 f6 9c - - ; Register copies. 
- - ; asm: movq %rsi, %rcx - [-,%rcx] v110 = copy v2 ; bin: 48 89 f1 - ; asm: movq %r10, %rsi - [-,%rsi] v111 = copy v3 ; bin: 4c 89 d6 - ; asm: movq %rcx, %r10 - [-,%r10] v112 = copy v1 ; bin: 49 89 ca - - ; Copy Special - ; asm: movq %rsp, %rbp - copy_special %rsp -> %rbp ; bin: 48 89 e5 - ; asm: movq %r10, %r11 - copy_special %r10 -> %r11 ; bin: 4d 89 d3 - ; asm: movq %rsp, %r11 - copy_special %rsp -> %r11 ; bin: 49 89 e3 - ; asm: movq %r10, %rsp - copy_special %r10 -> %rsp ; bin: 4c 89 d4 - - ; Copy to SSA - - ; asm: movq %rax, %r15 - [-,%r15] v700 = copy_to_ssa.i64 %rax ; bin: 49 89 c7 - ; asm: movq %r15, %rax - [-,%rax] v701 = copy_to_ssa.i64 %r15 ; bin: 4c 89 f8 - ; asm: movq %rdi, %rsi - [-,%rsi] v702 = copy_to_ssa.i64 %rdi ; bin: 48 89 fe - ; asm: movq %r11, %r14 - [-,%r14] v703 = copy_to_ssa.i64 %r11 ; bin: 4d 89 de - - ; asm: movl %eax, %r15d - [-,%r15] v704 = copy_to_ssa.i32 %rax ; bin: 41 89 c7 - ; asm: movl %r15d, %eax - [-,%rax] v705 = copy_to_ssa.i32 %r15 ; bin: 44 89 f8 - ; asm: movl %edi, %esi. Unfortunately we get a redundant REX prefix. - [-,%rsi] v706 = copy_to_ssa.i32 %rdi ; bin: 40 89 fe - ; asm: movl %r11, %r14 - [-,%r14] v707 = copy_to_ssa.i32 %r11 ; bin: 45 89 de - - ; Load/Store instructions. - - ; Register indirect addressing with no displacement. 
- - ; asm: movq %rcx, (%r10) - store v1, v3 ; bin: heap_oob 49 89 0a - ; asm: movq %r10, (%rcx) - store v3, v1 ; bin: heap_oob 4c 89 11 - ; asm: movl %ecx, (%r10) - istore32 v1, v3 ; bin: heap_oob 41 89 0a - ; asm: movl %r10d, (%rcx) - istore32 v3, v1 ; bin: heap_oob 44 89 11 - ; asm: movw %cx, (%r10) - istore16 v1, v3 ; bin: heap_oob 66 41 89 0a - ; asm: movw %r10w, (%rcx) - istore16 v3, v1 ; bin: heap_oob 66 44 89 11 - ; asm: movb %cl, (%r10) - istore8 v1, v3 ; bin: heap_oob 41 88 0a - ; asm: movb %r10b, (%rcx) - istore8 v3, v1 ; bin: heap_oob 44 88 11 - - ; asm: movq (%rcx), %r14 - [-,%r14] v120 = load.i64 v1 ; bin: heap_oob 4c 8b 31 - ; asm: movq (%r10), %rdx - [-,%rdx] v121 = load.i64 v3 ; bin: heap_oob 49 8b 12 - ; asm: movl (%rcx), %r14d - [-,%r14] v122 = uload32.i64 v1 ; bin: heap_oob 44 8b 31 - ; asm: movl (%r10), %edx - [-,%rdx] v123 = uload32.i64 v3 ; bin: heap_oob 41 8b 12 - ; asm: movslq (%rcx), %r14 - [-,%r14] v124 = sload32.i64 v1 ; bin: heap_oob 4c 63 31 - ; asm: movslq (%r10), %rdx - [-,%rdx] v125 = sload32.i64 v3 ; bin: heap_oob 49 63 12 - ; asm: movzwq (%rcx), %r14 - [-,%r14] v126 = uload16.i64 v1 ; bin: heap_oob 4c 0f b7 31 - ; asm: movzwq (%r10), %rdx - [-,%rdx] v127 = uload16.i64 v3 ; bin: heap_oob 49 0f b7 12 - ; asm: movswq (%rcx), %r14 - [-,%r14] v128 = sload16.i64 v1 ; bin: heap_oob 4c 0f bf 31 - ; asm: movswq (%r10), %rdx - [-,%rdx] v129 = sload16.i64 v3 ; bin: heap_oob 49 0f bf 12 - ; asm: movzbq (%rcx), %r14 - [-,%r14] v130 = uload8.i64 v1 ; bin: heap_oob 4c 0f b6 31 - ; asm: movzbq (%r10), %rdx - [-,%rdx] v131 = uload8.i64 v3 ; bin: heap_oob 49 0f b6 12 - ; asm: movsbq (%rcx), %r14 - [-,%r14] v132 = sload8.i64 v1 ; bin: heap_oob 4c 0f be 31 - ; asm: movsbq (%r10), %rdx - [-,%rdx] v133 = sload8.i64 v3 ; bin: heap_oob 49 0f be 12 - - ; Register-indirect with 8-bit signed displacement. 
- - ; asm: movq %rcx, 100(%r10) - store v1, v3+100 ; bin: heap_oob 49 89 4a 64 - ; asm: movq %r10, -100(%rcx) - store v3, v1-100 ; bin: heap_oob 4c 89 51 9c - ; asm: movl %ecx, 100(%r10) - istore32 v1, v3+100 ; bin: heap_oob 41 89 4a 64 - ; asm: movl %r10d, -100(%rcx) - istore32 v3, v1-100 ; bin: heap_oob 44 89 51 9c - ; asm: movw %cx, 100(%r10) - istore16 v1, v3+100 ; bin: heap_oob 66 41 89 4a 64 - ; asm: movw %r10w, -100(%rcx) - istore16 v3, v1-100 ; bin: heap_oob 66 44 89 51 9c - ; asm: movb %cl, 100(%r10) - istore8 v1, v3+100 ; bin: heap_oob 41 88 4a 64 - ; asm: movb %r10b, 100(%rcx) - istore8 v3, v1+100 ; bin: heap_oob 44 88 51 64 - - ; asm: movq 50(%rcx), %r10 - [-,%r10] v140 = load.i64 v1+50 ; bin: heap_oob 4c 8b 51 32 - ; asm: movq -50(%r10), %rdx - [-,%rdx] v141 = load.i64 v3-50 ; bin: heap_oob 49 8b 52 ce - ; asm: movl 50(%rcx), %edi - [-,%rdi] v142 = uload32.i64 v1+50 ; bin: heap_oob 8b 79 32 - ; asm: movl -50(%rsi), %edx - [-,%rdx] v143 = uload32.i64 v2-50 ; bin: heap_oob 8b 56 ce - ; asm: movslq 50(%rcx), %rdi - [-,%rdi] v144 = sload32.i64 v1+50 ; bin: heap_oob 48 63 79 32 - ; asm: movslq -50(%rsi), %rdx - [-,%rdx] v145 = sload32.i64 v2-50 ; bin: heap_oob 48 63 56 ce - ; asm: movzwq 50(%rcx), %rdi - [-,%rdi] v146 = uload16.i64 v1+50 ; bin: heap_oob 48 0f b7 79 32 - ; asm: movzwq -50(%rsi), %rdx - [-,%rdx] v147 = uload16.i64 v2-50 ; bin: heap_oob 48 0f b7 56 ce - ; asm: movswq 50(%rcx), %rdi - [-,%rdi] v148 = sload16.i64 v1+50 ; bin: heap_oob 48 0f bf 79 32 - ; asm: movswq -50(%rsi), %rdx - [-,%rdx] v149 = sload16.i64 v2-50 ; bin: heap_oob 48 0f bf 56 ce - ; asm: movzbq 50(%rcx), %rdi - [-,%rdi] v150 = uload8.i64 v1+50 ; bin: heap_oob 48 0f b6 79 32 - ; asm: movzbq -50(%rsi), %rdx - [-,%rdx] v151 = uload8.i64 v2-50 ; bin: heap_oob 48 0f b6 56 ce - ; asm: movsbq 50(%rcx), %rdi - [-,%rdi] v152 = sload8.i64 v1+50 ; bin: heap_oob 48 0f be 79 32 - ; asm: movsbq -50(%rsi), %rdx - [-,%rdx] v153 = sload8.i64 v2-50 ; bin: heap_oob 48 0f be 56 ce - - ; 
Register-indirect with 32-bit signed displacement. - - ; asm: movq %rcx, 10000(%r10) - store v1, v3+10000 ; bin: heap_oob 49 89 8a 00002710 - ; asm: movq %r10, -10000(%rcx) - store v3, v1-10000 ; bin: heap_oob 4c 89 91 ffffd8f0 - ; asm: movl %ecx, 10000(%rsi) - istore32 v1, v2+10000 ; bin: heap_oob 89 8e 00002710 - ; asm: movl %esi, -10000(%rcx) - istore32 v2, v1-10000 ; bin: heap_oob 89 b1 ffffd8f0 - ; asm: movw %cx, 10000(%rsi) - istore16 v1, v2+10000 ; bin: heap_oob 66 89 8e 00002710 - ; asm: movw %si, -10000(%rcx) - istore16 v2, v1-10000 ; bin: heap_oob 66 89 b1 ffffd8f0 - ; asm: movb %cl, 10000(%rsi) - istore8 v1, v2+10000 ; bin: heap_oob 88 8e 00002710 - ; asm: movb %sil, 10000(%rcx) - istore8 v2, v1+10000 ; bin: heap_oob 40 88 b1 00002710 - - ; asm: movq 50000(%rcx), %r10 - [-,%r10] v160 = load.i64 v1+50000 ; bin: heap_oob 4c 8b 91 0000c350 - ; asm: movq -50000(%r10), %rdx - [-,%rdx] v161 = load.i64 v3-50000 ; bin: heap_oob 49 8b 92 ffff3cb0 - ; asm: movl 50000(%rcx), %edi - [-,%rdi] v162 = uload32.i64 v1+50000 ; bin: heap_oob 8b b9 0000c350 - ; asm: movl -50000(%rsi), %edx - [-,%rdx] v163 = uload32.i64 v2-50000 ; bin: heap_oob 8b 96 ffff3cb0 - ; asm: movslq 50000(%rcx), %rdi - [-,%rdi] v164 = sload32.i64 v1+50000 ; bin: heap_oob 48 63 b9 0000c350 - ; asm: movslq -50000(%rsi), %rdx - [-,%rdx] v165 = sload32.i64 v2-50000 ; bin: heap_oob 48 63 96 ffff3cb0 - ; asm: movzwq 50000(%rcx), %rdi - [-,%rdi] v166 = uload16.i64 v1+50000 ; bin: heap_oob 48 0f b7 b9 0000c350 - ; asm: movzwq -50000(%rsi), %rdx - [-,%rdx] v167 = uload16.i64 v2-50000 ; bin: heap_oob 48 0f b7 96 ffff3cb0 - ; asm: movswq 50000(%rcx), %rdi - [-,%rdi] v168 = sload16.i64 v1+50000 ; bin: heap_oob 48 0f bf b9 0000c350 - ; asm: movswq -50000(%rsi), %rdx - [-,%rdx] v169 = sload16.i64 v2-50000 ; bin: heap_oob 48 0f bf 96 ffff3cb0 - ; asm: movzbq 50000(%rcx), %rdi - [-,%rdi] v170 = uload8.i64 v1+50000 ; bin: heap_oob 48 0f b6 b9 0000c350 - ; asm: movzbq -50000(%rsi), %rdx - [-,%rdx] v171 = uload8.i64 
v2-50000 ; bin: heap_oob 48 0f b6 96 ffff3cb0 - ; asm: movsbq 50000(%rcx), %rdi - [-,%rdi] v172 = sload8.i64 v1+50000 ; bin: heap_oob 48 0f be b9 0000c350 - ; asm: movsbq -50000(%rsi), %rdx - [-,%rdx] v173 = sload8.i64 v2-50000 ; bin: heap_oob 48 0f be 96 ffff3cb0 - - - ; More arithmetic. - - ; asm: imulq %rsi, %rcx - [-,%rcx] v180 = imul v1, v2 ; bin: 48 0f af ce - ; asm: imulq %r10, %rsi - [-,%rsi] v181 = imul v2, v3 ; bin: 49 0f af f2 - ; asm: imulq %rcx, %r10 - [-,%r10] v182 = imul v3, v1 ; bin: 4c 0f af d1 - - [-,%rax] v190 = iconst.i64 1 - [-,%rdx] v191 = iconst.i64 2 - ; asm: idivq %rcx - [-,%rax,%rdx] v192, v193 = x86_sdivmodx v190, v191, v1 ; bin: int_divz 48 f7 f9 - ; asm: idivq %rsi - [-,%rax,%rdx] v194, v195 = x86_sdivmodx v190, v191, v2 ; bin: int_divz 48 f7 fe - ; asm: idivq %r10 - [-,%rax,%rdx] v196, v197 = x86_sdivmodx v190, v191, v3 ; bin: int_divz 49 f7 fa - ; asm: divq %rcx - [-,%rax,%rdx] v198, v199 = x86_udivmodx v190, v191, v1 ; bin: int_divz 48 f7 f1 - ; asm: divq %rsi - [-,%rax,%rdx] v200, v201 = x86_udivmodx v190, v191, v2 ; bin: int_divz 48 f7 f6 - ; asm: divq %r10 - [-,%rax,%rdx] v202, v203 = x86_udivmodx v190, v191, v3 ; bin: int_divz 49 f7 f2 - - ; double-length multiply instructions, 64 bit - [-,%rax] v1001 = iconst.i64 1 - [-,%r15] v1002 = iconst.i64 2 - ; asm: mulq %r15 - [-,%rax,%rdx] v1003, v1004 = x86_umulx v1001, v1002 ; bin: 49 f7 e7 - ; asm: imulq %r15 - [-,%rax,%rdx] v1005, v1006 = x86_smulx v1001, v1002 ; bin: 49 f7 ef - - ; double-length multiply instructions, 32 bit - [-,%rax] v1011 = iconst.i32 1 - [-,%r15] v1012 = iconst.i32 2 - [-,%rcx] v1017 = iconst.i32 3 - ; asm: mull %r15d - [-,%rax,%rdx] v1013, v1014 = x86_umulx v1011, v1012 ; bin: 41 f7 e7 - ; asm: imull %r15d - [-,%rax,%rdx] v1015, v1016 = x86_smulx v1011, v1012 ; bin: 41 f7 ef - - ; asm: mull %ecx - [-,%rax,%rdx] v1018, v1019 = x86_umulx v1011, v1017 ; bin: f7 e1 - ; asm: imull %ecx - [-,%rax,%rdx] v1020, v1021 = x86_smulx v1011, v1017 ; bin: f7 e9 - - ; 
Bit-counting instructions. - - ; asm: popcntq %rsi, %rcx - [-,%rcx] v210 = popcnt v2 ; bin: f3 48 0f b8 ce - ; asm: popcntq %r10, %rsi - [-,%rsi] v211 = popcnt v3 ; bin: f3 49 0f b8 f2 - ; asm: popcntq %rcx, %r10 - [-,%r10] v212 = popcnt v1 ; bin: f3 4c 0f b8 d1 - - ; asm: lzcntq %rsi, %rcx - [-,%rcx] v213 = clz v2 ; bin: f3 48 0f bd ce - ; asm: lzcntq %r10, %rsi - [-,%rsi] v214 = clz v3 ; bin: f3 49 0f bd f2 - ; asm: lzcntq %rcx, %r10 - [-,%r10] v215 = clz v1 ; bin: f3 4c 0f bd d1 - - ; asm: tzcntq %rsi, %rcx - [-,%rcx] v216 = ctz v2 ; bin: f3 48 0f bc ce - ; asm: tzcntq %r10, %rsi - [-,%rsi] v217 = ctz v3 ; bin: f3 49 0f bc f2 - ; asm: tzcntq %rcx, %r10 - [-,%r10] v218 = ctz v1 ; bin: f3 4c 0f bc d1 - - ; Integer comparisons. - - ; asm: cmpq %rsi, %rcx - ; asm: sete %bl - [-,%rbx] v300 = icmp eq v1, v2 ; bin: 48 39 f1 0f 94 c3 - ; asm: cmpq %r10, %rsi - ; asm: sete %dl - [-,%rdx] v301 = icmp eq v2, v3 ; bin: 4c 39 d6 0f 94 c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setne %bl - [-,%rbx] v302 = icmp ne v1, v2 ; bin: 48 39 f1 0f 95 c3 - ; asm: cmpq %r10, %rsi - ; asm: setne %dl - [-,%rdx] v303 = icmp ne v2, v3 ; bin: 4c 39 d6 0f 95 c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setl %bl - [-,%rbx] v304 = icmp slt v1, v2 ; bin: 48 39 f1 0f 9c c3 - ; asm: cmpq %r10, %rsi - ; asm: setl %dl - [-,%rdx] v305 = icmp slt v2, v3 ; bin: 4c 39 d6 0f 9c c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setge %bl - [-,%rbx] v306 = icmp sge v1, v2 ; bin: 48 39 f1 0f 9d c3 - ; asm: cmpq %r10, %rsi - ; asm: setge %dl - [-,%rdx] v307 = icmp sge v2, v3 ; bin: 4c 39 d6 0f 9d c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setg %bl - [-,%rbx] v308 = icmp sgt v1, v2 ; bin: 48 39 f1 0f 9f c3 - ; asm: cmpq %r10, %rsi - ; asm: setg %dl - [-,%rdx] v309 = icmp sgt v2, v3 ; bin: 4c 39 d6 0f 9f c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setle %bl - [-,%rbx] v310 = icmp sle v1, v2 ; bin: 48 39 f1 0f 9e c3 - ; asm: cmpq %r10, %rsi - ; asm: setle %dl - [-,%rdx] v311 = icmp sle v2, v3 ; bin: 4c 39 d6 0f 9e c2 - - ; asm: cmpq %rsi, 
%rcx - ; asm: setb %bl - [-,%rbx] v312 = icmp ult v1, v2 ; bin: 48 39 f1 0f 92 c3 - ; asm: cmpq %r10, %rsi - ; asm: setb %dl - [-,%rdx] v313 = icmp ult v2, v3 ; bin: 4c 39 d6 0f 92 c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setae %bl - [-,%rbx] v314 = icmp uge v1, v2 ; bin: 48 39 f1 0f 93 c3 - ; asm: cmpq %r10, %rsi - ; asm: setae %dl - [-,%rdx] v315 = icmp uge v2, v3 ; bin: 4c 39 d6 0f 93 c2 - - ; asm: cmpq %rsi, %rcx - ; asm: seta %bl - [-,%rbx] v316 = icmp ugt v1, v2 ; bin: 48 39 f1 0f 97 c3 - ; asm: cmpq %r10, %rsi - ; asm: seta %dl - [-,%rdx] v317 = icmp ugt v2, v3 ; bin: 4c 39 d6 0f 97 c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setbe %bl - [-,%rbx] v318 = icmp ule v1, v2 ; bin: 48 39 f1 0f 96 c3 - ; asm: cmpq %r10, %rsi - ; asm: setbe %dl - [-,%rdx] v319 = icmp ule v2, v3 ; bin: 4c 39 d6 0f 96 c2 - - ; asm: cmpq $37, %rcx - ; asm: setl %bl - [-,%rbx] v320 = icmp_imm slt v1, 37 ; bin: 48 83 f9 25 0f 9c c3 - - ; asm: cmpq $100000, %rcx - ; asm: setl %bl - [-,%rbx] v321 = icmp_imm slt v1, 100000 ; bin: 48 81 f9 000186a0 0f 9c c3 - - ; Bool-to-int conversions. - - ; asm: movzbq %bl, %rcx - [-,%rcx] v350 = bint.i64 v300 ; bin: 0f b6 cb - ; asm: movzbq %dl, %rsi - [-,%rsi] v351 = bint.i64 v301 ; bin: 0f b6 f2 - - ; Colocated functions. - - ; asm: call bar - call fn1() ; bin: stk_ovf e8 CallPCRel4(%bar-4) 00000000 - - ; asm: lea 0x0(%rip), %rcx - [-,%rcx] v400 = func_addr.i64 fn1 ; bin: 48 8d 0d PCRel4(%bar-4) 00000000 - ; asm: lea 0x0(%rip), %rsi - [-,%rsi] v401 = func_addr.i64 fn1 ; bin: 48 8d 35 PCRel4(%bar-4) 00000000 - ; asm: lea 0x0(%rip), %r10 - [-,%r10] v402 = func_addr.i64 fn1 ; bin: 4c 8d 15 PCRel4(%bar-4) 00000000 - - ; asm: call *%rcx - call_indirect sig0, v400() ; bin: stk_ovf ff d1 - ; asm: call *%rsi - call_indirect sig0, v401() ; bin: stk_ovf ff d6 - ; asm: call *%r10 - call_indirect sig0, v402() ; bin: stk_ovf 41 ff d2 - - ; Non-colocated functions. Note that there is no non-colocated non-PIC call. 
- - ; asm: movabsq $0, %rcx - [-,%rcx] v410 = func_addr.i64 fn0 ; bin: 48 b9 Abs8(%foo) 0000000000000000 - ; asm: movabsq $0, %rsi - [-,%rsi] v411 = func_addr.i64 fn0 ; bin: 48 be Abs8(%foo) 0000000000000000 - ; asm: movabsq $0, %r10 - [-,%r10] v412 = func_addr.i64 fn0 ; bin: 49 ba Abs8(%foo) 0000000000000000 - - ; asm: call *%rcx - call_indirect sig0, v410() ; bin: stk_ovf ff d1 - ; asm: call *%rsi - call_indirect sig0, v411() ; bin: stk_ovf ff d6 - ; asm: call *%r10 - call_indirect sig0, v412() ; bin: stk_ovf 41 ff d2 - - ; asm: movabsq $-1, %rcx - [-,%rcx] v450 = symbol_value.i64 gv0 ; bin: 48 b9 Abs8(%some_gv) 0000000000000000 - ; asm: movabsq $-1, %rsi - [-,%rsi] v451 = symbol_value.i64 gv0 ; bin: 48 be Abs8(%some_gv) 0000000000000000 - ; asm: movabsq $-1, %r10 - [-,%r10] v452 = symbol_value.i64 gv0 ; bin: 49 ba Abs8(%some_gv) 0000000000000000 - - ; Spill / Fill. - - ; asm: movq %rcx, 1032(%rsp) - [-,ss1] v500 = spill v1 ; bin: stk_ovf 48 89 8c 24 00000408 - ; asm: movq %rsi, 1032(%rsp) - [-,ss1] v501 = spill v2 ; bin: stk_ovf 48 89 b4 24 00000408 - ; asm: movq %r10, 1032(%rsp) - [-,ss1] v502 = spill v3 ; bin: stk_ovf 4c 89 94 24 00000408 - - ; asm: movq 1032(%rsp), %rcx - [-,%rcx] v510 = fill v500 ; bin: 48 8b 8c 24 00000408 - ; asm: movq 1032(%rsp), %rsi - [-,%rsi] v511 = fill v501 ; bin: 48 8b b4 24 00000408 - ; asm: movq 1032(%rsp), %r10 - [-,%r10] v512 = fill v502 ; bin: 4c 8b 94 24 00000408 - - ; asm: movq %rcx, 1032(%rsp) - regspill v1, %rcx -> ss1 ; bin: stk_ovf 48 89 8c 24 00000408 - ; asm: movq 1032(%rsp), %rcx - regfill v1, ss1 -> %rcx ; bin: 48 8b 8c 24 00000408 - - ; Push and Pop - ; asm: pushq %rcx - x86_push v1 ; bin: stk_ovf 51 - ; asm: pushq %r10 - x86_push v3 ; bin: stk_ovf 41 52 - ; asm: popq %rcx - [-,%rcx] v513 = x86_pop.i64 ; bin: 59 - ; asm: popq %r10 - [-,%r10] v514 = x86_pop.i64 ; bin: 41 5a - - ; Adjust Stack Pointer Up - ; asm: addq $64, %rsp - adjust_sp_up_imm 64 ; bin: 48 83 c4 40 - ; asm: addq $-64, %rsp - adjust_sp_up_imm -64 ; 
bin: 48 83 c4 c0 - ; asm: addq $1024, %rsp - adjust_sp_up_imm 1024 ; bin: 48 81 c4 00000400 - ; asm: addq $-1024, %rsp - adjust_sp_up_imm -1024 ; bin: 48 81 c4 fffffc00 - ; asm: addq $2147483647, %rsp - adjust_sp_up_imm 2147483647 ; bin: 48 81 c4 7fffffff - ; asm: addq $-2147483648, %rsp - adjust_sp_up_imm -2147483648 ; bin: 48 81 c4 80000000 - - ; Adjust Stack Pointer Down - ; asm: subq %rcx, %rsp - adjust_sp_down v1 ; bin: 48 29 cc - ; asm: subq %r10, %rsp - adjust_sp_down v3 ; bin: 4c 29 d4 - ; asm: subq $64, %rsp - adjust_sp_down_imm 64 ; bin: 48 83 ec 40 - ; asm: subq $-64, %rsp - adjust_sp_down_imm -64 ; bin: 48 83 ec c0 - ; asm: subq $1024, %rsp - adjust_sp_down_imm 1024 ; bin: 48 81 ec 00000400 - ; asm: subq $-1024, %rsp - adjust_sp_down_imm -1024 ; bin: 48 81 ec fffffc00 - ; asm: subq $2147483647, %rsp - adjust_sp_down_imm 2147483647 ; bin: 48 81 ec 7fffffff - ; asm: subq $-2147483648, %rsp - adjust_sp_down_imm -2147483648 ; bin: 48 81 ec 80000000 - - ; Shift immediates - ; asm: shlq $12, %rsi - [-,%rsi] v515 = ishl_imm v2, 12 ; bin: 48 c1 e6 0c - ; asm: shlq $13, %r8 - [-,%r8] v516 = ishl_imm v4, 13 ; bin: 49 c1 e0 0d - ; asm: sarq $32, %rsi - [-,%rsi] v517 = sshr_imm v2, 32 ; bin: 48 c1 fe 20 - ; asm: sarq $33, %r8 - [-,%r8] v518 = sshr_imm v4, 33 ; bin: 49 c1 f8 21 - ; asm: shrq $62, %rsi - [-,%rsi] v519 = ushr_imm v2, 62 ; bin: 48 c1 ee 3e - ; asm: shrq $63, %r8 - [-,%r8] v520 = ushr_imm v4, 63 ; bin: 49 c1 e8 3f - - - ; Rotate immediates - ; asm: rolq $12, %rsi - [-,%rsi] v5101 = rotl_imm v2, 12 ; bin: 48 c1 c6 0c - ; asm: rolq $13, %r8 - [-,%r8] v5102 = rotl_imm v4, 13 ; bin: 49 c1 c0 0d - ; asm: rorq $32, %rsi - [-,%rsi] v5103 = rotr_imm v2, 32 ; bin: 48 c1 ce 20 - ; asm: rorq $33, %r8 - [-,%r8] v5104 = rotr_imm v4, 33 ; bin: 49 c1 c8 21 - - - ; Load Complex - [-,%rax] v521 = iconst.i64 1 - [-,%rbx] v522 = iconst.i64 1 - [-,%rdi] v523 = iconst.i32 1 - [-,%rsi] v524 = iconst.i32 1 - ; asm: movq (%rax,%rbx,1), %rcx - [-,%rcx] v525 = load_complex.i64 
v521+v522 ; bin: heap_oob 48 8b 0c 18 - ; asm: movl (%rax,%rbx,1), %ecx - [-,%rcx] v526 = load_complex.i32 v521+v522 ; bin: heap_oob 8b 0c 18 - ; asm: movq 1(%rax,%rbx,1), %rcx - [-,%rcx] v527 = load_complex.i64 v521+v522+1 ; bin: heap_oob 48 8b 4c 18 01 - ; asm: movl 1(%rax,%rbx,1), %ecx - [-,%rcx] v528 = load_complex.i32 v521+v522+1 ; bin: heap_oob 8b 4c 18 01 - ; asm: mov 0x100000(%rax,%rbx,1),%rcx - [-,%rcx] v529 = load_complex.i64 v521+v522+0x1000 ; bin: heap_oob 48 8b 8c 18 00001000 - ; asm: mov 0x100000(%rax,%rbx,1),%ecx - [-,%rcx] v530 = load_complex.i32 v521+v522+0x1000 ; bin: heap_oob 8b 8c 18 00001000 - ; asm: movzbq (%rax,%rbx,1),%rcx - [-,%rcx] v531 = uload8_complex.i64 v521+v522 ; bin: heap_oob 48 0f b6 0c 18 - ; asm: movzbl (%rax,%rbx,1),%ecx - [-,%rcx] v532 = uload8_complex.i32 v521+v522 ; bin: heap_oob 0f b6 0c 18 - ; asm: movsbq (%rax,%rbx,1),%rcx - [-,%rcx] v533 = sload8_complex.i64 v521+v522 ; bin: heap_oob 48 0f be 0c 18 - ; asm: movsbl (%rax,%rbx,1),%ecx - [-,%rcx] v534 = sload8_complex.i32 v521+v522 ; bin: heap_oob 0f be 0c 18 - ; asm: movzwq (%rax,%rbx,1),%rcx - [-,%rcx] v535 = uload16_complex.i64 v521+v522 ; bin: heap_oob 48 0f b7 0c 18 - ; asm: movzwl (%rax,%rbx,1),%ecx - [-,%rcx] v536 = uload16_complex.i32 v521+v522 ; bin: heap_oob 0f b7 0c 18 - ; asm: movswq (%rax,%rbx,1),%rcx - [-,%rcx] v537 = sload16_complex.i64 v521+v522 ; bin: heap_oob 48 0f bf 0c 18 - ; asm: movswl (%rax,%rbx,1),%ecx - [-,%rcx] v538 = sload16_complex.i32 v521+v522 ; bin: heap_oob 0f bf 0c 18 - ; asm: mov (%rax,%rbx,1),%ecx - [-,%rcx] v539 = uload32_complex v521+v522 ; bin: heap_oob 8b 0c 18 - ; asm: movslq (%rax,%rbx,1),%rcx - [-,%rcx] v540 = sload32_complex v521+v522 ; bin: heap_oob 48 63 0c 18 - [-,%r13] v550 = iconst.i64 1 - [-,%r14] v551 = iconst.i64 1 - ; asm: mov 0x0(%r13,%r14,1),%r12d - [-,%r12] v552 = load_complex.i32 v550+v551 ; bin: heap_oob 47 8b 64 35 00 - - ; Store Complex - [-,%rcx] v600 = iconst.i64 1 - [-,%rcx] v601 = iconst.i32 1 - [-,%r10] v602 = 
iconst.i64 1 - [-,%r11] v603 = iconst.i32 1 - ; asm: mov %rcx,(%rax,%rbx,1) - store_complex v600, v521+v522 ; bin: heap_oob 48 89 0c 18 - ; asm: mov %rcx,0x1(%rax,%rbx,1) - store_complex v600, v521+v522+1 ; bin: heap_oob 48 89 4c 18 01 - ; asm: mov %rcx,0x100000(%rax,%rbx,1) - store_complex v600, v521+v522+0x1000 ; bin: heap_oob 48 89 8c 18 00001000 - ; asm: mov %ecx,(%rax,%rbx,1) - store_complex v601, v521+v522 ; bin: heap_oob 89 0c 18 - ; asm: mov %ecx,0x1(%rax,%rbx,1) - store_complex v601, v521+v522+1 ; bin: heap_oob 89 4c 18 01 - ; asm: mov %ecx,0x100000(%rax,%rbx,1) - store_complex v601, v521+v522+0x1000 ; bin: heap_oob 89 8c 18 00001000 - ; asm: mov %ecx,(%rax,%rbx,1) - istore32_complex v600, v521+v522 ; bin: heap_oob 89 0c 18 - ; asm: mov %cx,(%rax,%rbx,1) - istore16_complex v600, v521+v522 ; bin: heap_oob 66 89 0c 18 - ; asm: mov %cx,(%rax,%rbx,1) - istore16_complex v601, v521+v522 ; bin: heap_oob 66 89 0c 18 - ; asm: mov %r10w,(%rax,%rbx,1) - istore16_complex v602, v521+v522 ; bin: heap_oob 66 44 89 14 18 - ; asm: mov %r11w,(%rax,%rbx,1) - istore16_complex v603, v521+v522 ; bin: heap_oob 66 44 89 1c 18 - ; asm: mov %cl,(%rax,%rbx,1) - istore8_complex v600, v521+v522 ; bin: heap_oob 88 0c 18 - ; asm: mov %cl,(%rax,%rbx,1) - istore8_complex v601, v521+v522 ; bin: heap_oob 88 0c 18 - - ; asm: testq %rcx, %rcx - ; asm: je block1 - brz v1, block1 ; bin: 48 85 c9 74 1b - fallthrough block3 - -block3: - ; asm: testq %rsi, %rsi - ; asm: je block1 - brz v2, block1 ; bin: 48 85 f6 74 16 - fallthrough block4 - -block4: - ; asm: testq %r10, %r10 - ; asm: je block1 - brz v3, block1 ; bin: 4d 85 d2 74 11 - fallthrough block5 - -block5: - ; asm: testq %rcx, %rcx - ; asm: jne block1 - brnz v1, block1 ; bin: 48 85 c9 75 0c - fallthrough block6 - -block6: - ; asm: testq %rsi, %rsi - ; asm: jne block1 - brnz v2, block1 ; bin: 48 85 f6 75 07 - fallthrough block7 - -block7: - ; asm: testq %r10, %r10 - ; asm: jne block1 - brnz v3, block1 ; bin: 4d 85 d2 75 02 - - ; asm: jmp 
block2 - jump block2 ; bin: eb 01 - - ; asm: block1: -block1: - return ; bin: c3 - - ; asm: block2: -block2: - ; Add a no-op instruction to prevent fold_redundant_jump from removing this block. - ; asm: notq %rcx - [-,%rcx] v5000 = bnot v1 ; bin: 48 f7 d1 - jump block1 ; bin: eb fa -} - -; CPU flag instructions. -function %cpu_flags_I64() { -block0: - [-,%rcx] v1 = iconst.i64 1 - [-,%r10] v2 = iconst.i64 2 - jump block1 - -block1: - ; asm: cmpq %r10, %rcx - [-,%rflags] v10 = ifcmp v1, v2 ; bin: 4c 39 d1 - ; asm: cmpq %rcx, %r10 - [-,%rflags] v11 = ifcmp v2, v1 ; bin: 49 39 ca - - ; asm: je block1 - brif eq v11, block1 ; bin: 74 f8 - jump block2 - -block2: - ; asm: jne block1 - brif ne v11, block1 ; bin: 75 f6 - jump block3 - -block3: - ; asm: jl block1 - brif slt v11, block1 ; bin: 7c f4 - jump block4 - -block4: - ; asm: jge block1 - brif sge v11, block1 ; bin: 7d f2 - jump block5 - -block5: - ; asm: jg block1 - brif sgt v11, block1 ; bin: 7f f0 - jump block6 - -block6: - ; asm: jle block1 - brif sle v11, block1 ; bin: 7e ee - jump block7 - -block7: - ; asm: jb block1 - brif ult v11, block1 ; bin: 72 ec - jump block8 - -block8: - ; asm: jae block1 - brif uge v11, block1 ; bin: 73 ea - jump block9 - -block9: - ; asm: ja block1 - brif ugt v11, block1 ; bin: 77 e8 - jump block10 - -block10: - ; asm: jbe block1 - brif ule v11, block1 ; bin: 76 e6 - jump block11 - -block11: - - ; asm: sete %bl - [-,%rbx] v20 = trueif eq v11 ; bin: 0f 94 c3 - ; asm: setne %bl - [-,%rbx] v21 = trueif ne v11 ; bin: 0f 95 c3 - ; asm: setl %dl - [-,%rdx] v22 = trueif slt v11 ; bin: 0f 9c c2 - ; asm: setge %dl - [-,%rdx] v23 = trueif sge v11 ; bin: 0f 9d c2 - ; asm: setg %r10b - [-,%r10] v24 = trueif sgt v11 ; bin: 41 0f 9f c2 - ; asm: setle %r10b - [-,%r10] v25 = trueif sle v11 ; bin: 41 0f 9e c2 - ; asm: setb %r14b - [-,%r14] v26 = trueif ult v11 ; bin: 41 0f 92 c6 - ; asm: setae %r14b - [-,%r14] v27 = trueif uge v11 ; bin: 41 0f 93 c6 - ; asm: seta %r11b - [-,%r11] v28 = trueif ugt v11 ; 
bin: 41 0f 97 c3 - ; asm: setbe %r11b - [-,%r11] v29 = trueif ule v11 ; bin: 41 0f 96 c3 - - ; The trapif instructions are encoded as macros: a conditional jump over a ud2. - ; asm: jne .+4; ud2 - trapif eq v11, user0 ; bin: 75 02 user0 0f 0b - ; asm: je .+4; ud2 - trapif ne v11, user0 ; bin: 74 02 user0 0f 0b - ; asm: jnl .+4; ud2 - trapif slt v11, user0 ; bin: 7d 02 user0 0f 0b - ; asm: jnge .+4; ud2 - trapif sge v11, user0 ; bin: 7c 02 user0 0f 0b - ; asm: jng .+4; ud2 - trapif sgt v11, user0 ; bin: 7e 02 user0 0f 0b - ; asm: jnle .+4; ud2 - trapif sle v11, user0 ; bin: 7f 02 user0 0f 0b - ; asm: jnb .+4; ud2 - trapif ult v11, user0 ; bin: 73 02 user0 0f 0b - ; asm: jnae .+4; ud2 - trapif uge v11, user0 ; bin: 72 02 user0 0f 0b - ; asm: jna .+4; ud2 - trapif ugt v11, user0 ; bin: 76 02 user0 0f 0b - ; asm: jnbe .+4; ud2 - trapif ule v11, user0 ; bin: 77 02 user0 0f 0b - ; asm: jo .+4; ud2 - trapif of v11, user0 ; bin: 71 02 user0 0f 0b - ; asm: jno .+4; ud2 - trapif nof v11, user0 ; bin: 70 02 user0 0f 0b - - ; Debug trap. - debugtrap ; bin: cc - - ; Stack check. - ; asm: cmpq %rsp, %rcx - [-,%rflags] v40 = ifcmp_sp v1 ; bin: 48 39 e1 - ; asm: cmpq %rsp, %r10 - [-,%rflags] v41 = ifcmp_sp v2 ; bin: 49 39 e2 - - ; asm: cmpq $-100, %rcx - [-,%rflags] v522 = ifcmp_imm v1, -100 ; bin: 48 83 f9 9c - ; asm: cmpq $100, %r10 - [-,%rflags] v523 = ifcmp_imm v2, 100 ; bin: 49 83 fa 64 - - ; asm: cmpq $-10000, %rcx - [-,%rflags] v524 = ifcmp_imm v1, -10000 ; bin: 48 81 f9 ffffd8f0 - ; asm: cmpq $10000, %r10 - [-,%rflags] v525 = ifcmp_imm v2, 10000 ; bin: 49 81 fa 00002710 - - - return -} - -; Test for the encoding of outgoing_arg stack slots. 
-function %outargs() { - ss0 = incoming_arg 16, offset -16 - ss1 = outgoing_arg 8, offset 8 - ss2 = outgoing_arg 8, offset 0 - -block0: - [-,%rcx] v1 = iconst.i64 1 - - ; asm: movq %rcx, 8(%rsp) - [-,ss1] v10 = spill v1 ; bin: stk_ovf 48 89 8c 24 00000008 - ; asm: movq %rcx, (%rsp) - [-,ss2] v11 = spill v1 ; bin: stk_ovf 48 89 8c 24 00000000 - - return -} - -; Tests for i32 instructions in 64-bit mode. -; -; Note that many i32 instructions can be encoded both with and without a REX -; prefix if they only use the low 8 registers. Here, we are testing the REX -; encodings which are chosen by default. Switching to non-REX encodings should -; be done by an instruction shrinking pass. -function %I32() { - sig0 = () - fn0 = %foo() - - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - - ; Integer Constants. - - ; asm: movl $0x01020304, %ecx - [-,%rcx] v1 = iconst.i32 0x0102_0304 ; bin: b9 01020304 - ; asm: movl $0x11020304, %esi - [-,%rsi] v2 = iconst.i32 0x1102_0304 ; bin: be 11020304 - ; asm: movl $0x21020304, %r10d - [-,%r10] v3 = iconst.i32 0x2102_0304 ; bin: 41 ba 21020304 - ; asm: movl $0xff001122, %r8d - [-,%r8] v4 = iconst.i32 0xff00_1122 ; bin: 41 b8 ff001122 - ; asm: movl $0x88001122, %r14d - [-,%r14] v5 = iconst.i32 0xffff_ffff_8800_1122 ; bin: 41 be 88001122 - - ; Load/Store instructions. - - ; Register indirect addressing with no displacement. 
- - ; asm: movl (%rcx), %edi - [-,%rdi] v10 = load.i32 v1 ; bin: heap_oob 8b 39 - ; asm: movl (%rsi), %edx - [-,%rdx] v11 = load.i32 v2 ; bin: heap_oob 8b 16 - ; asm: movzwl (%rcx), %edi - [-,%rdi] v12 = uload16.i32 v1 ; bin: heap_oob 0f b7 39 - ; asm: movzwl (%rsi), %edx - [-,%rdx] v13 = uload16.i32 v2 ; bin: heap_oob 0f b7 16 - ; asm: movswl (%rcx), %edi - [-,%rdi] v14 = sload16.i32 v1 ; bin: heap_oob 0f bf 39 - ; asm: movswl (%rsi), %edx - [-,%rdx] v15 = sload16.i32 v2 ; bin: heap_oob 0f bf 16 - ; asm: movzbl (%rcx), %edi - [-,%rdi] v16 = uload8.i32 v1 ; bin: heap_oob 0f b6 39 - ; asm: movzbl (%rsi), %edx - [-,%rdx] v17 = uload8.i32 v2 ; bin: heap_oob 0f b6 16 - ; asm: movsbl (%rcx), %edi - [-,%rdi] v18 = sload8.i32 v1 ; bin: heap_oob 0f be 39 - ; asm: movsbl (%rsi), %edx - [-,%rdx] v19 = sload8.i32 v2 ; bin: heap_oob 0f be 16 - - ; Register-indirect with 8-bit signed displacement. - - ; asm: movl 50(%rcx), %edi - [-,%rdi] v20 = load.i32 v1+50 ; bin: heap_oob 8b 79 32 - ; asm: movl -50(%rsi), %edx - [-,%rdx] v21 = load.i32 v2-50 ; bin: heap_oob 8b 56 ce - ; asm: movzwl 50(%rcx), %edi - [-,%rdi] v22 = uload16.i32 v1+50 ; bin: heap_oob 0f b7 79 32 - ; asm: movzwl -50(%rsi), %edx - [-,%rdx] v23 = uload16.i32 v2-50 ; bin: heap_oob 0f b7 56 ce - ; asm: movswl 50(%rcx), %edi - [-,%rdi] v24 = sload16.i32 v1+50 ; bin: heap_oob 0f bf 79 32 - ; asm: movswl -50(%rsi), %edx - [-,%rdx] v25 = sload16.i32 v2-50 ; bin: heap_oob 0f bf 56 ce - ; asm: movzbl 50(%rcx), %edi - [-,%rdi] v26 = uload8.i32 v1+50 ; bin: heap_oob 0f b6 79 32 - ; asm: movzbl -50(%rsi), %edx - [-,%rdx] v27 = uload8.i32 v2-50 ; bin: heap_oob 0f b6 56 ce - ; asm: movsbl 50(%rcx), %edi - [-,%rdi] v28 = sload8.i32 v1+50 ; bin: heap_oob 0f be 79 32 - ; asm: movsbl -50(%rsi), %edx - [-,%rdx] v29 = sload8.i32 v2-50 ; bin: heap_oob 0f be 56 ce - - ; Register-indirect with 32-bit signed displacement. 
- - ; asm: movl 50000(%rcx), %edi - [-,%rdi] v30 = load.i32 v1+50000 ; bin: heap_oob 8b b9 0000c350 - ; asm: movl -50000(%rsi), %edx - [-,%rdx] v31 = load.i32 v2-50000 ; bin: heap_oob 8b 96 ffff3cb0 - ; asm: movzwl 50000(%rcx), %edi - [-,%rdi] v32 = uload16.i32 v1+50000 ; bin: heap_oob 0f b7 b9 0000c350 - ; asm: movzwl -50000(%rsi), %edx - [-,%rdx] v33 = uload16.i32 v2-50000 ; bin: heap_oob 0f b7 96 ffff3cb0 - ; asm: movswl 50000(%rcx), %edi - [-,%rdi] v34 = sload16.i32 v1+50000 ; bin: heap_oob 0f bf b9 0000c350 - ; asm: movswl -50000(%rsi), %edx - [-,%rdx] v35 = sload16.i32 v2-50000 ; bin: heap_oob 0f bf 96 ffff3cb0 - ; asm: movzbl 50000(%rcx), %edi - [-,%rdi] v36 = uload8.i32 v1+50000 ; bin: heap_oob 0f b6 b9 0000c350 - ; asm: movzbl -50000(%rsi), %edx - [-,%rdx] v37 = uload8.i32 v2-50000 ; bin: heap_oob 0f b6 96 ffff3cb0 - ; asm: movsbl 50000(%rcx), %edi - [-,%rdi] v38 = sload8.i32 v1+50000 ; bin: heap_oob 0f be b9 0000c350 - ; asm: movsbl -50000(%rsi), %edx - [-,%rdx] v39 = sload8.i32 v2-50000 ; bin: heap_oob 0f be 96 ffff3cb0 - - ; Integer Register Operations. - - ; asm: notl %ecx - [-,%rcx] v4000 = bnot v1 ; bin: f7 d1 - ; asm: notl %esi - [-,%rsi] v4001 = bnot v2 ; bin: f7 d6 - ; asm: notl %r10d - [-,%r10] v4002 = bnot v3 ; bin: 41 f7 d2 - - ; Integer Register-Register Operations. 
- - ; asm: addl %esi, %ecx - [-,%rcx] v40 = iadd v1, v2 ; bin: 01 f1 - ; asm: addl %r10d, %esi - [-,%rsi] v41 = iadd v2, v3 ; bin: 44 01 d6 - ; asm: addl %ecx, %r10d - [-,%r10] v42 = iadd v3, v1 ; bin: 41 01 ca - - ; asm: subl %esi, %ecx - [-,%rcx] v50 = isub v1, v2 ; bin: 29 f1 - ; asm: subl %r10d, %esi - [-,%rsi] v51 = isub v2, v3 ; bin: 44 29 d6 - ; asm: subl %ecx, %r10d - [-,%r10] v52 = isub v3, v1 ; bin: 41 29 ca - - ; asm: andl %esi, %ecx - [-,%rcx] v60 = band v1, v2 ; bin: 21 f1 - ; asm: andl %r10d, %esi - [-,%rsi] v61 = band v2, v3 ; bin: 44 21 d6 - ; asm: andl %ecx, %r10d - [-,%r10] v62 = band v3, v1 ; bin: 41 21 ca - - ; asm: orl %esi, %ecx - [-,%rcx] v70 = bor v1, v2 ; bin: 09 f1 - ; asm: orl %r10d, %esi - [-,%rsi] v71 = bor v2, v3 ; bin: 44 09 d6 - ; asm: orl %ecx, %r10d - [-,%r10] v72 = bor v3, v1 ; bin: 41 09 ca - - ; asm: xorl %esi, %ecx - [-,%rcx] v80 = bxor v1, v2 ; bin: 31 f1 - ; asm: xorl %r10d, %esi - [-,%rsi] v81 = bxor v2, v3 ; bin: 44 31 d6 - ; asm: xorl %ecx, %r10d - [-,%r10] v82 = bxor v3, v1 ; bin: 41 31 ca - - ; asm: shll %cl, %esi - [-,%rsi] v90 = ishl v2, v1 ; bin: d3 e6 - ; asm: shll %cl, %r10d - [-,%r10] v91 = ishl v3, v1 ; bin: 41 d3 e2 - ; asm: sarl %cl, %esi - [-,%rsi] v92 = sshr v2, v1 ; bin: d3 fe - ; asm: sarl %cl, %r10d - [-,%r10] v93 = sshr v3, v1 ; bin: 41 d3 fa - ; asm: shrl %cl, %esi - [-,%rsi] v94 = ushr v2, v1 ; bin: d3 ee - ; asm: shrl %cl, %r10d - [-,%r10] v95 = ushr v3, v1 ; bin: 41 d3 ea - - ; asm: roll %cl, %esi - [-,%rsi] v96 = rotl v2, v1 ; bin: d3 c6 - ; asm: roll %cl, %r10d - [-,%r10] v97 = rotl v3, v1 ; bin: 41 d3 c2 - ; asm: rorl %cl, %esi - [-,%rsi] v98 = rotr v2, v1 ; bin: d3 ce - ; asm: rorl %cl, %r10d - [-,%r10] v99 = rotr v3, v1 ; bin: 41 d3 ca - - ; Integer Register-Immediate Operations. - ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits. - ; Some take 8-bit immediates that are sign-extended to 64 bits. 
- - ; asm: addl $-100000, %ecx - [-,%rcx] v100 = iadd_imm v1, -100000 ; bin: 81 c1 fffe7960 - ; asm: addl $100000, %esi - [-,%rsi] v101 = iadd_imm v2, 100000 ; bin: 81 c6 000186a0 - ; asm: addl $0x7fffffff, %r10d - [-,%r10] v102 = iadd_imm v3, 0x7fff_ffff ; bin: 41 81 c2 7fffffff - ; asm: addl $100, %r8d - [-,%r8] v103 = iadd_imm v4, 100 ; bin: 41 83 c0 64 - ; asm: addl $-100, %r14d - [-,%r14] v104 = iadd_imm v5, -100 ; bin: 41 83 c6 9c - - ; asm: andl $-100000, %ecx - [-,%rcx] v110 = band_imm v1, -100000 ; bin: 81 e1 fffe7960 - ; asm: andl $100000, %esi - [-,%rsi] v111 = band_imm v2, 100000 ; bin: 81 e6 000186a0 - ; asm: andl $0x7fffffff, %r10d - [-,%r10] v112 = band_imm v3, 0x7fff_ffff ; bin: 41 81 e2 7fffffff - ; asm: andl $100, %r8d - [-,%r8] v113 = band_imm v4, 100 ; bin: 41 83 e0 64 - ; asm: andl $-100, %r14d - [-,%r14] v114 = band_imm v5, -100 ; bin: 41 83 e6 9c - - ; asm: orl $-100000, %ecx - [-,%rcx] v120 = bor_imm v1, -100000 ; bin: 81 c9 fffe7960 - ; asm: orl $100000, %esi - [-,%rsi] v121 = bor_imm v2, 100000 ; bin: 81 ce 000186a0 - ; asm: orl $0x7fffffff, %r10d - [-,%r10] v122 = bor_imm v3, 0x7fff_ffff ; bin: 41 81 ca 7fffffff - ; asm: orl $100, %r8d - [-,%r8] v123 = bor_imm v4, 100 ; bin: 41 83 c8 64 - ; asm: orl $-100, %r14d - [-,%r14] v124 = bor_imm v5, -100 ; bin: 41 83 ce 9c - ; asm: ret - - ; asm: xorl $-100000, %ecx - [-,%rcx] v130 = bxor_imm v1, -100000 ; bin: 81 f1 fffe7960 - ; asm: xorl $100000, %esi - [-,%rsi] v131 = bxor_imm v2, 100000 ; bin: 81 f6 000186a0 - ; asm: xorl $0x7fffffff, %r10d - [-,%r10] v132 = bxor_imm v3, 0x7fff_ffff ; bin: 41 81 f2 7fffffff - ; asm: xorl $100, %r8d - [-,%r8] v133 = bxor_imm v4, 100 ; bin: 41 83 f0 64 - ; asm: xorl $-100, %r14d - [-,%r14] v134 = bxor_imm v5, -100 ; bin: 41 83 f6 9c - - ; Register copies. 
- - ; asm: movl %esi, %ecx - [-,%rcx] v140 = copy v2 ; bin: 89 f1 - ; asm: movl %r10d, %esi - [-,%rsi] v141 = copy v3 ; bin: 44 89 d6 - ; asm: movl %ecx, %r10d - [-,%r10] v142 = copy v1 ; bin: 41 89 ca - - ; More arithmetic. - - ; asm: imull %esi, %ecx - [-,%rcx] v150 = imul v1, v2 ; bin: 0f af ce - ; asm: imull %r10d, %esi - [-,%rsi] v151 = imul v2, v3 ; bin: 41 0f af f2 - ; asm: imull %ecx, %r10d - [-,%r10] v152 = imul v3, v1 ; bin: 44 0f af d1 - - [-,%rax] v160 = iconst.i32 1 - [-,%rdx] v161 = iconst.i32 2 - ; asm: idivl %ecx - [-,%rax,%rdx] v162, v163 = x86_sdivmodx v160, v161, v1 ; bin: int_divz f7 f9 - ; asm: idivl %esi - [-,%rax,%rdx] v164, v165 = x86_sdivmodx v160, v161, v2 ; bin: int_divz f7 fe - ; asm: idivl %r10d - [-,%rax,%rdx] v166, v167 = x86_sdivmodx v160, v161, v3 ; bin: int_divz 41 f7 fa - ; asm: divl %ecx - [-,%rax,%rdx] v168, v169 = x86_udivmodx v160, v161, v1 ; bin: int_divz f7 f1 - ; asm: divl %esi - [-,%rax,%rdx] v170, v171 = x86_udivmodx v160, v161, v2 ; bin: int_divz f7 f6 - ; asm: divl %r10d - [-,%rax,%rdx] v172, v173 = x86_udivmodx v160, v161, v3 ; bin: int_divz 41 f7 f2 - - ; Bit-counting instructions. - - ; asm: popcntl %esi, %ecx - [-,%rcx] v200 = popcnt v2 ; bin: f3 0f b8 ce - ; asm: popcntl %r10d, %esi - [-,%rsi] v201 = popcnt v3 ; bin: f3 41 0f b8 f2 - ; asm: popcntl %ecx, %r10d - [-,%r10] v202 = popcnt v1 ; bin: f3 44 0f b8 d1 - - ; asm: lzcntl %esi, %ecx - [-,%rcx] v203 = clz v2 ; bin: f3 0f bd ce - ; asm: lzcntl %r10d, %esi - [-,%rsi] v204 = clz v3 ; bin: f3 41 0f bd f2 - ; asm: lzcntl %ecx, %r10d - [-,%r10] v205 = clz v1 ; bin: f3 44 0f bd d1 - - ; asm: tzcntl %esi, %ecx - [-,%rcx] v206 = ctz v2 ; bin: f3 0f bc ce - ; asm: tzcntl %r10d, %esi - [-,%rsi] v207 = ctz v3 ; bin: f3 41 0f bc f2 - ; asm: tzcntl %ecx, %r10d - [-,%r10] v208 = ctz v1 ; bin: f3 44 0f bc d1 - - ; Integer comparisons. 
- - ; asm: cmpl %esi, %ecx - ; asm: sete %bl - [-,%rbx] v300 = icmp eq v1, v2 ; bin: 39 f1 0f 94 c3 - ; asm: cmpl %r10d, %esi - ; asm: sete %dl - [-,%rdx] v301 = icmp eq v2, v3 ; bin: 44 39 d6 0f 94 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setne %bl - [-,%rbx] v302 = icmp ne v1, v2 ; bin: 39 f1 0f 95 c3 - ; asm: cmpl %r10d, %esi - ; asm: setne %dl - [-,%rdx] v303 = icmp ne v2, v3 ; bin: 44 39 d6 0f 95 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setl %bl - [-,%rbx] v304 = icmp slt v1, v2 ; bin: 39 f1 0f 9c c3 - ; asm: cmpl %r10d, %esi - ; asm: setl %dl - [-,%rdx] v305 = icmp slt v2, v3 ; bin: 44 39 d6 0f 9c c2 - - ; asm: cmpl %esi, %ecx - ; asm: setge %bl - [-,%rbx] v306 = icmp sge v1, v2 ; bin: 39 f1 0f 9d c3 - ; asm: cmpl %r10d, %esi - ; asm: setge %dl - [-,%rdx] v307 = icmp sge v2, v3 ; bin: 44 39 d6 0f 9d c2 - - ; asm: cmpl %esi, %ecx - ; asm: setg %bl - [-,%rbx] v308 = icmp sgt v1, v2 ; bin: 39 f1 0f 9f c3 - ; asm: cmpl %r10d, %esi - ; asm: setg %dl - [-,%rdx] v309 = icmp sgt v2, v3 ; bin: 44 39 d6 0f 9f c2 - - ; asm: cmpl %esi, %ecx - ; asm: setle %bl - [-,%rbx] v310 = icmp sle v1, v2 ; bin: 39 f1 0f 9e c3 - ; asm: cmpl %r10d, %esi - ; asm: setle %dl - [-,%rdx] v311 = icmp sle v2, v3 ; bin: 44 39 d6 0f 9e c2 - - ; asm: cmpl %esi, %ecx - ; asm: setb %bl - [-,%rbx] v312 = icmp ult v1, v2 ; bin: 39 f1 0f 92 c3 - ; asm: cmpl %r10d, %esi - ; asm: setb %dl - [-,%rdx] v313 = icmp ult v2, v3 ; bin: 44 39 d6 0f 92 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setae %bl - [-,%rbx] v314 = icmp uge v1, v2 ; bin: 39 f1 0f 93 c3 - ; asm: cmpl %r10d, %esi - ; asm: setae %dl - [-,%rdx] v315 = icmp uge v2, v3 ; bin: 44 39 d6 0f 93 c2 - - ; asm: cmpl %esi, %ecx - ; asm: seta %bl - [-,%rbx] v316 = icmp ugt v1, v2 ; bin: 39 f1 0f 97 c3 - ; asm: cmpl %r10d, %esi - ; asm: seta %dl - [-,%rdx] v317 = icmp ugt v2, v3 ; bin: 44 39 d6 0f 97 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setbe %bl - [-,%rbx] v318 = icmp ule v1, v2 ; bin: 39 f1 0f 96 c3 - ; asm: cmpl %r10d, %esi - ; asm: setbe %dl - [-,%rdx] 
v319 = icmp ule v2, v3 ; bin: 44 39 d6 0f 96 c2 - - ; asm: cmpl $37, %ecx - ; asm: setl %bl - [-,%rbx] v320 = icmp_imm slt v1, 37 ; bin: 83 f9 25 0f 9c c3 - - ; asm: cmpl $100000, %ecx - ; asm: setl %bl - [-,%rbx] v321 = icmp_imm slt v1, 100000 ; bin: 81 f9 000186a0 0f 9c c3 - - ; Bool-to-int conversions. - - ; asm: movzbl %bl, %ecx - [-,%rcx] v350 = bint.i32 v300 ; bin: 0f b6 cb - ; asm: movzbl %dl, %esi - [-,%rsi] v351 = bint.i32 v301 ; bin: 0f b6 f2 - - ; Spill / Fill. - - ; asm: movl %ecx, 1032(%rsp) - [-,ss1] v500 = spill v1 ; bin: stk_ovf 89 8c 24 00000408 - ; asm: movl %esi, 1032(%rsp) - [-,ss1] v501 = spill v2 ; bin: stk_ovf 89 b4 24 00000408 - ; asm: movl %r10d, 1032(%rsp) - [-,ss1] v502 = spill v3 ; bin: stk_ovf 44 89 94 24 00000408 - - ; asm: movl 1032(%rsp), %ecx - [-,%rcx] v510 = fill v500 ; bin: 8b 8c 24 00000408 - ; asm: movl 1032(%rsp), %esi - [-,%rsi] v511 = fill v501 ; bin: 8b b4 24 00000408 - ; asm: movl 1032(%rsp), %r10d - [-,%r10] v512 = fill v502 ; bin: 44 8b 94 24 00000408 - - ; asm: movl %ecx, 1032(%rsp) - regspill v1, %rcx -> ss1 ; bin: stk_ovf 89 8c 24 00000408 - ; asm: movl 1032(%rsp), %ecx - regfill v1, ss1 -> %rcx ; bin: 8b 8c 24 00000408 - - ; asm: cmpl %esi, %ecx - [-,%rflags] v520 = ifcmp v1, v2 ; bin: 39 f1 - ; asm: cmpl %r10d, %esi - [-,%rflags] v521 = ifcmp v2, v3 ; bin: 44 39 d6 - - ; asm: cmpl $-100, %ecx - [-,%rflags] v522 = ifcmp_imm v1, -100 ; bin: 83 f9 9c - ; asm: cmpl $100, %r10d - [-,%rflags] v523 = ifcmp_imm v3, 100 ; bin: 41 83 fa 64 - - ; asm: cmpl $-10000, %ecx - [-,%rflags] v524 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0 - ; asm: cmpl $10000, %r10d - [-,%rflags] v525 = ifcmp_imm v3, 10000 ; bin: 41 81 fa 00002710 - - ; asm: shll $2, %esi - [-,%rsi] v526 = ishl_imm v2, 2 ; bin: c1 e6 02 - ; asm: shll $12, %r10d - [-,%r10] v527 = ishl_imm v3, 12 ; bin: 41 c1 e2 0c - ; asm: sarl $5, %esi - [-,%rsi] v529 = sshr_imm v2, 5 ; bin: c1 fe 05 - ; asm: sarl $32, %r10d - [-,%r10] v530 = sshr_imm v3, 32 ; bin: 41 c1 fa 20 - ; 
asm: shrl $8, %esi - [-,%rsi] v532 = ushr_imm v2, 8 ; bin: c1 ee 08 - ; asm: shrl $31, %r10d - [-,%r10] v533 = ushr_imm v3, 31 ; bin: 41 c1 ea 1f - - ; asm: testl %ecx, %ecx - ; asm: je block1x - brz v1, block1 ; bin: 85 c9 74 18 - fallthrough block3 - -block3: - ; asm: testl %esi, %esi - ; asm: je block1x - brz v2, block1 ; bin: 85 f6 74 14 - fallthrough block4 - -block4: - ; asm: testl %r10d, %r10d - ; asm: je block1x - brz v3, block1 ; bin: 45 85 d2 74 0f - fallthrough block5 - -block5: - ; asm: testl %ecx, %ecx - ; asm: jne block1x - brnz v1, block1 ; bin: 85 c9 75 0b - fallthrough block6 - -block6: - ; asm: testl %esi, %esi - ; asm: jne block1x - brnz v2, block1 ; bin: 85 f6 75 07 - fallthrough block7 - -block7: - ; asm: testl %r10d, %r10d - ; asm: jne block1x - brnz v3, block1 ; bin: 45 85 d2 75 02 - - ; asm: jmp block2x - jump block2 ; bin: eb 01 - - ; asm: block1x: -block1: - return ; bin: c3 - - ; asm: block2x: -block2: - ; Add a no-op instruction to prevent fold_redundant_jump from removing this block. - ; asm: notl %ecx - [-,%rcx] v5000 = bnot v1 ; bin: f7 d1 - jump block1 ; bin: eb fb - -} - -; Tests for i32/i8 conversion instructions. -function %I32_I8() { -block0: - [-,%rcx] v1 = iconst.i32 1 - [-,%rsi] v2 = iconst.i32 2 - [-,%r10] v3 = iconst.i32 3 - - [-,%rcx] v11 = ireduce.i8 v1 ; bin: - [-,%rsi] v12 = ireduce.i8 v2 ; bin: - [-,%r10] v13 = ireduce.i8 v3 ; bin: - - ; asm: movsbl %cl, %esi - [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f be f1 - ; asm: movsbl %sil, %r10d - [-,%r10] v21 = sextend.i32 v12 ; bin: 44 0f be d6 - ; asm: movsbl %r10b, %ecx - [-,%rcx] v22 = sextend.i32 v13 ; bin: 41 0f be ca - - ; asm: movzbl %cl, %esi - [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b6 f1 - ; asm: movzbl %sil, %r10d - [-,%r10] v31 = uextend.i32 v12 ; bin: 44 0f b6 d6 - ; asm: movzbl %r10b, %ecx - [-,%rcx] v32 = uextend.i32 v13 ; bin: 41 0f b6 ca - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i32/i16 conversion instructions. 
-function %I32_I16() { -block0: - [-,%rcx] v1 = iconst.i32 1 - [-,%rsi] v2 = iconst.i32 2 - [-,%r10] v3 = iconst.i32 3 - - [-,%rcx] v11 = ireduce.i16 v1 ; bin: - [-,%rsi] v12 = ireduce.i16 v2 ; bin: - [-,%r10] v13 = ireduce.i16 v3 ; bin: - - ; asm: movswl %cx, %esi - [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f bf f1 - ; asm: movswl %si, %r10d - [-,%r10] v21 = sextend.i32 v12 ; bin: 44 0f bf d6 - ; asm: movswl %r10w, %ecx - [-,%rcx] v22 = sextend.i32 v13 ; bin: 41 0f bf ca - - ; asm: movzwl %cx, %esi - [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b7 f1 - ; asm: movzwl %si, %r10d - [-,%r10] v31 = uextend.i32 v12 ; bin: 44 0f b7 d6 - ; asm: movzwl %r10w, %ecx - [-,%rcx] v32 = uextend.i32 v13 ; bin: 41 0f b7 ca - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i64/i8 conversion instructions. -function %I64_I8() { -block0: - [-,%rcx] v1 = iconst.i64 1 - [-,%rsi] v2 = iconst.i64 2 - [-,%r10] v3 = iconst.i64 3 - - [-,%rcx] v11 = ireduce.i8 v1 ; bin: - [-,%rsi] v12 = ireduce.i8 v2 ; bin: - [-,%r10] v13 = ireduce.i8 v3 ; bin: - - ; asm: movsbq %cl, %rsi - [-,%rsi] v20 = sextend.i64 v11 ; bin: 48 0f be f1 - ; asm: movsbq %sil, %r10 - [-,%r10] v21 = sextend.i64 v12 ; bin: 4c 0f be d6 - ; asm: movsbq %r10b, %rcx - [-,%rcx] v22 = sextend.i64 v13 ; bin: 49 0f be ca - - ; asm: movzbl %cl, %esi - [-,%rsi] v30 = uextend.i64 v11 ; bin: 0f b6 f1 - ; asm: movzbl %sil, %r10d - [-,%r10] v31 = uextend.i64 v12 ; bin: 44 0f b6 d6 - ; asm: movzbl %r10b, %ecx - [-,%rcx] v32 = uextend.i64 v13 ; bin: 41 0f b6 ca - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i64/i16 conversion instructions. 
-function %I64_I16() { -block0: - [-,%rcx] v1 = iconst.i64 1 - [-,%rsi] v2 = iconst.i64 2 - [-,%r10] v3 = iconst.i64 3 - - [-,%rcx] v11 = ireduce.i16 v1 ; bin: - [-,%rsi] v12 = ireduce.i16 v2 ; bin: - [-,%r10] v13 = ireduce.i16 v3 ; bin: - - ; asm: movswq %cx, %rsi - [-,%rsi] v20 = sextend.i64 v11 ; bin: 48 0f bf f1 - ; asm: movswq %si, %r10 - [-,%r10] v21 = sextend.i64 v12 ; bin: 4c 0f bf d6 - ; asm: movswq %r10w, %rcx - [-,%rcx] v22 = sextend.i64 v13 ; bin: 49 0f bf ca - - ; asm: movzwl %cx, %esi - [-,%rsi] v30 = uextend.i64 v11 ; bin: 0f b7 f1 - ; asm: movzwl %si, %r10d - [-,%r10] v31 = uextend.i64 v12 ; bin: 44 0f b7 d6 - ; asm: movzwl %r10w, %ecx - [-,%rcx] v32 = uextend.i64 v13 ; bin: 41 0f b7 ca - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i64/i32 conversion instructions. -function %I64_I32() { -block0: - [-,%rcx] v1 = iconst.i64 1 - [-,%rsi] v2 = iconst.i64 2 - [-,%r10] v3 = iconst.i64 3 - - [-,%rcx] v11 = ireduce.i32 v1 ; bin: - [-,%rsi] v12 = ireduce.i32 v2 ; bin: - [-,%r10] v13 = ireduce.i32 v3 ; bin: - - ; asm: movslq %ecx, %rsi - [-,%rsi] v20 = sextend.i64 v11 ; bin: 48 63 f1 - ; asm: movslq %esi, %r10 - [-,%r10] v21 = sextend.i64 v12 ; bin: 4c 63 d6 - ; asm: movslq %r10d, %rcx - [-,%rcx] v22 = sextend.i64 v13 ; bin: 49 63 ca - - ; asm: movl %ecx, %esi - [-,%rsi] v30 = uextend.i64 v11 ; bin: 89 ce - ; asm: movl %esi, %r10d - [-,%r10] v31 = uextend.i64 v12 ; bin: 41 89 f2 - ; asm: movl %r10d, %ecx - [-,%rcx] v32 = uextend.i64 v13 ; bin: 44 89 d1 - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i64 jump table instructions. -function %I64_JT(i64 [%rdi]) { - jt0 = jump_table [block1, block2, block3] - -block0(v0: i64 [%rdi]): - ; Note: The next two lines will need to change whenever instructions are - ; added or removed from this test. 
- [-, %rax] v1 = jump_table_base.i64 jt0 ; bin: 48 8d 05 00000039 PCRelRodata4(jt0) - [-, %r10] v2 = jump_table_base.i64 jt0 ; bin: 4c 8d 15 00000032 PCRelRodata4(jt0) - - [-, %rbx] v10 = iconst.i64 1 - [-, %r13] v11 = iconst.i64 2 - - [-, %rax] v20 = jump_table_entry.i64 v10, v1, 4, jt0 ; bin: 48 63 04 98 - [-, %rax] v21 = jump_table_entry.i64 v10, v2, 4, jt0 ; bin: 49 63 04 9a - [-, %rax] v22 = jump_table_entry.i64 v11, v1, 4, jt0 ; bin: 4a 63 04 a8 - [-, %rax] v23 = jump_table_entry.i64 v11, v2, 4, jt0 ; bin: 4b 63 04 aa - - [-, %r10] v30 = jump_table_entry.i64 v10, v1, 4, jt0 ; bin: 4c 63 14 98 - [-, %r10] v31 = jump_table_entry.i64 v10, v2, 4, jt0 ; bin: 4d 63 14 9a - [-, %r10] v32 = jump_table_entry.i64 v11, v1, 4, jt0 ; bin: 4e 63 14 a8 - [-, %r10] v33 = jump_table_entry.i64 v11, v2, 4, jt0 ; bin: 4f 63 14 aa - - fallthrough block10 - -block10: - indirect_jump_table_br v10, jt0 ; bin: ff e3 -block11: - indirect_jump_table_br v11, jt0 ; bin: 41 ff e5 - -block1: - fallthrough block2 -block2: - fallthrough block3 -block3: - trap user0 -} - -function %r12_r13_loads() { -block0: - [-,%r12] v1 = iconst.i64 0x0123_4567_89ab_cdef - [-,%r13] v2 = iconst.i64 0xfedc_ba98_7654_3210 - [-,%rax] v3 = iconst.i64 0x1 - - ;; Simple GPR load. - ; asm: movq (%r12), %rdx - [-,%rdx] v4 = load.i64 notrap v1 ; bin: 49 8b 14 24 - ; asm: movq (%r13), %rdx - [-,%rdx] v5 = load.i64 notrap v2 ; bin: 49 8b 55 00 - - ;; Load with disp8. - ; asm: movq 0x1(%r12), %rdx - [-,%rdx] v6 = load.i64 notrap v1+1 ; bin: 49 8b 54 24 01 - ; asm: movq 0x1(%r13), %rdx - [-,%rdx] v7 = load.i64 notrap v2+1 ; bin: 49 8b 55 01 - - ;; Load with disp32. - ; asm: movq 0x100(%r12), %rdx - [-,%rdx] v8 = load.i64 notrap v1+256 ; bin: 49 8b 94 24 00000100 - ; asm: movq 0x100(%r13), %rdx - [-,%rdx] v9 = load.i64 notrap v2+256 ; bin: 49 8b 95 00000100 - - ;; Load for base+index. 
- ; asm: movq (%r12, %rax, 1), %rdx - [-,%rdx] v10 = load_complex.i64 notrap v1+v3 ; bin: 49 8b 14 04 - ; asm: movq (%r13, %rax, 1), %rdx - [-,%rdx] v11 = load_complex.i64 notrap v2+v3 ; bin: 49 8b 54 05 00 - - ;; Now for FP values. - ; asm: movss (%r12), %xmm0 - [-,%xmm0] v12 = load.f32 notrap v1 ; bin: f3 41 0f 10 04 24 - ; asm: movss (%r13), %xmm0 - [-,%xmm0] v13 = load.f32 notrap v2 ; bin: f3 41 0f 10 45 00 - - ;; Load with disp8. - ; asm: movss 0x1(%r12), %xmm0 - [-,%xmm0] v14 = load.f32 notrap v1+1 ; bin: f3 41 0f 10 44 24 01 - ; asm: movss 0x1(%r13), %xmm0 - [-,%xmm0] v15 = load.f32 notrap v2+1 ; bin: f3 41 0f 10 45 01 - - ;; Load with disp32. - ; asm: movss 0x100(%r12), %xmm0 - [-,%xmm0] v16 = load.f32 notrap v1+256 ; bin: f3 41 0f 10 84 24 00000100 - ; asm: movss 0x100(%r13), %xmm0 - [-,%xmm0] v17 = load.f32 notrap v2+256 ; bin: f3 41 0f 10 85 00000100 - - ;; Load for base+index. - ; asm: movss (%r12, %rax, 1), %xmm0 - [-,%xmm0] v18 = load_complex.f32 notrap v1+v3 ; bin: f3 41 0f 10 04 04 - ; asm: movss (%r13, %rax, 1), %xmm0 - [-,%xmm0] v19 = load_complex.f32 notrap v2+v3 ; bin: f3 41 0f 10 44 05 00 - - return -} - -function %r12_r13_stores() { -block0: - [-,%r12] v1 = iconst.i64 0x0123_4567_89ab_cdef - [-,%r13] v2 = iconst.i64 0xfedc_ba98_7654_3210 - [-,%rax] v3 = iconst.i64 0x1 - [-,%xmm0] v4 = f32const 0x1.0 - - ;; Simple GPR load. 
- ; asm: movq %rax, (%r12) - store notrap v3, v1; bin: 49 89 04 24 - ; asm: movq (%r13), %rdx - store notrap v3, v2; bin: 49 89 45 00 - - ; asm: movq %rax, 0x1(%r12) - store notrap v3, v1+1; bin: 49 89 44 24 01 - ; asm: movq %rax, 0x1(%r13) - store notrap v3, v2+1; bin: 49 89 45 01 - - ; asm: movq %rax, 0x100(%r12) - store notrap v3, v1+256; bin: 49 89 84 24 00000100 - ; asm: movq %rax, 0x100(%r13) - store notrap v3, v2+256; bin: 49 89 85 00000100 - - ; asm: movq %rax, (%r12, %rax, 1) - store_complex notrap v3, v1+v3; bin: 49 89 04 04 - ; asm: movq %rax, (%r13, %rax, 1) - store_complex notrap v3, v2+v3; bin: 49 89 44 05 00 - - ; asm: movb %al, (%r12) - istore8 notrap v3, v1; bin: 41 88 04 24 - ; asm: movb %al, (%r13) - istore8 notrap v3, v2; bin: 41 88 45 00 - - ; asm: movb %al, 0x1(%r12) - istore8 notrap v3, v1+1; bin: 41 88 44 24 01 - ; asm: movb %al, 0x1(%r13) - istore8 notrap v3, v2+1; bin: 41 88 45 01 - - ; asm: movb %al, 0x100(%r12) - istore8 notrap v3, v1+256; bin: 41 88 84 24 00000100 - ; asm: movb %al, 0x100(%r13) - istore8 notrap v3, v2+256; bin: 41 88 85 00000100 - - ; asm: movb %al, (%r12, %rax, 1) - istore8_complex notrap v3, v1+v3; bin: 41 88 04 04 - ; asm: movb %al, (%r13, %rax, 1) - istore8_complex notrap v3, v2+v3; bin: 41 88 44 05 00 - - ; asm: movss %xmm0, (%r12) - store notrap v4, v1; bin: f3 41 0f 11 04 24 - ; asm: movss %xmm0, (%r13) - store notrap v4, v2; bin: f3 41 0f 11 45 00 - - ; asm: movss %xmm0, 0x1(%r12) - store notrap v4, v1+1; bin: f3 41 0f 11 44 24 01 - ; asm: movss %xmm0, 0x1(%r13) - store notrap v4, v2+1; bin: f3 41 0f 11 45 01 - - ; asm: movss %xmm0, 0x100(%r12) - store notrap v4, v1+256; bin: f3 41 0f 11 84 24 00000100 - ; asm: movss %xmm0, 0x100(%r13) - store notrap v4, v2+256; bin: f3 41 0f 11 85 00000100 - - ; asm: movss %xmm0, (%r12, %rax, 1) - store_complex notrap v4, v1+v3; bin: f3 41 0f 11 04 04 - ; asm: movss %xmm0, (%r13, %rax, 1) - store_complex notrap v4, v2+v3; bin: f3 41 0f 11 44 05 00 - - return -} - -function 
%B64() { -block0: - [-, %rax] v1 = bconst.b64 true ; bin: 40 b8 00000001 - [-, %r10] v0 = bconst.b64 true ; bin: 41 ba 00000001 - return -} - -function %V128() { -block0: - [-,%r10] v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4 - [-, %xmm9] v4 = vconst.i32x4 [0 1 2 3] ; bin: 44 0f 10 0d 0000000f PCRelRodata4(33) - store v4, v3 ; bin: heap_oob 45 0f 11 0a - - [-, %r11] v5 = iconst.i64 0x1234 - [-, %xmm2] v6 = load.i32x4 v5 ; bin: heap_oob 41 0f 10 13 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/br-i128.clif b/cranelift/filetests/filetests/isa/x86/br-i128.clif deleted file mode 100644 index fccc691aa3..0000000000 --- a/cranelift/filetests/filetests/isa/x86/br-i128.clif +++ /dev/null @@ -1,42 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i128) -> i8 fast { -block0(v0: i128): - brz v0, block2 - ; check: v0 = iconcat v3, v4 - ; nextln: v5 = icmp_imm eq v3, 0 - ; nextln: v6 = icmp_imm eq v4, 0 - ; nextln: v7 = band v5, v6 - ; nextln: brnz v7, block2 - jump block1 - -block1: - v1 = iconst.i8 0 - return v1 - -block2: - v2 = iconst.i8 1 - return v2 -} - -function u0:1(i128) -> i8 fast { -block0(v0: i128): - brnz v0, block2 - ; check: v0 = iconcat v3, v4 - ; nextln: brnz v3, block2 - ; nextln: fallthrough block3 - - ; check: block3: - ; nextln: brnz.i64 v4, block2 - jump block1 - ; nextln: fallthrough block1 - -block1: - v1 = iconst.i8 0 - return v1 - -block2: - v2 = iconst.i8 1 - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/brz-i8.clif b/cranelift/filetests/filetests/isa/x86/brz-i8.clif deleted file mode 100644 index fda005bc81..0000000000 --- a/cranelift/filetests/filetests/isa/x86/brz-i8.clif +++ /dev/null @@ -1,38 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0() -> b1 { -block0: - v0 = iconst.i8 0 - ; check: v0 = iconst.i8 0 - brz v0, block1 - ; nextln: v3 = uextend.i32 v0 - ; nextln: brz v3, block1 - jump block2 - -block1: - v1 = bconst.b1 true - return v1 - -block2: - v2 = bconst.b1 
false - return v2 -} - -function u0:1() -> b1 { -block0: - v0 = iconst.i8 0 - ; check: v0 = iconst.i8 0 - brnz v0, block1 - ; nextln: v3 = uextend.i32 v0 - ; nextln: brnz v3, block1 - jump block2 - -block1: - v1 = bconst.b1 false - return v1 - -block2: - v2 = bconst.b1 true - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif b/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif deleted file mode 100644 index eb537d7c1a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif +++ /dev/null @@ -1,36 +0,0 @@ -test compile -target i686 legacy - -function u0:0(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = iconcat v0, v1 - ; check: v6 = fill v0 - ; nextln: v3 = icmp_imm eq v6, 0 - ; nextln: v7 = fill v1 - ; nextln: v4 = icmp_imm eq v7, 0 - ; nextln: v5 = band v3, v4 - ; nextln: brnz v5, block1 - brz v2, block1 - jump block2 -block1: - trap unreachable -block2: - trap unreachable -} - -function u0:1(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = iconcat v0, v1 - ; check: v3 = fill v0 - ; nextln: brnz v3, block1 - ; nextln: fallthrough block3 - ; check: block3: - ; nextln: v4 = fill.i32 v1 - ; nextln: brnz v4, block1 - brnz v2, block1 - jump block2 -block1: - trap unreachable -block2: - trap unreachable -} diff --git a/cranelift/filetests/filetests/isa/x86/extend-i128.clif b/cranelift/filetests/filetests/isa/x86/extend-i128.clif deleted file mode 100644 index e7da3f0387..0000000000 --- a/cranelift/filetests/filetests/isa/x86/extend-i128.clif +++ /dev/null @@ -1,37 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0() -> b1 { -block0: - v0 = iconst.i64 0xffff_ffff_eeee_0000 - ; check: v0 = iconst.i64 0xffff_ffff_eeee_0000 - ; nextln: v2 -> v0 - v1 = uextend.i128 v0 - ; nextln: v7 = iconst.i64 0 - ; nextln: v3 -> v7 - ; nextln: v1 = iconcat v0, v7 - - v2, v3 = isplit v1 - v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000 - v5 = icmp_imm eq v3, 0 - - v6 = band v4, v5 - return v6 -} - -function u0:1() -> 
b1 { -block0: - v0 = iconst.i64 0xffff_ffff_eeee_0000 - ; check: v0 = iconst.i64 0xffff_ffff_eeee_0000 - ; nextln: v2 -> v0 - v1 = sextend.i128 v0 - ; nextln: v8 = copy v0 - ; nextln: v7 = sshr_imm v8, 63 - ; nextln: v3 -> v7 - - v2, v3 = isplit v1 - v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000 - v5 = icmp_imm eq v3, 0xffff_ffff_ffff_ffff - v6 = band v4, v5 - return v6 -} diff --git a/cranelift/filetests/filetests/isa/x86/extend-i64.clif b/cranelift/filetests/filetests/isa/x86/extend-i64.clif deleted file mode 100644 index a3d892c488..0000000000 --- a/cranelift/filetests/filetests/isa/x86/extend-i64.clif +++ /dev/null @@ -1,37 +0,0 @@ -test compile -target i686 legacy - -function u0:0() -> b1 { -block0: - v0 = iconst.i32 0xffff_ee00 - ; check: v0 = iconst.i32 0xffff_ee00 - ; nextln: v2 -> v0 - v1 = uextend.i64 v0 - ; nextln: v7 = iconst.i32 0 - ; nextln: v3 -> v7 - ; nextln: v1 = iconcat v0, v7 - - v2, v3 = isplit v1 - v4 = icmp_imm eq v2, 0xffff_ee00 - v5 = icmp_imm eq v3, 0 - - v6 = band v4, v5 - return v6 -} - -function u0:1() -> b1 { -block0: - v0 = iconst.i32 0xffff_ee00 - ; check: v0 = iconst.i32 0xffff_ee00 - ; nextln: v2 -> v0 - v1 = sextend.i64 v0 - ; nextln: v10 = copy v0 - ; nextln: v7 = sshr_imm v10, 31 - ; nextln: v3 -> v7 - - v2, v3 = isplit v1 - v4 = icmp_imm eq v2, 0xffff_ee00 - v5 = icmp_imm eq v3, 0xffff_ffff - v6 = band v4, v5 - return v6 -} diff --git a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif deleted file mode 100644 index 3bc9adf5bc..0000000000 --- a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif +++ /dev/null @@ -1,17 +0,0 @@ -; Check that floating-point and integer constants equal to zero are optimized correctly. 
-test binemit -target i686 legacy - -function %foo() -> f32 fast { -block0: - ; asm: xorps %xmm0, %xmm0 - [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 - return v0 -} - -function %bar() -> f64 fast { -block0: - ; asm: xorpd %xmm0, %xmm0 - [-,%xmm0] v1 = f64const 0.0 ; bin: 66 0f 57 c0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif deleted file mode 100644 index 6fff51c7b1..0000000000 --- a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif +++ /dev/null @@ -1,31 +0,0 @@ -; Check that floating-point constants equal to zero are optimized correctly. -test binemit -target x86_64 legacy - -function %zero_const_32bit_no_rex() -> f32 fast { -block0: - ; asm: xorps %xmm0, %xmm0 - [-,%xmm0] v0 = f32const 0.0 ; bin: 40 0f 57 c0 - return v0 -} - -function %zero_const_32bit_rex() -> f32 fast { -block0: - ; asm: xorps %xmm8, %xmm8 - [-,%xmm8] v1 = f32const 0.0 ; bin: 45 0f 57 c0 - return v1 -} - -function %zero_const_64bit_no_rex() -> f64 fast { -block0: - ; asm: xorpd %xmm0, %xmm0 - [-,%xmm0] v0 = f64const 0.0 ; bin: 66 40 0f 57 c0 - return v0 -} - -function %zero_const_64bit_rex() -> f64 fast { -block0: - ; asm: xorpd %xmm8, %xmm8 - [-,%xmm8] v1 = f64const 0.0 ; bin: 66 45 0f 57 c0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif b/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif deleted file mode 100644 index eda7b6dffd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif +++ /dev/null @@ -1,25 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0() -> i128 system_v { -block0: - v0 = iconst.i64 0 - v1 = iconst.i64 0 - v2 = iconcat v0, v1 - jump block5 - -block2: - jump block4(v27) - -block4(v23: i128): - return v23 - -block5: - v27 = bxor.i128 v2, v2 - v32 = iconst.i32 0 - brz v32, block2 - jump block6 - -block6: - trap 
user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/i128.clif b/cranelift/filetests/filetests/isa/x86/i128.clif deleted file mode 100644 index b171c0ccfd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/i128.clif +++ /dev/null @@ -1,46 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i64, i64) -> i128 fast { -block0(v0: i64, v1: i64): -;check: block0(v0: i64 [%rdi], v1: i64 [%rsi], v3: i64 [%rbp]): - - v2 = iconcat.i64 v0, v1 - ; check: regmove v0, %rdi -> %rax - ; check: regmove v1, %rsi -> %rdx - - return v2 - ; check: v4 = x86_pop.i64 - ; check: return v0, v1, v4 -} - -function u0:1(i128) -> i64, i64 fast { -block0(v0: i128): -; check: block0(v3: i64 [%rdi], v4: i64 [%rsi], v5: i64 [%rbp]): - - v1, v2 = isplit v0 - ; check: regmove v3, %rdi -> %rax - ; check: regmove v4, %rsi -> %rdx - - return v1, v2 - ; check: v6 = x86_pop.i64 - ; check: return v3, v4, v6 -} - -function u0:2(i64, i128) fast { -; check: block0(v0: i64 [%rdi], v2: i64 [%rsi], v3: i64 [%rdx], v6: i64 [%rbp]): -block0(v0: i64, v1: i128): - ; check: store v2, v0+8 - ; check: store v3, v0+16 - store v1, v0+8 - return -} - -function u0:3(i64) -> i128 fast { -block0(v0: i64): - ; check: v2 = load.i64 v0+8 - ; check: v3 = load.i64 v0+16 - v1 = load.i128 v0+8 - ; check: return v2, v3, v5 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif b/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif deleted file mode 100644 index dd75cac4a1..0000000000 --- a/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif +++ /dev/null @@ -1,8 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i16) -> i8 fast { -block0(v0: i16): - v1 = ireduce.i8 v0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif b/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif deleted file mode 100644 index 9aedb61001..0000000000 --- 
a/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif +++ /dev/null @@ -1,20 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i64, i64) -> i128 system_v { -block0(v0: i64, v1: i64): - trap user0 - -block30: - v245 = iconst.i64 0 - v246 = iconcat v245, v245 - ; The next instruction used to be legalized twice, causing a panic the second time. - v250, v251 = isplit.i128 v370 - v252, v253 = isplit v246 - trap user0 - -block45: - v369 = iconst.i64 0 - v370 = load.i128 v369 - trap user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif b/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif deleted file mode 100644 index 948fa34d99..0000000000 --- a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif +++ /dev/null @@ -1,14 +0,0 @@ -test compile -set opt_level=speed_and_size -target x86_64 legacy - -function u0:0(i8) -> i8 fast { -block0(v0: i8): - v1 = iconst.i8 0 - v2 = isub v1, v0 - ; check: uextend.i32 - ; nextln: iconst.i32 - ; nextln: isub - ; nextln: ireduce.i8 - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif b/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif deleted file mode 100644 index a08356ca53..0000000000 --- a/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif +++ /dev/null @@ -1,10 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i128) system_v { -block0(v0: i128): - jump block1(v0) - -block1(v1: i128): - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif deleted file mode 100644 index 9d88db9d17..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif +++ /dev/null @@ -1,10 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0() -> i8 fast { -block0: - v14 = bconst.b1 false - v15 = bint.i8 v14 - return v15 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif 
b/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif deleted file mode 100644 index acdd21c9f0..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif +++ /dev/null @@ -1,28 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:51(i64, i64) system_v { - ss0 = explicit_slot 0 - ss1 = explicit_slot 1 - ss2 = explicit_slot 1 - ss3 = explicit_slot 1 - -block0(v0: i64, v1: i64): - v2 = stack_addr.i64 ss1 - v3 = load.i8 v1 - store v3, v2 - v4 = stack_addr.i64 ss2 - v5 = stack_addr.i64 ss3 - jump block1 - -block1: - v6 = load.i8 v2 - store v6, v5 - v7 = load.i8 v5 - v8 = bnot v7 - store v8, v4 - v9 = load.i8 v4 - store v9, v0 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif b/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif deleted file mode 100644 index f64108531c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif +++ /dev/null @@ -1,46 +0,0 @@ -test legalizer - -target x86_64 legacy - -function %br_icmp(i64) fast { -block0(v0: i64): - v1 = iconst.i64 0 - br_icmp eq v0, v1, block1 - jump block1 - -block1: - return -} - -; sameln: function %br_icmp(i64 [%rdi]) fast { -; nextln: block0(v0: i64): -; nextln: [RexOp1pu_id#b8] v1 = iconst.i64 0 -; nextln: [RexOp1icscc#8039] v2 = icmp eq v0, v1 -; nextln: [RexOp1t8jccb#75] brnz v2, block1 -; nextln: [Op1jmpb#eb] jump block1 -; nextln: -; nextln: block1: -; nextln: [Op1ret#c3] return -; nextln: } - - -function %br_icmp_args(i64) fast { -block0(v0: i64): - v1 = iconst.i64 0 - br_icmp eq v0, v1, block1(v0) - jump block1(v0) - -block1(v2: i64): - return -} - -; sameln: function %br_icmp_args(i64 [%rdi]) fast { -; nextln: block0(v0: i64): -; nextln: [RexOp1pu_id#b8] v1 = iconst.i64 0 -; nextln: [RexOp1icscc#8039] v3 = icmp eq v0, v1 -; nextln: [RexOp1t8jccb#75] brnz v3, block1(v0) -; nextln: [Op1jmpb#eb] jump block1(v0) -; nextln: -; nextln: block1(v2: i64): -; nextln: [Op1ret#c3] return -; nextln: } diff --git 
a/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif b/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif deleted file mode 100644 index c931d6cacf..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif +++ /dev/null @@ -1,31 +0,0 @@ -test compile -set opt_level=speed_and_size -target x86_64 legacy -; regex: V=v\d+ -; regex: BB=block\d+ - -function u0:0(i64) system_v { - ss0 = explicit_slot 1 - jt0 = jump_table [block1] - -block0(v0: i64): - v1 = stack_addr.i64 ss0 - v2 = load.i8 v1 - br_table v2, block2, jt0 -; check: $(oob=$V) = ifcmp_imm $(idx=$V), 1 -; block2 is replaced by block1 by fold_redundant_jump -; nextln: brif uge $oob, block1 -; nextln: fallthrough $(inb=$BB) -; check: $inb: -; nextln: $(final_idx=$V) = uextend.i64 $idx -; nextln: $(base=$V) = jump_table_base.i64 jt0 -; nextln: $(rel_addr=$V) = jump_table_entry $final_idx, $base, 4, jt0 -; nextln: $(addr=$V) = iadd $base, $rel_addr -; nextln: indirect_jump_table_br $addr, jt0 - -block2: - jump block1 - -block1: - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif deleted file mode 100644 index 7c135d54ae..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif +++ /dev/null @@ -1,36 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ - -function u0:0(i8, i8) fast { -fn0 = %black_box(i8) -ss0 = explicit_slot 1 ; black box - -block0(v0: i8, v1: i8): - v99 = stack_addr.i64 ss0 - - ; check: istore8 $(V), $(V) - - v2 = band v0, v1 - store v2, v99 - v3 = bor v0, v1 - store v3, v99 - v4 = bxor v0, v1 - store v4, v99 - v5 = bnot v0 - store v5, v99 - v6 = band_not v0, v1 - store v6, v99 - v7 = bor_not v0, v1 - store v7, v99 - v8 = bxor_not v0, v1 - store v8, v99 - v9 = band_imm v0, 42 - store v9, v99 - v10 = bor_imm v0, 42 - store v10, v99 - v11 = bxor_imm v0, 42 - store v11, v99 - return -} diff --git 
a/cranelift/filetests/filetests/isa/x86/legalize-call.clif b/cranelift/filetests/filetests/isa/x86/legalize-call.clif deleted file mode 100644 index b21099281e..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-call.clif +++ /dev/null @@ -1,14 +0,0 @@ -; Test legalization of a non-colocated call in 64-bit non-PIC mode. -test legalizer -set opt_level=speed_and_size -target x86_64 legacy haswell - -function %call() { - fn0 = %foo() -block0: - call fn0() - return -} - -; check: v0 = func_addr.i64 fn0 -; nextln: call_indirect sig0, v0() diff --git a/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif deleted file mode 100644 index af5e158b07..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif +++ /dev/null @@ -1,25 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ - -function u0:0(i8) -> i8, i8 fast { -block0(v0: i8): - v1 = clz v0 - ; check: v3 = uextend.i32 v0 - ; nextln: v6 = iconst.i32 -1 - ; nextln: v7 = iconst.i32 31 - ; nextln: v8, v9 = x86_bsr v3 - ; nextln: v10 = selectif.i32 eq v9, v6, v8 - ; nextln: v4 = isub v7, v10 - ; nextln: v5 = iadd_imm v4, -24 - ; nextln: v1 = ireduce.i8 v5 - v2 = ctz v0 - ; nextln: v11 = uextend.i32 v0 - ; nextln: v12 = bor_imm v11, 256 - ; nextln: v14 = iconst.i32 32 - ; nextln: v15, v16 = x86_bsf v12 - ; nextln: v13 = selectif.i32 eq v16, v14, v15 - ; nextln: v2 = ireduce.i8 v13 - return v1, v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-custom.clif b/cranelift/filetests/filetests/isa/x86/legalize-custom.clif deleted file mode 100644 index 0c51e064dd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-custom.clif +++ /dev/null @@ -1,133 +0,0 @@ -; Test the custom legalizations. 
-test legalizer -target i686 legacy -target x86_64 legacy - -; regex: V=v\d+ -; regex: BB=block\d+ - -function %cond_trap(i32) { -block0(v1: i32): - trapz v1, user67 - return - ; check: block0(v1: i32 - ; nextln: $(f=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $f, user67 - ; nextln: return -} - -function %cond_trap2(i32) { -block0(v1: i32): - trapnz v1, int_ovf - return - ; check: block0(v1: i32 - ; nextln: $(f=$V) = ifcmp_imm v1, 0 - ; nextln: trapif ne $f, int_ovf - ; nextln: return -} - -function %cond_trap_b1(i32) { -block0(v1: i32): - v2 = icmp_imm eq v1, 6 - trapz v2, user7 - return - ; check: block0(v1: i32 - ; check: brnz v2, $(new=$BB) - ; check: jump $(trap=$BB) - ; check: $trap: - ; nextln: trap user7 - ; check: $new: - ; nextln: return -} - -function %cond_trap2_b1(i32) { -block0(v1: i32): - v2 = icmp_imm eq v1, 6 - trapnz v2, user9 - return - ; check: block0(v1: i32 - ; check: brz v2, $(new=$BB) - ; check: jump $(trap=$BB) - ; check: $trap: - ; nextln: trap user9 - ; check: $new: - ; nextln: return -} - -function %f32const() -> f32 { -block0: - v1 = f32const 0x1.0p1 - ; check: $(tmp=$V) = iconst.i32 - ; check: v1 = bitcast.f32 $tmp - return v1 -} - -function %select_f64(f64, f64, i32) -> f64 { -block0(v0: f64, v1: f64, v2: i32): - v3 = select v2, v0, v1 - ; check: brnz v2, $(new=$BB)(v0) - ; nextln: jump $new(v1) - ; check: $new(v3: f64): - ; nextln: return v3 - return v3 -} - -function %f32_min(f32, f32) -> f32 { -block0(v0: f32, v1: f32): - v2 = fmin v0, v1 - return v2 - ; check: $(vnat=$V) = x86_fmin.f32 v0, v1 - ; nextln: jump $(done=$BB)($vnat) - - ; check: $(uno=$BB): - ; nextln: $(vuno=$V) = fadd.f32 v0, v1 - ; nextln: jump $(done=$BB)($vuno) - - ; check: $(ueq=$BB): - ; check: $(veq=$V) = bor.f32 v0, v1 - ; nextln: jump $(done=$BB)($veq) - - ; check: $done(v2: f32): - ; nextln: return v2 -} - -function %ineg_legalized_i8() { -block0: - v0 = iconst.i8 1 - v1 = ineg v0 - ; check: v2 = iconst.i32 1 - ; nextln: v0 = ireduce.i8 v2 - ; nextln: v3 = 
iconst.i8 0 - ; nextln: v4 = uextend.i32 v3 - ; nextln: v5 = uextend.i32 v0 - ; nextln: v6 = isub v4, v5 - ; nextln: v1 = ireduce.i8 v6 - - return -} - -function %ineg_legalized_i16() { -block0: - v0 = iconst.i16 1 - v1 = ineg v0 - ; check: v2 = iconst.i32 1 - ; nextln: v0 = ireduce.i16 v2 - ; nextln: v3 = iconst.i16 0 - ; nextln: v4 = uextend.i32 v3 - ; nextln: v5 = uextend.i32 v0 - ; nextln: v6 = isub v4, v5 - ; nextln: v1 = ireduce.i16 v6 - - return -} - -function %ineg_legalized_i32() { -block0: - v0 = iconst.i32 1 - v1 = ineg v0 - ; check: v0 = iconst.i32 1 - ; nextln: v2 = iconst.i32 0 - ; nextln: v1 = isub v2, v0 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif b/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif deleted file mode 100644 index 9e579c1bcd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif +++ /dev/null @@ -1,192 +0,0 @@ -; Test the division legalizations. -test legalizer -; See also legalize-div.clif. 
-set avoid_div_traps=1 -target x86_64 legacy - -; regex: V=v\d+ -; regex: BB=block\d+ - -function %udiv(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = udiv v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $d -} - -function %udiv_0(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 0 - ; nextln: v1 = iconst.i64 0 - v2 = udiv v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $d -} - -function %udiv_minus_1(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 -1 - ; nextln: v1 = iconst.i64 -1 - v2 = udiv v0, v1 - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $d -} - -function %urem(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = urem v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $r -} - -function %urem_0(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 0 - ; nextln: v1 = iconst.i64 0 - v2 = urem v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $r -} - -function %urem_minus_1(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 -1 - ; nextln: v1 = iconst.i64 -1 - v2 = urem v0, v1 - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $r -} - -function %sdiv(i64, i64) -> i64 { -block0(v0: i64, v1: 
i64): - ; check: block0( - v2 = sdiv v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$BB) - ; check: $(hi=$V) = sshr_imm - ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - ; nextln: jump $(done=$BB)($q) - ; check: $m1: - ; nextln: $(imin=$V) = iconst.i64 0x8000_0000_0000_0000 - ; nextln: $(fm=$V) = ifcmp.i64 v0, $imin - ; nextln: trapif eq $fm, int_ovf - ; check: $done(v2: i64): - return v2 - ; nextln: return v2 -} - -function %sdiv_0(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 0 - ; nextln: v1 = iconst.i64 0 - v2 = sdiv v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; check: $(hi=$V) = sshr_imm - ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - return v2 - ; nextln: return v2 -} - -function %sdiv_minus_1(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 -1 - ; nextln: v1 = iconst.i64 -1 - v2 = sdiv v0, v1 - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$BB) - ; check: $(hi=$V) = sshr_imm - ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - ; nextln: jump $(done=$BB)($q) - ; check: $m1: - ; nextln: $(imin=$V) = iconst.i64 0x8000_0000_0000_0000 - ; nextln: $(fm=$V) = ifcmp.i64 v0, $imin - ; nextln: trapif eq $fm, int_ovf - ; check: $done(v2: i64): - return v2 - ; nextln: return v2 -} - -; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1. -; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern. 
-function %srem(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = srem v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$BB) - ; check: $(hi=$V) = sshr_imm - ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - ; nextln: jump $(done=$BB)($r) - ; check: $m1: - ; nextln: $(zero=$V) = iconst.i64 0 - ; nextln: jump $(done=$BB)($zero) - ; check: $done(v2: i64): - return v2 - ; nextln: return v2 -} - -function %srem_0(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 0 - ; nextln: v1 = iconst.i64 0 - v2 = srem v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; check: $(hi=$V) = sshr_imm - ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - return v2 - ; nextln: return v2 -} - -function %srem_minus_1(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 -1 - ; nextln: v1 = iconst.i64 -1 - v2 = srem v0, v1 - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$BB) - ; check: $(hi=$V) = sshr_imm - ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - ; nextln: jump $(done=$BB)($r) - ; check: $m1: - ; nextln: $(zero=$V) = iconst.i64 0 - ; nextln: jump $(done=$BB)($zero) - ; check: $done(v2: i64): - return v2 - ; nextln: return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-div.clif b/cranelift/filetests/filetests/isa/x86/legalize-div.clif deleted file mode 100644 index b172a9aef3..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-div.clif +++ /dev/null @@ -1,57 +0,0 @@ -; Test the division legalizations. -test legalizer -; See also legalize-div-traps.clif. 
-set avoid_div_traps=0 -target x86_64 legacy - -; regex: V=v\d+ -; regex: BB=block\d+ - -function %udiv(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = udiv v0, v1 - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $d -} - -function %urem(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = urem v0, v1 - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $r -} - -function %sdiv(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = sdiv v0, v1 - ; check: $(hi=$V) = sshr_imm - ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - return v2 - ; nextln: return $d -} - -; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1. -; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern. -function %srem(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = srem v0, v1 - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$BB) - ; check: $(hi=$V) = sshr_imm - ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - ; nextln: jump $(done=$BB)($r) - ; check: $m1: - ; nextln: $(zero=$V) = iconst.i64 0 - ; nextln: jump $(done=$BB)($zero) - ; check: $done(v2: i64): - return v2 - ; nextln: return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif b/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif deleted file mode 100644 index 43f57f8372..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif +++ /dev/null @@ -1,13 +0,0 @@ -; Test the legalization of f64const. 
-test legalizer -target x86_64 legacy - -; regex: V=v\d+ - -function %f64const() -> f64 { -block0: - v1 = f64const 0x1.0p1 - ; check: $(tmp=$V) = iconst.i64 - ; check: v1 = bitcast.f64 $tmp - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif b/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif deleted file mode 100644 index 32a256c9e7..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif +++ /dev/null @@ -1,14 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i16) -> f64 fast { -block0(v0: i16): - v1 = fcvt_from_uint.f64 v0 - return v1 -} - -function u0:1(i16) -> f64 fast { -block0(v0: i16): - v1 = fcvt_from_sint.f64 v0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif b/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif deleted file mode 100644 index 242a0f8dfa..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif +++ /dev/null @@ -1,123 +0,0 @@ -test legalizer -set enable_heap_access_spectre_mitigation=false -target x86_64 legacy - -; Test legalization for various forms of heap addresses. 
-; regex: BB=block\d+ - -function %heap_addrs(i32, i64, i64 vmctx) { - gv4 = vmctx - gv0 = iadd_imm.i64 gv4, 64 - gv1 = iadd_imm.i64 gv4, 72 - gv2 = iadd_imm.i64 gv4, 80 - gv3 = load.i64 notrap aligned gv4+88 - - heap0 = static gv0, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 - heap1 = static gv0, offset_guard 0x1000, bound 0x1_0000, index_type i32 - heap2 = static gv0, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i64 - heap3 = static gv0, offset_guard 0x1000, bound 0x1_0000, index_type i64 - heap4 = dynamic gv1, min 0x1_0000, bound gv3, offset_guard 0x8000_0000, index_type i32 - heap5 = dynamic gv1, bound gv3, offset_guard 0x1000, index_type i32 - heap6 = dynamic gv1, min 0x1_0000, bound gv2, offset_guard 0x8000_0000, index_type i64 - heap7 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i64 - - ; check: heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 - ; check: heap1 = static gv0, min 0, bound 0x0001_0000, offset_guard 4096, index_type i32 - ; check: heap2 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i64 - ; check: heap3 = static gv0, min 0, bound 0x0001_0000, offset_guard 4096, index_type i64 - ; check: heap4 = dynamic gv1, min 0x0001_0000, bound gv3, offset_guard 0x8000_0000, index_type i32 - ; check: heap5 = dynamic gv1, min 0, bound gv3, offset_guard 4096, index_type i32 - ; check: heap6 = dynamic gv1, min 0x0001_0000, bound gv2, offset_guard 0x8000_0000, index_type i64 - ; check: heap7 = dynamic gv1, min 0, bound gv2, offset_guard 4096, index_type i64 - -block0(v0: i32, v1: i64, v3: i64): - ; The fast-path; 32-bit index, static heap with a sufficient bound, no bounds check needed! 
- v4 = heap_addr.i64 heap0, v0, 0 - ; check: v12 = uextend.i64 v0 - ; check: v13 = iadd_imm v3, 64 - ; check: v4 = iadd v13, v12 - - v5 = heap_addr.i64 heap1, v0, 0 - ; check: v14 = uextend.i64 v0 - ; check: v15 = icmp_imm ugt v14, 0x0001_0000 - ; check: brz v15, $(resume_1=$BB) - ; nextln: jump $(trap_1=$BB) - ; check: $trap_1: - ; nextln: trap heap_oob - ; check: $resume_1: - ; check: v16 = iadd_imm.i64 v3, 64 - ; check: v5 = iadd v16, v14 - - v6 = heap_addr.i64 heap2, v1, 0 - ; check: v19 = iconst.i64 0x0001_0000_0000 - ; check: v17 = icmp.i64 ugt v1, v19 - ; check: brz v17, $(resume_2=$BB) - ; nextln: jump $(trap_2=$BB) - ; check: $trap_2: - ; nextln: trap heap_oob - ; check: $resume_2: - ; check: v18 = iadd_imm.i64 v3, 64 - ; check: v6 = iadd v18, v1 - - v7 = heap_addr.i64 heap3, v1, 0 - ; check: v20 = icmp_imm.i64 ugt v1, 0x0001_0000 - ; check: brz v20, $(resume_3=$BB) - ; nextln: jump $(trap_3=$BB) - ; check: $trap_3: - ; nextln: trap heap_oob - ; check: $resume_3: - ; check: v21 = iadd_imm.i64 v3, 64 - ; check: v7 = iadd v21, v1 - - v8 = heap_addr.i64 heap4, v0, 0 - ; check: v22 = uextend.i64 v0 - ; check: v23 = load.i64 notrap aligned v3+88 - ; check: v24 = iadd_imm v23, 0 - ; check: v25 = icmp ugt v22, v24 - ; check: brz v25, $(resume_4=$BB) - ; nextln: jump $(trap_4=$BB) - ; check: $trap_4: - ; nextln: trap heap_oob - ; check: $resume_4: - ; check: v26 = iadd_imm.i64 v3, 72 - ; check: v8 = iadd v26, v22 - - v9 = heap_addr.i64 heap5, v0, 0 - ; check: v27 = uextend.i64 v0 - ; check: v28 = load.i64 notrap aligned v3+88 - ; check: v29 = iadd_imm v28, 0 - ; check: v30 = icmp ugt v27, v29 - ; check: brz v30, $(resume_5=$BB) - ; nextln: jump $(trap_5=$BB) - ; check: $trap_5: - ; nextln: trap heap_oob - ; check: $resume_5: - ; check: v31 = iadd_imm.i64 v3, 72 - ; check: v9 = iadd v31, v27 - - v10 = heap_addr.i64 heap6, v1, 0 - ; check: v32 = iadd_imm.i64 v3, 80 - ; check: v33 = iadd_imm v32, 0 - ; check: v34 = icmp.i64 ugt v1, v33 - ; check: brz v34, 
$(resume_6=$BB) - ; nextln: jump $(trap_6=$BB) - ; check: $trap_6: - ; nextln: trap heap_oob - ; check: $resume_6: - ; check: v35 = iadd_imm.i64 v3, 72 - ; check: v10 = iadd v35, v1 - - v11 = heap_addr.i64 heap7, v1, 0 - ; check: v36 = iadd_imm.i64 v3, 80 - ; check: v37 = iadd_imm v36, 0 - ; check: v38 = icmp.i64 ugt v1, v37 - ; check: brz v38, $(resume_7=$BB) - ; nextln: jump $(trap_7=$BB) - ; check: $trap_7: - ; nextln: trap heap_oob - ; check: $resume_7: - ; check: v39 = iadd_imm.i64 v3, 72 - ; check: v11 = iadd v39, v1 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-i128.clif b/cranelift/filetests/filetests/isa/x86/legalize-i128.clif deleted file mode 100644 index 276de82d4e..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-i128.clif +++ /dev/null @@ -1,20 +0,0 @@ -; Test the legalization of i128 instructions on x86_64. -test legalizer -target x86_64 legacy haswell - -; regex: V=v\d+ - -function %imul(i128, i128) -> i128 { -block0(v1: i128, v2: i128): - v10 = imul v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v11=$V) = imul $v1_msb, $v2_lsb - ; nextln: $(v12=$V) = imul $v1_lsb, $v2_msb - ; nextln: $(v13=$V) = iadd $v11, $v12 - ; nextln: $(v99=$V), $(v14=$V) = x86_umulx $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = iadd $v13, $v14 - ; nextln: $(v10_lsb=$V) = imul $v1_lsb, $v2_lsb - ; nextln: v10 = iconcat $v10_lsb, $v10_msb - return v10 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-i64.clif b/cranelift/filetests/filetests/isa/x86/legalize-i64.clif deleted file mode 100644 index 7e2d381947..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-i64.clif +++ /dev/null @@ -1,357 +0,0 @@ -; Test the legalization of i64 instructions on x86_32. 
-test legalizer -target i686 legacy haswell - -; regex: V=v\d+ - -function %iadd(i64, i64) -> i64 { -block0(v1: i64, v2: i64): - v10 = iadd v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v10_lsb=$V), $(carry=$V) = iadd_ifcout $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = iadd_ifcin $v1_msb, $v2_msb, $carry - ; nextln: v10 = iconcat $v10_lsb, $v10_msb - return v10 -} - -function %isub(i64, i64) -> i64 { -block0(v1: i64, v2: i64): - v10 = isub v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v10_lsb=$V), $(borrow=$V) = isub_ifbout $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = isub_ifbin $v1_msb, $v2_msb, $borrow - ; nextln: v10 = iconcat $v10_lsb, $v10_msb - return v10 -} - -function %imul(i64, i64) -> i64 { -block0(v1: i64, v2: i64): - v10 = imul v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v11=$V) = imul $v1_msb, $v2_lsb - ; nextln: $(v12=$V) = imul $v1_lsb, $v2_msb - ; nextln: $(v13=$V) = iadd $v11, $v12 - ; nextln: $(v99=$V), $(v14=$V) = x86_umulx $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = iadd $v13, $v14 - ; nextln: $(v10_lsb=$V) = imul $v1_lsb, $v2_lsb - ; nextln: v10 = iconcat $v10_lsb, $v10_msb - return v10 -} - -function %icmp_eq(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp eq v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v10_lsb=$V) = icmp eq $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = icmp eq $v1_msb, $v2_msb - ; nextln: v10 = band $v10_lsb, $v10_msb - return v10 -} - -function %icmp_imm_eq(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm eq v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: 
$(v2_msb=$V) = iconst.i32 0 - ; nextln: $(v10_lsb=$V) = icmp eq $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = icmp eq $v1_msb, $v2_msb - ; nextln: v10 = band $v10_lsb, $v10_msb - return v10 -} - -function %icmp_ne(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp ne v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v10_lsb=$V) = icmp ne $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = icmp ne $v1_msb, $v2_msb - ; nextln: v10 = bor $v10_lsb, $v10_msb - return v10 -} - -function %icmp_imm_ne(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm ne v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(v10_lsb=$V) = icmp ne $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = icmp ne $v1_msb, $v2_msb - ; nextln: v10 = bor $v10_lsb, $v10_msb - return v10 -} - -function %icmp_sgt(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp sgt v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_sgt(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm sgt v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; 
nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_sge(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp sge v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_sge(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm sge v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_slt(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp slt v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_slt(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm slt v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = 
bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_sle(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp sle v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_sle(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm sle v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_ugt(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp ugt v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_ugt(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm ugt v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b3=$V) 
= icmp ugt $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_uge(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp uge v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_uge(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm uge v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_ult(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp ult v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_ult(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm ult v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: 
$(b2=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_ule(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp ule v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_ule(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm ule v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %ineg_legalized_i64() { -block0: - v0 = iconst.i64 1 - v1 = ineg v0 - ; check: v2 = iconst.i32 1 - ; nextln: v3 = iconst.i32 0 - ; nextln: v0 = iconcat v2, v3 - ; nextln: v5 = iconst.i32 0 - ; nextln: v6 = iconst.i32 0 - ; nextln: v4 = iconcat v5, v6 - ; nextln: v7, v8 = isub_ifbout v5, v2 - ; nextln: v9 = isub_ifbin v6, v3, v8 - ; nextln: v1 = iconcat v7, v9 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif deleted file mode 100644 index 32f2b3d3e7..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif +++ /dev/null @@ -1,19 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ - -function u0:0(i8, i8) -> i8 fast { 
-block0(v0: i8, v1: i8): - v2 = icmp_imm sle v0, 0 - ; check: $(e1=$V) = sextend.i32 v0 - ; nextln: v2 = icmp_imm sle $e1, 0 - v3 = bint.i8 v2 - v4 = icmp eq v0, v1 - ; check: $(e2=$V) = uextend.i32 v0 - ; nextln: $(e3=$V) = uextend.i32 v1 - ; nextln: v4 = icmp eq $e2, $e3 - v5 = bint.i8 v4 - v6 = iadd v3, v5 - return v6 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif deleted file mode 100644 index 1e6a70434a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif +++ /dev/null @@ -1,18 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0(i64) system_v { - ss0 = explicit_slot 0 - -block0(v0: i64): - jump block1 - -block1: -; _0 = const 42u8 - v1 = iconst.i8 42 - store v1, v0 -; -; return - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif deleted file mode 100644 index b1f5b12095..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif +++ /dev/null @@ -1,11 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0(i64, i8, i8) system_v { - -block0(v0: i64, v1: i8, v2: i8): - v11 = imul v1, v2 - store v11, v0 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif deleted file mode 100644 index 4f84d93d0b..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif +++ /dev/null @@ -1,15 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0(i64, i8) system_v { - ss0 = explicit_slot 1 - -block0(v0: i64, v1: i8): - v3 = stack_addr.i64 ss0 - v5 = load.i8 v3 - v6 = iconst.i8 2 - v7 = imul_imm v5, 42 - store v7, v0 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif b/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif deleted file mode 100644 index 
a36a2d6ed0..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif +++ /dev/null @@ -1,13 +0,0 @@ -; Test the custom legalization of ineg.i64 on x86_64. -test legalizer -target x86_64 legacy - -function %ineg_legalized_i64() { -block0: - v0 = iconst.i64 1 - v1 = ineg v0 - ; check: v0 = iconst.i64 1 - ; nextln: v2 = iconst.i64 0 - ; nextln: v1 = isub v2, v0 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif b/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif deleted file mode 100644 index 527710d4fe..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif +++ /dev/null @@ -1,11 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - v3 = ireduce.i64 v2 - ; check: v3 = copy v0 - ; check: return v3 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif b/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif deleted file mode 100644 index 3ad3f4c69f..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif +++ /dev/null @@ -1,11 +0,0 @@ -test compile -target i686 legacy - -function u0:0(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = iconcat v0, v1 - v3 = ireduce.i32 v2 - ; check: v3 = fill v0 - ; check: return v3 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif b/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif deleted file mode 100644 index 0d042bf3ff..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif +++ /dev/null @@ -1,24 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i128) -> i64, i64 fast { -; check: block0(v4: i64 [%rdi], v5: i64 [%rsi], v8: i64 [%rbp]): -block0(v0: i128): - jump block2 - -block1: - ; When this `isplit` is legalized, the bnot below is not yet legalized, - ; so there isn't a 
corresponding `iconcat` yet. We should try legalization - ; for this `isplit` again once all instrucions have been legalized. - v2, v3 = isplit.i128 v1 - ; return v6, v7 - return v2, v3 - -block2: - ; check: v6 = bnot.i64 v4 - ; check: v2 -> v6 - ; check: v7 = bnot.i64 v5 - ; check: v3 -> v7 - v1 = bnot.i128 v0 - jump block1 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif b/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif deleted file mode 100644 index 838a915bf0..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif +++ /dev/null @@ -1,15 +0,0 @@ -test legalizer - -; Pre-SSE 4.1, we need to use runtime library calls for floating point rounding operations. -set is_pic -target x86_64 legacy - -function %floor(f32) -> f32 { -block0(v0: f32): - v1 = floor v0 - return v1 -} -; check: function %floor(f32 [%xmm0]) -> f32 [%xmm0] fast { -; check: sig0 = (f32 [%xmm0]) -> f32 [%xmm0] system_v -; check: fn0 = %FloorF32 sig0 -; check: v1 = call fn0(v0) diff --git a/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif deleted file mode 100644 index 4cbf3e088e..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif +++ /dev/null @@ -1,31 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0(i64, i8, i8) system_v { - ss0 = explicit_slot 0 - ss1 = explicit_slot 1 - ss2 = explicit_slot 1 - ss3 = explicit_slot 1 - ss4 = explicit_slot 1 - -block0(v0: i64, v1: i8, v2: i8): - v3 = stack_addr.i64 ss1 - store v1, v3 - v4 = stack_addr.i64 ss2 - store v2, v4 - v5 = stack_addr.i64 ss3 - v6 = stack_addr.i64 ss4 - jump block1 - -block1: - v7 = load.i8 v3 - store v7, v5 - v8 = load.i8 v4 - store v8, v6 - v9 = load.i8 v5 - v10 = load.i8 v6 - v11 = imul v9, v10 - store v11, v0 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-memory.clif 
b/cranelift/filetests/filetests/isa/x86/legalize-memory.clif deleted file mode 100644 index 11a0f1d20f..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-memory.clif +++ /dev/null @@ -1,115 +0,0 @@ -; Test the legalization of memory objects. -test legalizer -set enable_heap_access_spectre_mitigation=false -target x86_64 legacy - -; regex: V=v\d+ -; regex: BB=block\d+ - -function %vmctx(i64 vmctx) -> i64 { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, -16 - -block1(v1: i64): - v2 = global_value.i64 gv1 - ; check: v2 = iadd_imm v1, -16 - return v2 - ; check: return v2 -} - -function %load(i64 vmctx) -> i64 { - gv0 = vmctx - gv1 = load.i64 notrap aligned gv0-16 - gv2 = iadd_imm.i64 gv1, 32 - -block1(v1: i64): - v2 = global_value.i64 gv2 - ; check: $(p1=$V) = load.i64 notrap aligned v1-16 - ; check: v2 = iadd_imm $p1, 32 - return v2 - ; check: return v2 -} - -function %symbol() -> i64 { - gv0 = symbol %something - gv1 = symbol u123:456 - -block1: - v0 = global_value.i64 gv0 - ; check: v0 = symbol_value.i64 gv0 - v1 = global_value.i64 gv1 - ; check: v1 = symbol_value.i64 gv1 - v2 = bxor v0, v1 - return v2 -} - -; SpiderMonkey VM-style static 4+2 GB heap. -; This eliminates bounds checks completely for offsets < 2GB. -function %staticheap_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 64 - heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0x8000_0000 - -block0(v0: i32, v999: i64): - ; check: block0( - v1 = heap_addr.i64 heap0, v0, 1 - ; Boundscheck should be eliminated. - ; Checks here are assuming that no pipehole opts fold the load offsets. 
- ; nextln: $(xoff=$V) = uextend.i64 v0 - ; check: $(hbase=$V) = iadd_imm v999, 64 - ; nextln: v1 = iadd $hbase, $xoff - v2 = load.f32 v1+16 - ; nextln: v2 = load.f32 v1+16 - v3 = load.f32 v1+20 - ; nextln: v3 = load.f32 v1+20 - v4 = fadd v2, v3 - return v4 -} - -function %staticheap_static_oob_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 64 - heap0 = static gv1, min 0x1000, bound 0x1000_0000, offset_guard 0x8000_0000 - -block0(v0: i32, v999: i64): - ; Everything after the obviously OOB access should be eliminated, leaving - ; the `trap heap_oob` instruction as the terminator of the block and moving - ; the remainder of the instructions into an inaccessible block. - ; check: block0( - ; nextln: trap heap_oob - ; check: block1: - ; nextln: v1 = iconst.i64 0 - ; nextln: v2 = load.f32 v1+16 - ; nextln: return v2 - ; nextln: } - v1 = heap_addr.i64 heap0, v0, 0x1000_0001 - v2 = load.f32 v1+16 - return v2 -} - - -; SpiderMonkey VM-style static 4+2 GB heap. -; Offsets >= 2 GB do require a boundscheck. -function %staticheap_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 64 - heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0x8000_0000 - -block0(v0: i32, v999: i64): - ; check: block0( - v1 = heap_addr.i64 heap0, v0, 0x8000_0000 - ; Boundscheck code - ; check: $(xoff=$V) = uextend.i64 v0 - ; check: $(oob=$V) = icmp - ; nextln: brz $oob, $(ok=$BB) - ; nextln: jump $(trap_oob=$BB) - ; check: $trap_oob: - ; nextln: trap heap_oob - ; check: $ok: - ; Checks here are assuming that no pipehole opts fold the load offsets. 
- ; check: $(hbase=$V) = iadd_imm.i64 v999, 64 - ; nextln: v1 = iadd $hbase, $xoff - v2 = load.f32 v1+0x7fff_ffff - ; nextln: v2 = load.f32 v1+0x7fff_ffff - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif b/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif deleted file mode 100644 index 179ef824f3..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif +++ /dev/null @@ -1,43 +0,0 @@ -test compile -target x86_64 legacy baseline - -; umulhi/smulhi on 64 bit operands - -function %i64_umulhi(i64, i64) -> i64 { -block0(v10: i64, v11: i64): - v12 = umulhi v10, v11 - ; check: %rdi -> %rax - ; check: x86_umulx - ; check: %rdx -> %rax - return v12 -} - -function %i64_smulhi(i64, i64) -> i64 { -block0(v20: i64, v21: i64): - v22 = smulhi v20, v21 - ; check: %rdi -> %rax - ; check: x86_smulx - ; check: %rdx -> %rax - return v22 -} - - -; umulhi/smulhi on 32 bit operands - -function %i32_umulhi(i32, i32) -> i32 { -block0(v30: i32, v31: i32): - v32 = umulhi v30, v31 - ; check: %rdi -> %rax - ; check: x86_umulx - ; check: %rdx -> %rax - return v32 -} - -function %i32_smulhi(i32, i32) -> i32 { -block0(v40: i32, v41: i32): - v42 = smulhi v40, v41 - ; check: %rdi -> %rax - ; check: x86_smulx - ; check: %rdx -> %rax - return v42 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif deleted file mode 100644 index fb9c4f49b8..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif +++ /dev/null @@ -1,9 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i8) -> i8 fast { -block0(v0: i8): - v1 = popcnt v0 - ; check-not: sextend.i32 v0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif deleted file mode 100644 index f770ba5643..0000000000 --- 
a/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif +++ /dev/null @@ -1,36 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0(i64, i64, i64) system_v { - ss0 = explicit_slot 0 - ss1 = explicit_slot 8 - ss2 = explicit_slot 8 - ss3 = explicit_slot 2 - ss4 = explicit_slot 8 - sig0 = (i64, i16, i64) system_v - fn0 = colocated u0:11 sig0 - -block0(v0: i64, v1: i64, v2: i64): - v3 = stack_addr.i64 ss1 - store v1, v3 - v4 = stack_addr.i64 ss2 - store v2, v4 - v5 = stack_addr.i64 ss3 - v6 = stack_addr.i64 ss4 - jump block1 - -block1: - v7 = load.i64 v3 - v8 = load.i16 v7 - store v8, v5 - v9 = load.i64 v4 - store v9, v6 - v10 = load.i16 v5 - v11 = load.i64 v6 - call fn0(v0, v10, v11) - jump block2 - -block2: - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif b/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif deleted file mode 100644 index e058602615..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif +++ /dev/null @@ -1,35 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ -; regex: R=%[a-z0-9]+ - -function %i32_rotr(i32, i32) -> i32 fast { -block0(v0: i32, v1: i32): - ; check: regmove v1, $R -> %rcx - ; check: v2 = rotr v0, v1 - v2 = rotr v0, v1 - return v2 -} - -function %i32_rotr_imm_1(i32) -> i32 fast { -block0(v0: i32): - ; check: $V = rotr_imm v0, 1 - v2 = rotr_imm v0, 1 - return v2 -} - -function %i32_rotl(i32, i32) -> i32 fast { -block0(v0: i32, v1: i32): - ; check: regmove v1, $R -> %rcx - ; check: v2 = rotl v0, v1 - v2 = rotl v0, v1 - return v2 -} - -function %i32_rotl_imm_1(i32) -> i32 fast { -block0(v0: i32): - ; check: $V = rotl_imm v0, 1 - v2 = rotl_imm v0, 1 - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif deleted file mode 100644 index 9759a8b155..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif +++ /dev/null @@ -1,24 +0,0 @@ 
-test compile -target x86_64 legacy - -; regex: V=v\d+ - -function u0:0(i8, i8) -> i8 fast { -block0(v0: i8, v1: i8): - v2 = ishl v0, v1 - ; check: $(e1=$V) = uextend.i32 v0 - ; check: $(r1=$V) = ishl $e1, v1 - ; check v2 = ireduce.i8 $r1 - v3 = ushr v0, v1 - ; check: $(e2=$V) = uextend.i32 v0 - ; check: $(r2=$V) = ushr $e2, v1 - ; check v2 = ireduce.i8 $r2 - v4 = sshr v0, v1 - ; check: $(e3=$V) = sextend.i32 v0 - ; check: $(r3=$V) = sshr $e3, v1 - ; check v2 = ireduce.i8 $r3 - - v5 = iadd v2, v3 - v6 = iadd v4, v5 - return v6 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-tables.clif b/cranelift/filetests/filetests/isa/x86/legalize-tables.clif deleted file mode 100644 index 10912afe76..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-tables.clif +++ /dev/null @@ -1,73 +0,0 @@ -test legalizer -target x86_64 legacy - -; Test legalization for various forms of table addresses. -; regex: BB=block\d+ - -function %table_addrs(i32, i64, i64 vmctx) { - gv4 = vmctx - gv0 = iadd_imm.i64 gv4, 72 - gv1 = iadd_imm.i64 gv4, 80 - gv2 = load.i32 notrap aligned gv4+88 - - table0 = dynamic gv0, min 0x1_0000, bound gv2, element_size 1, index_type i32 - table1 = dynamic gv0, bound gv2, element_size 16, index_type i32 - table2 = dynamic gv0, min 0x1_0000, bound gv1, element_size 1, index_type i64 - table3 = dynamic gv0, bound gv1, element_size 16, index_type i64 - - ; check: table0 = dynamic gv0, min 0x0001_0000, bound gv2, element_size 1, index_type i32 - ; check: table1 = dynamic gv0, min 0, bound gv2, element_size 16, index_type i32 - ; check: table2 = dynamic gv0, min 0x0001_0000, bound gv1, element_size 1, index_type i64 - ; check: table3 = dynamic gv0, min 0, bound gv1, element_size 16, index_type i64 - -block0(v0: i32, v1: i64, v3: i64): - v4 = table_addr.i64 table0, v0, +0 - ; check: v8 = load.i32 notrap aligned v3+88 - ; check: v9 = icmp uge v0, v8 - ; check: brz v9, $(resume_1=$BB) - ; nextln: jump $(trap_1=$BB) - ; check: $trap_1: - ; nextln: 
trap table_oob - ; check: $resume_1: - ; check: v10 = uextend.i64 v0 - ; check: v11 = iadd_imm.i64 v3, 72 - ; check: v4 = iadd v11, v10 - - v5 = table_addr.i64 table1, v0, +0 - ; check: v12 = load.i32 notrap aligned v3+88 - ; check: v13 = icmp.i32 uge v0, v12 - ; check: brz v13, $(resume_2=$BB) - ; nextln: jump $(trap_2=$BB) - ; check: $trap_2: - ; nextln: trap table_oob - ; check: $resume_2: - ; check: v14 = uextend.i64 v0 - ; check: v15 = iadd_imm.i64 v3, 72 - ; check: v16 = ishl_imm v14, 4 - ; check: v5 = iadd v15, v16 - - v6 = table_addr.i64 table2, v1, +0 - ; check: v17 = iadd_imm.i64 v3, 80 - ; check: v18 = icmp.i64 uge v1, v17 - ; check: brz v18, $(resume_3=$BB) - ; nextln: jump $(trap_3=$BB) - ; check: $trap_3: - ; nextln: trap table_oob - ; check: $resume_3: - ; check: v19 = iadd_imm.i64 v3, 72 - ; check: v6 = iadd v19, v1 - - v7 = table_addr.i64 table3, v1, +0 - ; check: v20 = iadd_imm.i64 v3, 80 - ; check: v21 = icmp.i64 uge v1, v20 - ; check: brz v21, $(resume_4=$BB) - ; nextln: jump $(trap_4=$BB) - ; check: $trap_4: - ; nextln: trap table_oob - ; check: $resume_4: - ; check: v22 = iadd_imm.i64 v3, 72 - ; check: v23 = ishl_imm.i64 v1, 4 - ; check: v7 = iadd v22, v23 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif deleted file mode 100644 index 7be308308c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif +++ /dev/null @@ -1,15 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ - -function u0:0(i8, i8) -> i8 fast { -block0(v0: i8, v1: i8): - v2 = urem v0, v1 - ; check: $(a=$V) = uextend.i32 v0 - ; nextln: $(b=$V) = uextend.i32 v1 - ; nextln: $(c=$V) = iconst.i32 0 - ; nextln: $(V), $(r=$V) = x86_udivmodx $a, $c, $b - ; nextln: v2 = ireduce.i8 $r - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif b/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif deleted file 
mode 100644 index ff5d11a4d7..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif +++ /dev/null @@ -1,51 +0,0 @@ -test compile -set enable_simd -target i686 legacy haswell - -function u0:1(i32) -> i64 system_v { - block1(v0: i32): - v1 = load.i64 notrap aligned v0+0 - v2 = load.i32 notrap aligned v0+16 - v3 = ishl v1, v2 - return v3 -} - -function u0:2(i32) -> i64 system_v { - block1(v0: i32): - v1 = load.i64 notrap aligned v0+0 - v2 = load.i64 notrap aligned v0+16 - v3 = ishl v1, v2 - return v3 -} - -function u0:3(i32) -> i32 system_v { - block1(v0: i32): - v1 = load.i32 notrap aligned v0+0 - v2 = load.i64 notrap aligned v0+16 - v3 = ishl v1, v2 - return v3 -} - -function u0:4(i32) -> i64 system_v { - block1(v0: i32): - v1 = load.i64 notrap aligned v0+0 - v2 = load.i32 notrap aligned v0+16 - v3 = ushr v1, v2 - return v3 -} - -function u0:5(i32) -> i64 system_v { - block1(v0: i32): - v1 = load.i64 notrap aligned v0+0 - v2 = load.i64 notrap aligned v0+16 - v3 = ushr v1, v2 - return v3 -} - -function u0:6(i32) -> i32 system_v { - block1(v0: i32): - v1 = load.i32 notrap aligned v0+0 - v2 = load.i64 notrap aligned v0+16 - v3 = ushr v1, v2 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif b/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif deleted file mode 100644 index 4e0af65c9f..0000000000 --- a/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif +++ /dev/null @@ -1,16 +0,0 @@ -test compile -target i686 legacy - -function u0:0(i64, i32) system_v { -block0(v0: i64, v1: i32): - v2 = bor v0, v0 - store v2, v1 - return -} - -function u0:1(i32) -> i64 system_v { -block0(v1: i32): - v0 = load.i64 v1 - v2 = bor v0, v0 - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/nop.clif b/cranelift/filetests/filetests/isa/x86/nop.clif deleted file mode 100644 index cafa90eb4f..0000000000 --- a/cranelift/filetests/filetests/isa/x86/nop.clif +++ /dev/null @@ -1,10 +0,0 @@ -test 
compile - -target x86_64 legacy - -function %test(i32) -> i32 system_v { -block0(v0: i32): - nop - v1 = iconst.i32 42 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif deleted file mode 100644 index b5a9658b67..0000000000 --- a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif +++ /dev/null @@ -1,52 +0,0 @@ -; Check that floating-point and integer constants equal to zero are optimized correctly. -test binemit -set opt_level=speed_and_size -target i686 legacy - -function %foo() -> f32 fast { -block0: - ; asm: xorps %xmm0, %xmm0 - [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 - return v0 -} - -function %bar() -> f64 fast { -block0: - ; asm: xorpd %xmm0, %xmm0 - [-,%xmm0] v1 = f64const 0.0 ; bin: 66 0f 57 c0 - return v1 -} - -function %zero_dword() -> i32 fast { -block0: - ; asm: xor %eax, %eax - [-,%rax] v0 = iconst.i32 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v1 = iconst.i32 0 ; bin: 31 ff - return v0 -} - -function %zero_word() -> i16 fast { -block0: - ; while you may expect this to be encoded like 6631c0, aka - ; xor %ax, %ax, the upper 16 bits of the register used for - ; i16 are left undefined, so it's not wrong to clear them. - ; - ; discarding the 66 prefix is shorter, so this test expects - ; that we do so. 
- ; - ; asm: xor %eax, %eax - [-,%rax] v0 = iconst.i16 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v1 = iconst.i16 0 ; bin: 31 ff - return v0 -} - -function %zero_byte() -> i8 fast { -block0: - ; asm: xor %eax, %eax - [-,%rax] v0 = iconst.i8 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v1 = iconst.i8 0 ; bin: 31 ff - return v0 -} diff --git a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif deleted file mode 100644 index 8e469b8b7a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif +++ /dev/null @@ -1,72 +0,0 @@ -; Check that floating-point constants equal to zero are optimized correctly. -test binemit -set opt_level=speed_and_size -target x86_64 legacy - -function %zero_const_32bit_no_rex() -> f32 fast { -block0: - ; asm: xorps %xmm0, %xmm0 - [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 - return v0 -} - -function %zero_const_32bit_rex() -> f32 fast { -block0: - ; asm: xorps %xmm8, %xmm8 - [-,%xmm8] v1 = f32const 0.0 ; bin: 45 0f 57 c0 - return v1 -} - -function %zero_const_64bit_no_rex() -> f64 fast { -block0: - ; asm: xorpd %xmm0, %xmm0 - [-,%xmm0] v0 = f64const 0.0 ; bin: 66 0f 57 c0 - return v0 -} - -function %zero_const_64bit_rex() -> f64 fast { -block0: - ; asm: xorpd %xmm8, %xmm8 - [-,%xmm8] v1 = f64const 0.0 ; bin: 66 45 0f 57 c0 - return v1 -} - -function %imm_zero_register() -> i64 fast { -block0: - ; asm: xor %eax, %eax - [-,%rax] v0 = iconst.i64 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v1 = iconst.i64 0 ; bin: 31 ff - ; asm: xor %r8, r8 - [-,%r8] v2 = iconst.i64 0 ; bin: 45 31 c0 - ; asm: xor %r15, %r15 - [-,%r15] v4 = iconst.i64 0 ; bin: 45 31 ff - return v0 -} - -function %zero_word() -> i16 fast { -block0: - ; while you may expect this to be encoded like 6631c0, aka - ; xor %ax, %ax, the upper 16 bits of the register used for - ; i16 are left undefined, so it's not wrong to clear them. 
- ; - ; discarding the 66 prefix is shorter, so this test expects - ; that we do so. - ; - ; asm: xor %eax, %eax - [-,%rax] v0 = iconst.i16 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v1 = iconst.i16 0 ; bin: 31 ff - return v0 -} - -function %zero_byte() -> i8 fast { -block0: - ; asm: xor %r8d, %r8d - [-,%r15] v0 = iconst.i8 0 ; bin: 45 31 ff - ; asm: xor %eax, eax - [-,%rax] v1 = iconst.i8 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v2 = iconst.i8 0 ; bin: 31 ff - return v0 -} diff --git a/cranelift/filetests/filetests/isa/x86/pinned-reg.clif b/cranelift/filetests/filetests/isa/x86/pinned-reg.clif deleted file mode 100644 index b9bc230c33..0000000000 --- a/cranelift/filetests/filetests/isa/x86/pinned-reg.clif +++ /dev/null @@ -1,74 +0,0 @@ -test compile - -set enable_pinned_reg=true -set use_pinned_reg_as_heap_base=true -set opt_level=speed_and_size - -target x86_64 legacy - -; regex: V=v\d+ - -; r15 is the pinned heap register. It must not be rewritten, so it must not be -; used as a tied output register. -function %tied_input() -> i64 system_v { -block0: - v1 = get_pinned_reg.i64 - v2 = iadd_imm v1, 42 - return v2 -} - -; check: ,%r15] -; sameln: v1 = get_pinned_reg.i64 -; nextln: regmove v1, %r15 -> %rax -; nextln: ,%rax] -; sameln: iadd_imm v1, 42 - -;; It musn't be used even if this is a tied input used twice. 
-function %tied_twice() -> i64 system_v { -block0: - v1 = get_pinned_reg.i64 - v2 = iadd v1, v1 - return v2 -} - -; check: ,%r15] -; sameln: v1 = get_pinned_reg.i64 -; nextln: regmove v1, %r15 -> %rax -; nextln: ,%rax] -; sameln: iadd v1, v1 - -function %uses() -> i64 system_v { -block0: - v1 = get_pinned_reg.i64 - v2 = iadd_imm v1, 42 - v3 = get_pinned_reg.i64 - v4 = iadd v2, v3 - return v4 -} - -; check: ,%r15] -; sameln: v1 = get_pinned_reg.i64 -; nextln: regmove v1, %r15 -> %rax -; nextln: ,%rax] -; sameln: iadd_imm v1, 42 -; nextln: ,%r15 -; sameln: v3 = get_pinned_reg.i64 -; nextln: ,%rax] -; sameln: iadd v2, v3 - -; When the pinned register is used as the heap base, the final load instruction -; must use the %r15 register, since x86 implements the complex addressing mode. -function u0:1(i64 vmctx) -> i64 system_v { - gv0 = vmctx - heap0 = static gv0, min 0x000a_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 - -block0(v42: i64): - v5 = iconst.i32 42 - v6 = heap_addr.i64 heap0, v5, 0 - v7 = load.i64 v6 - return v7 -} - -; check: ,%r15] -; sameln: $(heap_base=$V) = get_pinned_reg.i64 -; nextln: load_complex.i64 $heap_base+ diff --git a/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif b/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif deleted file mode 100644 index 4b4a05244c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif +++ /dev/null @@ -1,28 +0,0 @@ -test compile -set use_colocated_libcalls=1 -set probestack_func_adjusts_sp=1 -target x86_64 legacy - -; Like %big in probestack.clif, but with the probestack function adjusting -; the stack pointer itself. 
- -function %big() system_v { - ss0 = explicit_slot 300000 -block0: - return -} -; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 300000, offset -300016 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: sig0 = (i64 [%rax]) probestack -; nextln: fn0 = colocated %Probestack sig0 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 0x0004_93e0 -; nextln: [Op1call_id#e8] call fn0(v1) -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0 -; nextln: [RexOp1popq#58,%rbp] v2 = x86_pop.i64 -; nextln: [Op1ret#c3] return v2 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif b/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif deleted file mode 100644 index 6b9b4f3342..0000000000 --- a/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif +++ /dev/null @@ -1,24 +0,0 @@ -test compile -set use_colocated_libcalls=1 -set enable_probestack=0 -target x86_64 legacy - -; Like %big in probestack.clif, but with probes disabled. 
- -function %big() system_v { - ss0 = explicit_slot 300000 -block0: - return -} -; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 300000, offset -300016 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 0x0004_93e0 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0 -; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif b/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif deleted file mode 100644 index 2837ddd0c9..0000000000 --- a/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif +++ /dev/null @@ -1,27 +0,0 @@ -test compile -target x86_64 legacy - -; Like %big in probestack.clif, but without a colocated libcall. 
- -function %big() system_v { - ss0 = explicit_slot 300000 -block0: - return -} -; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 300000, offset -300016 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack -; nextln: fn0 = %Probestack sig0 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 0x0004_93e0 -; nextln: [RexOp1fnaddr8#80b8,%r11] v2 = func_addr.i64 fn0 -; nextln: [RexOp1call_r#20ff,%rax] v3 = call_indirect sig0, v2(v1) -; nextln: [RexOp1adjustsp#8029] adjust_sp_down v3 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0 -; nextln: [RexOp1popq#58,%rbp] v4 = x86_pop.i64 -; nextln: [Op1ret#c3] return v4 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack-size.clif b/cranelift/filetests/filetests/isa/x86/probestack-size.clif deleted file mode 100644 index efb1900170..0000000000 --- a/cranelift/filetests/filetests/isa/x86/probestack-size.clif +++ /dev/null @@ -1,74 +0,0 @@ -test compile -set use_colocated_libcalls=1 -set probestack_size_log2=13 -target x86_64 legacy - -; Like %big in probestack.clif, but now the probestack size is bigger -; and it no longer needs a probe. 
- -function %big() system_v { - ss0 = explicit_slot 4097 -block0: - return -} - -; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 4097, offset -4113 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 4112 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4112 -; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1 -; nextln: } - - -; Like %big; still doesn't need a probe. - -function %bigger() system_v { - ss0 = explicit_slot 8192 -block0: - return -} - -; check: function %bigger(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 8192, offset -8208 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 8192 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 8192 -; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1 -; nextln: } - - -; Like %bigger; this needs a probe. 
- -function %biggest() system_v { - ss0 = explicit_slot 8193 -block0: - return -} - -; check: function %biggest(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 8193, offset -8209 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack -; nextln: fn0 = colocated %Probestack sig0 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 8208 -; nextln: [Op1call_id#e8,%rax] v2 = call fn0(v1) -; nextln: [RexOp1adjustsp#8029] adjust_sp_down v2 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 8208 -; nextln: [RexOp1popq#58,%rbp] v3 = x86_pop.i64 -; nextln: [Op1ret#c3] return v3 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack.clif b/cranelift/filetests/filetests/isa/x86/probestack.clif deleted file mode 100644 index c434cf5f63..0000000000 --- a/cranelift/filetests/filetests/isa/x86/probestack.clif +++ /dev/null @@ -1,49 +0,0 @@ -test compile -set use_colocated_libcalls=1 -target x86_64 legacy - -; A function with a big stack frame. This should have a stack probe. 
- -function %big() system_v { - ss0 = explicit_slot 4097 -block0: - return -} -; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 4097, offset -4113 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack -; nextln: fn0 = colocated %Probestack sig0 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 4112 -; nextln: [Op1call_id#e8,%rax] v2 = call fn0(v1) -; nextln: [RexOp1adjustsp#8029] adjust_sp_down v2 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4112 -; nextln: [RexOp1popq#58,%rbp] v3 = x86_pop.i64 -; nextln: [Op1ret#c3] return v3 -; nextln: } - - -; A function with a small enough stack frame. This shouldn't have a stack probe. - -function %small() system_v { - ss0 = explicit_slot 4096 -block0: - return -} - -; check: function %small(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 4096, offset -4112 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 4096 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4096 -; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif b/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif deleted file mode 100644 index 831928186b..0000000000 --- a/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif +++ /dev/null @@ -1,314 +0,0 @@ -test compile -set opt_level=speed_and_size -set is_pic -set enable_probestack=false -target x86_64 legacy haswell - -; An empty function. 
- -function %empty() { -block0: - return -} - -; check: function %empty(i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: x86_push v0 -; nextln: copy_special %rsp -> %rbp -; nextln: v1 = x86_pop.i64 -; nextln: return v1 -; nextln: } - -; A function with a single stack slot. - -function %one_stack_slot() { - ss0 = explicit_slot 168 -block0: - return -} - -; check: function %one_stack_slot(i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = explicit_slot 168, offset -184 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: x86_push v0 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 176 -; nextln: adjust_sp_up_imm 176 -; nextln: v1 = x86_pop.i64 -; nextln: return v1 -; nextln: } - -; A function performing a call. - -function %call() { - fn0 = %foo() - -block0: - call fn0() - return -} - -; check: function %call(i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = incoming_arg 16, offset -16 -; nextln: sig0 = () fast -; nextln: fn0 = %foo sig0 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: x86_push v0 -; nextln: copy_special %rsp -> %rbp -; nextln: call fn0() -; nextln: v1 = x86_pop.i64 -; nextln: return v1 -; nextln: } - -; A function that uses a lot of registers but doesn't quite need to spill. 
- -function %no_spill(i64, i64) { -block0(v0: i64, v1: i64): - v2 = load.i32 v0+0 - v3 = load.i32 v0+8 - v4 = load.i32 v0+16 - v5 = load.i32 v0+24 - v6 = load.i32 v0+32 - v7 = load.i32 v0+40 - v8 = load.i32 v0+48 - v9 = load.i32 v0+56 - v10 = load.i32 v0+64 - v11 = load.i32 v0+72 - v12 = load.i32 v0+80 - v13 = load.i32 v0+88 - v14 = load.i32 v0+96 - store.i32 v2, v1+0 - store.i32 v3, v1+8 - store.i32 v4, v1+16 - store.i32 v5, v1+24 - store.i32 v6, v1+32 - store.i32 v7, v1+40 - store.i32 v8, v1+48 - store.i32 v9, v1+56 - store.i32 v10, v1+64 - store.i32 v11, v1+72 - store.i32 v12, v1+80 - store.i32 v13, v1+88 - store.i32 v14, v1+96 - return -} - -; check: function %no_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] fast { -; nextln: ss0 = incoming_arg 56, offset -56 -; nextln: -; nextln: block0(v0: i64 [%rdi], v1: i64 [%rsi], v15: i64 [%rbp], v16: i64 [%rbx], v17: i64 [%r12], v18: i64 [%r13], v19: i64 [%r14], v20: i64 [%r15]): -; nextln: x86_push v15 -; nextln: copy_special %rsp -> %rbp -; nextln: x86_push v16 -; nextln: x86_push v17 -; nextln: x86_push v18 -; nextln: x86_push v19 -; nextln: x86_push v20 -; nextln: v2 = load.i32 v0 -; nextln: v3 = load.i32 v0+8 -; nextln: v4 = load.i32 v0+16 -; nextln: v5 = load.i32 v0+24 -; nextln: v6 = load.i32 v0+32 -; nextln: v7 = load.i32 v0+40 -; nextln: v8 = load.i32 v0+48 -; nextln: v9 = load.i32 v0+56 -; nextln: v10 = load.i32 v0+64 -; nextln: v11 = load.i32 v0+72 -; nextln: v12 = load.i32 v0+80 -; nextln: v13 = load.i32 v0+88 -; nextln: v14 = load.i32 v0+96 -; nextln: store v2, v1 -; nextln: store v3, v1+8 -; nextln: store v4, v1+16 -; nextln: store v5, v1+24 -; nextln: store v6, v1+32 -; nextln: store v7, v1+40 -; nextln: store v8, v1+48 -; nextln: store v9, v1+56 -; nextln: store v10, v1+64 -; nextln: store v11, v1+72 -; nextln: store v12, v1+80 -; 
nextln: store v13, v1+88 -; nextln: store v14, v1+96 -; nextln: v26 = x86_pop.i64 -; nextln: v25 = x86_pop.i64 -; nextln: v24 = x86_pop.i64 -; nextln: v23 = x86_pop.i64 -; nextln: v22 = x86_pop.i64 -; nextln: v21 = x86_pop.i64 -; nextln: return v21, v22, v23, v24, v25, v26 -; nextln: } - -; This function requires too many registers and must spill. - -function %yes_spill(i64, i64) { -block0(v0: i64, v1: i64): - v2 = load.i32 v0+0 - v3 = load.i32 v0+8 - v4 = load.i32 v0+16 - v5 = load.i32 v0+24 - v6 = load.i32 v0+32 - v7 = load.i32 v0+40 - v8 = load.i32 v0+48 - v9 = load.i32 v0+56 - v10 = load.i32 v0+64 - v11 = load.i32 v0+72 - v12 = load.i32 v0+80 - v13 = load.i32 v0+88 - v14 = load.i32 v0+96 - v15 = load.i32 v0+104 - store.i32 v2, v1+0 - store.i32 v3, v1+8 - store.i32 v4, v1+16 - store.i32 v5, v1+24 - store.i32 v6, v1+32 - store.i32 v7, v1+40 - store.i32 v8, v1+48 - store.i32 v9, v1+56 - store.i32 v10, v1+64 - store.i32 v11, v1+72 - store.i32 v12, v1+80 - store.i32 v13, v1+88 - store.i32 v14, v1+96 - store.i32 v15, v1+104 - return -} - -; check: function %yes_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] fast { -; check: ss0 = spill_slot - -; check: block0(v16: i64 [%rdi], v17: i64 [%rsi], v48: i64 [%rbp], v49: i64 [%rbx], v50: i64 [%r12], v51: i64 [%r13], v52: i64 [%r14], v53: i64 [%r15]): -; nextln: x86_push v48 -; nextln: copy_special %rsp -> %rbp -; nextln: x86_push v49 -; nextln: x86_push v50 -; nextln: x86_push v51 -; nextln: x86_push v52 -; nextln: x86_push v53 -; nextln: adjust_sp_down_imm - -; check: spill - -; check: fill - -; check: adjust_sp_up_imm -; nextln: v59 = x86_pop.i64 -; nextln: v58 = x86_pop.i64 -; nextln: v57 = x86_pop.i64 -; nextln: v56 = x86_pop.i64 -; nextln: v55 = x86_pop.i64 -; nextln: v54 = x86_pop.i64 -; nextln: return v54, v55, v56, v57, v58, v59 -; nextln: } - 
-; A function which uses diverted registers. - -function %divert(i32) -> i32 system_v { -block0(v0: i32): - v2 = iconst.i32 0 - v3 = iconst.i32 1 - jump block1(v0, v3, v2) - -block1(v4: i32, v5: i32, v6: i32): - brz v4, block3 - jump block2 - -block2: - v7 = iadd v5, v6 - v8 = iadd_imm v4, -1 - jump block1(v8, v7, v5) - -block3: - return v5 -} - -; check: function %divert -; check: regmove.i32 v5, %rcx -> %rbx -; check: [Op1popq#58,%rbx] v15 = x86_pop.i64 - -; Stack limit checking - -function %stack_limit(i64 stack_limit) { - ss0 = explicit_slot 168 -block0(v0: i64): - return -} - -; check: function %stack_limit(i64 stack_limit [%rdi], i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = explicit_slot 168, offset -184 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rdi], v4: i64 [%rbp]): -; nextln: v1 = copy v0 -; nextln: v2 = iadd_imm v1, 176 -; nextln: v3 = ifcmp_sp v2 -; nextln: trapif uge v3, stk_ovf -; nextln: x86_push v4 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 176 -; nextln: adjust_sp_up_imm 176 -; nextln: v5 = x86_pop.i64 -; nextln: return v5 -; nextln: } - -function %big_stack_limit(i64 stack_limit) { - ss0 = explicit_slot 40000 -block0(v0: i64): - return -} - -; check: function %big_stack_limit(i64 stack_limit [%rdi], i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = explicit_slot 40000, offset -40016 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rdi], v5: i64 [%rbp]): -; nextln: v1 = copy v0 -; nextln: v2 = ifcmp_sp v1 -; nextln: trapif uge v2, stk_ovf -; nextln: v3 = iadd_imm v1, 0x9c40 -; nextln: v4 = ifcmp_sp v3 -; nextln: trapif uge v4, stk_ovf -; nextln: x86_push v5 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 0x9c40 -; nextln: adjust_sp_up_imm 0x9c40 -; nextln: v6 = x86_pop.i64 -; nextln: return v6 -; nextln: } - -function %limit_preamble(i64 vmctx) { - gv0 = vmctx - gv1 = load.i64 notrap aligned gv0 - gv2 = load.i64 
notrap aligned gv1+4 - stack_limit = gv2 - ss0 = explicit_slot 20 -block0(v0: i64): - return -} - -; check: function %limit_preamble(i64 vmctx [%rdi], i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = explicit_slot 20, offset -36 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: gv0 = vmctx -; nextln: gv1 = load.i64 notrap aligned gv0 -; nextln: gv2 = load.i64 notrap aligned gv1+4 -; nextln: stack_limit = gv2 -; nextln: -; nextln: block0(v0: i64 [%rdi], v5: i64 [%rbp]): -; nextln: v1 = load.i64 notrap aligned v0 -; nextln: v2 = load.i64 notrap aligned v1+4 -; nextln: v3 = iadd_imm v2, 32 -; nextln: v4 = ifcmp_sp v3 -; nextln: trapif uge v4, stk_ovf -; nextln: x86_push v5 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 32 -; nextln: adjust_sp_up_imm 32 -; nextln: v6 = x86_pop.i64 -; nextln: return v6 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/relax_branch.clif b/cranelift/filetests/filetests/isa/x86/relax_branch.clif deleted file mode 100644 index 0e123f8a36..0000000000 --- a/cranelift/filetests/filetests/isa/x86/relax_branch.clif +++ /dev/null @@ -1,132 +0,0 @@ -test binemit -set opt_level=speed_and_size -set avoid_div_traps -set baldrdash_prologue_words=3 -set emit_all_ones_funcaddrs -set enable_probestack=false -target x86_64 legacy haswell - -; This checks that a branch that is too far away is getting relaxed. In -; particular, the first block has to be non-empty but its encoding size must be -; zero (i.e. not generate any code). See also issue #666 for more details. 
- -function u0:2691(i32 [%rdi], i32 [%rsi], i64 vmctx [%r14]) -> i64 uext [%rax] baldrdash_system_v { - ss0 = incoming_arg 24, offset -24 - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 48 - gv2 = load.i64 notrap aligned readonly gv0 - heap0 = static gv2, min 0xd839_6000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 - - block0(v0: i32 [%rdi], v1: i32 [%rsi], v2: i64 [%r14]): -@0005 [-] fallthrough block3(v0, v1) - - block3(v8: i32 [%rdi], v19: i32 [%rsi]): -@0005 [RexOp1ldDisp8#808b,%rax] v7 = load.i64 v2+48 -@0005 [RexOp1rcmp_ib#f083,%rflags] v91 = ifcmp_imm v7, 0 -@0005 [trapif#00] trapif ne v91, interrupt -[DynRexOp1umr#89,%rax] v105 = copy v8 -@000b [DynRexOp1r_ib#83,%rax] v10 = iadd_imm v105, 1 - v80 -> v10 -@0010 [Op1umr#89,%rcx] v92 = uextend.i64 v8 -@0010 [RexOp1ld#808b,%rdx] v93 = load.i64 notrap aligned readonly v2 - v95 -> v93 -@0010 [Op2ldWithIndex#4be,%rcx] v12 = sload8_complex.i32 v93+v92 -[DynRexOp1umr#89,%rbx] v106 = copy v12 -@0017 [DynRexOp1r_ib#40c1,%rbx] v14 = ishl_imm v106, 24 -@001a [DynRexOp1r_ib#70c1,%rbx] v16 = sshr_imm v14, 24 -[DynRexOp1umr#89,%rdi] v107 = copy v16 -@001f [DynRexOp1r_ib#83,%rdi] v18 = iadd_imm v107, 32 -[DynRexOp1umr#89,%r8] v108 = copy v19 -@0026 [DynRexOp1r_ib#83,%r8] v21 = iadd_imm v108, 1 - v82 -> v21 -@002b [Op1umr#89,%rsi] v94 = uextend.i64 v19 -@002b [Op2ldWithIndex#4be,%rdx] v23 = sload8_complex.i32 v93+v94 - v55 -> v23 -[DynRexOp1umr#89,%rsi] v109 = copy v23 -@0032 [DynRexOp1r_ib#40c1,%rsi] v25 = ishl_imm v109, 24 -@0035 [DynRexOp1r_ib#70c1,%rsi] v27 = sshr_imm v25, 24 - v69 -> v27 -[DynRexOp1umr#89,%r9] v110 = copy v27 -@003a [DynRexOp1r_ib#83,%r9] v29 = iadd_imm v110, 32 - v68 -> v29 -@0042 [DynRexOp1r_ib#83,%rcx] v31 = iadd_imm v12, -65 -@0045 [DynRexOp1r_ib#40c1,%rcx] v33 = ishl_imm v31, 24 -@0048 [DynRexOp1r_ib#70c1,%rcx] v35 = sshr_imm v33, 24 -@004c [DynRexOp1r_id#4081,%rcx] v37 = band_imm v35, 255 -[DynRexOp1rcmp_ib#7083,%rflags] v97 = ifcmp_imm v37, 26 -@0050 [Op1brib#70] brif sge v97, block6 
-@0050 [-] fallthrough block10 - - block10: -[DynRexOp1umr#89,%rcx] v101 = copy v18 -@0054 [Op1jmpb#eb] jump block5(v18, v101) - - block6: -[DynRexOp1umr#89,%rcx] v102 = copy.i32 v16 -@0059 [RexOp1rmov#89] regmove v102, %rcx -> %rdi -@0059 [RexOp1rmov#89] regmove.i32 v16, %rbx -> %rcx -@0059 [-] fallthrough block5(v102, v16) - - block5(v41: i32 [%rdi], v84: i32 [%rcx]): - v83 -> v84 -@005d [DynRexOp1r_id#4081,%rdi] v43 = band_imm v41, 255 -@0062 [DynRexOp1r_ib#40c1,%rdi] v45 = ishl_imm v43, 24 - v52 -> v45 -@0065 [RexOp1rmov#89] regmove v45, %rdi -> %rbx -@0065 [DynRexOp1r_ib#70c1,%rbx] v47 = sshr_imm v45, 24 - v54 -> v47 -@0068 [RexOp1rmov#89] regmove v47, %rbx -> %rdi -@0068 [DynRexOp1icscc_ib#7083,%rbx] v49 = icmp_imm ne v47, 0 -@0068 [RexOp2urm_noflags#4b6,%r10] v50 = bint.i32 v49 -@0076 [DynRexOp1r_ib#83,%rdx] v57 = iadd_imm.i32 v23, -65 -@0079 [DynRexOp1r_ib#40c1,%rdx] v59 = ishl_imm v57, 24 -@007c [DynRexOp1r_ib#70c1,%rdx] v61 = sshr_imm v59, 24 -@0080 [DynRexOp1r_id#4081,%rdx] v63 = band_imm v61, 255 -[DynRexOp1rcmp_ib#7083,%rflags] v98 = ifcmp_imm v63, 26 -@0084 [RexOp1rmov#89] regmove v47, %rdi -> %rbx -@0084 [Op1brib#70] brif sge v98, block8 -@0084 [-] fallthrough block11 - - block11: -[DynRexOp1umr#89,%rdx] v103 = copy.i32 v29 -@0088 [Op1jmpb#eb] jump block7(v29, v10, v21, v103) - - block8: -[DynRexOp1umr#89,%rdx] v104 = copy.i32 v27 -@008d [RexOp1rmov#89] regmove v104, %rdx -> %r9 -@008d [RexOp1rmov#89] regmove.i32 v27, %rsi -> %rdx -@008d [-] fallthrough block7(v104, v10, v21, v27) - - block7(v67: i32 [%r9], v79: i32 [%rax], v81: i32 [%r8], v87: i32 [%rdx]): -@0091 [DynRexOp1r_id#4081,%r9] v71 = band_imm v67, 255 -@0094 [DynRexOp1r_ib#40c1,%r9] v73 = ishl_imm v71, 24 -@0097 [DynRexOp1r_ib#70c1,%r9] v75 = sshr_imm v73, 24 -@0098 [DynRexOp1icscc#39,%rbx] v76 = icmp.i32 eq v47, v75 -@0098 [Op2urm_noflags_abcd#4b6,%rbx] v77 = bint.i32 v76 -@0099 [DynRexOp1rr#21,%r10] v78 = band.i32 v50, v77 -@009a [RexOp1tjccb#74] brz v78, block9 -@009a [-] fallthrough 
block12 - - block12: -[DynRexOp1umr#89,%rcx] v99 = copy v81 -[DynRexOp1umr#89,%rdx] v100 = copy v79 -@00a4 [RexOp1rmov#89] regmove v100, %rdx -> %rdi -@00a4 [RexOp1rmov#89] regmove v99, %rcx -> %rsi -@00a4 [Op1jmpd#e9] jump block3(v100, v99); bin: e9 ffffff2d - - block9: -@00a7 [-] fallthrough block4 - - block4: -@00ad [DynRexOp1r_id#4081,%rcx] v86 = band_imm.i32 v84, 255 -@00b3 [DynRexOp1r_id#4081,%rdx] v89 = band_imm.i32 v87, 255 -@00b4 [DynRexOp1rr#29,%rcx] v90 = isub v86, v89 -@00b5 [-] fallthrough block2(v90) - - block2(v5: i32 [%rcx]): -@00b6 [-] fallthrough block1(v5) - - block1(v3: i32 [%rcx]): -@00b6 [Op1umr#89,%rax] v96 = uextend.i64 v3 -@00b6 [-] fallthrough_return v96 -} diff --git a/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif b/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif deleted file mode 100644 index a26e2d865c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif +++ /dev/null @@ -1,13 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0() -> f32 system_v { -block0: - v0 = iconst.i8 255 -; check: v2 = iconst.i32 255 -; nextln: v0 = ireduce.i8 v2 - v1 = fcvt_from_uint.f32 v0 -; nextln: v3 = uextend.i64 v0 -; nextln: v1 = fcvt_from_sint.f32 v3 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/select-i8.clif b/cranelift/filetests/filetests/isa/x86/select-i8.clif deleted file mode 100644 index 44b7e32d12..0000000000 --- a/cranelift/filetests/filetests/isa/x86/select-i8.clif +++ /dev/null @@ -1,8 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(b1, i8, i8) -> i8 { -block0(v0: b1, v1: i8, v2: i8): - v3 = select v0, v1, v2 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif b/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif deleted file mode 100644 index 31b73da391..0000000000 --- a/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif +++ /dev/null @@ -1,18 +0,0 @@ -test shrink -set 
opt_level=speed_and_size -target x86_64 legacy - -function %test_multiple_uses(i32 [%rdi]) -> i32 { -block0(v0: i32 [%rdi]): -[DynRexOp1rcmp_ib#7083,%rflags] v3 = ifcmp_imm v0, 0 -[Op2seti_abcd#490,%rax] v1 = trueif eq v3 -[RexOp2urm_noflags#4b6,%rax] v2 = bint.i32 v1 -[Op1brib#70] brif eq v3, block1 -[Op1jmpb#eb] jump block2 - -block2: -[Op1ret#c3] return v2 - -block1: -[Op2trap#40b] trap user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/shrink.clif b/cranelift/filetests/filetests/isa/x86/shrink.clif deleted file mode 100644 index bb787832c9..0000000000 --- a/cranelift/filetests/filetests/isa/x86/shrink.clif +++ /dev/null @@ -1,40 +0,0 @@ -test binemit -set opt_level=speed_and_size -target x86_64 legacy - -; Test that instruction shrinking eliminates REX prefixes when possible. - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/shrink.clif | llvm-mc -show-encoding -triple=x86_64 -; - -function %test_shrinking(i32) -> i32 { -block0(v0: i32 [ %rdi ]): - ; asm: movl $0x2,%eax -[-,%rcx] v1 = iconst.i32 2 ; bin: b9 00000002 - ; asm: subl %ecx,%edi -[-,%rdi] v2 = isub v0, v1 ; bin: 29 cf - return v2 -} - -function %test_not_shrinking(i32) -> i32 { -block0(v0: i32 [ %r8 ]): - ; asm: movl $0x2,%eax -[-,%rcx] v1 = iconst.i32 2 ; bin: b9 00000002 - ; asm: subl %ecx,%edi -[-,%r8] v2 = isub v0, v1 ; bin: 41 29 c8 - return v2 -} - -function %test_not_shrinking_i8() { -block0: -[-,%rsi] v1 = iconst.i8 1 - ; asm: movsbl %sil,%esi -[-,%rsi] v2 = sextend.i32 v1 ; bin: 40 0f be f6 - ; asm: movzbl %sil,%esi -[-,%rsi] v3 = uextend.i32 v1 ; bin: 40 0f b6 f6 - ; asm: movzbl %sil,%esi -[-,%rsi] v4 = uextend.i64 v1 ; bin: 40 0f b6 f6 - trap user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif deleted file mode 100644 index 0a8fbe7f0c..0000000000 --- 
a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif +++ /dev/null @@ -1,116 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake - -function %arithmetic_i8x16(i8x16, i8x16) { -block0(v0: i8x16 [%xmm6], v1: i8x16 [%xmm2]): -[-, %xmm6] v2 = iadd v0, v1 ; bin: 66 0f fc f2 -[-, %xmm6] v3 = isub v0, v1 ; bin: 66 0f f8 f2 -[-, %xmm6] v4 = sadd_sat v0, v1 ; bin: 66 0f ec f2 -[-, %xmm6] v5 = ssub_sat v0, v1 ; bin: 66 0f e8 f2 -[-, %xmm6] v6 = usub_sat v0, v1 ; bin: 66 0f d8 f2 -[-, %xmm6] v7 = avg_round v0, v1 ; bin: 66 0f e0 f2 -[-, %xmm6] v9 = iabs v1 ; bin: 66 0f 38 1c f2 - - return -} - -function %arithmetic_i16x8(i16x8, i16x8) { -block0(v0: i16x8 [%xmm3], v1: i16x8 [%xmm5]): -[-, %xmm3] v2 = iadd v0, v1 ; bin: 66 0f fd dd -[-, %xmm3] v3 = isub v0, v1 ; bin: 66 0f f9 dd -[-, %xmm3] v4 = imul v0, v1 ; bin: 66 0f d5 dd -[-, %xmm3] v5 = uadd_sat v0, v1 ; bin: 66 0f dd dd -[-, %xmm3] v6 = ssub_sat v0, v1 ; bin: 66 0f e9 dd -[-, %xmm3] v7 = usub_sat v0, v1 ; bin: 66 0f d9 dd -[-, %xmm3] v8 = avg_round v0, v1 ; bin: 66 0f e3 dd -[-, %xmm3] v9 = iabs v1 ; bin: 66 0f 38 1d dd - - return -} - -function %arithmetic_i32x4(i32x4, i32x4) { -block0(v0: i32x4 [%xmm0], v1: i32x4 [%xmm1]): -[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fe c1 -[-, %xmm0] v3 = isub v0, v1 ; bin: 66 0f fa c1 -[-, %xmm0] v4 = imul v0, v1 ; bin: 66 0f 38 40 c1 -[-, %xmm0] v5 = iabs v1 ; bin: 66 0f 38 1e c1 - - return -} - -function %arithmetic_i64x2(i64x2, i64x2) { -block0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm2]): -[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f d4 c2 -[-, %xmm0] v3 = isub v0, v1 ; bin: 66 0f fb c2 - - return -} - -function %arithmetic_i64x2_rex(i64x2, i64x2) { -block0(v0: i64x2 [%xmm8], v1: i64x2 [%xmm10]): -[-, %xmm8] v2 = iadd v0, v1 ; bin: 66 45 0f d4 c2 -[-, %xmm8] v3 = isub v0, v1 ; bin: 66 45 0f fb c2 - - return -} - -function %arithmetic_f32x4(f32x4, f32x4) { -block0(v0: f32x4 [%xmm3], v1: f32x4 [%xmm5]): -[-, %xmm3] v2 = fadd v0, v1 ; bin: 0f 58 dd -[-, %xmm3] v3 
= fsub v0, v1 ; bin: 0f 5c dd -[-, %xmm3] v4 = fmul v0, v1 ; bin: 0f 59 dd -[-, %xmm3] v5 = fdiv v0, v1 ; bin: 0f 5e dd -[-, %xmm3] v6 = x86_fmin v0, v1 ; bin: 0f 5d dd -[-, %xmm3] v7 = x86_fmax v0, v1 ; bin: 0f 5f dd -[-, %xmm3] v8 = sqrt v0 ; bin: 0f 51 db - return -} - -function %arithmetic_f32x4_rex(f32x4, f32x4) { -block0(v0: f32x4 [%xmm3], v1: f32x4 [%xmm10]): -[-, %xmm3] v2 = fadd v0, v1 ; bin: 41 0f 58 da -[-, %xmm3] v3 = fsub v0, v1 ; bin: 41 0f 5c da -[-, %xmm3] v4 = fmul v0, v1 ; bin: 41 0f 59 da -[-, %xmm3] v5 = fdiv v0, v1 ; bin: 41 0f 5e da -[-, %xmm3] v6 = x86_fmin v0, v1 ; bin: 41 0f 5d da -[-, %xmm3] v7 = x86_fmax v0, v1 ; bin: 41 0f 5f da -[-, %xmm3] v8 = sqrt v1 ; bin: 41 0f 51 da - return -} - -function %arithmetic_f64x2(f64x2, f64x2) { -block0(v0: f64x2 [%xmm3], v1: f64x2 [%xmm5]): -[-, %xmm3] v2 = fadd v0, v1 ; bin: 66 0f 58 dd -[-, %xmm3] v3 = fsub v0, v1 ; bin: 66 0f 5c dd -[-, %xmm3] v4 = fmul v0, v1 ; bin: 66 0f 59 dd -[-, %xmm3] v5 = fdiv v0, v1 ; bin: 66 0f 5e dd -[-, %xmm3] v6 = x86_fmin v0, v1 ; bin: 66 0f 5d dd -[-, %xmm3] v7 = x86_fmax v0, v1 ; bin: 66 0f 5f dd -[-, %xmm3] v8 = sqrt v0 ; bin: 66 0f 51 db - return -} - -function %arithmetic_f64x2_rex(f64x2, f64x2) { -block0(v0: f64x2 [%xmm11], v1: f64x2 [%xmm13]): -[-, %xmm11] v2 = fadd v0, v1 ; bin: 66 45 0f 58 dd -[-, %xmm11] v3 = fsub v0, v1 ; bin: 66 45 0f 5c dd -[-, %xmm11] v4 = fmul v0, v1 ; bin: 66 45 0f 59 dd -[-, %xmm11] v5 = fdiv v0, v1 ; bin: 66 45 0f 5e dd -[-, %xmm11] v6 = x86_fmin v0, v1 ; bin: 66 45 0f 5d dd -[-, %xmm11] v7 = x86_fmax v0, v1 ; bin: 66 45 0f 5f dd -[-, %xmm11] v8 = sqrt v0 ; bin: 66 45 0f 51 db - return -} - -function %pmuludq(i64x2, i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm3], v1: i64x2 [%xmm5]): -[-, %xmm3] v2 = x86_pmuludq v0, v1 ; bin: 66 0f f4 dd - return v2 -} - -function %pmaddwd(i16x8, i16x8) -> i32x4 { -block0(v0: i16x8 [%xmm8], v1: i16x8 [%xmm9]): -[-, %xmm8] v2 = widening_pairwise_dot_product_s v0, v1 ; bin: 66 45 0f f5 c1 - return v2 -} diff 
--git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif deleted file mode 100644 index 74bc68ee67..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif +++ /dev/null @@ -1,117 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -function %ineg_i32x4() -> b1 { -; check: const0 = 0x00000001000000010000000100000001 -; nextln: const1 = 0x00000000000000000000000000000000 -block0: - v0 = vconst.i32x4 [1 1 1 1] - v2 = ineg v0 - ; check: v5 = vconst.i32x4 const1 - ; nextln: v2 = isub v5, v0 - - v3 = extractlane v2, 0 - v4 = icmp_imm eq v3, -1 - - return v4 -} - -function %ineg_legalized() { -; check: const0 = 0x00000000000000000000000000000000 -block0: - v0 = vconst.i8x16 0x00 - v1 = ineg v0 - ; check: v6 = vconst.i8x16 const0 - ; nextln: v1 = isub v6, v0 - - v2 = raw_bitcast.i16x8 v0 - v3 = ineg v2 - ; check: v7 = vconst.i16x8 const0 - ; nextln: v3 = isub v7, v2 - - v4 = raw_bitcast.i64x2 v0 - v5 = ineg v4 - ; check: v8 = vconst.i64x2 const0 - ; nextln: v5 = isub v8, v4 - - return -} - -function %fneg_legalized() { -; check: const2 = 0xffffffffffffffffffffffffffffffff -block0: - v0 = vconst.f32x4 [0x1.0 0x2.0 0x3.0 0x4.0] - v1 = fneg v0 - ; check: v4 = vconst.i32x4 const2 - ; nextln: v5 = ishl_imm v4, 31 - ; nextln: v6 = raw_bitcast.f32x4 v5 - ; nextln: v1 = bxor v0, v6 - - v2 = vconst.f64x2 [0x1.0 0x2.0] - v3 = fneg v2 - ; check: v7 = vconst.i64x2 const2 - ; nextln: v8 = ishl_imm v7, 63 - ; nextln: v9 = raw_bitcast.f64x2 v8 - ; nextln: v3 = bxor v2, v9 - - return -} - -function %fabs_legalized() { -; check: const1 = 0xffffffffffffffffffffffffffffffff -block0: - v0 = vconst.f64x2 [0x1.0 -0x2.0] - v1 = fabs v0 - ; check: v2 = vconst.i64x2 const1 - ; nextln: v3 = ushr_imm v2, 1 - ; nextln: v4 = raw_bitcast.f64x2 v3 - ; nextln: v1 = band v0, v4 - return -} - -function %imul_i64x2(i64x2, i64x2) { -block0(v0:i64x2, v1:i64x2): - v2 = 
imul v0, v1 - ; check: v3 = ushr_imm v0, 32 - ; nextln: v4 = x86_pmuludq v3, v1 - ; nextln: v5 = ushr_imm v1, 32 - ; nextln: v6 = x86_pmuludq v5, v0 - ; nextln: v7 = iadd v4, v6 - ; nextln: v8 = ishl_imm v7, 32 - ; nextln: v9 = x86_pmuludq v0, v1 - ; nextln: v2 = iadd v9, v8 - return -} - -function %fmin_f32x4(f32x4, f32x4) { -block0(v0:f32x4, v1:f32x4): - v2 = fmin v0, v1 - ; check: v3 = x86_fmin v0, v1 - ; nextln: v4 = x86_fmin v1, v0 - ; nextln: v5 = bor v4, v3 - ; nextln: v6 = fcmp uno v3, v5 - ; nextln: v7 = raw_bitcast.f32x4 v6 - ; nextln: v8 = bor v5, v7 - ; nextln: v9 = raw_bitcast.i32x4 v7 - ; nextln: v10 = ushr_imm v9, 10 - ; nextln: v11 = raw_bitcast.f32x4 v10 - ; nextln: v2 = band_not v8, v11 - return -} - -function %fmax_f64x2(f64x2, f64x2) { -block0(v0:f64x2, v1:f64x2): - v2 = fmax v0, v1 - ; check: v3 = x86_fmax v0, v1 - ; nextln: v4 = x86_fmax v1, v0 - ; nextln: v5 = bxor v3, v4 - ; nextln: v6 = bor v4, v5 - ; nextln: v7 = fsub v6, v5 - ; nextln: v8 = fcmp uno v5, v7 - ; nextln: v9 = raw_bitcast.i64x2 v8 - ; nextln: v10 = ushr_imm v9, 13 - ; nextln: v11 = raw_bitcast.f64x2 v10 - ; nextln: v2 = band_not v7, v11 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif deleted file mode 100644 index 0daf064713..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif +++ /dev/null @@ -1,17 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake has_avx512dq=true - -function %imul_i64x2() { -block0: - [-, %xmm1] v0 = vconst.i64x2 [1 2] - [-, %xmm2] v1 = vconst.i64x2 [2 2] - [-, %xmm14] v2 = x86_pmullq v0, v1 ; bin: 62 72 f5 08 40 f2 - ; 62, mandatory EVEX prefix - ; 72 = 0111 0010, R is set (MSB in %xmm14) while X, B, and R' are unset (note these are all inverted); mm is set to 0F38 - ; f5 = 1111 0101, W is set (64-bit op), vvvv set to 1 (inverted), bit 2 always set, pp set to 01 - ; 08 = 
0000 1000, everything, LL' indicates 128-bit, V' is unset (inverted, %xmm1 has MSB of 0) - ; 40, opcode (correct) - ; f2 = 1111 0010, ModR/M byte using 0b110 from %xmm14 in reg and 0b010 from %xmm2 in r/m - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif deleted file mode 100644 index 294902d45b..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif +++ /dev/null @@ -1,10 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake has_avx512dq=true - -function %imul_i64x2(i64x2, i64x2) { -block0(v0:i64x2, v1:i64x2): - v2 = imul v0, v1 - ; check: v2 = x86_pmullq v0, v1 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif deleted file mode 100644 index 6f235e6b3b..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif +++ /dev/null @@ -1,9 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy has_avx512vl=true - -function %fcvt_from_uint(i32x4) { -block0(v0: i32x4 [%xmm2]): -[-, %xmm6] v1 = x86_vcvtudq2ps v0 ; bin: 62 f1 7f 08 7a f2 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif deleted file mode 100644 index cdadd3254d..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif +++ /dev/null @@ -1,10 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake has_avx512f=true - -function %fcvt_from_uint(i32x4) -> f32x4 { -block0(v0:i32x4): - v1 = fcvt_from_uint.f32x4 v0 - ; check: v1 = x86_vcvtudq2ps v0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif 
deleted file mode 100644 index 3131a8aa0c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif +++ /dev/null @@ -1,99 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake - -function %ishl_i16x8(i16x8, i64x2) -> i16x8 { -block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]): -[-, %xmm2] v2 = x86_psll v0, v1 ; bin: 66 0f f1 d1 - return v2 -} - -function %ishl_i32x4(i32x4, i64x2) -> i32x4 { -block0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]): -[-, %xmm4] v2 = x86_psll v0, v1 ; bin: 66 0f f2 e0 - return v2 -} - -function %ishl_i64x2(i64x2, i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm6], v1: i64x2 [%xmm3]): -[-, %xmm6] v2 = x86_psll v0, v1 ; bin: 66 0f f3 f3 - return v2 -} - -function %ushr_i16x8(i16x8, i64x2) -> i16x8 { -block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]): -[-, %xmm2] v2 = x86_psrl v0, v1 ; bin: 66 0f d1 d1 - return v2 -} - -function %ushr_i32x4(i32x4, i64x2) -> i32x4 { -block0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]): -[-, %xmm4] v2 = x86_psrl v0, v1 ; bin: 66 0f d2 e0 - return v2 -} - -function %ushr_i64x2(i64x2, i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm6], v1: i64x2 [%xmm3]): -[-, %xmm6] v2 = x86_psrl v0, v1 ; bin: 66 0f d3 f3 - return v2 -} - -function %sshr_i16x8(i16x8, i64x2) -> i16x8 { -block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]): -[-, %xmm2] v2 = x86_psra v0, v1 ; bin: 66 0f e1 d1 - return v2 -} - -function %sshr_i32x4(i32x4, i64x2) -> i32x4 { -block0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]): -[-, %xmm4] v2 = x86_psra v0, v1 ; bin: 66 0f e2 e0 - return v2 -} - -function %ishl_imm_i16x8(i16x8) -> i16x8 { -block0(v0: i16x8 [%xmm2]): -[-, %xmm2] v2 = ishl_imm v0, 3 ; bin: 66 0f 71 f2 03 - return v2 -} - -function %ishl_imm_i32x4(i32x4) -> i32x4 { -block0(v0: i32x4 [%xmm4]): -[-, %xmm4] v2 = ishl_imm v0, 10 ; bin: 66 0f 72 f4 0a - return v2 -} - -function %ishl_imm_i64x2(i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm6]): -[-, %xmm6] v2 = ishl_imm v0, 42 ; bin: 66 0f 73 f6 2a - return v2 -} - -function %ushr_imm_i16x8(i16x8) -> i16x8 { 
-block0(v0: i16x8 [%xmm2]): -[-, %xmm2] v2 = ushr_imm v0, 3 ; bin: 66 0f 71 d2 03 - return v2 -} - -function %ushr_imm_i32x4(i32x4) -> i32x4 { -block0(v0: i32x4 [%xmm4]): -[-, %xmm4] v2 = ushr_imm v0, 10 ; bin: 66 0f 72 d4 0a - return v2 -} - -function %ushr_imm_i64x2(i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm6]): -[-, %xmm6] v2 = ushr_imm v0, 42 ; bin: 66 0f 73 d6 2a - return v2 -} - -function %sshr_imm_i16x8(i16x8) -> i16x8 { -block0(v0: i16x8 [%xmm2]): -[-, %xmm2] v2 = sshr_imm v0, 3 ; bin: 66 0f 71 e2 03 - return v2 -} - -function %sshr_imm_i32x4(i32x4) -> i32x4 { -block0(v0: i32x4 [%xmm4]): -[-, %xmm4] v2 = sshr_imm v0, 10 ; bin: 66 0f 72 e4 0a - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif deleted file mode 100644 index 7674f83e01..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif +++ /dev/null @@ -1,111 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -function %ushr_i8x16() -> i8x16 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v2 = ushr v1, v0 - ; check: v3 = bitcast.i64x2 v0 - ; nextln: v4 = raw_bitcast.i16x8 v1 - ; nextln: v5 = x86_psrl v4, v3 - ; nextln: v6 = raw_bitcast.i8x16 v5 - ; nextln: v7 = const_addr.i64 const1 - ; nextln: v8 = ishl_imm v0, 4 - ; nextln: v9 = load_complex.i8x16 v7+v8 - ; nextln: v2 = band v6, v9 - return v2 -} - -function %sshr_i8x16() -> i8x16 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v2 = sshr v1, v0 - ; check: v3 = iadd_imm v0, 8 - ; nextln: v4 = bitcast.i64x2 v3 - - ; nextln: v5 = x86_punpckl v1, v1 - ; nextln: v6 = raw_bitcast.i16x8 v5 - ; nextln: v7 = x86_psra v6, v4 - - ; nextln: v8 = x86_punpckh v1, v1 - ; nextln: v9 = raw_bitcast.i16x8 v8 - ; nextln: v10 = x86_psra v9, v4 - - ; nextln: v2 = snarrow v7, v10 - return v2 -} - -function %ishl_i8x16() -> i8x16 { 
-block0: - v0 = iconst.i32 1 - v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v2 = ishl v1, v0 - ; check: v3 = bitcast.i64x2 v0 - ; nextln: v4 = raw_bitcast.i16x8 v1 - ; nextln: v5 = x86_psll v4, v3 - ; nextln: v6 = raw_bitcast.i8x16 v5 - ; nextln: v7 = const_addr.i64 const1 - ; nextln: v8 = ishl_imm v0, 4 - ; nextln: v9 = load_complex.i8x16 v7+v8 - ; nextln: v2 = band v6, v9 - return v2 -} - -function %ishl_i32x4() -> i32x4 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i32x4 [1 2 4 8] - v2 = ishl v1, v0 - ; check: v3 = bitcast.i64x2 v0 - ; nextln: v2 = x86_psll v1, v3 - return v2 -} - -function %ushr_i64x2() -> i64x2 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i64x2 [1 2] - v2 = ushr v1, v0 - ; check: v3 = bitcast.i64x2 v0 - ; nextln: v2 = x86_psrl v1, v3 - return v2 -} - -function %sshr_i16x8() -> i16x8 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i16x8 [1 2 4 8 16 32 64 128] - v2 = sshr v1, v0 - ; check: v3 = bitcast.i64x2 v0 - ; nextln: v2 = x86_psra v1, v3 - return v2 -} - -function %sshr_i64x2() -> i64x2 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i64x2 [1 2] - v2 = sshr v1, v0 - ; check: v3 = x86_pextr v1, 0 - ; nextln: v4 = sshr v3, v0 - ; nextln: v5 = x86_pinsr v1, v4, 0 - ; nextln: v6 = x86_pextr v1, 1 - ; nextln: v7 = sshr v6, v0 - ; nextln: v2 = x86_pinsr v5, v7, 1 - return v2 -} - -function %bitselect_i16x8() -> i16x8 { -block0: - v0 = vconst.i16x8 [0 0 0 0 0 0 0 0] - v1 = vconst.i16x8 [0 0 0 0 0 0 0 0] - v2 = vconst.i16x8 [0 0 0 0 0 0 0 0] - v3 = bitselect v0, v1, v2 - ; check: v4 = band v1, v0 - ; nextln: v5 = band_not v2, v0 - ; nextln: v3 = bor v4, v5 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif deleted file mode 100644 index 1d3db4a119..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif +++ /dev/null @@ -1,138 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake - 
-function %icmp_i8x16() { -block0: -[-, %xmm3] v0 = vconst.i8x16 0x00 ; bin: 66 0f ef db -[-, %xmm4] v1 = vconst.i8x16 0xffffffffffffffffffffffffffffffff ; bin: 66 0f 74 e4 -[-, %xmm3] v2 = icmp eq v0, v1 ; bin: 66 0f 74 dc - return -} - -function %icmp_i16x8_rex() { -block0: -[-, %xmm0] v0 = vconst.i16x8 0x00 -[-, %xmm15] v1 = vconst.i16x8 0xffffffffffffffffffffffffffffffff -[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 41 0f 75 c7 - return -} - -function %icmp_i32x4() { -block0: -[-, %xmm0] v0 = vconst.i32x4 0x00 -[-, %xmm4] v1 = vconst.i32x4 0xffffffffffffffffffffffffffffffff -[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 76 c4 - return -} - -function %icmp_i64x2_rex() { -block0: -[-, %xmm8] v0 = vconst.i64x2 0x00 -[-, %xmm1] v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff -[-, %xmm8] v2 = icmp eq v0, v1 ; bin: 66 44 0f 38 29 c1 - return -} - -function %icmp_sgt_i8x16(i8x16, i8x16) -> b8x16 { -block0(v0: i8x16 [%xmm2], v1: i8x16 [%xmm1]): -[-, %xmm2] v2 = icmp sgt v0, v1 ; bin: 66 0f 64 d1 - return v2 -} - -function %icmp_sgt_i16x8(i16x8, i16x8) -> b16x8 { -block0(v0: i16x8 [%xmm4], v1: i16x8 [%xmm3]): -[-, %xmm4] v2 = icmp sgt v0, v1 ; bin: 66 0f 65 e3 - return v2 -} - -function %icmp_sgt_i32x4(i32x4, i32x4) -> b32x4 { -block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm5]): -[-, %xmm6] v2 = icmp sgt v0, v1 ; bin: 66 0f 66 f5 - return v2 -} - -function %icmp_sgt_i64x2(i64x2, i64x2) -> b64x2 { -block0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm7]): -[-, %xmm0] v2 = icmp sgt v0, v1 ; bin: 66 0f 38 37 c7 - return v2 -} - -function %min_max_i8x16(i8x16, i8x16) { -block0(v0: i8x16 [%xmm3], v1: i8x16 [%xmm1]): -[-, %xmm3] v2 = x86_pmaxs v0, v1 ; bin: 66 0f 38 3c d9 -[-, %xmm3] v3 = x86_pmaxu v0, v1 ; bin: 66 0f de d9 -[-, %xmm3] v4 = x86_pmins v0, v1 ; bin: 66 0f 38 38 d9 -[-, %xmm3] v5 = x86_pminu v0, v1 ; bin: 66 0f da d9 - return -} - -function %min_max_i16x8(i16x8, i16x8) { -block0(v0: i16x8 [%xmm2], v1: i16x8 [%xmm5]): -[-, %xmm2] v2 = x86_pmaxs v0, v1 ; bin: 66 0f ee d5 -[-, %xmm2] 
v3 = x86_pmaxu v0, v1 ; bin: 66 0f 38 3e d5 -[-, %xmm2] v4 = x86_pmins v0, v1 ; bin: 66 0f ea d5 -[-, %xmm2] v5 = x86_pminu v0, v1 ; bin: 66 0f 38 3a d5 - return -} - -function %min_max_i32x4(i32x4, i32x4) { -block0(v0: i32x4 [%xmm2], v1: i32x4 [%xmm4]): -[-, %xmm2] v2 = x86_pmaxs v0, v1 ; bin: 66 0f 38 3d d4 -[-, %xmm2] v3 = x86_pmaxu v0, v1 ; bin: 66 0f 38 3f d4 -[-, %xmm2] v4 = x86_pmins v0, v1 ; bin: 66 0f 38 39 d4 -[-, %xmm2] v5 = x86_pminu v0, v1 ; bin: 66 0f 38 3b d4 - return -} - -function %fcmp_f32x4(f32x4, f32x4) { -block0(v0: f32x4 [%xmm2], v1: f32x4 [%xmm4]): -[-, %xmm2] v2 = fcmp eq v0, v1 ; bin: 0f c2 d4 00 -[-, %xmm2] v3 = fcmp lt v0, v1 ; bin: 0f c2 d4 01 -[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 0f c2 d4 02 -[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 0f c2 d4 03 -[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 0f c2 d4 04 -[-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 0f c2 d4 05 -[-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 0f c2 d4 06 -[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 0f c2 d4 07 - return -} - -function %fcmp_f32x4_rex(f32x4, f32x4) { -block0(v0: f32x4 [%xmm8], v1: f32x4 [%xmm8]): -[-, %xmm8] v2 = fcmp eq v0, v1 ; bin: 45 0f c2 c0 00 -[-, %xmm8] v3 = fcmp lt v0, v1 ; bin: 45 0f c2 c0 01 -[-, %xmm8] v4 = fcmp le v0, v1 ; bin: 45 0f c2 c0 02 -[-, %xmm8] v5 = fcmp uno v0, v1 ; bin: 45 0f c2 c0 03 -[-, %xmm8] v6 = fcmp ne v0, v1 ; bin: 45 0f c2 c0 04 -[-, %xmm8] v7 = fcmp uge v0, v1 ; bin: 45 0f c2 c0 05 -[-, %xmm8] v8 = fcmp ugt v0, v1 ; bin: 45 0f c2 c0 06 -[-, %xmm8] v9 = fcmp ord v0, v1 ; bin: 45 0f c2 c0 07 - return -} - -function %fcmp_f64x2(f64x2, f64x2) { -block0(v0: f64x2 [%xmm2], v1: f64x2 [%xmm0]): -[-, %xmm2] v2 = fcmp eq v0, v1 ; bin: 66 0f c2 d0 00 -[-, %xmm2] v3 = fcmp lt v0, v1 ; bin: 66 0f c2 d0 01 -[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 66 0f c2 d0 02 -[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 66 0f c2 d0 03 -[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 66 0f c2 d0 04 -[-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 66 0f c2 d0 05 -[-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 66 0f 
c2 d0 06 -[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 66 0f c2 d0 07 - return -} - -function %fcmp_f64x2_rex(f64x2, f64x2) { -block0(v0: f64x2 [%xmm9], v1: f64x2 [%xmm11]): -[-, %xmm9] v2 = fcmp eq v0, v1 ; bin: 66 45 0f c2 cb 00 -[-, %xmm9] v3 = fcmp lt v0, v1 ; bin: 66 45 0f c2 cb 01 -[-, %xmm9] v4 = fcmp le v0, v1 ; bin: 66 45 0f c2 cb 02 -[-, %xmm9] v5 = fcmp uno v0, v1 ; bin: 66 45 0f c2 cb 03 -[-, %xmm9] v6 = fcmp ne v0, v1 ; bin: 66 45 0f c2 cb 04 -[-, %xmm9] v7 = fcmp uge v0, v1 ; bin: 66 45 0f c2 cb 05 -[-, %xmm9] v8 = fcmp ugt v0, v1 ; bin: 66 45 0f c2 cb 06 -[-, %xmm9] v9 = fcmp ord v0, v1 ; bin: 66 45 0f c2 cb 07 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif deleted file mode 100644 index a6324a34cc..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif +++ /dev/null @@ -1,40 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -function %icmp_ne_32x4(i32x4, i32x4) -> b32x4 { -; check: const0 = 0xffffffffffffffffffffffffffffffff -block0(v0: i32x4, v1: i32x4): - v2 = icmp ne v0, v1 - ; check: v3 = icmp eq v0, v1 - ; nextln: v4 = vconst.b32x4 const0 - ; nextln: v2 = bxor v4, v3 - return v2 -} - -function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 { -; check: const0 = 0xffffffffffffffffffffffffffffffff -block0(v0: i32x4, v1: i32x4): - v2 = icmp ugt v0, v1 - ; check: v3 = x86_pmaxu v0, v1 - ; nextln: v4 = icmp eq v3, v1 - ; nextln: v5 = vconst.b32x4 const0 - ; nextln: v2 = bxor v5, v4 - return v2 -} - -function %icmp_sge_i16x8(i16x8, i16x8) -> b16x8 { -block0(v0: i16x8, v1: i16x8): - v2 = icmp sge v0, v1 - ; check: v3 = x86_pmins v0, v1 - ; nextln: v2 = icmp eq v3, v1 - return v2 -} - -function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 { -block0(v0: i8x16, v1: i8x16): - v2 = icmp uge v0, v1 - ; check: v3 = x86_pminu v0, v1 - ; nextln: v2 = icmp eq v3, v1 - return v2 -} diff --git 
a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif deleted file mode 100644 index f26b436931..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif +++ /dev/null @@ -1,26 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy nehalem - -; Ensure raw_bitcast emits no instructions. -function %raw_bitcast_i16x8_to_b32x4() { -block0: -[-, %rbx] v0 = bconst.b16 true -[-, %xmm2] v1 = scalar_to_vector.b16x8 v0 -[-, %xmm2] v2 = raw_bitcast.i32x4 v1 ; bin: - return -} - -function %conversions_i32x4(i32x4, i32x4) { -block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm4]): -[-, %xmm2] v2 = fcvt_from_sint.f32x4 v0 ; bin: 40 0f 5b d6 -[-, %xmm6] v3 = x86_palignr v0, v1, 3 ; bin: 66 0f 3a 0f f4 03 - return -} - -function %conversions_i16x8(i16x8) { -block0(v0: i16x8 [%xmm6]): -[-, %xmm2] v1 = swiden_low v0 ; bin: 66 0f 38 23 d6 -[-, %xmm11] v2 = uwiden_low v0 ; bin: 66 44 0f 38 33 de - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif deleted file mode 100644 index 6de14e181a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif +++ /dev/null @@ -1,70 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -function %fcvt_from_uint(i32x4) -> f32x4 { -block0(v0:i32x4): - v1 = fcvt_from_uint.f32x4 v0 - ; check: v2 = raw_bitcast.i16x8 v0 - ; nextln: v3 = vconst.i16x8 const0 - ; nextln: v4 = x86_pblendw v3, v2, 85 - ; nextln: v5 = raw_bitcast.i32x4 v4 - ; nextln: v6 = isub v0, v5 - ; nextln: v7 = fcvt_from_sint.f32x4 v5 - ; nextln: v8 = ushr_imm v6, 1 - ; nextln: v9 = fcvt_from_sint.f32x4 v8 - ; nextln: v10 = fadd v9, v9 - ; nextln: v1 = fadd v10, v7 - return v1 -} - -function %fcvt_to_sint_sat(f32x4) -> i32x4 { -block0(v0:f32x4): - v1 = fcvt_to_sint_sat.i32x4 v0 - ; check: v2 = fcmp eq v0, v0 - ; nextln: v3 = 
raw_bitcast.f32x4 v2 - ; nextln: v4 = band v0, v3 - ; nextln: v5 = bxor v3, v0 - ; nextln: v6 = raw_bitcast.i32x4 v5 - ; nextln: v7 = x86_cvtt2si.i32x4 v4 - ; nextln: v8 = band v6, v7 - ; nextln: v9 = sshr_imm v8, 31 - ; nextln: v1 = bxor v7, v9 - return v1 -} - -function %fcvt_to_uint_sat(f32x4) -> i32x4 { -; check: const0 = 0x00000000000000000000000000000000 -; nextln: const1 = 0x4f0000004f0000004f0000004f000000 -block0(v0:f32x4): - v1 = fcvt_to_uint_sat.i32x4 v0 - ; check: v2 = vconst.f32x4 const0 - ; nextln: v3 = vconst.f32x4 const1 - ; nextln: v4 = x86_fmax v0, v2 - ; nextln: v5 = fsub v4, v3 - ; nextln: v6 = fcmp le v3, v5 - ; nextln: v7 = x86_cvtt2si.i32x4 v5 - ; nextln: v8 = raw_bitcast.i32x4 v6 - ; nextln: v9 = bxor v7, v8 - ; nextln: v10 = raw_bitcast.i32x4 v2 - ; nextln: v11 = x86_pmaxs v9, v10 - ; nextln: v12 = x86_cvtt2si.i32x4 v4 - ; nextln: v1 = iadd v12, v11 - return v1 -} - -function %uwiden_high(i8x16) -> i16x8 { -block0(v0: i8x16): - v1 = uwiden_high v0 - ; check: v2 = x86_palignr v0, v0, 8 - ; nextln: v1 = uwiden_low v2 - return v1 -} - -function %swiden_high(i16x8) -> i32x4 { -block0(v0: i16x8): - v1 = swiden_high v0 - ; check: v2 = x86_palignr v0, v0, 8 - ; nextln: v1 = swiden_low v2 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif deleted file mode 100644 index 6240a08557..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif +++ /dev/null @@ -1,34 +0,0 @@ -test binemit -set opt_level=speed_and_size -set enable_simd -target x86_64 legacy - -;; These scalar_to_vector tests avoid the use of REX prefixes with the speed_and_size optimization flag. 
- -function %scalar_to_vector_b8() { -block0: -[-, %rax] v0 = bconst.b8 true -[-, %xmm0] v1 = scalar_to_vector.b8x16 v0 ; bin: 66 0f 6e c0 - return -} - -function %scalar_to_vector_i16() { -block0: -[-, %rbx] v0 = iconst.i16 42 -[-, %xmm2] v1 = scalar_to_vector.i16x8 v0 ; bin: 66 0f 6e d3 - return -} - -function %scalar_to_vector_b32() { -block0: -[-, %rcx] v0 = bconst.b32 false -[-, %xmm3] v1 = scalar_to_vector.b32x4 v0 ; bin: 66 0f 6e d9 - return -} - -function %scalar_to_vector_i64() { -block0: -[-, %rdx] v0 = iconst.i64 42 -[-, %xmm7] v1 = scalar_to_vector.i64x2 v0 ; bin: 66 48 0f 6e fa - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif deleted file mode 100644 index a8c14a6342..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif +++ /dev/null @@ -1,126 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy haswell - -; for insertlane, floats are legalized differently than integers and booleans; integers and -; booleans use x86_pinsr which is manually placed in the IR so that it can be binemit-tested - -function %insertlane_b8() { -block0: -[-, %rax] v0 = bconst.b8 true -[-, %rbx] v1 = bconst.b8 false -[-, %xmm0] v2 = splat.b8x16 v0 -[-, %xmm0] v3 = x86_pinsr v2, v1, 10 ; bin: 66 0f 3a 20 c3 0a - return -} - -function %insertlane_i16() { -block0: -[-, %rax] v0 = iconst.i16 4 -[-, %rbx] v1 = iconst.i16 5 -[-, %xmm1] v2 = splat.i16x8 v0 -[-, %xmm1] v3 = x86_pinsr v2, v1, 4 ; bin: 66 0f c4 cb 04 - return -} - -function %insertlane_i32() { -block0: -[-, %rax] v0 = iconst.i32 42 -[-, %rbx] v1 = iconst.i32 99 -[-, %xmm4] v2 = splat.i32x4 v0 -[-, %xmm4] v3 = x86_pinsr v2, v1, 2 ; bin: 66 0f 3a 22 e3 02 - return -} - -function %insertlane_b64() { -block0: -[-, %rax] v0 = bconst.b64 true -[-, %rbx] v1 = bconst.b64 false -[-, %xmm2] v2 = splat.b64x2 v0 -[-, %xmm2] v3 = x86_pinsr v2, v1, 1 ; bin: 66 48 0f 3a 22 d3 01 - 
return -} - -; for extractlane, floats are legalized differently than integers and booleans; integers and -; booleans use x86_pextr which is manually placed in the IR so that it can be binemit-tested - -function %extractlane_b8() { -block0: -[-, %rax] v0 = bconst.b8 true -[-, %xmm0] v1 = splat.b8x16 v0 -[-, %rax] v2 = x86_pextr v1, 10 ; bin: 66 0f 3a 14 c0 0a - return -} - -function %extractlane_i16() { -block0: -[-, %rax] v0 = iconst.i16 4 -[-, %xmm1] v1 = splat.i16x8 v0 -[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f 3a 15 c8 04 - return -} - -function %extractlane_i32() { -block0: -[-, %rax] v0 = iconst.i32 42 -[-, %xmm4] v1 = splat.i32x4 v0 -[-, %rcx] v2 = x86_pextr v1, 2 ; bin: 66 0f 3a 16 e1 02 - return -} - -function %extractlane_b64() { -block0: -[-, %rax] v0 = bconst.b64 false -[-, %xmm2] v1 = splat.b64x2 v0 -[-, %rbx] v2 = x86_pextr v1, 1 ; bin: 66 48 0f 3a 16 d3 01 - return -} - -;; shuffle - -function %pshufd() { -block0: -[-, %rax] v0 = iconst.i32 42 -[-, %xmm0] v1 = scalar_to_vector.i32x4 v0 ; bin: 66 0f 6e c0 -[-, %xmm0] v2 = x86_pshufd v1, 0 ; bin: 66 0f 70 c0 00 - return -} - -function %pshufb() { -block0: -[-, %rax] v0 = iconst.i8 42 -[-, %xmm0] v1 = scalar_to_vector.i8x16 v0 ; bin: 66 0f 6e c0 -[-, %rbx] v2 = iconst.i8 43 -[-, %xmm12] v3 = scalar_to_vector.i8x16 v2 ; bin: 66 44 0f 6e e3 -[-, %xmm0] v4 = x86_pshufb v1, v3 ; bin: 66 41 0f 38 00 c4 - return -} - -;; blend - -function %pblendw(b16x8, b16x8) { -block0(v0: b16x8 [%xmm10], v1: b16x8 [%xmm2]): -[-, %xmm10] v2 = x86_pblendw v0, v1, 0x55 ; bin: 66 44 0f 3a 0e d2 55 - return -} - -;; pack/unpack - -function %unpack_high_i8x16(i8x16, i8x16) { -block0(v0: i8x16 [%xmm0], v1: i8x16 [%xmm12]): -[-, %xmm0] v2 = x86_punpckh v0, v1 ; bin: 66 41 0f 68 c4 - return -} - -function %unpack_low_i32x4(i32x4, i32x4) { -block0(v0: i32x4 [%xmm7], v1: i32x4 [%xmm6]): -[-, %xmm7] v2 = x86_punpckl v0, v1 ; bin: 66 0f 62 fe - return -} - -function %narrowing_i16x8(i16x8, i16x8) { -block0(v0: i16x8 [%xmm7], v1: 
i16x8 [%xmm8]): -[-, %xmm7] v2 = snarrow v0, v1 ; bin: 66 41 0f 63 f8 -[-, %xmm7] v3 = unarrow v0, v1 ; bin: 66 41 0f 67 f8 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif deleted file mode 100644 index 91ff8eb9a0..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif +++ /dev/null @@ -1,19 +0,0 @@ -test compile -set opt_level=speed_and_size -set enable_probestack=false -set enable_simd -target x86_64 legacy - -; Ensure that scalar_to_vector emits no instructions for floats (already exist in an XMM register) -function %scalar_to_vector_f32() -> f32x4 baldrdash_system_v { -block0: - v0 = f32const 0x0.42 - v1 = scalar_to_vector.f32x4 v0 - return v1 -} - -; check: block0 -; nextln: v2 = iconst.i32 0x3e84_0000 -; nextln: v0 = bitcast.f32 v2 -; nextln: [null_fpr#00,%xmm0] v1 = scalar_to_vector.f32x4 v0 -; nextln: return v1 diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif deleted file mode 100644 index 284ef35180..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif +++ /dev/null @@ -1,101 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -;; shuffle - -function %shuffle_different_ssa_values() -> i8x16 { -; check: const2 = 0x80000000000000000000000000000000 -; nextln: const3 = 0x01808080808080808080808080808080 -block0: - v0 = vconst.i8x16 0x00 - v1 = vconst.i8x16 0x01 - v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0 - return v2 -} -; check: v1 = vconst.i8x16 const1 -; nextln: v3 = vconst.i8x16 const2 -; nextln: v4 = x86_pshufb v0, v3 -; nextln: v5 = vconst.i8x16 const3 -; nextln: v6 = x86_pshufb v1, v5 -; nextln: v2 = bor v4, v6 - -function %shuffle_same_ssa_value() -> i8x16 { -; check: const1 = 
0x03000000000000000000000000000000 -block0: - v1 = vconst.i8x16 0x01 - v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1 - return v2 -} -; check: v1 = vconst.i8x16 const0 -; nextln: v3 = vconst.i8x16 const1 -; nextln: v2 = x86_pshufb v1, v3 - -;; splat - -function %splat_i32() -> i32x4 { -block0: - v0 = iconst.i32 42 - v1 = splat.i32x4 v0 - return v1 -} -; check: block0: -; nextln: v0 = iconst.i32 42 -; nextln: v2 = scalar_to_vector.i32x4 v0 -; nextln: v1 = x86_pshufd v2, 0 -; nextln: return v1 -; nextln: } - -function %splat_i64() -> i64x2 { -block0: - v0 = iconst.i64 42 - v1 = splat.i64x2 v0 - return v1 -} -; check: block0: -; nextln: v0 = iconst.i64 42 -; nextln: v2 = scalar_to_vector.i64x2 v0 -; nextln: v1 = x86_pinsr v2, v0, 1 -; nextln: return v1 - -function %splat_b16() -> b16x8 { -block0: - v0 = bconst.b16 true - v1 = splat.b16x8 v0 - return v1 -} -; check: block0: -; nextln: v0 = bconst.b16 true -; nextln: v2 = scalar_to_vector.b16x8 v0 -; nextln: v3 = x86_pinsr v2, v0, 1 -; nextln: v4 = raw_bitcast.i32x4 v3 -; nextln: v5 = x86_pshufd v4, 0 -; nextln: v1 = raw_bitcast.b16x8 v5 -; nextln: return v1 - -function %splat_i8() -> i8x16 { -; check: const0 = 0x00000000000000000000000000000000 -block0: - v0 = iconst.i8 42 - v1 = splat.i8x16 v0 - return v1 -} -; check: block0: -; nextln: v2 = iconst.i32 42 -; nextln: v0 = ireduce.i8 v2 -; nextln: v3 = scalar_to_vector.i8x16 v0 -; nextln: v4 = vconst.i8x16 const0 -; nextln: v1 = x86_pshufb v3, v4 -; nextln: return v1 - -function %swizzle() -> i8x16 { -; check: const1 = 0x70707070707070707070707070707070 -block0: - v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v2 = swizzle.i8x16 v0, v1 - ; check: v3 = vconst.i8x16 const1 - ; nextln: v4 = uadd_sat v1, v3 - ; nextln: v2 = x86_pshufb v0, v4 - return v2 -} diff --git 
a/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif deleted file mode 100644 index af5ca0fe63..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif +++ /dev/null @@ -1,33 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake - -function %bor_b16x8(b16x8, b16x8) -> b16x8 { -block0(v0: b16x8 [%xmm2], v1: b16x8 [%xmm1]): -[-, %xmm2] v2 = bor v0, v1 ; bin: 66 0f eb d1 - return v2 -} - -function %band_b64x2(b64x2, b64x2) -> b64x2 { -block0(v0: b64x2 [%xmm6], v1: b64x2 [%xmm3]): -[-, %xmm6] v2 = band v0, v1 ; bin: 66 0f db f3 - return v2 -} - -function %bxor_b32x4(b32x4, b32x4) -> b32x4 { -block0(v0: b32x4 [%xmm4], v1: b32x4 [%xmm0]): -[-, %xmm4] v2 = bxor v0, v1 ; bin: 66 0f ef e0 - return v2 -} - -function %band_not_b64x2(b64x2, b64x2) -> b64x2 { -block0(v0: b64x2 [%xmm6], v1: b64x2 [%xmm3]): -[-, %xmm3] v2 = band_not v0, v1 ; bin: 66 0f df de - return v2 -} - -function %x86_ptest_f64x2(f64x2, f64x2) { -block0(v0: f64x2 [%xmm0], v1: f64x2 [%xmm2]): -[-, %rflags] v2 = x86_ptest v0, v1 ; bin: 66 0f 38 17 c2 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif deleted file mode 100644 index 5e5bb7ac43..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif +++ /dev/null @@ -1,31 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -function %bnot_b32x4(b32x4) -> b32x4 { -; check: const0 = 0xffffffffffffffffffffffffffffffff -block0(v0: b32x4): - v1 = bnot v0 - ; check: v2 = vconst.b32x4 const0 - ; nextln: v1 = bxor v2, v0 - return v1 -} - -function %vany_true_b32x4(b32x4) -> b1 { -block0(v0: b32x4): - v1 = vany_true v0 - ; check: v2 = x86_ptest v0, v0 - ; nextln: v1 = trueif ne v2 - return v1 -} - -function %vall_true_i64x2(i64x2) -> b1 { -; check: const0 = 0x00000000000000000000000000000000 -block0(v0: 
i64x2): - v1 = vall_true v0 - ; check: v2 = vconst.i64x2 const0 - ; nextln: v3 = icmp eq v0, v2 - ; nextln: v4 = x86_ptest v3, v3 - ; nextln: v1 = trueif eq v4 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif deleted file mode 100644 index 6b6b91a915..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif +++ /dev/null @@ -1,11 +0,0 @@ -test rodata -set enable_simd -target x86_64 legacy skylake - -function %bnot_b32x4(b32x4) -> b32x4 { -block0(v0: b32x4): - v1 = bnot v0 - return v1 -} - -; sameln: [FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF] diff --git a/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif deleted file mode 100644 index 4f8b050d01..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif +++ /dev/null @@ -1,85 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake - -function %load_store_simple(i64) { -block0(v0: i64 [%rax]): -[-, %xmm0] v10 = load.i32x4 v0 ; bin: heap_oob 0f 10 00 -[-] store v10, v0 ; bin: heap_oob 0f 11 00 - - ; use REX prefix -[-, %xmm8] v12 = load.i8x16 v0 ; bin: heap_oob 44 0f 10 00 -[-] store v12, v0 ; bin: heap_oob 44 0f 11 00 - - return -} - -function %load_store_with_displacement(i64) { -block0(v0: i64 [%rax]): - ; use 8-bit displacement -[-, %xmm0] v1 = load.f32x4 v0+42 ; bin: heap_oob 0f 10 40 2a -[-] store v1, v0+42 ; bin: heap_oob 0f 11 40 2a - - ; use 8-bit displacement with REX prefix -[-, %xmm8] v2 = load.i8x16 v0 ; bin: heap_oob 44 0f 10 00 -[-] store v2, v0 ; bin: heap_oob 44 0f 11 00 - - ; use 32-bit displacement -[-, %xmm0] v3 = load.f32x4 v0+256 ; bin: heap_oob 0f 10 80 00000100 -[-] store v3, v0+256 ; bin: heap_oob 0f 11 80 00000100 - - ; use 32-bit displacement with REX prefix -[-, %xmm8] v4 = load.f32x4 v0+256 ; bin: heap_oob 44 0f 10 80 00000100 
-[-] store v4, v0+256 ; bin: heap_oob 44 0f 11 80 00000100 - - return -} - -function %load_store_complex(i64, i64) { -block0(v0: i64 [%rax], v1: i64 [%rbx]): - ; %xmm1 corresponds to ModR/M 0x04; the 0b100 in the R/M slot indicates a SIB byte follows - ; %rax and %rbx form the SIB 0x18 -[-, %xmm1] v10 = load_complex.f64x2 v0+v1 ; bin: heap_oob 40 0f 10 0c 18 - ; enabling bit 6 of the ModR/M byte indicates a disp8 follows -[-] store_complex v10, v0+v1+5 ; bin: heap_oob 40 0f 11 4c 18 05 - - return -} - -function %copy_to_ssa() { -block0: -[-, %xmm1] v0 = copy_to_ssa.i64x2 %xmm3 ; bin: 40 0f 28 cb -[-, %xmm2] v1 = copy_to_ssa.i64x2 %xmm15 ; bin: 41 0f 28 d7 - - return -} - -function %uload_extend() { -block0: - [-,%rdx] v1 = iconst.i64 0x0123_4567_89ab_cdef - [-,%xmm2] v3 = uload8x8 v1+0 ; bin: heap_oob 66 0f 38 30 12 - [-,%xmm2] v4 = uload8x8 v1+20 ; bin: heap_oob 66 0f 38 30 52 14 - [-,%xmm2] v5 = uload8x8 v1+256 ; bin: heap_oob 66 0f 38 30 92 00000100 - [-,%xmm2] v6 = uload16x4 v1+0 ; bin: heap_oob 66 0f 38 33 12 - [-,%xmm2] v7 = uload16x4 v1+20 ; bin: heap_oob 66 0f 38 33 52 14 - [-,%xmm2] v8 = uload16x4 v1+256 ; bin: heap_oob 66 0f 38 33 92 00000100 - [-,%xmm10] v9 = uload32x2 v1+0 ; bin: heap_oob 66 44 0f 38 35 12 - [-,%xmm10] v10 = uload32x2 v1+20 ; bin: heap_oob 66 44 0f 38 35 52 14 - [-,%xmm10] v11 = uload32x2 v1+256 ; bin: heap_oob 66 44 0f 38 35 92 00000100 - return -} - -function %sload_extend() { -block0: - [-,%rdx] v1 = iconst.i64 0x0123_4567_89ab_cdef - [-,%xmm2] v3 = sload8x8 v1+0 ; bin: heap_oob 66 0f 38 20 12 - [-,%xmm2] v4 = sload8x8 v1+20 ; bin: heap_oob 66 0f 38 20 52 14 - [-,%xmm2] v5 = sload8x8 v1+256 ; bin: heap_oob 66 0f 38 20 92 00000100 - [-,%xmm10] v6 = sload16x4 v1+0 ; bin: heap_oob 66 44 0f 38 23 12 - [-,%xmm10] v7 = sload16x4 v1+20 ; bin: heap_oob 66 44 0f 38 23 52 14 - [-,%xmm10] v8 = sload16x4 v1+256 ; bin: heap_oob 66 44 0f 38 23 92 00000100 - [-,%xmm2] v9 = sload32x2 v1+0 ; bin: heap_oob 66 0f 38 25 12 - [-,%xmm2] v10 = sload32x2 
v1+20 ; bin: heap_oob 66 0f 38 25 52 14 - [-,%xmm2] v11 = sload32x2 v1+256 ; bin: heap_oob 66 0f 38 25 92 00000100 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif deleted file mode 100644 index 4141a05b32..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif +++ /dev/null @@ -1,22 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy haswell - -function u0:0(i64 fp [%rbp]) -> i32 [%rax], i64 fp [%rbp] system_v { - ss0 = explicit_slot 32, offset -48 - ss1 = spill_slot 16, offset -64 - ss2 = incoming_arg 16, offset -16 - sig0 = () system_v - fn0 = colocated u0:2 sig0 - -block0(v5: i64 [%rbp]): -[-] x86_push v5 -[-] copy_special %rsp -> %rbp -[-] adjust_sp_down_imm 48 -[-,%rax] v0 = stack_addr.i64 ss0 -[-,%xmm15] v4 = load.i32x4 v0 -[-,%rax] v2 = x86_pextr v4, 1 ; bin: 66 44 0f 3a 16 f8 01 -[-] adjust_sp_up_imm 48 -[-] v6 = x86_pop.i64 -[-] return v2, v6 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif deleted file mode 100644 index 23aee87655..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif +++ /dev/null @@ -1,29 +0,0 @@ -test binemit -set opt_level=speed_and_size -set enable_simd -target x86_64 legacy - -function %vconst_b8() { -block0: -[-, %xmm2] v0 = vconst.b8x16 0x01 ; bin: 0f 10 15 00000008 PCRelRodata4(15) -[-, %xmm3] v1 = vconst.b8x16 0x02 ; bin: 0f 10 1d 00000011 PCRelRodata4(31) - return -} - -function %vconst_with_preamble() { -const42 = i32x4 [1 0 0 0] -const43 = i32x4 [2 0 0 0] - -block0: -[-, %xmm2] v0 = vconst.i32x4 const42 ; bin: 0f 10 15 00000008 PCRelRodata4(15) -[-, %xmm3] v1 = vconst.i32x4 const43 ; bin: 0f 10 1d 00000011 PCRelRodata4(31) - return -} - -function %address_of_vconst() { -const42 = i32x4 [1 0 0 0] - -block0: -[-, %rax] v0 = const_addr.i64 const42 ; bin: 48 8d 05 00000001 
PCRelRodata4(8) - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif deleted file mode 100644 index 477984b344..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif +++ /dev/null @@ -1,16 +0,0 @@ -test compile -set enable_simd=true -set enable_probestack=false -target x86_64 legacy haswell - -; use baldrdash calling convention here for simplicity (avoids prologue, epilogue) -function %vconst_i32() -> i32x4 baldrdash_system_v { -block0: - v0 = vconst.i32x4 0x1234 - return v0 -} -; check: const0 = 0x00000000000000000000000000001234 -; check: block0: -; nextln: v0 = vconst.i32x4 const0 -; nextln: return v0 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif deleted file mode 100644 index 07fa364752..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif +++ /dev/null @@ -1,10 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy - -function %vconst_optimizations() { -block0: -[-, %xmm4] v0 = vconst.b8x16 0x00 ; bin: 66 0f ef e4 -[-, %xmm7] v1 = vconst.b8x16 0xffffffffffffffffffffffffffffffff ; bin: 66 0f 74 ff - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif deleted file mode 100644 index e7e63e65ea..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif +++ /dev/null @@ -1,49 +0,0 @@ -test rodata -set enable_simd=true -target x86_64 legacy haswell - -function %vconst_i32() -> i32x4 { -block0: - v0 = vconst.i32x4 0x1234 - return v0 -} - -; sameln: [34, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - -function %vconst_b16() -> b16x8 { -block0: - v0 = vconst.b16x8 [true false true false true false true true] - return v0 -} - -; sameln: [FF, FF, 0, 0, FF, FF, 0, 
0, FF, FF, 0, 0, FF, FF, FF, FF] - - -; Since both jump tables and constants are emitted after the function body, it is important that they do not interfere. -; This test shows that even in the presence of jump tables, constants are emitted correctly -function %vconst_with_jumptables() { -jt0 = jump_table [block0] - -block10: - v10 = iconst.i64 0 - br_table v10, block1, jt0 -block0: - jump block11 -block1: - jump block11 -block11: - v11 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16] - return -} - -; sameln: [1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, 10] - -function %vconst_preamble() -> b16x8 { -const42 = i32x4 [0 1 2 3] -const43 = i32x4 [4 5 6 7] -block0: - v0 = vconst.b16x8 const42 - return v0 -} - -; sameln: [0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0] diff --git a/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif deleted file mode 100644 index 275a5e4411..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif +++ /dev/null @@ -1,27 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy haswell - -function %vselect_i8x16(b8x16, i8x16, i8x16) { -block0(v0: b8x16 [%xmm0], v1: i8x16 [%xmm3], v2: i8x16 [%xmm5]): -[-, %xmm5] v3 = vselect v0, v1, v2 ; bin: 66 0f 38 10 eb - return -} - -function %vselect_i16x8(b16x8, i16x8, i16x8) { -block0(v0: b16x8 [%xmm0], v1: i16x8 [%xmm3], v2: i16x8 [%xmm5]): -[-, %xmm5] v3 = vselect v0, v1, v2 ; bin: 66 0f 38 10 eb - return -} - -function %vselect_i32x4(b32x4, i32x4, i32x4) { -block0(v0: b32x4 [%xmm0], v1: i32x4 [%xmm3], v2: i32x4 [%xmm5]): -[-, %xmm5] v3 = vselect v0, v1, v2 ; bin: 66 0f 38 14 eb - return -} - -function %vselect_i64x2(b64x2, i64x2, i64x2) { -block0(v0: b64x2 [%xmm0], v1: i64x2 [%xmm3], v2: i64x2 [%xmm5]): -[-, %xmm5] v3 = vselect v0, v1, v2 ; bin: 66 0f 38 15 eb - return -} diff --git 
a/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif b/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif deleted file mode 100644 index 648b3f5584..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif +++ /dev/null @@ -1,45 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy - -;; Test if vselect gets legalized if BLEND* instructions are not available - -function %vselect_i8x16(b8x16, i8x16, i8x16) -> i8x16 { -block0(v0: b8x16, v1: i8x16, v2: i8x16): - v3 = vselect v0, v1, v2 - ; check: v4 = raw_bitcast.i8x16 v0 - ; nextln: v5 = band v1, v4 - ; nextln: v6 = band_not v2, v4 - ; nextln: v3 = bor v5, v6 - return v3 -} - -function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 { -block0(v0: b16x8, v1: i16x8, v2: i16x8): - v3 = vselect v0, v1, v2 - ; check: v4 = raw_bitcast.i16x8 v0 - ; nextln: v5 = band v1, v4 - ; nextln: v6 = band_not v2, v4 - ; nextln: v3 = bor v5, v6 - return v3 -} - -function %vselect_i32x4(b32x4, i32x4, i32x4) -> i32x4 { -block0(v0: b32x4, v1: i32x4, v2: i32x4): - v3 = vselect v0, v1, v2 - ; check: v4 = raw_bitcast.i32x4 v0 - ; nextln: v5 = band v1, v4 - ; nextln: v6 = band_not v2, v4 - ; nextln: v3 = bor v5, v6 - return v3 -} - -function %vselect_i64x2(b64x2, i64x2, i64x2) -> i64x2 { -block0(v0: b64x2, v1: i64x2, v2: i64x2): - v3 = vselect v0, v1, v2 - ; check: v4 = raw_bitcast.i64x2 v0 - ; nextln: v5 = band v1, v4 - ; nextln: v6 = band_not v2, v4 - ; nextln: v3 = bor v5, v6 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/stack-addr32.clif b/cranelift/filetests/filetests/isa/x86/stack-addr32.clif deleted file mode 100644 index f06b3ec0eb..0000000000 --- a/cranelift/filetests/filetests/isa/x86/stack-addr32.clif +++ /dev/null @@ -1,33 +0,0 @@ -; binary emission of stack address instructions on i686. 
-test binemit -set opt_level=none -target i686 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/stack-addr32.clif | llvm-mc -show-encoding -triple=i686 -; - -function %stack_addr() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - ss4 = explicit_slot 8, offset 0 - ss5 = explicit_slot 8, offset 1024 - -block0: -[-,%rcx] v0 = stack_addr.i32 ss0 ; bin: 8d 8c 24 00000808 -[-,%rcx] v1 = stack_addr.i32 ss1 ; bin: 8d 8c 24 00000408 -[-,%rcx] v2 = stack_addr.i32 ss2 ; bin: 8d 8c 24 00000008 -[-,%rcx] v3 = stack_addr.i32 ss3 ; bin: 8d 8c 24 00000000 -[-,%rcx] v4 = stack_addr.i32 ss4 ; bin: 8d 8c 24 00000808 -[-,%rcx] v5 = stack_addr.i32 ss5 ; bin: 8d 8c 24 00000c08 - -[-,%rcx] v20 = stack_addr.i32 ss4+1 ; bin: 8d 8c 24 00000809 -[-,%rcx] v21 = stack_addr.i32 ss4+2 ; bin: 8d 8c 24 0000080a -[-,%rcx] v22 = stack_addr.i32 ss4+2048 ; bin: 8d 8c 24 00001008 -[-,%rcx] v23 = stack_addr.i32 ss4-4096 ; bin: 8d 8c 24 fffff808 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/stack-addr64.clif b/cranelift/filetests/filetests/isa/x86/stack-addr64.clif deleted file mode 100644 index 5b8d5d7ab7..0000000000 --- a/cranelift/filetests/filetests/isa/x86/stack-addr64.clif +++ /dev/null @@ -1,45 +0,0 @@ -; binary emission of stack address instructions on x86-64. 
-test binemit -set opt_level=none -target x86_64 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/stack-addr64.clif | llvm-mc -show-encoding -triple=x86_64 -; - -function %stack_addr() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - ss4 = explicit_slot 8, offset 0 - ss5 = explicit_slot 8, offset 1024 - -block0: -[-,%rcx] v0 = stack_addr.i64 ss0 ; bin: 48 8d 8c 24 00000808 -[-,%rcx] v1 = stack_addr.i64 ss1 ; bin: 48 8d 8c 24 00000408 -[-,%rcx] v2 = stack_addr.i64 ss2 ; bin: 48 8d 8c 24 00000008 -[-,%rcx] v3 = stack_addr.i64 ss3 ; bin: 48 8d 8c 24 00000000 -[-,%rcx] v4 = stack_addr.i64 ss4 ; bin: 48 8d 8c 24 00000808 -[-,%rcx] v5 = stack_addr.i64 ss5 ; bin: 48 8d 8c 24 00000c08 - -[-,%rcx] v20 = stack_addr.i64 ss4+1 ; bin: 48 8d 8c 24 00000809 -[-,%rcx] v21 = stack_addr.i64 ss4+2 ; bin: 48 8d 8c 24 0000080a -[-,%rcx] v22 = stack_addr.i64 ss4+2048 ; bin: 48 8d 8c 24 00001008 -[-,%rcx] v23 = stack_addr.i64 ss4-4096 ; bin: 48 8d 8c 24 fffff808 - -[-,%r8] v50 = stack_addr.i64 ss0 ; bin: 4c 8d 84 24 00000808 -[-,%r8] v51 = stack_addr.i64 ss1 ; bin: 4c 8d 84 24 00000408 -[-,%r8] v52 = stack_addr.i64 ss2 ; bin: 4c 8d 84 24 00000008 -[-,%r8] v53 = stack_addr.i64 ss3 ; bin: 4c 8d 84 24 00000000 -[-,%r8] v54 = stack_addr.i64 ss4 ; bin: 4c 8d 84 24 00000808 -[-,%r8] v55 = stack_addr.i64 ss5 ; bin: 4c 8d 84 24 00000c08 - -[-,%r8] v70 = stack_addr.i64 ss4+1 ; bin: 4c 8d 84 24 00000809 -[-,%r8] v71 = stack_addr.i64 ss4+2 ; bin: 4c 8d 84 24 0000080a -[-,%r8] v72 = stack_addr.i64 ss4+2048 ; bin: 4c 8d 84 24 00001008 -[-,%r8] v73 = stack_addr.i64 ss4-4096 ; bin: 4c 8d 84 24 fffff808 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif b/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif deleted file mode 100644 index 508fae04d2..0000000000 --- 
a/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif +++ /dev/null @@ -1,21 +0,0 @@ -; legalization of stack load and store instructions on x86-64. -test legalizer -set opt_level=none -target x86_64 legacy haswell - -function %stack_load_and_store() { - ss0 = explicit_slot 8, offset 0 - -block0: - v0 = stack_load.i64 ss0 - -; check: v1 = stack_addr.i64 ss0 -; check: v0 = load.i64 notrap aligned v1 - - stack_store.i64 v0, ss0 - -; check: v2 = stack_addr.i64 ss0 -; check: store notrap aligned v0, v2 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif b/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif deleted file mode 100644 index 0a9f973fac..0000000000 --- a/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif +++ /dev/null @@ -1,19 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i8) -> i8 { - ss0 = explicit_slot 1 - -block0(v0: i8): - stack_store v0, ss0 - ; check: v2 = stack_addr.i64 ss0 - ; nextln: v3 = uextend.i32 v0 - ; nextln: istore8 notrap aligned v3, v2 - - v1 = stack_load.i8 ss0 - ; check: v4 = stack_addr.i64 ss0 - ; nextln: v5 = uload8.i32 notrap aligned v4 - ; nextln: v1 = ireduce.i8 v5 - - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/struct-arg.clif b/cranelift/filetests/filetests/isa/x86/struct-arg.clif deleted file mode 100644 index 8358e8633a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/struct-arg.clif +++ /dev/null @@ -1,117 +0,0 @@ -test compile -set is_pic -target x86_64 legacy - -function u0:0(i64 sarg(64)) -> i8 system_v { -block0(v0: i64): - v1 = load.i8 v0 - return v1 -} - -; check: function u0:0(sarg_t sarg(64) [0], i64 fp [%rbp]) -> i8 [%rax], i64 fp [%rbp] system_v { -; nextln: ss0 = incoming_arg 64, offset 0 -; nextln: ss1 = incoming_arg 16, offset -16 - -; check: block0(v3: sarg_t [ss0], v5: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v5 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: 
[RexOp1spaddr_id#808d,%rax] v2 = stack_addr.i64 ss0 -; nextln: v0 -> v2 -; nextln: [RexOp2ld#4b6,%rax] v4 = uload8.i32 v2 -; nextln: [null#00,%rax] v1 = ireduce.i8 v4 -; nextln: [RexOp1popq#58,%rbp] v6 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1, v6 -; nextln: } - -function u0:1(i64, i64 sarg(64)) -> i8 system_v { -block0(v0: i64, v1: i64): - v2 = load.i8 v1 - return v2 -} - -; check: function u0:1(i64 [%rdi], sarg_t sarg(64) [0], i64 fp [%rbp]) -> i8 [%rax], i64 fp [%rbp] system_v { -; nextln: ss0 = incoming_arg 64, offset 0 -; nextln: ss1 = incoming_arg 16, offset -16 - -; check: block0(v0: i64 [%rdi], v4: sarg_t [ss0], v6: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v6 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1spaddr_id#808d,%rax] v3 = stack_addr.i64 ss0 -; nextln: v1 -> v3 -; nextln: [RexOp2ld#4b6,%rax] v5 = uload8.i32 v3 -; nextln: [null#00,%rax] v2 = ireduce.i8 v5 -; nextln: [RexOp1popq#58,%rbp] v7 = x86_pop.i64 -; nextln: [Op1ret#c3] return v2, v7 -; nextln: } - - -function u0:2(i64) -> i8 system_v { -fn1 = u0:0(i64 sarg(64)) -> i8 system_v - -block0(v0: i64): - v1 = call fn1(v0) - return v1 -} - -; check: function u0:2(i64 [%rdi], i64 fp [%rbp]) -> i8 [%rax], i64 fp [%rbp] system_v { -; nextln: ss0 = outgoing_arg 64, offset 0 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: sig0 = (sarg_t sarg(64) [0]) -> i8 [%rax] system_v -; nextln: sig1 = (i64 [%rdi], i64 [%rsi], i64 [%rdx]) system_v -; nextln: fn1 = u0:0 sig0 -; nextln: fn2 = %Memcpy sig1 - -; check: block0(v0: i64 [%rdi], v5: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v5 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1adjustsp_ib#d083] adjust_sp_down_imm 64 -; nextln: [RexOp1spaddr_id#808d,%rax] v2 = stack_addr.i64 ss0 -; nextln: [RexOp1pu_id#b8,%rcx] v3 = iconst.i64 64 -; nextln: [RexOp1rmov#8089] regmove v0, %rdi -> %rsi -; nextln: [RexOp1rmov#8089] regmove v2, %rax -> %rdi -; nextln: [RexOp1rmov#8089] regmove v3, %rcx 
-> %rdx -; nextln: [Op1call_plt_id#e8] call fn2(v2, v0, v3) -; nextln: [dummy_sarg_t#00,ss0] v4 = dummy_sarg_t -; nextln: [Op1call_plt_id#e8,%rax] v1 = call fn1(v4) -; nextln: [RexOp1adjustsp_ib#8083] adjust_sp_up_imm 64 -; nextln: [RexOp1popq#58,%rbp] v6 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1, v6 -; nextln: } - -function u0:3(i64, i64) -> i8 system_v { -fn1 = u0:0(i64, i64 sarg(64)) -> i8 system_v - -block0(v0: i64, v1: i64): - v2 = call fn1(v0, v1) - return v2 -} - -; check: function u0:3(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%r15]) -> i8 [%rax], i64 fp [%rbp], i64 csr [%r15] system_v { -; nextln: ss0 = outgoing_arg 64, offset 0 -; nextln: ss1 = spill_slot 8, offset -32 -; nextln: ss2 = incoming_arg 24, offset -24 -; nextln: sig0 = (i64 [%rdi], sarg_t sarg(64) [0]) -> i8 [%rax] system_v -; nextln: sig1 = (i64 [%rdi], i64 [%rsi], i64 [%rdx]) system_v -; nextln: fn1 = u0:0 sig0 -; nextln: fn2 = %Memcpy sig1 - -; check: block0(v6: i64 [%rdi], v1: i64 [%rsi], v8: i64 [%rbp], v9: i64 [%r15]): -; nextln: [RexOp1pushq#50] x86_push v8 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1pushq#50] x86_push v9 -; nextln: [RexOp1adjustsp_ib#d083] adjust_sp_down_imm 72 -; nextln: [RexOp1spillSib32#8089,ss1] v0 = spill v6 -; nextln: [RexOp1spaddr_id#808d,%rax] v3 = stack_addr.i64 ss0 -; nextln: [RexOp1pu_id#b8,%rcx] v4 = iconst.i64 64 -; nextln: [RexOp1rmov#8089] regmove v3, %rax -> %rdi -; nextln: [RexOp1rmov#8089] regmove v4, %rcx -> %rdx -; nextln: [Op1call_plt_id#e8] call fn2(v3, v1, v4) -; nextln: [dummy_sarg_t#00,ss0] v5 = dummy_sarg_t -; nextln: [RexOp1fillSib32#808b,%r15] v7 = fill v0 -; nextln: [RexOp1rmov#8089] regmove v7, %r15 -> %rdi -; nextln: [Op1call_plt_id#e8,%rax] v2 = call fn1(v7, v5) -; nextln: [RexOp1adjustsp_ib#8083] adjust_sp_up_imm 72 -; nextln: [RexOp1popq#58,%r15] v11 = x86_pop.i64 -; nextln: [RexOp1popq#58,%rbp] v10 = x86_pop.i64 -; nextln: [Op1ret#c3] return v2, v10, v11 -; nextln: } diff --git 
a/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif deleted file mode 100644 index c5144bfd97..0000000000 --- a/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif +++ /dev/null @@ -1,205 +0,0 @@ -test unwind -set opt_level=speed_and_size -set is_pic -target x86_64-linux legacy haswell - -; check the unwind information with a function with no args -function %no_args() system_v { -block0: - return -} -; sameln: 0x00000000: CIE -; nextln: length: 0x00000014 -; nextln: version: 0x01 -; nextln: code_align: 1 -; nextln: data_align: -8 -; nextln: ra_register: 0x10 -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_offset (r16, 1) -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: Instructions: Init State: -; nextln: -; nextln: -; nextln: 0x00000018: FDE -; nextln: length: 0x00000024 -; nextln: CIE_pointer: 0x00000000 -; nextln: start_addr: 0x0000000000000000 -; nextln: range_size: 0x0000000000000006 (end_addr = 0x0000000000000006) -; nextln: Instructions: -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_def_cfa_offset (16) -; nextln: DW_CFA_offset (r6, 2) -; nextln: DW_CFA_advance_loc (3) -; nextln: DW_CFA_def_cfa_register (r6) -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_same_value (r6) -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_nop - -; check a function with medium-sized stack alloc -function %medium_stack() system_v { - ss0 = explicit_slot 100000 -block0: - return -} -; sameln: 0x00000000: CIE -; nextln: length: 0x00000014 -; nextln: version: 0x01 -; nextln: code_align: 1 -; nextln: data_align: -8 -; nextln: ra_register: 0x10 -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_offset (r16, 1) -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: Instructions: Init State: -; nextln: -; 
nextln: -; nextln: 0x00000018: FDE -; nextln: length: 0x00000024 -; nextln: CIE_pointer: 0x00000000 -; nextln: start_addr: 0x0000000000000000 -; nextln: range_size: 0x000000000000001a (end_addr = 0x000000000000001a) -; nextln: Instructions: -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_def_cfa_offset (16) -; nextln: DW_CFA_offset (r6, 2) -; nextln: DW_CFA_advance_loc (3) -; nextln: DW_CFA_def_cfa_register (r6) -; nextln: DW_CFA_advance_loc (21) -; nextln: DW_CFA_same_value (r6) -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_nop - -; check a function with large-sized stack alloc -function %large_stack() system_v { - ss0 = explicit_slot 524288 -block0: - return -} -; sameln: 0x00000000: CIE -; nextln: length: 0x00000014 -; nextln: version: 0x01 -; nextln: code_align: 1 -; nextln: data_align: -8 -; nextln: ra_register: 0x10 -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_offset (r16, 1) -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: Instructions: Init State: -; nextln: -; nextln: -; nextln: 0x00000018: FDE -; nextln: length: 0x00000024 -; nextln: CIE_pointer: 0x00000000 -; nextln: start_addr: 0x0000000000000000 -; nextln: range_size: 0x000000000000001a (end_addr = 0x000000000000001a) -; nextln: Instructions: -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_def_cfa_offset (16) -; nextln: DW_CFA_offset (r6, 2) -; nextln: DW_CFA_advance_loc (3) -; nextln: DW_CFA_def_cfa_register (r6) -; nextln: DW_CFA_advance_loc (21) -; nextln: DW_CFA_same_value (r6) -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_nop -; nextln: - -; check a function that has CSRs -function %lots_of_registers(i64, i64) system_v { -block0(v0: i64, v1: i64): - v2 = load.i32 v0+0 - v3 = load.i32 v0+8 - v4 = load.i32 v0+16 - v5 = load.i32 v0+24 - v6 = load.i32 v0+32 - v7 = load.i32 v0+40 - v8 = load.i32 v0+48 - v9 = load.i32 v0+56 - v10 = load.i32 v0+64 - v11 = load.i32 v0+72 - v12 = 
load.i32 v0+80 - v13 = load.i32 v0+88 - v14 = load.i32 v0+96 - store.i32 v2, v1+0 - store.i32 v3, v1+8 - store.i32 v4, v1+16 - store.i32 v5, v1+24 - store.i32 v6, v1+32 - store.i32 v7, v1+40 - store.i32 v8, v1+48 - store.i32 v9, v1+56 - store.i32 v10, v1+64 - store.i32 v11, v1+72 - store.i32 v12, v1+80 - store.i32 v13, v1+88 - store.i32 v14, v1+96 - return -} -; sameln: 0x00000000: CIE -; nextln: length: 0x00000014 -; nextln: version: 0x01 -; nextln: code_align: 1 -; nextln: data_align: -8 -; nextln: ra_register: 0x10 -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_offset (r16, 1) -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: Instructions: Init State: -; nextln: -; nextln: -; nextln: 0x00000018: FDE -; nextln: length: 0x00000044 -; nextln: CIE_pointer: 0x00000000 -; nextln: start_addr: 0x0000000000000000 -; nextln: range_size: 0x0000000000000074 (end_addr = 0x0000000000000074) -; nextln: Instructions: -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_def_cfa_offset (16) -; nextln: DW_CFA_offset (r6, 2) -; nextln: DW_CFA_advance_loc (3) -; nextln: DW_CFA_def_cfa_register (r6) -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_offset (r3, 3) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_offset (r12, 4) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_offset (r13, 5) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_offset (r14, 6) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_offset (r15, 7) -; nextln: DW_CFA_advance_loc (94) -; nextln: DW_CFA_same_value (r15) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_same_value (r14) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_same_value (r13) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_same_value (r12) -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_same_value (r3) -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_same_value (r6) -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: 
DW_CFA_nop diff --git a/cranelift/filetests/filetests/isa/x86/tls_elf.clif b/cranelift/filetests/filetests/isa/x86/tls_elf.clif deleted file mode 100644 index 2c957e0b9a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/tls_elf.clif +++ /dev/null @@ -1,18 +0,0 @@ -test regalloc -set tls_model=elf_gd -target x86_64 legacy - -function u0:0(i32) -> i32, i64 { -gv0 = symbol colocated tls u1:0 - -block0(v0: i32): - ; check: block0(v2: i32 [%rdi]): - ; nextln: [RexOp1spillSib32#89,ss0] v0 = spill v2 - v1 = global_value.i64 gv0 - ; nextln: [elf_tls_get_addr#00,%rax] v1 = x86_elf_tls_get_addr gv0 - ; nextln: [RexOp1fillSib32#8b,%r15] v3 = fill v0 - return v0, v1 - ; nextln: [RexOp1rmov#8089] regmove v1, %rax -> %rdx - ; nextln: [RexOp1rmov#89] regmove v3, %r15 -> %rax - ; nextln: [Op1ret#c3] return v3, v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/tls_enc.clif b/cranelift/filetests/filetests/isa/x86/tls_enc.clif deleted file mode 100644 index d3481a15bf..0000000000 --- a/cranelift/filetests/filetests/isa/x86/tls_enc.clif +++ /dev/null @@ -1,11 +0,0 @@ -test binemit -target x86_64 legacy - -function u0:0() -> i64, i64 { -gv0 = symbol colocated tls u1:0 - -block0: - [-, %rax] v0 = x86_elf_tls_get_addr gv0 ; bin: 66 48 8d 3d ElfX86_64TlsGd(u1:0-4) 00000000 66 66 48 e8 CallPLTRel4(%ElfTlsGetAddr-4) 00000000 - [-, %rax] v1 = x86_macho_tls_get_addr gv0; bin: 48 8b 3d MachOX86_64Tlv(u1:0-4) 00000000 ff 17 - return v0, v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/tls_macho.clif b/cranelift/filetests/filetests/isa/x86/tls_macho.clif deleted file mode 100644 index 3747ac9f05..0000000000 --- a/cranelift/filetests/filetests/isa/x86/tls_macho.clif +++ /dev/null @@ -1,18 +0,0 @@ -test regalloc -set tls_model=macho -target x86_64 legacy - -function u0:0(i32) -> i32, i64 { -gv0 = symbol colocated tls u1:0 - -block0(v0: i32): - ; check: block0(v2: i32 [%rdi]): - ; nextln: [RexOp1spillSib32#89,ss0] v0 = spill v2 - v1 = global_value.i64 gv0 - ; nextln: 
[macho_tls_get_addr#00,%rax] v1 = x86_macho_tls_get_addr gv0 - ; nextln: [RexOp1fillSib32#8b,%r15] v3 = fill v0 - return v0, v1 - ; nextln: [RexOp1rmov#8089] regmove v1, %rax -> %rdx - ; nextln: [RexOp1rmov#89] regmove v3, %r15 -> %rax - ; nextln: [Op1ret#c3] return v3, v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif b/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif deleted file mode 100644 index 931b6e0aca..0000000000 --- a/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif +++ /dev/null @@ -1,14 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i8) -> i16 fast { -block0(v0: i8): - v1 = uextend.i16 v0 - return v1 -} - -function u0:1(i8) -> i16 fast { -block0(v0: i8): - v1 = sextend.i16 v0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif deleted file mode 100644 index 13cf504d13..0000000000 --- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif +++ /dev/null @@ -1,255 +0,0 @@ -test compile -set opt_level=speed_and_size -set is_pic -target x86_64 legacy haswell - -; check if for one arg we use the right register -function %one_arg(i64) windows_fastcall { -block0(v0: i64): - return -} -; check: function %one_arg(i64 [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 16, offset -16 -; check: block0(v0: i64 [%rcx], v1: i64 [%rbp]): -; nextln: x86_push v1 -; nextln: copy_special %rsp -> %rbp -; nextln: v2 = x86_pop.i64 -; nextln: return v2 -; nextln: } - -; check if we still use registers for 4 arguments -function %four_args(i64, i64, i64, i64) windows_fastcall { -block0(v0: i64, v1: i64, v2: i64, v3: i64): - return -} -; check: function %four_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 16, offset -16 -; check: block0(v0: i64 [%rcx], v1: i64 [%rdx], v2: i64 
[%r8], v3: i64 [%r9], v4: i64 [%rbp]): -; nextln: x86_push v4 -; nextln: copy_special %rsp -> %rbp -; nextln: v5 = x86_pop.i64 -; nextln: return v5 -; nextln: } - -; check if float arguments are passed through XMM registers -function %four_float_args(f64, f64, f64, f64) windows_fastcall { -block0(v0: f64, v1: f64, v2: f64, v3: f64): - return -} -; check: function %four_float_args(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 16, offset -16 -; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v4: i64 [%rbp]): -; nextln: x86_push v4 -; nextln: copy_special %rsp -> %rbp -; nextln: v5 = x86_pop.i64 -; nextln: return v5 -; nextln: } - -; check if we use stack space for > 4 arguments -function %five_args(i64, i64, i64, i64, i64) windows_fastcall { -block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64): - return -} -; check: function %five_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 [32], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 8, offset 32 -; nextln: ss1 = incoming_arg 16, offset -16 -; check: block0(v0: i64 [%rcx], v1: i64 [%rdx], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [ss0], v5: i64 [%rbp]): -; nextln: x86_push v5 -; nextln: copy_special %rsp -> %rbp -; nextln: v6 = x86_pop.i64 -; nextln: return v6 -; nextln: } - -; check that we preserve xmm6 and above if we're using them locally -function %float_callee_saves(f64, f64, f64, f64) windows_fastcall { -block0(v0: f64, v1: f64, v2: f64, v3: f64): -; explicitly use a callee-save register -[-, %xmm6] v4 = fadd v0, v1 -[-, %xmm7] v5 = fadd v0, v1 - return -} -; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 csr [%rsp], i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7]) -> i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7] windows_fastcall { -; nextln: ss0 = incoming_arg 48, offset -48 -; check: block0(v0: f64 
[%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rsp], v7: i64 [%rbp], v8: f64x2 [%xmm6], v9: f64x2 [%xmm7]): -; nextln: x86_push v7 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 32 -; nextln: store notrap aligned v8, v6+16 -; nextln: store notrap aligned v9, v6 -; nextln: v11 = load.f64x2 notrap aligned v6+16 -; nextln: v12 = load.f64x2 notrap aligned v6 -; nextln: adjust_sp_up_imm 32 -; nextln: v10 = x86_pop.i64 -; nextln: return v10, v11, v12 -; nextln: } - -function %mixed_int_float(i64, f64, i64, f32) windows_fastcall { -block0(v0: i64, v1: f64, v2: i64, v3: f32): - return -} -; check: function %mixed_int_float(i64 [%rcx], f64 [%xmm1], i64 [%r8], f32 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 16, offset -16 -; check: block0(v0: i64 [%rcx], v1: f64 [%xmm1], v2: i64 [%r8], v3: f32 [%xmm3], v4: i64 [%rbp]): -; nextln: x86_push v4 -; nextln: copy_special %rsp -> %rbp -; nextln: v5 = x86_pop.i64 -; nextln: return v5 -; nextln: } - -function %ret_val_float(f32, f64, i64, i64) -> f64 windows_fastcall { -block0(v0: f32, v1: f64, v2: i64, v3: i64): - return v1 -} -; check: function %ret_val_float(f32 [%xmm0], f64 [%xmm1], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> f64 [%xmm0], i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 16, offset -16 -; check: block0(v0: f32 [%xmm0], v1: f64 [%xmm1], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [%rbp]): -; nextln: x86_push v4 -; nextln: copy_special %rsp -> %rbp -; nextln: regmove v1, %xmm1 -> %xmm0 -; nextln: v5 = x86_pop.i64 -; nextln: return v1, v5 -; nextln: } - -function %ret_val_i128(i64, i64) -> i128 windows_fastcall { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - return v2 -} -; check: function %ret_val_i128(i64 [%rdx], i64 [%r8], i64 sret [%rcx], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] windows_fastcall { - -; check if i128 is passed by reference -function %i128_arg(i128) windows_fastcall { -block0(v0: i128): - 
return -} -; check: function %i128_arg(i64 ptr [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { - -; check if vector types are passed by reference -function %i32x4_arg(i32x4) windows_fastcall { -block0(v0: i32x4): - return -} -; check: function %i32x4_arg(i64 ptr [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { - -function %internal_stack_arg_function_call(i64) -> i64 windows_fastcall { - fn0 = %foo(i64, i64, i64, i64) -> i64 windows_fastcall - fn1 = %foo2(i64, i64, i64, i64) -> i64 windows_fastcall -block0(v0: i64): - v1 = load.i64 v0+0 - v2 = load.i64 v0+8 - v3 = load.i64 v0+16 - v4 = load.i64 v0+24 - v5 = load.i64 v0+32 - v6 = load.i64 v0+40 - v7 = load.i64 v0+48 - v8 = load.i64 v0+56 - v9 = load.i64 v0+64 - v10 = call fn0(v1, v2, v3, v4) - store.i64 v1, v0+8 - store.i64 v2, v0+16 - store.i64 v3, v0+24 - store.i64 v4, v0+32 - store.i64 v5, v0+40 - store.i64 v6, v0+48 - store.i64 v7, v0+56 - store.i64 v8, v0+64 - store.i64 v9, v0+72 - return v10 -} -; check: function %internal_stack_a(i64 [%rcx], i64 fp [%rbp], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 [%rax], i64 fp [%rbp], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] windows_fastcall { -; nextln: ss0 = spill_slot 8, offset -56 -; nextln: ss1 = spill_slot 8, offset -64 -; nextln: ss2 = spill_slot 8, offset -72 -; nextln: ss3 = spill_slot 8, offset -80 -; nextln: ss4 = spill_slot 8, offset -88 -; nextln: ss5 = spill_slot 8, offset -96 -; nextln: ss6 = spill_slot 8, offset -104 -; nextln: ss7 = spill_slot 8, offset -112 -; nextln: ss8 = spill_slot 8, offset -120 -; nextln: ss9 = spill_slot 8, offset -128 -; nextln: ss10 = incoming_arg 48, offset -48 -; nextln: ss11 = explicit_slot 32, offset -160 -; nextln: sig0 = (i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9]) -> i64 [%rax] windows_fastcall -; nextln: sig1 = (i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9]) -> i64 [%rax] windows_fastcall -; nextln: fn0 = %foo sig0 -; nextln: fn1 = %foo2 sig1 -; check: 
block0(v11: i64 [%rcx], v52: i64 [%rbp], v53: i64 [%r12], v54: i64 [%r13], v55: i64 [%r14], v56: i64 [%r15]): -; nextln: x86_push v52 -; nextln: copy_special %rsp -> %rbp -; nextln: x86_push v53 -; nextln: x86_push v54 -; nextln: x86_push v55 -; nextln: x86_push v56 -; nextln: adjust_sp_down_imm 112 -; nextln: v0 = spill v11 -; nextln: v12 = copy_to_ssa.i64 %rcx -; nextln: v13 = load.i64 v12 -; nextln: v1 = spill v13 -; nextln: v14 = fill_nop v0 -; nextln: v15 = load.i64 v14+8 -; nextln: v2 = spill v15 -; nextln: v16 = fill_nop v0 -; nextln: v17 = load.i64 v16+16 -; nextln: v3 = spill v17 -; nextln: v18 = fill_nop v0 -; nextln: v19 = load.i64 v18+24 -; nextln: v4 = spill v19 -; nextln: v20 = fill_nop v0 -; nextln: v21 = load.i64 v20+32 -; nextln: v5 = spill v21 -; nextln: v22 = fill_nop v0 -; nextln: v23 = load.i64 v22+40 -; nextln: v6 = spill v23 -; nextln: v24 = fill_nop v0 -; nextln: v25 = load.i64 v24+48 -; nextln: v7 = spill v25 -; nextln: v26 = fill_nop v0 -; nextln: v27 = load.i64 v26+56 -; nextln: v8 = spill v27 -; nextln: v28 = fill_nop v0 -; nextln: v29 = load.i64 v28+64 -; nextln: v9 = spill v29 -; nextln: v30 = fill v1 -; nextln: v31 = fill v2 -; nextln: v32 = fill v3 -; nextln: v33 = fill v4 -; nextln: regmove v30, %r15 -> %rcx -; nextln: regmove v31, %r14 -> %rdx -; nextln: regmove v32, %r13 -> %r8 -; nextln: regmove v33, %r12 -> %r9 -; nextln: v10 = call fn0(v30, v31, v32, v33) -; nextln: v34 = fill v1 -; nextln: v35 = fill v0 -; nextln: store v34, v35+8 -; nextln: v36 = fill v2 -; nextln: v37 = fill_nop v0 -; nextln: store v36, v37+16 -; nextln: v38 = fill v3 -; nextln: v39 = fill_nop v0 -; nextln: store v38, v39+24 -; nextln: v40 = fill v4 -; nextln: v41 = fill_nop v0 -; nextln: store v40, v41+32 -; nextln: v42 = fill v5 -; nextln: v43 = fill_nop v0 -; nextln: store v42, v43+40 -; nextln: v44 = fill v6 -; nextln: v45 = fill_nop v0 -; nextln: store v44, v45+48 -; nextln: v46 = fill v7 -; nextln: v47 = fill_nop v0 -; nextln: store v46, v47+56 -; 
nextln: v48 = fill v8 -; nextln: v49 = fill_nop v0 -; nextln: store v48, v49+64 -; nextln: v50 = fill v9 -; nextln: v51 = fill_nop v0 -; nextln: store v50, v51+72 -; nextln: adjust_sp_up_imm 112 -; nextln: v61 = x86_pop.i64 -; nextln: v60 = x86_pop.i64 -; nextln: v59 = x86_pop.i64 -; nextln: v58 = x86_pop.i64 -; nextln: v57 = x86_pop.i64 -; nextln: return v10, v57, v58, v59, v60, v61 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif deleted file mode 100644 index 547e131fbd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif +++ /dev/null @@ -1,250 +0,0 @@ -test unwind -set opt_level=speed_and_size -set is_pic -target x86_64-windows legacy haswell - -; check the unwind information with a leaf function with no args -function %no_args_leaf() windows_fastcall { -block0: - return -} -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 4 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind codes: 1 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 - -; check the unwind information with a non-leaf function with no args -function %no_args() windows_fastcall { - fn0 = %foo() -block0: - call fn0() - return -} -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 8 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind codes: 2 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 -; nextln: -; nextln: offset: 8 -; nextln: op: SmallStackAlloc -; nextln: info: 3 - -; check a function with medium-sized stack alloc -function %medium_stack() windows_fastcall { - ss0 = explicit_slot 100000 -block0: - return -} -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 17 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind 
codes: 2 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 -; nextln: -; nextln: offset: 17 -; nextln: op: LargeStackAlloc -; nextln: info: 0 -; nextln: value: 12500 (u16) - -; check a function with large-sized stack alloc -function %large_stack() windows_fastcall { - ss0 = explicit_slot 524288 -block0: - return -} -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 17 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind codes: 2 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 -; nextln: -; nextln: offset: 17 -; nextln: op: LargeStackAlloc -; nextln: info: 1 -; nextln: value: 524288 (u32) - -function %fpr_with_function_call(i64, i64) windows_fastcall { - fn0 = %foo(f64, f64, i64, i64, i64) windows_fastcall; -block0(v0: i64, v1: i64): - v2 = load.f64 v0+0 - v3 = load.f64 v0+8 - v4 = load.i64 v0+16 - v15 = load.f64 v0+104 - v16 = load.f64 v0+112 - v17 = load.f64 v0+120 - v18 = load.f64 v0+128 - v19 = load.f64 v0+136 - v20 = load.f64 v0+144 - v21 = load.f64 v0+152 - v22 = load.f64 v0+160 - v23 = load.f64 v0+168 - call fn0(v2, v3, v4, v1, v1) - store.f64 v15, v1+104 - store.f64 v16, v1+112 - store.f64 v17, v1+120 - store.f64 v18, v1+128 - store.f64 v19, v1+136 - store.f64 v20, v1+144 - store.f64 v21, v1+152 - store.f64 v22, v1+160 - store.f64 v23, v1+168 - return -} -; Only check the first unwind code here because this test specifically looks to -; see that in a function that is not a leaf, a callee-save FPR is stored in an -; area that does not overlap either the callee's shadow space or stack argument -; space. 
-; -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 22 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind codes: 4 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 -; nextln: -; nextln: offset: 6 -; nextln: op: PushNonvolatileRegister -; nextln: info: 15 -; nextln: -; nextln: offset: 13 -; nextln: op: LargeStackAlloc -; nextln: info: 0 -; nextln: value: 23 (u16) -; nextln: -; nextln: offset: 22 -; nextln: op: SaveXmm128 -; nextln: info: 15 -; nextln: value: 10 (u16) - -; check a function that has CSRs -function %lots_of_registers(i64, i64) windows_fastcall { -block0(v0: i64, v1: i64): - v2 = load.i32 v0+0 - v3 = load.i32 v0+8 - v4 = load.i32 v0+16 - v5 = load.i32 v0+24 - v6 = load.i32 v0+32 - v7 = load.i32 v0+40 - v8 = load.i32 v0+48 - v9 = load.i32 v0+56 - v10 = load.i32 v0+64 - v11 = load.i32 v0+72 - v12 = load.i32 v0+80 - v13 = load.i32 v0+88 - v14 = load.i32 v0+96 - v15 = load.f64 v0+104 - v16 = load.f64 v0+112 - v17 = load.f64 v0+120 - v18 = load.f64 v0+128 - v19 = load.f64 v0+136 - v20 = load.f64 v0+144 - v21 = load.f64 v0+152 - v22 = load.f64 v0+160 - v23 = load.f64 v0+168 - store.i32 v2, v1+0 - store.i32 v3, v1+8 - store.i32 v4, v1+16 - store.i32 v5, v1+24 - store.i32 v6, v1+32 - store.i32 v7, v1+40 - store.i32 v8, v1+48 - store.i32 v9, v1+56 - store.i32 v10, v1+64 - store.i32 v11, v1+72 - store.i32 v12, v1+80 - store.i32 v13, v1+88 - store.i32 v14, v1+96 - store.f64 v15, v1+104 - store.f64 v16, v1+112 - store.f64 v17, v1+120 - store.f64 v18, v1+128 - store.f64 v19, v1+136 - store.f64 v20, v1+144 - store.f64 v21, v1+152 - store.f64 v22, v1+160 - store.f64 v23, v1+168 - return -} -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 35 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind codes: 12 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 -; nextln: -; nextln: offset: 5 -; 
nextln: op: PushNonvolatileRegister -; nextln: info: 3 -; nextln: -; nextln: offset: 6 -; nextln: op: PushNonvolatileRegister -; nextln: info: 6 -; nextln: -; nextln: offset: 7 -; nextln: op: PushNonvolatileRegister -; nextln: info: 7 -; nextln: -; nextln: offset: 9 -; nextln: op: PushNonvolatileRegister -; nextln: info: 12 -; nextln: -; nextln: offset: 11 -; nextln: op: PushNonvolatileRegister -; nextln: info: 13 -; nextln: -; nextln: offset: 13 -; nextln: op: PushNonvolatileRegister -; nextln: info: 14 -; nextln: -; nextln: offset: 15 -; nextln: op: PushNonvolatileRegister -; nextln: info: 15 -; nextln: -; nextln: offset: 19 -; nextln: op: SmallStackAlloc -; nextln: info: 8 -; nextln: -; nextln: offset: 24 -; nextln: op: SaveXmm128 -; nextln: info: 6 -; nextln: value: 3 (u16) -; nextln: -; nextln: offset: 29 -; nextln: op: SaveXmm128 -; nextln: info: 7 -; nextln: value: 2 (u16) -; nextln: -; nextln: offset: 35 -; nextln: op: SaveXmm128 -; nextln: info: 8 -; nextln: value: 1 (u16) diff --git a/cranelift/filetests/filetests/legalizer/bitrev-i128.clif b/cranelift/filetests/filetests/legalizer/bitrev-i128.clif deleted file mode 100644 index b58bf9bcb5..0000000000 --- a/cranelift/filetests/filetests/legalizer/bitrev-i128.clif +++ /dev/null @@ -1,89 +0,0 @@ -test legalizer -target x86_64 legacy - -function %reverse_bits(i128) -> i128 { -block0(v0: i128): - v1 = bitrev.i128 v0 - return v1 -} - -; check: block0(v2: i64, v3: i64): -; check: v0 = iconcat v2, v3 -; check: v33 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa -; check: v6 = band v2, v33 -; check: v7 = ushr_imm v6, 1 -; check: v34 = iconst.i64 0x5555_5555_5555_5555 -; check: v8 = band v2, v34 -; check: v9 = ishl_imm v8, 1 -; check: v10 = bor v7, v9 -; check: v35 = iconst.i64 0xcccc_cccc_cccc_cccc -; check: v11 = band v10, v35 -; check: v12 = ushr_imm v11, 2 -; check: v36 = iconst.i64 0x3333_3333_3333_3333 -; check: v13 = band v10, v36 -; check: v14 = ishl_imm v13, 2 -; check: v15 = bor v12, v14 -; check: v37 = iconst.i64 
0xf0f0_f0f0_f0f0_f0f0 -; check: v16 = band v15, v37 -; check: v17 = ushr_imm v16, 4 -; check: v38 = iconst.i64 0x0f0f_0f0f_0f0f_0f0f -; check: v18 = band v15, v38 -; check: v19 = ishl_imm v18, 4 -; check: v20 = bor v17, v19 -; check: v39 = iconst.i64 0xff00_ff00_ff00_ff00 -; check: v21 = band v20, v39 -; check: v22 = ushr_imm v21, 8 -; check: v40 = iconst.i64 0x00ff_00ff_00ff_00ff -; check: v23 = band v20, v40 -; check: v24 = ishl_imm v23, 8 -; check: v25 = bor v22, v24 -; check: v41 = iconst.i64 0xffff_0000_ffff_0000 -; check: v26 = band v25, v41 -; check: v27 = ushr_imm v26, 16 -; check: v42 = iconst.i64 0xffff_0000_ffff -; check: v28 = band v25, v42 -; check: v29 = ishl_imm v28, 16 -; check: v30 = bor v27, v29 -; check: v31 = ushr_imm v30, 32 -; check: v32 = ishl_imm v30, 32 -; check: v4 = bor v31, v32 -; check: v70 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa -; check: v43 = band v3, v70 -; check: v44 = ushr_imm v43, 1 -; check: v71 = iconst.i64 0x5555_5555_5555_5555 -; check: v45 = band v3, v71 -; check: v46 = ishl_imm v45, 1 -; check: v47 = bor v44, v46 -; check: v72 = iconst.i64 0xcccc_cccc_cccc_cccc -; check: v48 = band v47, v72 -; check: v49 = ushr_imm v48, 2 -; check: v73 = iconst.i64 0x3333_3333_3333_3333 -; check: v50 = band v47, v73 -; check: v51 = ishl_imm v50, 2 -; check: v52 = bor v49, v51 -; check: v74 = iconst.i64 0xf0f0_f0f0_f0f0_f0f0 -; check: v53 = band v52, v74 -; check: v54 = ushr_imm v53, 4 -; check: v75 = iconst.i64 0x0f0f_0f0f_0f0f_0f0f -; check: v55 = band v52, v75 -; check: v56 = ishl_imm v55, 4 -; check: v57 = bor v54, v56 -; check: v76 = iconst.i64 0xff00_ff00_ff00_ff00 -; check: v58 = band v57, v76 -; check: v59 = ushr_imm v58, 8 -; check: v77 = iconst.i64 0x00ff_00ff_00ff_00ff -; check: v60 = band v57, v77 -; check: v61 = ishl_imm v60, 8 -; check: v62 = bor v59, v61 -; check: v78 = iconst.i64 0xffff_0000_ffff_0000 -; check: v63 = band v62, v78 -; check: v64 = ushr_imm v63, 16 -; check: v79 = iconst.i64 0xffff_0000_ffff -; check: v65 = band 
v62, v79 -; check: v66 = ishl_imm v65, 16 -; check: v67 = bor v64, v66 -; check: v68 = ushr_imm v67, 32 -; check: v69 = ishl_imm v67, 32 -; check: v5 = bor v68, v69 -; check: v1 = iconcat v5, v4 -; check: return v5, v4 diff --git a/cranelift/filetests/filetests/legalizer/bitrev.clif b/cranelift/filetests/filetests/legalizer/bitrev.clif deleted file mode 100644 index 6c9ead0fe2..0000000000 --- a/cranelift/filetests/filetests/legalizer/bitrev.clif +++ /dev/null @@ -1,206 +0,0 @@ -test legalizer -target x86_64 legacy - -function %reverse_bits_8(i8) -> i8 { -block0(v0: i8): - v1 = bitrev.i8 v0 - return v1 -} -; check: v16 = uextend.i32 v0 -; check: v17 = band_imm v16, 170 -; check: v2 = ireduce.i8 v17 -; check: v18 = uextend.i32 v2 -; check: v19 = ushr_imm v18, 1 -; check: v3 = ireduce.i8 v19 -; check: v20 = uextend.i32 v0 -; check: v21 = band_imm v20, 85 -; check: v4 = ireduce.i8 v21 -; check: v22 = uextend.i32 v4 -; check: v23 = ishl_imm v22, 1 -; check: v5 = ireduce.i8 v23 -; check: v24 = uextend.i32 v3 -; check: v25 = uextend.i32 v5 -; check: v26 = bor v24, v25 -; check: v6 = ireduce.i8 v26 -; check: v27 = uextend.i32 v6 -; check: v28 = band_imm v27, 204 -; check: v7 = ireduce.i8 v28 -; check: v29 = uextend.i32 v7 -; check: v30 = ushr_imm v29, 2 -; check: v8 = ireduce.i8 v30 -; check: v31 = uextend.i32 v6 -; check: v32 = band_imm v31, 51 -; check: v9 = ireduce.i8 v32 -; check: v33 = uextend.i32 v9 -; check: v34 = ishl_imm v33, 2 -; check: v10 = ireduce.i8 v34 -; check: v35 = uextend.i32 v8 -; check: v36 = uextend.i32 v10 -; check: v37 = bor v35, v36 -; check: v11 = ireduce.i8 v37 -; check: v38 = uextend.i32 v11 -; check: v39 = band_imm v38, 240 -; check: v12 = ireduce.i8 v39 -; check: v40 = uextend.i32 v12 -; check: v41 = ushr_imm v40, 4 -; check: v13 = ireduce.i8 v41 -; check: v42 = uextend.i32 v11 -; check: v43 = band_imm v42, 15 -; check: v14 = ireduce.i8 v43 -; check: v44 = uextend.i32 v14 -; check: v45 = ishl_imm v44, 4 -; check: v15 = ireduce.i8 v45 -; check: 
v46 = uextend.i32 v13 -; check: v47 = uextend.i32 v15 -; check: v48 = bor v46, v47 -; check: v1 = ireduce.i8 v48 -; check: return v1 - -function %reverse_bits_16(i16) -> i16 { -block0(v0: i16): - v1 = bitrev.i16 v0 - return v1 -} -; check: v21 = uextend.i32 v0 -; check: v22 = band_imm v21, 0xaaaa -; check: v2 = ireduce.i16 v22 -; check: v23 = uextend.i32 v2 -; check: v24 = ushr_imm v23, 1 -; check: v3 = ireduce.i16 v24 -; check: v25 = uextend.i32 v0 -; check: v26 = band_imm v25, 0x5555 -; check: v4 = ireduce.i16 v26 -; check: v27 = uextend.i32 v4 -; check: v28 = ishl_imm v27, 1 -; check: v5 = ireduce.i16 v28 -; check: v29 = uextend.i32 v3 -; check: v30 = uextend.i32 v5 -; check: v31 = bor v29, v30 -; check: v6 = ireduce.i16 v31 -; check: v32 = uextend.i32 v6 -; check: v33 = band_imm v32, 0xcccc -; check: v7 = ireduce.i16 v33 -; check: v34 = uextend.i32 v7 -; check: v35 = ushr_imm v34, 2 -; check: v8 = ireduce.i16 v35 -; check: v36 = uextend.i32 v6 -; check: v37 = band_imm v36, 0x3333 -; check: v9 = ireduce.i16 v37 -; check: v38 = uextend.i32 v9 -; check: v39 = ishl_imm v38, 2 -; check: v10 = ireduce.i16 v39 -; check: v40 = uextend.i32 v8 -; check: v41 = uextend.i32 v10 -; check: v42 = bor v40, v41 -; check: v11 = ireduce.i16 v42 -; check: v43 = uextend.i32 v11 -; check: v44 = band_imm v43, 0xf0f0 -; check: v12 = ireduce.i16 v44 -; check: v45 = uextend.i32 v12 -; check: v46 = ushr_imm v45, 4 -; check: v13 = ireduce.i16 v46 -; check: v47 = uextend.i32 v11 -; check: v48 = band_imm v47, 3855 -; check: v14 = ireduce.i16 v48 -; check: v49 = uextend.i32 v14 -; check: v50 = ishl_imm v49, 4 -; check: v15 = ireduce.i16 v50 -; check: v51 = uextend.i32 v13 -; check: v52 = uextend.i32 v15 -; check: v53 = bor v51, v52 -; check: v16 = ireduce.i16 v53 -; check: v54 = uextend.i32 v16 -; check: v55 = band_imm v54, 0xff00 -; check: v17 = ireduce.i16 v55 -; check: v56 = uextend.i32 v17 -; check: v57 = ushr_imm v56, 8 -; check: v18 = ireduce.i16 v57 -; check: v58 = uextend.i32 v16 -; 
check: v59 = band_imm v58, 255 -; check: v19 = ireduce.i16 v59 -; check: v60 = uextend.i32 v19 -; check: v61 = ishl_imm v60, 8 -; check: v20 = ireduce.i16 v61 -; check: v62 = uextend.i32 v18 -; check: v63 = uextend.i32 v20 -; check: v64 = bor v62, v63 -; check: v1 = ireduce.i16 v64 -; check: return v1 - -function %reverse_bits_32(i32) -> i32 { -block0(v0: i32): - v1 = bitrev.i32 v0 - return v1 -} -; check: v24 = iconst.i32 0xaaaa_aaaa -; check: v2 = band v0, v24 -; check: v3 = ushr_imm v2, 1 -; check: v4 = band_imm v0, 0x5555_5555 -; check: v5 = ishl_imm v4, 1 -; check: v6 = bor v3, v5 -; check: v25 = iconst.i32 0xcccc_cccc -; check: v7 = band v6, v25 -; check: v8 = ushr_imm v7, 2 -; check: v9 = band_imm v6, 0x3333_3333 -; check: v10 = ishl_imm v9, 2 -; check: v11 = bor v8, v10 -; check: v26 = iconst.i32 0xf0f0_f0f0 -; check: v12 = band v11, v26 -; check: v13 = ushr_imm v12, 4 -; check: v14 = band_imm v11, 0x0f0f_0f0f -; check: v15 = ishl_imm v14, 4 -; check: v16 = bor v13, v15 -; check: v27 = iconst.i32 0xff00_ff00 -; check: v17 = band v16, v27 -; check: v18 = ushr_imm v17, 8 -; check: v19 = band_imm v16, 0x00ff_00ff -; check: v20 = ishl_imm v19, 8 -; check: v21 = bor v18, v20 -; check: v22 = ushr_imm v21, 16 -; check: v23 = ishl_imm v21, 16 -; check: v1 = bor v22, v23 - - -function %reverse_bits_64(i64) -> i64 { -block0(v0: i64): - v1 = bitrev.i64 v0 - return v1 -} -; check: v29 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa -; check: v2 = band v0, v29 -; check: v3 = ushr_imm v2, 1 -; check: v30 = iconst.i64 0x5555_5555_5555_5555 -; check: v4 = band v0, v30 -; check: v5 = ishl_imm v4, 1 -; check: v6 = bor v3, v5 -; check: v31 = iconst.i64 0xcccc_cccc_cccc_cccc -; check: v7 = band v6, v31 -; check: v8 = ushr_imm v7, 2 -; check: v32 = iconst.i64 0x3333_3333_3333_3333 -; check: v9 = band v6, v32 -; check: v10 = ishl_imm v9, 2 -; check: v11 = bor v8, v10 -; check: v33 = iconst.i64 0xf0f0_f0f0_f0f0_f0f0 -; check: v12 = band v11, v33 -; check: v13 = ushr_imm v12, 4 -; check: v34 = 
iconst.i64 0x0f0f_0f0f_0f0f_0f0f -; check: v14 = band v11, v34 -; check: v15 = ishl_imm v14, 4 -; check: v16 = bor v13, v15 -; check: v35 = iconst.i64 0xff00_ff00_ff00_ff00 -; check: v17 = band v16, v35 -; check: v18 = ushr_imm v17, 8 -; check: v36 = iconst.i64 0x00ff_00ff_00ff_00ff -; check: v19 = band v16, v36 -; check: v20 = ishl_imm v19, 8 -; check: v21 = bor v18, v20 -; check: v37 = iconst.i64 0xffff_0000_ffff_0000 -; check: v22 = band v21, v37 -; check: v23 = ushr_imm v22, 16 -; check: v38 = iconst.i64 0xffff_0000_ffff -; check: v24 = band v21, v38 -; check: v25 = ishl_imm v24, 16 -; check: v26 = bor v23, v25 -; check: v27 = ushr_imm v26, 32 -; check: v28 = ishl_imm v26, 32 -; check: v1 = bor v27, v28 diff --git a/cranelift/filetests/filetests/legalizer/br_table_cond.clif b/cranelift/filetests/filetests/legalizer/br_table_cond.clif deleted file mode 100644 index db464ae4d4..0000000000 --- a/cranelift/filetests/filetests/legalizer/br_table_cond.clif +++ /dev/null @@ -1,64 +0,0 @@ -test legalizer -set enable_probestack=false -set enable_jump_tables=false -target x86_64 legacy - -; Test that when jump_tables_enables is false, all jump tables are eliminated. 
-; regex: V=v\d+ -; regex: BB=block\d+ - -function u0:0(i64 vmctx) baldrdash_system_v { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 48 - jt0 = jump_table [block2, block2, block7] - jt1 = jump_table [block8, block8] - -block0(v0: i64): - jump block5 - -block5: - v1 = global_value.i64 gv1 - v2 = load.i64 v1 - trapnz v2, interrupt - v3 = iconst.i32 0 - br_table v3, block3, jt0 -; check: block5: -; check: $(val0=$V) = iconst.i32 0 -; nextln: $(cmp0=$V) = icmp_imm eq $val0, 0 -; nextln: brnz $cmp0, block2 -; nextln: jump $(fail0=$BB) -; check: $fail0: -; nextln: $(cmp1=$V) = icmp_imm.i32 eq $val0, 1 -; nextln: brnz $cmp1, block2 -; nextln: jump $(fail1=$BB) -; check: $fail1: -; nextln: $(cmp2=$V) = icmp_imm.i32 eq $val0, 2 -; nextln: brnz $cmp2, block7 -; nextln: jump block3 - -block7: - v4 = iconst.i32 0 - br_table v4, block3, jt1 -; check: block7: -; check: $(val1=$V) = iconst.i32 0 -; nextln: $(cmp3=$V) = icmp_imm eq $val1, 0 -; nextln: brnz $cmp3, block8 -; nextln: jump $(fail3=$BB) -; check: $fail3: -; nextln: $(cmp4=$V) = icmp_imm.i32 eq $val1, 1 -; nextln: brnz $cmp4, block8 -; nextln: jump block3 - -block8: - jump block5 - -block3: - jump block2 - -block2: - jump block1 - -block1: - fallthrough_return -} -; not: jump_table diff --git a/cranelift/filetests/filetests/legalizer/empty_br_table.clif b/cranelift/filetests/filetests/legalizer/empty_br_table.clif deleted file mode 100644 index d320155470..0000000000 --- a/cranelift/filetests/filetests/legalizer/empty_br_table.clif +++ /dev/null @@ -1,17 +0,0 @@ -test legalizer -set enable_probestack=false -set enable_jump_tables=false -target x86_64 legacy - -function u0:0(i64) { - jt0 = jump_table [] - -block0(v0: i64): - br_table v0, block1, jt0 -; check: block0(v0: i64): -; nextln: jump block1 - -block1: - return -} -; not: jump_table diff --git a/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif b/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif deleted file mode 100644 index 6d72cc6499..0000000000 
--- a/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif +++ /dev/null @@ -1,23 +0,0 @@ -test legalizer -target x86_64 legacy - -function %icmp_imm_i128(i128) -> i8 { -block0(v0: i128): - v1 = icmp_imm.i128 eq v0, 1 - v2 = bint.i8 v1 - return v2 -} - -; check: function %icmp_imm_i128(i64 [%rdi], i64 [%rsi]) -> i8 [%rax] fast { -; nextln: block0(v3: i64, v4: i64): -; nextln: v7 -> v3 -; nextln: v8 -> v4 -; nextln: [-] v0 = iconcat v3, v4 -; nextln: [RexOp1pu_id#b8] v5 = iconst.i64 1 -; nextln: [RexOp1pu_id#b8] v6 = iconst.i64 0 -; nextln: [RexOp1icscc#8039] v9 = icmp eq v7, v5 -; nextln: [RexOp1icscc#8039] v10 = icmp eq v8, v6 -; nextln: [RexOp1rr#21] v1 = band v9, v10 -; nextln: [RexOp2urm_noflags#4b6] v2 = bint.i8 v1 -; nextln: [Op1ret#c3] return v2 -; nextln: } diff --git a/cranelift/filetests/filetests/legalizer/pass_by_ref.clif b/cranelift/filetests/filetests/legalizer/pass_by_ref.clif deleted file mode 100644 index 141330cf01..0000000000 --- a/cranelift/filetests/filetests/legalizer/pass_by_ref.clif +++ /dev/null @@ -1,31 +0,0 @@ -test legalizer -target x86_64 legacy - -function %legalize_entry(i128) -> i64 windows_fastcall { -block0(v0: i128): - v1, v2 = isplit v0 - return v2 -} -; check: function %legalize_entry(i64 ptr [%rcx]) -> i64 [%rax] windows_fastcall { -; nextln: block0(v3: i64): -; nextln: v4 = load.i64 v3 -; nextln: v1 -> v4 -; nextln: v5 = load.i64 v3+8 -; nextln: v2 -> v5 -; nextln: v0 = iconcat v4, v5 -; nextln: return v2 - -function %legalize_call() { - fn0 = %foo(i32x4) windows_fastcall -block0: - v0 = vconst.i32x4 [1 2 3 4] - call fn0(v0) - return -} -; check: ss0 = explicit_slot 16 -; check: sig0 = (i64 ptr [%rcx]) windows_fastcall -; check: v0 = vconst.i32x4 const0 -; nextln: v1 = stack_addr.i64 ss0 -; nextln: store v0, v1 -; nextln: v2 = func_addr.i64 fn0 -; nextln: call_indirect sig0, v2(v1) diff --git a/cranelift/filetests/filetests/legalizer/popcnt-i128.clif b/cranelift/filetests/filetests/legalizer/popcnt-i128.clif deleted file 
mode 100644 index 8976ad0e25..0000000000 --- a/cranelift/filetests/filetests/legalizer/popcnt-i128.clif +++ /dev/null @@ -1,21 +0,0 @@ -test legalizer -target x86_64 legacy haswell - -function %foo() -> i128 { -block0: - v1 = iconst.i64 0x6400000042 - v2 = iconst.i64 0x7F10100042 - v3 = iconcat v1, v2 - v4 = popcnt.i128 v3 - return v4 -} - -; check: v1 = iconst.i64 0x0064_0000_0042 -; check: v2 = iconst.i64 0x007f_1010_0042 -; check: v3 = iconcat v1, v2 -; check: v5 = popcnt v1 -; check: v6 = popcnt v2 -; check: v7 = iadd v5, v6 -; check: v8 = iconst.i64 0 -; check: v4 = iconcat v7, v8 -; check: return v7, v8 diff --git a/cranelift/filetests/filetests/licm/reject.clif b/cranelift/filetests/filetests/licm/reject.clif index eab03760b6..378a9003d1 100644 --- a/cranelift/filetests/filetests/licm/reject.clif +++ b/cranelift/filetests/filetests/licm/reject.clif @@ -7,9 +7,6 @@ block0(v0: i32): jump block1(v0) block1(v1: i32): - regmove.i32 v0, %x10 -> %x20 -; check: block1(v1: i32): -; check: regmove.i32 v0, %x10 -> %x20 v2 = iconst.i32 1 brz v1, block3(v1) jump block2 diff --git a/cranelift/filetests/filetests/parser/instruction_encoding.clif b/cranelift/filetests/filetests/parser/instruction_encoding.clif deleted file mode 100644 index 5386808482..0000000000 --- a/cranelift/filetests/filetests/parser/instruction_encoding.clif +++ /dev/null @@ -1,24 +0,0 @@ -test cat - -target riscv32 - -; regex: WS=[ \t]* - -function %foo(i32, i32) { -block1(v0: i32 [%x8], v1: i32): - [-,-] v2 = iadd v0, v1 - [-] trap heap_oob - [R#1234, %x5, %x11] v6, v7 = iadd_ifcout v2, v0 - [Rshamt#beef, %x25] v8 = ishl_imm v6, 2 -@55 v9 = iadd v8, v7 -@a5 [Iret#5] return v0, v8 -} -; sameln: function %foo(i32, i32) fast { -; nextln: block1(v0: i32 [%x8], v1: i32): -; nextln: [-,-]$WS v2 = iadd v0, v1 -; nextln: [-]$WS trap heap_oob -; nextln: [R#1234,%x5,%x11]$WS v6, v7 = iadd_ifcout v2, v0 -; nextln: [Rshamt#beef,%x25]$WS v8 = ishl_imm v6, 2 -; nextln: @0055 [-,-]$WS v9 = iadd v8, v7 -; nextln: 
@00a5 [Iret#05]$WS return v0, v8 -; nextln: } diff --git a/cranelift/filetests/filetests/parser/tiny.clif b/cranelift/filetests/filetests/parser/tiny.clif index 42fa5a8157..35cf075022 100644 --- a/cranelift/filetests/filetests/parser/tiny.clif +++ b/cranelift/filetests/filetests/parser/tiny.clif @@ -183,42 +183,6 @@ block0(v1: i32): ; nextln: store_complex v3, v1+v2 ; nextln: store_complex v3, v1+v2+1 -; Register diversions. -; This test file has no ISA, so we can unly use register unit numbers. -function %diversion(i32) { - ss0 = spill_slot 4 - -block0(v1: i32): - regmove v1, %10 -> %20 - regmove v1, %20 -> %10 - regspill v1, %10 -> ss0 - regfill v1, ss0 -> %10 - return -} -; sameln: function %diversion(i32) fast { -; nextln: ss0 = spill_slot 4 -; check: block0(v1: i32): -; nextln: regmove v1, %10 -> %20 -; nextln: regmove v1, %20 -> %10 -; nextln: regspill v1, %10 -> ss0 -; nextln: regfill v1, ss0 -> %10 -; nextln: return -; nextln: } - -; Register copies. -function %copy_special() { -block0: - copy_special %10 -> %20 - copy_special %20 -> %10 - return -} -; sameln: function %copy_special() fast { -; nextln: block0: -; nextln: copy_special %10 -> %20 -; nextln: copy_special %20 -> %10 -; nextln: return -; nextln: } - function %cond_traps(i32) { block0(v0: i32): trapz v0, stk_ovf diff --git a/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif b/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif index bb21ec2553..a7c059f6c0 100644 --- a/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif +++ b/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif @@ -7,12 +7,15 @@ target x86_64 ;; we need to make an alias `v3 -> v2`. 
function %replace_inst_with_alias() -> i32 { + sig0 = (i32, i32) -> i32, i32 + fn0 = u0:0 sig0 + block0: v0 = iconst.i32 0 - v1, v2 = x86_smulx v0, v0 + v1, v2 = call fn0(v0, v0) v3 = isub v2, v0 ; check: v0 = iconst.i32 0 - ; nextln: v1, v2 = x86_smulx v0, v0 + ; nextln: v1, v2 = call fn0(v0, v0) ; nextln: v3 -> v2 return v3 } diff --git a/cranelift/filetests/filetests/postopt/basic.clif b/cranelift/filetests/filetests/postopt/basic.clif deleted file mode 100644 index 55a8d03738..0000000000 --- a/cranelift/filetests/filetests/postopt/basic.clif +++ /dev/null @@ -1,125 +0,0 @@ -test postopt -target aarch64 -target i686 legacy - -; Test that compare+branch sequences are folded effectively on x86. - -function %br_icmp(i32, i32) -> i32 { -block0(v0: i32, v1: i32): -[DynRexOp1icscc#39,%rdx] v2 = icmp slt v0, v1 -[Op1t8jccd_long#85] brnz v2, block1 -[Op1jmpb#eb] jump block2 - -block2: -[Op1ret#c3] return v1 - -block1: -[Op1pu_id#b8,%rax] v8 = iconst.i32 3 -[Op1ret#c3] return v8 -} -; sameln: function %br_icmp -; nextln: block0(v0: i32, v1: i32): -; nextln: v9 = ifcmp v0, v1 -; nextln: v2 = trueif slt v9 -; nextln: brif slt v9, block1 -; nextln: jump block2 -; nextln: -; nextln: block2: -; nextln: return v1 -; nextln: -; nextln: block1: -; nextln: v8 = iconst.i32 3 -; nextln: return v8 -; nextln: } - -; Use brz instead of brnz, so the condition is inverted. 
- -function %br_icmp_inverse(i32, i32) -> i32 { -block0(v0: i32, v1: i32): -[DynRexOp1icscc#39,%rdx] v2 = icmp slt v0, v1 -[Op1t8jccd_long#84] brz v2, block1 -[Op1jmpb#eb] jump block2 - -block2: -[Op1ret#c3] return v1 - -block1: -[Op1pu_id#b8,%rax] v8 = iconst.i32 3 -[Op1ret#c3] return v8 -} -; sameln: function %br_icmp_inverse -; nextln: block0(v0: i32, v1: i32): -; nextln: v9 = ifcmp v0, v1 -; nextln: v2 = trueif slt v9 -; nextln: brif sge v9, block1 -; nextln: jump block2 -; nextln: -; nextln: block2: -; nextln: return v1 -; nextln: -; nextln: block1: -; nextln: v8 = iconst.i32 3 -; nextln: return v8 -; nextln: } - -; Use icmp_imm instead of icmp. - -function %br_icmp_imm(i32, i32) -> i32 { -block0(v0: i32, v1: i32): -[DynRexOp1icscc_ib#7083] v2 = icmp_imm slt v0, 2 -[Op1t8jccd_long#84] brz v2, block1 -[Op1jmpb#eb] jump block2 - -block2: -[Op1ret#c3] return v1 - -block1: -[Op1pu_id#b8,%rax] v8 = iconst.i32 3 -[Op1ret#c3] return v8 -} -; sameln: function %br_icmp_imm -; nextln: block0(v0: i32, v1: i32): -; nextln: v9 = ifcmp_imm v0, 2 -; nextln: v2 = trueif slt v9 -; nextln: brif sge v9, block1 -; nextln: jump block2 -; nextln: -; nextln: block2: -; nextln: return v1 -; nextln: -; nextln: block1: -; nextln: v8 = iconst.i32 3 -; nextln: return v8 -; nextln: } - -; Use fcmp instead of icmp. 
- -function %br_fcmp(f32, f32) -> f32 { -block0(v0: f32, v1: f32): -[Op2fcscc#42e,%rdx] v2 = fcmp gt v0, v1 -[Op1t8jccd_long#84] brz v2, block1 -[Op1jmpb#eb] jump block2 - -block2: -[Op1ret#c3] return v1 - -block1: -[Op1pu_id#b8,%rax] v18 = iconst.i32 0x40a8_0000 -[Mp2frurm#56e,%xmm0] v8 = bitcast.f32 v18 -[Op1ret#c3] return v8 -} -; sameln: function %br_fcmp -; nextln: block0(v0: f32, v1: f32): -; nextln: v19 = ffcmp v0, v1 -; nextln: v2 = trueff gt v19 -; nextln: brff ule v19, block1 -; nextln: jump block2 -; nextln: -; nextln: block2: -; nextln: return v1 -; nextln: -; nextln: block1: -; nextln: v18 = iconst.i32 0x40a8_0000 -; nextln: v8 = bitcast.f32 v18 -; nextln: return v8 -; nextln: } diff --git a/cranelift/filetests/filetests/postopt/complex_memory_ops.clif b/cranelift/filetests/filetests/postopt/complex_memory_ops.clif deleted file mode 100644 index acedb71087..0000000000 --- a/cranelift/filetests/filetests/postopt/complex_memory_ops.clif +++ /dev/null @@ -1,94 +0,0 @@ -test postopt -target x86_64 legacy - -function %dual_loads(i64, i64) -> i64 { -block0(v0: i64, v1: i64): -[RexOp1rr#8001] v3 = iadd v0, v1 - v4 = load.i64 v3 - v5 = uload8.i64 v3 - v6 = sload8.i64 v3 - v7 = uload16.i64 v3 - v8 = sload16.i64 v3 - v9 = uload32.i64 v3 - v10 = sload32.i64 v3 -[Op1ret#c3] return v10 -} - -; sameln: function %dual_loads -; nextln: block0(v0: i64, v1: i64): -; nextln: v3 = iadd v0, v1 -; nextln: v4 = load_complex.i64 v0+v1 -; nextln: v5 = uload8_complex.i64 v0+v1 -; nextln: v6 = sload8_complex.i64 v0+v1 -; nextln: v7 = uload16_complex.i64 v0+v1 -; nextln: v8 = sload16_complex.i64 v0+v1 -; nextln: v9 = uload32_complex v0+v1 -; nextln: v10 = sload32_complex v0+v1 -; nextln: return v10 -; nextln: } - -function %dual_loads2(i64, i64) -> i64 { -block0(v0: i64, v1: i64): -[RexOp1rr#8001] v3 = iadd v0, v1 - v4 = load.i64 v3+1 - v5 = uload8.i64 v3+1 - v6 = sload8.i64 v3+1 - v7 = uload16.i64 v3+1 - v8 = sload16.i64 v3+1 - v9 = uload32.i64 v3+1 - v10 = sload32.i64 v3+1 
-[Op1ret#c3] return v10 -} - -; sameln: function %dual_loads2 -; nextln: block0(v0: i64, v1: i64): -; nextln: v3 = iadd v0, v1 -; nextln: v4 = load_complex.i64 v0+v1+1 -; nextln: v5 = uload8_complex.i64 v0+v1+1 -; nextln: v6 = sload8_complex.i64 v0+v1+1 -; nextln: v7 = uload16_complex.i64 v0+v1+1 -; nextln: v8 = sload16_complex.i64 v0+v1+1 -; nextln: v9 = uload32_complex v0+v1+1 -; nextln: v10 = sload32_complex v0+v1+1 -; nextln: return v10 -; nextln: } - -function %dual_stores(i64, i64, i64) { -block0(v0: i64, v1: i64, v2: i64): -[RexOp1rr#8001] v3 = iadd v0, v1 -[RexOp1st#8089] store.i64 v2, v3 -[RexOp1st#88] istore8.i64 v2, v3 -[RexMp1st#189] istore16.i64 v2, v3 -[RexOp1st#89] istore32.i64 v2, v3 -[Op1ret#c3] return -} - -; sameln: function %dual_stores -; nextln: block0(v0: i64, v1: i64, v2: i64): -; nextln: v3 = iadd v0, v1 -; nextln: store_complex v2, v0+v1 -; nextln: istore8_complex v2, v0+v1 -; nextln: istore16_complex v2, v0+v1 -; nextln: istore32_complex v2, v0+v1 -; nextln: return -; nextln: } - -function %dual_stores2(i64, i64, i64) { -block0(v0: i64, v1: i64, v2: i64): -[RexOp1rr#8001] v3 = iadd v0, v1 -[RexOp1stDisp8#8089] store.i64 v2, v3+1 -[RexOp1stDisp8#88] istore8.i64 v2, v3+1 -[RexMp1stDisp8#189] istore16.i64 v2, v3+1 -[RexOp1stDisp8#89] istore32.i64 v2, v3+1 -[Op1ret#c3] return -} - -; sameln: function %dual_stores2 -; nextln: block0(v0: i64, v1: i64, v2: i64): -; nextln: v3 = iadd v0, v1 -; nextln: store_complex v2, v0+v1+1 -; nextln: istore8_complex v2, v0+v1+1 -; nextln: istore16_complex v2, v0+v1+1 -; nextln: istore32_complex v2, v0+v1+1 -; nextln: return -; nextln: } diff --git a/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif b/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif deleted file mode 100644 index 84ddf3b884..0000000000 --- a/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif +++ /dev/null @@ -1,32 +0,0 @@ -test postopt -target x86_64 legacy - -; Fold the immediate of an 
iadd_imm into an address offset. - -function u0:0(i64 vmctx) -> i64 { -block0(v0: i64): - v1 = iadd_imm.i64 v0, 16 -[RexOp1ldDisp8#808b] v2 = load.i64 notrap aligned v1 -[Op1ret#c3] return v2 -} - -; sameln: function u0:0(i64 vmctx) -> i64 fast { -; nextln: block0(v0: i64): -; nextln: v1 = iadd_imm v0, 16 -; nextln: [RexOp1ldDisp8#808b] v2 = load.i64 notrap aligned v0+16 -; nextln: [Op1ret#c3] return v2 -; nextln: } - -function u0:1(i64, i64 vmctx) { -block0(v3: i64, v0: i64): - v1 = iadd_imm.i64 v0, 16 -[RexOp1stDisp8#8089] store.i64 notrap aligned v3, v1 -[Op1ret#c3] return -} - -; sameln: function u0:1(i64, i64 vmctx) fast { -; nextln: block0(v3: i64, v0: i64): -; nextln: v1 = iadd_imm v0, 16 -; nextln: [RexOp1stDisp8#8089] store notrap aligned v3, v0+16 -; nextln: [Op1ret#c3] return -; nextln: } diff --git a/cranelift/filetests/filetests/regalloc/aliases.clif b/cranelift/filetests/filetests/regalloc/aliases.clif deleted file mode 100644 index e3dcfbad90..0000000000 --- a/cranelift/filetests/filetests/regalloc/aliases.clif +++ /dev/null @@ -1,35 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -function %value_aliases(i32, f32, i64 vmctx) baldrdash_system_v { - gv0 = vmctx - heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 - -block0(v0: i32, v1: f32, v2: i64): - v3 = iconst.i32 0 - jump block3(v3) - -block3(v4: i32): - v5 = heap_addr.i64 heap0, v4, 1 - v6 = load.f32 v5 - v7 -> v1 - v8 = fdiv v6, v7 - v9 = heap_addr.i64 heap0, v4, 1 - store v8, v9 - v10 = iconst.i32 4 - v11 = iadd v4, v10 - v12 -> v0 - v13 = icmp ult v11, v12 - v14 = bint.i32 v13 - brnz v14, block3(v11) - jump block4 - -block4: - jump block2 - -block2: - jump block1 - -block1: - return -} diff --git a/cranelift/filetests/filetests/regalloc/basic.clif b/cranelift/filetests/filetests/regalloc/basic.clif deleted file mode 100644 index 48111253ae..0000000000 --- a/cranelift/filetests/filetests/regalloc/basic.clif +++ /dev/null @@ -1,80 +0,0 @@ -test regalloc 
- -; We can add more ISAs once they have defined encodings. -target riscv32 - -; regex: RX=%x\d+ - -function %add(i32, i32) { -block0(v1: i32, v2: i32): - v3 = iadd v1, v2 -; check: [R#0c,%x5] -; sameln: iadd - return -} - -; Function with a dead argument. -function %dead_arg(i32, i32) -> i32{ -block0(v1: i32, v2: i32): -; not: regmove -; check: return v1 - return v1 -} - -; Return a value from a different register. -function %move1(i32, i32) -> i32 { -block0(v1: i32, v2: i32): -; not: regmove -; check: regmove v2, %x11 -> %x10 -; nextln: return v2 - return v2 -} - -; Swap two registers. -function %swap(i32, i32) -> i32, i32 { -block0(v1: i32, v2: i32): -; not: regmove -; check: regmove v2, %x11 -> $(tmp=$RX) -; nextln: regmove v1, %x10 -> %x11 -; nextln: regmove v2, $tmp -> %x10 -; nextln: return v2, v1 - return v2, v1 -} - -; Return a block argument. -function %retblock(i32, i32) -> i32 { -block0(v1: i32, v2: i32): - brnz v1, block1(v1) - jump block1(v2) - -block1(v10: i32): - return v10 -} - -; Pass a block argument as a function argument. -function %callblock(i32, i32) -> i32 { - fn0 = %foo(i32) -> i32 - -block0(v1: i32, v2: i32): - brnz v1, block1(v1) - jump block1(v2) - -block1(v10: i32): - v11 = call fn0(v10) - return v11 -} - -; Pass a block argument as a jump argument. -function %jumpblock(i32, i32) -> i32 { - fn0 = %foo(i32) -> i32 - -block0(v1: i32, v2: i32): - brnz v1, block1(v1, v2) - jump block1(v2, v1) - -block1(v10: i32, v11: i32): - jump block2(v10, v11) - -block2(v20: i32, v21: i32): - return v21 -} diff --git a/cranelift/filetests/filetests/regalloc/coalesce.clif b/cranelift/filetests/filetests/regalloc/coalesce.clif deleted file mode 100644 index 48395da1b3..0000000000 --- a/cranelift/filetests/filetests/regalloc/coalesce.clif +++ /dev/null @@ -1,157 +0,0 @@ -test regalloc -target riscv32 - -; Test the coalescer. 
-; regex: V=v\d+ -; regex: WS=\s+ -; regex: LOC=%\w+ -; regex: BB=block\d+ - -; This function is already CSSA, so no copies should be inserted. -function %cssa(i32) -> i32 { -block0(v0: i32): - ; not: copy - ; v0 is used by the branch and passed as an arg - that's no conflict. - brnz v0, block1(v0) - jump block2 - -block2: - ; v0 is live across the branch above. That's no conflict. - v1 = iadd_imm v0, 7 - jump block1(v1) - -block1(v10: i32): - v11 = iadd_imm v10, 7 - return v11 -} - -function %trivial(i32) -> i32 { -block0(v0: i32): - ; check: brnz v0, $(splitEdge=$BB) - brnz v0, block1(v0) - jump block2 - -block2: - ; not: copy - v1 = iadd_imm v0, 7 - jump block1(v1) - - ; check: $splitEdge: - ; nextln: $(cp1=$V) = copy.i32 v0 - ; nextln: jump block1($cp1) - -block1(v10: i32): - ; Use v0 in the destination block causes a conflict. - v11 = iadd v10, v0 - return v11 -} - -; A value is used as an SSA argument twice in the same branch. -function %dualuse(i32) -> i32 { -block0(v0: i32): - ; check: brnz v0, $(splitEdge=$BB) - brnz v0, block1(v0, v0) - jump block2 - -block2: - v1 = iadd_imm v0, 7 - v2 = iadd_imm v1, 56 - jump block1(v1, v2) - - ; check: $splitEdge: - ; check: $(cp1=$V) = copy.i32 v0 - ; nextln: jump block1($cp1, v0) - -block1(v10: i32, v11: i32): - v12 = iadd v10, v11 - return v12 -} - -; Interference away from the branch -; The interference can be broken with a copy at either branch. -function %interference(i32) -> i32 { -block0(v0: i32): - ; not: copy - ; check: brnz v0, $(splitEdge=$BB) - ; not: copy - brnz v0, block1(v0) - jump block2 - -block2: - v1 = iadd_imm v0, 7 - ; v1 and v0 interfere here: - v2 = iadd_imm v0, 8 - ; check: $(cp0=$V) = copy v1 - ; check: jump block1($cp0) - jump block1(v1) - - ; check: $splitEdge: - ; not: copy - ; nextln: jump block1(v0) - -block1(v10: i32): - ; not: copy - v11 = iadd_imm v10, 7 - return v11 -} - -; A loop where one induction variable is used as a backedge argument. 
-function %fibonacci(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 1 - v2 = iconst.i32 2 - jump block1(v1, v2) - - ; check: $(splitEdge=$BB): - ; check: $(nv11b=$V) = copy.i32 v11 - ; not: copy - ; check: jump block1($nv11b, v12) - -block1(v10: i32, v11: i32): - ; v11 needs to be isolated because it interferes with v10. - ; check: block1(v10: i32 [$LOC], $(nv11a=$V): i32 [$LOC]) - ; check: v11 = copy $nv11a - v12 = iadd v10, v11 - v13 = icmp ult v12, v0 - ; check: brnz v13, $splitEdge - brnz v13, block1(v11, v12) - jump block2 - -block2: - return v12 -} - -; Function arguments passed on the stack aren't allowed to be part of a virtual -; register, at least for now. This is because the other values in the virtual -; register would need to be spilled to the incoming_arg stack slot which we treat -; as belonging to the caller. -function %stackarg(i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i32 { -; check: ss0 = incoming_arg 4 -; not: incoming_arg -block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32): - ; check: fill v8 - ; not: v8 - jump block1(v8) - -block1(v10: i32): - v11 = iadd_imm v10, 1 - return v11 -} - -function %gvn_unremovable_phi(i32) system_v { -block0(v0: i32): - v2 = iconst.i32 0 - jump block2(v2, v0) - -block2(v3: i32, v4: i32): - brnz v3, block2(v3, v4) - jump block3 - -block3: - v5 = iconst.i32 1 - brnz v3, block2(v2, v5) - jump block4 - -block4: - return -} diff --git a/cranelift/filetests/filetests/regalloc/coalescing-207.clif b/cranelift/filetests/filetests/regalloc/coalescing-207.clif deleted file mode 100644 index c549cbd3d2..0000000000 --- a/cranelift/filetests/filetests/regalloc/coalescing-207.clif +++ /dev/null @@ -1,1527 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -; Reported as https://github.com/bytecodealliance/cranelift/issues/207 -; -; The coalescer creates a virtual register with two interfering values. 
-function %pr207(i64 vmctx, i32, i32) -> i32 system_v { - gv1 = vmctx - gv0 = iadd_imm.i64 gv1, -8 - heap0 = static gv0, min 0, bound 0x5000, offset_guard 0x0040_0000 - sig0 = (i64 vmctx, i32, i32) -> i32 system_v - sig1 = (i64 vmctx, i32, i32, i32) -> i32 system_v - sig2 = (i64 vmctx, i32, i32, i32) -> i32 system_v - fn0 = u0:2 sig0 - fn1 = u0:0 sig1 - fn2 = u0:1 sig2 - -block0(v0: i64, v1: i32, v2: i32): - v3 = iconst.i32 0 - v4 = iconst.i32 0 - v5 = iconst.i32 0 - v6 = iconst.i32 0x4ffe - v7 = icmp uge v5, v6 - brz v7, block1 - jump block100 - -block100: - trap heap_oob - -block1: - v8 = uextend.i64 v5 - v9 = iadd_imm.i64 v0, -8 - v10 = load.i64 v9 - v11 = iadd v10, v8 - v12 = load.i32 v11+4 - v13 = iconst.i32 1056 - v14 = isub v12, v13 - v15 = iconst.i32 0x4ffe - v16 = icmp.i32 uge v4, v15 - brz v16, block2 - jump block101 - -block101: - trap heap_oob - -block2: - v17 = uextend.i64 v4 - v18 = iadd_imm.i64 v0, -8 - v19 = load.i64 v18 - v20 = iadd v19, v17 - store.i32 v14, v20+4 - v21 = iconst.i32 0x4ffe - v22 = icmp.i32 uge v2, v21 - brz v22, block3 - jump block102 - -block102: - trap heap_oob - -block3: - v23 = uextend.i64 v2 - v24 = iadd_imm.i64 v0, -8 - v25 = load.i64 v24 - v26 = iadd v25, v23 - v27 = sload8.i32 v26 - v28 = iconst.i32 255 - v29 = band v27, v28 - v30 = iconst.i32 0 - v31 = icmp eq v29, v30 - v32 = bint.i32 v31 - brnz v32, block90(v14, v1) - jump block103 - -block103: - v33 = call fn0(v0, v1, v27) - v34 = iconst.i32 0 - v35 = iconst.i32 0 - v36 = icmp eq v33, v35 - v37 = bint.i32 v36 - brnz v37, block90(v14, v34) - jump block104 - -block104: - v38 = iconst.i32 0x4ffe - v39 = icmp.i32 uge v2, v38 - brz v39, block4 - jump block105 - -block105: - trap heap_oob - -block4: - v40 = uextend.i64 v2 - v41 = iadd_imm.i64 v0, -8 - v42 = load.i64 v41 - v43 = iadd v42, v40 - v44 = uload8.i32 v43+1 - v45 = iconst.i32 0 - v46 = icmp eq v44, v45 - v47 = bint.i32 v46 - brnz v47, block56(v33, v14) - jump block106 - -block106: - v48 = iconst.i32 0x4ffe - v49 = 
icmp.i32 uge v33, v48 - brz v49, block5 - jump block107 - -block107: - trap heap_oob - -block5: - v50 = uextend.i64 v33 - v51 = iadd_imm.i64 v0, -8 - v52 = load.i64 v51 - v53 = iadd v52, v50 - v54 = uload8.i32 v53+1 - v55 = iconst.i32 0 - v56 = icmp eq v54, v55 - v57 = bint.i32 v56 - brnz v57, block90(v14, v34) - jump block108 - -block108: - v58 = iconst.i32 0x4ffe - v59 = icmp.i32 uge v2, v58 - brz v59, block6 - jump block109 - -block109: - trap heap_oob - -block6: - v60 = uextend.i64 v2 - v61 = iadd_imm.i64 v0, -8 - v62 = load.i64 v61 - v63 = iadd v62, v60 - v64 = uload8.i32 v63+2 - v65 = iconst.i32 0 - v66 = icmp eq v64, v65 - v67 = bint.i32 v66 - brnz v67, block42 - jump block110 - -block110: - v68 = iconst.i32 0x4ffe - v69 = icmp.i32 uge v33, v68 - brz v69, block7 - jump block111 - -block111: - trap heap_oob - -block7: - v70 = uextend.i64 v33 - v71 = iadd_imm.i64 v0, -8 - v72 = load.i64 v71 - v73 = iadd v72, v70 - v74 = uload8.i32 v73+2 - v75 = iconst.i32 0 - v76 = icmp eq v74, v75 - v77 = bint.i32 v76 - brnz v77, block90(v14, v34) - jump block112 - -block112: - v78 = iconst.i32 0x4ffe - v79 = icmp.i32 uge v2, v78 - brz v79, block8 - jump block113 - -block113: - trap heap_oob - -block8: - v80 = uextend.i64 v2 - v81 = iadd_imm.i64 v0, -8 - v82 = load.i64 v81 - v83 = iadd v82, v80 - v84 = uload8.i32 v83+3 - v85 = iconst.i32 0 - v86 = icmp eq v84, v85 - v87 = bint.i32 v86 - brnz v87, block46 - jump block114 - -block114: - v88 = iconst.i32 0x4ffe - v89 = icmp.i32 uge v33, v88 - brz v89, block9 - jump block115 - -block115: - trap heap_oob - -block9: - v90 = uextend.i64 v33 - v91 = iadd_imm.i64 v0, -8 - v92 = load.i64 v91 - v93 = iadd v92, v90 - v94 = uload8.i32 v93+3 - v95 = iconst.i32 0 - v96 = icmp eq v94, v95 - v97 = bint.i32 v96 - brnz v97, block90(v14, v34) - jump block116 - -block116: - v98 = iconst.i32 0x4ffe - v99 = icmp.i32 uge v2, v98 - brz v99, block10 - jump block117 - -block117: - trap heap_oob - -block10: - v100 = uextend.i64 v2 - v101 = iadd_imm.i64 
v0, -8 - v102 = load.i64 v101 - v103 = iadd v102, v100 - v104 = uload8.i32 v103+4 - v105 = iconst.i32 0 - v106 = icmp eq v104, v105 - v107 = bint.i32 v106 - brnz v107, block54 - jump block118 - -block118: - v108 = iconst.i32 1 - v109 = iadd.i32 v2, v108 - v110 = iconst.i32 1048 - v111 = iadd.i32 v14, v110 - v112 = iconst.i64 0 - v113 = iconst.i32 0x4ffe - v114 = icmp uge v111, v113 - brz v114, block11 - jump block119 - -block119: - trap heap_oob - -block11: - v115 = uextend.i64 v111 - v116 = iadd_imm.i64 v0, -8 - v117 = load.i64 v116 - v118 = iadd v117, v115 - store.i64 v112, v118 - v119 = iconst.i32 1040 - v120 = iadd.i32 v14, v119 - v121 = iconst.i64 0 - v122 = iconst.i32 0x4ffe - v123 = icmp uge v120, v122 - brz v123, block12 - jump block120 - -block120: - trap heap_oob - -block12: - v124 = uextend.i64 v120 - v125 = iadd_imm.i64 v0, -8 - v126 = load.i64 v125 - v127 = iadd v126, v124 - store.i64 v121, v127 - v128 = iconst.i64 0 - v129 = iconst.i32 0x4ffe - v130 = icmp.i32 uge v14, v129 - brz v130, block13 - jump block121 - -block121: - trap heap_oob - -block13: - v131 = uextend.i64 v14 - v132 = iadd_imm.i64 v0, -8 - v133 = load.i64 v132 - v134 = iadd v133, v131 - store.i64 v128, v134+1032 - v135 = iconst.i64 0 - v136 = iconst.i32 0x4ffe - v137 = icmp.i32 uge v14, v136 - brz v137, block14 - jump block122 - -block122: - trap heap_oob - -block14: - v138 = uextend.i64 v14 - v139 = iadd_imm.i64 v0, -8 - v140 = load.i64 v139 - v141 = iadd v140, v138 - store.i64 v135, v141+1024 - v142 = iconst.i32 -1 - jump block15(v142, v27) - -block15(v143: i32, v144: i32): - v145 = iadd.i32 v33, v143 - v146 = iconst.i32 1 - v147 = iadd v145, v146 - v148 = iconst.i32 0x4ffe - v149 = icmp uge v147, v148 - brz v149, block16 - jump block123 - -block123: - trap heap_oob - -block16: - v150 = uextend.i64 v147 - v151 = iadd_imm.i64 v0, -8 - v152 = load.i64 v151 - v153 = iadd v152, v150 - v154 = uload8.i32 v153 - v155 = iconst.i32 0 - v156 = icmp eq v154, v155 - v157 = bint.i32 v156 - brnz 
v157, block89(v14) - jump block124 - -block124: - v158 = iconst.i32 255 - v159 = band.i32 v144, v158 - v160 = iconst.i32 2 - v161 = ishl v159, v160 - v162 = iadd.i32 v14, v161 - v163 = iconst.i32 2 - v164 = iadd.i32 v143, v163 - v165 = iconst.i32 0x4ffe - v166 = icmp uge v162, v165 - brz v166, block17 - jump block125 - -block125: - trap heap_oob - -block17: - v167 = uextend.i64 v162 - v168 = iadd_imm.i64 v0, -8 - v169 = load.i64 v168 - v170 = iadd v169, v167 - store.i32 v164, v170 - v171 = iconst.i32 1024 - v172 = iadd.i32 v14, v171 - v173 = iconst.i32 3 - v174 = ushr.i32 v159, v173 - v175 = iconst.i32 28 - v176 = band v174, v175 - v177 = iadd v172, v176 - v178 = iconst.i32 0x4ffe - v179 = icmp uge v177, v178 - brz v179, block18 - jump block126 - -block126: - trap heap_oob - -block18: - v180 = uextend.i64 v177 - v181 = iadd_imm.i64 v0, -8 - v182 = load.i64 v181 - v183 = iadd v182, v180 - v184 = load.i32 v183 - v185 = iconst.i32 1 - v186 = iconst.i32 31 - v187 = band.i32 v144, v186 - v188 = ishl v185, v187 - v189 = bor v184, v188 - v190 = iconst.i32 0x4ffe - v191 = icmp.i32 uge v177, v190 - brz v191, block19 - jump block127 - -block127: - trap heap_oob - -block19: - v192 = uextend.i64 v177 - v193 = iadd_imm.i64 v0, -8 - v194 = load.i64 v193 - v195 = iadd v194, v192 - store.i32 v189, v195 - v196 = iadd.i32 v109, v143 - v197 = iconst.i32 1 - v198 = iadd.i32 v143, v197 - v199 = iconst.i32 1 - v200 = iadd v196, v199 - v201 = iconst.i32 0x4ffe - v202 = icmp uge v200, v201 - brz v202, block20 - jump block128 - -block128: - trap heap_oob - -block20: - v203 = uextend.i64 v200 - v204 = iadd_imm.i64 v0, -8 - v205 = load.i64 v204 - v206 = iadd v205, v203 - v207 = uload8.i32 v206 - brnz v207, block15(v198, v207) - jump block21 - -block21: - v208 = iconst.i32 -1 - v209 = iconst.i32 1 - v210 = iconst.i32 -1 - v211 = iconst.i32 1 - v212 = iconst.i32 1 - v213 = iadd.i32 v198, v212 - v214 = iconst.i32 2 - v215 = icmp ult v213, v214 - v216 = bint.i32 v215 - brnz v216, block38(v2, 
v211, v209, v210, v208, v198, v213, v33, v14) - jump block129 - -block129: - v217 = iconst.i32 -1 - v218 = iconst.i32 0 - v219 = iconst.i32 1 - v220 = iconst.i32 1 - v221 = iconst.i32 1 - v222 = copy.i32 v44 - jump block22(v217, v221, v44, v220, v218, v219, v213, v222, v198, v33, v14) - -block22(v223: i32, v224: i32, v225: i32, v226: i32, v227: i32, v228: i32, v229: i32, v230: i32, v231: i32, v232: i32, v233: i32): - v234 = copy v228 - v235 = iadd v223, v224 - v236 = iadd.i32 v2, v235 - v237 = iconst.i32 0x4ffe - v238 = icmp uge v236, v237 - brz v238, block23 - jump block130 - -block130: - trap heap_oob - -block23: - v239 = uextend.i64 v236 - v240 = iadd_imm.i64 v0, -8 - v241 = load.i64 v240 - v242 = iadd v241, v239 - v243 = uload8.i32 v242 - v244 = iconst.i32 255 - v245 = band.i32 v225, v244 - v246 = icmp ne v243, v245 - v247 = bint.i32 v246 - brnz v247, block24 - jump block131 - -block131: - v248 = icmp.i32 ne v224, v226 - v249 = bint.i32 v248 - brnz v249, block25 - jump block132 - -block132: - v250 = iadd.i32 v227, v226 - v251 = iconst.i32 1 - jump block27(v251, v250, v223, v226) - -block24: - v252 = icmp.i32 ule v243, v245 - v253 = bint.i32 v252 - brnz v253, block26 - jump block133 - -block133: - v254 = isub.i32 v234, v223 - v255 = iconst.i32 1 - jump block27(v255, v234, v223, v254) - -block25: - v256 = iconst.i32 1 - v257 = iadd.i32 v224, v256 - v258 = copy.i32 v227 - jump block27(v257, v258, v223, v226) - -block26: - v259 = iconst.i32 1 - v260 = iconst.i32 1 - v261 = iadd.i32 v227, v260 - v262 = iconst.i32 1 - v263 = copy.i32 v227 - jump block27(v259, v261, v263, v262) - -block27(v264: i32, v265: i32, v266: i32, v267: i32): - v268 = iadd v264, v265 - v269 = icmp uge v268, v229 - v270 = bint.i32 v269 - brnz v270, block29 - jump block134 - -block134: - v271 = iadd.i32 v2, v268 - v272 = iconst.i32 0x4ffe - v273 = icmp uge v271, v272 - brz v273, block28 - jump block135 - -block135: - trap heap_oob - -block28: - v274 = uextend.i64 v271 - v275 = iadd_imm.i64 v0, -8 
- v276 = load.i64 v275 - v277 = iadd v276, v274 - v278 = uload8.i32 v277 - v279 = copy.i32 v265 - jump block22(v266, v264, v278, v267, v279, v268, v229, v230, v231, v232, v233) - -block29: - jump block30 - -block30: - v280 = iconst.i32 -1 - v281 = iconst.i32 0 - v282 = iconst.i32 1 - v283 = iconst.i32 1 - v284 = iconst.i32 1 - jump block31(v280, v284, v230, v283, v281, v282, v229, v267, v266, v231, v232, v233) - -block31(v285: i32, v286: i32, v287: i32, v288: i32, v289: i32, v290: i32, v291: i32, v292: i32, v293: i32, v294: i32, v295: i32, v296: i32): - v297 = copy v290 - v298 = iadd v285, v286 - v299 = iadd.i32 v2, v298 - v300 = iconst.i32 0x4ffe - v301 = icmp uge v299, v300 - brz v301, block32 - jump block136 - -block136: - trap heap_oob - -block32: - v302 = uextend.i64 v299 - v303 = iadd_imm.i64 v0, -8 - v304 = load.i64 v303 - v305 = iadd v304, v302 - v306 = uload8.i32 v305 - v307 = iconst.i32 255 - v308 = band.i32 v287, v307 - v309 = icmp ne v306, v308 - v310 = bint.i32 v309 - brnz v310, block33 - jump block137 - -block137: - v311 = icmp.i32 ne v286, v288 - v312 = bint.i32 v311 - brnz v312, block34 - jump block138 - -block138: - v313 = iadd.i32 v289, v288 - v314 = iconst.i32 1 - jump block36(v314, v313, v285, v288) - -block33: - v315 = icmp.i32 uge v306, v308 - v316 = bint.i32 v315 - brnz v316, block35 - jump block139 - -block139: - v317 = isub.i32 v297, v285 - v318 = iconst.i32 1 - jump block36(v318, v297, v285, v317) - -block34: - v319 = iconst.i32 1 - v320 = iadd.i32 v286, v319 - v321 = copy.i32 v289 - jump block36(v320, v321, v285, v288) - -block35: - v322 = iconst.i32 1 - v323 = iconst.i32 1 - v324 = iadd.i32 v289, v323 - v325 = iconst.i32 1 - v326 = copy.i32 v289 - jump block36(v322, v324, v326, v325) - -block36(v327: i32, v328: i32, v329: i32, v330: i32): - v331 = iadd v327, v328 - v332 = icmp uge v331, v291 - v333 = bint.i32 v332 - brnz v333, block38(v2, v330, v292, v329, v293, v294, v291, v295, v296) - jump block140 - -block140: - v334 = iadd.i32 v2, 
v331 - v335 = iconst.i32 0x4ffe - v336 = icmp uge v334, v335 - brz v336, block37 - jump block141 - -block141: - trap heap_oob - -block37: - v337 = uextend.i64 v334 - v338 = iadd_imm.i64 v0, -8 - v339 = load.i64 v338 - v340 = iadd v339, v337 - v341 = uload8.i32 v340 - v342 = copy.i32 v328 - jump block31(v329, v327, v341, v330, v342, v331, v291, v292, v293, v294, v295, v296) - -block38(v343: i32, v344: i32, v345: i32, v346: i32, v347: i32, v348: i32, v349: i32, v350: i32, v351: i32): - v352 = iconst.i32 1 - v353 = iadd v346, v352 - v354 = iconst.i32 1 - v355 = iadd v347, v354 - v356 = icmp ugt v353, v355 - v357 = bint.i32 v356 - brnz v357, block39(v344) - jump block142 - -block142: - v358 = copy v345 - jump block39(v358) - -block39(v359: i32): - v360 = iadd.i32 v343, v359 - brnz.i32 v357, block40(v346) - jump block143 - -block143: - v361 = copy.i32 v347 - jump block40(v361) - -block40(v362: i32): - v363 = iconst.i32 1 - v364 = iadd v362, v363 - v365 = call fn1(v0, v343, v360, v364) - v366 = iconst.i32 0 - v367 = icmp eq v365, v366 - v368 = bint.i32 v367 - brnz v368, block63 - jump block144 - -block144: - v369 = iconst.i32 1 - v370 = iadd v362, v369 - v371 = isub.i32 v348, v370 - v372 = iconst.i32 1 - v373 = iadd v371, v372 - v374 = icmp ugt v362, v373 - v375 = bint.i32 v374 - v376 = copy v362 - brnz v375, block41(v376) - jump block145 - -block145: - v377 = copy v373 - jump block41(v377) - -block41(v378: i32): - v379 = iconst.i32 1 - v380 = iadd v378, v379 - v381 = iconst.i32 0 - jump block64(v380, v381) - -block42: - v382 = iconst.i32 8 - v383 = ishl.i32 v29, v382 - v384 = bor v383, v44 - v385 = iconst.i32 0x4ffe - v386 = icmp.i32 uge v33, v385 - brz v386, block43 - jump block146 - -block146: - trap heap_oob - -block43: - v387 = uextend.i64 v33 - v388 = iadd_imm.i64 v0, -8 - v389 = load.i64 v388 - v390 = iadd v389, v387 - v391 = uload8.i32 v390 - jump block44(v391, v54, v33) - -block44(v392: i32, v393: i32, v394: i32): - v395 = iconst.i32 8 - v396 = ishl v392, v395 - 
v397 = iconst.i32 0xff00 - v398 = band v396, v397 - v399 = iconst.i32 255 - v400 = band v393, v399 - v401 = bor v398, v400 - v402 = icmp eq v401, v384 - v403 = bint.i32 v402 - brnz v403, block56(v394, v14) - jump block147 - -block147: - v404 = iconst.i32 2 - v405 = iadd v394, v404 - v406 = iconst.i32 1 - v407 = iadd v394, v406 - v408 = iconst.i32 0x4ffe - v409 = icmp uge v405, v408 - brz v409, block45 - jump block148 - -block148: - trap heap_oob - -block45: - v410 = uextend.i64 v405 - v411 = iadd_imm.i64 v0, -8 - v412 = load.i64 v411 - v413 = iadd v412, v410 - v414 = uload8.i32 v413 - brnz v414, block44(v401, v414, v407) - jump block90(v14, v34) - -block46: - v415 = iconst.i32 8 - v416 = ishl.i32 v74, v415 - v417 = iconst.i32 16 - v418 = ishl.i32 v54, v417 - v419 = bor v416, v418 - v420 = iconst.i32 0x4ffe - v421 = icmp.i32 uge v33, v420 - brz v421, block47 - jump block149 - -block149: - trap heap_oob - -block47: - v422 = uextend.i64 v33 - v423 = iadd_imm.i64 v0, -8 - v424 = load.i64 v423 - v425 = iadd v424, v422 - v426 = uload8.i32 v425 - v427 = iconst.i32 24 - v428 = ishl v426, v427 - v429 = bor.i32 v419, v428 - v430 = iconst.i32 16 - v431 = ishl.i32 v44, v430 - v432 = iconst.i32 24 - v433 = ishl.i32 v29, v432 - v434 = bor v431, v433 - v435 = iconst.i32 8 - v436 = ishl.i32 v64, v435 - v437 = bor v434, v436 - v438 = icmp eq v429, v437 - v439 = bint.i32 v438 - brnz v439, block56(v33, v14) - jump block48(v33, v429) - -block48(v440: i32, v441: i32): - v442 = iconst.i32 1 - v443 = iadd v440, v442 - v444 = iconst.i32 3 - v445 = iadd v440, v444 - v446 = iconst.i32 0x4ffe - v447 = icmp uge v445, v446 - brz v447, block49 - jump block150 - -block150: - trap heap_oob - -block49: - v448 = uextend.i64 v445 - v449 = iadd_imm.i64 v0, -8 - v450 = load.i64 v449 - v451 = iadd v450, v448 - v452 = uload8.i32 v451 - v453 = iconst.i32 0 - v454 = icmp eq v452, v453 - v455 = bint.i32 v454 - brnz v455, block51(v14) - jump block151 - -block151: - v456 = bor.i32 v441, v452 - v457 = 
iconst.i32 8 - v458 = ishl v456, v457 - v459 = icmp ne v458, v437 - v460 = bint.i32 v459 - v461 = copy.i32 v443 - brnz v460, block48(v461, v458) - jump block50 - -block50: - jump block51(v14) - -block51(v462: i32): - v463 = iconst.i32 0 - v464 = iconst.i32 1056 - v465 = iadd v462, v464 - v466 = iconst.i32 0x4ffe - v467 = icmp uge v463, v466 - brz v467, block52 - jump block152 - -block152: - trap heap_oob - -block52: - v468 = uextend.i64 v463 - v469 = iadd_imm.i64 v0, -8 - v470 = load.i64 v469 - v471 = iadd v470, v468 - store.i32 v465, v471+4 - v472 = iconst.i32 0 - brnz.i32 v452, block53(v443) - jump block153 - -block153: - v473 = copy v472 - jump block53(v473) - -block53(v474: i32): - return v474 - -block54: - v475 = iconst.i32 8 - v476 = ishl.i32 v74, v475 - v477 = iconst.i32 16 - v478 = ishl.i32 v54, v477 - v479 = bor v476, v478 - v480 = bor v479, v94 - v481 = iconst.i32 0x4ffe - v482 = icmp.i32 uge v33, v481 - brz v482, block55 - jump block154 - -block154: - trap heap_oob - -block55: - v483 = uextend.i64 v33 - v484 = iadd_imm.i64 v0, -8 - v485 = load.i64 v484 - v486 = iadd v485, v483 - v487 = uload8.i32 v486 - v488 = iconst.i32 24 - v489 = ishl v487, v488 - v490 = bor.i32 v480, v489 - v491 = iconst.i32 16 - v492 = ishl.i32 v44, v491 - v493 = iconst.i32 24 - v494 = ishl.i32 v29, v493 - v495 = bor v492, v494 - v496 = iconst.i32 8 - v497 = ishl.i32 v64, v496 - v498 = bor v495, v497 - v499 = bor v498, v84 - v500 = icmp ne v490, v499 - v501 = bint.i32 v500 - brnz v501, block57 - jump block56(v33, v14) - -block56(v502: i32, v503: i32): - v504 = copy v502 - jump block90(v503, v504) - -block57: - jump block58(v33, v490) - -block58(v505: i32, v506: i32): - v507 = iconst.i32 4 - v508 = iadd v505, v507 - v509 = iconst.i32 1 - v510 = iadd v505, v509 - v511 = iconst.i32 0x4ffe - v512 = icmp uge v508, v511 - brz v512, block59 - jump block155 - -block155: - trap heap_oob - -block59: - v513 = uextend.i64 v508 - v514 = iadd_imm.i64 v0, -8 - v515 = load.i64 v514 - v516 = iadd 
v515, v513 - v517 = uload8.i32 v516 - v518 = iconst.i32 0 - v519 = icmp eq v517, v518 - v520 = bint.i32 v519 - brnz v520, block61(v14) - jump block156 - -block156: - v521 = iconst.i32 8 - v522 = ishl.i32 v506, v521 - v523 = bor v522, v517 - v524 = icmp ne v523, v499 - v525 = bint.i32 v524 - brnz v525, block58(v510, v523) - jump block60 - -block60: - jump block61(v14) - -block61(v526: i32): - v527 = iconst.i32 0 - brnz.i32 v517, block62(v510) - jump block157 - -block157: - v528 = copy v527 - jump block62(v528) - -block62(v529: i32): - v530 = copy v529 - jump block90(v526, v530) - -block63: - v531 = isub.i32 v348, v359 - v532 = iconst.i32 1 - v533 = iadd v531, v532 - jump block64(v359, v533) - -block64(v534: i32, v535: i32): - v536 = iconst.i32 1 - v537 = iadd.i32 v343, v536 - v538 = iconst.i32 0 - v539 = isub v538, v362 - v540 = iconst.i32 63 - v541 = bor.i32 v349, v540 - v542 = isub.i32 v348, v534 - v543 = iconst.i32 1 - v544 = iadd v542, v543 - v545 = iconst.i32 0 - v546 = copy.i32 v350 - jump block65(v350, v546, v349, v541, v348, v351, v544, v534, v545, v535, v343, v364, v537, v539, v362) - -block65(v547: i32, v548: i32, v549: i32, v550: i32, v551: i32, v552: i32, v553: i32, v554: i32, v555: i32, v556: i32, v557: i32, v558: i32, v559: i32, v560: i32, v561: i32): - v562 = copy v556 - v563 = isub v547, v548 - v564 = icmp uge v563, v549 - v565 = bint.i32 v564 - brnz v565, block67(v547) - jump block158 - -block158: - v566 = iconst.i32 0 - v567 = call fn2(v0, v547, v566, v550) - brnz v567, block66 - jump block159 - -block159: - v568 = iadd v547, v550 - jump block67(v568) - -block66: - v569 = isub.i32 v567, v548 - v570 = icmp ult v569, v549 - v571 = bint.i32 v570 - brnz v571, block89(v552) - jump block160 - -block160: - v572 = copy.i32 v567 - jump block67(v572) - -block67(v573: i32): - v574 = iconst.i32 1 - v575 = iadd.i32 v548, v551 - v576 = iconst.i32 0x4ffe - v577 = icmp uge v575, v576 - brz v577, block68 - jump block161 - -block161: - trap heap_oob - -block68: - 
v578 = uextend.i64 v575 - v579 = iadd_imm.i64 v0, -8 - v580 = load.i64 v579 - v581 = iadd v580, v578 - v582 = uload8.i32 v581 - v583 = iconst.i32 31 - v584 = band v582, v583 - v585 = ishl.i32 v574, v584 - v586 = iconst.i32 1024 - v587 = iadd.i32 v552, v586 - v588 = iconst.i32 3 - v589 = ushr v582, v588 - v590 = iconst.i32 28 - v591 = band v589, v590 - v592 = iadd v587, v591 - v593 = iconst.i32 0x4ffe - v594 = icmp uge v592, v593 - brz v594, block69 - jump block162 - -block162: - trap heap_oob - -block69: - v595 = uextend.i64 v592 - v596 = iadd_imm.i64 v0, -8 - v597 = load.i64 v596 - v598 = iadd v597, v595 - v599 = load.i32 v598 - v600 = band.i32 v585, v599 - v601 = iconst.i32 0 - v602 = icmp eq v600, v601 - v603 = bint.i32 v602 - brnz v603, block74 - jump block163 - -block163: - v604 = iconst.i32 2 - v605 = ishl.i32 v582, v604 - v606 = iadd.i32 v552, v605 - v607 = iconst.i32 0x4ffe - v608 = icmp uge v606, v607 - brz v608, block70 - jump block164 - -block164: - trap heap_oob - -block70: - v609 = uextend.i64 v606 - v610 = iadd_imm.i64 v0, -8 - v611 = load.i64 v610 - v612 = iadd v611, v609 - v613 = load.i32 v612 - v614 = isub.i32 v551, v613 - v615 = iconst.i32 -1 - v616 = icmp eq v614, v615 - v617 = bint.i32 v616 - brnz v617, block75 - jump block165 - -block165: - v618 = iconst.i32 1 - v619 = iadd v614, v618 - v620 = icmp ult v619, v554 - v621 = bint.i32 v620 - v622 = copy.i32 v553 - brnz v621, block71(v622) - jump block166 - -block166: - v623 = copy v619 - jump block71(v623) - -block71(v624: i32): - v625 = copy v624 - brnz.i32 v555, block72(v625) - jump block72(v619) - -block72(v626: i32): - brnz.i32 v562, block73(v626) - jump block73(v619) - -block73(v627: i32): - v628 = copy.i32 v554 - v629 = copy.i32 v562 - jump block87(v548, v627, v573, v549, v550, v551, v552, v553, v628, v629, v557, v558, v559, v560, v561) - -block74: - v630 = copy.i32 v549 - v631 = copy.i32 v554 - v632 = copy.i32 v562 - jump block87(v548, v630, v573, v549, v550, v551, v552, v553, v631, v632, 
v557, v558, v559, v560, v561) - -block75: - v633 = icmp.i32 ugt v558, v555 - v634 = bint.i32 v633 - v635 = copy.i32 v558 - brnz v634, block76(v635) - jump block167 - -block167: - v636 = copy.i32 v555 - jump block76(v636) - -block76(v637: i32): - v638 = iadd.i32 v557, v637 - v639 = iconst.i32 0x4ffe - v640 = icmp uge v638, v639 - brz v640, block77 - jump block168 - -block168: - trap heap_oob - -block77: - v641 = uextend.i64 v638 - v642 = iadd_imm.i64 v0, -8 - v643 = load.i64 v642 - v644 = iadd v643, v641 - v645 = uload8.i32 v644 - v646 = iconst.i32 0 - v647 = icmp eq v645, v646 - v648 = bint.i32 v647 - brnz v648, block82(v548, v549, v551, v552) - jump block169 - -block169: - v649 = iadd.i32 v548, v637 - v650 = iadd.i32 v559, v637 - v651 = iadd.i32 v560, v637 - jump block78(v645, v649, v651, v650) - -block78(v652: i32, v653: i32, v654: i32, v655: i32): - v656 = iconst.i32 255 - v657 = band v652, v656 - v658 = iconst.i32 0x4ffe - v659 = icmp uge v653, v658 - brz v659, block79 - jump block170 - -block170: - trap heap_oob - -block79: - v660 = uextend.i64 v653 - v661 = iadd_imm.i64 v0, -8 - v662 = load.i64 v661 - v663 = iadd v662, v660 - v664 = uload8.i32 v663 - v665 = icmp.i32 ne v657, v664 - v666 = bint.i32 v665 - v667 = copy.i32 v554 - v668 = copy.i32 v562 - brnz v666, block87(v548, v654, v573, v549, v550, v551, v552, v553, v667, v668, v557, v558, v559, v560, v561) - jump block171 - -block171: - v669 = iconst.i32 1 - v670 = iadd.i32 v653, v669 - v671 = iconst.i32 1 - v672 = iadd.i32 v654, v671 - v673 = iconst.i32 0x4ffe - v674 = icmp.i32 uge v655, v673 - brz v674, block80 - jump block172 - -block172: - trap heap_oob - -block80: - v675 = uextend.i64 v655 - v676 = iadd_imm.i64 v0, -8 - v677 = load.i64 v676 - v678 = iadd v677, v675 - v679 = uload8.i32 v678 - v680 = iconst.i32 1 - v681 = iadd.i32 v655, v680 - brnz v679, block78(v679, v670, v672, v681) - jump block81 - -block81: - jump block82(v548, v549, v551, v552) - -block82(v682: i32, v683: i32, v684: i32, v685: i32): 
- v686 = icmp.i32 ule v558, v555 - v687 = bint.i32 v686 - brnz v687, block90(v685, v682) - jump block173 - -block173: - v688 = copy.i32 v561 - jump block83(v688) - -block83(v689: i32): - v690 = iadd.i32 v557, v689 - v691 = iconst.i32 0x4ffe - v692 = icmp uge v690, v691 - brz v692, block84 - jump block174 - -block174: - trap heap_oob - -block84: - v693 = uextend.i64 v690 - v694 = iadd_imm.i64 v0, -8 - v695 = load.i64 v694 - v696 = iadd v695, v693 - v697 = uload8.i32 v696 - v698 = iadd.i32 v682, v689 - v699 = iconst.i32 0x4ffe - v700 = icmp uge v698, v699 - brz v700, block85 - jump block175 - -block175: - trap heap_oob - -block85: - v701 = uextend.i64 v698 - v702 = iadd_imm.i64 v0, -8 - v703 = load.i64 v702 - v704 = iadd v703, v701 - v705 = uload8.i32 v704 - v706 = icmp.i32 ne v697, v705 - v707 = bint.i32 v706 - brnz v707, block86 - jump block176 - -block176: - v708 = icmp.i32 ule v689, v555 - v709 = bint.i32 v708 - v710 = iconst.i32 -1 - v711 = iadd.i32 v689, v710 - v712 = iconst.i32 0 - v713 = icmp eq v709, v712 - v714 = bint.i32 v713 - brnz v714, block83(v711) - jump block90(v685, v682) - -block86: - v715 = copy.i32 v554 - v716 = copy.i32 v562 - jump block88(v682, v554, v573, v683, v550, v684, v685, v553, v715, v562, v716, v557, v558, v559, v560, v561) - -block87(v717: i32, v718: i32, v719: i32, v720: i32, v721: i32, v722: i32, v723: i32, v724: i32, v725: i32, v726: i32, v727: i32, v728: i32, v729: i32, v730: i32, v731: i32): - v732 = copy v718 - v733 = iconst.i32 0 - jump block88(v717, v732, v719, v720, v721, v722, v723, v724, v725, v733, v726, v727, v728, v729, v730, v731) - -block88(v734: i32, v735: i32, v736: i32, v737: i32, v738: i32, v739: i32, v740: i32, v741: i32, v742: i32, v743: i32, v744: i32, v745: i32, v746: i32, v747: i32, v748: i32, v749: i32): - v750 = iadd v734, v735 - v751 = copy v742 - v752 = copy v743 - v753 = copy v744 - jump block65(v736, v750, v737, v738, v739, v740, v741, v751, v752, v753, v745, v746, v747, v748, v749) - -block89(v754: 
i32): - v755 = iconst.i32 0 - jump block90(v754, v755) - -block90(v756: i32, v757: i32): - v758 = iconst.i32 0 - v759 = iconst.i32 1056 - v760 = iadd v756, v759 - v761 = iconst.i32 0x4ffe - v762 = icmp uge v758, v761 - brz v762, block91 - jump block177 - -block177: - trap heap_oob - -block91: - v763 = uextend.i64 v758 - v764 = iadd_imm.i64 v0, -8 - v765 = load.i64 v764 - v766 = iadd v765, v763 - store.i32 v760, v766+4 - jump block92(v757) - -block92(v767: i32): - return v767 -} - -; Same problem from musl.wasm. -function %musl(f64 [%xmm0], i64 vmctx [%rdi]) -> f64 [%xmm0] system_v { - gv0 = vmctx - heap0 = static gv0, min 0, bound 0x0001_0000_0000, offset_guard 0x8000_0000 - sig0 = (f64 [%xmm0], i32 [%rdi], i64 vmctx [%rsi]) -> f64 [%xmm0] system_v - fn0 = u0:517 sig0 - -block0(v0: f64, v1: i64): - v3 = iconst.i64 0 - v4 = iconst.i32 0 - v131 = iconst.i64 0 - v5 = bitcast.f64 v131 - v6 = iconst.i32 0 - v7 = iconst.i32 0 - v8 = iconst.i32 0 - v132 = uextend.i64 v8 - v133 = iadd_imm v1, 0 - v134 = load.i64 v133 - v9 = iadd v134, v132 - v10 = load.i32 v9+4 - v11 = iconst.i32 16 - v12 = isub v10, v11 - v135 = uextend.i64 v7 - v136 = iadd_imm v1, 0 - v137 = load.i64 v136 - v13 = iadd v137, v135 - store v12, v13+4 - v14 = bitcast.i64 v0 - v15 = iconst.i64 63 - v16 = ushr v14, v15 - v17 = ireduce.i32 v16 - v18 = iconst.i64 32 - v19 = ushr v14, v18 - v20 = ireduce.i32 v19 - v21 = iconst.i32 0x7fff_ffff - v22 = band v20, v21 - v23 = iconst.i32 0x4086_232b - v24 = icmp ult v22, v23 - v25 = bint.i32 v24 - brnz v25, block10 - jump block178 - -block178: - v26 = iconst.i64 0x7fff_ffff_ffff_ffff - v27 = band v14, v26 - v28 = iconst.i64 0x7ff0_0000_0000_0000 - v29 = icmp ule v27, v28 - v30 = bint.i32 v29 - brnz v30, block9 - jump block2(v12, v0) - -block10: - v31 = iconst.i32 0x3fd6_2e43 - v32 = icmp.i32 ult v22, v31 - v33 = bint.i32 v32 - brnz v33, block8 - jump block179 - -block179: - v34 = iconst.i32 0x3ff0_a2b2 - v35 = icmp.i32 uge v22, v34 - v36 = bint.i32 v35 - brnz v36, 
block6 - jump block180 - -block180: - v37 = iconst.i32 1 - v38 = bxor.i32 v17, v37 - v39 = isub v38, v17 - jump block5(v0, v39) - -block9: - v138 = iconst.i64 0x4086_2e42_fefa_39ef - v40 = bitcast.f64 v138 - v41 = fcmp ge v40, v0 - v42 = bint.i32 v41 - v139 = fcmp.f64 uno v0, v0 - v140 = fcmp.f64 one v0, v0 - v43 = bor v139, v140 - v44 = bint.i32 v43 - v45 = bor v42, v44 - brnz v45, block7 - jump block181 - -block181: - v141 = iconst.i64 0x7fe0_0000_0000_0000 - v46 = bitcast.f64 v141 - v47 = fmul.f64 v0, v46 - jump block2(v12, v47) - -block8: - v48 = iconst.i32 0x3e30_0000 - v49 = icmp.i32 ule v22, v48 - v50 = bint.i32 v49 - brnz v50, block3 - jump block182 - -block182: - v51 = iconst.i32 0 - v142 = iconst.i64 0 - v52 = bitcast.f64 v142 - v178 = copy.f64 v0 - jump block4(v0, v178, v52, v51) - -block7: - v143 = iconst.i64 0xc086_232b_dd7a_bcd2 - v53 = bitcast.f64 v143 - v54 = fcmp.f64 ge v0, v53 - v55 = bint.i32 v54 - v56 = bor v55, v44 - brnz v56, block6 - jump block183 - -block183: - v144 = iconst.i64 0xb6a0_0000_0000_0000 - v57 = bitcast.f64 v144 - v58 = fdiv v57, v0 - v59 = fdemote.f32 v58 - v145 = uextend.i64 v12 - v146 = iadd_imm.i64 v1, 0 - v147 = load.i64 v146 - v60 = iadd v147, v145 - store v59, v60+12 - v148 = iconst.i64 0 - v61 = bitcast.f64 v148 - v149 = iconst.i64 0xc087_4910_d52d_3051 - v62 = bitcast.f64 v149 - v63 = fcmp gt v62, v0 - v64 = bint.i32 v63 - brnz v64, block2(v12, v61) - jump block6 - -block6: - v150 = iconst.i64 0x3ff7_1547_652b_82fe - v66 = bitcast.f64 v150 - v67 = fmul.f64 v0, v66 - v69 = iconst.i32 3 - v70 = ishl.i32 v17, v69 - v71 = iconst.i32 5040 - v72 = iadd v70, v71 - v151 = uextend.i64 v72 - v152 = iadd_imm.i64 v1, 0 - v153 = load.i64 v152 - v73 = iadd v153, v151 - v74 = load.f64 v73 - v75 = fadd v67, v74 - v76 = x86_cvtt2si.i32 v75 - v158 = iconst.i32 0x8000_0000 - v154 = icmp ne v76, v158 - brnz v154, block11 - jump block184 - -block184: - v155 = fcmp uno v75, v75 - brz v155, block12 - jump block185 - -block185: - trap 
bad_toint - -block12: - v159 = iconst.i64 0xc1e0_0000_0020_0000 - v156 = bitcast.f64 v159 - v157 = fcmp ge v156, v75 - brz v157, block13 - jump block186 - -block186: - trap int_ovf - -block13: - jump block11 - -block11: - jump block5(v0, v76) - -block5(v77: f64, v78: i32): - v79 = fcvt_from_sint.f64 v78 - v160 = iconst.i64 0xbfe6_2e42_fee0_0000 - v80 = bitcast.f64 v160 - v81 = fmul v79, v80 - v82 = fadd v77, v81 - v161 = iconst.i64 0x3dea_39ef_3579_3c76 - v83 = bitcast.f64 v161 - v84 = fmul v79, v83 - v85 = fsub v82, v84 - jump block4(v82, v85, v84, v78) - -block4(v86: f64, v87: f64, v108: f64, v113: i32): - v88 = fmul v87, v87 - v162 = iconst.i64 0x3e66_3769_72be_a4d0 - v89 = bitcast.f64 v162 - v90 = fmul v88, v89 - v163 = iconst.i64 0xbeeb_bd41_c5d2_6bf1 - v91 = bitcast.f64 v163 - v92 = fadd v90, v91 - v93 = fmul v88, v92 - v164 = iconst.i64 0x3f11_566a_af25_de2c - v94 = bitcast.f64 v164 - v95 = fadd v93, v94 - v96 = fmul v88, v95 - v165 = iconst.i64 0xbf66_c16c_16be_bd93 - v97 = bitcast.f64 v165 - v98 = fadd v96, v97 - v99 = fmul v88, v98 - v166 = iconst.i64 0x3fc5_5555_5555_553e - v100 = bitcast.f64 v166 - v101 = fadd v99, v100 - v102 = fmul v88, v101 - v103 = fsub v87, v102 - v104 = fmul v87, v103 - v167 = iconst.i64 0x4000_0000_0000_0000 - v105 = bitcast.f64 v167 - v106 = fsub v105, v103 - v107 = fdiv v104, v106 - v109 = fsub v107, v108 - v110 = fadd v86, v109 - v168 = iconst.i64 0x3ff0_0000_0000_0000 - v111 = bitcast.f64 v168 - v112 = fadd v110, v111 - v169 = iconst.i32 0 - v114 = icmp eq v113, v169 - v115 = bint.i32 v114 - brnz v115, block2(v12, v112) - jump block187 - -block187: - v116 = call fn0(v112, v113, v1) - jump block2(v12, v116) - -block3: - v170 = iconst.i64 0x7fe0_0000_0000_0000 - v117 = bitcast.f64 v170 - v118 = fadd.f64 v0, v117 - v171 = uextend.i64 v12 - v172 = iadd_imm.i64 v1, 0 - v173 = load.i64 v172 - v119 = iadd v173, v171 - store v118, v119 - v174 = iconst.i64 0x3ff0_0000_0000_0000 - v120 = bitcast.f64 v174 - v121 = fadd.f64 v0, v120 - 
jump block2(v12, v121) - -block2(v123: i32, v130: f64): - v122 = iconst.i32 0 - v127 = iconst.i32 16 - v128 = iadd v123, v127 - v175 = uextend.i64 v122 - v176 = iadd_imm.i64 v1, 0 - v177 = load.i64 v176 - v129 = iadd v177, v175 - store v128, v129+4 - jump block1(v130) - -block1(v2: f64): - return v2 -} diff --git a/cranelift/filetests/filetests/regalloc/coalescing-216.clif b/cranelift/filetests/filetests/regalloc/coalescing-216.clif deleted file mode 100644 index 4c9b27d6b0..0000000000 --- a/cranelift/filetests/filetests/regalloc/coalescing-216.clif +++ /dev/null @@ -1,87 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -; Reported as https://github.com/bytecodealliance/cranelift/issues/216 from the Binaryen fuzzer. -; -; The (old) coalescer creates a virtual register with two identical values. -function %pr216(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] system_v { -block0(v0: i32, v1: i64): - v3 = iconst.i64 0 - v5 = iconst.i32 0 - brz v5, block3(v3) - jump block4(v3, v3) - -block4(v11: i64, v29: i64): - v6 = iconst.i32 0 - brz v6, block14 - jump block15 - -block15: - v9 = iconst.i32 -17 - v12 = iconst.i32 0xffff_ffff_ffff_8000 - jump block9(v12) - -block9(v10: i32): - brnz v10, block8(v9, v11, v11) - jump block16 - -block16: - brz.i32 v9, block13 - jump block17 - -block17: - v13 = iconst.i32 0 - brnz v13, block6(v11, v11) - jump block18 - -block18: - v14 = iconst.i32 0 - brz v14, block12 - jump block11 - -block12: - jump block4(v11, v11) - -block11: - jump block10(v11) - -block13: - v15 = iconst.i64 1 - jump block10(v15) - -block10(v21: i64): - v16 = iconst.i32 0 - brnz v16, block6(v21, v11) - jump block19 - -block19: - v17 = iconst.i32 0xffff_ffff_ffff_9f35 - jump block8(v17, v21, v11) - -block8(v8: i32, v23: i64, v28: i64): - jump block7(v8, v23, v28) - -block14: - v18 = iconst.i32 0 - jump block7(v18, v11, v29) - -block7(v7: i32, v22: i64, v27: i64): - jump block6(v22, v27) - -block6(v20: i64, v25: i64): - v19 = iconst.i32 0xffc7 - brnz v19, block4(v20, 
v25) - jump block5 - -block5: - jump block3(v25) - -block3(v24: i64): - jump block2(v24) - -block2(v4: i64): - jump block1(v4) - -block1(v2: i64): - return v2 -} diff --git a/cranelift/filetests/filetests/regalloc/coloring-227.clif b/cranelift/filetests/filetests/regalloc/coloring-227.clif deleted file mode 100644 index d47a905637..0000000000 --- a/cranelift/filetests/filetests/regalloc/coloring-227.clif +++ /dev/null @@ -1,115 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) system_v { - gv0 = vmctx - heap0 = static gv0, min 0, bound 0x0001_0000_0000, offset_guard 0x8000_0000 - - block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i64): -[RexOp1pu_id#b8] v5 = iconst.i32 0 -[RexOp1pu_id#b8] v6 = iconst.i32 0 -[RexOp1tjccb#74] brz v6, block10 -[Op1jmpb#eb] jump block3(v5, v5, v5, v5, v5, v5, v0, v1, v2, v3) - - block3(v15: i32, v17: i32, v25: i32, v31: i32, v40: i32, v47: i32, v54: i32, v61: i32, v68: i32, v75: i32): -[Op1jmpb#eb] jump block6 - - block6: -[RexOp1pu_id#b8] v8 = iconst.i32 0 -[RexOp1tjccb#75] brnz v8, block5 -[Op1jmpb#eb] jump block20 - - block20: -[RexOp1pu_id#b8] v9 = iconst.i32 0 -[RexOp1pu_id#b8] v11 = iconst.i32 0 -[DynRexOp1icscc#39] v12 = icmp.i32 eq v15, v11 -[RexOp2urm_noflags#4b6] v13 = bint.i32 v12 -[DynRexOp1rr#21] v14 = band v9, v13 -[RexOp1tjccb#75] brnz v14, block6 -[Op1jmpb#eb] jump block7 - - block7: -[RexOp1tjccb#74] brz.i32 v17, block8 -[Op1jmpb#eb] jump block17 - - block17: -[RexOp1pu_id#b8] v18 = iconst.i32 0 -[RexOp1tjccb#74] brz v18, block9 -[Op1jmpb#eb] jump block16 - - block16: -[RexOp1pu_id#b8] v21 = iconst.i32 0 -[RexOp1umr#89] v79 = uextend.i64 v5 -[RexOp1r_ib#8083] v80 = iadd_imm.i64 v4, 0 -[RexOp1ld#808b] v81 = load.i64 v80 -[RexOp1rr#8001] v22 = iadd v81, v79 -[RexMp1st#189] istore16 v21, v22 -[Op1jmpb#eb] jump block9 - - block9: -[Op1jmpb#eb] jump block8 - - block8: -[RexOp1pu_id#b8] v27 = iconst.i32 3 -[RexOp1pu_id#b8] v28 = iconst.i32 4 
-[DynRexOp1rr#09] v35 = bor.i32 v31, v13 -[RexOp1tjccb#75] brnz v35, block15(v27) -[Op1jmpb#eb] jump block15(v28) - - block15(v36: i32): -[Op1jmpb#eb] jump block3(v25, v36, v25, v31, v40, v47, v54, v61, v68, v75) - - block5: -[Op1jmpb#eb] jump block4 - - block4: -[Op1jmpb#eb] jump block2(v40, v47, v54, v61, v68, v75) - - block10: -[RexOp1pu_id#b8] v43 = iconst.i32 0 -[Op1jmpb#eb] jump block2(v43, v5, v0, v1, v2, v3) - - block2(v7: i32, v45: i32, v52: i32, v59: i32, v66: i32, v73: i32): -[RexOp1pu_id#b8] v44 = iconst.i32 0 -[RexOp1tjccb#74] brz v44, block12 -[Op1jmpb#eb] jump block18 - - block18: -[RexOp1pu_id#b8] v50 = iconst.i32 11 -[RexOp1tjccb#74] brz v50, block14 -[Op1jmpb#eb] jump block19 - - block19: -[RexOp1umr#89] v82 = uextend.i64 v52 -[RexOp1r_ib#8083] v83 = iadd_imm.i64 v4, 0 -[RexOp1ld#808b] v84 = load.i64 v83 -[RexOp1rr#8001] v57 = iadd v84, v82 -[RexOp1ld#8b] v58 = load.i32 v57 -[RexOp1umr#89] v85 = uextend.i64 v58 -[RexOp1r_ib#8083] v86 = iadd_imm.i64 v4, 0 -[RexOp1ld#808b] v87 = load.i64 v86 -[RexOp1rr#8001] v64 = iadd v87, v85 -[RexOp1st#88] istore8 v59, v64 -[RexOp1pu_id#b8] v65 = iconst.i32 0 -[Op1jmpb#eb] jump block13(v65) - - block14: -[Op1jmpb#eb] jump block13(v66) - - block13(v51: i32): -[RexOp1umr#89] v88 = uextend.i64 v45 -[RexOp1r_ib#8083] v89 = iadd_imm.i64 v4, 0 -[RexOp1ld#808b] v90 = load.i64 v89 -[RexOp1rr#8001] v71 = iadd v90, v88 -[RexOp1st#89] store v51, v71 -[Op1jmpb#eb] jump block12 - - block12: -[Op1jmpb#eb] jump block11 - - block11: -[Op1jmpb#eb] jump block1 - - block1: -[Op1ret#c3] return -} diff --git a/cranelift/filetests/filetests/regalloc/constraints.clif b/cranelift/filetests/filetests/regalloc/constraints.clif deleted file mode 100644 index 60cd731ed8..0000000000 --- a/cranelift/filetests/filetests/regalloc/constraints.clif +++ /dev/null @@ -1,82 +0,0 @@ -test regalloc -target i686 - -; regex: V=v\d+ -; regex: REG=%r([abcd]x|[sd]i) - -; Tied operands, both are killed at instruction. 
-function %tied_easy() -> i32 { -block0: - v0 = iconst.i32 12 - v1 = iconst.i32 13 - ; not: copy - ; check: isub - v2 = isub v0, v1 - return v2 -} - -; Tied operand is live after instruction. -function %tied_alive() -> i32 { -block0: - v0 = iconst.i32 12 - v1 = iconst.i32 13 - ; check: $(v0c=$V) = copy v0 - ; check: v2 = isub $v0c, v1 - v2 = isub v0, v1 - ; check: v3 = iadd v2, v0 - v3 = iadd v2, v0 - return v3 -} - -; Fixed register constraint. -function %fixed_op() -> i32 { -block0: - ; check: ,%rax] - ; sameln: v0 = iconst.i32 12 - v0 = iconst.i32 12 - v1 = iconst.i32 13 - ; The dynamic shift amount must be in %rcx - ; check: regmove v0, %rax -> %rcx - v2 = ishl v1, v0 - return v2 -} - -; Fixed register constraint twice. -function %fixed_op_twice() -> i32 { -block0: - ; check: ,%rax] - ; sameln: v0 = iconst.i32 12 - v0 = iconst.i32 12 - v1 = iconst.i32 13 - ; The dynamic shift amount must be in %rcx - ; check: regmove v0, %rax -> %rcx - v2 = ishl v1, v0 - ; check: regmove v0, %rcx -> $REG - ; check: regmove v2, $REG -> %rcx - v3 = ishl v0, v2 - - return v3 -} - -; Tied use of a diverted register. -function %fixed_op_twice() -> i32 { -block0: - ; check: ,%rax] - ; sameln: v0 = iconst.i32 12 - v0 = iconst.i32 12 - v1 = iconst.i32 13 - ; The dynamic shift amount must be in %rcx - ; check: regmove v0, %rax -> %rcx - ; check: v2 = ishl v1, v0 - v2 = ishl v1, v0 - - ; Now v0 is globally allocated to %rax, but diverted to %rcx. - ; Check that the tied def gets the diverted register. - v3 = isub v0, v2 - ; not: regmove - ; check: ,%rcx] - ; sameln: isub - ; Move it into place for the return value. 
- ; check: regmove v3, %rcx -> %rax - return v3 -} diff --git a/cranelift/filetests/filetests/regalloc/fallthrough-return.clif b/cranelift/filetests/filetests/regalloc/fallthrough-return.clif deleted file mode 100644 index 90650aa4f0..0000000000 --- a/cranelift/filetests/filetests/regalloc/fallthrough-return.clif +++ /dev/null @@ -1,23 +0,0 @@ -test regalloc -target x86_64 legacy - -; Test that fallthrough returns are visited by reload and coloring. - -function %foo() -> f64 { - fn0 = %bar() - -block0: - v0 = f64const 0.0 - call fn0() - fallthrough_return v0 -} -; check: fill v0 - -function %foo() -> f64 { - fn0 = %bar() -> f64, f64 - -block0: - v0, v1 = call fn0() - fallthrough_return v1 -} -; check: regmove v1, %xmm1 -> %xmm0 diff --git a/cranelift/filetests/filetests/regalloc/ghost-param.clif b/cranelift/filetests/filetests/regalloc/ghost-param.clif deleted file mode 100644 index 1d569727dd..0000000000 --- a/cranelift/filetests/filetests/regalloc/ghost-param.clif +++ /dev/null @@ -1,45 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -; This test case would create a block parameter that was a ghost value. -; The coalescer would insert a copy of the ghost value, leading to verifier errors. -; -; We don't allow block parameters to be ghost values any longer. -; -; Test case by binaryen fuzzer! 
- -function %pr215(i64 vmctx [%rdi]) system_v { -block0(v0: i64): - v10 = iconst.i64 0 - v1 = bitcast.f64 v10 - jump block5(v1) - -block5(v9: f64): - v11 = iconst.i64 0xffff_ffff_ff9a_421a - v4 = bitcast.f64 v11 - v6 = iconst.i32 0 - v7 = iconst.i32 1 - brnz v7, block4(v6) - jump block8 - -block8: - v8 = iconst.i32 0 - jump block7(v8) - -block7(v5: i32): - brnz v5, block3(v4) - jump block5(v4) - -block4(v3: i32): - brnz v3, block2 - jump block3(v9) - -block3(v2: f64): - jump block2 - -block2: - jump block1 - -block1: - return -} diff --git a/cranelift/filetests/filetests/regalloc/global-constraints.clif b/cranelift/filetests/filetests/regalloc/global-constraints.clif deleted file mode 100644 index 1fe89ae823..0000000000 --- a/cranelift/filetests/filetests/regalloc/global-constraints.clif +++ /dev/null @@ -1,30 +0,0 @@ -test regalloc -target i686 legacy - -; This test covers the troubles when values with global live ranges are defined -; by instructions with constrained register classes. -; -; The icmp_imm instrutions write their b1 result to the ABCD register class on -; 32-bit x86. So if we define 5 live values, they can't all fit. -function %global_constraints(i32) { -block0(v0: i32): - v1 = icmp_imm eq v0, 1 - v2 = icmp_imm ugt v0, 2 - v3 = icmp_imm sle v0, 3 - v4 = icmp_imm ne v0, 4 - v5 = icmp_imm sge v0, 5 - brnz v5, block1 - jump block2 - -block2: - return - -block1: - ; Make sure v1-v5 are live in. 
- v10 = band v1, v2 - v11 = bor v3, v4 - v12 = bor v10, v11 - v13 = bor v12, v5 - trapnz v13, user0 - return -} diff --git a/cranelift/filetests/filetests/regalloc/global-fixed.clif b/cranelift/filetests/filetests/regalloc/global-fixed.clif deleted file mode 100644 index 6d31f7511a..0000000000 --- a/cranelift/filetests/filetests/regalloc/global-fixed.clif +++ /dev/null @@ -1,16 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -function %foo() system_v { -block4: - v3 = iconst.i32 0 - jump block3 - -block3: - v9 = udiv v3, v3 - jump block1 - -block1: - v19 = iadd.i32 v9, v9 - jump block3 -} diff --git a/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif b/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif deleted file mode 100644 index c4534b0f8b..0000000000 --- a/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif +++ /dev/null @@ -1,44 +0,0 @@ -test regalloc -target x86_64 legacy - -function u0:587() fast { -block0: - v97 = iconst.i32 0 - v169 = iconst.i32 0 - v1729 = iconst.i32 0 - jump block100(v97, v97, v97, v97, v97) - -block100(v1758: i32, v1784: i32, v1845: i32, v1856: i32, v1870: i32): - v1762 = iconst.i32 0 - v1769 = iconst.i32 0 - v1774 = iconst.i32 0 - v1864 = iconst.i32 0 - v1897 = iconst.i32 0 - jump block102(v1774, v1784, v1845, v1856, v1870, v1758, v1762, v169, v1729, v97, v169, v169, v169, v169) - -block102(v1785: i32, v1789: i32, v1843: i32, v1854: i32, v1868: i32, v1882: i32, v1890: i32, v1901: i32, v1921: i32, v1933: i32, v2058: i32, v2124: i32, v2236: i32, v2366: i32): - v1929 = iconst.i32 0 - v1943 = iconst.i32 0 - v1949 = iconst.i32 0 - jump block123(v1897, v1769) - -block123(v1950: i32, v1979: i32): - v1955 = iconst.i32 0 - brz v1955, block125 - jump block122(v1929, v1843, v1864, v2058, v1882, v1897, v1943, v1868, v2124, v1901) - -block125: - v1961 = iadd_imm.i32 v1949, 0 - v1952 = iconst.i32 0 - v1962 = iconst.i64 0 - v1963 = load.i32 v1962 - brz v1963, block123(v1952, v1961) - jump block127 - 
-block127: - v1966 = iconst.i32 0 - jump block122(v1963, v1966, v1966, v1966, v1966, v1966, v1966, v1966, v1966, v1966) - -block122(v1967: i32, v1971: i32, v1972: i32, v1978: i32, v2032: i32, v2041: i32, v2053: i32, v2076: i32, v2085: i32, v2096: i32): - trap user0 -} diff --git a/cranelift/filetests/filetests/regalloc/infinite-interference.clif b/cranelift/filetests/filetests/regalloc/infinite-interference.clif deleted file mode 100644 index b7a7736405..0000000000 --- a/cranelift/filetests/filetests/regalloc/infinite-interference.clif +++ /dev/null @@ -1,37 +0,0 @@ -test regalloc -target riscv32 - -; Here, the coalescer initially builds vreg0 = [v1, v2, v3] -; -; There's interference between v1 and v2 at the brz instruction. Isolating v2 is not going to -; resolve that conflict since v1 will just interfere with the inserted copy too. - -;function %c1(i32) -> i32 { -;block0(v0: i32): -; v1 = iadd_imm v0, 1 -; v2 = iconst.i32 1 -; brz v1, block1(v2) -; jump block2 -; -;block1(v3: i32): -; return v3 -; -;block2: -; jump block1(v1) -;} - -; Same thing with v1 and v2 swapped to reverse the order of definitions. 
- -function %c2(i32) -> i32 { -block0(v0: i32): - v1 = iadd_imm v0, 1 - v2 = iconst.i32 1 - brz v2, block1(v1) - jump block2 - -block1(v3: i32): - return v3 - -block2: - jump block1(v2) -} diff --git a/cranelift/filetests/filetests/regalloc/iterate.clif b/cranelift/filetests/filetests/regalloc/iterate.clif deleted file mode 100644 index 3272199bca..0000000000 --- a/cranelift/filetests/filetests/regalloc/iterate.clif +++ /dev/null @@ -1,164 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -function u0:9(i64 [%rdi], f32 [%xmm0], f64 [%xmm1], i32 [%rsi], i32 [%rdx], i64 vmctx [%r14]) -> i64 [%rax] baldrdash_system_v { -block0(v0: i64, v1: f32, v2: f64, v3: i32, v4: i32, v5: i64): - v32 = iconst.i32 0 - v6 = bitcast.f32 v32 - v7 = iconst.i64 0 - v33 = iconst.i64 0 - v8 = bitcast.f64 v33 - v34 = iconst.i32 0xbe99_999a - v9 = bitcast.f32 v34 - v10 = iconst.i32 40 - v11 = iconst.i32 -7 - v35 = iconst.i32 0x40b0_0000 - v12 = bitcast.f32 v35 - v13 = iconst.i64 6 - v36 = iconst.i64 0x4020_0000_0000_0000 - v14 = bitcast.f64 v36 - v44 = iconst.i64 0 - v37 = icmp slt v0, v44 - brnz v37, block2 - jump block11 - -block11: - v38 = fcvt_from_sint.f64 v0 - jump block3(v38) - -block2: - v45 = iconst.i32 1 - v39 = ushr.i64 v0, v45 - v40 = band_imm.i64 v0, 1 - v41 = bor v39, v40 - v42 = fcvt_from_sint.f64 v41 - v43 = fadd v42, v42 - jump block3(v43) - -block3(v15: f64): - v16 = fpromote.f64 v9 - v46 = uextend.i64 v10 - v17 = fcvt_from_sint.f64 v46 - v18 = fcvt_from_sint.f64 v11 - v19 = fpromote.f64 v12 - v54 = iconst.i64 0 - v47 = icmp.i64 slt v13, v54 - brnz v47, block4 - jump block12 - -block12: - v48 = fcvt_from_sint.f64 v13 - jump block5(v48) - -block4: - v55 = iconst.i32 1 - v49 = ushr.i64 v13, v55 - v50 = band_imm.i64 v13, 1 - v51 = bor v49, v50 - v52 = fcvt_from_sint.f64 v51 - v53 = fadd v52, v52 - jump block5(v53) - -block5(v20: f64): - v63 = iconst.i64 0 - v56 = icmp.i64 slt v7, v63 - brnz v56, block6 - jump block13 - -block13: - v57 = fcvt_from_sint.f64 v7 - jump 
block7(v57) - -block6: - v64 = iconst.i32 1 - v58 = ushr.i64 v7, v64 - v59 = band_imm.i64 v7, 1 - v60 = bor v58, v59 - v61 = fcvt_from_sint.f64 v60 - v62 = fadd v61, v61 - jump block7(v62) - -block7(v21: f64): - v22 = fadd v21, v14 - v23 = fadd.f64 v20, v22 - v24 = fadd.f64 v19, v23 - v25 = fadd.f64 v18, v24 - v26 = fadd.f64 v17, v25 - v27 = fadd.f64 v2, v26 - v28 = fadd.f64 v16, v27 - v29 = fadd.f64 v15, v28 - v30 = x86_cvtt2si.i64 v29 - v69 = iconst.i64 0x8000_0000_0000_0000 - v65 = icmp ne v30, v69 - brnz v65, block8 - jump block15 - -block15: - v66 = fcmp uno v29, v29 - brz v66, block9 - jump block16 - -block16: - trap bad_toint - -block9: - v70 = iconst.i64 0xc3e0_0000_0000_0000 - v67 = bitcast.f64 v70 - v68 = fcmp gt v67, v29 - brz v68, block10 - jump block17 - -block17: - trap int_ovf - -block10: - jump block8 - -block8: - jump block1(v30) - -block1(v31: i64): - return v31 -} - -function u0:26(i64 vmctx [%r14]) -> i64 [%rax] baldrdash_system_v { - gv1 = vmctx - gv0 = iadd_imm.i64 gv1, 48 - sig0 = (i32 [%rdi], i64 [%rsi], i64 vmctx [%r14], i64 sigid [%rbx]) -> i64 [%rax] baldrdash_system_v - -block0(v0: i64): - v1 = iconst.i32 32 - v2 = iconst.i64 64 - v3 = iconst.i32 9 - v4 = iconst.i64 1063 - v5 = iadd_imm v0, 48 - v6 = load.i32 v5 - v7 = icmp uge v3, v6 - ; If we're unlucky, there are no ABCD registers available for v7 at this branch. 
- brz v7, block2 - jump block4 - -block4: - trap heap_oob - -block2: - v8 = load.i64 v5+8 - v9 = uextend.i64 v3 - v16 = iconst.i64 16 - v10 = imul v9, v16 - v11 = iadd v8, v10 - v12 = load.i64 v11 - brnz v12, block3 - jump block5 - -block5: - trap icall_null - -block3: - v13 = load.i64 v11+8 - v14 = call_indirect.i64 sig0, v12(v1, v2, v13, v4) - jump block1(v14) - -block1(v15: i64): - return v15 -} diff --git a/cranelift/filetests/filetests/regalloc/multi-constraints.clif b/cranelift/filetests/filetests/regalloc/multi-constraints.clif deleted file mode 100644 index 0a6b160f09..0000000000 --- a/cranelift/filetests/filetests/regalloc/multi-constraints.clif +++ /dev/null @@ -1,51 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -; Test combinations of constraints. -; -; The x86 ushr instruction requires its second operand to be passed in %rcx and its output is -; tied to the first input operand. -; -; If we pass the same value to both operands, both constraints must be satisfied. - -; Found by the Binaryen fuzzer in PR221. -; -; Conditions triggering the problem: -; -; - The same value used for a tied operand and a fixed operand. -; - The common value is already in %rcx. -; - The tied output value is live outside the block. -; -; Under these conditions, Solver::add_tied_input() would create a variable for the tied input -; without considering the fixed constraint. -function %pr221(i64 [%rdi], i64 [%rsi], i64 [%rdx], i64 [%rcx]) -> i64 [%rax] { -block0(v0: i64, v1: i64, v2: i64, v3: i64): - v4 = ushr v3, v3 - jump block1 - -block1: - return v4 -} - -; Found by the Binaryen fuzzer in PR218. -; -; This is a similar situation involving combined constraints on the ushr instruction: -; -; - The %rcx register is already in use by a globally live value. -; - The ushr x, x result is also a globally live value. 
-; -; Since the ushr x, x result is forced to be placed in %rcx, we must set the replace_global_defines -; flag so it can be reassigned to a different global register. -function %pr218(i64 [%rdi], i64 [%rsi], i64 [%rdx], i64 [%rcx]) -> i64 [%rax] { -block0(v0: i64, v1: i64, v2: i64, v3: i64): - ; check: regmove v3, %rcx -> - v4 = ushr v0, v0 - ; check: v4 = copy - jump block1 - -block1: - ; v3 is globally live in %rcx. - ; v4 is also globally live. Needs to be assigned something else for the trip across the CFG edge. - v5 = iadd v3, v4 - return v5 -} diff --git a/cranelift/filetests/filetests/regalloc/multiple-returns.clif b/cranelift/filetests/filetests/regalloc/multiple-returns.clif deleted file mode 100644 index 8825a4df72..0000000000 --- a/cranelift/filetests/filetests/regalloc/multiple-returns.clif +++ /dev/null @@ -1,23 +0,0 @@ -test regalloc -target x86_64 legacy - -; Return the same value twice. This needs a copy so that each value can be -; allocated its own register. -function %multiple_returns() -> i64, i64 { -block0: - v2 = iconst.i64 0 - return v2, v2 -} -; check: v2 = iconst.i64 0 -; check: v3 = copy v2 -; check: return v2, v3 - -; Same thing, now with a fallthrough_return. -function %multiple_returns() -> i64, i64 { -block0: - v2 = iconst.i64 0 - fallthrough_return v2, v2 -} -; check: v2 = iconst.i64 0 -; check: v3 = copy v2 -; check: fallthrough_return v2, v3 diff --git a/cranelift/filetests/filetests/regalloc/output-interference.clif b/cranelift/filetests/filetests/regalloc/output-interference.clif deleted file mode 100644 index 1ba797f6c8..0000000000 --- a/cranelift/filetests/filetests/regalloc/output-interference.clif +++ /dev/null @@ -1,14 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -function %test(i64) -> i64 system_v { -block0(v0: i64): - v2 = iconst.i64 12 - ; This division clobbers two of its fixed input registers on x86. - ; These are FixedTied constraints that the spiller needs to resolve. 
- v5 = udiv v0, v2 - v6 = iconst.i64 13 - v9 = udiv v0, v6 - v10 = iadd v5, v9 - return v10 -} diff --git a/cranelift/filetests/filetests/regalloc/reload-208.clif b/cranelift/filetests/filetests/regalloc/reload-208.clif deleted file mode 100644 index 5e6a7e9864..0000000000 --- a/cranelift/filetests/filetests/regalloc/reload-208.clif +++ /dev/null @@ -1,112 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -; regex: V=v\d+ -; regex: BB=block\d+ - -; Filed as https://github.com/bytecodealliance/cranelift/issues/208 -; -; The verifier complains about a branch argument that is not in the same virtual register as the -; corresponding block argument. -; -; The problem was the reload pass rewriting block arguments on "brnz v9, block3(v9)" - -function %pr208(i64 vmctx [%rdi]) system_v { - gv1 = vmctx - gv0 = iadd_imm.i64 gv1, -8 - heap0 = static gv0, min 0, bound 0x5000, offset_guard 0x0040_0000 - sig0 = (i64 vmctx [%rdi]) -> i32 [%rax] system_v - sig1 = (i64 vmctx [%rdi], i32 [%rsi]) system_v - fn0 = u0:1 sig0 - fn1 = u0:3 sig1 - -block0(v0: i64): - v1 = iconst.i32 0 - v2 = call fn0(v0) - v20 = iconst.i32 0x4ffe - v16 = icmp uge v2, v20 - brz v16, block5 - jump block9 - -block9: - trap heap_oob - -block5: - v17 = uextend.i64 v2 - v18 = iadd_imm.i64 v0, -8 - v19 = load.i64 v18 - v3 = iadd v19, v17 - v4 = load.i32 v3 - v21 = iconst.i32 0 - v5 = icmp eq v4, v21 - v6 = bint.i32 v5 - brnz v6, block2 - jump block3(v4) - - ; check: block5: - ; check: jump block3(v4) - ; check: $(splitEdge=$BB): - ; nextln: jump block3(v9) - -block3(v7: i32): - call fn1(v0, v7) - v26 = iconst.i32 0x4ffe - v22 = icmp uge v7, v26 - brz v22, block6 - jump block10 - -block10: - trap heap_oob - -block6: - v23 = uextend.i64 v7 - v24 = iadd_imm.i64 v0, -8 - v25 = load.i64 v24 - v8 = iadd v25, v23 - v9 = load.i32 v8+56 - ; check: v9 = spill - ; check: brnz $V, $splitEdge - brnz v9, block3(v9) - jump block4 - -block4: - jump block2 - -block2: - v10 = iconst.i32 0 - v31 = iconst.i32 0x4ffe - v27 = 
icmp uge v10, v31 - brz v27, block7 - jump block11 - -block11: - trap heap_oob - -block7: - v28 = uextend.i64 v10 - v29 = iadd_imm.i64 v0, -8 - v30 = load.i64 v29 - v11 = iadd v30, v28 - v12 = load.i32 v11+12 - call fn1(v0, v12) - v13 = iconst.i32 0 - v36 = iconst.i32 0x4ffe - v32 = icmp uge v13, v36 - brz v32, block8 - jump block12 - -block12: - trap heap_oob - -block8: - v33 = uextend.i64 v13 - v34 = iadd_imm.i64 v0, -8 - v35 = load.i64 v34 - v14 = iadd v35, v33 - v15 = load.i32 v14+12 - call fn1(v0, v15) - jump block1 - -block1: - return -} diff --git a/cranelift/filetests/filetests/regalloc/reload-779.clif b/cranelift/filetests/filetests/regalloc/reload-779.clif deleted file mode 100644 index 5dafe32b5c..0000000000 --- a/cranelift/filetests/filetests/regalloc/reload-779.clif +++ /dev/null @@ -1,23 +0,0 @@ -test compile -target x86_64 legacy - -; Filed as https://github.com/bytecodealliance/cranelift/issues/779 -; -; The copy_nop optimisation to reload (see Issue 773) was creating -; copy_nop instructions for types for which there were no encoding. - -function u0:0(i64, i64, i64) system_v { - sig0 = () system_v - sig1 = (i16) system_v - fn1 = u0:94 sig0 - fn2 = u0:95 sig1 - -block0(v0: i64, v1: i64, v2: i64): - v3 = iconst.i16 0 - jump block1(v3) - -block1(v4: i16): - call fn1() - call fn2(v4) - jump block1(v4) -} diff --git a/cranelift/filetests/filetests/regalloc/reload.clif b/cranelift/filetests/filetests/regalloc/reload.clif deleted file mode 100644 index 1ae755a988..0000000000 --- a/cranelift/filetests/filetests/regalloc/reload.clif +++ /dev/null @@ -1,46 +0,0 @@ -test regalloc -target riscv32 legacy enable_e - -; regex: V=v\d+ - -; Check that we can handle a function return value that got spilled. 
-function %spill_return() -> i32 { - fn0 = %foo() -> i32 system_v - -block0: - v0 = call fn0() - ; check: $(reg=$V) = call fn0 - ; check: v0 = spill $reg - v2 = call fn0() - ; check: v2 = call fn0 - return v0 - ; check: $(reload=$V) = fill v0 - ; check: return $reload -} - -; Check that copies where the arg has been spilled are replaced with fills. -; -; RV32E has 6 registers for function arguments so the 7th, v6, will be placed -; on the stack. -function %spilled_copy_arg(i32, i32, i32, i32, i32, i32, i32) -> i32 { - -block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32): - ; not: copy - ; check: v10 = fill v6 - v10 = copy v6 - return v10 -} - -; Check that copies where the result has been spilled are replaced with spills. -; -; v1 is live across a call so it will be spilled. -function %spilled_copy_result(i32) -> i32 { - fn0 = %foo(i32) - -block0(v0: i32): - ; not: copy - ; check: v1 = spill v0 - v1 = copy v0 - call fn0(v1) - return v1 -} diff --git a/cranelift/filetests/filetests/regalloc/schedule-moves.clif b/cranelift/filetests/filetests/regalloc/schedule-moves.clif deleted file mode 100644 index 701a91a15a..0000000000 --- a/cranelift/filetests/filetests/regalloc/schedule-moves.clif +++ /dev/null @@ -1,39 +0,0 @@ -test regalloc -target i686 legacy haswell - -function %pr165() system_v { -block0: - v0 = iconst.i32 0x0102_0304 - v1 = iconst.i32 0x1102_0304 - v2 = iconst.i32 0x2102_0304 - v20 = ishl v1, v0 - v21 = ishl v2, v0 - v22 = sshr v1, v0 - v23 = sshr v2, v0 - v24 = ushr v1, v0 - v25 = ushr v2, v0 - istore8 v0, v1+0x2710 - istore8 v1, v0+0x2710 - return -} - -; Same as above, but use so many registers that spilling is required. -; Note: This is also a candidate for using xchg instructions. 
-function %emergency_spill() system_v { -block0: - v0 = iconst.i32 0x0102_0304 - v1 = iconst.i32 0x1102_0304 - v2 = iconst.i32 0x2102_0304 - v3 = iconst.i32 0x3102_0304 - v4 = iconst.i32 0x4102_0304 - v20 = ishl v1, v0 - v21 = ishl v2, v3 - v22 = sshr v1, v0 - v23 = sshr v2, v0 - v24 = ushr v1, v0 - v25 = ushr v2, v0 - istore8 v0, v1+0x2710 - istore8 v1, v0+0x2710 - istore8 v3, v4+0x2710 - return -} diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif deleted file mode 100644 index b280db086f..0000000000 --- a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif +++ /dev/null @@ -1,100 +0,0 @@ -test compile -set opt_level=speed -set enable_pinned_reg=true -target x86_64 legacy haswell - -function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v { -block0(v0: i32, v1: i32, v2: i32, v3: i64): - v236 = iconst.i32 0x4de9_bd37 - v424 = iconst.i32 0 - jump block37(v424) - -block37(v65: i32): - v433 = iconst.i32 0 - jump block40(v433) - -block40(v70: i32): - v75 = iconst.i32 0 - v259 = iconst.i32 0 - v78 -> v259 - v449 = iconst.i32 0 - v450, v451 = x86_sdivmodx v75, v449, v259 - v79 -> v450 - v269 = iconst.i32 0 - v270 = ushr_imm v269, 31 - v271 = iadd v269, v270 - v98 -> v271 - v100 = iconst.i32 -31 - v272 = iconst.i32 0x4de9_bd37 - v490, v273 = x86_smulx v100, v272 - v493 = iconst.i32 0 - jump block61(v493) - -block61(v103: i32): - v104 = iconst.i32 -23 - v105 = iconst.i32 -23 - v106 = popcnt v105 - v500 = sshr_imm v104, 31 - v501 = iconst.i32 0 - jump block64(v501) - -block64(v107: i32): - v108 = iconst.i32 0 - v109 = iconst.i32 0 - v278 = iconst.i32 0 - v507, v279 = x86_smulx v109, v278 - v280 = isub v279, v109 - v281 = sshr_imm v280, 11 - v282 = iconst.i32 0 - v283 = iadd v281, v282 - v111 -> v283 - v112 = rotr v108, v283 - jump block65 - -block65: - v509 = iconst.i32 0 - v510, v511 = x86_sdivmodx v107, v509, v112 - v113 -> v510 - v114 = 
iconst.i32 0 - v517 = iconst.i32 0 - v518, v519 = x86_sdivmodx v103, v517, v114 - v115 -> v518 - v534 = iconst.i32 0 - v122 -> v534 - v541 = iconst.i32 0 - v542, v543 = x86_sdivmodx v271, v541, v122 - v123 -> v542 - v289 = iconst.i32 0 - v125 -> v289 - v550 = iconst.i32 0 - v551, v552 = x86_sdivmodx v79, v550, v289 - v126 -> v551 - v130 = iconst.i32 0 - v558 = iconst.i32 0 - v559, v560 = x86_sdivmodx v70, v558, v130 - v131 -> v559 - v305 = iconst.i32 0 - v140 -> v305 - v577 = iconst.i32 0 - v578, v579 = x86_sdivmodx v65, v577, v305 - v141 -> v578 - v166 = iconst.i32 0 - v167 = iconst.i32 -31 - v318 = iconst.i32 0x4de9_bd37 - v650, v319 = x86_smulx v167, v318 - v320 = isub v319, v167 - v321 = sshr_imm v320, 4 - v322 = iconst.i32 0 - v323 = iadd v321, v322 - v169 -> v323 - v652 = iconst.i32 0 - v653, v654 = x86_sdivmodx v166, v652, v323 - v170 -> v653 - v171 = iconst.i32 -23 - v172 = iconst.i32 -23 - v173 = popcnt v172 - v174 = popcnt v173 - v660 = sshr_imm v171, 31 - v661, v662 = x86_sdivmodx v171, v660, v174 - trap user0 -} diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif deleted file mode 100644 index 1c2d1b2bc0..0000000000 --- a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif +++ /dev/null @@ -1,137 +0,0 @@ -test compile -set opt_level=speed -set enable_pinned_reg=true -target x86_64 legacy haswell - -function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v { -block0(v0: i32, v1: i32, v2: i32, v3: i64): - v5 = iconst.i32 -8 - v114 = iconst.i32 0 - v16 = iconst.i32 -8 - v17 = popcnt v16 - v192 = ifcmp_imm v17, -1 - trapif ne v192, user0 - jump block12 - -block12: - v122 = iconst.i32 0 - v123 = ushr_imm v122, 31 - v124 = iadd v122, v123 - v20 -> v124 - v25 = iconst.i32 -19 - v204 = iconst.i32 0 - v31 -> v204 - v210 = ifcmp_imm v31, -1 - trapif ne v210, user0 - jump block18 - -block18: - v215 = iconst.i32 0 - jump block19(v215) - 
-block19(v32: i32): - v35 = iconst.i32 0 - v218 = ifcmp_imm v35, -1 - trapif ne v218, user0 - jump block21 - -block21: - v223 = iconst.i32 0 - jump block22(v223) - -block22(v36: i32): - v136 = iconst.i32 0 - v40 -> v136 - v227 = ifcmp_imm v136, -1 - trapif ne v227, user0 - jump block24 - -block24: - v232 = iconst.i32 0 - jump block25(v232) - -block25(v41: i32): - v142 = iconst.i32 0 - v45 -> v142 - v236 = ifcmp_imm v142, -1 - trapif ne v236, user0 - jump block27 - -block27: - v241 = iconst.i32 0 - jump block28(v241) - -block28(v46: i32): - v49 = iconst.i32 0 - v244 = ifcmp_imm v49, -1 - trapif ne v244, user0 - jump block30 - -block30: - v254 = iconst.i32 0 - v53 -> v254 - v54 = iconst.i32 -23 - v55 = popcnt v54 - v143 = iconst.i32 0x4de9_bd37 - v260, v144 = x86_smulx v55, v143 - v145 = iconst.i32 0 - v146 = sshr_imm v145, 4 - v147 = iconst.i32 0 - v148 = iadd v146, v147 - v57 -> v148 - v58 = ishl v53, v148 - jump block35 - -block35: - v262 = iconst.i32 0 - v263, v264 = x86_sdivmodx v46, v262, v58 - v59 -> v263 - v270 = iconst.i32 0 - v271, v272 = x86_sdivmodx v41, v270, v59 - v60 -> v271 - v61 = f32const 0.0 - v280 = iconst.i32 0 - v281 = ffcmp v61, v61 - trapff ord v281, user0 - jump block41(v280) - -block41(v62: i32): - v157 = iconst.i32 0 - v158 = sshr_imm v157, 4 - v159 = iconst.i32 0 - v160 = iadd v158, v159 - v75 -> v160 - v308 = ifcmp_imm v160, -1 - trapif ne v308, user0 - jump block52 - -block52: - v87 = iconst.i32 -23 - v88 = iconst.i32 -23 - v89 = popcnt v88 - v161 = iconst.i32 0x4de9_bd37 - v324, v162 = x86_smulx v89, v161 - v163 = isub v162, v89 - v164 = sshr_imm v163, 4 - v165 = iconst.i32 0 - v166 = iadd v164, v165 - v91 -> v166 - v326 = iconst.i32 0 - v327, v328 = x86_sdivmodx v87, v326, v166 - v92 -> v327 - v351 = iconst.i32 0 - v99 -> v351 - v358 = iconst.i32 0 - v359, v360 = x86_sdivmodx v36, v358, v99 - v100 -> v359 - v102 = iconst.i32 0 - v103 = rotr.i32 v32, v102 - v366 = iconst.i32 0 - v367, v368 = x86_sdivmodx v25, v366, v103 - v104 -> v367 - 
v383 = iconst.i32 0 - v107 -> v383 - v390 = iconst.i32 0 - v391, v392 = x86_sdivmodx v124, v390, v107 - trap user0 -} diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif deleted file mode 100644 index 1aec10354f..0000000000 --- a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif +++ /dev/null @@ -1,173 +0,0 @@ -test compile -set opt_level=speed -set enable_pinned_reg=true -target x86_64 legacy haswell - -;; Test for the issue #1123; https://github.com/bytecodealliance/cranelift/issues/1123 - -function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v { -block0(v0: i32, v1: i32, v2: i32, v3: i64): - v351 = iconst.i32 0x4de9_bd37 - v31 = iconst.i32 -23 - v35 = iconst.i32 0 - v36 = iconst.i32 -31 - v357 = iconst.i32 0x4de9_bd37 - v530, v358 = x86_smulx v36, v357 - v359 = isub v358, v36 - v360 = sshr_imm v359, 4 - v361 = iconst.i32 0 - v362 = iadd v360, v361 - v38 -> v362 - v532 = sshr_imm v35, 31 - v533, v534 = x86_sdivmodx v35, v532, v362 - v39 -> v533 - v53 = iconst.i32 0 - v547 = ifcmp_imm v53, -1 - trapif ne v547, user0 - jump block30 - -block30: - v75 = iconst.i32 0 - v581 = ifcmp_imm v75, -1 - trapif ne v581, user0 - jump block42 - -block42: - v136 = iconst.i32 0 - v691 = ifcmp_imm v136, -1 - trapif ne v691, user0 - jump block81 - -block81: - v158 = iconst.i32 0 - v725 = ifcmp_imm v158, -1 - trapif ne v725, user0 - jump block93 - -block93: - v760 = iconst.i32 0 - jump block106(v760) - -block106(v175: i32): - v179 = iconst.i32 0 - v180 = icmp_imm eq v179, 0 - v183 = iconst.i32 0 - v766 = ifcmp_imm v183, -1 - trapif ne v766, user0 - jump block108 - -block108: - v771 = iconst.i32 0 - jump block109(v771) - -block109(v184: i32): - v785 = iconst.i32 0 - v193 -> v785 - v791 = ifcmp_imm v193, -1 - trapif ne v791, user0 - jump block117 - -block117: - v796 = iconst.i32 0 - jump block118(v796) - -block118(v194: i32): - v203 = iconst.i32 -63 - v809 = 
iconst.i32 0 - v207 -> v809 - v815 = ifcmp_imm v207, -1 - trapif ne v815, user0 - jump block126 - -block126: - v209 = iconst.i32 0 - v823 = ifcmp_imm v209, -1 - trapif ne v823, user0 - jump block129 - -block129: - v213 = iconst.i32 -23 - v214 = iconst.i32 -19 - v215 = icmp_imm eq v214, 0 - v216 = bint.i32 v215 - v217 = popcnt v216 - v435 = iconst.i32 0x7df7_df7d - v831, v436 = x86_smulx v217, v435 - v437 = isub v436, v217 - v438 = sshr_imm v437, 5 - v439 = ushr_imm v438, 31 - v440 = iadd v438, v439 - v219 -> v440 - v220 = rotr v213, v440 - v229 = iconst.i32 0 - v841 = iconst.i32 0 - v842, v843 = x86_sdivmodx v194, v841, v229 - v230 -> v842 - v849 = iconst.i32 0 - v850, v851 = x86_sdivmodx v184, v849, v230 - v231 -> v850 - v232 = iconst.i32 0 - v857 = iconst.i32 0 - v858, v859 = x86_sdivmodx v175, v857, v232 - v233 -> v858 - v915 = iconst.i32 0 - jump block163(v915) - -block163(v253: i32): - v255 = iconst.i32 0 - v256 = iconst.i32 -23 - v257 = iconst.i32 -19 - v258 = icmp_imm eq v257, 0 - v259 = bint.i32 v258 - v260 = popcnt v259 - v447 = iconst.i32 0x7df7_df7d - v921, v448 = x86_smulx v260, v447 - v449 = isub v448, v260 - v450 = sshr_imm v449, 5 - v451 = ushr_imm v450, 31 - v452 = iadd v450, v451 - v262 -> v452 - v263 = rotr v256, v452 - v264 = popcnt v263 - v265 = popcnt v264 - v266 = popcnt v265 - v267 = rotr v255, v266 - v268 = popcnt v267 - v923 = iconst.i32 0 - v924, v925 = x86_sdivmodx v253, v923, v268 - v269 -> v924 - v276 = iconst.i32 0 - v277 = iconst.i32 -63 - v278 = popcnt v277 - v947 = iconst.i32 0 - v948, v949 = x86_sdivmodx v276, v947, v278 - v279 -> v948 - v309 = iconst.i32 0 - v310 = iconst.i32 0 - v311 = iconst.i32 0 - v312 = icmp_imm eq v311, 0 - v313 = bint.i32 v312 - v314 = rotr v310, v313 - v315 = iconst.i32 -31 - v464 = iconst.i32 0 - v1020, v465 = x86_smulx v315, v464 - v466 = isub v465, v315 - v467 = sshr_imm v466, 4 - v468 = iconst.i32 0 - v469 = iadd v467, v468 - v317 -> v469 - v1022 = iconst.i32 0 - v1023, v1024 = x86_sdivmodx v314, 
v1022, v469 - v318 -> v1023 - v320 = iconst.i32 0 - v321 = iconst.i32 -19 - v322 = popcnt v321 - v1030 = iconst.i32 0 - v1031, v1032 = x86_sdivmodx v320, v1030, v322 - v323 -> v1031 - v1047 = iconst.i32 0 - v325 -> v1047 - v1054 = sshr_imm v309, 31 - v1055, v1056 = x86_sdivmodx v309, v1054, v325 - trap user0 -} diff --git a/cranelift/filetests/filetests/regalloc/spill-noregs.clif b/cranelift/filetests/filetests/regalloc/spill-noregs.clif deleted file mode 100644 index e3540f6a59..0000000000 --- a/cranelift/filetests/filetests/regalloc/spill-noregs.clif +++ /dev/null @@ -1,175 +0,0 @@ -test regalloc -target x86_64 legacy - -; Test case found by the Binaryen fuzzer. -; -; The spiller panics with a -; 'Ran out of GPR registers when inserting copy before v68 = icmp.i32 eq v66, v67', -; cranelift-codegen/src/regalloc/spilling.rs:425:28 message. -; -; The process_reg_uses() function is trying to insert a copy before the icmp instruction in block4 -; and runs out of registers to spill. Note that block7 has a lot of dead parameter values. -; -; The spiller was not releasing register pressure for dead block parameters. 
- -function %pr223(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] system_v { -block0(v0: i32, v1: i64): - v2 = iconst.i32 0 - v3 = iconst.i64 0 - v4 = iconst.i32 0xffff_ffff_bb3f_4a2c - brz v4, block5 - jump block1 - -block1: - v5 = iconst.i32 0 - v6 = copy.i64 v3 - v7 = copy.i64 v3 - v8 = copy.i64 v3 - v9 = copy.i64 v3 - v10 = copy.i64 v3 - v11 = copy.i64 v3 - v12 = copy.i64 v3 - v13 = copy.i64 v3 - v14 = copy.i64 v3 - v15 = copy.i64 v3 - v16 = copy.i64 v3 - brnz v5, block4(v2, v3, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) - jump block2 - -block2: - v17 = iconst.i32 0 - v18 = copy.i64 v3 - v19 = copy.i64 v3 - v20 = copy.i64 v3 - v21 = copy.i64 v3 - v22 = copy.i64 v3 - v23 = copy.i64 v3 - v24 = copy.i64 v3 - v25 = copy.i64 v3 - v26 = copy.i64 v3 - v27 = copy.i64 v3 - v28 = copy.i64 v3 - brnz v17, block4(v2, v3, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) - jump block3 - -block3: - jump block1 - -block4(v29: i32, v30: i64, v31: i64, v32: i64, v33: i64, v34: i64, v35: i64, v36: i64, v37: i64, v38: i64, v39: i64, v40: i64, v41: i64): - jump block7(v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) - -block5: - jump block6 - -block6: - v42 = copy.i64 v3 - v43 = copy.i64 v3 - v44 = copy.i64 v3 - v45 = copy.i64 v3 - v46 = copy.i64 v3 - v47 = copy.i64 v3 - v48 = copy.i64 v3 - v49 = copy.i64 v3 - v50 = copy.i64 v3 - v51 = copy.i64 v3 - v52 = copy.i64 v3 - jump block7(v2, v3, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) - -block7(v53: i32, v54: i64, v55: i64, v56: i64, v57: i64, v58: i64, v59: i64, v60: i64, v61: i64, v62: i64, v63: i64, v64: i64, v65: i64): - v66 = iconst.i32 0 - v67 = iconst.i32 0 - v68 = icmp eq v66, v67 - v69 = bint.i32 v68 - jump block8 - -block8: - jump block9 - -block9: - v70 = iconst.i32 0xffff_ffff_ffff_912f - brz v70, block10 - jump block35 - -block10: - v71 = iconst.i32 0 - brz v71, block11 - jump block27 - -block11: - jump block12 - -block12: - jump block13 - -block13: - jump block14 - -block14: - jump 
block15 - -block15: - jump block16 - -block16: - jump block17 - -block17: - jump block18 - -block18: - jump block19 - -block19: - jump block20 - -block20: - jump block21 - -block21: - jump block22 - -block22: - jump block23 - -block23: - jump block24 - -block24: - jump block25 - -block25: - jump block26 - -block26: - jump block27 - -block27: - jump block28 - -block28: - jump block29 - -block29: - jump block30 - -block30: - jump block31 - -block31: - jump block32 - -block32: - jump block33 - -block33: - jump block34 - -block34: - jump block35 - -block35: - jump block36 - -block36: - trap user0 -} diff --git a/cranelift/filetests/filetests/regalloc/spill.clif b/cranelift/filetests/filetests/regalloc/spill.clif deleted file mode 100644 index 2a3f2ad959..0000000000 --- a/cranelift/filetests/filetests/regalloc/spill.clif +++ /dev/null @@ -1,223 +0,0 @@ -test regalloc - -; Test the spiler on an ISA with few registers. -; RV32E has 16 registers, where: -; - %x0 is hardwired to zero. -; - %x1 is the return address. -; - %x2 is the stack pointer. -; - %x3 is the global pointer. -; - %x4 is the thread pointer. -; - %x10-%x15 are function arguments. -; -; regex: V=v\d+ -; regex: WS=\s+ - -target riscv32 legacy enable_e - -; In straight-line code, the first value defined is spilled. -; That is in order: -; 1. The argument v1. -; 2. The link register. -; 3. 
The first computed value, v2 -function %pyramid(i32) -> i32 { -; check: ss0 = spill_slot 4 -; check: ss1 = spill_slot 4 -; check: ss2 = spill_slot 4 -; not: spill_slot -block0(v1: i32): -; check: block0($(rv1=$V): i32 [%x10], $(rlink=$V): i32 [%x1]) - ; check: ,ss0]$WS v1 = spill $rv1 - ; nextln: ,ss1]$WS $(link=$V) = spill $rlink - ; not: spill - v2 = iadd_imm v1, 12 - ; check: $(r1v2=$V) = iadd_imm - ; nextln: ,ss2]$WS v2 = spill $r1v2 - ; not: spill - v3 = iadd_imm v2, 12 - v4 = iadd_imm v3, 12 - v5 = iadd_imm v4, 12 - v6 = iadd_imm v5, 12 - v7 = iadd_imm v6, 12 - v8 = iadd_imm v7, 12 - v9 = iadd_imm v8, 12 - v10 = iadd_imm v9, 12 - v11 = iadd_imm v10, 12 - v12 = iadd_imm v11, 12 - v13 = iadd_imm v12, 12 - v14 = iadd_imm v13, 12 - v33 = iadd v13, v14 - ; check: iadd v13 - v32 = iadd v33, v12 - v31 = iadd v32, v11 - v30 = iadd v31, v10 - v29 = iadd v30, v9 - v28 = iadd v29, v8 - v27 = iadd v28, v7 - v26 = iadd v27, v6 - v25 = iadd v26, v5 - v24 = iadd v25, v4 - v23 = iadd v24, v3 - v22 = iadd v23, v2 - ; check: $(r2v2=$V) = fill v2 - ; check: v22 = iadd v23, $r2v2 - v21 = iadd v22, v1 - ; check: $(r2v1=$V) = fill v1 - ; check: v21 = iadd v22, $r2v1 - ; check: $(rlink2=$V) = fill $link - return v21 - ; check: return v21, $rlink2 -} - -; All values live across a call must be spilled -function %across_call(i32) { - fn0 = %foo(i32) -block0(v1: i32): - ; check: v1 = spill - call fn0(v1) - ; check: call fn0 - call fn0(v1) - ; check: fill v1 - ; check: call fn0 - return -} - -; The same value used for two function arguments. -function %doubleuse(i32) { - fn0 = %xx(i32, i32) -block0(v0: i32): - ; check: $(c=$V) = copy v0 - call fn0(v0, v0) - ; check: call fn0(v0, $c) - return -} - -; The same value used as indirect callee and argument. -function %doubleuse_icall1(i32) { - sig0 = (i32) system_v -block0(v0: i32): - ; not:copy - call_indirect sig0, v0(v0) - return -} - -; The same value used as indirect callee and two arguments. 
-function %doubleuse_icall2(i32) { - sig0 = (i32, i32) system_v -block0(v0: i32): - ; check: $(c=$V) = copy v0 - call_indirect sig0, v0(v0, v0) - ; check: call_indirect sig0, v0(v0, $c) - return -} - -; Two arguments on the stack. -function %stackargs(i32, i32, i32, i32, i32, i32, i32, i32) -> i32 { -; check: ss0 = incoming_arg 4 -; check: ss1 = incoming_arg 4, offset 4 -; not: incoming_arg -block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32): - ; unordered: fill v6 - ; unordered: fill v7 - v10 = iadd v6, v7 - return v10 -} - -; More block arguments than registers. -function %blockargs(i32) -> i32 { -block0(v1: i32): - ; check: v1 = spill - v2 = iconst.i32 1 - jump block1(v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2) - -block1(v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32, v20: i32, v21: i32): - v22 = iadd v10, v11 - v23 = iadd v22, v12 - v24 = iadd v23, v13 - v25 = iadd v24, v14 - v26 = iadd v25, v15 - v27 = iadd v26, v16 - v28 = iadd v27, v17 - v29 = iadd v28, v18 - v30 = iadd v29, v19 - v31 = iadd v30, v20 - v32 = iadd v31, v21 - v33 = iadd v32, v1 - return v33 -} - -; Spilling a block argument to make room for a branch operand. -function %brargs(i32) -> i32 { -block0(v1: i32): - ; check: v1 = spill - v2 = iconst.i32 1 - brnz v1, block1(v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2) - jump block2 - -block2: - return v1 - -block1(v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32, v20: i32, v21: i32): - v22 = iadd v10, v11 - v23 = iadd v22, v12 - v24 = iadd v23, v13 - v25 = iadd v24, v14 - v26 = iadd v25, v15 - v27 = iadd v26, v16 - v28 = iadd v27, v17 - v29 = iadd v28, v18 - v30 = iadd v29, v19 - v31 = iadd v30, v20 - v32 = iadd v31, v21 - v33 = iadd v32, v1 - return v33 -} - -; In straight-line code, the first value defined is spilled. -; That is in order: -; 1. The argument v1. -; 2. The link register. -; 3. 
The first computed value, v2 -function %use_spilled_value(i32) -> i32 { -; check: ss0 = spill_slot 4 -; check: ss1 = spill_slot 4 -; check: ss2 = spill_slot 4 -block0(v1: i32): -; check: block0($(rv1=$V): i32 [%x10], $(rlink=$V): i32 [%x1]) - ; check: ,ss0]$WS v1 = spill $rv1 - ; nextln: ,ss1]$WS $(link=$V) = spill $rlink - ; not: spill - v2 = iadd_imm v1, 12 - ; check: $(r1v2=$V) = iadd_imm - ; nextln: ,ss2]$WS v2 = spill $r1v2 - v3 = iadd_imm v2, 12 - v4 = iadd_imm v3, 12 - v5 = iadd_imm v4, 12 - v6 = iadd_imm v5, 12 - v7 = iadd_imm v6, 12 - v8 = iadd_imm v7, 12 - v9 = iadd_imm v8, 12 - v10 = iadd_imm v9, 12 - v11 = iadd_imm v10, 12 - v12 = iadd_imm v11, 12 - v13 = iadd_imm v12, 12 - v14 = iadd_imm v13, 12 - - ; Here we have maximum register pressure, and v2 has been spilled. - ; What happens if we use it? - v33 = iadd v2, v14 - v32 = iadd v33, v12 - v31 = iadd v32, v11 - v30 = iadd v31, v10 - v29 = iadd v30, v9 - v28 = iadd v29, v8 - v27 = iadd v28, v7 - v26 = iadd v27, v6 - v25 = iadd v26, v5 - v24 = iadd v25, v4 - v23 = iadd v24, v3 - v22 = iadd v23, v2 - v21 = iadd v22, v1 - v20 = iadd v21, v13 - v19 = iadd v20, v2 - return v21 -} diff --git a/cranelift/filetests/filetests/regalloc/unreachable_code.clif b/cranelift/filetests/filetests/regalloc/unreachable_code.clif deleted file mode 100644 index 219a299880..0000000000 --- a/cranelift/filetests/filetests/regalloc/unreachable_code.clif +++ /dev/null @@ -1,47 +0,0 @@ -; Use "test compile" here otherwise the dead blocks won't be eliminated. -test compile - -set enable_probestack=0 -target x86_64 legacy haswell - -; This function contains unreachable blocks which trip up the register -; allocator if they don't get cleared out. -function %unreachable_blocks(i64 vmctx) -> i32 baldrdash_system_v { -block0(v0: i64): - v1 = iconst.i32 0 - v2 = iconst.i32 0 - jump block2 - -block2: - jump block4 - -block4: - jump block2 - -; Everything below this point is unreachable. 
- -block3(v3: i32): - v5 = iadd.i32 v2, v3 - jump block6 - -block6: - jump block6 - -block7(v6: i32): - v7 = iadd.i32 v5, v6 - jump block8 - -block8: - jump block10 - -block10: - jump block8 - -block9(v8: i32): - v10 = iadd.i32 v7, v8 - jump block1(v10) - -block1(v11: i32): - return v11 -} - diff --git a/cranelift/filetests/filetests/regalloc/x86-regres.clif b/cranelift/filetests/filetests/regalloc/x86-regres.clif deleted file mode 100644 index 935b33c5b7..0000000000 --- a/cranelift/filetests/filetests/regalloc/x86-regres.clif +++ /dev/null @@ -1,49 +0,0 @@ -test regalloc -target i686 legacy - -; regex: V=v\d+ -; regex: BB=block\d+ - -; The value v9 appears both as the branch control and one of the block arguments -; in the brnz instruction in block2. It also happens that v7 and v9 are assigned -; to the same register, so v9 doesn't need to be moved before the brnz. -; -; This ended up confusong the constraint solver which had not made a record of -; the fixed register assignment for v9 since it was already in the correct -; register. -function %pr147(i32) -> i32 system_v { -block0(v0: i32): - v1 = iconst.i32 0 - v2 = iconst.i32 1 - v3 = iconst.i32 0 - jump block2(v3, v2, v0) - - ; check: $(splitEdge=$BB): - ; check: jump block2($V, $V, v9) - -block2(v4: i32, v5: i32, v7: i32): - ; check: block2 - v6 = iadd v4, v5 - v8 = iconst.i32 -1 - ; v7 is killed here and v9 gets the same register. - v9 = iadd v7, v8 - ; check: v9 = iadd v7, v8 - ; Here v9 the brnz control appears to interfere with v9 the block argument, - ; so divert_fixed_input_conflicts() calls add_var(v9), which is ok. The - ; add_var sanity checks got confused when no fixed assignment could be - ; found for v9. - ; - ; We should be able to handle this situation without making copies of v9. 
- brnz v9, block2(v5, v6, v9) - ; check: brnz v9, $splitEdge - jump block3 - -block3: - return v5 -} - -function %select_i64(i64, i64, i32) -> i64 { -block0(v0: i64, v1: i64, v2: i32): - v3 = select v2, v0, v1 - return v3 -} diff --git a/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif b/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif deleted file mode 100644 index fd95cc2f4c..0000000000 --- a/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif +++ /dev/null @@ -1,57 +0,0 @@ -test compile -target aarch64 -target x86_64 legacy - -; This checks that code shrink is allowed while relaxing code, when code shrink -; has not run. - -function u0:0(i64, i64) -> i64 system_v { - ss1 = explicit_slot 8 - sig0 = (i64) -> i64 system_v - fn0 = u0:8 sig0 - -block0(v0: i64, v1: i64): - v3 = stack_addr.i64 ss1 - v5 = call fn0(v1) - v6 = iconst.i64 0 - v8 = iconst.i64 0 - jump block3(v6, v1, v8) - -block3(v39: i64, v40: i64, v42: i64): - v9 = load.i64 v3 - v11 = icmp_imm ugt v9, 1 - v12 = bint.i8 v11 - v13 = uextend.i32 v12 - v14 = icmp_imm eq v13, 0 - brnz v14, block4 - jump block5 - -block4: - v18 = icmp_imm.i64 eq v40, 0 - v19 = bint.i8 v18 - v20 = uextend.i32 v19 - brz v20, block6 - jump block7 - -block7: - trap user0 - -block5: - v22 = iconst.i32 1 - v23 = ishl.i64 v39, v22 - v25 = iconst.i64 1 - v26 = band.i64 v42, v25 - v27 = bor v23, v26 - v28 = iconst.i32 1 - v29 = ushr.i64 v42, v28 - v30 = load.i64 v3 - v31 = iconst.i32 1 - v32 = ushr v30, v31 - store v32, v3 - jump block3(v27, v40, v29) - -block6: - v38 = iconst.i64 0 - return v38 -} - diff --git a/cranelift/filetests/filetests/runtests/alias.clif b/cranelift/filetests/filetests/runtests/alias.clif index cf5e99ca95..61ee5af491 100644 --- a/cranelift/filetests/filetests/runtests/alias.clif +++ b/cranelift/filetests/filetests/runtests/alias.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 target s390x -target x86_64 machinst +target x86_64 function %alias(i8) -> 
i8 { block0(v0: i8): diff --git a/cranelift/filetests/filetests/runtests/arithmetic.clif b/cranelift/filetests/filetests/runtests/arithmetic.clif index c3cfe07c4c..28936f45e4 100644 --- a/cranelift/filetests/filetests/runtests/arithmetic.clif +++ b/cranelift/filetests/filetests/runtests/arithmetic.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 target s390x -target x86_64 machinst +target x86_64 function %add_i64(i64, i64) -> i64 { block0(v0: i64,v1: i64): diff --git a/cranelift/filetests/filetests/runtests/atomic-rmw-2.clif b/cranelift/filetests/filetests/runtests/atomic-rmw-2.clif index 2213c72be3..b697a9279e 100644 --- a/cranelift/filetests/filetests/runtests/atomic-rmw-2.clif +++ b/cranelift/filetests/filetests/runtests/atomic-rmw-2.clif @@ -1,6 +1,7 @@ test run target aarch64 -target x86_64 machinst +target aarch64 has_lse +target x86_64 ; TODO: Merge this with atomic-rmw.clif when s390x supports it diff --git a/cranelift/filetests/filetests/runtests/atomic-rmw.clif b/cranelift/filetests/filetests/runtests/atomic-rmw.clif index eb9ed2c4d3..57e18a0dbe 100644 --- a/cranelift/filetests/filetests/runtests/atomic-rmw.clif +++ b/cranelift/filetests/filetests/runtests/atomic-rmw.clif @@ -1,6 +1,7 @@ test run target aarch64 -target x86_64 machinst +target aarch64 has_lse +target x86_64 target s390x ; We can't test that these instructions are right regarding atomicity, but we can diff --git a/cranelift/filetests/filetests/runtests/bextend.clif b/cranelift/filetests/filetests/runtests/bextend.clif new file mode 100644 index 0000000000..9f78fd9d2b --- /dev/null +++ b/cranelift/filetests/filetests/runtests/bextend.clif @@ -0,0 +1,84 @@ +test interpret + +function %bextend_b1_b8(b1) -> b8 { +block0(v0: b1): + v1 = bextend.b8 v0 + return v1 +} +; run: %bextend_b1_b8(true) == true +; run: %bextend_b1_b8(false) == false + +function %bextend_b1_b16(b1) -> b16 { +block0(v0: b1): + v1 = bextend.b16 v0 + return v1 +} +; run: %bextend_b1_b16(true) == true +; run: 
%bextend_b1_b16(false) == false + +function %bextend_b1_b32(b1) -> b32 { +block0(v0: b1): + v1 = bextend.b32 v0 + return v1 +} +; run: %bextend_b1_b32(true) == true +; run: %bextend_b1_b32(false) == false + +function %bextend_b1_b64(b1) -> b64 { +block0(v0: b1): + v1 = bextend.b64 v0 + return v1 +} +; run: %bextend_b1_b64(true) == true +; run: %bextend_b1_b64(false) == false + + +function %bextend_b8_b16(b8) -> b16 { +block0(v0: b8): + v1 = bextend.b16 v0 + return v1 +} +; run: %bextend_b8_b16(true) == true +; run: %bextend_b8_b16(false) == false + +function %bextend_b8_b32(b8) -> b32 { +block0(v0: b8): + v1 = bextend.b32 v0 + return v1 +} +; run: %bextend_b8_b32(true) == true +; run: %bextend_b8_b32(false) == false + +function %bextend_b8_b64(b8) -> b64 { +block0(v0: b8): + v1 = bextend.b64 v0 + return v1 +} +; run: %bextend_b8_b64(true) == true +; run: %bextend_b8_b64(false) == false + + +function %bextend_b16_b32(b16) -> b32 { +block0(v0: b16): + v1 = bextend.b32 v0 + return v1 +} +; run: %bextend_b16_b32(true) == true +; run: %bextend_b16_b32(false) == false + +function %bextend_b16_b64(b16) -> b64 { +block0(v0: b16): + v1 = bextend.b64 v0 + return v1 +} +; run: %bextend_b16_b64(true) == true +; run: %bextend_b16_b64(false) == false + + +function %bextend_b32_b64(b32) -> b64 { +block0(v0: b32): + v1 = bextend.b64 v0 + return v1 +} +; run: %bextend_b32_b64(true) == true +; run: %bextend_b32_b64(false) == false diff --git a/cranelift/filetests/filetests/runtests/bint.clif b/cranelift/filetests/filetests/runtests/bint.clif index 30bb91be11..cce35d1c7f 100644 --- a/cranelift/filetests/filetests/runtests/bint.clif +++ b/cranelift/filetests/filetests/runtests/bint.clif @@ -1,6 +1,6 @@ test run target aarch64 -target x86_64 machinst +target x86_64 function %bint_b8_i16_true() -> i16 { block0: diff --git a/cranelift/filetests/filetests/runtests/bitops.clif b/cranelift/filetests/filetests/runtests/bitops.clif index f84e276f47..1524e04bb5 100644 --- 
a/cranelift/filetests/filetests/runtests/bitops.clif +++ b/cranelift/filetests/filetests/runtests/bitops.clif @@ -2,8 +2,7 @@ test run target aarch64 target arm target s390x -; target x86_64 machinst TODO: Not yet implemented on x86_64 -target x86_64 legacy +; target x86_64 TODO: Not yet implemented on x86_64 function %bnot_band() -> b1 { diff --git a/cranelift/filetests/filetests/runtests/bmask.clif b/cranelift/filetests/filetests/runtests/bmask.clif new file mode 100644 index 0000000000..d68e59ec00 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/bmask.clif @@ -0,0 +1,161 @@ +test interpret + +function %bmask_b64_i64(b64) -> i64 { +block0(v0: b64): + v1 = bmask.i64 v0 + return v1 +} +; run: %bmask_b64_i64(true) == -1 +; run: %bmask_b64_i64(false) == 0 + +function %bmask_b64_i32(b64) -> i32 { +block0(v0: b64): + v1 = bmask.i32 v0 + return v1 +} +; run: %bmask_b64_i32(true) == -1 +; run: %bmask_b64_i32(false) == 0 + +function %bmask_b64_i16(b64) -> i16 { +block0(v0: b64): + v1 = bmask.i16 v0 + return v1 +} +; run: %bmask_b64_i16(true) == -1 +; run: %bmask_b64_i16(false) == 0 + +function %bmask_b64_i8(b64) -> i8 { +block0(v0: b64): + v1 = bmask.i8 v0 + return v1 +} +; run: %bmask_b64_i8(true) == -1 +; run: %bmask_b64_i8(false) == 0 + +function %bmask_b32_i64(b32) -> i64 { +block0(v0: b32): + v1 = bmask.i64 v0 + return v1 +} +; run: %bmask_b32_i64(true) == -1 +; run: %bmask_b32_i64(false) == 0 + +function %bmask_b32_i32(b32) -> i32 { +block0(v0: b32): + v1 = bmask.i32 v0 + return v1 +} +; run: %bmask_b32_i32(true) == -1 +; run: %bmask_b32_i32(false) == 0 + +function %bmask_b32_i16(b32) -> i16 { +block0(v0: b32): + v1 = bmask.i16 v0 + return v1 +} +; run: %bmask_b32_i16(true) == -1 +; run: %bmask_b32_i16(false) == 0 + +function %bmask_b32_i8(b32) -> i8 { +block0(v0: b32): + v1 = bmask.i8 v0 + return v1 +} +; run: %bmask_b32_i8(true) == -1 +; run: %bmask_b32_i8(false) == 0 + +function %bmask_b16_i64(b16) -> i64 { +block0(v0: b16): + v1 = bmask.i64 v0 + return 
v1 +} +; run: %bmask_b16_i64(true) == -1 +; run: %bmask_b16_i64(false) == 0 + +function %bmask_b16_i32(b16) -> i32 { +block0(v0: b16): + v1 = bmask.i32 v0 + return v1 +} +; run: %bmask_b16_i32(true) == -1 +; run: %bmask_b16_i32(false) == 0 + +function %bmask_b16_i16(b16) -> i16 { +block0(v0: b16): + v1 = bmask.i16 v0 + return v1 +} +; run: %bmask_b16_i16(true) == -1 +; run: %bmask_b16_i16(false) == 0 + +function %bmask_b16_i8(b16) -> i8 { +block0(v0: b16): + v1 = bmask.i8 v0 + return v1 +} +; run: %bmask_b16_i8(true) == -1 +; run: %bmask_b16_i8(false) == 0 + +function %bmask_b8_i64(b8) -> i64 { +block0(v0: b8): + v1 = bmask.i64 v0 + return v1 +} +; run: %bmask_b8_i64(true) == -1 +; run: %bmask_b8_i64(false) == 0 + +function %bmask_b8_i32(b8) -> i32 { +block0(v0: b8): + v1 = bmask.i32 v0 + return v1 +} +; run: %bmask_b8_i32(true) == -1 +; run: %bmask_b8_i32(false) == 0 + +function %bmask_b8_i16(b8) -> i16 { +block0(v0: b8): + v1 = bmask.i16 v0 + return v1 +} +; run: %bmask_b8_i16(true) == -1 +; run: %bmask_b8_i16(false) == 0 + +function %bmask_b8_i8(b8) -> i8 { +block0(v0: b8): + v1 = bmask.i8 v0 + return v1 +} +; run: %bmask_b8_i8(true) == -1 +; run: %bmask_b8_i8(false) == 0 + +function %bmask_b1_i64(b1) -> i64 { +block0(v0: b1): + v1 = bmask.i64 v0 + return v1 +} +; run: %bmask_b1_i64(true) == -1 +; run: %bmask_b1_i64(false) == 0 + +function %bmask_b1_i32(b1) -> i32 { +block0(v0: b1): + v1 = bmask.i32 v0 + return v1 +} +; run: %bmask_b1_i32(true) == -1 +; run: %bmask_b1_i32(false) == 0 + +function %bmask_b1_i16(b1) -> i16 { +block0(v0: b1): + v1 = bmask.i16 v0 + return v1 +} +; run: %bmask_b1_i16(true) == -1 +; run: %bmask_b1_i16(false) == 0 + +function %bmask_b1_i8(b1) -> i8 { +block0(v0: b1): + v1 = bmask.i8 v0 + return v1 +} +; run: %bmask_b1_i8(true) == -1 +; run: %bmask_b1_i8(false) == 0 diff --git a/cranelift/filetests/filetests/runtests/br.clif b/cranelift/filetests/filetests/runtests/br.clif index 2d56d9ae95..b6c1f1d282 100644 --- 
a/cranelift/filetests/filetests/runtests/br.clif +++ b/cranelift/filetests/filetests/runtests/br.clif @@ -3,8 +3,7 @@ test run target aarch64 target arm target s390x -target x86_64 machinst -target x86_64 legacy +target x86_64 function %jump() -> b1 { block0: diff --git a/cranelift/filetests/filetests/runtests/br_icmp.clif b/cranelift/filetests/filetests/runtests/br_icmp.clif index cfb448a33b..0806ff1adb 100644 --- a/cranelift/filetests/filetests/runtests/br_icmp.clif +++ b/cranelift/filetests/filetests/runtests/br_icmp.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 target s390x -target x86_64 machinst +target x86_64 function %bricmp_eq_i64(i64, i64) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/br_icmp_overflow.clif b/cranelift/filetests/filetests/runtests/br_icmp_overflow.clif index 71c3a43169..d05b83251d 100644 --- a/cranelift/filetests/filetests/runtests/br_icmp_overflow.clif +++ b/cranelift/filetests/filetests/runtests/br_icmp_overflow.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 ; TODO: Merge this with the main br_icmp file when s390x supports overflows. 
; See: https://github.com/bytecodealliance/wasmtime/issues/3060 diff --git a/cranelift/filetests/filetests/runtests/br_table.clif b/cranelift/filetests/filetests/runtests/br_table.clif index 16718d4e76..e58dda3cfe 100644 --- a/cranelift/filetests/filetests/runtests/br_table.clif +++ b/cranelift/filetests/filetests/runtests/br_table.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 target s390x diff --git a/cranelift/filetests/filetests/runtests/breduce.clif b/cranelift/filetests/filetests/runtests/breduce.clif new file mode 100644 index 0000000000..e436b3f800 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/breduce.clif @@ -0,0 +1,85 @@ +test interpret + +function %breduce_b8_b1(b8) -> b1 { +block0(v0: b8): + v1 = breduce.b1 v0 + return v1 +} +; run: %breduce_b8_b1(true) == true +; run: %breduce_b8_b1(false) == false + + +function %breduce_b16_b1(b16) -> b1 { +block0(v0: b16): + v1 = breduce.b1 v0 + return v1 +} +; run: %breduce_b16_b1(true) == true +; run: %breduce_b16_b1(false) == false + +function %breduce_b16_b8(b16) -> b8 { +block0(v0: b16): + v1 = breduce.b8 v0 + return v1 +} +; run: %breduce_b16_b8(true) == true +; run: %breduce_b16_b8(false) == false + + +function %breduce_b32_b1(b32) -> b1 { +block0(v0: b32): + v1 = breduce.b1 v0 + return v1 +} +; run: %breduce_b32_b1(true) == true +; run: %breduce_b32_b1(false) == false + +function %breduce_b32_b8(b32) -> b8 { +block0(v0: b32): + v1 = breduce.b8 v0 + return v1 +} +; run: %breduce_b32_b8(true) == true +; run: %breduce_b32_b8(false) == false + +function %breduce_b32_b16(b32) -> b16 { +block0(v0: b32): + v1 = breduce.b16 v0 + return v1 +} +; run: %breduce_b32_b16(true) == true +; run: %breduce_b32_b16(false) == false + + + +function %breduce_b64_b1(b64) -> b1 { +block0(v0: b64): + v1 = breduce.b1 v0 + return v1 +} +; run: %breduce_b64_b1(true) == true +; run: %breduce_b64_b1(false) == false + +function %breduce_b64_b8(b64) -> b8 { +block0(v0: b64): + 
v1 = breduce.b8 v0 + return v1 +} +; run: %breduce_b64_b8(true) == true +; run: %breduce_b64_b8(false) == false + +function %breduce_b64_b16(b64) -> b16 { +block0(v0: b64): + v1 = breduce.b16 v0 + return v1 +} +; run: %breduce_b64_b16(true) == true +; run: %breduce_b64_b16(false) == false + +function %breduce_b64_b32(b64) -> b32 { +block0(v0: b64): + v1 = breduce.b32 v0 + return v1 +} +; run: %breduce_b64_b32(true) == true +; run: %breduce_b64_b32(false) == false diff --git a/cranelift/filetests/filetests/runtests/const.clif b/cranelift/filetests/filetests/runtests/const.clif index b3067c6d8a..ec91e171bb 100644 --- a/cranelift/filetests/filetests/runtests/const.clif +++ b/cranelift/filetests/filetests/runtests/const.clif @@ -2,8 +2,7 @@ test run target aarch64 target arm target s390x -target x86_64 machinst -target x86_64 legacy +target x86_64 function %i8_iconst_0() -> i8 { block0: diff --git a/cranelift/filetests/filetests/runtests/div-checks.clif b/cranelift/filetests/filetests/runtests/div-checks.clif index 7e362598d1..beb1a077ff 100644 --- a/cranelift/filetests/filetests/runtests/div-checks.clif +++ b/cranelift/filetests/filetests/runtests/div-checks.clif @@ -3,7 +3,7 @@ target aarch64 target arm target s390x set avoid_div_traps=false -target x86_64 machinst +target x86_64 function %i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/extend.clif b/cranelift/filetests/filetests/runtests/extend.clif index 69be211032..524177de10 100644 --- a/cranelift/filetests/filetests/runtests/extend.clif +++ b/cranelift/filetests/filetests/runtests/extend.clif @@ -2,18 +2,14 @@ test run target aarch64 target arm target s390x -; target x86_64 machinst TODO: Not yet implemented on x86_64 -target i686 legacy +target x86_64 function %uextend() -> b1 { block0: v0 = iconst.i32 0xffff_ee00 v1 = uextend.i64 v0 - v2, v3 = isplit v1 - v4 = icmp_imm eq v2, 0xffff_ee00 - v5 = icmp_imm eq v3, 0 - v6 = band v4, v5 - return v6 + v2 = icmp_imm eq 
v1, 0xffff_ee00 + return v2 } ; run @@ -21,10 +17,7 @@ function %sextend() -> b1 { block0: v0 = iconst.i32 0xffff_ee00 v1 = sextend.i64 v0 - v2, v3 = isplit v1 - v4 = icmp_imm eq v2, 0xffff_ee00 - v5 = icmp_imm eq v3, 0xffff_ffff - v6 = band v4, v5 - return v6 + v2 = icmp_imm eq v1, 0xffff_ffff_ffff_ee00 + return v2 } ; run diff --git a/cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif b/cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif index 5bd7b07ada..9bbba57559 100644 --- a/cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif +++ b/cranelift/filetests/filetests/runtests/fmin-max-pseudo-vector.clif @@ -2,7 +2,7 @@ test run ; target s390x TODO: Not yet implemented on s390x set enable_simd target aarch64 -target x86_64 machinst skylake +target x86_64 skylake function %fmin_pseudo_f32x4(f32x4, f32x4) -> f32x4 { block0(v0:f32x4, v1:f32x4): diff --git a/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif b/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif index a1273f9063..628dcc15a9 100644 --- a/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif +++ b/cranelift/filetests/filetests/runtests/fmin-max-pseudo.clif @@ -1,8 +1,8 @@ test run ; target s390x TODO: Not yet implemented on s390x -; target aarch64 TODO: Not yet implemented on aarch64 +target aarch64 set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %fmin_pseudo_f32(f32, f32) -> f32 { block0(v0:f32, v1:f32): diff --git a/cranelift/filetests/filetests/runtests/heap.clif b/cranelift/filetests/filetests/runtests/heap.clif index b203705131..35d80a5151 100644 --- a/cranelift/filetests/filetests/runtests/heap.clif +++ b/cranelift/filetests/filetests/runtests/heap.clif @@ -1,5 +1,5 @@ test run -target x86_64 machinst +target x86_64 target s390x target aarch64 diff --git a/cranelift/filetests/filetests/runtests/i128-arithmetic-legacy.clif b/cranelift/filetests/filetests/runtests/i128-arithmetic-legacy.clif deleted 
file mode 100644 index d5590b2564..0000000000 --- a/cranelift/filetests/filetests/runtests/i128-arithmetic-legacy.clif +++ /dev/null @@ -1,20 +0,0 @@ -test run -target x86_64 legacy haswell - -function %test_imul_i128() -> b1 { -block0: - v11 = iconst.i64 0xf2347ac4503f1e24 - v12 = iconst.i64 0x0098fe985354ab06 - v1 = iconcat v11, v12 - v21 = iconst.i64 0xf606ba453589ef89 - v22 = iconst.i64 0x042e1f3054ca7432 - v2 = iconcat v21, v22 - v31 = iconst.i64 0xbe2044b2742ebd44 - v32 = iconst.i64 0xa363ce3b6849f307 - v3 = iconcat v31, v32 - v4 = imul v1, v2 - v5 = icmp eq v3, v4 - return v5 -} - -; run diff --git a/cranelift/filetests/filetests/runtests/i128-arithmetic.clif b/cranelift/filetests/filetests/runtests/i128-arithmetic.clif index e5bbd08f6f..da51097ec4 100644 --- a/cranelift/filetests/filetests/runtests/i128-arithmetic.clif +++ b/cranelift/filetests/filetests/runtests/i128-arithmetic.clif @@ -1,74 +1,57 @@ test interpret test run +set enable_llvm_abi_extensions=true target aarch64 -; target s390x TODO: Not yet implemented on s390x -target x86_64 machinst +target x86_64 -; TODO: Cleanup these tests when we have native support for i128 immediates in CLIF's parser - -function %add_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - v6 = iadd v4, v5 - - v7, v8 = isplit v6 - return v7, v8 +function %add_i128(i128, i128) -> i128 { +block0(v0: i128,v1: i128): + v2 = iadd v0, v1 + return v2 } -; run: %add_i128(0, 0, 0, 0) == [0, 0] -; run: %add_i128(0, -1, -1, 0) == [-1, -1] -; run: %add_i128(1, 0, 0, 0) == [1, 0] -; run: %add_i128(1, 0, 1, 0) == [2, 0] -; run: %add_i128(1, 0, -1, -1) == [0, 0] -; run: %add_i128(-1, 0, 1, 0) == [0, 1] +; run: %add_i128(0, 0) == 0 +; run: %add_i128(1, 0) == 1 +; run: %add_i128(1, 1) == 2 +; run: %add_i128(1, -1) == 0 +; run: %add_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000, 0x00000000_00000000_FFFFFFFF_FFFFFFFF) == -1 +; run: 
%add_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF, 1) == 0x00000000_00000001_00000000_00000000 -; run: %add_i128(0x01234567_89ABCDEF, 0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210) == [-1, -1] -; run: %add_i128(0x06060606_06060606, 0xA00A00A0_0A00A00A, 0x30303030_30303030, 0x0BB0BB0B_B0BB0BB0) == [0x36363636_36363636, 0xABBABBAB_BABBABBA] -; run: %add_i128(0xC0FFEEEE_C0FFEEEE, 0xC0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111, 0x1DCB1111_1DCB1111) == [0xDECAFFFF_DECAFFFF, 0xDECAFFFF_DECAFFFF] +; run: %add_i128(0x01234567_89ABCDEF_01234567_89ABCDEF, 0xFEDCBA98_76543210_FEDCBA98_76543210) == -1 +; run: %add_i128(0x06060606_06060606_A00A00A0_0A00A00A, 0x30303030_30303030_0BB0BB0B_B0BB0BB0) == 0x36363636_36363636_ABBABBAB_BABBABBA +; run: %add_i128(0xC0FFEEEE_C0FFEEEE_C0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111_1DCB1111_1DCB1111) == 0xDECAFFFF_DECAFFFF_DECAFFFF_DECAFFFF -function %sub_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = isub v4, v5 - - v7, v8 = isplit v6 - return v7, v8 +function %sub_i128(i128, i128) -> i128 { +block0(v0: i128,v1: i128): + v2 = isub v0, v1 + return v2 } -; run: %sub_i128(0, 0, 0, 0) == [0, 0] -; run: %sub_i128(1, 0, 1, 0) == [0, 0] -; run: %sub_i128(1, 0, 0, 0) == [1, 0] -; run: %sub_i128(0, 0, 1, 0) == [-1, -1] -; run: %sub_i128(0, 0, -1, -1) == [1, 0] +; run: %sub_i128(0, 0) == 0 +; run: %sub_i128(1, 1) == 0 +; run: %sub_i128(1, 0) == 1 +; run: %sub_i128(0, 1) == -1 +; run: %sub_i128(0, -1) == 1 -; run: %sub_i128(-1, -1, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210) == [0x01234567_89ABCDEF, 0x01234567_89ABCDEF] -; run: %sub_i128(0x36363636_36363636, 0xABBABBAB_BABBABBA, 0x30303030_30303030, 0x0BB0BB0B_B0BB0BB0) == [0x06060606_06060606, 0xA00A00A0_0A00A00A] -; run: %sub_i128(0xDECAFFFF_DECAFFFF, 0xDECAFFFF_DECAFFFF, 0x1DCB1111_1DCB1111, 0x1DCB1111_1DCB1111) == [0xC0FFEEEE_C0FFEEEE, 0xC0FFEEEE_C0FFEEEE] +; run: %sub_i128(-1, 
0xFEDCBA98_76543210_FEDCBA98_76543210) == 0x01234567_89ABCDEF_01234567_89ABCDEF +; run: %sub_i128(0x36363636_36363636_ABBABBAB_BABBABBA, 0x30303030_30303030_0BB0BB0B_B0BB0BB0) == 0x06060606_06060606_A00A00A0_0A00A00A +; run: %sub_i128(0xDECAFFFF_DECAFFFF_DECAFFFF_DECAFFFF, 0x1DCB1111_1DCB1111_1DCB1111_1DCB1111) == 0xC0FFEEEE_C0FFEEEE_C0FFEEEE_C0FFEEEE -function %mul_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - v6 = imul v4, v5 - - v7, v8 = isplit v6 - return v7, v8 +function %mul_i128(i128, i128) -> i128 { +block0(v0: i128,v1: i128): + v2 = imul v0, v1 + return v2 } -; run: %mul_i128(0, 0, 0, 0) == [0, 0] -; run: %mul_i128(1, 0, 1, 0) == [1, 0] -; run: %mul_i128(1, 0, 0, 0) == [0, 0] -; run: %mul_i128(0, 0, 1, 0) == [0, 0] -; run: %mul_i128(2, 0, 1, 0) == [2, 0] -; run: %mul_i128(2, 0, 2, 0) == [4, 0] -; run: %mul_i128(1, 0, -1, -1) == [-1, -1] -; run: %mul_i128(2, 0, -1, -1) == [-2, -1] +; run: %mul_i128(0, 0) == 0 +; run: %mul_i128(1, 1) == 1 +; run: %mul_i128(1, 0) == 0 +; run: %mul_i128(0, 1) == 0 +; run: %mul_i128(2, 1) == 2 +; run: %mul_i128(2, 2) == 4 +; run: %mul_i128(1, -1) == -1 +; run: %mul_i128(2, -1) == -2 -; run: %mul_i128(0x01010101_01010101, 0x01010101_01010101, 13, 0) == [0x0D0D0D0D_0D0D0D0D, 0x0D0D0D0D_0D0D0D0D] -; run: %mul_i128(13, 0, 0x01010101_01010101, 0x01010101_01010101) == [0x0D0D0D0D_0D0D0D0D, 0x0D0D0D0D_0D0D0D0D] -; run: %mul_i128(0x00000000_01234567, 0x89ABCDEF_00000000, 0x00000000_FEDCBA98, 0x76543210_00000000) == [0x0121FA00_23E20B28, 0xE2946058_00000000] -; run: %mul_i128(0xC0FFEEEE_C0FFEEEE, 0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF, 0xDECAFFFF_DECAFFFF) == [0xDB6B1E48_19BA1112, 0x5ECD38B5_9D1C2B7E] -; run: %mul_i128(0xC0FFEEEE_C0FFEEEE, 0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF, 0xDECAFFFF_DECAFFFF) == [0xDB6B1E48_19BA1112, 0x5ECD38B5_9D1C2B7E] +; run: %mul_i128(0x01010101_01010101_01010101_01010101, 13) == 0x0D0D0D0D_0D0D0D0D_0D0D0D0D_0D0D0D0D +; 
run: %mul_i128(13, 0x01010101_01010101_01010101_01010101) == 0x0D0D0D0D_0D0D0D0D_0D0D0D0D_0D0D0D0D +; run: %mul_i128(0x00000000_01234567_89ABCDEF_00000000, 0x00000000_FEDCBA98_76543210_00000000) == 0x2236D88F_E5618CF0_00000000_00000000 +; run: %mul_i128(0xC0FFEEEE_C0FFEEEE_C0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF_DECAFFFF_DECAFFFF) == 0x5ECD38B5_9D1C2B7E_DB6B1E48_19BA1112 diff --git a/cranelift/filetests/filetests/runtests/i128-bandnot.clif b/cranelift/filetests/filetests/runtests/i128-bandnot.clif new file mode 100644 index 0000000000..bb2a4ba2ba --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-bandnot.clif @@ -0,0 +1,14 @@ +test run +target aarch64 + +function %band_not_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = band_not v0, v1 + return v2 +} +; run: %band_not_i128(0, 0) == 0 +; run: %band_not_i128(-1, 0) == -1 +; run: %band_not_i128(-1, -1) == 0 +; run: %band_not_i128(-1, 0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %band_not_i128(0xFEDCBA98_76543210_01234567_89ABCDEF, 0x01234567_89ABCDEF_FEDCBA98_76543210) == 0xFEDCBA98_76543210_01234567_89ABCDEF +; run: %band_not_i128(0xFEEEFFFF_FEEEFFFF_F1FFFEFE_F1FFFEFE, 0x20240000_20240000_31001010_31001010) == 0xDECAFFFF_DECAFFFF_C0FFEEEE_C0FFEEEE diff --git a/cranelift/filetests/filetests/runtests/i128-bextend.clif b/cranelift/filetests/filetests/runtests/i128-bextend.clif new file mode 100644 index 0000000000..34372f98e4 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-bextend.clif @@ -0,0 +1,42 @@ +test interpret + +function %bextend_b1_b128(b1) -> b128 { +block0(v0: b1): + v1 = bextend.b128 v0 + return v1 +} +; run: %bextend_b1_b128(true) == true +; run: %bextend_b1_b128(false) == false + +function %bextend_b8_b128(b8) -> b128 { +block0(v0: b8): + v1 = bextend.b128 v0 + return v1 +} +; run: %bextend_b8_b128(true) == true +; run: %bextend_b8_b128(false) == false + +function %bextend_b16_b128(b16) -> b128 { +block0(v0: b16): + v1 = 
bextend.b128 v0 + return v1 +} +; run: %bextend_b16_b128(true) == true +; run: %bextend_b16_b128(false) == false + +function %bextend_b32_b128(b32) -> b128 { +block0(v0: b32): + v1 = bextend.b128 v0 + return v1 +} +; run: %bextend_b32_b128(true) == true +; run: %bextend_b32_b128(false) == false + + +function %bextend_b64_b128(b64) -> b128 { +block0(v0: b64): + v1 = bextend.b128 v0 + return v1 +} +; run: %bextend_b64_b128(true) == true +; run: %bextend_b64_b128(false) == false diff --git a/cranelift/filetests/filetests/runtests/i128-bint.clif b/cranelift/filetests/filetests/runtests/i128-bint.clif index 4c0ed4f890..83c9152d51 100644 --- a/cranelift/filetests/filetests/runtests/i128-bint.clif +++ b/cranelift/filetests/filetests/runtests/i128-bint.clif @@ -1,12 +1,12 @@ test run +set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst +target x86_64 -function %bint_b8_i128() -> i64, i64 { +function %bint_b8_i128() -> i128 { block0: v0 = bconst.b8 true v1 = bint.i128 v0 - v2, v3 = isplit.i128 v1 - return v2, v3 + return v1 } -; run: %bint_b8_i128() == [1, 0] +; run: %bint_b8_i128() == 1 diff --git a/cranelift/filetests/filetests/runtests/i128-bitops-cls.clif b/cranelift/filetests/filetests/runtests/i128-bitops-cls.clif deleted file mode 100644 index 14c82ceec4..0000000000 --- a/cranelift/filetests/filetests/runtests/i128-bitops-cls.clif +++ /dev/null @@ -1,24 +0,0 @@ -test run -target aarch64 - -; TODO: Move this test into i128-bitops-count.clif when x86_64 supports it -function %cls_i128(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - - v3 = cls v2 - - v4, v5 = isplit v3 - v6 = iadd v4, v5 - return v6 -} -; run: %cls_i128(0x00000000_00000000, 0x00000000_00000000) == 127 -; run: %cls_i128(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000000) == 63 -; run: %cls_i128(0x00000000_00000000, 0xFFFFFFFF_FFFFFFFF) == 63 -; run: %cls_i128(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == 127 -; run: %cls_i128(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == 
0 -; run: %cls_i128(0xFFFFFFFF_FFFFFFFF, 0x3FFFFFFF_FFFFFFFF) == 1 -; run: %cls_i128(0x7FFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == 63 -; run: %cls_i128(0x80000000_00000000, 0xC0000000_00000000) == 1 -; run: %cls_i128(0x00000000_00000000, 0xC0000000_00000000) == 1 -; run: %cls_i128(0x80000000_00000000, 0x80000000_00000000) == 0 diff --git a/cranelift/filetests/filetests/runtests/i128-bitops-count.clif b/cranelift/filetests/filetests/runtests/i128-bitops-count.clif index 6c0f23dd1f..60d74e874b 100644 --- a/cranelift/filetests/filetests/runtests/i128-bitops-count.clif +++ b/cranelift/filetests/filetests/runtests/i128-bitops-count.clif @@ -1,63 +1,48 @@ test run +set enable_llvm_abi_extensions=true target aarch64 -; target s390x TODO: Not yet implemented on s390x -target x86_64 machinst +target x86_64 -function %ctz_i128(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - - v3 = ctz v2 - - v4, v5 = isplit v3 - v6 = iadd v4, v5 - return v6 +function %ctz_i128(i128) -> i128 { +block0(v0: i128): + v1 = ctz v0 + return v1 } -; run: %ctz_i128(0x00000000_00000000, 0x00000000_00000000) == 128 -; run: %ctz_i128(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000000) == 0 -; run: %ctz_i128(0x00000000_00000000, 0xFFFFFFFF_FFFFFFFF) == 64 -; run: %ctz_i128(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == 0 -; run: %ctz_i128(0xFFFFFFFF_00000000, 0xF0000000_00000000) == 32 -; run: %ctz_i128(0xF0000000_00000000, 0xFF000000_00000000) == 60 -; run: %ctz_i128(0x00000001_00000000, 0x00000000_00000000) == 32 -; run: %ctz_i128(0x00000000_00000000, 0x00000001_00000000) == 96 -; run: %ctz_i128(0x00000000_00010000, 0x00000001_00000000) == 16 -; run: %ctz_i128(0x00000000_00010000, 0x00000000_00000000) == 16 +; run: %ctz_i128(0x00000000_00000000_00000000_00000000) == 128 +; run: %ctz_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 0 +; run: %ctz_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 64 +; run: %ctz_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 0 +; run: 
%ctz_i128(0xF0000000_00000000_FFFFFFFF_00000000) == 32 +; run: %ctz_i128(0xFF000000_00000000_F0000000_00000000) == 60 +; run: %ctz_i128(0x00000000_00000000_00000001_00000000) == 32 +; run: %ctz_i128(0x00000001_00000000_00000000_00000000) == 96 +; run: %ctz_i128(0x00000001_00000000_00000000_00010000) == 16 +; run: %ctz_i128(0x00000000_00000000_00000000_00010000) == 16 -function %clz_i128(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - - v3 = clz v2 - - v4, v5 = isplit v3 - v6 = iadd v4, v5 - return v6 +function %clz_i128(i128) -> i128 { +block0(v0: i128): + v1 = clz v0 + return v1 } -; run: %clz_i128(0x00000000_00000000, 0x00000000_00000000) == 128 -; run: %clz_i128(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000000) == 64 -; run: %clz_i128(0x00000000_00000000, 0xFFFFFFFF_FFFFFFFF) == 0 -; run: %clz_i128(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == 0 -; run: %clz_i128(0xFFFFFFFF_FFFFFFFF, 0x40000000_00000000) == 1 -; run: %clz_i128(0xFFFFFFFF_FFFFFFFF, 0x20000000_00000000) == 2 -; run: %clz_i128(0x00000000_00000000, 0x00000000_80000000) == 32 -; run: %clz_i128(0x00000000_00000000, 0x00000001_00000000) == 31 -; run: %clz_i128(0x00000000_00010000, 0x00000001_00000000) == 31 -; run: %clz_i128(0x00000000_00010000, 0x00000000_00000000) == 111 +; run: %clz_i128(0x00000000_00000000_00000000_00000000) == 128 +; run: %clz_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 64 +; run: %clz_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0 +; run: %clz_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 0 +; run: %clz_i128(0x40000000_00000000_FFFFFFFF_FFFFFFFF) == 1 +; run: %clz_i128(0x20000000_00000000_FFFFFFFF_FFFFFFFF) == 2 +; run: %clz_i128(0x00000000_80000000_00000000_00000000) == 32 +; run: %clz_i128(0x00000001_00000000_00000000_00000000) == 31 +; run: %clz_i128(0x00000001_00000000_00000000_00010000) == 31 +; run: %clz_i128(0x00000000_00000000_00000000_00010000) == 111 -function %popcnt_i128(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - - v3 = 
popcnt v2 - - v4, v5 = isplit v3 - v6 = iadd v4, v5 - return v6 +function %popcnt_i128(i128) -> i128 { +block0(v0: i128): + v1 = popcnt v0 + return v1 } -; run: %popcnt_i128(0x00000000_00000000, 0x00000000_00000000) == 0 -; run: %popcnt_i128(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000000) == 64 -; run: %popcnt_i128(0x00000000_00000000, 0xFFFFFFFF_FFFFFFFF) == 64 -; run: %popcnt_i128(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == 128 -; run: %popcnt_i128(0x55555555_55555555, 0x55555555_55555555) == 64 -; run: %popcnt_i128(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == 96 +; run: %popcnt_i128(0x00000000_00000000_00000000_00000000) == 0 +; run: %popcnt_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 64 +; run: %popcnt_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 64 +; run: %popcnt_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 128 +; run: %popcnt_i128(0x55555555_55555555_55555555_55555555) == 64 +; run: %popcnt_i128(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == 96 diff --git a/cranelift/filetests/filetests/runtests/i128-bitops.clif b/cranelift/filetests/filetests/runtests/i128-bitops.clif index 1723186e6b..72db60a970 100644 --- a/cranelift/filetests/filetests/runtests/i128-bitops.clif +++ b/cranelift/filetests/filetests/runtests/i128-bitops.clif @@ -1,153 +1,54 @@ test run +set enable_llvm_abi_extensions=true target aarch64 -; target s390x TODO: Not yet implemented on s390x -; target x86_64 TODO: Not yet implemented on x86_64 +target x86_64 -; i128 tests -; TODO: Cleanup these tests when we have native support for i128 immediates in CLIF's parser -function %bnot_i128(i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64): -v2 = iconcat v0, v1 - -v3 = bnot v2 - -v4, v5 = isplit v3 -return v4, v5 +function %bnot_i128(i128) -> i128 { +block0(v0: i128): + v1 = bnot v0 + return v1 } -; run: %bnot_i128(0, 0) == [-1, -1] -; run: %bnot_i128(-1, -1) == [0, 0] -; run: %bnot_i128(-1, 0) == [0, -1] - -; run: %bnot_i128(0x3F001111_3F001111, 0x21350000_21350000) == [0xC0FFEEEE_C0FFEEEE, 
0xDECAFFFF_DECAFFFF] +; run: %bnot_i128(0) == -1 +; run: %bnot_i128(-1) == 0 +; run: %bnot_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %bnot_i128(0x3F001111_3F001111_21350000_21350000) == 0xC0FFEEEE_C0FFEEEE_DECAFFFF_DECAFFFF -function %band_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): -v4 = iconcat v0, v1 -v5 = iconcat v2, v3 -v6 = band v4, v5 - -v7, v8 = isplit v6 -return v7, v8 +function %band_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = band v0, v1 + return v2 } -; run: %band_i128(0, 0, 0, 0) == [0, 0] -; run: %band_i128(-1, -1, 0, 0) == [0, 0] -; run: %band_i128(-1, -1, -1, -1) == [-1, -1] -; run: %band_i128(-1, -1, 0, -1) == [0, -1] - -; run: %band_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [0, 0] -; run: %band_i128(0xF1FFFEFE_F1FFFEFE, 0xFEEEFFFF_FEEEFFFF, 0xCEFFEFEF_CEFFEFEF, 0xDFDBFFFF_DFDBFFFF) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF] +; run: %band_i128(0, 0) == 0 +; run: %band_i128(-1, 0) == 0 +; run: %band_i128(-1, -1) == -1 +; run: %band_i128(-1, 0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %band_i128(0xFEDCBA98_76543210_01234567_89ABCDEF, 0x01234567_89ABCDEF_FEDCBA98_76543210) == 0 +; run: %band_i128(0xFEEEFFFF_FEEEFFFF_F1FFFEFE_F1FFFEFE, 0xDFDBFFFF_DFDBFFFF_CEFFEFEF_CEFFEFEF) == 0xDECAFFFF_DECAFFFF_C0FFEEEE_C0FFEEEE -function %bor_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): -v4 = iconcat v0, v1 -v5 = iconcat v2, v3 - -v6 = bor v4, v5 - -v7, v8 = isplit v6 -return v7, v8 +function %bor_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = bor v0, v1 + return v2 } -; run: %bor_i128(0, 0, 0, 0) == [0, 0] -; run: %bor_i128(-1, -1, 0, 0) == [-1, -1] -; run: %bor_i128(-1, -1, -1, -1) == [-1, -1] -; run: %bor_i128(0, 0, 0, -1) == [0, -1] - -; run: %bor_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 
0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [-1, -1] -; run: %bor_i128(0x80AAAAAA_80AAAAAA, 0x8A8AAAAA_8A8AAAAA, 0x40554444_40554444, 0x54405555_54405555) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF] +; run: %bor_i128(0, 0) == 0 +; run: %bor_i128(-1, 0) == -1 +; run: %bor_i128(-1, -1) == -1 +; run: %bor_i128(0, 0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %bor_i128(0xFEDCBA98_76543210_01234567_89ABCDEF, 0x01234567_89ABCDEF_FEDCBA98_76543210) == -1 +; run: %bor_i128(0x8A8AAAAA_8A8AAAAA_80AAAAAA_80AAAAAA, 0x54405555_54405555_40554444_40554444) == 0xDECAFFFF_DECAFFFF_C0FFEEEE_C0FFEEEE -function %bxor_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): -v4 = iconcat v0, v1 -v5 = iconcat v2, v3 - -v6 = bxor v4, v5 - -v7, v8 = isplit v6 -return v7, v8 +function %bxor_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = bxor v0, v1 + return v2 } -; run: %bxor_i128(0, 0, 0, 0) == [0, 0] -; run: %bxor_i128(-1, -1, 0, 0) == [-1, -1] -; run: %bxor_i128(-1, -1, -1, -1) == [0, 0] -; run: %bxor_i128(-1, -1, 0, -1) == [-1, 0] - -; run: %bxor_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [-1, -1] -; run: %bxor_i128(0x8FA50A64_8FA50A64, 0x9440A07D_9440A07D, 0x4F5AE48A_4F5AE48A, 0x4A8A5F82_4A8A5F82) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF] - - -function %band_not_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): -v4 = iconcat v0, v1 -v5 = iconcat v2, v3 - -v6 = band_not v4, v5 - -v7, v8 = isplit v6 -return v7, v8 -} -; run: %band_not_i128(0, 0, 0, 0) == [0, 0] -; run: %band_not_i128(-1, -1, 0, 0) == [-1, -1] -; run: %band_not_i128(-1, -1, -1, -1) == [0, 0] -; run: %band_not_i128(-1, -1, 0, -1) == [-1, 0] - -; run: %band_not_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [0x01234567_89ABCDEF, 0xFEDCBA98_76543210] -; run: %band_not_i128(0xF1FFFEFE_F1FFFEFE, 0xFEEEFFFF_FEEEFFFF, 
0x31001010_31001010, 0x20240000_20240000) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF] - - -function %bor_not_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): -v4 = iconcat v0, v1 -v5 = iconcat v2, v3 - -v6 = bor_not v4, v5 - -v7, v8 = isplit v6 -return v7, v8 -} -; run: %bor_not_i128(0, 0, 0, 0) == [-1, -1] -; run: %bor_not_i128(-1, -1, 0, 0) == [-1, -1] -; run: %bor_not_i128(-1, -1, -1, -1) == [-1, -1] -; run: %bor_not_i128(-1, 0, 0, -1) == [-1, 0] - -; run: %bor_not_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [0x01234567_89ABCDEF, 0xFEDCBA98_76543210] -; run: %bor_not_i128(0x80AAAAAA_80AAAAAA, 0x8A8AAAAA_8A8AAAAA, 0xBFAABBBB_BFAABBBB, 0xABBFAAAA_ABBFAAAA) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF] - - -function %bxor_not_i128(i64, i64, i64, i64) -> i64, i64 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): -v4 = iconcat v0, v1 -v5 = iconcat v2, v3 - -v6 = bxor_not v4, v5 - -v7, v8 = isplit v6 -return v7, v8 -} -; run: %bxor_not_i128(0, 0, 0, 0) == [-1, -1] -; run: %bxor_not_i128(-1, -1, 0, 0) == [0, 0] -; run: %bxor_not_i128(-1, -1, -1, -1) == [-1, -1] -; run: %bxor_not_i128(-1, -1, 0, -1) == [0, -1] - -; run: %bxor_not_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [0, 0] -; run: %bxor_not_i128(0x8FA50A64_8FA50A64, 0x9440A07D_9440A07D, 0xB0A51B75_B0A51B75, 0xB575A07D_B575A07D) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF] - - -function %bitrev_i128(i64, i64) -> i64, i64 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - - v3 = bitrev v2 - - v4, v5 = isplit v3 - return v4, v5 -} -; run: %bitrev_i128(0, 0) == [0, 0] -; run: %bitrev_i128(-1, -1) == [-1, -1] -; run: %bitrev_i128(-1, 0) == [0, -1] -; run: %bitrev_i128(0, -1) == [-1, 0] -; run: %bitrev_i128(0x00000000_00000000, 0x80000000_00000000) == [1, 0] -; run: %bitrev_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == [0x084C2A6E_195D3B7F, 0xF7B3D591_E6A2C480] -; run: 
%bitrev_i128(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == [0x7777FF03_FFFF537B, 0xFFFF537B_7777FF03] +; run: %bxor_i128(0, 0) == 0 +; run: %bxor_i128(-1, 0) == -1 +; run: %bxor_i128(-1, -1) == 0 +; run: %bxor_i128(-1, 0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %bxor_i128(0xFEDCBA98_76543210_01234567_89ABCDEF, 0x01234567_89ABCDEF_FEDCBA98_76543210) == -1 +; run: %bxor_i128(0x9440A07D_9440A07D_8FA50A64_8FA50A64, 0x4A8A5F82_4A8A5F82_4F5AE48A_4F5AE48A) == 0xDECAFFFF_DECAFFFF_C0FFEEEE_C0FFEEEE diff --git a/cranelift/filetests/filetests/runtests/i128-bitrev.clif b/cranelift/filetests/filetests/runtests/i128-bitrev.clif index e494b32597..c685c45ee0 100644 --- a/cranelift/filetests/filetests/runtests/i128-bitrev.clif +++ b/cranelift/filetests/filetests/runtests/i128-bitrev.clif @@ -1,7 +1,7 @@ test run +set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst -target x86_64 legacy +target x86_64 function %reverse_bits_zero() -> b1 { block0: @@ -46,3 +46,17 @@ block0: return v7 } ; run + + +function %bitrev_i128(i128) -> i128 { +block0(v0: i128): + v1 = bitrev v0 + return v1 +} +; run: %bitrev_i128(0) == 0 +; run: %bitrev_i128(-1) == -1 +; run: %bitrev_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %bitrev_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 0xFFFFFFFF_FFFFFFFF_00000000_00000000 +; run: %bitrev_i128(0x80000000_00000000_00000000_00000000) == 1 +; run: %bitrev_i128(0xFEDCBA98_76543210_01234567_89ABCDEF) == 0xF7B3D591_E6A2C480_084C2A6E_195D3B7F +; run: %bitrev_i128(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == 0xFFFF537B_7777FF03_7777FF03_FFFF537B diff --git a/cranelift/filetests/filetests/runtests/i128-bmask.clif b/cranelift/filetests/filetests/runtests/i128-bmask.clif new file mode 100644 index 0000000000..f87df7f2ab --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-bmask.clif @@ -0,0 +1,82 @@ +test interpret + +function %bmask_b128_i128(b128) -> 
i128 { +block0(v0: b128): + v1 = bmask.i128 v0 + return v1 +} +; run: %bmask_b128_i128(true) == -1 +; run: %bmask_b128_i128(false) == 0 + +function %bmask_b128_i64(b128) -> i64 { +block0(v0: b128): + v1 = bmask.i64 v0 + return v1 +} +; run: %bmask_b128_i64(true) == -1 +; run: %bmask_b128_i64(false) == 0 + +function %bmask_b128_i32(b128) -> i32 { +block0(v0: b128): + v1 = bmask.i32 v0 + return v1 +} +; run: %bmask_b128_i32(true) == -1 +; run: %bmask_b128_i32(false) == 0 + +function %bmask_b128_i16(b128) -> i16 { +block0(v0: b128): + v1 = bmask.i16 v0 + return v1 +} +; run: %bmask_b128_i16(true) == -1 +; run: %bmask_b128_i16(false) == 0 + +function %bmask_b128_i8(b128) -> i8 { +block0(v0: b128): + v1 = bmask.i8 v0 + return v1 +} +; run: %bmask_b128_i8(true) == -1 +; run: %bmask_b128_i8(false) == 0 + + +function %bmask_b64_i128(b64) -> i128 { +block0(v0: b64): + v1 = bmask.i128 v0 + return v1 +} +; run: %bmask_b64_i128(true) == -1 +; run: %bmask_b64_i128(false) == 0 + +function %bmask_b32_i128(b32) -> i128 { +block0(v0: b32): + v1 = bmask.i128 v0 + return v1 +} +; run: %bmask_b32_i128(true) == -1 +; run: %bmask_b32_i128(false) == 0 + +function %bmask_b16_i128(b16) -> i128 { +block0(v0: b16): + v1 = bmask.i128 v0 + return v1 +} +; run: %bmask_b16_i128(true) == -1 +; run: %bmask_b16_i128(false) == 0 + +function %bmask_b8_i128(b8) -> i128 { +block0(v0: b8): + v1 = bmask.i128 v0 + return v1 +} +; run: %bmask_b8_i128(true) == -1 +; run: %bmask_b8_i128(false) == 0 + +function %bmask_b1_i128(b1) -> i128 { +block0(v0: b1): + v1 = bmask.i128 v0 + return v1 +} +; run: %bmask_b1_i128(true) == -1 +; run: %bmask_b1_i128(false) == 0 diff --git a/cranelift/filetests/filetests/runtests/i128-bornot.clif b/cranelift/filetests/filetests/runtests/i128-bornot.clif new file mode 100644 index 0000000000..e86f71915d --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-bornot.clif @@ -0,0 +1,14 @@ +test run +target aarch64 + +function %bor_not_i128(i128, i128) -> i128 { +block0(v0: 
i128, v1: i128): + v2 = bor_not v0, v1 + return v2 +} +; run: %bor_not_i128(0, 0) == -1 +; run: %bor_not_i128(-1, 0) == -1 +; run: %bor_not_i128(-1, -1) == -1 +; run: %bor_not_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0x00000000_00000000_FFFFFFFF_FFFFFFFF +; run: %bor_not_i128(0xFEDCBA98_76543210_01234567_89ABCDEF, 0x01234567_89ABCDEF_FEDCBA98_76543210) == 0xFEDCBA98_76543210_01234567_89ABCDEF +; run: %bor_not_i128(0x8A8AAAAA_8A8AAAAA_80AAAAAA_80AAAAAA, 0xABBFAAAA_ABBFAAAA_BFAABBBB_BFAABBBB) == 0xDECAFFFF_DECAFFFF_C0FFEEEE_C0FFEEEE diff --git a/cranelift/filetests/filetests/runtests/i128-br.clif b/cranelift/filetests/filetests/runtests/i128-br.clif index 74b4f6fa8f..0ed0e9caf2 100644 --- a/cranelift/filetests/filetests/runtests/i128-br.clif +++ b/cranelift/filetests/filetests/runtests/i128-br.clif @@ -1,45 +1,42 @@ test run +set enable_llvm_abi_extensions=true target aarch64 -; target s390x TODO: Not yet implemented on s390x -target x86_64 machinst -target x86_64 legacy +target x86_64 -function %i128_brz(i64, i64) -> b1 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - brz v2, block2 +function %i128_brz(i128) -> b1 { +block0(v0: i128): + brz v0, block2 jump block1 block1: - v3 = bconst.b1 false - return v3 + v1 = bconst.b1 false + return v1 block2: - v4 = bconst.b1 true - return v4 + v2 = bconst.b1 true + return v2 } -; run: %i128_brz(0, 0) == true -; run: %i128_brz(-1, 0) == false -; run: %i128_brz(0, -1) == false -; run: %i128_brz(-1, -1) == false +; run: %i128_brz(0) == true +; run: %i128_brz(-1) == false +; run: %i128_brz(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == false +; run: %i128_brz(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == false -function %i128_brnz(i64, i64) -> b1 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - brnz v2, block2 +function %i128_brnz(i128) -> b1 { +block0(v0: i128): + brnz v0, block2 jump block1 block1: - v3 = bconst.b1 false - return v3 + v1 = bconst.b1 false + return v1 block2: - v4 = 
bconst.b1 true - return v4 + v2 = bconst.b1 true + return v2 } -; run: %i128_brnz(0, 0) == false -; run: %i128_brnz(-1, 0) == true -; run: %i128_brnz(0, -1) == true -; run: %i128_brnz(-1, -1) == true +; run: %i128_brnz(0) == false +; run: %i128_brnz(-1) == true +; run: %i128_brnz(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %i128_brnz(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true diff --git a/cranelift/filetests/filetests/runtests/i128-breduce.clif b/cranelift/filetests/filetests/runtests/i128-breduce.clif new file mode 100644 index 0000000000..93efa6c7a6 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-breduce.clif @@ -0,0 +1,41 @@ +test interpret + +function %breduce_b128_b1(b128) -> b1 { +block0(v0: b128): + v1 = breduce.b1 v0 + return v1 +} +; run: %breduce_b128_b1(true) == true +; run: %breduce_b128_b1(false) == false + +function %breduce_b128_b8(b128) -> b8 { +block0(v0: b128): + v1 = breduce.b8 v0 + return v1 +} +; run: %breduce_b128_b8(true) == true +; run: %breduce_b128_b8(false) == false + +function %breduce_b128_b16(b128) -> b16 { +block0(v0: b128): + v1 = breduce.b16 v0 + return v1 +} +; run: %breduce_b128_b16(true) == true +; run: %breduce_b128_b16(false) == false + +function %breduce_b128_b32(b128) -> b32 { +block0(v0: b128): + v1 = breduce.b32 v0 + return v1 +} +; run: %breduce_b128_b32(true) == true +; run: %breduce_b128_b32(false) == false + +function %breduce_b128_b64(b128) -> b64 { +block0(v0: b128): + v1 = breduce.b64 v0 + return v1 +} +; run: %breduce_b128_b64(true) == true +; run: %breduce_b128_b64(false) == false diff --git a/cranelift/filetests/filetests/runtests/i128-bricmp.clif b/cranelift/filetests/filetests/runtests/i128-bricmp.clif index 0f5f96536b..29f340fbdb 100644 --- a/cranelift/filetests/filetests/runtests/i128-bricmp.clif +++ b/cranelift/filetests/filetests/runtests/i128-bricmp.clif @@ -1,321 +1,285 @@ test run target aarch64 -function %i128_bricmp_eq(i64, i64, i64, i64) -> b1 { -block0(v0: i64, v1: 
i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 eq v4, v5, block2 +function %i128_bricmp_eq(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + br_icmp.i128 eq v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_eq(0, 0, 0, 0) == true -; run: %i128_bricmp_eq(-1, -1, -1, -1) == true -; run: %i128_bricmp_eq(-1, -1, 0, 0) == false -; run: %i128_bricmp_eq(-1, -1, 0, -1) == false -; run: %i128_bricmp_eq(-1, 0, -1, -1) == false -; run: %i128_bricmp_eq(0, -1, -1, -1) == false -; run: %i128_bricmp_eq(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE, 0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true -; run: %i128_bricmp_eq(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001) == false -; run: %i128_bricmp_eq(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001, 0x00000000_00000001) == false +; run: %i128_bricmp_eq(0, 0) == true +; run: %i128_bricmp_eq(-1, -1) == true +; run: %i128_bricmp_eq(-1, 0) == false +; run: %i128_bricmp_eq(-1, 0xFFFFFFFF_FFFFFFFF_00000000_00000000) == false +; run: %i128_bricmp_eq(0x00000000_00000000_FFFFFFFF_FFFFFFFF, -1) == false +; run: %i128_bricmp_eq(0xFFFFFFFF_FFFFFFFF_00000000_00000000, -1) == false +; run: %i128_bricmp_eq(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true +; run: %i128_bricmp_eq(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == false +; run: %i128_bricmp_eq(0x00000000_00000001_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == false -function %i128_bricmp_ne(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 ne v4, v5, block2 +function %i128_bricmp_ne(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 ne v0, v1, block2 jump block1 block1: - v6 = 
bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_ne(0, 0, 0, 0) == false -; run: %i128_bricmp_ne(-1, -1, -1, -1) == false -; run: %i128_bricmp_ne(-1, -1, 0, 0) == true -; run: %i128_bricmp_ne(-1, -1, 0, -1) == true -; run: %i128_bricmp_ne(-1, 0, -1, -1) == true -; run: %i128_bricmp_ne(0, -1, -1, -1) == true -; run: %i128_bricmp_ne(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE, 0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == false -; run: %i128_bricmp_ne(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001) == true -; run: %i128_bricmp_ne(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001, 0x00000000_00000001) == true +; run: %i128_bricmp_ne(0, 0) == false +; run: %i128_bricmp_ne(-1, -1) == false +; run: %i128_bricmp_ne(-1, 0) == true +; run: %i128_bricmp_ne(-1, 0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true +; run: %i128_bricmp_ne(0x00000000_00000000_FFFFFFFF_FFFFFFFF, -1) == true +; run: %i128_bricmp_ne(0xFFFFFFFF_FFFFFFFF_00000000_00000000, -1) == true +; run: %i128_bricmp_ne(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == false +; run: %i128_bricmp_ne(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == true +; run: %i128_bricmp_ne(0x00000000_00000001_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == true -function %i128_bricmp_slt(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 slt v4, v5, block2 +function %i128_bricmp_slt(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 slt v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_slt(0, 0, 0, 0) == false -; run: %i128_bricmp_slt(1, 0, 1, 0) == false -; run: 
%i128_bricmp_slt(0, 0, 1, 0) == true -; run: %i128_bricmp_slt(-1, -1, 0, 0) == true -; run: %i128_bricmp_slt(0, 0, -1, -1) == false -; run: %i128_bricmp_slt(-1, -1, -1, -1) == false -; run: %i128_bricmp_slt(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: %i128_bricmp_slt(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %i128_bricmp_slt(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false +; run: %i128_bricmp_slt(0, 0) == false +; run: %i128_bricmp_slt(1, 1) == false +; run: %i128_bricmp_slt(0, 1) == true +; run: %i128_bricmp_slt(-1, 0) == true +; run: %i128_bricmp_slt(0, -1) == false +; run: %i128_bricmp_slt(-1, -1) == false +; run: %i128_bricmp_slt(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_slt(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == true +; run: %i128_bricmp_slt(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %i128_bricmp_ult(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 ult v4, v5, block2 +function %i128_bricmp_ult(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 ult v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_ult(0, 0, 0, 0) == false -; run: %i128_bricmp_ult(1, 0, 1, 0) == false -; run: %i128_bricmp_ult(0, 0, 1, 0) == true -; run: %i128_bricmp_ult(-1, -1, 0, 0) == false -; run: %i128_bricmp_ult(0, 0, -1, -1) == true -; run: %i128_bricmp_ult(-1, -1, -1, -1) == false -; run: %i128_bricmp_ult(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: 
%i128_bricmp_ult(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %i128_bricmp_ult(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false +; run: %i128_bricmp_ult(0, 0) == false +; run: %i128_bricmp_ult(1, 1) == false +; run: %i128_bricmp_ult(0, 1) == true +; run: %i128_bricmp_ult(-1, 0) == false +; run: %i128_bricmp_ult(0, -1) == true +; run: %i128_bricmp_ult(-1, -1) == false +; run: %i128_bricmp_ult(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_ult(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == true +; run: %i128_bricmp_ult(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %i128_bricmp_sle(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 sle v4, v5, block2 +function %i128_bricmp_sle(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 sle v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_sle(0, 0, 0, 0) == true -; run: %i128_bricmp_sle(1, 0, 1, 0) == true -; run: %i128_bricmp_sle(0, 0, 1, 0) == true -; run: %i128_bricmp_sle(-1, -1, 0, 0) == true -; run: %i128_bricmp_sle(0, 0, -1, -1) == false -; run: %i128_bricmp_sle(-1, -1, -1, -1) == true -; run: %i128_bricmp_sle(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: %i128_bricmp_sle(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %i128_bricmp_sle(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false +; run: %i128_bricmp_sle(0, 0) == true +; run: %i128_bricmp_sle(1, 1) == true +; run: %i128_bricmp_sle(0, 1) == 
true +; run: %i128_bricmp_sle(-1, 0) == true +; run: %i128_bricmp_sle(0, -1) == false +; run: %i128_bricmp_sle(-1, -1) == true +; run: %i128_bricmp_sle(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_sle(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == true +; run: %i128_bricmp_sle(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %i128_bricmp_ule(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 ule v4, v5, block2 +function %i128_bricmp_ule(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 ule v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_ule(0, 0, 0, 0) == true -; run: %i128_bricmp_ule(1, 0, 1, 0) == true -; run: %i128_bricmp_ule(0, 0, 1, 0) == true -; run: %i128_bricmp_ule(-1, -1, 0, 0) == false -; run: %i128_bricmp_ule(0, 0, -1, -1) == true -; run: %i128_bricmp_ule(-1, -1, -1, -1) == true -; run: %i128_bricmp_ule(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: %i128_bricmp_ule(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %i128_bricmp_ule(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false +; run: %i128_bricmp_ule(0, 0) == true +; run: %i128_bricmp_ule(1, 1) == true +; run: %i128_bricmp_ule(0, 1) == true +; run: %i128_bricmp_ule(-1, 0) == false +; run: %i128_bricmp_ule(0, -1) == true +; run: %i128_bricmp_ule(-1, -1) == true +; run: %i128_bricmp_ule(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_ule(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) 
== true +; run: %i128_bricmp_ule(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %i128_bricmp_sgt(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 sgt v4, v5, block2 +function %i128_bricmp_sgt(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 sgt v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_sgt(0, 0, 0, 0) == false -; run: %i128_bricmp_sgt(1, 0, 1, 0) == false -; run: %i128_bricmp_sgt(0, 0, 1, 0) == false -; run: %i128_bricmp_sgt(-1, -1, 0, 0) == false -; run: %i128_bricmp_sgt(0, 0, -1, -1) == true -; run: %i128_bricmp_sgt(-1, -1, -1, -1) == false -; run: %i128_bricmp_sgt(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: %i128_bricmp_sgt(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: %i128_bricmp_sgt(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true +; run: %i128_bricmp_sgt(0, 0) == false +; run: %i128_bricmp_sgt(1, 1) == false +; run: %i128_bricmp_sgt(0, 1) == false +; run: %i128_bricmp_sgt(-1, 0) == false +; run: %i128_bricmp_sgt(0, -1) == true +; run: %i128_bricmp_sgt(-1, -1) == false +; run: %i128_bricmp_sgt(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_sgt(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %i128_bricmp_sgt(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true -function %i128_bricmp_ugt(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 ugt v4, v5, block2 +function %i128_bricmp_ugt(i128, 
i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 ugt v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_ugt(0, 0, 0, 0) == false -; run: %i128_bricmp_ugt(1, 0, 1, 0) == false -; run: %i128_bricmp_ugt(0, 0, 1, 0) == false -; run: %i128_bricmp_ugt(-1, -1, 0, 0) == true -; run: %i128_bricmp_ugt(0, 0, -1, -1) == false -; run: %i128_bricmp_ugt(-1, -1, -1, -1) == false -; run: %i128_bricmp_ugt(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: %i128_bricmp_ugt(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: %i128_bricmp_ugt(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true +; run: %i128_bricmp_ugt(0, 0) == false +; run: %i128_bricmp_ugt(1, 1) == false +; run: %i128_bricmp_ugt(0, 1) == false +; run: %i128_bricmp_ugt(-1, 0) == true +; run: %i128_bricmp_ugt(0, -1) == false +; run: %i128_bricmp_ugt(-1, -1) == false +; run: %i128_bricmp_ugt(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_ugt(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %i128_bricmp_ugt(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true -function %i128_bricmp_sge(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 sge v4, v5, block2 +function %i128_bricmp_sge(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 sge v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_sge(0, 0, 0, 0) == true -; run: %i128_bricmp_sge(1, 0, 1, 0) == 
true -; run: %i128_bricmp_sge(0, 0, 1, 0) == false -; run: %i128_bricmp_sge(-1, -1, 0, 0) == false -; run: %i128_bricmp_sge(0, 0, -1, -1) == true -; run: %i128_bricmp_sge(-1, -1, -1, -1) == true -; run: %i128_bricmp_sge(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: %i128_bricmp_sge(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: %i128_bricmp_sge(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true +; run: %i128_bricmp_sge(0, 0) == true +; run: %i128_bricmp_sge(1, 1) == true +; run: %i128_bricmp_sge(0, 1) == false +; run: %i128_bricmp_sge(-1, 0) == false +; run: %i128_bricmp_sge(0, -1) == true +; run: %i128_bricmp_sge(-1, -1) == true +; run: %i128_bricmp_sge(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_sge(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %i128_bricmp_sge(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true -function %i128_bricmp_uge(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 uge v4, v5, block2 +function %i128_bricmp_uge(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 uge v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_uge(0, 0, 0, 0) == true -; run: %i128_bricmp_uge(1, 0, 1, 0) == true -; run: %i128_bricmp_uge(0, 0, 1, 0) == false -; run: %i128_bricmp_uge(-1, -1, 0, 0) == true -; run: %i128_bricmp_uge(0, 0, -1, -1) == false -; run: %i128_bricmp_uge(-1, -1, -1, -1) == true -; run: %i128_bricmp_uge(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: 
%i128_bricmp_uge(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: %i128_bricmp_uge(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true +; run: %i128_bricmp_uge(0, 0) == true +; run: %i128_bricmp_uge(1, 1) == true +; run: %i128_bricmp_uge(0, 1) == false +; run: %i128_bricmp_uge(-1, 0) == true +; run: %i128_bricmp_uge(0, -1) == false +; run: %i128_bricmp_uge(-1, -1) == true +; run: %i128_bricmp_uge(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_uge(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %i128_bricmp_uge(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true -function %i128_bricmp_of(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 of v4, v5, block2 +function %i128_bricmp_of(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 of v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_of(0, 0, 0, 0) == false -; run: %i128_bricmp_of(0, 0, 1, 0) == false -; run: %i128_bricmp_of(0, 0, -1, -1) == false -; run: %i128_bricmp_of(-1, -1, -1, -1) == false -; run: %i128_bricmp_of(0x00000000_00000000, 0x80000000_00000000, 0, 0) == false -; run: %i128_bricmp_of(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0, 0) == false -; run: %i128_bricmp_of(1, 0, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == true -; run: %i128_bricmp_of(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 1, 0) == true -; run: %i128_bricmp_of(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x80000000_00000000) == false -; run: %i128_bricmp_of(0x00000000_00000000, 0x80000000_00000000, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == false -; 
run: %i128_bricmp_of(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x30000000_00000000) == false -; run: %i128_bricmp_of(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x30000000_00000000) == true +; run: %i128_bricmp_of(0, 0) == false +; run: %i128_bricmp_of(0, 1) == false +; run: %i128_bricmp_of(0, -1) == false +; run: %i128_bricmp_of(-1, -1) == false +; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0) == false +; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == false +; run: %i128_bricmp_of(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true +; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false +; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == false +; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true -function %i128_bricmp_nof(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - br_icmp.i128 nof v4, v5, block2 +function %i128_bricmp_nof(i128, i128) -> b1 { +block0(v0: i128,v1: i128): + br_icmp.i128 nof v0, v1, block2 jump block1 block1: - v6 = bconst.b1 false - return v6 + v2 = bconst.b1 false + return v2 block2: - v7 = bconst.b1 true - return v7 + v3 = bconst.b1 true + return v3 } -; run: %i128_bricmp_nof(0, 0, 0, 0) == true -; run: %i128_bricmp_nof(0, 0, 1, 0) == true -; run: %i128_bricmp_nof(0, 0, -1, -1) == true -; run: %i128_bricmp_nof(-1, -1, -1, -1) == true -; run: %i128_bricmp_nof(0x00000000_00000000, 0x80000000_00000000, 0, 0) == true -; run: %i128_bricmp_nof(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0, 0) == true -; run: %i128_bricmp_nof(1, 0, 0xFFFFFFFF_FFFFFFFF, 
0x7FFFFFFF_FFFFFFFF) == false -; run: %i128_bricmp_nof(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 1, 0) == false -; run: %i128_bricmp_nof(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x80000000_00000000) == true -; run: %i128_bricmp_nof(0x00000000_00000000, 0x80000000_00000000, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == true -; run: %i128_bricmp_nof(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x30000000_00000000) == true -; run: %i128_bricmp_nof(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x30000000_00000000) == false +; run: %i128_bricmp_nof(0, 0) == true +; run: %i128_bricmp_nof(0, 1) == true +; run: %i128_bricmp_nof(0, -1) == true +; run: %i128_bricmp_nof(-1, -1) == true +; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0) == true +; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == true +; run: %i128_bricmp_nof(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false +; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true +; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == true +; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false diff --git a/cranelift/filetests/filetests/runtests/i128-bxornot.clif b/cranelift/filetests/filetests/runtests/i128-bxornot.clif new file mode 100644 index 0000000000..ecacc84387 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-bxornot.clif @@ -0,0 +1,14 @@ +test run +target aarch64 + +function %bxor_not_i128(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2 = bxor_not v0, v1 + return v2 +} +; run: %bxor_not_i128(0, 0) == -1 +; run: %bxor_not_i128(-1, 0) == 0 +; run: %bxor_not_i128(-1, 
-1) == -1 +; run: %bxor_not_i128(-1, 0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 0xFFFFFFFF_FFFFFFFF_00000000_00000000 +; run: %bxor_not_i128(0xFEDCBA98_76543210_01234567_89ABCDEF, 0x01234567_89ABCDEF_FEDCBA98_76543210) == 0 +; run: %bxor_not_i128(0x9440A07D_9440A07D_8FA50A64_8FA50A64, 0xB575A07D_B575A07D_B0A51B75_B0A51B75) == 0xDECAFFFF_DECAFFFF_C0FFEEEE_C0FFEEEE diff --git a/cranelift/filetests/filetests/runtests/i128-cls.clif b/cranelift/filetests/filetests/runtests/i128-cls.clif new file mode 100644 index 0000000000..212d2e5135 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-cls.clif @@ -0,0 +1,18 @@ +test run +target aarch64 + +function %cls_i128(i128) -> i128 { +block0(v0: i128): + v1 = cls v0 + return v1 +} +; run: %cls_i128(0x00000000_00000000_00000000_00000000) == 127 +; run: %cls_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == 63 +; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == 63 +; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 127 +; run: %cls_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 0 +; run: %cls_i128(0x3FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == 1 +; run: %cls_i128(0xFFFFFFFF_FFFFFFFF_7FFFFFFF_FFFFFFFF) == 63 +; run: %cls_i128(0xC0000000_00000000_80000000_00000000) == 1 +; run: %cls_i128(0xC0000000_00000000_00000000_00000000) == 1 +; run: %cls_i128(0x80000000_00000000_80000000_00000000) == 0 diff --git a/cranelift/filetests/filetests/runtests/i128-concat-split.clif b/cranelift/filetests/filetests/runtests/i128-concat-split.clif index 56abdddca5..1559148c81 100644 --- a/cranelift/filetests/filetests/runtests/i128-concat-split.clif +++ b/cranelift/filetests/filetests/runtests/i128-concat-split.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %iconcat_isplit(i64, i64) -> i64, i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/runtests/i128-const.clif b/cranelift/filetests/filetests/runtests/i128-const.clif index 604015a99f..379a928b45 
100644 --- a/cranelift/filetests/filetests/runtests/i128-const.clif +++ b/cranelift/filetests/filetests/runtests/i128-const.clif @@ -1,13 +1,13 @@ test interpret test run +set enable_llvm_abi_extensions=true target aarch64 ; target s390x TODO: Not yet implemented on s390x -target x86_64 machinst +target x86_64 -function %i128_const_0() -> i64, i64 { +function %i128_const_0() -> i128 { block0: v1 = iconst.i128 0 - v2, v3 = isplit v1 - return v2, v3 + return v1 } -; run: %i128_const_0() == [0, 0] +; run: %i128_const_0() == 0 diff --git a/cranelift/filetests/filetests/runtests/i128-extend-2.clif b/cranelift/filetests/filetests/runtests/i128-extend-2.clif deleted file mode 100644 index 906d699bbc..0000000000 --- a/cranelift/filetests/filetests/runtests/i128-extend-2.clif +++ /dev/null @@ -1,69 +0,0 @@ -test run -target aarch64 -target x86_64 machinst -; TODO: Merge this file with i128-extend once the x86 legacy backend is removed - -function %i128_uextend_i32(i32) -> i64, i64 { -block0(v0: i32): - v1 = uextend.i128 v0 - v2, v3 = isplit v1 - return v2, v3 -} -; run: %i128_uextend_i32(0) == [0, 0] -; run: %i128_uextend_i32(-1) == [0xffff_ffff, 0] -; run: %i128_uextend_i32(0xffff_eeee) == [0xffff_eeee, 0] - -function %i128_sextend_i32(i32) -> i64, i64 { -block0(v0: i32): - v1 = sextend.i128 v0 - v2, v3 = isplit v1 - return v2, v3 -} -; run: %i128_sextend_i32(0) == [0, 0] -; run: %i128_sextend_i32(-1) == [-1, -1] -; run: %i128_sextend_i32(0x7fff_ffff) == [0x7fff_ffff, 0x0000_0000_0000_0000] -; run: %i128_sextend_i32(0xffff_eeee) == [0xffff_ffff_ffff_eeee, 0xffff_ffff_ffff_ffff] - - -function %i128_uextend_i16(i16) -> i64, i64 { -block0(v0: i16): - v1 = uextend.i128 v0 - v2, v3 = isplit v1 - return v2, v3 -} -; run: %i128_uextend_i16(0) == [0, 0] -; run: %i128_uextend_i16(-1) == [0xffff, 0] -; run: %i128_uextend_i16(0xffee) == [0xffee, 0] - -function %i128_sextend_i16(i16) -> i64, i64 { -block0(v0: i16): - v1 = sextend.i128 v0 - v2, v3 = isplit v1 - return v2, v3 -} -; run: 
%i128_sextend_i16(0) == [0, 0] -; run: %i128_sextend_i16(-1) == [-1, -1] -; run: %i128_sextend_i16(0x7fff) == [0x7fff, 0x0000_0000_0000_0000] -; run: %i128_sextend_i16(0xffee) == [0xffff_ffff_ffff_ffee, 0xffff_ffff_ffff_ffff] - - -function %i128_uextend_i8(i8) -> i64, i64 { -block0(v0: i8): - v1 = uextend.i128 v0 - v2, v3 = isplit v1 - return v2, v3 -} -; run: %i128_uextend_i8(0) == [0, 0] -; run: %i128_uextend_i8(-1) == [0xff, 0] -; run: %i128_uextend_i8(0xfe) == [0xfe, 0] - -function %i128_sextend_i8(i8) -> i64, i64 { -block0(v0: i8): - v1 = sextend.i128 v0 - v2, v3 = isplit v1 - return v2, v3 -} -; run: %i128_sextend_i8(0) == [0, 0] -; run: %i128_sextend_i8(-1) == [-1, -1] -; run: %i128_sextend_i8(0x7f) == [0x7f, 0x0000_0000_0000_0000] -; run: %i128_sextend_i8(0xfe) == [0xffff_ffff_ffff_fffe, 0xffff_ffff_ffff_ffff] diff --git a/cranelift/filetests/filetests/runtests/i128-extend.clif b/cranelift/filetests/filetests/runtests/i128-extend.clif index 57263be68e..a0d1b67276 100644 --- a/cranelift/filetests/filetests/runtests/i128-extend.clif +++ b/cranelift/filetests/filetests/runtests/i128-extend.clif @@ -1,26 +1,82 @@ test run +set enable_llvm_abi_extensions=true target aarch64 -; target s390x TODO: Not yet implemented on s390x -target x86_64 machinst -target x86_64 legacy +target x86_64 -function %i128_uextend_i64(i64) -> i64, i64 { +function %i128_uextend_i64(i64) -> i128 { block0(v0: i64): v1 = uextend.i128 v0 - v2, v3 = isplit v1 - return v2, v3 + return v1 } -; run: %i128_uextend_i64(0) == [0, 0] -; run: %i128_uextend_i64(-1) == [-1, 0] -; run: %i128_uextend_i64(0xffff_ffff_eeee_0000) == [0xffff_ffff_eeee_0000, 0] +; run: %i128_uextend_i64(0) == 0 +; run: %i128_uextend_i64(-1) == 0x00000000_00000000_ffffffff_ffffffff +; run: %i128_uextend_i64(0xffff_ffff_eeee_0000) == 0x00000000_00000000_ffff_ffff_eeee_0000 -function %i128_sextend_i64(i64) -> i64, i64 { +function %i128_sextend_i64(i64) -> i128 { block0(v0: i64): v1 = sextend.i128 v0 - v2, v3 = isplit v1 - 
return v2, v3 + return v1 } -; run: %i128_sextend_i64(0) == [0, 0] -; run: %i128_sextend_i64(-1) == [-1, -1] -; run: %i128_sextend_i64(0x7fff_ffff_ffff_ffff) == [0x7fff_ffff_ffff_ffff, 0x0000_0000_0000_0000] -; run: %i128_sextend_i64(0xffff_ffff_eeee_0000) == [0xffff_ffff_eeee_0000, 0xffff_ffff_ffff_ffff] +; run: %i128_sextend_i64(0) == 0 +; run: %i128_sextend_i64(-1) == -1 +; run: %i128_sextend_i64(0x7fff_ffff_ffff_ffff) == 0x00000000_00000000_7fffffffffffffff +; run: %i128_sextend_i64(0xffff_ffff_eeee_0000) == 0xffffffff_ffffffff_ffffffff_eeee0000 + +function %i128_uextend_i32(i32) -> i128 { +block0(v0: i32): + v1 = uextend.i128 v0 + return v1 +} +; run: %i128_uextend_i32(0) == 0 +; run: %i128_uextend_i32(-1) == 0x00000000_00000000_00000000_ffffffff +; run: %i128_uextend_i32(0xffff_eeee) == 0x00000000_00000000_00000000_ffffeeee + +function %i128_sextend_i32(i32) -> i128 { +block0(v0: i32): + v1 = sextend.i128 v0 + return v1 +} +; run: %i128_sextend_i32(0) == 0 +; run: %i128_sextend_i32(-1) == -1 +; run: %i128_sextend_i32(0x7fff_ffff) == 0x00000000_00000000_00000000_7fffffff +; run: %i128_sextend_i32(0xffff_eeee) == 0xffffffff_ffffffff_ffffffff_ffff_eeee + + +function %i128_uextend_i16(i16) -> i128 { +block0(v0: i16): + v1 = uextend.i128 v0 + return v1 +} +; run: %i128_uextend_i16(0) == 0 +; run: %i128_uextend_i16(-1) == 0x00000000_00000000_00000000_0000ffff +; run: %i128_uextend_i16(0xffee) == 0x00000000_00000000_00000000_0000ffee + +function %i128_sextend_i16(i16) -> i128 { +block0(v0: i16): + v1 = sextend.i128 v0 + return v1 +} +; run: %i128_sextend_i16(0) == 0 +; run: %i128_sextend_i16(-1) == -1 +; run: %i128_sextend_i16(0x7fff) == 0x00000000_00000000_00000000_00007fff +; run: %i128_sextend_i16(0xffee) == 0xffffffff_ffffffff_ffffffff_ffffffee + + +function %i128_uextend_i8(i8) -> i128 { +block0(v0: i8): + v1 = uextend.i128 v0 + return v1 +} +; run: %i128_uextend_i8(0) == 0 +; run: %i128_uextend_i8(-1) == 0x00000000_00000000_00000000_000000ff +; run: 
%i128_uextend_i8(0xfe) == 0x00000000_00000000_00000000_000000fe + +function %i128_sextend_i8(i8) -> i128 { +block0(v0: i8): + v1 = sextend.i128 v0 + return v1 +} +; run: %i128_sextend_i8(0) == 0 +; run: %i128_sextend_i8(-1) == -1 +; run: %i128_sextend_i8(0x7f) == 0x00000000_00000000_00000000_0000007f +; run: %i128_sextend_i8(0xfe) == 0xffffffff_ffffffff_ffffffff_fffffffe diff --git a/cranelift/filetests/filetests/runtests/i128-icmp-overflow.clif b/cranelift/filetests/filetests/runtests/i128-icmp-overflow.clif index e91b21de50..abbcd7d2f8 100644 --- a/cranelift/filetests/filetests/runtests/i128-icmp-overflow.clif +++ b/cranelift/filetests/filetests/runtests/i128-icmp-overflow.clif @@ -1,46 +1,38 @@ test run target aarch64 -; target x86_64 machinst TODO: X86_64 does not implement i128 icmp overflow -; TODO: Cleanup these tests when we have native support for i128 immediates in CLIF's parser -function %icmp_of_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - v6 = icmp.i128 of v4, v5 - return v6 +function %icmp_of_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 of v0, v1 + return v2 } -; run: %icmp_of_i128(0, 0, 0, 0) == false -; run: %icmp_of_i128(0, 0, 1, 0) == false -; run: %icmp_of_i128(0, 0, -1, -1) == false -; run: %icmp_of_i128(-1, -1, -1, -1) == false -; run: %icmp_of_i128(0x00000000_00000000, 0x80000000_00000000, 0, 0) == false -; run: %icmp_of_i128(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0, 0) == false -; run: %icmp_of_i128(1, 0, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == true -; run: %icmp_of_i128(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 1, 0) == true -; run: %icmp_of_i128(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x80000000_00000000) == false -; run: %icmp_of_i128(0x00000000_00000000, 0x80000000_00000000, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == false -; run: %icmp_of_i128(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 
0x00000000_00000000, 0x30000000_00000000) == false -; run: %icmp_of_i128(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x30000000_00000000) == true +; run: %icmp_of_i128(0, 0) == false +; run: %icmp_of_i128(0, 1) == false +; run: %icmp_of_i128(0, -1) == false +; run: %icmp_of_i128(-1, -1) == false +; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 0) == false +; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == false +; run: %icmp_of_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true +; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false +; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %icmp_of_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == false +; run: %icmp_of_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true -function %icmp_nof_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - v6 = icmp.i128 nof v4, v5 - return v6 +function %icmp_nof_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 nof v0, v1 + return v2 } -; run: %icmp_nof_i128(0, 0, 0, 0) == true -; run: %icmp_nof_i128(0, 0, 1, 0) == true -; run: %icmp_nof_i128(0, 0, -1, -1) == true -; run: %icmp_nof_i128(-1, -1, -1, -1) == true -; run: %icmp_nof_i128(0x00000000_00000000, 0x80000000_00000000, 0, 0) == true -; run: %icmp_nof_i128(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0, 0) == true -; run: %icmp_nof_i128(1, 0, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == false -; run: %icmp_nof_i128(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 1, 0) == false -; run: %icmp_nof_i128(0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x80000000_00000000) == true -; run: %icmp_nof_i128(0x00000000_00000000, 
0x80000000_00000000, 0xFFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == true -; run: %icmp_nof_i128(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000000, 0x30000000_00000000) == true -; run: %icmp_nof_i128(0xFFFFFFFF_FFFFFFFF, 0x4FFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x30000000_00000000) == false +; run: %icmp_nof_i128(0, 0) == true +; run: %icmp_nof_i128(0, 1) == true +; run: %icmp_nof_i128(0, -1) == true +; run: %icmp_nof_i128(-1, -1) == true +; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 0) == true +; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == true +; run: %icmp_nof_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false +; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true +; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_nof_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == true +; run: %icmp_nof_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false diff --git a/cranelift/filetests/filetests/runtests/i128-icmp.clif b/cranelift/filetests/filetests/runtests/i128-icmp.clif index e29000b931..6fb3f1808b 100644 --- a/cranelift/filetests/filetests/runtests/i128-icmp.clif +++ b/cranelift/filetests/filetests/runtests/i128-icmp.clif @@ -1,189 +1,168 @@ test interpret test run +set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst +target x86_64 -; TODO: Cleanup these tests when we have native support for i128 immediates in CLIF's parser -function %icmp_eq_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - - v6 = icmp.i128 eq v4, v5 - return v6 +function %icmp_eq_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 eq v0, v1 + return v2 } -; run: 
%icmp_eq_i128(0, 0, 0, 0) == true -; run: %icmp_eq_i128(-1, -1, -1, -1) == true -; run: %icmp_eq_i128(-1, -1, 0, 0) == false -; run: %icmp_eq_i128(-1, -1, 0, -1) == false -; run: %icmp_eq_i128(-1, 0, -1, -1) == false -; run: %icmp_eq_i128(0, -1, -1, -1) == false -; run: %icmp_eq_i128(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE, 0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true -; run: %icmp_eq_i128(0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001) == false -; run: %icmp_eq_i128(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001, 0x00000000_00000001) == false +; run: %icmp_eq_i128(0, 0) == true +; run: %icmp_eq_i128(-1, -1) == true +; run: %icmp_eq_i128(-1, 0) == false +; run: %icmp_eq_i128(-1, 0x00000000_00000000_FFFFFFFF_FFFFFFFF) == false +; run: %icmp_eq_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF, -1) == false +; run: %icmp_eq_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000, -1) == false +; run: %icmp_eq_i128(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true +; run: %icmp_eq_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == false +; run: %icmp_eq_i128(0x00000000_00000001_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == false -function %icmp_ne_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 ne v4, v5 - return v6 +function %icmp_ne_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 ne v0, v1 + return v2 } -; run: %icmp_ne_i128(0, 0, 0, 0) == false -; run: %icmp_ne_i128(-1, -1, -1, -1) == false -; run: %icmp_ne_i128(-1, -1, 0, 0) == true -; run: %icmp_ne_i128(-1, -1, 0, -1) == true -; run: %icmp_ne_i128(-1, 0, -1, -1) == true -; run: %icmp_ne_i128(0, -1, -1, -1) == true -; run: %icmp_ne_i128(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE, 0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == false -; run: %icmp_ne_i128(0xFFFFFFFF_FFFFFFFF, 
0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001) == true -; run: %icmp_ne_i128(0xFFFFFFFF_FFFFFFFF, 0x00000000_00000001, 0x00000000_00000001, 0x00000000_00000001) == true +; run: %icmp_ne_i128(0, 0) == false +; run: %icmp_ne_i128(-1, -1) == false +; run: %icmp_ne_i128(-1, 0) == true +; run: %icmp_ne_i128(-1, 0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_ne_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF, -1) == true +; run: %icmp_ne_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000, -1) == true +; run: %icmp_ne_i128(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == false +; run: %icmp_ne_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == true +; run: %icmp_ne_i128(0x00000000_00000001_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == true -function %icmp_slt_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 slt v4, v5 - return v6 +function %icmp_slt_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 slt v0, v1 + return v2 } -; run: %icmp_slt_i128(0, 0, 0, 0) == false -; run: %icmp_slt_i128(1, 0, 1, 0) == false -; run: %icmp_slt_i128(0, 0, 1, 0) == true -; run: %icmp_slt_i128(-1, -1, 0, 0) == true -; run: %icmp_slt_i128(0, 0, -1, -1) == false -; run: %icmp_slt_i128(-1, -1, -1, -1) == false -; run: %icmp_slt_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: %icmp_slt_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %icmp_slt_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false +; run: %icmp_slt_i128(0, 0) == false +; run: %icmp_slt_i128(1, 1) == false +; run: %icmp_slt_i128(0, 1) == true +; run: %icmp_slt_i128(-1, 0) == true +; run: %icmp_slt_i128(0, -1) == false +; run: %icmp_slt_i128(-1, -1) == false +; run: 
%icmp_slt_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_slt_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == true +; run: %icmp_slt_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %icmp_ult_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 ult v4, v5 - return v6 +function %icmp_ult_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 ult v0, v1 + return v2 } -; run: %icmp_ult_i128(0, 0, 0, 0) == false -; run: %icmp_ult_i128(1, 0, 1, 0) == false -; run: %icmp_ult_i128(0, 0, 1, 0) == true -; run: %icmp_ult_i128(-1, -1, 0, 0) == false -; run: %icmp_ult_i128(0, 0, -1, -1) == true -; run: %icmp_ult_i128(-1, -1, -1, -1) == false -; run: %icmp_ult_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: %icmp_ult_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %icmp_ult_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false +; run: %icmp_ult_i128(0, 0) == false +; run: %icmp_ult_i128(1, 1) == false +; run: %icmp_ult_i128(0, 1) == true +; run: %icmp_ult_i128(-1, 0) == false +; run: %icmp_ult_i128(0, -1) == true +; run: %icmp_ult_i128(-1, -1) == false +; run: %icmp_ult_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_ult_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == true +; run: %icmp_ult_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %icmp_sle_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 sle v4, v5 - return v6 +function %icmp_sle_i128(i128, 
i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 sle v0, v1 + return v2 } -; run: %icmp_sle_i128(0, 0, 0, 0) == true -; run: %icmp_sle_i128(1, 0, 1, 0) == true -; run: %icmp_sle_i128(0, 0, 1, 0) == true -; run: %icmp_sle_i128(-1, -1, 0, 0) == true -; run: %icmp_sle_i128(0, 0, -1, -1) == false -; run: %icmp_sle_i128(-1, -1, -1, -1) == true -; run: %icmp_sle_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: %icmp_sle_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %icmp_sle_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false +; run: %icmp_sle_i128(0, 0) == true +; run: %icmp_sle_i128(1, 1) == true +; run: %icmp_sle_i128(0, 1) == true +; run: %icmp_sle_i128(-1, 0) == true +; run: %icmp_sle_i128(0, -1) == false +; run: %icmp_sle_i128(-1, -1) == true +; run: %icmp_sle_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_sle_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == true +; run: %icmp_sle_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %icmp_ule_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 ule v4, v5 - return v6 +function %icmp_ule_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 ule v0, v1 + return v2 } -; run: %icmp_ule_i128(0, 0, 0, 0) == true -; run: %icmp_ule_i128(1, 0, 1, 0) == true -; run: %icmp_ule_i128(0, 0, 1, 0) == true -; run: %icmp_ule_i128(-1, -1, 0, 0) == false -; run: %icmp_ule_i128(0, 0, -1, -1) == true -; run: %icmp_ule_i128(-1, -1, -1, -1) == true -; run: %icmp_ule_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == true -; run: %icmp_ule_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 
0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == true -; run: %icmp_ule_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == false +; run: %icmp_ule_i128(0, 0) == true +; run: %icmp_ule_i128(1, 1) == true +; run: %icmp_ule_i128(0, 1) == true +; run: %icmp_ule_i128(-1, 0) == false +; run: %icmp_ule_i128(0, -1) == true +; run: %icmp_ule_i128(-1, -1) == true +; run: %icmp_ule_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_ule_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == true +; run: %icmp_ule_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == false -function %icmp_sgt_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 sgt v4, v5 - return v6 +function %icmp_sgt_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 sgt v0, v1 + return v2 } -; run: %icmp_sgt_i128(0, 0, 0, 0) == false -; run: %icmp_sgt_i128(1, 0, 1, 0) == false -; run: %icmp_sgt_i128(0, 0, 1, 0) == false -; run: %icmp_sgt_i128(-1, -1, 0, 0) == false -; run: %icmp_sgt_i128(0, 0, -1, -1) == true -; run: %icmp_sgt_i128(-1, -1, -1, -1) == false -; run: %icmp_sgt_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: %icmp_sgt_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: %icmp_sgt_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true +; run: %icmp_sgt_i128(0, 0) == false +; run: %icmp_sgt_i128(1, 1) == false +; run: %icmp_sgt_i128(0, 1) == false +; run: %icmp_sgt_i128(-1, 0) == false +; run: %icmp_sgt_i128(0, -1) == true +; run: %icmp_sgt_i128(-1, -1) == false +; run: %icmp_sgt_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: 
%icmp_sgt_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %icmp_sgt_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true -function %icmp_ugt_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 ugt v4, v5 - return v6 +function %icmp_ugt_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 ugt v0, v1 + return v2 } -; run: %icmp_ugt_i128(0, 0, 0, 0) == false -; run: %icmp_ugt_i128(1, 0, 1, 0) == false -; run: %icmp_ugt_i128(0, 0, 1, 0) == false -; run: %icmp_ugt_i128(-1, -1, 0, 0) == true -; run: %icmp_ugt_i128(0, 0, -1, -1) == false -; run: %icmp_ugt_i128(-1, -1, -1, -1) == false -; run: %icmp_ugt_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: %icmp_ugt_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: %icmp_ugt_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true +; run: %icmp_ugt_i128(0, 0) == false +; run: %icmp_ugt_i128(1, 1) == false +; run: %icmp_ugt_i128(0, 1) == false +; run: %icmp_ugt_i128(-1, 0) == true +; run: %icmp_ugt_i128(0, -1) == false +; run: %icmp_ugt_i128(-1, -1) == false +; run: %icmp_ugt_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %icmp_ugt_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %icmp_ugt_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true -function %icmp_sge_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 sge v4, v5 - return v6 +function %icmp_sge_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 sge v0, v1 + return v2 } -; run: 
%icmp_sge_i128(0, 0, 0, 0) == true -; run: %icmp_sge_i128(1, 0, 1, 0) == true -; run: %icmp_sge_i128(0, 0, 1, 0) == false -; run: %icmp_sge_i128(-1, -1, 0, 0) == false -; run: %icmp_sge_i128(0, 0, -1, -1) == true -; run: %icmp_sge_i128(-1, -1, -1, -1) == true -; run: %icmp_sge_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: %icmp_sge_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: %icmp_sge_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true +; run: %icmp_sge_i128(0, 0) == true +; run: %icmp_sge_i128(1, 1) == true +; run: %icmp_sge_i128(0, 1) == false +; run: %icmp_sge_i128(-1, 0) == false +; run: %icmp_sge_i128(0, -1) == true +; run: %icmp_sge_i128(-1, -1) == true +; run: %icmp_sge_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %icmp_sge_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %icmp_sge_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true -function %icmp_uge_i128(i64, i64, i64, i64) -> b1 { -block0(v0: i64,v1: i64,v2: i64,v3: i64): - v4 = iconcat v0, v1 - v5 = iconcat v2, v3 - v6 = icmp.i128 uge v4, v5 - return v6 +function %icmp_uge_i128(i128, i128) -> b1 { +block0(v0: i128, v1: i128): + v2 = icmp.i128 uge v0, v1 + return v2 } -; run: %icmp_uge_i128(0, 0, 0, 0) == true -; run: %icmp_uge_i128(1, 0, 1, 0) == true -; run: %icmp_uge_i128(0, 0, 1, 0) == false -; run: %icmp_uge_i128(-1, -1, 0, 0) == true -; run: %icmp_uge_i128(0, 0, -1, -1) == false -; run: %icmp_uge_i128(-1, -1, -1, -1) == true -; run: %icmp_uge_i128(0xFFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF, 0xFFFFFFFF_FFFFFFFF) == false -; run: %icmp_uge_i128(0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE, 0x00000000_00000000, 0xDECAFFFF_DECAFFFF) == false -; run: 
%icmp_uge_i128(0x00000000_00000000, 0xDECAFFFF_DECAFFFF, 0x00000000_00000000, 0xC0FFEEEE_C0FFEEEE) == true +; run: %icmp_uge_i128(0, 0) == true +; run: %icmp_uge_i128(1, 1) == true +; run: %icmp_uge_i128(0, 1) == false +; run: %icmp_uge_i128(-1, 0) == true +; run: %icmp_uge_i128(0, -1) == false +; run: %icmp_uge_i128(-1, -1) == true +; run: %icmp_uge_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFD, 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %icmp_uge_i128(0xC0FFEEEE_C0FFEEEE_00000000_00000000, 0xDECAFFFF_DECAFFFF_00000000_00000000) == false +; run: %icmp_uge_i128(0xDECAFFFF_DECAFFFF_00000000_00000000, 0xC0FFEEEE_C0FFEEEE_00000000_00000000) == true ; Icmp Imm Tests diff --git a/cranelift/filetests/filetests/runtests/i128-load-store.clif b/cranelift/filetests/filetests/runtests/i128-load-store.clif index 41046e8717..dc389bc049 100644 --- a/cranelift/filetests/filetests/runtests/i128-load-store.clif +++ b/cranelift/filetests/filetests/runtests/i128-load-store.clif @@ -1,136 +1,125 @@ test run -target x86_64 machinst +set enable_llvm_abi_extensions=true +target x86_64 target aarch64 -function %i128_stack_store_load(i64, i64) -> b1 { +function %i128_stack_store_load(i128) -> b1 { ss0 = explicit_slot 16 -block0(v0: i64,v1: i64): - v2 = iconcat v0, v1 +block0(v0: i128): + stack_store.i128 v0, ss0 + v1 = stack_load.i128 ss0 - stack_store.i128 v2, ss0 - v3 = stack_load.i128 ss0 - - v4 = icmp.i128 eq v2, v3 - return v4 + v2 = icmp.i128 eq v0, v1 + return v2 } -; run: %i128_stack_store_load(0, 0) == true -; run: %i128_stack_store_load(-1, -1) == true -; run: %i128_stack_store_load(-1, 0) == true -; run: %i128_stack_store_load(0, -1) == true -; run: %i128_stack_store_load(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true -; run: %i128_stack_store_load(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true -; run: %i128_stack_store_load(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true +; run: %i128_stack_store_load(0) == true +; run: %i128_stack_store_load(-1) == true +; 
run: %i128_stack_store_load(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %i128_stack_store_load(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true +; run: %i128_stack_store_load(0xFEDCBA98_76543210_01234567_89ABCDEF) == true +; run: %i128_stack_store_load(0xA00A00A0_0A00A00A_06060606_06060606) == true +; run: %i128_stack_store_load(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true -function %i128_stack_store_load_offset(i64, i64) -> b1 { +function %i128_stack_store_load_offset(i128) -> b1 { ss0 = explicit_slot 16, offset 16 -block0(v0: i64,v1: i64): - v2 = iconcat v0, v1 +block0(v0: i128): + stack_store.i128 v0, ss0 + v1 = stack_load.i128 ss0 - stack_store.i128 v2, ss0 - v3 = stack_load.i128 ss0 - - v4 = icmp.i128 eq v2, v3 - return v4 + v2 = icmp.i128 eq v0, v1 + return v2 } -; run: %i128_stack_store_load_offset(0, 0) == true -; run: %i128_stack_store_load_offset(-1, -1) == true -; run: %i128_stack_store_load_offset(-1, 0) == true -; run: %i128_stack_store_load_offset(0, -1) == true -; run: %i128_stack_store_load_offset(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true -; run: %i128_stack_store_load_offset(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true -; run: %i128_stack_store_load_offset(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true +; run: %i128_stack_store_load_offset(0) == true +; run: %i128_stack_store_load_offset(-1) == true +; run: %i128_stack_store_load_offset(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %i128_stack_store_load_offset(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true +; run: %i128_stack_store_load_offset(0xFEDCBA98_76543210_01234567_89ABCDEF) == true +; run: %i128_stack_store_load_offset(0xA00A00A0_0A00A00A_06060606_06060606) == true +; run: %i128_stack_store_load_offset(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true -function %i128_stack_store_load_inst_offset(i64, i64) -> b1 { +function %i128_stack_store_load_inst_offset(i128) -> b1 { ss0 = explicit_slot 16 ss1 = explicit_slot 16 ss2 = explicit_slot 16 -block0(v0: 
i64,v1: i64): - v2 = iconcat v0, v1 +block0(v0: i128): + stack_store.i128 v0, ss1+16 + v1 = stack_load.i128 ss1+16 - stack_store.i128 v2, ss1+16 - v3 = stack_load.i128 ss1+16 - - v4 = icmp.i128 eq v2, v3 - return v4 + v2 = icmp.i128 eq v0, v1 + return v2 } -; run: %i128_stack_store_load_inst_offset(0, 0) == true -; run: %i128_stack_store_load_inst_offset(-1, -1) == true -; run: %i128_stack_store_load_inst_offset(-1, 0) == true -; run: %i128_stack_store_load_inst_offset(0, -1) == true -; run: %i128_stack_store_load_inst_offset(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true -; run: %i128_stack_store_load_inst_offset(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true -; run: %i128_stack_store_load_inst_offset(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true +; run: %i128_stack_store_load_inst_offset(0) == true +; run: %i128_stack_store_load_inst_offset(-1) == true +; run: %i128_stack_store_load_inst_offset(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %i128_stack_store_load_inst_offset(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true +; run: %i128_stack_store_load_inst_offset(0xFEDCBA98_76543210_01234567_89ABCDEF) == true +; run: %i128_stack_store_load_inst_offset(0xA00A00A0_0A00A00A_06060606_06060606) == true +; run: %i128_stack_store_load_inst_offset(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true ; Some arches (aarch64) try to encode the offset into the load/store instructions ; test that we spill if the offset is too large and doesn't fit in the instruction -function %i128_stack_store_load_big_offset(i64, i64) -> b1 { +function %i128_stack_store_load_big_offset(i128) -> b1 { ss0 = explicit_slot 100000 ss1 = explicit_slot 8 -block0(v0: i64,v1: i64): - v2 = iconcat v0, v1 +block0(v0: i128): + stack_store.i128 v0, ss0 + v1 = stack_load.i128 ss0 - stack_store.i128 v2, ss0 - v3 = stack_load.i128 ss0 - - v4 = icmp.i128 eq v2, v3 - return v4 + v2 = icmp.i128 eq v0, v1 + return v2 } -; run: %i128_stack_store_load_big_offset(0, 0) == true -; run: 
%i128_stack_store_load_big_offset(-1, -1) == true -; run: %i128_stack_store_load_big_offset(-1, 0) == true -; run: %i128_stack_store_load_big_offset(0, -1) == true -; run: %i128_stack_store_load_big_offset(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true -; run: %i128_stack_store_load_big_offset(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true -; run: %i128_stack_store_load_big_offset(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true +; run: %i128_stack_store_load_big_offset(0) == true +; run: %i128_stack_store_load_big_offset(-1) == true +; run: %i128_stack_store_load_big_offset(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %i128_stack_store_load_big_offset(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true +; run: %i128_stack_store_load_big_offset(0xFEDCBA98_76543210_01234567_89ABCDEF) == true +; run: %i128_stack_store_load_big_offset(0xA00A00A0_0A00A00A_06060606_06060606) == true +; run: %i128_stack_store_load_big_offset(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true -function %i128_store_load(i64, i64) -> b1 { +function %i128_store_load(i128) -> b1 { ss0 = explicit_slot 16 -block0(v0: i64,v1: i64): - v2 = iconcat v0, v1 +block0(v0: i128): + v1 = stack_addr.i64 ss0 + store.i128 v0, v1 + v2 = load.i128 v1 - v3 = stack_addr.i64 ss0 - store.i128 v2, v3 - v4 = load.i128 v3 - - v5 = icmp.i128 eq v2, v4 - return v5 + v3 = icmp.i128 eq v0, v2 + return v3 } -; run: %i128_store_load(0, 0) == true -; run: %i128_store_load(-1, -1) == true -; run: %i128_store_load(-1, 0) == true -; run: %i128_store_load(0, -1) == true -; run: %i128_store_load(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true -; run: %i128_store_load(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true -; run: %i128_store_load(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true +; run: %i128_store_load(0) == true +; run: %i128_store_load(-1) == true +; run: %i128_store_load(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %i128_store_load(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true +; run: 
%i128_store_load(0xFEDCBA98_76543210_01234567_89ABCDEF) == true +; run: %i128_store_load(0xA00A00A0_0A00A00A_06060606_06060606) == true +; run: %i128_store_load(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true -function %i128_store_load_offset(i64, i64) -> b1 { +function %i128_store_load_offset(i128) -> b1 { ss0 = explicit_slot 32 -block0(v0: i64,v1: i64): - v2 = iconcat v0, v1 +block0(v0: i128): + v1 = stack_addr.i64 ss0 + store.i128 v0, v1+16 + v2 = load.i128 v1+16 - v3 = stack_addr.i64 ss0 - store.i128 v2, v3+16 - v4 = load.i128 v3+16 - - v5 = icmp.i128 eq v2, v4 - return v5 + v3 = icmp.i128 eq v0, v2 + return v3 } -; run: %i128_store_load_offset(0, 0) == true -; run: %i128_store_load_offset(-1, -1) == true -; run: %i128_store_load_offset(-1, 0) == true -; run: %i128_store_load_offset(0, -1) == true -; run: %i128_store_load_offset(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true -; run: %i128_store_load_offset(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true -; run: %i128_store_load_offset(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true +; run: %i128_store_load_offset(0) == true +; run: %i128_store_load_offset(-1) == true +; run: %i128_store_load_offset(0x00000000_00000000_FFFFFFFF_FFFFFFFF) == true +; run: %i128_store_load_offset(0xFFFFFFFF_FFFFFFFF_00000000_00000000) == true +; run: %i128_store_load_offset(0xFEDCBA98_76543210_01234567_89ABCDEF) == true +; run: %i128_store_load_offset(0xA00A00A0_0A00A00A_06060606_06060606) == true +; run: %i128_store_load_offset(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == true diff --git a/cranelift/filetests/filetests/runtests/i128-reduce.clif b/cranelift/filetests/filetests/runtests/i128-reduce.clif index 08ddfad194..0554aafeda 100644 --- a/cranelift/filetests/filetests/runtests/i128-reduce.clif +++ b/cranelift/filetests/filetests/runtests/i128-reduce.clif @@ -1,43 +1,40 @@ test run +set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst +target x86_64 -function %ireduce_128_64(i64, i64) -> i64 { 
-block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - v3 = ireduce.i64 v2 - return v3 +function %ireduce_128_64(i128) -> i64 { +block0(v0: i128): + v1 = ireduce.i64 v0 + return v1 } -; run: %ireduce_128_64(0, 0) == 0 -; run: %ireduce_128_64(-1, -1) == -1 -; run: %ireduce_128_64(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == 0xC0FFEEEE_DECAFFFF +; run: %ireduce_128_64(0) == 0 +; run: %ireduce_128_64(-1) == -1 +; run: %ireduce_128_64(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == 0xC0FFEEEE_DECAFFFF -function %ireduce_128_32(i64, i64) -> i32 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - v3 = ireduce.i32 v2 - return v3 +function %ireduce_128_32(i128) -> i32 { +block0(v0: i128): + v1 = ireduce.i32 v0 + return v1 } -; run: %ireduce_128_32(0, 0) == 0 -; run: %ireduce_128_32(-1, -1) == -1 -; run: %ireduce_128_32(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == 0xDECAFFFF +; run: %ireduce_128_32(0) == 0 +; run: %ireduce_128_32(-1) == -1 +; run: %ireduce_128_32(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == 0xDECAFFFF -function %ireduce_128_16(i64, i64) -> i16 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - v3 = ireduce.i16 v2 - return v3 +function %ireduce_128_16(i128) -> i16 { +block0(v0: i128): + v1 = ireduce.i16 v0 + return v1 } -; run: %ireduce_128_16(0, 0) == 0 -; run: %ireduce_128_16(-1, -1) == -1 -; run: %ireduce_128_16(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == 0xFFFF +; run: %ireduce_128_16(0) == 0 +; run: %ireduce_128_16(-1) == -1 +; run: %ireduce_128_16(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == 0xFFFF -function %ireduce_128_8(i64, i64) -> i8 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - v3 = ireduce.i8 v2 - return v3 +function %ireduce_128_8(i128) -> i8 { +block0(v0: i128): + v1 = ireduce.i8 v0 + return v1 } -; run: %ireduce_128_8(0, 0) == 0 -; run: %ireduce_128_8(-1, -1) == -1 -; run: %ireduce_128_8(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == 0xFF +; run: %ireduce_128_8(0) == 0 +; run: %ireduce_128_8(-1) == -1 +; run: 
%ireduce_128_8(0xDECAFFFF_C0FFEEEE_C0FFEEEE_DECAFFFF) == 0xFF diff --git a/cranelift/filetests/filetests/runtests/i128-rotate.clif b/cranelift/filetests/filetests/runtests/i128-rotate.clif index 359e9c1a7c..8e0b23d0a9 100644 --- a/cranelift/filetests/filetests/runtests/i128-rotate.clif +++ b/cranelift/filetests/filetests/runtests/i128-rotate.clif @@ -1,60 +1,51 @@ test run +set enable_llvm_abi_extensions=true target aarch64 -; target s390x TODO: Not yet implemented on s390x -target x86_64 machinst +target x86_64 -function %rotl(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - v4 = rotl.i128 v3, v2 - v5, v6 = isplit v4 - return v5, v6 +function %rotl(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = rotl.i128 v0, v1 + return v2 } -; run: %rotl(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020202, 0x02020202_02020202] -; run: %rotl(0x01010101_01010101, 0x01010101_01010101, 73) == [0x02020202_02020202, 0x02020202_02020202] -; run: %rotl(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202] -; run: %rotl(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303] +; run: %rotl(0x01010101_01010101_01010101_01010101, 9) == 0x02020202_02020202_02020202_02020202 +; run: %rotl(0x01010101_01010101_01010101_01010101, 73) == 0x02020202_02020202_02020202_02020202 +; run: %rotl(0x02020202_02020202_01010101_01010101, 0) == 0x02020202_02020202_01010101_01010101 +; run: %rotl(0x03030303_03030303_01010101_01010101, 128) == 0x03030303_03030303_01010101_01010101 -function %rotr(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - v4 = rotr.i128 v3, v2 - v5, v6 = isplit v4 - return v5, v6 +function %rotr(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = rotr.i128 v0, v1 + return v2 } -; run: %rotr(0x01010101_01010101, 0x01010101_01010101, 9) == [0x80808080_80808080, 0x80808080_80808080] -; run: %rotr(0x01010101_01010101, 
0x01010101_01010101, 73) == [0x80808080_80808080, 0x80808080_80808080] -; run: %rotr(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202] -; run: %rotr(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303] +; run: %rotr(0x01010101_01010101_01010101_01010101, 9) == 0x80808080_80808080_80808080_80808080 +; run: %rotr(0x01010101_01010101_01010101_01010101, 73) == 0x80808080_80808080_80808080_80808080 +; run: %rotr(0x02020202_02020202_01010101_01010101, 0) == 0x02020202_02020202_01010101_01010101 +; run: %rotr(0x03030303_03030303_01010101_01010101, 128) == 0x03030303_03030303_01010101_01010101 -function %rotl_amt_i128(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = uextend.i64 v2 - v4 = iconcat v3, v3 +function %rotl_amt_i128(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = uextend.i64 v1 + v3 = iconcat v2, v2 - v5 = iconcat v0, v1 - - v6 = rotl.i128 v5, v4 - v7, v8 = isplit v6 - return v7, v8 + v4 = rotl.i128 v0, v3 + return v4 } -; run: %rotl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020202, 0x02020202_02020202] -; run: %rotl_amt_i128(0x01010101_01010101, 0x01010101_01010101, 73) == [0x02020202_02020202, 0x02020202_02020202] -; run: %rotl_amt_i128(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202] -; run: %rotl_amt_i128(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303] +; run: %rotl_amt_i128(0x01010101_01010101_01010101_01010101, 9) == 0x02020202_02020202_02020202_02020202 +; run: %rotl_amt_i128(0x01010101_01010101_01010101_01010101, 73) == 0x02020202_02020202_02020202_02020202 +; run: %rotl_amt_i128(0x02020202_02020202_01010101_01010101, 0) == 0x02020202_02020202_01010101_01010101 +; run: %rotl_amt_i128(0x03030303_03030303_01010101_01010101, 128) == 0x03030303_03030303_01010101_01010101 -function %rotr_amt_i128(i64, i64, i8) -> i64, i64 { -block0(v0: 
i64, v1: i64, v2: i8): - v3 = uextend.i64 v2 - v4 = iconcat v3, v3 - v5 = iconcat v0, v1 +function %rotr_amt_i128(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = uextend.i64 v1 + v3 = iconcat v2, v2 - v6 = rotr.i128 v5, v4 - v7, v8 = isplit v6 - return v7, v8 + v4 = rotr.i128 v0, v3 + return v4 } -; run: %rotr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 9) == [0x80808080_80808080, 0x80808080_80808080] -; run: %rotr_amt_i128(0x01010101_01010101, 0x01010101_01010101, 73) == [0x80808080_80808080, 0x80808080_80808080] -; run: %rotr_amt_i128(0x01010101_01010101, 0x02020202_02020202, 0) == [0x01010101_01010101, 0x02020202_02020202] -; run: %rotr_amt_i128(0x01010101_01010101, 0x03030303_03030303, 128) == [0x01010101_01010101, 0x03030303_03030303] +; run: %rotr_amt_i128(0x01010101_01010101_01010101_01010101, 9) == 0x80808080_80808080_80808080_80808080 +; run: %rotr_amt_i128(0x01010101_01010101_01010101_01010101, 73) == 0x80808080_80808080_80808080_80808080 +; run: %rotr_amt_i128(0x02020202_02020202_01010101_01010101, 0) == 0x02020202_02020202_01010101_01010101 +; run: %rotr_amt_i128(0x03030303_03030303_01010101_01010101, 128) == 0x03030303_03030303_01010101_01010101 diff --git a/cranelift/filetests/filetests/runtests/i128-select.clif b/cranelift/filetests/filetests/runtests/i128-select.clif index 0d23939673..bf643a9e2c 100644 --- a/cranelift/filetests/filetests/runtests/i128-select.clif +++ b/cranelift/filetests/filetests/runtests/i128-select.clif @@ -1,21 +1,15 @@ test run +set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst +target x86_64 -function %i128_select(i8, i64, i64, i64, i64) -> i64, i64 { -block0(v0: i8, v1: i64, v2: i64, v3: i64, v4: i64): - v5 = icmp_imm ne v0, 0 - - v6 = iconcat v1, v2 - v7 = iconcat v3, v4 - - v8 = select.i128 v5, v6, v7 - - v9, v10 = isplit v8 - return v9, v10 +function %i128_select(b1, i128, i128) -> i128 { +block0(v0: b1, v1: i128, v2: i128): + v3 = select.i128 v0, v1, v2 + return v3 } -; run: 
%i128_select(1, 0, 0, 1, 1) == [0, 0] -; run: %i128_select(0, 0, 0, 1, 1) == [1, 1] +; run: %i128_select(true, 0, 1) == 0 +; run: %i128_select(false, 0, 1) == 1 -; run: %i128_select(1, 1, 2, 3, 4) == [1, 2] -; run: %i128_select(0, 1, 2, 3, 4) == [3, 4] +; run: %i128_select(true, 0x00000000_00000000_DECAFFFF_C0FFEEEE, 0xFFFFFFFF_FFFFFFFF_C0FFEEEE_DECAFFFF) == 0x00000000_00000000_DECAFFFF_C0FFEEEE +; run: %i128_select(false, 0x00000000_00000000_DECAFFFF_C0FFEEEE, 0xFFFFFFFF_FFFFFFFF_C0FFEEEE_DECAFFFF) == 0xFFFFFFFF_FFFFFFFF_C0FFEEEE_DECAFFFF diff --git a/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif b/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif index c486092703..094315426f 100644 --- a/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif +++ b/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif @@ -3,88 +3,82 @@ target aarch64 ; TODO: Merge this with the main i128-shifts file when x86_64 passes these. -function %ishl_i16_i128(i16, i64, i64) -> i16 { -block0(v0: i16, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = ishl.i16 v0, v3 - return v4 +function %ishl_i16_i128(i16, i128) -> i16 { +block0(v0: i16, v1: i128): + v2 = ishl.i16 v0, v1 + return v2 } -; run: %ishl_i16_i128(0x0000, 0, 0) == 0x0000 -; run: %ishl_i16_i128(0x0000, 1, 0) == 0x0000 -; run: %ishl_i16_i128(0x000f, 0, 4) == 0x000f -; run: %ishl_i16_i128(0x000f, 4, 0) == 0x00f0 -; run: %ishl_i16_i128(0x0004, 16, 0) == 0x0004 -; run: %ishl_i16_i128(0x0004, 17, 0) == 0x0008 -; run: %ishl_i16_i128(0x0004, 18, 1) == 0x0010 +; run: %ishl_i16_i128(0x0000, 0) == 0x0000 +; run: %ishl_i16_i128(0x0000, 1) == 0x0000 +; run: %ishl_i16_i128(0x000f, 4) == 0x00f0 +; run: %ishl_i16_i128(0x0004, 16) == 0x0004 +; run: %ishl_i16_i128(0x0004, 17) == 0x0008 +; run: %ishl_i16_i128(0x000f, 0x00000000_00000004_00000000_00000000) == 0x000f +; run: %ishl_i16_i128(0x0004, 0x00000000_00000001_00000000_00000012) == 0x0010 -function %ishl_i8_i128(i8, i64, i64) -> i8 
{ -block0(v0: i8, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = ishl.i8 v0, v3 - return v4 +function %ishl_i8_i128(i8, i128) -> i8 { +block0(v0: i8, v1: i128): + v2 = ishl.i8 v0, v1 + return v2 } -; run: %ishl_i8_i128(0x00, 0, 0) == 0x00 -; run: %ishl_i8_i128(0x00, 1, 0) == 0x00 -; run: %ishl_i8_i128(0x0f, 0, 4) == 0x0f -; run: %ishl_i8_i128(0x0f, 4, 0) == 0xf0 -; run: %ishl_i8_i128(0x04, 8, 0) == 0x04 -; run: %ishl_i8_i128(0x04, 9, 0) == 0x08 -; run: %ishl_i8_i128(0x04, 10, 1) == 0x10 +; run: %ishl_i8_i128(0x00, 0) == 0x00 +; run: %ishl_i8_i128(0x00, 1) == 0x00 +; run: %ishl_i8_i128(0x0f, 4) == 0xf0 +; run: %ishl_i8_i128(0x04, 8) == 0x04 +; run: %ishl_i8_i128(0x04, 9) == 0x08 +; run: %ishl_i8_i128(0x0f, 0x00000000_00000004_00000000_00000000) == 0x0f +; run: %ishl_i8_i128(0x04, 0x00000000_00000001_00000000_0000000A) == 0x10 -function %ushr_i16_i128(i16, i64, i64) -> i16 { -block0(v0: i16, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = ushr.i16 v0, v3 - return v4 +function %ushr_i16_i128(i16, i128) -> i16 { +block0(v0: i16, v1: i128): + v2 = ushr.i16 v0, v1 + return v2 } -; run: %ushr_i16_i128(0x1000, 0, 0) == 0x1000 -; run: %ushr_i16_i128(0x1000, 1, 0) == 0x0800 -; run: %ushr_i16_i128(0xf000, 0, 4) == 0xf000 -; run: %ushr_i16_i128(0xf000, 4, 0) == 0x0f00 -; run: %ushr_i16_i128(0x4000, 16, 0) == 0x4000 -; run: %ushr_i16_i128(0x4000, 17, 0) == 0x2000 -; run: %ushr_i16_i128(0x4000, 18, 1) == 0x1000 +; run: %ushr_i16_i128(0x1000, 0) == 0x1000 +; run: %ushr_i16_i128(0x1000, 1) == 0x0800 +; run: %ushr_i16_i128(0xf000, 4) == 0x0f00 +; run: %ushr_i16_i128(0x4000, 16) == 0x4000 +; run: %ushr_i16_i128(0x4000, 17) == 0x2000 +; run: %ushr_i16_i128(0xf000, 0x00000000_00000004_00000000_00000000) == 0xf000 +; run: %ushr_i16_i128(0x4000, 0x00000000_00000001_00000000_00000012) == 0x1000 -function %ushr_i8_i128(i8, i64, i64) -> i8 { -block0(v0: i8, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = ushr.i8 v0, v3 - return v4 +function %ushr_i8_i128(i8, i128) -> i8 { +block0(v0: i8, 
v1: i128): + v2 = ushr.i8 v0, v1 + return v2 } -; run: %ushr_i8_i128(0x10, 0, 0) == 0x10 -; run: %ushr_i8_i128(0x10, 1, 0) == 0x08 -; run: %ushr_i8_i128(0xf0, 0, 4) == 0xf0 -; run: %ushr_i8_i128(0xf0, 4, 0) == 0x0f -; run: %ushr_i8_i128(0x40, 8, 0) == 0x40 -; run: %ushr_i8_i128(0x40, 9, 0) == 0x20 -; run: %ushr_i8_i128(0x40, 10, 1) == 0x10 +; run: %ushr_i8_i128(0x10, 0) == 0x10 +; run: %ushr_i8_i128(0x10, 1) == 0x08 +; run: %ushr_i8_i128(0xf0, 4) == 0x0f +; run: %ushr_i8_i128(0x40, 8) == 0x40 +; run: %ushr_i8_i128(0x40, 9) == 0x20 +; run: %ushr_i8_i128(0xf0, 0x00000000_00000004_00000000_00000000) == 0xf0 +; run: %ushr_i8_i128(0x40, 0x00000000_00000001_00000000_0000000A) == 0x10 -function %sshr_i16_i128(i16, i64, i64) -> i16 { -block0(v0: i16, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = sshr.i16 v0, v3 - return v4 +function %sshr_i16_i128(i16, i128) -> i16 { +block0(v0: i16, v1: i128): + v2 = sshr.i16 v0, v1 + return v2 } -; run: %sshr_i16_i128(0x8000, 0, 0) == 0x8000 -; run: %sshr_i16_i128(0x8000, 1, 0) == 0xC000 -; run: %sshr_i16_i128(0xf000, 0, 4) == 0xf000 -; run: %sshr_i16_i128(0xf000, 4, 0) == 0xff00 -; run: %sshr_i16_i128(0x4000, 16, 0) == 0x4000 -; run: %sshr_i16_i128(0x4000, 17, 0) == 0x2000 -; run: %sshr_i16_i128(0x4000, 18, 1) == 0x1000 +; run: %sshr_i16_i128(0x8000, 0) == 0x8000 +; run: %sshr_i16_i128(0x8000, 1) == 0xC000 +; run: %sshr_i16_i128(0xf000, 4) == 0xff00 +; run: %sshr_i16_i128(0x4000, 16) == 0x4000 +; run: %sshr_i16_i128(0x4000, 17) == 0x2000 +; run: %sshr_i16_i128(0xf000, 0x00000000_00000004_00000000_00000000) == 0xf000 +; run: %sshr_i16_i128(0x4000, 0x00000000_00000001_00000000_00000012) == 0x1000 -function %sshr_i8_i128(i8, i64, i64) -> i8 { -block0(v0: i8, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = sshr.i8 v0, v3 - return v4 +function %sshr_i8_i128(i8, i128) -> i8 { +block0(v0: i8, v1: i128): + v2 = sshr.i8 v0, v1 + return v2 } -; run: %sshr_i8_i128(0x80, 0, 0) == 0x80 -; run: %sshr_i8_i128(0x80, 1, 0) == 0xC0 -; run: 
%sshr_i8_i128(0xf0, 0, 4) == 0xf0 -; run: %sshr_i8_i128(0xf0, 4, 0) == 0xff -; run: %sshr_i8_i128(0x40, 8, 0) == 0x40 -; run: %sshr_i8_i128(0x40, 9, 0) == 0x20 -; run: %sshr_i8_i128(0x40, 10, 1) == 0x10 +; run: %sshr_i8_i128(0x80, 0) == 0x80 +; run: %sshr_i8_i128(0x80, 1) == 0xC0 +; run: %sshr_i8_i128(0xf0, 4) == 0xff +; run: %sshr_i8_i128(0x40, 8) == 0x40 +; run: %sshr_i8_i128(0x40, 9) == 0x20 +; run: %sshr_i8_i128(0xf0, 0x00000000_00000004_00000000_00000000) == 0xf0 +; run: %sshr_i8_i128(0x40, 0x00000000_00000001_00000000_0000000A) == 0x10 diff --git a/cranelift/filetests/filetests/runtests/i128-shifts.clif b/cranelift/filetests/filetests/runtests/i128-shifts.clif index 1e862cb7e9..7cd37995a5 100644 --- a/cranelift/filetests/filetests/runtests/i128-shifts.clif +++ b/cranelift/filetests/filetests/runtests/i128-shifts.clif @@ -1,271 +1,232 @@ test run +set enable_llvm_abi_extensions=true target aarch64 -target x86_64 machinst +target x86_64 -function %ishl_i128_i128(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - v4 = uextend.i64 v2 - v5 = iconcat v4, v4 +function %ishl_i128_i128(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = uextend.i64 v1 + v3 = iconcat v2, v2 - v6 = ishl.i128 v3, v5 + v4 = ishl.i128 v0, v3 - v7, v8 = isplit v6 - return v7, v8 -} -; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 2) == [0x04040404_04040404, 0x04040404_04040404] -; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020200, 0x02020202_02020202] -; run: %ishl_i128_i128(0x01010101_01010101, 0xffffffff_ffffffff, 66) == [0x00000000_00000000, 0x04040404_04040404] -; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i128(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 
0x00000000_00000000] -; run: %ishl_i128_i128(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ishl_i128_i128(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ishl_i128_i128(0x12340000_00000000, 0x56780000_00000000, 64) == [0x00000000_00000000, 0x12340000_00000000] -; run: %ishl_i128_i128(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_00000000, 0x00000000_12340000] -; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 129) == [0x02020202_02020202, 0x02020202_02020202] -; run: %ishl_i128_i128(0x01010101_01010101, 0x01010101_01010101, 130) == [0x04040404_04040404, 0x04040404_04040404] - -function %ishl_i128_i64(i64, i64, i64) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i64): - v3 = iconcat v0, v1 - - v4 = ishl.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 -} -; run: %ishl_i128_i64(0x01010101_01010101, 0x01010101_01010101, 2) == [0x04040404_04040404, 0x04040404_04040404] -; run: %ishl_i128_i64(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020200, 0x02020202_02020202] -; run: %ishl_i128_i64(0x01010101_01010101, 0xffffffff_ffffffff, 66) == [0x00000000_00000000, 0x04040404_04040404] -; run: %ishl_i128_i64(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i64(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i64(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ishl_i128_i64(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ishl_i128_i64(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ishl_i128_i64(0x12340000_00000000, 0x56780000_00000000, 64) == [0x00000000_00000000, 0x12340000_00000000] -; run: %ishl_i128_i64(0x12340000_00000000, 0x56780000_00000000, 32) 
== [0x00000000_00000000, 0x00000000_12340000] -; run: %ishl_i128_i64(0x01010101_01010101, 0x01010101_01010101, 129) == [0x02020202_02020202, 0x02020202_02020202] -; run: %ishl_i128_i64(0x01010101_01010101, 0x01010101_01010101, 130) == [0x04040404_04040404, 0x04040404_04040404] - -function %ishl_i128_i32(i64, i64, i32) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i32): - v3 = iconcat v0, v1 - - v4 = ishl.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 -} -; run: %ishl_i128_i32(0x01010101_01010101, 0x01010101_01010101, 2) == [0x04040404_04040404, 0x04040404_04040404] -; run: %ishl_i128_i32(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020200, 0x02020202_02020202] -; run: %ishl_i128_i32(0x01010101_01010101, 0xffffffff_ffffffff, 66) == [0x00000000_00000000, 0x04040404_04040404] -; run: %ishl_i128_i32(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i32(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i32(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ishl_i128_i32(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ishl_i128_i32(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ishl_i128_i32(0x12340000_00000000, 0x56780000_00000000, 64) == [0x00000000_00000000, 0x12340000_00000000] -; run: %ishl_i128_i32(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_00000000, 0x00000000_12340000] -; run: %ishl_i128_i32(0x01010101_01010101, 0x01010101_01010101, 129) == [0x02020202_02020202, 0x02020202_02020202] -; run: %ishl_i128_i32(0x01010101_01010101, 0x01010101_01010101, 130) == [0x04040404_04040404, 0x04040404_04040404] - -function %ishl_i128_i16(i64, i64, i16) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i16): - v3 = iconcat v0, v1 - - v4 = ishl.i128 v3, v2 - - v5, v6 = isplit v4 
- return v5, v6 -} -; run: %ishl_i128_i16(0x01010101_01010101, 0x01010101_01010101, 2) == [0x04040404_04040404, 0x04040404_04040404] -; run: %ishl_i128_i16(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020200, 0x02020202_02020202] -; run: %ishl_i128_i16(0x01010101_01010101, 0xffffffff_ffffffff, 66) == [0x00000000_00000000, 0x04040404_04040404] -; run: %ishl_i128_i16(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i16(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i16(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ishl_i128_i16(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ishl_i128_i16(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ishl_i128_i16(0x12340000_00000000, 0x56780000_00000000, 64) == [0x00000000_00000000, 0x12340000_00000000] -; run: %ishl_i128_i16(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_00000000, 0x00000000_12340000] -; run: %ishl_i128_i16(0x01010101_01010101, 0x01010101_01010101, 129) == [0x02020202_02020202, 0x02020202_02020202] -; run: %ishl_i128_i16(0x01010101_01010101, 0x01010101_01010101, 130) == [0x04040404_04040404, 0x04040404_04040404] - -function %ishl_i128_i8(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - - v4 = ishl.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 -} -; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 2) == [0x04040404_04040404, 0x04040404_04040404] -; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 9) == [0x02020202_02020200, 0x02020202_02020202] -; run: %ishl_i128_i8(0x01010101_01010101, 0xffffffff_ffffffff, 66) == [0x00000000_00000000, 0x04040404_04040404] -; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 0) == 
[0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ishl_i128_i8(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ishl_i128_i8(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ishl_i128_i8(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ishl_i128_i8(0x12340000_00000000, 0x56780000_00000000, 64) == [0x00000000_00000000, 0x12340000_00000000] -; run: %ishl_i128_i8(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_00000000, 0x00000000_12340000] -; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 129) == [0x02020202_02020202, 0x02020202_02020202] -; run: %ishl_i128_i8(0x01010101_01010101, 0x01010101_01010101, 130) == [0x04040404_04040404, 0x04040404_04040404] - - -function %ishl_i64_i128(i64, i64, i64) -> i64 { -block0(v0: i64, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = ishl.i64 v0, v3 return v4 } -; run: %ishl_i64_i128(0x00000000_00000000, 0, 0) == 0x00000000_00000000 -; run: %ishl_i64_i128(0x00000000_00000000, 1, 0) == 0x00000000_00000000 -; run: %ishl_i64_i128(0x0000000f_0000000f, 0, 4) == 0x0000000f_0000000f -; run: %ishl_i64_i128(0x0000000f_0000000f, 4, 0) == 0x000000f0_000000f0 -; run: %ishl_i64_i128(0x00000000_00000004, 64, 0) == 0x00000000_00000004 -; run: %ishl_i64_i128(0x00000000_00000004, 65, 0) == 0x00000000_00000008 -; run: %ishl_i64_i128(0x00000000_00000004, 66, 1) == 0x00000000_00000010 +; run: %ishl_i128_i128(0x01010101_01010101_01010101_01010101, 2) == 0x04040404_04040404_04040404_04040404 +; run: %ishl_i128_i128(0x01010101_01010101_01010101_01010101, 9) == 0x02020202_02020202_02020202_02020200 +; run: %ishl_i128_i128(0xffffffff_ffffffff_01010101_01010101, 66) == 0x04040404_04040404_00000000_00000000 +; run: %ishl_i128_i128(0x01010101_01010101_01010101_01010101, 
0) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i128(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i128(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ishl_i128_i128(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ishl_i128_i128(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ishl_i128_i128(0x56780000_00000000_12340000_00000000, 64) == 0x12340000_00000000_00000000_00000000 +; run: %ishl_i128_i128(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_12340000_00000000_00000000 +; run: %ishl_i128_i128(0x01010101_01010101_01010101_01010101, 129) == 0x02020202_02020202_02020202_02020202 +; run: %ishl_i128_i128(0x01010101_01010101_01010101_01010101, 130) == 0x04040404_04040404_04040404_04040404 -function %ishl_i32_i128(i32, i64, i64) -> i32 { -block0(v0: i32, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = ishl.i32 v0, v3 +function %ishl_i128_i64(i128, i64) -> i128 { +block0(v0: i128, v1: i64): + v2 = ishl.i128 v0, v1 + return v2 +} +; run: %ishl_i128_i64(0x01010101_01010101_01010101_01010101, 2) == 0x04040404_04040404_04040404_04040404 +; run: %ishl_i128_i64(0x01010101_01010101_01010101_01010101, 9) == 0x02020202_02020202_02020202_02020200 +; run: %ishl_i128_i64(0xffffffff_ffffffff_01010101_01010101, 66) == 0x04040404_04040404_00000000_00000000 +; run: %ishl_i128_i64(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i64(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i64(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ishl_i128_i64(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ishl_i128_i64(0x56780000_00000000_12340000_00000000, 0) == 
0x56780000_00000000_12340000_00000000 +; run: %ishl_i128_i64(0x56780000_00000000_12340000_00000000, 64) == 0x12340000_00000000_00000000_00000000 +; run: %ishl_i128_i64(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_12340000_00000000_00000000 +; run: %ishl_i128_i64(0x01010101_01010101_01010101_01010101, 129) == 0x02020202_02020202_02020202_02020202 +; run: %ishl_i128_i64(0x01010101_01010101_01010101_01010101, 130) == 0x04040404_04040404_04040404_04040404 + +function %ishl_i128_i32(i128, i32) -> i128 { +block0(v0: i128, v1: i32): + v2 = ishl.i128 v0, v1 + return v2 +} +; run: %ishl_i128_i32(0x01010101_01010101_01010101_01010101, 2) == 0x04040404_04040404_04040404_04040404 +; run: %ishl_i128_i32(0x01010101_01010101_01010101_01010101, 9) == 0x02020202_02020202_02020202_02020200 +; run: %ishl_i128_i32(0xffffffff_ffffffff_01010101_01010101, 66) == 0x04040404_04040404_00000000_00000000 +; run: %ishl_i128_i32(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i32(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i32(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ishl_i128_i32(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ishl_i128_i32(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ishl_i128_i32(0x56780000_00000000_12340000_00000000, 64) == 0x12340000_00000000_00000000_00000000 +; run: %ishl_i128_i32(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_12340000_00000000_00000000 +; run: %ishl_i128_i32(0x01010101_01010101_01010101_01010101, 129) == 0x02020202_02020202_02020202_02020202 +; run: %ishl_i128_i32(0x01010101_01010101_01010101_01010101, 130) == 0x04040404_04040404_04040404_04040404 + +function %ishl_i128_i16(i128, i16) -> i128 { +block0(v0: i128, v1: i16): + v2 = ishl.i128 v0, v1 + return v2 +} +; run: 
%ishl_i128_i16(0x01010101_01010101_01010101_01010101, 2) == 0x04040404_04040404_04040404_04040404 +; run: %ishl_i128_i16(0x01010101_01010101_01010101_01010101, 9) == 0x02020202_02020202_02020202_02020200 +; run: %ishl_i128_i16(0xffffffff_ffffffff_01010101_01010101, 66) == 0x04040404_04040404_00000000_00000000 +; run: %ishl_i128_i16(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i16(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i16(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ishl_i128_i16(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ishl_i128_i16(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ishl_i128_i16(0x56780000_00000000_12340000_00000000, 64) == 0x12340000_00000000_00000000_00000000 +; run: %ishl_i128_i16(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_12340000_00000000_00000000 +; run: %ishl_i128_i16(0x01010101_01010101_01010101_01010101, 129) == 0x02020202_02020202_02020202_02020202 +; run: %ishl_i128_i16(0x01010101_01010101_01010101_01010101, 130) == 0x04040404_04040404_04040404_04040404 + +function %ishl_i128_i8(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = ishl.i128 v0, v1 + return v2 +} +; run: %ishl_i128_i8(0x01010101_01010101_01010101_01010101, 2) == 0x04040404_04040404_04040404_04040404 +; run: %ishl_i128_i8(0x01010101_01010101_01010101_01010101, 9) == 0x02020202_02020202_02020202_02020200 +; run: %ishl_i128_i8(0xffffffff_ffffffff_01010101_01010101, 66) == 0x04040404_04040404_00000000_00000000 +; run: %ishl_i128_i8(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i8(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ishl_i128_i8(0x00000000_00000000_00000000_00000001, 0) == 
0x00000000_00000000_00000000_00000001 +; run: %ishl_i128_i8(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ishl_i128_i8(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ishl_i128_i8(0x56780000_00000000_12340000_00000000, 64) == 0x12340000_00000000_00000000_00000000 +; run: %ishl_i128_i8(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_12340000_00000000_00000000 +; run: %ishl_i128_i8(0x01010101_01010101_01010101_01010101, 129) == 0x02020202_02020202_02020202_02020202 +; run: %ishl_i128_i8(0x01010101_01010101_01010101_01010101, 130) == 0x04040404_04040404_04040404_04040404 + + +function %ishl_i64_i128(i64, i128) -> i64 { +block0(v0: i64, v1: i128): + v2 = ishl.i64 v0, v1 + return v2 +} +; run: %ishl_i64_i128(0x00000000_00000000, 0) == 0x00000000_00000000 +; run: %ishl_i64_i128(0x00000000_00000000, 1) == 0x00000000_00000000 +; run: %ishl_i64_i128(0x0000000f_0000000f, 4) == 0x000000f0_000000f0 +; run: %ishl_i64_i128(0x00000000_00000004, 64) == 0x00000000_00000004 +; run: %ishl_i64_i128(0x00000000_00000004, 65) == 0x00000000_00000008 +; run: %ishl_i64_i128(0x0000000f_0000000f, 0x00000000_00000004_00000000_00000000) == 0x0000000f_0000000f +; run: %ishl_i64_i128(0x00000000_00000004, 0x00000000_00000001_00000000_00000042) == 0x00000000_00000010 + +function %ishl_i32_i128(i32, i128) -> i32 { +block0(v0: i32, v1: i128): + v2 = ishl.i32 v0, v1 + return v2 +} +; run: %ishl_i32_i128(0x00000000, 0) == 0x00000000 +; run: %ishl_i32_i128(0x00000000, 1) == 0x00000000 +; run: %ishl_i32_i128(0x0000000f, 4) == 0x000000f0 +; run: %ishl_i32_i128(0x00000004, 32) == 0x00000004 +; run: %ishl_i32_i128(0x00000004, 33) == 0x00000008 +; run: %ishl_i32_i128(0x0000000f, 0x00000000_00000004_00000000_00000000) == 0x0000000f +; run: %ishl_i32_i128(0x00000004, 0x00000000_00000001_00000000_00000022) == 0x00000010 + + + +function %ushr_i128_i128(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = 
uextend.i64 v1 + v3 = iconcat v2, v2 + + v4 = ushr.i128 v0, v3 return v4 } -; run: %ishl_i32_i128(0x00000000, 0, 0) == 0x00000000 -; run: %ishl_i32_i128(0x00000000, 1, 0) == 0x00000000 -; run: %ishl_i32_i128(0x0000000f, 0, 4) == 0x0000000f -; run: %ishl_i32_i128(0x0000000f, 4, 0) == 0x000000f0 -; run: %ishl_i32_i128(0x00000004, 32, 0) == 0x00000004 -; run: %ishl_i32_i128(0x00000004, 33, 0) == 0x00000008 -; run: %ishl_i32_i128(0x00000004, 34, 1) == 0x00000010 +; run: %ushr_i128_i128(0x01010101_01010101_01010101_01010101, 2) == 0x00404040_40404040_40404040_40404040 +; run: %ushr_i128_i128(0x01010101_01010101_01010101_01010101, 66) == 0x00000000_00000000_00404040_40404040 +; run: %ushr_i128_i128(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i128(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i128(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ushr_i128_i128(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ushr_i128_i128(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ushr_i128_i128(0x56780000_00000000_12340000_00000000, 64) == 0x00000000_00000000_56780000_00000000 +; run: %ushr_i128_i128(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_56780000_00000000_12340000 +; run: %ushr_i128_i128(0x01010101_01010101_01010101_01010101, 129) == 0x00808080_80808080_80808080_80808080 +; run: %ushr_i128_i128(0x01010101_01010101_01010101_01010101, 130) == 0x00404040_40404040_40404040_40404040 - - -function %ushr_i128_i128(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - v4 = uextend.i64 v2 - v5 = iconcat v4, v4 - - v6 = ushr.i128 v3, v5 - - v7, v8 = isplit v6 - return v7, v8 +function %ushr_i128_i64(i128, i64) -> i128 { +block0(v0: i128, v1: i64): + v2 = ushr.i128 v0, v1 + return v2 } -; run: 
%ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 2) == [0x40404040_40404040, 0x00404040_40404040] -; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 66) == [0x00404040_40404040, 0x00000000_00000000] -; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i128(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ushr_i128_i128(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ushr_i128_i128(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ushr_i128_i128(0x12340000_00000000, 0x56780000_00000000, 64) == [0x56780000_00000000, 0x00000000_00000000] -; run: %ushr_i128_i128(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_12340000, 0x00000000_56780000] -; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 129) == [0x80808080_80808080, 0x00808080_80808080] -; run: %ushr_i128_i128(0x01010101_01010101, 0x01010101_01010101, 130) == [0x40404040_40404040, 0x00404040_40404040] +; run: %ushr_i128_i64(0x01010101_01010101_01010101_01010101, 2) == 0x00404040_40404040_40404040_40404040 +; run: %ushr_i128_i64(0x01010101_01010101_01010101_01010101, 66) == 0x00000000_00000000_00404040_40404040 +; run: %ushr_i128_i64(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i64(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i64(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ushr_i128_i64(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ushr_i128_i64(0x56780000_00000000_12340000_00000000, 0) == 
0x56780000_00000000_12340000_00000000 +; run: %ushr_i128_i64(0x56780000_00000000_12340000_00000000, 64) == 0x00000000_00000000_56780000_00000000 +; run: %ushr_i128_i64(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_56780000_00000000_12340000 +; run: %ushr_i128_i64(0x01010101_01010101_01010101_01010101, 129) == 0x00808080_80808080_80808080_80808080 +; run: %ushr_i128_i64(0x01010101_01010101_01010101_01010101, 130) == 0x00404040_40404040_40404040_40404040 -function %ushr_i128_i64(i64, i64, i64) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i64): - v3 = iconcat v0, v1 - - v4 = ushr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 +function %ushr_i128_i32(i128, i32) -> i128 { +block0(v0: i128, v1: i32): + v2 = ushr.i128 v0, v1 + return v2 } -; run: %ushr_i128_i64(0x01010101_01010101, 0x01010101_01010101, 2) == [0x40404040_40404040, 0x00404040_40404040] -; run: %ushr_i128_i64(0x01010101_01010101, 0x01010101_01010101, 66) == [0x00404040_40404040, 0x00000000_00000000] -; run: %ushr_i128_i64(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i64(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i64(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ushr_i128_i64(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ushr_i128_i64(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ushr_i128_i64(0x12340000_00000000, 0x56780000_00000000, 64) == [0x56780000_00000000, 0x00000000_00000000] -; run: %ushr_i128_i64(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_12340000, 0x00000000_56780000] -; run: %ushr_i128_i64(0x01010101_01010101, 0x01010101_01010101, 129) == [0x80808080_80808080, 0x00808080_80808080] -; run: %ushr_i128_i64(0x01010101_01010101, 0x01010101_01010101, 130) == [0x40404040_40404040, 
0x00404040_40404040] +; run: %ushr_i128_i32(0x01010101_01010101_01010101_01010101, 2) == 0x00404040_40404040_40404040_40404040 +; run: %ushr_i128_i32(0x01010101_01010101_01010101_01010101, 66) == 0x00000000_00000000_00404040_40404040 +; run: %ushr_i128_i32(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i32(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i32(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ushr_i128_i32(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ushr_i128_i32(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ushr_i128_i32(0x56780000_00000000_12340000_00000000, 64) == 0x00000000_00000000_56780000_00000000 +; run: %ushr_i128_i32(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_56780000_00000000_12340000 +; run: %ushr_i128_i32(0x01010101_01010101_01010101_01010101, 129) == 0x00808080_80808080_80808080_80808080 +; run: %ushr_i128_i32(0x01010101_01010101_01010101_01010101, 130) == 0x00404040_40404040_40404040_40404040 -function %ushr_i128_i32(i64, i64, i32) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i32): - v3 = iconcat v0, v1 - - v4 = ushr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 +function %ushr_i128_i16(i128, i16) -> i128 { +block0(v0: i128, v1: i16): + v2 = ushr.i128 v0, v1 + return v2 } -; run: %ushr_i128_i32(0x01010101_01010101, 0x01010101_01010101, 2) == [0x40404040_40404040, 0x00404040_40404040] -; run: %ushr_i128_i32(0x01010101_01010101, 0x01010101_01010101, 66) == [0x00404040_40404040, 0x00000000_00000000] -; run: %ushr_i128_i32(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i32(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i32(0x00000000_00000001, 
0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ushr_i128_i32(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ushr_i128_i32(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ushr_i128_i32(0x12340000_00000000, 0x56780000_00000000, 64) == [0x56780000_00000000, 0x00000000_00000000] -; run: %ushr_i128_i32(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_12340000, 0x00000000_56780000] -; run: %ushr_i128_i32(0x01010101_01010101, 0x01010101_01010101, 129) == [0x80808080_80808080, 0x00808080_80808080] -; run: %ushr_i128_i32(0x01010101_01010101, 0x01010101_01010101, 130) == [0x40404040_40404040, 0x00404040_40404040] +; run: %ushr_i128_i16(0x01010101_01010101_01010101_01010101, 2) == 0x00404040_40404040_40404040_40404040 +; run: %ushr_i128_i16(0x01010101_01010101_01010101_01010101, 66) == 0x00000000_00000000_00404040_40404040 +; run: %ushr_i128_i16(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i16(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i16(0x00000000_00000000_00000000_00000001, 0) == 0x00000000_00000000_00000000_00000001 +; run: %ushr_i128_i16(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ushr_i128_i16(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ushr_i128_i16(0x56780000_00000000_12340000_00000000, 64) == 0x00000000_00000000_56780000_00000000 +; run: %ushr_i128_i16(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_56780000_00000000_12340000 +; run: %ushr_i128_i16(0x01010101_01010101_01010101_01010101, 129) == 0x00808080_80808080_80808080_80808080 +; run: %ushr_i128_i16(0x01010101_01010101_01010101_01010101, 130) == 0x00404040_40404040_40404040_40404040 -function %ushr_i128_i16(i64, i64, i16) -> i64, i64 { 
-block0(v0: i64, v1: i64, v2: i16): - v3 = iconcat v0, v1 - - v4 = ushr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 +function %ushr_i128_i8(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = ushr.i128 v0, v1 + return v2 } -; run: %ushr_i128_i16(0x01010101_01010101, 0x01010101_01010101, 2) == [0x40404040_40404040, 0x00404040_40404040] -; run: %ushr_i128_i16(0x01010101_01010101, 0x01010101_01010101, 66) == [0x00404040_40404040, 0x00000000_00000000] -; run: %ushr_i128_i16(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i16(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i16(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ushr_i128_i16(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ushr_i128_i16(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ushr_i128_i16(0x12340000_00000000, 0x56780000_00000000, 64) == [0x56780000_00000000, 0x00000000_00000000] -; run: %ushr_i128_i16(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_12340000, 0x00000000_56780000] -; run: %ushr_i128_i16(0x01010101_01010101, 0x01010101_01010101, 129) == [0x80808080_80808080, 0x00808080_80808080] -; run: %ushr_i128_i16(0x01010101_01010101, 0x01010101_01010101, 130) == [0x40404040_40404040, 0x00404040_40404040] +; run: %ushr_i128_i8(0x01010101_01010101_01010101_01010101, 2) == 0x00404040_40404040_40404040_40404040 +; run: %ushr_i128_i8(0x01010101_01010101_01010101_01010101, 66) == 0x00000000_00000000_00404040_40404040 +; run: %ushr_i128_i8(0x01010101_01010101_01010101_01010101, 0) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i8(0x01010101_01010101_01010101_01010101, 128) == 0x01010101_01010101_01010101_01010101 +; run: %ushr_i128_i8(0x00000000_00000000_00000000_00000001, 0) == 
0x00000000_00000000_00000000_00000001 +; run: %ushr_i128_i8(0x00000000_00000001_00000000_00000000, 0) == 0x00000000_00000001_00000000_00000000 +; run: %ushr_i128_i8(0x56780000_00000000_12340000_00000000, 0) == 0x56780000_00000000_12340000_00000000 +; run: %ushr_i128_i8(0x56780000_00000000_12340000_00000000, 64) == 0x00000000_00000000_56780000_00000000 +; run: %ushr_i128_i8(0x56780000_00000000_12340000_00000000, 32) == 0x00000000_56780000_00000000_12340000 +; run: %ushr_i128_i8(0x01010101_01010101_01010101_01010101, 129) == 0x00808080_80808080_80808080_80808080 +; run: %ushr_i128_i8(0x01010101_01010101_01010101_01010101, 130) == 0x00404040_40404040_40404040_40404040 -function %ushr_i128_i8(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - - v4 = ushr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 +function %ushr_i64_i128(i64, i128) -> i64 { +block0(v0: i64, v1: i128): + v2 = ushr.i64 v0, v1 + return v2 } -; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 2) == [0x40404040_40404040, 0x00404040_40404040] -; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 66) == [0x00404040_40404040, 0x00000000_00000000] -; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 0) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 128) == [0x01010101_01010101, 0x01010101_01010101] -; run: %ushr_i128_i8(0x00000000_00000001, 0x00000000_00000000, 0) == [0x00000000_00000001, 0x00000000_00000000] -; run: %ushr_i128_i8(0x00000000_00000000, 0x00000000_00000001, 0) == [0x00000000_00000000, 0x00000000_00000001] -; run: %ushr_i128_i8(0x12340000_00000000, 0x56780000_00000000, 0) == [0x12340000_00000000, 0x56780000_00000000] -; run: %ushr_i128_i8(0x12340000_00000000, 0x56780000_00000000, 64) == [0x56780000_00000000, 0x00000000_00000000] -; run: %ushr_i128_i8(0x12340000_00000000, 0x56780000_00000000, 32) == [0x00000000_12340000, 0x00000000_56780000] -; run: 
%ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 129) == [0x80808080_80808080, 0x00808080_80808080] -; run: %ushr_i128_i8(0x01010101_01010101, 0x01010101_01010101, 130) == [0x40404040_40404040, 0x00404040_40404040] - -function %ushr_i64_i128(i64, i64, i64) -> i64 { -block0(v0: i64, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = ushr.i64 v0, v3 - return v4 -} -; run: %ushr_i64_i128(0x10000000_10000000, 0, 0) == 0x10000000_10000000 -; run: %ushr_i64_i128(0x10000000_10000000, 1, 0) == 0x08000000_08000000 -; run: %ushr_i64_i128(0xf0000000_f0000000, 0, 4) == 0xf0000000_f0000000 -; run: %ushr_i64_i128(0xf0000000_f0000000, 4, 0) == 0x0f000000_0f000000 -; run: %ushr_i64_i128(0x40000000_40000000, 64, 0) == 0x40000000_40000000 -; run: %ushr_i64_i128(0x40000000_40000000, 65, 0) == 0x20000000_20000000 -; run: %ushr_i64_i128(0x40000000_40000000, 66, 1) == 0x10000000_10000000 +; run: %ushr_i64_i128(0x10000000_10000000, 0) == 0x10000000_10000000 +; run: %ushr_i64_i128(0x10000000_10000000, 1) == 0x08000000_08000000 +; run: %ushr_i64_i128(0xf0000000_f0000000, 4) == 0x0f000000_0f000000 +; run: %ushr_i64_i128(0x40000000_40000000, 64) == 0x40000000_40000000 +; run: %ushr_i64_i128(0x40000000_40000000, 65) == 0x20000000_20000000 +; run: %ushr_i64_i128(0xf0000000_f0000000, 0x00000000_00000004_00000000_00000000) == 0xf0000000_f0000000 +; run: %ushr_i64_i128(0x40000000_40000000, 0x00000000_00000001_00000000_00000042) == 0x10000000_10000000 function %ushr_i32_i128(i32, i64, i64) -> i32 { block0(v0: i32, v1: i64, v2: i64): @@ -283,130 +244,110 @@ block0(v0: i32, v1: i64, v2: i64): -function %sshr_i128_i128(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - v4 = uextend.i64 v2 - v5 = iconcat v4, v4 +function %sshr_i128_i128(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = uextend.i64 v1 + v3 = iconcat v2, v2 - v6 = sshr.i128 v3, v5 + v4 = sshr.i128 v0, v3 - v7, v8 = isplit v6 - return v7, v8 -} -; run: %sshr_i128_i128(0x01010101_01010101, 
0x81010101_01010101, 2) == [0x40404040_40404040, 0xe0404040_40404040] -; run: %sshr_i128_i128(0x00000000_00000000, 0xffffffff_ffffffff, 32) == [0xffffffff_00000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i128(0x80000000_00000000, 0xffffffff_00000000, 32) == [0x00000000_80000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i128(0x12345678_9abcdef0, 0x80101010_10101010, 66) == [0xe0040404_04040404, 0xffffffff_ffffffff] -; run: %sshr_i128_i128(0x00000000_00000000, 0x00000000_00000000, 64) == [0x00000000_00000000, 0x00000000_00000000] -; run: %sshr_i128_i128(0x12345678_9abcdef0, 0x80101010_10101010, 0) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i128(0x12345678_9abcdef0, 0x80101010_10101010, 128) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i128(0x01010101_01010101, 0x81010101_01010101, 129) == [0x80808080_80808080, 0xc0808080_80808080] -; run: %sshr_i128_i128(0x01010101_01010101, 0x81010101_01010101, 130) == [0x40404040_40404040, 0xe0404040_40404040] - -function %sshr_i128_i64(i64, i64, i64) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i64): - v3 = iconcat v0, v1 - - v4 = sshr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 -} -; run: %sshr_i128_i64(0x01010101_01010101, 0x81010101_01010101, 2) == [0x40404040_40404040, 0xe0404040_40404040] -; run: %sshr_i128_i64(0x00000000_00000000, 0xffffffff_ffffffff, 32) == [0xffffffff_00000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i64(0x80000000_00000000, 0xffffffff_00000000, 32) == [0x00000000_80000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i64(0x12345678_9abcdef0, 0x80101010_10101010, 66) == [0xe0040404_04040404, 0xffffffff_ffffffff] -; run: %sshr_i128_i64(0x00000000_00000000, 0x00000000_00000000, 64) == [0x00000000_00000000, 0x00000000_00000000] -; run: %sshr_i128_i64(0x12345678_9abcdef0, 0x80101010_10101010, 0) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i64(0x12345678_9abcdef0, 0x80101010_10101010, 128) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: 
%sshr_i128_i64(0x01010101_01010101, 0x81010101_01010101, 129) == [0x80808080_80808080, 0xc0808080_80808080] -; run: %sshr_i128_i64(0x01010101_01010101, 0x81010101_01010101, 130) == [0x40404040_40404040, 0xe0404040_40404040] - - -function %sshr_i128_i32(i64, i64, i32) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i32): - v3 = iconcat v0, v1 - - v4 = sshr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 -} -; run: %sshr_i128_i32(0x01010101_01010101, 0x81010101_01010101, 2) == [0x40404040_40404040, 0xe0404040_40404040] -; run: %sshr_i128_i32(0x00000000_00000000, 0xffffffff_ffffffff, 32) == [0xffffffff_00000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i32(0x80000000_00000000, 0xffffffff_00000000, 32) == [0x00000000_80000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i32(0x12345678_9abcdef0, 0x80101010_10101010, 66) == [0xe0040404_04040404, 0xffffffff_ffffffff] -; run: %sshr_i128_i32(0x00000000_00000000, 0x00000000_00000000, 64) == [0x00000000_00000000, 0x00000000_00000000] -; run: %sshr_i128_i32(0x12345678_9abcdef0, 0x80101010_10101010, 0) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i32(0x12345678_9abcdef0, 0x80101010_10101010, 128) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i32(0x01010101_01010101, 0x81010101_01010101, 129) == [0x80808080_80808080, 0xc0808080_80808080] -; run: %sshr_i128_i32(0x01010101_01010101, 0x81010101_01010101, 130) == [0x40404040_40404040, 0xe0404040_40404040] - - -function %sshr_i128_i16(i64, i64, i16) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i16): - v3 = iconcat v0, v1 - - v4 = sshr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 -} -; run: %sshr_i128_i16(0x01010101_01010101, 0x81010101_01010101, 2) == [0x40404040_40404040, 0xe0404040_40404040] -; run: %sshr_i128_i16(0x00000000_00000000, 0xffffffff_ffffffff, 32) == [0xffffffff_00000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i16(0x80000000_00000000, 0xffffffff_00000000, 32) == [0x00000000_80000000, 0xffffffff_ffffffff] -; run: 
%sshr_i128_i16(0x12345678_9abcdef0, 0x80101010_10101010, 66) == [0xe0040404_04040404, 0xffffffff_ffffffff] -; run: %sshr_i128_i16(0x00000000_00000000, 0x00000000_00000000, 64) == [0x00000000_00000000, 0x00000000_00000000] -; run: %sshr_i128_i16(0x12345678_9abcdef0, 0x80101010_10101010, 0) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i16(0x12345678_9abcdef0, 0x80101010_10101010, 128) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i16(0x01010101_01010101, 0x81010101_01010101, 129) == [0x80808080_80808080, 0xc0808080_80808080] -; run: %sshr_i128_i16(0x01010101_01010101, 0x81010101_01010101, 130) == [0x40404040_40404040, 0xe0404040_40404040] - -function %sshr_i128_i8(i64, i64, i8) -> i64, i64 { -block0(v0: i64, v1: i64, v2: i8): - v3 = iconcat v0, v1 - - v4 = sshr.i128 v3, v2 - - v5, v6 = isplit v4 - return v5, v6 -} -; run: %sshr_i128_i8(0x01010101_01010101, 0x81010101_01010101, 2) == [0x40404040_40404040, 0xe0404040_40404040] -; run: %sshr_i128_i8(0x00000000_00000000, 0xffffffff_ffffffff, 32) == [0xffffffff_00000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i8(0x80000000_00000000, 0xffffffff_00000000, 32) == [0x00000000_80000000, 0xffffffff_ffffffff] -; run: %sshr_i128_i8(0x12345678_9abcdef0, 0x80101010_10101010, 66) == [0xe0040404_04040404, 0xffffffff_ffffffff] -; run: %sshr_i128_i8(0x00000000_00000000, 0x00000000_00000000, 64) == [0x00000000_00000000, 0x00000000_00000000] -; run: %sshr_i128_i8(0x12345678_9abcdef0, 0x80101010_10101010, 0) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i8(0x12345678_9abcdef0, 0x80101010_10101010, 128) == [0x12345678_9abcdef0, 0x80101010_10101010] -; run: %sshr_i128_i8(0x01010101_01010101, 0x81010101_01010101, 129) == [0x80808080_80808080, 0xc0808080_80808080] -; run: %sshr_i128_i8(0x01010101_01010101, 0x81010101_01010101, 130) == [0x40404040_40404040, 0xe0404040_40404040] - - -function %sshr_i64_i128(i64, i64, i64) -> i64 { -block0(v0: i64, v1: i64, v2: i64): - v3 = iconcat v1, 
v2 - v4 = sshr.i64 v0, v3 return v4 } -; run: %sshr_i64_i128(0x80000000_80000000, 0, 0) == 0x80000000_80000000 -; run: %sshr_i64_i128(0x80000000_80000000, 1, 0) == 0xC0000000_40000000 -; run: %sshr_i64_i128(0xf0000000_f0000000, 0, 4) == 0xf0000000_f0000000 -; run: %sshr_i64_i128(0xf0000000_f0000000, 4, 0) == 0xff000000_0f000000 -; run: %sshr_i64_i128(0x40000000_40000000, 64, 0) == 0x40000000_40000000 -; run: %sshr_i64_i128(0x40000000_40000000, 65, 0) == 0x20000000_20000000 -; run: %sshr_i64_i128(0x40000000_40000000, 66, 1) == 0x10000000_10000000 +; run: %sshr_i128_i128(0x81010101_01010101_01010101_01010101, 2) == 0xe0404040_40404040_40404040_40404040 +; run: %sshr_i128_i128(0xffffffff_ffffffff_00000000_00000000, 32) == 0xffffffff_ffffffff_ffffffff_00000000 +; run: %sshr_i128_i128(0xffffffff_00000000_80000000_00000000, 32) == 0xffffffff_ffffffff_00000000_80000000 +; run: %sshr_i128_i128(0x80101010_10101010_12345678_9abcdef0, 66) == 0xffffffff_ffffffff_e0040404_04040404 +; run: %sshr_i128_i128(0x00000000_00000000_00000000_00000000, 64) == 0x00000000_00000000_00000000_00000000 +; run: %sshr_i128_i128(0x80101010_10101010_12345678_9abcdef0, 0) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i128(0x80101010_10101010_12345678_9abcdef0, 128) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i128(0x81010101_01010101_01010101_01010101, 129) == 0xc0808080_80808080_80808080_80808080 +; run: %sshr_i128_i128(0x81010101_01010101_01010101_01010101, 130) == 0xe0404040_40404040_40404040_40404040 -function %sshr_i32_i128(i32, i64, i64) -> i32 { -block0(v0: i32, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4 = sshr.i32 v0, v3 - return v4 +function %sshr_i128_i64(i128, i64) -> i128 { +block0(v0: i128, v1: i64): + v2 = sshr.i128 v0, v1 + return v2 } -; run: %sshr_i32_i128(0x80000000, 0, 0) == 0x80000000 -; run: %sshr_i32_i128(0x80000000, 1, 0) == 0xC0000000 -; run: %sshr_i32_i128(0xf0000000, 0, 4) == 0xf0000000 -; run: %sshr_i32_i128(0xf0000000, 4, 0) == 0xff000000 
-; run: %sshr_i32_i128(0x40000000, 32, 0) == 0x40000000 -; run: %sshr_i32_i128(0x40000000, 33, 0) == 0x20000000 -; run: %sshr_i32_i128(0x40000000, 34, 1) == 0x10000000 +; run: %sshr_i128_i64(0x81010101_01010101_01010101_01010101, 2) == 0xe0404040_40404040_40404040_40404040 +; run: %sshr_i128_i64(0xffffffff_ffffffff_00000000_00000000, 32) == 0xffffffff_ffffffff_ffffffff_00000000 +; run: %sshr_i128_i64(0xffffffff_00000000_80000000_00000000, 32) == 0xffffffff_ffffffff_00000000_80000000 +; run: %sshr_i128_i64(0x80101010_10101010_12345678_9abcdef0, 66) == 0xffffffff_ffffffff_e0040404_04040404 +; run: %sshr_i128_i64(0x00000000_00000000_00000000_00000000, 64) == 0x00000000_00000000_00000000_00000000 +; run: %sshr_i128_i64(0x80101010_10101010_12345678_9abcdef0, 0) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i64(0x80101010_10101010_12345678_9abcdef0, 128) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i64(0x81010101_01010101_01010101_01010101, 129) == 0xc0808080_80808080_80808080_80808080 +; run: %sshr_i128_i64(0x81010101_01010101_01010101_01010101, 130) == 0xe0404040_40404040_40404040_40404040 + + +function %sshr_i128_i32(i128, i32) -> i128 { +block0(v0: i128, v1: i32): + v2 = sshr.i128 v0, v1 + return v2 +} +; run: %sshr_i128_i32(0x81010101_01010101_01010101_01010101, 2) == 0xe0404040_40404040_40404040_40404040 +; run: %sshr_i128_i32(0xffffffff_ffffffff_00000000_00000000, 32) == 0xffffffff_ffffffff_ffffffff_00000000 +; run: %sshr_i128_i32(0xffffffff_00000000_80000000_00000000, 32) == 0xffffffff_ffffffff_00000000_80000000 +; run: %sshr_i128_i32(0x80101010_10101010_12345678_9abcdef0, 66) == 0xffffffff_ffffffff_e0040404_04040404 +; run: %sshr_i128_i32(0x00000000_00000000_00000000_00000000, 64) == 0x00000000_00000000_00000000_00000000 +; run: %sshr_i128_i32(0x80101010_10101010_12345678_9abcdef0, 0) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i32(0x80101010_10101010_12345678_9abcdef0, 128) == 0x80101010_10101010_12345678_9abcdef0 +; 
run: %sshr_i128_i32(0x81010101_01010101_01010101_01010101, 129) == 0xc0808080_80808080_80808080_80808080 +; run: %sshr_i128_i32(0x81010101_01010101_01010101_01010101, 130) == 0xe0404040_40404040_40404040_40404040 + + +function %sshr_i128_i16(i128, i16) -> i128 { +block0(v0: i128, v1: i16): + v2 = sshr.i128 v0, v1 + return v2 +} +; run: %sshr_i128_i16(0x81010101_01010101_01010101_01010101, 2) == 0xe0404040_40404040_40404040_40404040 +; run: %sshr_i128_i16(0xffffffff_ffffffff_00000000_00000000, 32) == 0xffffffff_ffffffff_ffffffff_00000000 +; run: %sshr_i128_i16(0xffffffff_00000000_80000000_00000000, 32) == 0xffffffff_ffffffff_00000000_80000000 +; run: %sshr_i128_i16(0x80101010_10101010_12345678_9abcdef0, 66) == 0xffffffff_ffffffff_e0040404_04040404 +; run: %sshr_i128_i16(0x00000000_00000000_00000000_00000000, 64) == 0x00000000_00000000_00000000_00000000 +; run: %sshr_i128_i16(0x80101010_10101010_12345678_9abcdef0, 0) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i16(0x80101010_10101010_12345678_9abcdef0, 128) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i16(0x81010101_01010101_01010101_01010101, 129) == 0xc0808080_80808080_80808080_80808080 +; run: %sshr_i128_i16(0x81010101_01010101_01010101_01010101, 130) == 0xe0404040_40404040_40404040_40404040 + +function %sshr_i128_i8(i128, i8) -> i128 { +block0(v0: i128, v1: i8): + v2 = sshr.i128 v0, v1 + return v2 +} +; run: %sshr_i128_i8(0x81010101_01010101_01010101_01010101, 2) == 0xe0404040_40404040_40404040_40404040 +; run: %sshr_i128_i8(0xffffffff_ffffffff_00000000_00000000, 32) == 0xffffffff_ffffffff_ffffffff_00000000 +; run: %sshr_i128_i8(0xffffffff_00000000_80000000_00000000, 32) == 0xffffffff_ffffffff_00000000_80000000 +; run: %sshr_i128_i8(0x80101010_10101010_12345678_9abcdef0, 66) == 0xffffffff_ffffffff_e0040404_04040404 +; run: %sshr_i128_i8(0x00000000_00000000_00000000_00000000, 64) == 0x00000000_00000000_00000000_00000000 +; run: %sshr_i128_i8(0x80101010_10101010_12345678_9abcdef0, 0) 
== 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i8(0x80101010_10101010_12345678_9abcdef0, 128) == 0x80101010_10101010_12345678_9abcdef0 +; run: %sshr_i128_i8(0x81010101_01010101_01010101_01010101, 129) == 0xc0808080_80808080_80808080_80808080 +; run: %sshr_i128_i8(0x81010101_01010101_01010101_01010101, 130) == 0xe0404040_40404040_40404040_40404040 + + +function %sshr_i64_i128(i64, i128) -> i64 { +block0(v0: i64, v1: i128): + v2 = sshr.i64 v0, v1 + return v2 +} +; run: %sshr_i64_i128(0x80000000_80000000, 0) == 0x80000000_80000000 +; run: %sshr_i64_i128(0x80000000_80000000, 1) == 0xC0000000_40000000 +; run: %sshr_i64_i128(0xf0000000_f0000000, 4) == 0xff000000_0f000000 +; run: %sshr_i64_i128(0x40000000_40000000, 64) == 0x40000000_40000000 +; run: %sshr_i64_i128(0x40000000_40000000, 65) == 0x20000000_20000000 +; run: %sshr_i64_i128(0xf0000000_f0000000, 0x00000000_00000004_00000000_00000000) == 0xf0000000_f0000000 +; run: %sshr_i64_i128(0x40000000_40000000, 0x00000000_00000001_00000000_00000042) == 0x10000000_10000000 + +function %sshr_i32_i128(i32, i128) -> i32 { +block0(v0: i32, v1: i128): + v2 = sshr.i32 v0, v1 + return v2 +} +; run: %sshr_i32_i128(0x80000000, 0) == 0x80000000 +; run: %sshr_i32_i128(0x80000000, 1) == 0xC0000000 +; run: %sshr_i32_i128(0xf0000000, 4) == 0xff000000 +; run: %sshr_i32_i128(0x40000000, 32) == 0x40000000 +; run: %sshr_i32_i128(0x40000000, 33) == 0x20000000 +; run: %sshr_i32_i128(0xf0000000, 0x00000000_00000004_00000000_00000000) == 0xf0000000 +; run: %sshr_i32_i128(0x40000000, 0x00000000_00000001_00000000_00000022) == 0x10000000 diff --git a/cranelift/filetests/filetests/runtests/iabs.clif b/cranelift/filetests/filetests/runtests/iabs.clif new file mode 100644 index 0000000000..acf2bf8584 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/iabs.clif @@ -0,0 +1,42 @@ +test interpret +; aarch64 & x86_64 only support vector iabs + +function %iabs_i8(i8) -> i8 { +block0(v0: i8): + v1 = iabs v0 + return v1 +} +; run: 
%iabs_i8(0) == 0 +; run: %iabs_i8(127) == 127 +; run: %iabs_i8(-127) == 127 +; run: %iabs_i8(-128) == -128 + +function %iabs_i16(i16) -> i16 { +block0(v0: i16): + v1 = iabs v0 + return v1 +} +; run: %iabs_i16(0) == 0 +; run: %iabs_i16(32767) == 32767 +; run: %iabs_i16(-32767) == 32767 +; run: %iabs_i16(-32768) == -32768 + +function %iabs_i32(i32) -> i32 { +block0(v0: i32): + v1 = iabs v0 + return v1 +} +; run: %iabs_i32(0) == 0 +; run: %iabs_i32(2147483647) == 2147483647 +; run: %iabs_i32(-2147483647) == 2147483647 +; run: %iabs_i32(-2147483648) == -2147483648 + +function %iabs_i64(i64) -> i64 { +block0(v0: i64): + v1 = iabs v0 + return v1 +} +; run: %iabs_i64(0) == 0 +; run: %iabs_i64(9223372036854775807) == 9223372036854775807 +; run: %iabs_i64(-9223372036854775807) == 9223372036854775807 +; run: %iabs_i64(-9223372036854775808) == -9223372036854775808 \ No newline at end of file diff --git a/cranelift/filetests/filetests/runtests/icmp-eq.clif b/cranelift/filetests/filetests/runtests/icmp-eq.clif index 4708e6da3e..ccd9c2ffe8 100644 --- a/cranelift/filetests/filetests/runtests/icmp-eq.clif +++ b/cranelift/filetests/filetests/runtests/icmp-eq.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_eq_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-ne.clif b/cranelift/filetests/filetests/runtests/icmp-ne.clif index 79dd304ed4..6e841b876b 100644 --- a/cranelift/filetests/filetests/runtests/icmp-ne.clif +++ b/cranelift/filetests/filetests/runtests/icmp-ne.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_ne_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-nof.clif b/cranelift/filetests/filetests/runtests/icmp-nof.clif index 1814fa1547..6817b01651 100644 --- a/cranelift/filetests/filetests/runtests/icmp-nof.clif +++ 
b/cranelift/filetests/filetests/runtests/icmp-nof.clif @@ -1,6 +1,6 @@ test interpret test run -target x86_64 machinst +target x86_64 function %icmp_nof_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-of.clif b/cranelift/filetests/filetests/runtests/icmp-of.clif index d45917ffd1..26565d4ce2 100644 --- a/cranelift/filetests/filetests/runtests/icmp-of.clif +++ b/cranelift/filetests/filetests/runtests/icmp-of.clif @@ -1,6 +1,6 @@ test interpret test run -target x86_64 machinst +target x86_64 function %icmp_of_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-sge.clif b/cranelift/filetests/filetests/runtests/icmp-sge.clif index ccdcf80215..98981981e7 100644 --- a/cranelift/filetests/filetests/runtests/icmp-sge.clif +++ b/cranelift/filetests/filetests/runtests/icmp-sge.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_sge_i8(i8, i8) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/icmp-sgt.clif b/cranelift/filetests/filetests/runtests/icmp-sgt.clif index 9395ab6af1..c5f036b39f 100644 --- a/cranelift/filetests/filetests/runtests/icmp-sgt.clif +++ b/cranelift/filetests/filetests/runtests/icmp-sgt.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_sgt_i8(i8, i8) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/icmp-sle.clif b/cranelift/filetests/filetests/runtests/icmp-sle.clif index 040f6a3b29..586c3a0c1b 100644 --- a/cranelift/filetests/filetests/runtests/icmp-sle.clif +++ b/cranelift/filetests/filetests/runtests/icmp-sle.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_sle_i8(i8, i8) -> b1 { diff --git a/cranelift/filetests/filetests/runtests/icmp-slt.clif b/cranelift/filetests/filetests/runtests/icmp-slt.clif index 826676b460..89b37904b0 100644 --- 
a/cranelift/filetests/filetests/runtests/icmp-slt.clif +++ b/cranelift/filetests/filetests/runtests/icmp-slt.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_slt_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-uge.clif b/cranelift/filetests/filetests/runtests/icmp-uge.clif index bdc73e6e8e..8e6bda917f 100644 --- a/cranelift/filetests/filetests/runtests/icmp-uge.clif +++ b/cranelift/filetests/filetests/runtests/icmp-uge.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_uge_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-ugt.clif b/cranelift/filetests/filetests/runtests/icmp-ugt.clif index 98d5634157..5e99ec4ff2 100644 --- a/cranelift/filetests/filetests/runtests/icmp-ugt.clif +++ b/cranelift/filetests/filetests/runtests/icmp-ugt.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_ugt_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-ule.clif b/cranelift/filetests/filetests/runtests/icmp-ule.clif index d7d0a72a3a..38ce80a309 100644 --- a/cranelift/filetests/filetests/runtests/icmp-ule.clif +++ b/cranelift/filetests/filetests/runtests/icmp-ule.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_ule_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff --git a/cranelift/filetests/filetests/runtests/icmp-ult.clif b/cranelift/filetests/filetests/runtests/icmp-ult.clif index cd1e794deb..c0821afb87 100644 --- a/cranelift/filetests/filetests/runtests/icmp-ult.clif +++ b/cranelift/filetests/filetests/runtests/icmp-ult.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %icmp_ult_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): diff 
--git a/cranelift/filetests/filetests/runtests/icmp.clif b/cranelift/filetests/filetests/runtests/icmp.clif index 5f13fdffec..e33d3728e5 100644 --- a/cranelift/filetests/filetests/runtests/icmp.clif +++ b/cranelift/filetests/filetests/runtests/icmp.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 target s390x -target x86_64 machinst +target x86_64 ; This test is also a regression test for aarch64. ; We were not correctly handling the fact that the rhs constant value diff --git a/cranelift/filetests/filetests/runtests/shifts.clif b/cranelift/filetests/filetests/runtests/shifts.clif index a7b27ca3ed..c39ff3a0d4 100644 --- a/cranelift/filetests/filetests/runtests/shifts.clif +++ b/cranelift/filetests/filetests/runtests/shifts.clif @@ -1,6 +1,6 @@ test run target aarch64 -target x86_64 machinst +target x86_64 target s390x diff --git a/cranelift/filetests/filetests/runtests/simd-arithmetic-nondeterministic-x86_64.clif b/cranelift/filetests/filetests/runtests/simd-arithmetic-nondeterministic-x86_64.clif index 323579b6ce..caa49d9534 100644 --- a/cranelift/filetests/filetests/runtests/simd-arithmetic-nondeterministic-x86_64.clif +++ b/cranelift/filetests/filetests/runtests/simd-arithmetic-nondeterministic-x86_64.clif @@ -3,7 +3,7 @@ ; simd-arithmetic-nondeterministic*.clif as well. 
test run set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %fmax_f64x2(f64x2, f64x2) -> f64x2 { block0(v0: f64x2, v1: f64x2): diff --git a/cranelift/filetests/filetests/runtests/simd-arithmetic.clif b/cranelift/filetests/filetests/runtests/simd-arithmetic.clif index 5397f5d874..1ca8e8fcfa 100644 --- a/cranelift/filetests/filetests/runtests/simd-arithmetic.clif +++ b/cranelift/filetests/filetests/runtests/simd-arithmetic.clif @@ -2,9 +2,7 @@ test run target aarch64 ; target s390x TODO: Not yet implemented on s390x set enable_simd -target x86_64 machinst skylake -set enable_simd -target x86_64 legacy skylake +target x86_64 skylake function %iadd_i32x4(i32x4, i32x4) -> i32x4 { block0(v0:i32x4, v1:i32x4): diff --git a/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif b/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif index ae23e1552c..a2086b0426 100644 --- a/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif +++ b/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif @@ -3,12 +3,7 @@ target aarch64 ; target s390x TODO: Not yet implemented on s390x set opt_level=speed_and_size set enable_simd -target x86_64 machinst skylake -set opt_level=speed_and_size -set enable_simd -target x86_64 legacy haswell - -;; x86_64 legacy: Test if bitselect->vselect optimization works properly +target x86_64 skylake function %mask_from_icmp(i32x4, i32x4) -> i32x4 { block0(v0: i32x4, v1: i32x4): diff --git a/cranelift/filetests/filetests/runtests/simd-bitwise-run.clif b/cranelift/filetests/filetests/runtests/simd-bitwise-run.clif index 4f66e51ec4..af7b24d5e6 100644 --- a/cranelift/filetests/filetests/runtests/simd-bitwise-run.clif +++ b/cranelift/filetests/filetests/runtests/simd-bitwise-run.clif @@ -1,7 +1,7 @@ test run set enable_simd target aarch64 -target x86_64 legacy skylake +target x86_64 skylake ; TODO: once available, replace all lane extraction with `icmp + all_ones` diff 
--git a/cranelift/filetests/filetests/runtests/simd-bitwise.clif b/cranelift/filetests/filetests/runtests/simd-bitwise.clif index 44474b5b80..670844db22 100644 --- a/cranelift/filetests/filetests/runtests/simd-bitwise.clif +++ b/cranelift/filetests/filetests/runtests/simd-bitwise.clif @@ -2,7 +2,7 @@ test run target aarch64 ; target s390x TODO: Not yet implemented on s390x set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %bitselect_i8x16(i8x16, i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16, v2: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-bmask.clif b/cranelift/filetests/filetests/runtests/simd-bmask.clif new file mode 100644 index 0000000000..ba504f7868 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-bmask.clif @@ -0,0 +1,30 @@ +test interpret + + +function %bmask_i8x16(b8x16) -> i8x16 { +block0(v0: b8x16): + v1 = bmask.i8x16 v0 + return v1 +} +; run: %bmask_i8x16([true false true false true false true false true false true false true false true false]) == [-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0] + +function %bmask_i16x8(b16x8) -> i16x8 { +block0(v0: b16x8): + v1 = bmask.i16x8 v0 + return v1 +} +; run: %bmask_i16x8([true false true false true false true false]) == [-1 0 -1 0 -1 0 -1 0] + +function %bmask_i32x4(b32x4) -> i32x4 { +block0(v0: b32x4): + v1 = bmask.i32x4 v0 + return v1 +} +; run: %bmask_i32x4([true false true false]) == [-1 0 -1 0] + +function %bmask_i64x2(b64x2) -> i64x2 { +block0(v0: b64x2): + v1 = bmask.i64x2 v0 + return v1 +} +; run: %bmask_i64x2([true false]) == [-1 0] diff --git a/cranelift/filetests/filetests/runtests/simd-comparison-legacy.clif b/cranelift/filetests/filetests/runtests/simd-comparison-legacy.clif deleted file mode 100644 index 4ed7ae8224..0000000000 --- a/cranelift/filetests/filetests/runtests/simd-comparison-legacy.clif +++ /dev/null @@ -1,44 +0,0 @@ -test run -set enable_simd -target x86_64 legacy - -function %maxs_i8x16() -> b1 { -block0: - v0 = 
vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] ; 1 will be greater than -1 == 0xff with - ; signed max - v1 = vconst.i8x16 [0xff 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] - v2 = x86_pmaxs v0, v1 - v8 = vall_true v2 - return v8 -} -; run - -function %maxu_i16x8() -> b1 { -block0: - v0 = vconst.i16x8 [0 1 1 1 1 1 1 1] - v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1] ; -1 == 0xff will be greater with unsigned max - v2 = x86_pmaxu v0, v1 - v8 = vall_true v2 - return v8 -} -; run - -function %mins_i32x4() -> b1 { -block0: - v0 = vconst.i32x4 [0 1 1 1] - v1 = vconst.i32x4 [-1 1 1 1] ; -1 == 0xff will be less with signed min - v2 = x86_pmins v0, v1 - v8 = vall_true v2 - return v8 -} -; run - -function %minu_i8x16() -> b1 { -block0: - v0 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] ; 1 < 2 with unsiged min - v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] - v2 = x86_pminu v0, v1 - v8 = vall_true v2 - return v8 -} -; run diff --git a/cranelift/filetests/filetests/runtests/simd-comparison.clif b/cranelift/filetests/filetests/runtests/simd-comparison.clif index 33402b1175..c704e5a3b5 100644 --- a/cranelift/filetests/filetests/runtests/simd-comparison.clif +++ b/cranelift/filetests/filetests/runtests/simd-comparison.clif @@ -2,9 +2,7 @@ test run target aarch64 ; target s390x TODO: Not yet implemented on s390x set enable_simd -target x86_64 machinst -set enable_simd -target x86_64 legacy +target x86_64 function %icmp_eq_i8x16() -> b8 { block0: diff --git a/cranelift/filetests/filetests/runtests/simd-conversion.clif b/cranelift/filetests/filetests/runtests/simd-conversion.clif index 4f984b8c48..b950a9a2cd 100644 --- a/cranelift/filetests/filetests/runtests/simd-conversion.clif +++ b/cranelift/filetests/filetests/runtests/simd-conversion.clif @@ -2,9 +2,7 @@ test run target aarch64 ; target s390x TODO: Not yet implemented on s390x set enable_simd -target x86_64 machinst -set enable_simd -target x86_64 legacy +target x86_64 function %fcvt_from_sint(i32x4) -> f32x4 { block0(v0: i32x4): diff 
--git a/cranelift/filetests/filetests/runtests/simd-iabs.clif b/cranelift/filetests/filetests/runtests/simd-iabs.clif new file mode 100644 index 0000000000..ee1db6762c --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-iabs.clif @@ -0,0 +1,34 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %iabs_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iabs v0 + return v1 +} +; run: %iabs_i8x16([0 0 0 0 127 127 127 127 -127 -127 -127 -127 -128 -128 -128 -128]) == [0 0 0 0 127 127 127 127 127 127 127 127 -128 -128 -128 -128] + +function %iabs_i16x8(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iabs v0 + return v1 +} +; run: %iabs_i16x8([0 0 32767 32767 -32767 -32767 -32768 -32768]) == [0 0 32767 32767 32767 32767 -32768 -32768] + +function %iabs_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iabs v0 + return v1 +} +; run: %iabs_i32x4([0 2147483647 -2147483647 -2147483648]) == [0 2147483647 2147483647 -2147483648] + +function %iabs_i64x2(i64x2) -> i64x2 { +block0(v0: i64x2): + v1 = iabs v0 + return v1 +} +; run: %iabs_i64x2([0 9223372036854775807]) == [0 9223372036854775807] +; run: %iabs_i64x2([-9223372036854775807 -9223372036854775808]) == [9223372036854775807 -9223372036854775808] diff --git a/cranelift/filetests/filetests/runtests/simd-lane-access-legacy.clif b/cranelift/filetests/filetests/runtests/simd-lane-access-legacy.clif deleted file mode 100644 index 3318635035..0000000000 --- a/cranelift/filetests/filetests/runtests/simd-lane-access-legacy.clif +++ /dev/null @@ -1,221 +0,0 @@ -test run -set enable_simd -target x86_64 legacy - -function %shuffle_different_ssa_values() -> b1 { -block0: - v0 = vconst.i8x16 0x00 - v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42] - v2 = shuffle v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31] ; use the first lane of v0 throughout except use the last lane of v1 - v3 = extractlane.i8x16 v2, 15 - v4 = iconst.i8 42 - v5 = icmp eq v3, v4 - return v5 -} -; run - -function 
%shuffle_same_ssa_value() -> b1 { -block0: - v0 = vconst.i8x16 0x01000000_00000000_00000000_00000000 ; note where lane 15 is when written with hexadecimal syntax - v1 = shuffle v0, v0, 0x0f0f0f0f_0f0f0f0f_0f0f0f0f_0f0f0f0f ; use the last lane of v0 to fill all lanes - v2 = extractlane.i8x16 v1, 4 - v3 = iconst.i8 0x01 - v4 = icmp eq v2, v3 - return v4 -} -; run - -function %compare_shuffle() -> b1 { -block0: - v1 = vconst.i32x4 [0 1 2 3] - v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 - ; keep each lane in place from the first vector - v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v4 = raw_bitcast.i32x4 v3 - v5 = extractlane.i32x4 v4, 3 - v6 = icmp_imm eq v5, 3 - v7 = extractlane.i32x4 v4, 0 - v8 = icmp_imm eq v7, 0 - v9 = band v6, v8 - return v9 -} -; run - -function %compare_shuffle() -> b32 { -block0: - v1 = vconst.b32x4 [true false true false] - v2 = raw_bitcast.b8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 - ; pair up the true values to make the entire vector true - v3 = shuffle v2, v2, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11] - v4 = raw_bitcast.b32x4 v3 - v5 = extractlane v4, 3 - v6 = extractlane v4, 0 - v7 = band v5, v6 - return v7 -} -; run - -; TODO once SIMD vector comparison is implemented, remove use of extractlane below - -function %insertlane_b8() -> b8 { -block0: - v1 = bconst.b8 true - v2 = vconst.b8x16 [false false false false false false false false false false false false false - false false false] - v3 = insertlane v2, v1, 10 - v4 = extractlane v3, 10 - return v4 -} -; run - -function %insertlane_f32() -> b1 { -block0: - v0 = f32const 0x42.42 - v1 = vconst.f32x4 0x00 - v2 = insertlane v1, v0, 1 - v3 = extractlane v2, 1 - v4 = fcmp eq v3, v0 - return v4 -} -; run - -function %insertlane_f64_lane1() -> b1 { -block0: - v0 = f64const 0x42.42 - v1 = vconst.f64x2 0x00 - v2 = insertlane v1, v0, 1 - v3 = extractlane v2, 1 - v4 = fcmp eq v3, v0 - return v4 -} -; run - -function 
%insertlane_f64_lane0() -> b1 { -block0: - v0 = f64const 0x42.42 - v1 = vconst.f64x2 0x00 - v2 = insertlane v1, v0, 0 - v3 = extractlane v2, 0 - v4 = fcmp eq v3, v0 - return v4 -} -; run - -function %extractlane_b8() -> b8 { -block0: - v1 = vconst.b8x16 [false false false false false false false false false false true false false - false false false] - v2 = extractlane v1, 10 - return v2 -} -; run - -function %extractlane_i16() -> b1 { -block0: - v0 = vconst.i16x8 0x00080007000600050004000300020001 - v1 = extractlane v0, 1 - v2 = icmp_imm eq v1, 2 - return v2 -} -; run - -function %extractlane_f32() -> b1 { -block0: - v0 = f32const 0x42.42 - v1 = vconst.f32x4 [0x00.00 0x00.00 0x00.00 0x42.42] - v2 = extractlane v1, 3 - v3 = fcmp eq v2, v0 - return v3 -} -; run - -function %extractlane_i32_with_vector_reuse() -> b1 { -block0: - v0 = iconst.i32 42 - v1 = iconst.i32 99 - - v2 = splat.i32x4 v0 - v3 = insertlane v2, v1, 2 - - v4 = extractlane v3, 3 - v5 = icmp eq v4, v0 - - v6 = extractlane v3, 2 - v7 = icmp eq v6, v1 - - v8 = band v5, v7 - return v8 -} -; run - -function %extractlane_f32_with_vector_reuse() -> b1 { -block0: - v0 = f32const 0x42.42 - v1 = f32const 0x99.99 - - v2 = splat.f32x4 v0 - v3 = insertlane v2, v1, 2 - - v4 = extractlane v3, 3 - v5 = fcmp eq v4, v0 - - v6 = extractlane v3, 2 - v7 = fcmp eq v6, v1 - - v8 = band v5, v7 - return v8 -} -; run - -function %swizzle() -> b1 { -block0: - v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v1 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 42] - v2 = swizzle.i8x16 v0, v1 ; reverse the lanes, with over-large index 42 using lane 0 - - v3 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0] - v4 = icmp eq v2, v3 - v5 = vall_true v4 - return v5 -} -; run - -function %swizzle_with_overflow() -> b1 { -block0: - v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v1 = vconst.i8x16 [16 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0] - v2 = swizzle.i8x16 v0, v1 ; 250 should overflow but saturate so that the 
MSB is set (PSHUFB uses this to shuffle from lane 0) - - v3 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] - v4 = icmp eq v2, v3 - v5 = vall_true v4 - return v5 -} -; run - -function %unpack_low() -> b1 { -block0: - v0 = vconst.i32x4 [0 1 2 3] - v1 = vconst.i32x4 [4 5 6 7] - v2 = x86_punpckl v0, v1 - - v3 = vconst.i32x4 [0 4 1 5] - v4 = icmp eq v2, v3 - v5 = vall_true v4 - return v5 -} -; run - -function %snarrow(i32x4, i32x4) -> i16x8 { -block0(v0: i32x4, v1: i32x4): - v2 = snarrow v0, v1 - return v2 -} -; run: %snarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 -1 0x7fff 4 5 -6 0xffff] - -function %unarrow(i32x4, i32x4) -> i16x8 { -block0(v0: i32x4, v1: i32x4): - v2 = unarrow v0, v1 - return v2 -} -; run: %unarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 0 0xffff 4 5 0 0] diff --git a/cranelift/filetests/filetests/runtests/simd-lane-access.clif b/cranelift/filetests/filetests/runtests/simd-lane-access.clif index 4ab67d9177..7510cd8865 100644 --- a/cranelift/filetests/filetests/runtests/simd-lane-access.clif +++ b/cranelift/filetests/filetests/runtests/simd-lane-access.clif @@ -2,7 +2,7 @@ test run target aarch64 ; target s390x TODO: Not yet implemented on s390x set enable_simd -target x86_64 machinst +target x86_64 ;; shuffle diff --git a/cranelift/filetests/filetests/runtests/simd-logical.clif b/cranelift/filetests/filetests/runtests/simd-logical.clif index 48470cdb35..9cdf8fdb69 100644 --- a/cranelift/filetests/filetests/runtests/simd-logical.clif +++ b/cranelift/filetests/filetests/runtests/simd-logical.clif @@ -2,9 +2,7 @@ test run target aarch64 ; target s390x TODO: Not yet implemented on s390x set enable_simd -target x86_64 machinst -set enable_simd -target x86_64 legacy skylake +target x86_64 function %bnot() -> b32 { block0: @@ -26,13 +24,77 @@ block0: } ; run -function %vany_true_i16x8() -> b1 { +function %vany_true_i8x16() -> b1, b1 { block0: - v0 = vconst.i16x8 [1 0 0 0 0 0 0 0] + v0 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] v1 
= vany_true v0 - return v1 + + v2 = vconst.i8x16 [0 0 0 1 0 0 0 0 0 0 42 0 0 0 0 0] + v3 = vany_true v2 + + return v1, v3 } -; run +; run: %vany_true_i8x16() == [false, true] + +function %vany_true_i16x8() -> b1, b1 { +block0: + v0 = vconst.i16x8 [0 0 0 0 0 0 0 0] + v1 = vany_true v0 + + v2 = vconst.i16x8 [0 0 42 0 0 0 0 0] + v3 = vany_true v2 + + return v1, v3 +} +; run: %vany_true_i16x8() == [false, true] + +function %vany_true_i32x4() -> b1, b1 { +block0: + v0 = vconst.i32x4 [0 0 0 0] + v1 = vany_true v0 + + v2 = vconst.i32x4 [0 42 0 0] + v3 = vany_true v2 + + return v1, v3 +} +; run: %vany_true_i32x4() == [false, true] + +function %vany_true_i64x2() -> b1, b1 { +block0: + v0 = vconst.i64x2 [0 0] + v1 = vany_true v0 + + v2 = vconst.i64x2 [0 1] + v3 = vany_true v2 + + return v1, v3 +} +; run: %vany_true_i64x2() == [false, true] + +function %vany_true_f32x4() -> b1, b1 { +block0: + v0 = vconst.f32x4 [0.0 0.0 0.0 0.0] + v1 = vany_true v0 + + v2 = vconst.f32x4 [0.0 0x4.2 0.0 0.0] + v3 = vany_true v2 + + return v1, v3 +} +; run: %vany_true_f32x4() == [false, true] + +function %vany_true_f64x2() -> b1, b1 { +block0: + v0 = vconst.f64x2 [0.0 0.0] + v1 = vany_true v0 + + v2 = vconst.f64x2 [0.0 0x1.0] + v3 = vany_true v2 + + return v1, v3 +} +; run: %vany_true_f64x2() == [false, true] function %vany_true_b32x4() -> b1 { block0: diff --git a/cranelift/filetests/filetests/runtests/simd-snarrow-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-snarrow-aarch64.clif new file mode 100644 index 0000000000..84c3de5d05 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-snarrow-aarch64.clif @@ -0,0 +1,11 @@ +test interpret +test run +target aarch64 +; x86_64 considers the case `i64x2` -> `i32x4` to be 'unreachable' + +function %snarrow_i64x2(i64x2, i64x2) -> i32x4 { +block0(v0: i64x2, v1: i64x2): + v2 = snarrow v0, v1 + return v2 +} +; run: %snarrow_i64x2([65535 -100000], [5000000000 73]) == [65535 -100000 2147483647 73] diff --git 
a/cranelift/filetests/filetests/runtests/simd-snarrow.clif b/cranelift/filetests/filetests/runtests/simd-snarrow.clif new file mode 100644 index 0000000000..18d667f743 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-snarrow.clif @@ -0,0 +1,19 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %snarrow_i16x8(i16x8, i16x8) -> i8x16 { +block0(v0: i16x8, v1: i16x8): + v2 = snarrow v0, v1 + return v2 +} +; run: %snarrow_i16x8([1 127 128 15 32767 -32 48 0], [8 255 -100 100 -32768 73 80 42]) == [1 127 127 15 127 -32 48 0 8 127 -100 100 -128 73 80 42] + +function %snarrow_i32x4(i32x4, i32x4) -> i16x8 { +block0(v0: i32x4, v1: i32x4): + v2 = snarrow v0, v1 + return v2 +} +; run: %snarrow_i32x4([32767 1048575 -70000 -5], [268435455 73 268435455 42]) == [32767 32767 -32768 -5 32767 73 32767 42] diff --git a/cranelift/filetests/filetests/runtests/simd-sqmulroundsat-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat-aarch64.clif new file mode 100644 index 0000000000..f6809ddc5c --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat-aarch64.clif @@ -0,0 +1,12 @@ +test interpret +test run +target aarch64 +;; x86_64 hasn't implemented this for `i32x4` + +function %sqmulrs_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = sqmul_round_sat v0, v1 + return v2 +} +; run: %sqmulrs_i32x4([1000 2000 3000 4000], [10000 100000 1000000 10000000]) == [0 0 1 19] +; run: %sqmulrs_i32x4([2147483647 -2147483648 -2147483648 0], [2147483647 -2147483648 2147483647 0]) == [2147483646 2147483647 -2147483647 0] diff --git a/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif new file mode 100644 index 0000000000..1faa3592ad --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif @@ -0,0 +1,13 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function 
%sqmulrs_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = sqmul_round_sat v0, v1 + return v2 +} +; run: %sqmulrs_i16x8([1 2 3 4 5 6 7 8], [1 10 100 1000 10000 15000 20000 25000]) == [0 0 0 0 2 3 4 6] +; run: %sqmulrs_i16x8([32767 32767 -32768 -32768 -32768 -32768 0 0], [32767 32767 -32768 -32768 32767 32767 0 0]) == [32766 32766 32767 32767 -32767 -32767 0 0] diff --git a/cranelift/filetests/filetests/runtests/simd-swidenhigh.clif b/cranelift/filetests/filetests/runtests/simd-swidenhigh.clif index 6242859e8d..47d4229c61 100644 --- a/cranelift/filetests/filetests/runtests/simd-swidenhigh.clif +++ b/cranelift/filetests/filetests/runtests/simd-swidenhigh.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %swidenhigh_i8x16(i8x16) -> i16x8 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-swidenlow.clif b/cranelift/filetests/filetests/runtests/simd-swidenlow.clif index 38f100ef29..997734702b 100644 --- a/cranelift/filetests/filetests/runtests/simd-swidenlow.clif +++ b/cranelift/filetests/filetests/runtests/simd-swidenlow.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %swidenlow_i8x16(i8x16) -> i16x8 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-swizzle.clif b/cranelift/filetests/filetests/runtests/simd-swizzle.clif index 6cad36f033..6375e0b0e8 100644 --- a/cranelift/filetests/filetests/runtests/simd-swizzle.clif +++ b/cranelift/filetests/filetests/runtests/simd-swizzle.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %swizzle_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-unarrow-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-unarrow-aarch64.clif new file mode 100644 index 
0000000000..478a1860aa --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-unarrow-aarch64.clif @@ -0,0 +1,11 @@ +test interpret +test run +target aarch64 +; x86_64 considers the case `i64x2 -> i32x4` to be 'unreachable' + +function %unarrow_i64x2(i64x2, i64x2) -> i32x4 { +block0(v0: i64x2, v1: i64x2): + v2 = unarrow v0, v1 + return v2 +} +; run: %unarrow_i64x2([65535 -100000], [5000000000 73]) == [65535 0 4294967295 73] diff --git a/cranelift/filetests/filetests/runtests/simd-unarrow.clif b/cranelift/filetests/filetests/runtests/simd-unarrow.clif new file mode 100644 index 0000000000..0725afd811 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-unarrow.clif @@ -0,0 +1,19 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %unarrow_i16x8(i16x8, i16x8) -> i8x16 { +block0(v0: i16x8, v1: i16x8): + v2 = unarrow v0, v1 + return v2 +} +; run: %unarrow_i16x8([1 127 128 15 65535 -32 48 0], [8 255 -100 100 65534 73 80 42]) == [1 127 128 15 0 0 48 0 8 255 0 100 0 73 80 42] + +function %unarrow_i32x4(i32x4, i32x4) -> i16x8 { +block0(v0: i32x4, v1: i32x4): + v2 = unarrow v0, v1 + return v2 +} +; run: %unarrow_i32x4([65535 1048575 -70000 -5], [268435455 73 268435455 42]) == [65535 65535 0 0 65535 73 65535 42] diff --git a/cranelift/filetests/filetests/runtests/simd-uunarrow.clif b/cranelift/filetests/filetests/runtests/simd-uunarrow.clif new file mode 100644 index 0000000000..b2a68c4480 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-uunarrow.clif @@ -0,0 +1,26 @@ +test interpret +test run +target aarch64 +; x86_64 panics: `Did not match fcvt input! 
+; thread 'worker #0' panicked at 'register allocation: Analysis(EntryLiveinValues([v2V]))', cranelift/codegen/src/machinst/compile.rs:96:10` + +function %uunarrow_i16x8(i16x8, i16x8) -> i8x16 { +block0(v0: i16x8, v1: i16x8): + v2 = uunarrow v0, v1 + return v2 +} +; run: %uunarrow_i16x8([1 127 128 15 65535 -32 48 0], [8 255 -100 100 65534 73 80 42]) == [1 127 128 15 255 255 48 0 8 255 255 100 255 73 80 42] + +function %uunarrow_i32x4(i32x4, i32x4) -> i16x8 { +block0(v0: i32x4, v1: i32x4): + v2 = uunarrow v0, v1 + return v2 +} +; run: %uunarrow_i32x4([65535 1048575 -70000 -5], [268435455 73 268435455 42]) == [65535 65535 65535 65535 65535 73 65535 42] + +function %uunarrow_i64x2(i64x2, i64x2) -> i32x4 { +block0(v0: i64x2, v1: i64x2): + v2 = uunarrow v0, v1 + return v2 +} +; run: %uunarrow_i64x2([65535 -100000], [5000000000 73]) == [65535 4294967295 4294967295 73] diff --git a/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif b/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif index 281e63ac02..d9e4b2b0e0 100644 --- a/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif +++ b/cranelift/filetests/filetests/runtests/simd-uwidenhigh.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %uwidenhigh_i8x16(i8x16) -> i16x8 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif b/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif index 8b865a305b..6acf4e1a2b 100644 --- a/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif +++ b/cranelift/filetests/filetests/runtests/simd-uwidenlow.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %uwidenlow_i8x16(i8x16) -> i16x8 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-valltrue.clif b/cranelift/filetests/filetests/runtests/simd-valltrue.clif index ced9a6bb0d..c39a2702e6 100644 --- 
a/cranelift/filetests/filetests/runtests/simd-valltrue.clif +++ b/cranelift/filetests/filetests/runtests/simd-valltrue.clif @@ -1,7 +1,7 @@ test interpret test run target aarch64 -target x86_64 machinst +target x86_64 function %vall_true_b8x16(b8x16) -> b1 { block0(v0: b8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-vanytrue.clif b/cranelift/filetests/filetests/runtests/simd-vanytrue.clif index fd206d54ef..74b99d785e 100644 --- a/cranelift/filetests/filetests/runtests/simd-vanytrue.clif +++ b/cranelift/filetests/filetests/runtests/simd-vanytrue.clif @@ -1,7 +1,7 @@ test interpret test run -target x86_64 machinst -; TODO: The AArch64 backend is producing an illegal instruction for b64x2. See: #3304 +target aarch64 +target x86_64 function %vany_true_b8x16(b8x16) -> b1 { block0(v0: b8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-vconst-optimized-legacy.clif b/cranelift/filetests/filetests/runtests/simd-vconst-optimized-legacy.clif deleted file mode 100644 index 8d28c6b0de..0000000000 --- a/cranelift/filetests/filetests/runtests/simd-vconst-optimized-legacy.clif +++ /dev/null @@ -1,46 +0,0 @@ -test run -set enable_simd -target x86_64 legacy - -function %vconst_syntax() -> b1 { -block0: - v0 = vconst.i32x4 0x00000004_00000003_00000002_00000001 ; build constant using hexadecimal syntax - v1 = vconst.i32x4 [1 2 3 4] ; build constant using literal list syntax - - ; verify lane 1 matches - v2 = extractlane v0, 1 - v3 = extractlane v1, 1 - v4 = icmp eq v3, v2 - - ; verify lane 1 has the correct value - v5 = icmp_imm eq v3, 2 - - v6 = band v4, v5 - return v6 -} -; run - -; Since both jump tables and constants are emitted after the function body, it is important that any RIP-relative -; addressing of constants is not incorrect in the presence of jump tables. This test confirms that, even when both -; jump tables and constants are emitted, the constant addressing works correctly. 
-function %vconst_with_jumptables() -> b1 { -jt0 = jump_table [block0] - -block10: - v10 = iconst.i64 0 - br_table v10, block1, jt0 -block0: - v0 = iconst.i64 100 - jump block11(v0) -block1: - v1 = iconst.i64 101 - jump block11(v1) -block11(v11: i64): - v12 = icmp_imm eq v11, 100 ; We should have jumped through block 0. - v13 = vconst.i32x4 [1 2 3 4] - v14 = extractlane.i32x4 v13, 1 ; Extract the second element... - v15 = icmp_imm eq v14, 2 ; ...which should be the value 2. - v16 = band v12, v15 - return v16 -} -; run diff --git a/cranelift/filetests/filetests/runtests/simd-vconst.clif b/cranelift/filetests/filetests/runtests/simd-vconst.clif index 49b89a0330..f1a98e7ea1 100644 --- a/cranelift/filetests/filetests/runtests/simd-vconst.clif +++ b/cranelift/filetests/filetests/runtests/simd-vconst.clif @@ -2,11 +2,7 @@ test run ; target s390x TODO: Not yet implemented on s390x target aarch64 set enable_simd -target x86_64 machinst -set enable_simd -target x86_64 legacy -set enable_simd -target x86_64 legacy skylake +target x86_64 function %vconst_zeroes() -> b1 { diff --git a/cranelift/filetests/filetests/runtests/simd-vhighbits.clif b/cranelift/filetests/filetests/runtests/simd-vhighbits.clif new file mode 100644 index 0000000000..2a9c5d1a75 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-vhighbits.clif @@ -0,0 +1,53 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %vhighbits_i8x16(i8x16) -> i16 { +block0(v0: i8x16): + v1 = vhigh_bits.i16 v0 + return v1 +} +; run: %vhighbits_i8x16([0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]) == 0 +; run: %vhighbits_i8x16([0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]) == 0 +; run: %vhighbits_i8x16([1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7]) == 0 +; run: %vhighbits_i8x16([128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128]) == -1 +; run: %vhighbits_i8x16([128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 8]) == 32767 + +function %vhighbits_i16x8(i16x8) -> i8 { +block0(v0: i16x8): + v1 = 
vhigh_bits.i8 v0 + return v1 +} +; run: %vhighbits_i16x8([0 0 0 0 0 0 0 0]) == 0 +; run: %vhighbits_i16x8([0 0 0 0 0 0 0 1]) == 0 +; run: %vhighbits_i16x8([1 2 3 4 5 6 7 8]) == 0 +; run: %vhighbits_i16x8([128 128 128 128 128 128 128 128]) == 0 +; run: %vhighbits_i16x8([32768 32768 32768 32768 32768 32768 32768 0]) == 127 + + +function %vhighbits_i32x4(i32x4) -> i8 { +block0(v0: i32x4): + v1 = vhigh_bits.i8 v0 + return v1 +} +; run: %vhighbits_i32x4([0 0 0 0]) == 0 +; run: %vhighbits_i32x4([0 0 0 1]) == 0 +; run: %vhighbits_i32x4([1 2 3 4]) == 0 +; run: %vhighbits_i32x4([128 128 128 128]) == 0 +; run: %vhighbits_i32x4([2147483648 2147483648 2147483648 2147483648]) == 15 +; run: %vhighbits_i32x4([2147483648 0 2147483648 0]) == 5 + + +function %vhighbits_i64x2(i64x2) -> i8 { +block0(v0: i64x2): + v1 = vhigh_bits.i8 v0 + return v1 +} +; run: %vhighbits_i64x2([0 0]) == 0 +; run: %vhighbits_i64x2([0 1]) == 0 +; run: %vhighbits_i64x2([1 2]) == 0 +; run: %vhighbits_i64x2([128 128]) == 0 +; run: %vhighbits_i64x2([18446744073709551615 18446744073709551615]) == 3 +; run: %vhighbits_i64x2([18446744073709551615 0]) == 1 diff --git a/cranelift/filetests/filetests/runtests/simd-vselect.clif b/cranelift/filetests/filetests/runtests/simd-vselect.clif index 3817b2302f..84a2479ca5 100644 --- a/cranelift/filetests/filetests/runtests/simd-vselect.clif +++ b/cranelift/filetests/filetests/runtests/simd-vselect.clif @@ -1,10 +1,9 @@ +test interpret test run ; target s390x TODO: Not yet implemented on s390x target aarch64 set enable_simd -target x86_64 machinst -set enable_simd -target x86_64 legacy haswell +target x86_64 function %vselect_i8x16() -> i8x16 { block0: @@ -45,3 +44,31 @@ block0: return v4 } ; run: %vselect_i64x2() == [200 101] + +function %vselect_p_i8x16(b8x16, i8x16, i8x16) -> i8x16 { +block0(v0: b8x16, v1: i8x16, v2: i8x16): + v3 = vselect v0, v1, v2 + return v3 +} +; run: %vselect_p_i8x16([true false true true true false false false true false true true true false false 
false], [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], [17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32]) == [1 18 3 4 5 22 23 24 9 26 11 12 13 30 31 32] + +function %vselect_p_i16x8(b16x8, i16x8, i16x8) -> i16x8 { +block0(v0: b16x8, v1: i16x8, v2: i16x8): + v3 = vselect v0, v1, v2 + return v3 +} +; run: %vselect_p_i16x8([true false true true true false false false], [1 2 3 4 5 6 7 8], [17 18 19 20 21 22 23 24]) == [1 18 3 4 5 22 23 24] + +function %vselect_p_i32x4(b32x4, i32x4, i32x4) -> i32x4 { +block0(v0: b32x4, v1: i32x4, v2: i32x4): + v3 = vselect v0, v1, v2 + return v3 +} +; run: %vselect_p_i32x4([true false true true], [1 2 3 4], [100000 200000 300000 400000]) == [1 200000 3 4] + +function %vselect_p_i64x2(b64x2, i64x2, i64x2) -> i64x2 { +block0(v0: b64x2, v1: i64x2, v2: i64x2): + v3 = vselect v0, v1, v2 + return v3 +} +; run: %vselect_p_i64x2([true false], [1 2], [100000000000 200000000000]) == [1 200000000000] diff --git a/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif b/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif new file mode 100644 index 0000000000..56987ef79d --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif @@ -0,0 +1,14 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %wpdps(i16x8, i16x8) -> i32x4 { +block0(v0: i16x8, v1: i16x8): + v2 = widening_pairwise_dot_product_s v0, v1 + return v2 +} +; run: %wpdps([1 2 3 4 5 6 7 8], [8000 7000 6000 5000 4000 3000 2000 1000]) == [22000 38000 38000 22000] +; run: %wpdps([1 -2 3 -4 5 -6 7 -8], [32767 32767 32767 32767 -32768 -32768 -32768 -32768]) == [-32767 -32767 32768 32768] +; run: %wpdps([-32768 -32768 32767 32767 -32768 -32768 32767 32767], [-32768 -32768 32767 32767 32767 32767 -32768 -32768]) == [2147483648 2147352578 -2147418112 -2147418112] diff --git a/cranelift/filetests/filetests/runtests/smulhi.clif b/cranelift/filetests/filetests/runtests/smulhi.clif index 
b2fe1072ed..ec855002a4 100644 --- a/cranelift/filetests/filetests/runtests/smulhi.clif +++ b/cranelift/filetests/filetests/runtests/smulhi.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 function %smulhi_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): diff --git a/cranelift/filetests/filetests/runtests/spill-reload.clif b/cranelift/filetests/filetests/runtests/spill-reload.clif index 63f8c50576..f2c6bd0fe6 100644 --- a/cranelift/filetests/filetests/runtests/spill-reload.clif +++ b/cranelift/filetests/filetests/runtests/spill-reload.clif @@ -1,8 +1,7 @@ test run target s390x target aarch64 -target x86_64 machinst -target x86_64 legacy +target x86_64 function %f(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i64 { block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32, v9: i32, v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32): diff --git a/cranelift/filetests/filetests/runtests/stack-addr-64.clif b/cranelift/filetests/filetests/runtests/stack-addr-64.clif index 0e59e7c410..5dd4527024 100644 --- a/cranelift/filetests/filetests/runtests/stack-addr-64.clif +++ b/cranelift/filetests/filetests/runtests/stack-addr-64.clif @@ -1,6 +1,6 @@ test interpret test run -target x86_64 machinst +target x86_64 target s390x target aarch64 diff --git a/cranelift/filetests/filetests/runtests/stack.clif b/cranelift/filetests/filetests/runtests/stack.clif index 0a7323c4d9..efcc25f02e 100644 --- a/cranelift/filetests/filetests/runtests/stack.clif +++ b/cranelift/filetests/filetests/runtests/stack.clif @@ -1,6 +1,6 @@ test interpret test run -target x86_64 machinst +target x86_64 target s390x target aarch64 diff --git a/cranelift/filetests/filetests/runtests/umulhi.clif b/cranelift/filetests/filetests/runtests/umulhi.clif index bb6b06bedd..272e40ca82 100644 --- 
a/cranelift/filetests/filetests/runtests/umulhi.clif +++ b/cranelift/filetests/filetests/runtests/umulhi.clif @@ -2,7 +2,8 @@ test interpret test run target aarch64 set enable_simd -target x86_64 machinst +target x86_64 +target s390x function %umulhi_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): diff --git a/cranelift/filetests/filetests/safepoint/basic.clif b/cranelift/filetests/filetests/safepoint/basic.clif deleted file mode 100644 index 47acf2ad72..0000000000 --- a/cranelift/filetests/filetests/safepoint/basic.clif +++ /dev/null @@ -1,71 +0,0 @@ -test safepoint -set enable_safepoints=true -target x86_64 legacy - -function %test(i32, r64, r64) -> r64 { - block0(v0: i32, v1:r64, v2:r64): - jump block1(v0) - block1(v3: i32): - v4 = irsub_imm v3, 1 - jump block2(v4) - block2(v5: i32): - resumable_trap interrupt - brz v5, block1(v5) - jump block3 - block3: - v6 = null.r64 - v7 = is_null v6 - brnz v7, block2(v0) - jump block4 - block4: - brnz v0, block5 - jump block6 - block5: - return v1 - block6: - return v2 -} - -; sameln: function %test(i32 [%rdi], r64 [%rsi], r64 [%rdx]) -> r64 [%rax] fast { -; nextln: block0(v0: i32 [%rdi], v1: r64 [%rsi], v2: r64 [%rdx]): -; nextln: v10 = copy v0 -; nextln: jump block1(v10) -; nextln: -; nextln: block7: -; nextln: regmove.i32 v5, %rcx -> %rax -; nextln: jump block1(v5) -; nextln: -; nextln: block1(v3: i32 [%rax]): -; nextln: v8 = iconst.i32 1 -; nextln: v4 = isub v8, v3 -; nextln: jump block2(v4) -; nextln: -; nextln: block8: -; nextln: v9 = copy.i32 v0 -; nextln: regmove v9, %rax -> %rcx -; nextln: jump block2(v9) -; nextln: -; nextln: block2(v5: i32 [%rcx]): -; nextln: safepoint v1, v2 -; nextln: resumable_trap interrupt -; nextln: brz v5, block7 -; nextln: jump block3 -; nextln: -; nextln: block3: -; nextln: v6 = null.r64 -; nextln: v7 = is_null v6 -; nextln: brnz v7, block8 -; nextln: jump block4 -; nextln: -; nextln: block4: -; nextln: brnz.i32 v0, block5 -; nextln: jump block6 -; nextln: -; nextln: block5: -; nextln: 
regmove.r64 v1, %rsi -> %rax -; nextln: return v1 -; nextln: -; nextln: block6: -; nextln: regmove.r64 v2, %rdx -> %rax -; nextln: return v2 -; nextln: } diff --git a/cranelift/filetests/filetests/safepoint/call.clif b/cranelift/filetests/filetests/safepoint/call.clif deleted file mode 100644 index ffcf41fb46..0000000000 --- a/cranelift/filetests/filetests/safepoint/call.clif +++ /dev/null @@ -1,58 +0,0 @@ -test safepoint -set enable_safepoints=true -target x86_64 legacy - -function %direct() -> r64 { - fn0 = %none() - fn1 = %one() -> r64 - fn2 = %two() -> i32, r64 - -block0: - call fn0() - v1 = call fn1() - v2, v3 = call fn2() - brz v2, block2 - jump block1 -block1: - return v1 -block2: - v4 = call fn1() - return v3 -} - -; sameln: function %direct() -> r64 [%rax] fast { -; nextln: ss0 = spill_slot 8 -; nextln: ss1 = spill_slot 8 -; nextln: sig0 = () fast -; nextln: sig1 = () -> r64 [%rax] fast -; nextln: sig2 = () -> i32 [%rax], r64 [%rdx] fast -; nextln: fn0 = %none sig0 -; nextln: fn1 = %one sig1 -; nextln: fn2 = %two sig2 -; nextln: -; nextln: block0: -; nextln: v5 = func_addr.i64 fn0 -; nextln: call_indirect sig0, v5() -; nextln: v6 = func_addr.i64 fn1 -; nextln: v9 = call_indirect sig1, v6() -; nextln: v1 = spill v9 -; nextln: v7 = func_addr.i64 fn2 -; nextln: safepoint v1 -; nextln: v2, v10 = call_indirect sig2, v7() -; nextln: v3 = spill v10 -; nextln: brz v2, block2 -; nextln: jump block1 -; nextln: -; nextln: block1: -; nextln: v11 = fill.r64 v1 -; nextln: regmove v11, %r15 -> %rax -; nextln: return v11 -; nextln: -; nextln: block2: -; nextln: v8 = func_addr.i64 fn1 -; nextln: safepoint v3 -; nextln: v4 = call_indirect sig1, v8() -; nextln: v12 = fill.r64 v3 -; nextln: regmove v12, %r15 -> %rax -; nextln: return v12 -; nextln: } diff --git a/cranelift/filetests/filetests/simple_gvn/reject.clif b/cranelift/filetests/filetests/simple_gvn/reject.clif index c4613af4dc..bb01fe5839 100644 --- a/cranelift/filetests/filetests/simple_gvn/reject.clif +++ 
b/cranelift/filetests/filetests/simple_gvn/reject.clif @@ -1,15 +1,5 @@ test simple-gvn -function %other_side_effects(i32) -> i32 { -block0(v0: i32): - regmove v0, %10 -> %20 - regmove v0, %10 -> %20 - regmove v0, %20 -> %10 -; check: regmove v0, %10 -> %20 -; check: regmove v0, %10 -> %20 - return v0 -} - function %differing_typevars() -> i64 { block0: v0 = iconst.i32 7 diff --git a/cranelift/filetests/filetests/simple_preopt/simplify_instruction_into_alias_of_value.clif b/cranelift/filetests/filetests/simple_preopt/simplify_instruction_into_alias_of_value.clif deleted file mode 100644 index 5d10588da3..0000000000 --- a/cranelift/filetests/filetests/simple_preopt/simplify_instruction_into_alias_of_value.clif +++ /dev/null @@ -1,18 +0,0 @@ -test simple_preopt -target aarch64 -target x86_64 - -;; The `isub` is a no-op, but we can't replace the whole `isub` instruction with -;; its `v2` operand's instruction because `v2` is one of many results. Instead, -;; we need to make an alias `v3 -> v2`. 
- -function %replace_inst_with_alias() -> i32 { -block0: - v0 = iconst.i32 0 - v1, v2 = x86_smulx v0, v0 - v3 = isub v2, v0 - ; check: v0 = iconst.i32 0 - ; nextln: v1, v2 = x86_smulx v0, v0 - ; nextln: v3 -> v2 - return v3 -} diff --git a/cranelift/filetests/filetests/stack_maps/call.clif b/cranelift/filetests/filetests/stack_maps/call.clif deleted file mode 100644 index 6563ad450a..0000000000 --- a/cranelift/filetests/filetests/stack_maps/call.clif +++ /dev/null @@ -1,103 +0,0 @@ -test stack_maps -set enable_safepoints=true -target x86_64 legacy - -function %icall_fast(r64) -> r64 fast { -; check: function %icall_fast -; nextln: ss0 = spill_slot 8, offset -32 - fn0 = %none() -block0(v0: r64): -; check: ss0] v0 = spill v2 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 4 -; nextln: - live: [0] - -function %icall_sys_v(r64) -> r64 system_v { -; check: function %icall_sys_v -; nextln: ss0 = spill_slot 8, offset -32 - fn0 = %none() -block0(v0: r64): -; check: ss0] v0 = spill v2 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 4 -; nextln: - live: [0] - -function %icall_fastcall(r64) -> r64 windows_fastcall { -; check: function %icall_fastcall -; nextln: ss0 = spill_slot 8, offset -32 -; nextln: ss1 = incoming_arg 24, offset -24 -; nextln: ss2 = explicit_slot 32, offset -64 - fn0 = %none() -block0(v0: r64): -; check: ss0] v0 = spill v2 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 8 -; nextln: - live: [4] - -function %call_fast(r64) -> r64 fast { -; check: function %call_fast -; nextln: ss0 = spill_slot 8, offset -32 - fn0 = colocated %none() -block0(v0: r64): -; check: ss0] v0 = spill v1 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - 
mapped words: 4 -; nextln: - live: [0] - -function %call_sys_v(r64) -> r64 system_v { -; check: function %call_sys_v -; nextln: ss0 = spill_slot 8, offset -32 - fn0 = colocated %none() -block0(v0: r64): -; check: ss0] v0 = spill v1 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 4 -; nextln: - live: [0] - -function %call_fastcall(r64) -> r64 windows_fastcall { -; check: function %call_fastcall -; nextln: ss0 = spill_slot 8, offset -32 -; nextln: ss1 = incoming_arg 24, offset -24 -; nextln: ss2 = explicit_slot 32, offset -64 - fn0 = colocated %none() -block0(v0: r64): -; check: ss0] v0 = spill v1 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 8 -; nextln: - live: [4] diff --git a/cranelift/filetests/filetests/stack_maps/incoming_args.clif b/cranelift/filetests/filetests/stack_maps/incoming_args.clif deleted file mode 100644 index e8231c3aad..0000000000 --- a/cranelift/filetests/filetests/stack_maps/incoming_args.clif +++ /dev/null @@ -1,30 +0,0 @@ -test stack_maps -set enable_safepoints=true -target x86_64 legacy - -;; Incoming args get included in stack maps. 
- -function %incoming_args(r64, r64, r64, r64, r64) -> r64 windows_fastcall { -; check: r64 [32] -; nextln: ss0 = incoming_arg 8, offset 32 -; nextln: ss1 = incoming_arg 24, offset -24 -; nextln: ss2 = explicit_slot 32, offset -64 - - fn0 = %none() -; nextln: sig0 = () fast -; nextln: fn0 = %none sig0 - -block0(v0: r64, v1: r64, v2: r64, v3: r64, v4: r64): -; check: v4: r64 [ss0] - - call fn0() -; check: safepoint v4 -; nextln: call_indirect - return v4 -} - -; check: Stack maps: -; nextln: -; nextln: safepoint v4 -; nextln: - mapped words: 13 -; nextln: - live: [12] diff --git a/cranelift/filetests/filetests/verifier/flags.clif b/cranelift/filetests/filetests/verifier/flags.clif deleted file mode 100644 index 088523d24a..0000000000 --- a/cranelift/filetests/filetests/verifier/flags.clif +++ /dev/null @@ -1,77 +0,0 @@ -test verifier -target aarch64 -target i686 - -; Simple, correct use of CPU flags. -function %simple(i32) -> i32 { - block0(v0: i32): - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [Op2seti_abcd#490] v2 = trueif ugt v1 - [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2 - [Op1ret#c3] return v3 -} - -; Overlapping flag values of different types. -function %overlap(i32, f32) -> i32 { - block0(v0: i32, v1: f32): - [DynRexOp1rcmp#39] v2 = ifcmp v0, v0 - [Op2fcmp#42e] v3 = ffcmp v1, v1 - [Op2setf_abcd#490] v4 = trueff gt v3 ; error: conflicting live CPU flags: v2 and v3 - [Op2seti_abcd#490] v5 = trueif ugt v2 - [Op1rr#21] v6 = band v4, v5 - [Op2urm_noflags_abcd#4b6] v7 = bint.i32 v6 - [Op1ret#c3] return v7 -} - -; CPU flags clobbered by arithmetic. -function %clobbered(i32) -> i32 { - block0(v0: i32): - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [DynRexOp1rr#01] v2 = iadd v0, v0 ; error: encoding clobbers live CPU flags in v1 - [Op2seti_abcd#490] v3 = trueif ugt v1 - [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3 - [Op1ret#c3] return v4 -} - -; CPU flags not clobbered by load. 
-function %live_across_load(i32) -> i32 { - block0(v0: i32): - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [Op1ld#8b] v2 = load.i32 v0 - [Op2seti_abcd#490] v3 = trueif ugt v1 - [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3 - [Op1ret#c3] return v4 -} - -; Correct use of CPU flags across block. -function %live_across_block(i32) -> i32 { - block0(v0: i32): - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [Op1jmpb#eb] jump block1 - block1: - [Op2seti_abcd#490] v2 = trueif ugt v1 - [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2 - [Op1ret#c3] return v3 -} - -function %live_across_block_backwards(i32) -> i32 { - block0(v0: i32): - [Op1jmpb#eb] jump block2 - block1: - [Op2seti_abcd#490] v2 = trueif ugt v1 - [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2 - [Op1ret#c3] return v3 - block2: - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [Op1jmpb#eb] jump block1 -} - -; Flags live into loop. -function %live_into_loop(i32) -> i32 { - block0(v0: i32): - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [Op1jmpb#eb] jump block1 - block1: - [Op2seti_abcd#490] v2 = trueif ugt v1 - [Op1jmpb#eb] jump block1 -} diff --git a/cranelift/filetests/filetests/wasm/multi-val-b1.clif b/cranelift/filetests/filetests/wasm/multi-val-b1.clif deleted file mode 100644 index f41f867918..0000000000 --- a/cranelift/filetests/filetests/wasm/multi-val-b1.clif +++ /dev/null @@ -1,68 +0,0 @@ -test compile -target x86_64 legacy haswell - -;; `b1` return values need to be legalized into bytes so that they can be stored -;; in memory. 
- -function %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1 { -;; check: function %return_4_b1s(b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] fast { - -block0(v0: b1, v1: b1, v2: b1, v3: b1): -; check: block0(v0: b1 [%rsi], v1: b1 [%rdx], v2: b1 [%rcx], v3: b1 [%r8], v4: i64 [%rdi], v13: i64 [%rbp]): - - return v0, v1, v2, v3 - ; check: v5 = bint.i8 v0 - ; nextln: v9 = uextend.i32 v5 - ; nextln: istore8 notrap aligned v9, v4 - ; nextln: v6 = bint.i8 v1 - ; nextln: v10 = uextend.i32 v6 - ; nextln: istore8 notrap aligned v10, v4+1 - ; nextln: v7 = bint.i8 v2 - ; nextln: v11 = uextend.i32 v7 - ; nextln: istore8 notrap aligned v11, v4+2 - ; nextln: v8 = bint.i8 v3 - ; nextln: v12 = uextend.i32 v8 - ; nextln: istore8 notrap aligned v12, v4+3 -} - -function %call_4_b1s() { -; check: function %call_4_b1s(i64 fp [%rbp], i64 csr [%rbx]) -> i64 fp [%rbp], i64 csr [%rbx] fast { -; nextln: ss0 = sret_slot 4, offset -28 - - fn0 = colocated %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1 - ; check: sig0 = (b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi]) -> i64 sret [%rax] fast - -block0: -; check: block0(v26: i64 [%rbp], v27: i64 [%rbx]): - - v0 = bconst.b1 true - v1 = bconst.b1 false - v2 = bconst.b1 true - v3 = bconst.b1 false - - ; check: v8 = stack_addr.i64 ss0 - v4, v5, v6, v7 = call fn0(v0, v1, v2, v3) - ; check: v9 = call fn0(v0, v1, v2, v3, v8) - ; nextln: v22 = uload8.i32 notrap aligned v9 - ; nextln: v10 = ireduce.i8 v22 - ; nextln: v11 = raw_bitcast.b8 v10 - ; nextln: v12 = breduce.b1 v11 - ; nextln: v4 -> v12 - ; nextln: v23 = uload8.i32 notrap aligned v9+1 - ; nextln: v13 = ireduce.i8 v23 - ; nextln: v14 = raw_bitcast.b8 v13 - ; nextln: v15 = breduce.b1 v14 - ; nextln: v5 -> v15 - ; nextln: v24 = uload8.i32 notrap aligned v9+2 - ; nextln: v16 = ireduce.i8 v24 - ; nextln: v17 = raw_bitcast.b8 v16 - ; nextln: v18 = breduce.b1 v17 - ; nextln: v6 -> v18 - ; nextln: v25 = uload8.i32 notrap aligned 
v9+3 - ; nextln: v19 = ireduce.i8 v25 - ; nextln: v20 = raw_bitcast.b8 v19 - ; nextln: v21 = breduce.b1 v20 - ; nextln: v7 -> v21 - - return -} diff --git a/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif b/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif deleted file mode 100644 index 06d0814dfb..0000000000 --- a/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif +++ /dev/null @@ -1,26 +0,0 @@ -test legalizer -target x86_64 legacy haswell - -;; Indirect calls with many returns. - -function %call_indirect_many_rets(i64) { - ; check: ss0 = sret_slot 32 - - sig0 = () -> i64, i64, i64, i64 - ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast - -block0(v0: i64): - v1, v2, v3, v4 = call_indirect sig0, v0() - ; check: v5 = stack_addr.i64 ss0 - ; nextln: v6 = call_indirect sig0, v0(v5) - ; nextln: v7 = load.i64 notrap aligned v6 - ; nextln: v1 -> v7 - ; nextln: v8 = load.i64 notrap aligned v6+8 - ; nextln: v2 -> v8 - ; nextln: v9 = load.i64 notrap aligned v6+16 - ; nextln: v3 -> v9 - ; nextln: v10 = load.i64 notrap aligned v6+24 - ; nextln: v4 -> v10 - - return -} diff --git a/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif b/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif deleted file mode 100644 index aae733ddf4..0000000000 --- a/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif +++ /dev/null @@ -1,24 +0,0 @@ -test legalizer -target x86_64 legacy haswell - -;; Test if arguments are legalized if function uses sret - -function %call_indirect_with_split_arg(i64, i64, i64) { - ; check: ss0 = sret_slot 32 - sig0 = (i128) -> i64, i64, i64, i64 - ; check: sig0 = (i64 [%rsi], i64 [%rdx], i64 sret [%rdi]) -> i64 sret [%rax] fast -block0(v0: i64, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4, v5, v6, v7 = call_indirect sig0, v0(v3) - ; check: v8 = stack_addr.i64 ss0 - ; check: v9 = call_indirect sig0, v0(v1, v2, v8) - ; check: v10 = load.i64 notrap aligned v9 - ; check: 
v4 -> v10 - ; check: v11 = load.i64 notrap aligned v9+8 - ; check: v5 -> v11 - ; check: v12 = load.i64 notrap aligned v9+16 - ; check: v6 -> v12 - ; check: v13 = load.i64 notrap aligned v9+24 - ; check: v7 -> v13 - return -} diff --git a/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif b/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif deleted file mode 100644 index c58102aedc..0000000000 --- a/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif +++ /dev/null @@ -1,61 +0,0 @@ -test legalizer -target x86_64 legacy haswell - -;; Test that we don't reuse `sret` stack slots for multiple calls. We could do -;; this one day, but it would require some care to ensure that we don't have -;; subsequent calls overwrite the results of previous calls. - -function %foo() -> i32, f32 { - ; check: ss0 = sret_slot 20 - ; nextln: ss1 = sret_slot 20 - - fn0 = %f() -> i32, i32, i32, i32, i32 - fn1 = %g() -> f32, f32, f32, f32, f32 - ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast - ; nextln: sig1 = (i64 sret [%rdi]) -> i64 sret [%rax] fast - ; nextln: fn0 = %f sig0 - ; nextln: fn1 = %g sig1 - -block0: - v0, v1, v2, v3, v4 = call fn0() - ; check: v18 = stack_addr.i64 ss0 - ; nextln: v25 = func_addr.i64 fn0 - ; nextln: v19 = call_indirect sig0, v25(v18) - ; nextln: v20 = load.i32 notrap aligned v19 - ; nextln: v0 -> v20 - ; nextln: v21 = load.i32 notrap aligned v19+4 - ; nextln: v1 -> v21 - ; nextln: v22 = load.i32 notrap aligned v19+8 - ; nextln: v2 -> v22 - ; nextln: v23 = load.i32 notrap aligned v19+12 - ; nextln: v3 -> v23 - ; nextln: v24 = load.i32 notrap aligned v19+16 - ; nextln: v4 -> v24 - - v5, v6, v7, v8, v9 = call fn1() - ; check: v26 = stack_addr.i64 ss1 - ; nextln: v33 = func_addr.i64 fn1 - ; nextln: v27 = call_indirect sig1, v33(v26) - ; nextln: v28 = load.f32 notrap aligned v27 - ; nextln: v5 -> v28 - ; nextln: v29 = load.f32 notrap aligned v27+4 - ; nextln: v6 -> v29 - ; nextln: v30 = 
load.f32 notrap aligned v27+8 - ; nextln: v7 -> v30 - ; nextln: v31 = load.f32 notrap aligned v27+12 - ; nextln: v8 -> v31 - ; nextln: v32 = load.f32 notrap aligned v27+16 - ; nextln: v9 -> v32 - - v10 = iadd v0, v1 - v11 = iadd v2, v3 - v12 = iadd v10, v11 - v13 = iadd v12, v4 - - v14 = fadd v5, v6 - v15 = fadd v7, v8 - v16 = fadd v14, v15 - v17 = fadd v16, v9 - - return v13, v17 -} diff --git a/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif b/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif deleted file mode 100644 index da9f25ed97..0000000000 --- a/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif +++ /dev/null @@ -1,51 +0,0 @@ -test legalizer -target x86_64 legacy haswell - -;; Need to insert padding after the `i8`s so that the `i32` and `i64` are -;; aligned. - -function %returner() -> i8, i32, i8, i64 { -; check: function %returner(i64 sret [%rdi]) -> i64 sret [%rax] fast { - -block0: -; check: block0(v4: i64): - - v0 = iconst.i8 0 - v1 = iconst.i32 1 - v2 = iconst.i8 2 - v3 = iconst.i64 3 - return v0, v1, v2, v3 - ; check: v6 = uextend.i32 v0 - ; nextln: istore8 notrap aligned v6, v4 - ; nextln: store notrap aligned v1, v4+4 - ; nextln: v7 = uextend.i32 v2 - ; nextln: istore8 notrap aligned v7, v4+8 - ; nextln: store notrap aligned v3, v4+16 - ; nextln: return v4 -} - -function %caller() { - ; check: ss0 = sret_slot 24 - - fn0 = %returner() -> i8, i32, i8, i64 - ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast - ; nextln: fn0 = %returner sig0 - -block0: - v0, v1, v2, v3 = call fn0() - ; check: v4 = stack_addr.i64 ss0 - ; nextln: v10 = func_addr.i64 fn0 - ; nextln: v5 = call_indirect sig0, v10(v4) - ; nextln: v11 = uload8.i32 notrap aligned v5 - ; nextln: v6 = ireduce.i8 v11 - ; nextln: v0 -> v6 - ; nextln: v7 = load.i32 notrap aligned v5+4 - ; nextln: v1 -> v7 - ; nextln: v12 = uload8.i32 notrap aligned v5+8 - ; nextln: v8 = ireduce.i8 v12 - ; nextln: v2 -> v8 - ; nextln: v9 = load.i64 
notrap aligned v5+16 - ; nextln: v3 -> v9 - - return -} diff --git a/cranelift/filetests/src/function_runner.rs b/cranelift/filetests/src/function_runner.rs index d764b916e5..4046072a23 100644 --- a/cranelift/filetests/src/function_runner.rs +++ b/cranelift/filetests/src/function_runner.rs @@ -3,7 +3,7 @@ use core::mem; use cranelift_codegen::binemit::{NullRelocSink, NullStackMapSink, NullTrapSink}; use cranelift_codegen::data_value::DataValue; use cranelift_codegen::ir::{condcodes::IntCC, Function, InstBuilder, Signature}; -use cranelift_codegen::isa::{BackendVariant, TargetIsa}; +use cranelift_codegen::isa::TargetIsa; use cranelift_codegen::{ir, settings, CodegenError, Context}; use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext}; use cranelift_native::builder_with_options; @@ -47,9 +47,9 @@ impl SingleFunctionCompiler { } /// Build a [SingleFunctionCompiler] using the host machine's ISA and the passed flags. - pub fn with_host_isa(flags: settings::Flags, variant: BackendVariant) -> Self { - let builder = builder_with_options(variant, true) - .expect("Unable to build a TargetIsa for the current host"); + pub fn with_host_isa(flags: settings::Flags) -> Self { + let builder = + builder_with_options(true).expect("Unable to build a TargetIsa for the current host"); let isa = builder.finish(flags); Self::new(isa) } @@ -58,7 +58,7 @@ impl SingleFunctionCompiler { /// ISA. pub fn with_default_host_isa() -> Self { let flags = settings::Flags::new(settings::builder()); - Self::with_host_isa(flags, BackendVariant::Any) + Self::with_host_isa(flags) } /// Compile the passed [Function] to a `CompiledFunction`. 
This function will: @@ -248,7 +248,7 @@ fn compile(function: Function, isa: &dyn TargetIsa) -> Result anyhow::Result> { match parsed.command { - "binemit" => test_binemit::subtest(parsed), "cat" => test_cat::subtest(parsed), "compile" => test_compile::subtest(parsed), "dce" => test_dce::subtest(parsed), @@ -127,17 +120,12 @@ fn new_subtest(parsed: &TestCommand) -> anyhow::Result "legalizer" => test_legalizer::subtest(parsed), "licm" => test_licm::subtest(parsed), "peepmatic" => test_peepmatic::subtest(parsed), - "postopt" => test_postopt::subtest(parsed), "preopt" => test_preopt::subtest(parsed), "print-cfg" => test_print_cfg::subtest(parsed), - "regalloc" => test_regalloc::subtest(parsed), - "rodata" => test_rodata::subtest(parsed), "run" => test_run::subtest(parsed), "safepoint" => test_safepoint::subtest(parsed), - "shrink" => test_shrink::subtest(parsed), "simple-gvn" => test_simple_gvn::subtest(parsed), "simple_preopt" => test_simple_preopt::subtest(parsed), - "stack_maps" => test_stack_maps::subtest(parsed), "unwind" => test_unwind::subtest(parsed), "verifier" => test_verifier::subtest(parsed), _ => anyhow::bail!("unknown test command '{}'", parsed.command), @@ -146,9 +134,8 @@ fn new_subtest(parsed: &TestCommand) -> anyhow::Result fn pretty_anyhow_error( func: &cranelift_codegen::ir::Function, - isa: Option<&dyn cranelift_codegen::isa::TargetIsa>, err: cranelift_codegen::CodegenError, ) -> anyhow::Error { - let s = cranelift_codegen::print_errors::pretty_error(func, isa, err); + let s = cranelift_codegen::print_errors::pretty_error(func, err); anyhow::anyhow!("{}", s) } diff --git a/cranelift/filetests/src/runone.rs b/cranelift/filetests/src/runone.rs index 5c1ff01ec9..59bebcba77 100644 --- a/cranelift/filetests/src/runone.rs +++ b/cranelift/filetests/src/runone.rs @@ -179,9 +179,8 @@ fn run_one_test<'a>( // Should we run the verifier before this test? 
if !context.verified && test.needs_verifier() { - verify_function(&func, context.flags_or_isa()).map_err(|errors| { - anyhow::anyhow!("{}", pretty_verifier_error(&func, isa, None, errors)) - })?; + verify_function(&func, context.flags_or_isa()) + .map_err(|errors| anyhow::anyhow!("{}", pretty_verifier_error(&func, None, errors)))?; context.verified = true; } diff --git a/cranelift/filetests/src/test_binemit.rs b/cranelift/filetests/src/test_binemit.rs deleted file mode 100644 index 9f8db9ebb9..0000000000 --- a/cranelift/filetests/src/test_binemit.rs +++ /dev/null @@ -1,335 +0,0 @@ -//! Test command for testing the binary machine code emission. -//! -//! The `binemit` test command generates binary machine code for every instruction in the input -//! functions and compares the results to the expected output. - -use crate::match_directive::match_directive; -use crate::subtest::{Context, SubTest}; -use cranelift_codegen::binemit::{self, CodeInfo, CodeSink, RegDiversions}; -use cranelift_codegen::dbg::DisplayList; -use cranelift_codegen::dominator_tree::DominatorTree; -use cranelift_codegen::flowgraph::ControlFlowGraph; -use cranelift_codegen::ir; -use cranelift_codegen::ir::entities::AnyEntity; -use cranelift_codegen::isa; -use cranelift_codegen::settings::OptLevel; -use cranelift_reader::TestCommand; -use std::borrow::Cow; -use std::collections::HashMap; -use std::fmt::Write; - -struct TestBinEmit; - -pub fn subtest(parsed: &TestCommand) -> anyhow::Result> { - assert_eq!(parsed.command, "binemit"); - if !parsed.options.is_empty() { - anyhow::bail!("No options allowed on {}", parsed) - } else { - Ok(Box::new(TestBinEmit)) - } -} - -/// Code sink that generates text. -struct TextSink { - offset: binemit::CodeOffset, - text: String, -} - -impl TextSink { - /// Create a new empty TextSink. 
- pub fn new() -> Self { - Self { - offset: 0, - text: String::new(), - } - } -} - -impl binemit::CodeSink for TextSink { - fn offset(&self) -> binemit::CodeOffset { - self.offset - } - - fn put1(&mut self, x: u8) { - write!(self.text, "{:02x} ", x).unwrap(); - self.offset += 1; - } - - fn put2(&mut self, x: u16) { - write!(self.text, "{:04x} ", x).unwrap(); - self.offset += 2; - } - - fn put4(&mut self, x: u32) { - write!(self.text, "{:08x} ", x).unwrap(); - self.offset += 4; - } - - fn put8(&mut self, x: u64) { - write!(self.text, "{:016x} ", x).unwrap(); - self.offset += 8; - } - - fn reloc_external( - &mut self, - _srcloc: ir::SourceLoc, - reloc: binemit::Reloc, - name: &ir::ExternalName, - addend: binemit::Addend, - ) { - write!(self.text, "{}({}", reloc, name).unwrap(); - if addend != 0 { - write!(self.text, "{:+}", addend).unwrap(); - } - write!(self.text, ") ").unwrap(); - } - - fn reloc_constant(&mut self, reloc: binemit::Reloc, constant: ir::ConstantOffset) { - write!(self.text, "{}({}) ", reloc, constant).unwrap(); - } - - fn reloc_jt(&mut self, reloc: binemit::Reloc, jt: ir::JumpTable) { - write!(self.text, "{}({}) ", reloc, jt).unwrap(); - } - - fn trap(&mut self, code: ir::TrapCode, _srcloc: ir::SourceLoc) { - write!(self.text, "{} ", code).unwrap(); - } - - fn begin_jumptables(&mut self) {} - fn begin_rodata(&mut self) {} - fn end_codegen(&mut self) {} - fn add_stack_map( - &mut self, - _: &[ir::entities::Value], - _: &ir::Function, - _: &dyn isa::TargetIsa, - ) { - } -} - -impl SubTest for TestBinEmit { - fn name(&self) -> &'static str { - "binemit" - } - - fn is_mutating(&self) -> bool { - true - } - - fn needs_isa(&self) -> bool { - true - } - - fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - let isa = context.isa.expect("binemit needs an ISA"); - let encinfo = isa.encoding_info(); - // TODO: Run a verifier pass over the code first to detect any bad encodings or missing/bad - // value locations. 
The current error reporting is just crashing... - let mut func = func.into_owned(); - - // Fix the stack frame layout so we can test spill/fill encodings. - let min_offset = func - .stack_slots - .values() - .map(|slot| slot.offset.unwrap()) - .min(); - func.stack_slots.layout_info = min_offset.map(|off| ir::StackLayoutInfo { - frame_size: (-off) as u32, - inbound_args_size: 0, - }); - - let opt_level = isa.flags().opt_level(); - - // Give an encoding to any instruction that doesn't already have one. - let mut divert = RegDiversions::new(); - for block in func.layout.blocks() { - divert.clear(); - for inst in func.layout.block_insts(block) { - if !func.encodings[inst].is_legal() { - // Find an encoding that satisfies both immediate field and register - // constraints. - if let Some(enc) = { - let mut legal_encodings = isa - .legal_encodings(&func, &func.dfg[inst], func.dfg.ctrl_typevar(inst)) - .filter(|e| { - let recipe_constraints = &encinfo.constraints[e.recipe()]; - recipe_constraints.satisfied(inst, &divert, &func) - }); - - if opt_level == OptLevel::SpeedAndSize { - // Get the smallest legal encoding - legal_encodings - .min_by_key(|&e| encinfo.byte_size(e, inst, &divert, &func)) - } else { - // If not optimizing, just use the first encoding. - legal_encodings.next() - } - } { - func.encodings[inst] = enc; - } - } - divert.apply(&func.dfg[inst]); - } - } - - // Relax branches and compute block offsets based on the encodings. - let mut cfg = ControlFlowGraph::with_function(&func); - let mut domtree = DominatorTree::with_function(&func, &cfg); - let CodeInfo { total_size, .. } = - binemit::relax_branches(&mut func, &mut cfg, &mut domtree, isa) - .map_err(|e| crate::pretty_anyhow_error(&func, context.isa, e))?; - - // Collect all of the 'bin:' directives on instructions. 
- let mut bins = HashMap::new(); - for comment in &context.details.comments { - if let Some(want) = match_directive(comment.text, "bin:") { - match comment.entity { - AnyEntity::Inst(inst) => { - if let Some(prev) = bins.insert(inst, want) { - anyhow::bail!( - "multiple 'bin:' directives on {}: '{}' and '{}'", - func.dfg.display_inst(inst, isa), - prev, - want - ); - } - } - _ => { - anyhow::bail!( - "'bin:' directive on non-inst {}: {}", - comment.entity, - comment.text - ); - } - } - } - } - if bins.is_empty() { - anyhow::bail!("No 'bin:' directives found"); - } - - // Now emit all instructions. - let mut sink = TextSink::new(); - for block in func.layout.blocks() { - divert.clear(); - // Correct header offsets should have been computed by `relax_branches()`. - assert_eq!( - sink.offset, func.offsets[block], - "Inconsistent {} header offset", - block - ); - for (offset, inst, enc_bytes) in func.inst_offsets(block, &encinfo) { - assert_eq!(sink.offset, offset); - sink.text.clear(); - let enc = func.encodings[inst]; - - // Send legal encodings into the emitter. - if enc.is_legal() { - // Generate a better error message if output locations are not specified. - validate_location_annotations(&func, inst, isa, false)?; - - let before = sink.offset; - isa.emit_inst(&func, inst, &mut divert, &mut sink); - let emitted = sink.offset - before; - // Verify the encoding recipe sizes against the ISAs emit_inst implementation. - assert_eq!( - emitted, - enc_bytes, - "Inconsistent size for [{}] {}", - encinfo.display(enc), - func.dfg.display_inst(inst, isa) - ); - } - - // Check against bin: directives. - if let Some(want) = bins.remove(&inst) { - if !enc.is_legal() { - // A possible cause of an unencoded instruction is a missing location for - // one of the input/output operands. - validate_location_annotations(&func, inst, isa, true)?; - validate_location_annotations(&func, inst, isa, false)?; - - // Do any encodings exist? 
- let encodings = isa - .legal_encodings(&func, &func.dfg[inst], func.dfg.ctrl_typevar(inst)) - .map(|e| encinfo.display(e)) - .collect::>(); - - if encodings.is_empty() { - anyhow::bail!( - "No encodings found for: {}", - func.dfg.display_inst(inst, isa) - ); - } - anyhow::bail!( - "No matching encodings for {} in {}", - func.dfg.display_inst(inst, isa), - DisplayList(&encodings), - ); - } - let have = sink.text.trim(); - if have != want { - anyhow::bail!( - "Bad machine code for {}: {}\nWant: {}\nGot: {}", - inst, - func.dfg.display_inst(inst, isa), - want, - have - ); - } - } - } - } - - sink.begin_jumptables(); - - for (jt, jt_data) in func.jump_tables.iter() { - let jt_offset = func.jt_offsets[jt]; - for block in jt_data.iter() { - let rel_offset: i32 = func.offsets[*block] as i32 - jt_offset as i32; - sink.put4(rel_offset as u32) - } - } - - sink.begin_rodata(); - - // output constants - for (_, constant_data) in func.dfg.constants.iter() { - for byte in constant_data.iter() { - sink.put1(*byte) - } - } - - sink.end_codegen(); - - if sink.offset != total_size { - anyhow::bail!("Expected code size {}, got {}", total_size, sink.offset); - } - - Ok(()) - } -} - -/// Validate registers/stack slots are correctly annotated. 
-fn validate_location_annotations( - func: &ir::Function, - inst: ir::Inst, - isa: &dyn isa::TargetIsa, - validate_inputs: bool, -) -> anyhow::Result<()> { - let values = if validate_inputs { - func.dfg.inst_args(inst) - } else { - func.dfg.inst_results(inst) - }; - - if let Some(&v) = values.iter().find(|&&v| !func.locations[v].is_assigned()) { - anyhow::bail!( - "Need register/stack slot annotation for {} in {}", - v, - func.dfg.display_inst(inst, isa) - ); - } - Ok(()) -} diff --git a/cranelift/filetests/src/test_cat.rs b/cranelift/filetests/src/test_cat.rs index adf16e9403..fe25ea4935 100644 --- a/cranelift/filetests/src/test_cat.rs +++ b/cranelift/filetests/src/test_cat.rs @@ -31,6 +31,6 @@ impl SubTest for TestCat { } fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - subtest::run_filecheck(&func.display(context.isa).to_string(), context) + subtest::run_filecheck(&func.display().to_string(), context) } } diff --git a/cranelift/filetests/src/test_compile.rs b/cranelift/filetests/src/test_compile.rs index b66f8eca66..c0115f3d47 100644 --- a/cranelift/filetests/src/test_compile.rs +++ b/cranelift/filetests/src/test_compile.rs @@ -6,7 +6,6 @@ use crate::subtest::{run_filecheck, Context, SubTest}; use cranelift_codegen; use cranelift_codegen::binemit::{self, CodeInfo}; use cranelift_codegen::ir; -use cranelift_codegen::isa; use cranelift_reader::TestCommand; use log::info; use std::borrow::Cow; @@ -38,48 +37,27 @@ impl SubTest for TestCompile { let isa = context.isa.expect("compile needs an ISA"); let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - if isa.get_mach_backend().is_some() { - // With `MachBackend`s, we need to explicitly request dissassembly results. - comp_ctx.set_disasm(true); - } + // With `MachBackend`s, we need to explicitly request dissassembly results. + comp_ctx.set_disasm(true); let CodeInfo { total_size, .. 
} = comp_ctx .compile(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, e))?; + .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, e))?; info!( "Generated {} bytes of code:\n{}", total_size, - comp_ctx.func.display(isa) + comp_ctx.func.display() ); - if !isa.get_mach_backend().is_some() { - // Verify that the returned code size matches the emitted bytes. - let mut sink = SizeSink { offset: 0 }; - binemit::emit_function( - &comp_ctx.func, - |func, inst, div, sink, isa| isa.emit_inst(func, inst, div, sink), - &mut sink, - isa, - ); - - if sink.offset != total_size { - anyhow::bail!("Expected code size {}, got {}", total_size, sink.offset); - } - - // Run final code through filecheck. - let text = comp_ctx.func.display(Some(isa)).to_string(); - run_filecheck(&text, context) - } else { - let disasm = comp_ctx - .mach_compile_result - .as_ref() - .unwrap() - .disasm - .as_ref() - .unwrap(); - run_filecheck(&disasm, context) - } + let disasm = comp_ctx + .mach_compile_result + .as_ref() + .unwrap() + .disasm + .as_ref() + .unwrap(); + run_filecheck(&disasm, context) } } @@ -117,17 +95,8 @@ impl binemit::CodeSink for SizeSink { _addend: binemit::Addend, ) { } - fn reloc_constant(&mut self, _: binemit::Reloc, _: ir::ConstantOffset) {} - fn reloc_jt(&mut self, _reloc: binemit::Reloc, _jt: ir::JumpTable) {} fn trap(&mut self, _code: ir::TrapCode, _srcloc: ir::SourceLoc) {} fn begin_jumptables(&mut self) {} fn begin_rodata(&mut self) {} fn end_codegen(&mut self) {} - fn add_stack_map( - &mut self, - _: &[ir::entities::Value], - _: &ir::Function, - _: &dyn isa::TargetIsa, - ) { - } } diff --git a/cranelift/filetests/src/test_dce.rs b/cranelift/filetests/src/test_dce.rs index 826f7cacc9..48d28c7e72 100644 --- a/cranelift/filetests/src/test_dce.rs +++ b/cranelift/filetests/src/test_dce.rs @@ -37,9 +37,9 @@ impl SubTest for TestDCE { comp_ctx.compute_loop_analysis(); comp_ctx .dce(context.flags_or_isa()) - .map_err(|e| 
crate::pretty_anyhow_error(&comp_ctx.func, context.isa, Into::into(e)))?; + .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, Into::into(e)))?; - let text = comp_ctx.func.display(context.isa).to_string(); + let text = comp_ctx.func.display().to_string(); run_filecheck(&text, context) } } diff --git a/cranelift/filetests/src/test_legalizer.rs b/cranelift/filetests/src/test_legalizer.rs index f161226127..8af819bed2 100644 --- a/cranelift/filetests/src/test_legalizer.rs +++ b/cranelift/filetests/src/test_legalizer.rs @@ -39,9 +39,9 @@ impl SubTest for TestLegalizer { comp_ctx.compute_cfg(); comp_ctx .legalize(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, e))?; + .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, e))?; - let text = comp_ctx.func.display(Some(isa)).to_string(); + let text = comp_ctx.func.display().to_string(); run_filecheck(&text, context) } } diff --git a/cranelift/filetests/src/test_licm.rs b/cranelift/filetests/src/test_licm.rs index ad23d281d1..2ca245055a 100644 --- a/cranelift/filetests/src/test_licm.rs +++ b/cranelift/filetests/src/test_licm.rs @@ -42,9 +42,9 @@ impl SubTest for TestLICM { comp_ctx.compute_loop_analysis(); comp_ctx .licm(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, Into::into(e)))?; + .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, Into::into(e)))?; - let text = comp_ctx.func.display(context.isa).to_string(); + let text = comp_ctx.func.display().to_string(); run_filecheck(&text, context) } } diff --git a/cranelift/filetests/src/test_peepmatic.rs b/cranelift/filetests/src/test_peepmatic.rs index 5d228239a0..6efe42e00e 100644 --- a/cranelift/filetests/src/test_peepmatic.rs +++ b/cranelift/filetests/src/test_peepmatic.rs @@ -37,8 +37,8 @@ impl SubTest for TestPreopt { comp_ctx.compute_cfg(); comp_ctx .preopt(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, Into::into(e)))?; - let text = &comp_ctx.func.display(isa).to_string(); + 
.map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, Into::into(e)))?; + let text = &comp_ctx.func.display().to_string(); log::debug!("After peepmatic-based simple_preopt:\n{}", text); // Only actually run the filecheck if peepmatic is enabled, because it diff --git a/cranelift/filetests/src/test_postopt.rs b/cranelift/filetests/src/test_postopt.rs deleted file mode 100644 index ff7726d9c8..0000000000 --- a/cranelift/filetests/src/test_postopt.rs +++ /dev/null @@ -1,46 +0,0 @@ -//! Test command for testing the postopt pass. -//! -//! The resulting function is sent to `filecheck`. - -use crate::subtest::{run_filecheck, Context, SubTest}; -use cranelift_codegen; -use cranelift_codegen::ir::Function; -use cranelift_reader::TestCommand; -use std::borrow::Cow; - -struct TestPostopt; - -pub fn subtest(parsed: &TestCommand) -> anyhow::Result> { - assert_eq!(parsed.command, "postopt"); - if !parsed.options.is_empty() { - anyhow::bail!("No options allowed on {}", parsed); - } - Ok(Box::new(TestPostopt)) -} - -impl SubTest for TestPostopt { - fn name(&self) -> &'static str { - "postopt" - } - - fn needs_isa(&self) -> bool { - true - } - - fn is_mutating(&self) -> bool { - true - } - - fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - let isa = context.isa.expect("postopt needs an ISA"); - - comp_ctx.flowgraph(); - comp_ctx - .postopt(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, Into::into(e)))?; - - let text = comp_ctx.func.display(isa).to_string(); - run_filecheck(&text, context) - } -} diff --git a/cranelift/filetests/src/test_preopt.rs b/cranelift/filetests/src/test_preopt.rs index 072bd7a2ad..b9d9e9d60c 100644 --- a/cranelift/filetests/src/test_preopt.rs +++ b/cranelift/filetests/src/test_preopt.rs @@ -40,9 +40,9 @@ impl SubTest for TestPreopt { let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); optimize(&mut 
comp_ctx, isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, Into::into(e)))?; + .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, Into::into(e)))?; - let text = comp_ctx.func.display(context.isa).to_string(); + let text = comp_ctx.func.display().to_string(); run_filecheck(&text, context) } } diff --git a/cranelift/filetests/src/test_regalloc.rs b/cranelift/filetests/src/test_regalloc.rs deleted file mode 100644 index f0f4025560..0000000000 --- a/cranelift/filetests/src/test_regalloc.rs +++ /dev/null @@ -1,54 +0,0 @@ -//! Test command for testing the register allocator. -//! -//! The `regalloc` test command runs each function through the register allocator after ensuring -//! that all instructions are legal for the target. -//! -//! The resulting function is sent to `filecheck`. - -use crate::subtest::{run_filecheck, Context, SubTest}; -use cranelift_codegen; -use cranelift_codegen::ir::Function; -use cranelift_reader::TestCommand; -use std::borrow::Cow; - -struct TestRegalloc; - -pub fn subtest(parsed: &TestCommand) -> anyhow::Result> { - assert_eq!(parsed.command, "regalloc"); - if !parsed.options.is_empty() { - anyhow::bail!("No options allowed on {}", parsed); - } - Ok(Box::new(TestRegalloc)) -} - -impl SubTest for TestRegalloc { - fn name(&self) -> &'static str { - "regalloc" - } - - fn is_mutating(&self) -> bool { - true - } - - fn needs_isa(&self) -> bool { - true - } - - fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - let isa = context.isa.expect("register allocator needs an ISA"); - let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - - comp_ctx.compute_cfg(); - // TODO: Should we have an option to skip legalization? 
- comp_ctx - .legalize(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, e))?; - comp_ctx.compute_domtree(); - comp_ctx - .regalloc(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, e))?; - - let text = comp_ctx.func.display(Some(isa)).to_string(); - run_filecheck(&text, context) - } -} diff --git a/cranelift/filetests/src/test_rodata.rs b/cranelift/filetests/src/test_rodata.rs deleted file mode 100644 index 83b10b4e08..0000000000 --- a/cranelift/filetests/src/test_rodata.rs +++ /dev/null @@ -1,133 +0,0 @@ -//! Test command for verifying the rodata emitted after each function -//! -//! The `rodata` test command runs each function through the full code generator pipeline - -use crate::subtest::{run_filecheck, Context, SubTest}; -use cranelift_codegen; -use cranelift_codegen::binemit::{self, CodeInfo}; -use cranelift_codegen::ir; -use cranelift_codegen::ir::{Function, Value}; -use cranelift_codegen::isa::TargetIsa; -use cranelift_reader::TestCommand; -use log::info; -use std::borrow::Cow; - -struct TestRodata; - -pub fn subtest(parsed: &TestCommand) -> anyhow::Result> { - assert_eq!(parsed.command, "rodata"); - if !parsed.options.is_empty() { - anyhow::bail!("No options allowed on {}", parsed); - } - Ok(Box::new(TestRodata)) -} - -impl SubTest for TestRodata { - fn name(&self) -> &'static str { - "rodata" - } - - fn is_mutating(&self) -> bool { - true - } - - fn needs_isa(&self) -> bool { - true - } - - fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - let isa = context.isa.expect("rodata needs an ISA"); - let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - - let CodeInfo { total_size, .. } = comp_ctx - .compile(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, e))?; - - info!( - "Generated {} bytes of code:\n{}", - total_size, - comp_ctx.func.display(isa) - ); - - // Verify that the returned code size matches the emitted bytes. 
- let mut sink = RodataSink::default(); - binemit::emit_function( - &comp_ctx.func, - |func, inst, div, sink, isa| isa.emit_inst(func, inst, div, sink), - &mut sink, - isa, - ); - - // Run final code through filecheck. - let text = format!("{:X?}", sink.rodata); - info!("Found rodata: {}", text); - run_filecheck(&text, context) - } -} - -/// Code sink that only captures emitted rodata -#[derive(Default)] -struct RodataSink { - offset: usize, - rodata: Vec, - in_rodata: bool, -} - -impl binemit::CodeSink for RodataSink { - fn offset(&self) -> binemit::CodeOffset { - self.offset as u32 - } - - fn put1(&mut self, byte: u8) { - self.offset += 1; - if self.in_rodata { - self.rodata.push(byte); - } - } - - fn put2(&mut self, bytes: u16) { - self.offset += 2; - if self.in_rodata { - self.rodata.extend_from_slice(&bytes.to_be_bytes()); - } - } - - fn put4(&mut self, bytes: u32) { - self.offset += 4; - if self.in_rodata { - self.rodata.extend_from_slice(&bytes.to_be_bytes()); - } - } - - fn put8(&mut self, bytes: u64) { - self.offset += 8; - if self.in_rodata { - self.rodata.extend_from_slice(&bytes.to_be_bytes()); - } - } - - fn reloc_external( - &mut self, - _: ir::SourceLoc, - _: binemit::Reloc, - _: &ir::ExternalName, - _: binemit::Addend, - ) { - } - fn reloc_constant(&mut self, _: binemit::Reloc, _: ir::ConstantOffset) {} - fn reloc_jt(&mut self, _reloc: binemit::Reloc, _jt: ir::JumpTable) {} - fn trap(&mut self, _code: ir::TrapCode, _srcloc: ir::SourceLoc) {} - fn begin_jumptables(&mut self) { - assert!(!self.in_rodata, "Jump tables must be emitted before rodata"); - } - fn begin_rodata(&mut self) { - self.in_rodata = true; - } - fn end_codegen(&mut self) { - assert!( - self.in_rodata, - "Expected rodata to be emitted before the end of codegen" - ); - } - fn add_stack_map(&mut self, _: &[Value], _: &Function, _: &dyn TargetIsa) {} -} diff --git a/cranelift/filetests/src/test_run.rs b/cranelift/filetests/src/test_run.rs index 4b9e528cfd..86b346e21b 100644 --- 
a/cranelift/filetests/src/test_run.rs +++ b/cranelift/filetests/src/test_run.rs @@ -48,11 +48,10 @@ impl SubTest for TestRun { ); return Ok(()); } - let variant = context.isa.unwrap().variant(); let test_env = RuntestEnvironment::parse(&context.details.comments[..])?; - let mut compiler = SingleFunctionCompiler::with_host_isa(context.flags.clone(), variant); + let mut compiler = SingleFunctionCompiler::with_host_isa(context.flags.clone()); for comment in context.details.comments.iter() { if let Some(command) = parse_run_command(comment.text, &func.signature)? { trace!("Parsed run command: {}", command); diff --git a/cranelift/filetests/src/test_safepoint.rs b/cranelift/filetests/src/test_safepoint.rs index 90d155ad1e..49676e01be 100644 --- a/cranelift/filetests/src/test_safepoint.rs +++ b/cranelift/filetests/src/test_safepoint.rs @@ -25,13 +25,10 @@ impl SubTest for TestSafepoint { comp_ctx.compute_cfg(); comp_ctx .legalize(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, e))?; + .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, e))?; comp_ctx.compute_domtree(); - comp_ctx - .regalloc(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, e))?; - let text = comp_ctx.func.display(context.isa).to_string(); + let text = comp_ctx.func.display().to_string(); run_filecheck(&text, context) } } diff --git a/cranelift/filetests/src/test_shrink.rs b/cranelift/filetests/src/test_shrink.rs deleted file mode 100644 index e3b971b66a..0000000000 --- a/cranelift/filetests/src/test_shrink.rs +++ /dev/null @@ -1,44 +0,0 @@ -//! Test command for testing the Shrink pass. -//! -//! The `shrink` test command runs each function through the Shrink pass after ensuring -//! that all instructions are legal for the target. -//! -//! The resulting function is sent to `filecheck`. 
- -use crate::subtest::{run_filecheck, Context, SubTest}; -use cranelift_codegen; -use cranelift_codegen::ir::Function; -use cranelift_reader::TestCommand; -use std::borrow::Cow; - -struct TestShrink; - -pub fn subtest(parsed: &TestCommand) -> anyhow::Result> { - assert_eq!(parsed.command, "shrink"); - if !parsed.options.is_empty() { - anyhow::bail!("No options allowed on {}", parsed); - } - Ok(Box::new(TestShrink)) -} - -impl SubTest for TestShrink { - fn name(&self) -> &'static str { - "shrink" - } - - fn is_mutating(&self) -> bool { - true - } - - fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - let isa = context.isa.expect("shrink needs an ISA"); - let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - - comp_ctx - .shrink_instructions(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, Into::into(e)))?; - - let text = comp_ctx.func.display(isa).to_string(); - run_filecheck(&text, context) - } -} diff --git a/cranelift/filetests/src/test_simple_gvn.rs b/cranelift/filetests/src/test_simple_gvn.rs index 66baa0b7a5..bb563f4315 100644 --- a/cranelift/filetests/src/test_simple_gvn.rs +++ b/cranelift/filetests/src/test_simple_gvn.rs @@ -36,9 +36,9 @@ impl SubTest for TestSimpleGVN { comp_ctx.flowgraph(); comp_ctx .simple_gvn(context.flags_or_isa()) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, Into::into(e)))?; + .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, Into::into(e)))?; - let text = comp_ctx.func.display(context.isa).to_string(); + let text = comp_ctx.func.display().to_string(); run_filecheck(&text, context) } } diff --git a/cranelift/filetests/src/test_simple_preopt.rs b/cranelift/filetests/src/test_simple_preopt.rs index 2187afe0eb..01fef79b82 100644 --- a/cranelift/filetests/src/test_simple_preopt.rs +++ b/cranelift/filetests/src/test_simple_preopt.rs @@ -38,8 +38,8 @@ impl SubTest for TestSimplePreopt { comp_ctx.compute_cfg(); comp_ctx .preopt(isa) - 
.map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, e))?; - let text = &comp_ctx.func.display(isa).to_string(); + .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, e))?; + let text = &comp_ctx.func.display().to_string(); log::debug!("After simple_preopt:\n{}", text); // Only actually run the filecheck if peepmatic is *not* enabled, diff --git a/cranelift/filetests/src/test_stack_maps.rs b/cranelift/filetests/src/test_stack_maps.rs deleted file mode 100644 index 0f09966a85..0000000000 --- a/cranelift/filetests/src/test_stack_maps.rs +++ /dev/null @@ -1,109 +0,0 @@ -use crate::subtest::{run_filecheck, Context, SubTest}; -use cranelift_codegen::binemit::{self, Addend, CodeOffset, CodeSink, Reloc, StackMap}; -use cranelift_codegen::ir::*; -use cranelift_codegen::isa::TargetIsa; -use cranelift_reader::TestCommand; -use std::borrow::Cow; -use std::fmt::Write; - -struct TestStackMaps; - -pub fn subtest(parsed: &TestCommand) -> anyhow::Result> { - assert_eq!(parsed.command, "stack_maps"); - if !parsed.options.is_empty() { - anyhow::bail!("No options allowed on {}", parsed); - } - Ok(Box::new(TestStackMaps)) -} - -impl SubTest for TestStackMaps { - fn name(&self) -> &'static str { - "stack_maps" - } - - fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - - comp_ctx - .compile(context.isa.expect("`test stack_maps` requires an isa")) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, e))?; - - let mut sink = TestStackMapsSink::default(); - binemit::emit_function( - &comp_ctx.func, - |func, inst, div, sink, isa| { - if func.dfg[inst].opcode() == Opcode::Safepoint { - writeln!(&mut sink.text, "{}", func.dfg.display_inst(inst, isa)).unwrap(); - } - isa.emit_inst(func, inst, div, sink) - }, - &mut sink, - context.isa.expect("`test stack_maps` requires an isa"), - ); - - let mut text = comp_ctx.func.display(context.isa).to_string(); - 
text.push('\n'); - text.push_str("Stack maps:\n"); - text.push('\n'); - text.push_str(&sink.text); - - run_filecheck(&text, context) - } -} - -#[derive(Default)] -struct TestStackMapsSink { - offset: u32, - text: String, -} - -impl CodeSink for TestStackMapsSink { - fn offset(&self) -> CodeOffset { - self.offset - } - - fn put1(&mut self, _: u8) { - self.offset += 1; - } - - fn put2(&mut self, _: u16) { - self.offset += 2; - } - - fn put4(&mut self, _: u32) { - self.offset += 4; - } - - fn put8(&mut self, _: u64) { - self.offset += 8; - } - - fn reloc_external(&mut self, _: SourceLoc, _: Reloc, _: &ExternalName, _: Addend) {} - fn reloc_constant(&mut self, _: Reloc, _: ConstantOffset) {} - fn reloc_jt(&mut self, _: Reloc, _: JumpTable) {} - fn trap(&mut self, _: TrapCode, _: SourceLoc) {} - fn begin_jumptables(&mut self) {} - fn begin_rodata(&mut self) {} - fn end_codegen(&mut self) {} - - fn add_stack_map(&mut self, val_list: &[Value], func: &Function, isa: &dyn TargetIsa) { - let map = StackMap::from_values(&val_list, func, isa); - - writeln!(&mut self.text, " - mapped words: {}", map.mapped_words()).unwrap(); - write!(&mut self.text, " - live: [").unwrap(); - - let mut needs_comma_space = false; - for i in 0..(map.mapped_words() as usize) { - if map.get_bit(i) { - if needs_comma_space { - write!(&mut self.text, ", ").unwrap(); - } - needs_comma_space = true; - - write!(&mut self.text, "{}", i).unwrap(); - } - } - - writeln!(&mut self.text, "]").unwrap(); - } -} diff --git a/cranelift/frontend/src/frontend.rs b/cranelift/frontend/src/frontend.rs index 63ccdcb56b..7e72608b51 100644 --- a/cranelift/frontend/src/frontend.rs +++ b/cranelift/frontend/src/frontend.rs @@ -4,14 +4,13 @@ use crate::variable::Variable; use cranelift_codegen::cursor::{Cursor, FuncCursor}; use cranelift_codegen::entity::{EntitySet, SecondaryMap}; use cranelift_codegen::ir; -use cranelift_codegen::ir::function::DisplayFunction; use cranelift_codegen::ir::{ types, AbiParam, Block, 
DataFlowGraph, ExtFuncData, ExternalName, FuncRef, Function, GlobalValue, GlobalValueData, Heap, HeapData, Inst, InstBuilder, InstBuilderBase, InstructionData, JumpTable, JumpTableData, LibCall, MemFlags, SigRef, Signature, StackSlot, StackSlotData, Type, Value, ValueLabel, ValueLabelAssignments, ValueLabelStart, }; -use cranelift_codegen::isa::{TargetFrontendConfig, TargetIsa}; +use cranelift_codegen::isa::TargetFrontendConfig; use cranelift_codegen::packed_option::PackedOption; /// Structure used for translating a series of functions into Cranelift IR. @@ -481,7 +480,7 @@ impl<'a> FunctionBuilder<'a> { // Iterate manually to provide more helpful error messages. for block in self.func_ctx.blocks.keys() { if let Err((inst, _msg)) = self.func.is_block_basic(block) { - let inst_str = self.func.dfg.display_inst(inst, None); + let inst_str = self.func.dfg.display_inst(inst); panic!("{} failed basic block invariants on {}", block, inst_str); } } @@ -579,15 +578,6 @@ impl<'a> FunctionBuilder<'a> { pub fn is_filled(&self) -> bool { self.func_ctx.blocks[self.position.unwrap()].filled } - - /// Returns a displayable object for the function as it is. - /// - /// Useful for debug purposes. Use it with `None` for standard printing. 
- // Clippy thinks the lifetime that follows is needless, but rustc needs it - #[cfg_attr(feature = "cargo-clippy", allow(clippy::needless_lifetimes))] - pub fn display<'b, I: Into>>(&'b self, isa: I) -> DisplayFunction { - self.func.display(isa) - } } /// Helper functions @@ -955,7 +945,7 @@ mod tests { let flags = settings::Flags::new(settings::builder()); // println!("{}", func.display(None)); if let Err(errors) = verify_function(&func, &flags) { - panic!("{}\n{}", func.display(None), errors) + panic!("{}\n{}", func.display(), errors) } } @@ -1009,7 +999,7 @@ mod tests { } assert_eq!( - func.display(None).to_string(), + func.display().to_string(), "function %sample() -> i32 system_v { sig0 = (i64, i64, i64) system_v fn0 = %Memcpy sig0 @@ -1065,7 +1055,7 @@ block0: } assert_eq!( - func.display(None).to_string(), + func.display().to_string(), "function %sample() -> i32 system_v { block0: v4 = iconst.i64 0 @@ -1119,7 +1109,7 @@ block0: } assert_eq!( - func.display(None).to_string(), + func.display().to_string(), "function %sample() -> i32 system_v { sig0 = (i64, i64, i64) system_v fn0 = %Memcpy sig0 @@ -1164,7 +1154,7 @@ block0: } assert_eq!( - func.display(None).to_string(), + func.display().to_string(), "function %sample() -> i32 system_v { block0: v2 = iconst.i64 0 @@ -1204,7 +1194,7 @@ block0: } assert_eq!( - func.display(None).to_string(), + func.display().to_string(), "function %sample() -> i32 system_v { sig0 = (i64, i32, i64) system_v fn0 = %Memset sig0 @@ -1253,7 +1243,7 @@ block0: } assert_eq!( - func.display(None).to_string(), + func.display().to_string(), "function %sample() -> i8x16, b8x16, f32x4 system_v { const0 = 0x00000000000000000000000000000000 diff --git a/cranelift/frontend/src/lib.rs b/cranelift/frontend/src/lib.rs index 40bd27bfbc..73928d6d79 100644 --- a/cranelift/frontend/src/lib.rs +++ b/cranelift/frontend/src/lib.rs @@ -156,7 +156,7 @@ //! //! let flags = settings::Flags::new(settings::builder()); //! 
let res = verify_function(&func, &flags); -//! println!("{}", func.display(None)); +//! println!("{}", func.display()); //! if let Err(errors) = res { //! panic!("{}", errors); //! } diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index c8cfcecf98..eb6adc770f 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -451,17 +451,11 @@ where Opcode::Spill => unimplemented!("Spill"), Opcode::Fill => unimplemented!("Fill"), Opcode::FillNop => assign(arg(0)?), - Opcode::DummySargT => unimplemented!("DummySargT"), - Opcode::Regmove => ControlFlow::Continue, - Opcode::CopySpecial => ControlFlow::Continue, - Opcode::CopyToSsa => assign(arg(0)?), Opcode::CopyNop => unimplemented!("CopyNop"), Opcode::AdjustSpDown => unimplemented!("AdjustSpDown"), Opcode::AdjustSpUpImm => unimplemented!("AdjustSpUpImm"), Opcode::AdjustSpDownImm => unimplemented!("AdjustSpDownImm"), Opcode::IfcmpSp => unimplemented!("IfcmpSp"), - Opcode::Regspill => unimplemented!("Regspill"), - Opcode::Regfill => unimplemented!("Regfill"), Opcode::Safepoint => unimplemented!("Safepoint"), Opcode::Icmp => assign(icmp( ctrl_ty, @@ -557,7 +551,30 @@ where false, )?), Opcode::Ineg => binary(Value::sub, Value::int(0, ctrl_ty)?, arg(0)?)?, - Opcode::Iabs => unimplemented!("Iabs"), + Opcode::Iabs => { + let (min_val, _) = ctrl_ty.lane_type().bounds(true); + let min_val: V = Value::int(min_val as i128, ctrl_ty.lane_type())?; + if ctrl_ty.is_vector() { + let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + let new_vec = arg0 + .into_iter() + .map(|lane| { + if Value::eq(&lane, &min_val)? { + Ok(min_val.clone()) + } else { + Value::int(lane.into_int()?.abs(), ctrl_ty.lane_type()) + } + }) + .collect::>>()?; + assign(vectorizelanes(&new_vec, ctrl_ty)?) + } else { + assign(if Value::eq(&arg(0)?, &min_val)? { + min_val.clone() + } else { + Value::int(arg(0)?.into_int()?.abs(), ctrl_ty.lane_type())? 
+ }) + } + } Opcode::Imul => binary(Value::mul, arg(0)?, arg(1)?)?, Opcode::Umulhi | Opcode::Smulhi => { let double_length = match ctrl_ty.lane_bits() { @@ -767,24 +784,57 @@ where | Opcode::Breduce | Opcode::Bextend | Opcode::Bint - | Opcode::Bmask | Opcode::Ireduce => assign(Value::convert( arg(0)?, ValueConversionKind::Exact(ctrl_ty), )?), - Opcode::Snarrow => assign(Value::convert( - arg(0)?, - ValueConversionKind::Truncate(ctrl_ty), - )?), + Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => { + let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; + let new_type = ctrl_ty.split_lanes().unwrap(); + let (min, max) = new_type.bounds(inst.opcode() == Opcode::Snarrow); + let mut min: V = Value::int(min as i128, ctrl_ty.lane_type())?; + let mut max: V = Value::int(max as i128, ctrl_ty.lane_type())?; + if inst.opcode() == Opcode::Uunarrow { + min = min.convert(ValueConversionKind::ToUnsigned)?; + max = max.convert(ValueConversionKind::ToUnsigned)?; + } + let narrow = |mut lane: V| -> ValueResult { + if inst.opcode() == Opcode::Uunarrow { + lane = lane.convert(ValueConversionKind::ToUnsigned)?; + } + lane = Value::max(lane, min.clone())?; + lane = Value::min(lane, max.clone())?; + lane = lane.convert(ValueConversionKind::Truncate(new_type.lane_type()))?; + if inst.opcode() == Opcode::Unarrow || inst.opcode() == Opcode::Uunarrow { + lane = lane.convert(ValueConversionKind::ToUnsigned)?; + } + Ok(lane) + }; + let new_vec = arg0 + .into_iter() + .chain(arg1) + .map(|lane| narrow(lane)) + .collect::>>()?; + assign(vectorizelanes(&new_vec, new_type)?) + } + Opcode::Bmask => assign({ + let bool = arg(0)?; + let bool_ty = ctrl_ty.as_bool_pedantic(); + if ctrl_ty.is_vector() { + let lanes = extractlanes(&bool, bool_ty.lane_type())? + .into_iter() + .map(|lane| lane.convert(ValueConversionKind::Exact(ctrl_ty.lane_type()))) + .collect::>>()?; + vectorizelanes(&lanes, ctrl_ty)? 
+ } else { + bool.convert(ValueConversionKind::Exact(ctrl_ty))? + } + }), Opcode::Sextend => assign(Value::convert( arg(0)?, ValueConversionKind::SignExtend(ctrl_ty), )?), - Opcode::Unarrow => assign(Value::convert( - arg(0)?, - ValueConversionKind::Truncate(ctrl_ty), - )?), - Opcode::Uunarrow => unimplemented!("Uunarrow"), Opcode::Uextend => assign(Value::convert( arg(0)?, ValueConversionKind::ZeroExtend(ctrl_ty), @@ -840,10 +890,37 @@ where let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; assign(lanes[idx].clone()) } - Opcode::VhighBits => unimplemented!("VhighBits"), + Opcode::VhighBits => { + // `ctrl_ty` controls the return type for this, so the input type + // must be retrieved via `inst_context`. + let lane_type = inst_context + .type_of(inst_context.args()[0]) + .unwrap() + .lane_type(); + let a = extractlanes(&arg(0)?, lane_type)?; + let mut result: i128 = 0; + for (i, val) in a.into_iter().enumerate() { + let val = val.reverse_bits()?.into_int()?; // MSB -> LSB + result |= (val & 1) << i; + } + assign(Value::int(result, ctrl_ty)?) + } Opcode::Vsplit => unimplemented!("Vsplit"), Opcode::Vconcat => unimplemented!("Vconcat"), - Opcode::Vselect => unimplemented!("Vselect"), + Opcode::Vselect => { + let c = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + let x = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; + let y = extractlanes(&arg(2)?, ctrl_ty.lane_type())?; + let mut new_vec = SimdVec::new(); + for (c, (x, y)) in c.into_iter().zip(x.into_iter().zip(y.into_iter())) { + if Value::eq(&c, &Value::int(0, ctrl_ty.lane_type())?)? { + new_vec.push(y); + } else { + new_vec.push(x); + } + } + assign(vectorizelanes(&new_vec, ctrl_ty)?) 
+ } Opcode::VanyTrue => assign(fold_vector( arg(0)?, ctrl_ty, @@ -900,48 +977,56 @@ where Opcode::AtomicLoad => unimplemented!("AtomicLoad"), Opcode::AtomicStore => unimplemented!("AtomicStore"), Opcode::Fence => unimplemented!("Fence"), - Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"), - Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"), + Opcode::WideningPairwiseDotProductS => { + let ctrl_ty = types::I16X8; + let new_type = ctrl_ty.merge_lanes().unwrap(); + let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?; + let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?; + let new_vec = arg0 + .chunks(2) + .into_iter() + .zip(arg1.chunks(2)) + .into_iter() + .map(|(x, y)| { + let mut z = 0i128; + for (lhs, rhs) in x.into_iter().zip(y.into_iter()) { + z += lhs.clone().into_int()? * rhs.clone().into_int()?; + } + Value::int(z, new_type.lane_type()) + }) + .collect::>>()?; + assign(vectorizelanes(&new_vec, new_type)?) + } + Opcode::SqmulRoundSat => { + let lane_type = ctrl_ty.lane_type(); + let double_width = ctrl_ty.double_width().unwrap().lane_type(); + let arg0 = extractlanes(&arg(0)?, lane_type)?; + let arg1 = extractlanes(&arg(1)?, lane_type)?; + let (min, max) = lane_type.bounds(true); + let min: V = Value::int(min as i128, double_width)?; + let max: V = Value::int(max as i128, double_width)?; + let new_vec = arg0 + .into_iter() + .zip(arg1.into_iter()) + .map(|(x, y)| { + let x = x.into_int()?; + let y = y.into_int()?; + // temporarily double width of the value to avoid overflow. + let z: V = Value::int( + (x * y + (1 << (lane_type.bits() - 2))) >> (lane_type.bits() - 1), + double_width, + )?; + // check bounds, saturate, and truncate to correct width. + let z = Value::min(z, max.clone())?; + let z = Value::max(z, min.clone())?; + let z = z.convert(ValueConversionKind::Truncate(lane_type))?; + Ok(z) + }) + .collect::>>()?; + assign(vectorizelanes(&new_vec, ctrl_ty)?) 
+ } Opcode::IaddPairwise => assign(binary_pairwise(arg(0)?, arg(1)?, ctrl_ty, Value::add)?), - // TODO: these instructions should be removed once the new backend makes these obsolete - // (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the - // "all-arch" feature for cranelift-codegen would become unnecessary for this crate. - Opcode::X86Udivmodx - | Opcode::X86Sdivmodx - | Opcode::X86Umulx - | Opcode::X86Smulx - | Opcode::X86Cvtt2si - | Opcode::X86Vcvtudq2ps - | Opcode::X86Fmin - | Opcode::X86Fmax - | Opcode::X86Push - | Opcode::X86Pop - | Opcode::X86Bsr - | Opcode::X86Bsf - | Opcode::X86Pshufd - | Opcode::X86Pshufb - | Opcode::X86Pblendw - | Opcode::X86Pextr - | Opcode::X86Pinsr - | Opcode::X86Insertps - | Opcode::X86Punpckh - | Opcode::X86Punpckl - | Opcode::X86Movsd - | Opcode::X86Movlhps - | Opcode::X86Psll - | Opcode::X86Psrl - | Opcode::X86Psra - | Opcode::X86Pmullq - | Opcode::X86Pmuludq - | Opcode::X86Ptest - | Opcode::X86Pmaxs - | Opcode::X86Pmaxu - | Opcode::X86Pmins - | Opcode::X86Pminu - | Opcode::X86Palignr - | Opcode::X86ElfTlsGetAddr - | Opcode::X86MachoTlsGetAddr => unimplemented!("x86 instruction: {}", inst.opcode()), Opcode::JumpTableBase | Opcode::JumpTableEntry | Opcode::IndirectJumpTableBr => { unimplemented!("Legacy instruction: {}", inst.opcode()) } diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs index 2bdcd335ed..70f6ac78d6 100644 --- a/cranelift/interpreter/src/value.rs +++ b/cranelift/interpreter/src/value.rs @@ -26,6 +26,9 @@ pub trait Value: Clone + From { fn convert(self, kind: ValueConversionKind) -> ValueResult; fn concat(self, other: Self) -> ValueResult; + fn max(self, other: Self) -> ValueResult; + fn min(self, other: Self) -> ValueResult; + // Comparison. 
fn eq(&self, other: &Self) -> ValueResult; fn gt(&self, other: &Self) -> ValueResult; @@ -274,11 +277,11 @@ impl Value for DataValue { (DataValue::I64(n), types::I128) => DataValue::I128(n as i128), (DataValue::B(b), t) if t.is_bool() => DataValue::B(b), (DataValue::B(b), t) if t.is_int() => { - let val = if b { - // Bools are represented in memory as all 1's - (1i128 << t.bits()) - 1 - } else { - 0 + // Bools are represented in memory as all 1's + let val = match (b, t) { + (true, types::I128) => -1, + (true, t) => (1i128 << t.bits()) - 1, + _ => 0, }; DataValue::int(val, t)? } @@ -313,11 +316,17 @@ impl Value for DataValue { Self::from_integer(extracted, ty)? } ValueConversionKind::SignExtend(ty) => match (self, ty) { + (DataValue::U8(n), types::I16) => DataValue::U16(n as u16), + (DataValue::U8(n), types::I32) => DataValue::U32(n as u32), + (DataValue::U8(n), types::I64) => DataValue::U64(n as u64), (DataValue::I8(n), types::I16) => DataValue::I16(n as i16), (DataValue::I8(n), types::I32) => DataValue::I32(n as i32), (DataValue::I8(n), types::I64) => DataValue::I64(n as i64), + (DataValue::U16(n), types::I32) => DataValue::U32(n as u32), + (DataValue::U16(n), types::I64) => DataValue::U64(n as u64), (DataValue::I16(n), types::I32) => DataValue::I32(n as i32), (DataValue::I16(n), types::I64) => DataValue::I64(n as i64), + (DataValue::U32(n), types::I64) => DataValue::U64(n as u64), (DataValue::I32(n), types::I64) => DataValue::I64(n as i64), (DataValue::I64(n), types::I128) => DataValue::I128(n as i128), (dv, _) => unimplemented!("conversion: {} -> {:?}", dv.ty(), kind), @@ -376,6 +385,22 @@ impl Value for DataValue { } } + fn max(self, other: Self) -> ValueResult { + if Value::gt(&self, &other)? { + Ok(self) + } else { + Ok(other) + } + } + + fn min(self, other: Self) -> ValueResult { + if Value::lt(&self, &other)? 
{ + Ok(self) + } else { + Ok(other) + } + } + fn eq(&self, other: &Self) -> ValueResult { comparison_match!(PartialEq::eq[&self, &other]; [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, F32, F64]) } diff --git a/cranelift/jit/src/backend.rs b/cranelift/jit/src/backend.rs index 07a5cad65d..d7d092d258 100644 --- a/cranelift/jit/src/backend.rs +++ b/cranelift/jit/src/backend.rs @@ -634,7 +634,7 @@ impl Module for JITModule { trap_sink: &mut dyn TrapSink, stack_map_sink: &mut dyn StackMapSink, ) -> ModuleResult { - info!("defining function {}: {}", id, ctx.func.display(self.isa())); + info!("defining function {}: {}", id, ctx.func.display()); let CodeInfo { total_size: code_size, .. @@ -657,7 +657,7 @@ impl Module for JITModule { .expect("TODO: handle OOM etc."); let mut reloc_sink = JITRelocSink::default(); - unsafe { ctx.emit_to_memory(&*self.isa, ptr, &mut reloc_sink, trap_sink, stack_map_sink) }; + unsafe { ctx.emit_to_memory(ptr, &mut reloc_sink, trap_sink, stack_map_sink) }; self.record_function_for_perf(ptr, size, &decl.name); self.compiled_functions[id] = Some(CompiledBlob { @@ -893,28 +893,4 @@ impl RelocSink for JITRelocSink { addend, }); } - - fn reloc_jt(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::JumpTable) { - match reloc { - Reloc::X86PCRelRodata4 => { - // Not necessary to record this unless we are going to split apart code and its - // jumptbl/rodata. - } - _ => { - panic!("Unhandled reloc"); - } - } - } - - fn reloc_constant(&mut self, _offset: CodeOffset, reloc: Reloc, _constant: ir::ConstantOffset) { - match reloc { - Reloc::X86PCRelRodata4 => { - // Not necessary to record this unless we are going to split apart code and its - // jumptbl/rodata. 
- } - _ => { - panic!("Unhandled reloc"); - } - } - } } diff --git a/cranelift/jit/tests/basic.rs b/cranelift/jit/tests/basic.rs index 8fb1b0d73d..f53480702d 100644 --- a/cranelift/jit/tests/basic.rs +++ b/cranelift/jit/tests/basic.rs @@ -149,7 +149,7 @@ fn switch_error() { Ok(_) => {} Err(err) => { let pretty_error = - cranelift_codegen::print_errors::pretty_verifier_error(&func, None, None, err); + cranelift_codegen::print_errors::pretty_verifier_error(&func, None, err); panic!("pretty_error:\n{}", pretty_error); } } diff --git a/cranelift/native/Cargo.toml b/cranelift/native/Cargo.toml index 96fe91f665..493778a463 100644 --- a/cranelift/native/Cargo.toml +++ b/cranelift/native/Cargo.toml @@ -15,7 +15,7 @@ cranelift-codegen = { path = "../codegen", version = "0.77.0", default-features target-lexicon = "0.12" [target.'cfg(target_arch = "s390x")'.dependencies] -rsix = "0.23.0" +libc = "0.2.95" [features] default = ["std"] diff --git a/cranelift/native/src/lib.rs b/cranelift/native/src/lib.rs index b2364c6ad9..157a4f1e62 100644 --- a/cranelift/native/src/lib.rs +++ b/cranelift/native/src/lib.rs @@ -30,7 +30,7 @@ use target_lexicon::Triple; /// machine, or `Err(())` if the host machine is not supported /// in the current configuration. pub fn builder() -> Result { - builder_with_options(isa::BackendVariant::Any, true) + builder_with_options(true) } /// Return an `isa` builder configured for the current host @@ -40,17 +40,11 @@ pub fn builder() -> Result { /// Selects the given backend variant specifically; this is /// useful when more than oen backend exists for a given target /// (e.g., on x86-64). 
-pub fn builder_with_options( - variant: isa::BackendVariant, - infer_native_flags: bool, -) -> Result { - let mut isa_builder = - isa::lookup_variant(Triple::host(), variant).map_err(|err| match err { - isa::LookupError::SupportDisabled => { - "support for architecture disabled at compile time" - } - isa::LookupError::Unsupported => "unsupported architecture", - })?; +pub fn builder_with_options(infer_native_flags: bool) -> Result { + let mut isa_builder = isa::lookup(Triple::host()).map_err(|err| match err { + isa::LookupError::SupportDisabled => "support for architecture disabled at compile time", + isa::LookupError::Unsupported => "unsupported architecture", + })?; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { @@ -126,7 +120,7 @@ pub fn builder_with_options( } // There is no is_s390x_feature_detected macro yet, so for now - // we use linux_hwcap from the rsix crate directly. + // we use getauxval from the libc crate directly. #[cfg(all(target_arch = "s390x", target_os = "linux"))] { use cranelift_codegen::settings::Configurable; @@ -135,8 +129,8 @@ pub fn builder_with_options( return Ok(isa_builder); } - let v = rsix::process::linux_hwcap().0; - const HWCAP_S390X_VXRS_EXT2: usize = 32768; + let v = unsafe { libc::getauxval(libc::AT_HWCAP) }; + const HWCAP_S390X_VXRS_EXT2: libc::c_ulong = 32768; if (v & HWCAP_S390X_VXRS_EXT2) != 0 { isa_builder.enable("has_vxrs_ext2").unwrap(); // There is no separate HWCAP bit for mie2, so assume diff --git a/cranelift/object/src/backend.rs b/cranelift/object/src/backend.rs index 2e48284df4..0f710d9f0e 100644 --- a/cranelift/object/src/backend.rs +++ b/cranelift/object/src/backend.rs @@ -310,11 +310,7 @@ impl Module for ObjectModule { trap_sink: &mut dyn TrapSink, stack_map_sink: &mut dyn StackMapSink, ) -> ModuleResult { - info!( - "defining function {}: {}", - func_id, - ctx.func.display(self.isa()) - ); + info!("defining function {}: {}", func_id, ctx.func.display()); let CodeInfo { total_size: code_size, .. 
@@ -324,7 +320,6 @@ impl Module for ObjectModule { unsafe { ctx.emit_to_memory( - &*self.isa, code.as_mut_ptr(), &mut reloc_sink, trap_sink, @@ -739,28 +734,4 @@ impl RelocSink for ObjectRelocSink { name: name.clone(), }) } - - fn reloc_jt(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::JumpTable) { - match reloc { - Reloc::X86PCRelRodata4 => { - // Not necessary to record this unless we are going to split apart code and its - // jumptbl/rodata. - } - _ => { - panic!("Unhandled reloc"); - } - } - } - - fn reloc_constant(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::ConstantOffset) { - match reloc { - Reloc::X86PCRelRodata4 => { - // Not necessary to record this unless we are going to split apart code and its - // jumptbl/rodata. - } - _ => { - panic!("Unhandled reloc"); - } - } - } } diff --git a/cranelift/object/tests/basic.rs b/cranelift/object/tests/basic.rs index 20fd4453ae..a5add02295 100644 --- a/cranelift/object/tests/basic.rs +++ b/cranelift/object/tests/basic.rs @@ -142,7 +142,7 @@ fn switch_error() { Ok(_) => {} Err(err) => { let pretty_error = - cranelift_codegen::print_errors::pretty_verifier_error(&func, None, None, err); + cranelift_codegen::print_errors::pretty_verifier_error(&func, None, err); panic!("pretty_error:\n{}", pretty_error); } } diff --git a/cranelift/reader/src/lexer.rs b/cranelift/reader/src/lexer.rs index 647742cacc..45b419f1e3 100644 --- a/cranelift/reader/src/lexer.rs +++ b/cranelift/reader/src/lexer.rs @@ -326,7 +326,6 @@ impl<'a> Lexer<'a> { .unwrap_or_else(|| match text { "iflags" => Token::Type(types::IFLAGS), "fflags" => Token::Type(types::FFLAGS), - "sarg_t" => Token::Type(types::SARG_T), _ => Token::Identifier(text), }), loc, @@ -620,7 +619,7 @@ mod tests { let mut lex = Lexer::new( "v0 v00 vx01 block1234567890 block5234567890 v1x vx1 vxvx4 \ function0 function b1 i32x4 f32x5 \ - iflags fflags sarg_t iflagss", + iflags fflags iflagss", ); assert_eq!( lex.next(), @@ -643,7 +642,6 @@ mod tests { 
assert_eq!(lex.next(), token(Token::Identifier("f32x5"), 1)); assert_eq!(lex.next(), token(Token::Type(types::IFLAGS), 1)); assert_eq!(lex.next(), token(Token::Type(types::FFLAGS), 1)); - assert_eq!(lex.next(), token(Token::Type(types::SARG_T), 1)); assert_eq!(lex.next(), token(Token::Identifier("iflagss"), 1)); assert_eq!(lex.next(), None); } diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index 97b4a7c77d..0f63a1c4fd 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -17,12 +17,12 @@ use cranelift_codegen::ir::instructions::{InstructionData, InstructionFormat, Va use cranelift_codegen::ir::types::INVALID; use cranelift_codegen::ir::types::*; use cranelift_codegen::ir::{ - AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, Block, Constant, ConstantData, - ExtFuncData, ExternalName, FuncRef, Function, GlobalValue, GlobalValueData, Heap, HeapData, - HeapStyle, JumpTable, JumpTableData, MemFlags, Opcode, SigRef, Signature, StackSlot, - StackSlotData, StackSlotKind, Table, TableData, Type, Value, ValueLoc, + AbiParam, ArgumentExtension, ArgumentPurpose, Block, Constant, ConstantData, ExtFuncData, + ExternalName, FuncRef, Function, GlobalValue, GlobalValueData, Heap, HeapData, HeapStyle, + JumpTable, JumpTableData, MemFlags, Opcode, SigRef, Signature, StackSlot, StackSlotData, + StackSlotKind, Table, TableData, Type, Value, }; -use cranelift_codegen::isa::{self, BackendVariant, CallConv, Encoding, RegUnit, TargetIsa}; +use cranelift_codegen::isa::{self, CallConv}; use cranelift_codegen::packed_option::ReservedValue; use cranelift_codegen::{settings, settings::Configurable, timing}; use smallvec::SmallVec; @@ -31,6 +31,51 @@ use std::str::FromStr; use std::{u16, u32}; use target_lexicon::Triple; +macro_rules! 
match_imm { + ($signed:ty, $unsigned:ty, $parser:expr, $err_msg:expr) => {{ + if let Some(Token::Integer(text)) = $parser.token() { + $parser.consume(); + let negative = text.starts_with('-'); + let positive = text.starts_with('+'); + let text = if negative || positive { + // Strip sign prefix. + &text[1..] + } else { + text + }; + + // Parse the text value; the lexer gives us raw text that looks like an integer. + let value = if text.starts_with("0x") { + // Skip underscores. + let text = text.replace("_", ""); + // Parse it in hexadecimal form. + <$unsigned>::from_str_radix(&text[2..], 16).map_err(|_| { + $parser.error("unable to parse value as a hexadecimal immediate") + })? + } else { + // Parse it as a signed type to check for overflow and other issues. + text.parse() + .map_err(|_| $parser.error("expected decimal immediate"))? + }; + + // Apply sign if necessary. + let signed = if negative { + let value = value.wrapping_neg() as $signed; + if value > 0 { + return Err($parser.error("negative number too small")); + } + value + } else { + value as $signed + }; + + Ok(signed) + } else { + err!($parser.loc, $err_msg) + } + }}; +} + /// After some quick benchmarks a program should never have more than 100,000 blocks. const MAX_BLOCKS_IN_A_FUNCTION: u32 = 100_000; @@ -109,7 +154,7 @@ pub fn parse_test<'a>(text: &'a str, options: ParseOptions<'a>) -> ParseResult { } /// Context for resolving references when parsing a single function. -struct Context<'a> { +struct Context { function: Function, map: SourceMap, /// Aliases to resolve once value definitions are known. aliases: Vec, - - /// Reference to the unique_isa for things like parsing target-specific instruction encoding - /// information. 
This is only `Some` if exactly one set of `isa` directives were found in the - /// prologue (it is valid to have directives for multiple different targets, but in that case - /// we couldn't know which target the provided encodings are intended for) - unique_isa: Option<&'a dyn TargetIsa>, } -impl<'a> Context<'a> { - fn new(f: Function, unique_isa: Option<&'a dyn TargetIsa>) -> Self { +impl Context { + fn new(f: Function) -> Self { Self { function: f, map: SourceMap::new(), - unique_isa, aliases: Vec::new(), } } - // Get the index of a recipe name if it exists. - fn find_recipe_index(&self, recipe_name: &str) -> Option { - if let Some(unique_isa) = self.unique_isa { - unique_isa - .encoding_info() - .names - .iter() - .position(|&name| name == recipe_name) - .map(|idx| idx as u16) - } else { - None - } - } - // Allocate a new stack slot. fn add_ss(&mut self, ss: StackSlot, data: StackSlotData, loc: Location) -> ParseResult<()> { self.map.def_ss(ss, loc)?; @@ -793,132 +817,23 @@ impl<'a> Parser<'a> { // Match and consume an i8 immediate. fn match_imm8(&mut self, err_msg: &str) -> ParseResult { - if let Some(Token::Integer(text)) = self.token() { - self.consume(); - let negative = text.starts_with('-'); - let positive = text.starts_with('+'); - let text = if negative || positive { - // Strip sign prefix. - &text[1..] - } else { - text - }; - - // Parse the text value; the lexer gives us raw text that looks like an integer. - let value = if text.starts_with("0x") { - // Skip underscores. - let text = text.replace("_", ""); - // Parse it as a i8 in hexadecimal form. - u8::from_str_radix(&text[2..], 16) - .map_err(|_| self.error("unable to parse i8 as a hexadecimal immediate"))? - } else { - // Parse it as a i8 to check for overflow and other issues. - text.parse() - .map_err(|_| self.error("expected i8 decimal immediate"))? - }; - - // Apply sign if necessary. 
- let signed = if negative { - let value = value.wrapping_neg() as i8; - if value > 0 { - return Err(self.error("negative number too small")); - } - value - } else { - value as i8 - }; - - Ok(signed) - } else { - err!(self.loc, err_msg) - } + match_imm!(i8, u8, self, err_msg) } // Match and consume a signed 16-bit immediate. fn match_imm16(&mut self, err_msg: &str) -> ParseResult { - if let Some(Token::Integer(text)) = self.token() { - self.consume(); - let negative = text.starts_with('-'); - let positive = text.starts_with('+'); - let text = if negative || positive { - // Strip sign prefix. - &text[1..] - } else { - text - }; - - // Parse the text value; the lexer gives us raw text that looks like an integer. - let value = if text.starts_with("0x") { - // Skip underscores. - let text = text.replace("_", ""); - // Parse it as a i16 in hexadecimal form. - u16::from_str_radix(&text[2..], 16) - .map_err(|_| self.error("unable to parse i16 as a hexadecimal immediate"))? - } else { - // Parse it as a i16 to check for overflow and other issues. - text.parse() - .map_err(|_| self.error("expected i16 decimal immediate"))? - }; - - // Apply sign if necessary. - let signed = if negative { - let value = value.wrapping_neg() as i16; - if value > 0 { - return Err(self.error("negative number too small")); - } - value - } else { - value as i16 - }; - - Ok(signed) - } else { - err!(self.loc, err_msg) - } + match_imm!(i16, u16, self, err_msg) } // Match and consume an i32 immediate. // This is used for stack argument byte offsets. fn match_imm32(&mut self, err_msg: &str) -> ParseResult { - if let Some(Token::Integer(text)) = self.token() { - self.consume(); - let negative = text.starts_with('-'); - let positive = text.starts_with('+'); - let text = if negative || positive { - // Strip sign prefix. - &text[1..] - } else { - text - }; + match_imm!(i32, u32, self, err_msg) + } - // Parse the text value; the lexer gives us raw text that looks like an integer. 
- let value = if text.starts_with("0x") { - // Skip underscores. - let text = text.replace("_", ""); - // Parse it as a i32 in hexadecimal form. - u32::from_str_radix(&text[2..], 16) - .map_err(|_| self.error("unable to parse i32 as a hexadecimal immediate"))? - } else { - // Parse it as a i32 to check for overflow and other issues. - text.parse() - .map_err(|_| self.error("expected i32 decimal immediate"))? - }; - - // Apply sign if necessary. - let signed = if negative { - let value = value.wrapping_neg() as i32; - if value > 0 { - return Err(self.error("negative number too small")); - } - value - } else { - value as i32 - }; - - Ok(signed) - } else { - err!(self.loc, err_msg) - } + // Match and consume an i128 immediate. + fn match_imm128(&mut self, err_msg: &str) -> ParseResult { + match_imm!(i128, u128, self, err_msg) } // Match and consume an optional offset32 immediate. @@ -1026,42 +941,6 @@ impl<'a> Parser<'a> { } } - // Match and consume a HexSequence that fits into a u16. - // This is used for instruction encodings. - fn match_hex16(&mut self, err_msg: &str) -> ParseResult { - if let Some(Token::HexSequence(bits_str)) = self.token() { - self.consume(); - // The only error we anticipate from this parse is overflow, the lexer should - // already have ensured that the string doesn't contain invalid characters, and - // isn't empty or negative. - u16::from_str_radix(bits_str, 16) - .map_err(|_| self.error("the hex sequence given overflows the u16 type")) - } else { - err!(self.loc, err_msg) - } - } - - // Match and consume a register unit either by number `%15` or by name `%rax`. 
- fn match_regunit(&mut self, isa: Option<&dyn TargetIsa>) -> ParseResult { - if let Some(Token::Name(name)) = self.token() { - self.consume(); - match isa { - Some(isa) => isa - .register_info() - .parse_regunit(name) - .ok_or_else(|| self.error("invalid register name")), - None => name - .parse() - .map_err(|_| self.error("invalid register number")), - } - } else { - match isa { - Some(isa) => err!(self.loc, "Expected {} register unit", isa.name()), - None => err!(self.loc, "Expected register unit number"), - } - } - } - /// Parse an optional source location. /// /// Return an optional source location if no real location is present. @@ -1223,19 +1102,7 @@ impl<'a> Parser<'a> { Ok(triple) => triple, Err(err) => return err!(loc, err), }; - // Look for `machinst` or `legacy` option before instantiating IsaBuilder. - let variant = match words.peek() { - Some(&"machinst") => { - words.next(); - BackendVariant::MachInst - } - Some(&"legacy") => { - words.next(); - BackendVariant::Legacy - } - _ => BackendVariant::Any, - }; - let mut isa_builder = match isa::lookup_variant(triple, variant) { + let mut isa_builder = match isa::lookup(triple) { Err(isa::LookupError::SupportDisabled) => { continue; } @@ -1293,13 +1160,10 @@ impl<'a> Parser<'a> { /// Parse a list of function definitions. /// /// This is the top-level parse function matching the whole contents of a file. 
- pub fn parse_function_list( - &mut self, - unique_isa: Option<&dyn TargetIsa>, - ) -> ParseResult)>> { + pub fn parse_function_list(&mut self) -> ParseResult)>> { let mut list = Vec::new(); while self.token().is_some() { - list.push(self.parse_function(unique_isa)?); + list.push(self.parse_function()?); } if let Some(err) = self.lex_error { return match err { @@ -1313,10 +1177,7 @@ impl<'a> Parser<'a> { // // function ::= * "function" name signature "{" preamble function-body "}" // - fn parse_function( - &mut self, - unique_isa: Option<&dyn TargetIsa>, - ) -> ParseResult<(Function, Details<'a>)> { + fn parse_function(&mut self) -> ParseResult<(Function, Details<'a>)> { // Begin gathering comments. // Make sure we don't include any comments before the `function` keyword. self.token(); @@ -1331,9 +1192,9 @@ impl<'a> Parser<'a> { let name = self.parse_external_name()?; // function ::= "function" name * signature "{" preamble function-body "}" - let sig = self.parse_signature(unique_isa)?; + let sig = self.parse_signature()?; - let mut ctx = Context::new(Function::with_name_signature(name, sig), unique_isa); + let mut ctx = Context::new(Function::with_name_signature(name, sig)); // function ::= "function" name signature * "{" preamble function-body "}" self.match_token(Token::LBrace, "expected '{' before function body")?; @@ -1403,18 +1264,18 @@ impl<'a> Parser<'a> { // // signature ::= * "(" [paramlist] ")" ["->" retlist] [callconv] // - fn parse_signature(&mut self, unique_isa: Option<&dyn TargetIsa>) -> ParseResult { + fn parse_signature(&mut self) -> ParseResult { // Calling convention defaults to `fast`, but can be changed. let mut sig = Signature::new(self.default_calling_convention); self.match_token(Token::LPar, "expected function signature: ( args... 
)")?; // signature ::= "(" * [abi-param-list] ")" ["->" retlist] [callconv] if self.token() != Some(Token::RPar) { - sig.params = self.parse_abi_param_list(unique_isa)?; + sig.params = self.parse_abi_param_list()?; } self.match_token(Token::RPar, "expected ')' after function arguments")?; if self.optional(Token::Arrow) { - sig.returns = self.parse_abi_param_list(unique_isa)?; + sig.returns = self.parse_abi_param_list()?; } // The calling convention is optional. @@ -1435,26 +1296,23 @@ impl<'a> Parser<'a> { // // paramlist ::= * param { "," param } // - fn parse_abi_param_list( - &mut self, - unique_isa: Option<&dyn TargetIsa>, - ) -> ParseResult> { + fn parse_abi_param_list(&mut self) -> ParseResult> { let mut list = Vec::new(); // abi-param-list ::= * abi-param { "," abi-param } - list.push(self.parse_abi_param(unique_isa)?); + list.push(self.parse_abi_param()?); // abi-param-list ::= abi-param * { "," abi-param } while self.optional(Token::Comma) { // abi-param-list ::= abi-param { "," * abi-param } - list.push(self.parse_abi_param(unique_isa)?); + list.push(self.parse_abi_param()?); } Ok(list) } // Parse a single argument type with flags. - fn parse_abi_param(&mut self, unique_isa: Option<&dyn TargetIsa>) -> ParseResult { + fn parse_abi_param(&mut self) -> ParseResult { // abi-param ::= * type { flag } [ argumentloc ] let mut arg = AbiParam::new(self.match_type("expected parameter type")?); @@ -1482,53 +1340,9 @@ impl<'a> Parser<'a> { self.consume(); } - // abi-param ::= type { flag } * [ argumentloc ] - arg.location = self.parse_argument_location(unique_isa)?; - Ok(arg) } - // Parse an argument location specifier; either a register or a byte offset into the stack. 
- fn parse_argument_location( - &mut self, - unique_isa: Option<&dyn TargetIsa>, - ) -> ParseResult { - // argumentloc ::= '[' regname | uimm32 ']' - if self.optional(Token::LBracket) { - let result = match self.token() { - Some(Token::Name(name)) => { - self.consume(); - if let Some(isa) = unique_isa { - isa.register_info() - .parse_regunit(name) - .map(ArgumentLoc::Reg) - .ok_or_else(|| self.error("invalid register name")) - } else { - err!(self.loc, "argument location requires exactly one isa") - } - } - Some(Token::Integer(_)) => { - let offset = self.match_imm32("expected stack argument byte offset")?; - Ok(ArgumentLoc::Stack(offset)) - } - Some(Token::Minus) => { - self.consume(); - Ok(ArgumentLoc::Unassigned) - } - _ => err!(self.loc, "expected argument location"), - }; - - self.match_token( - Token::RBracket, - "expected ']' to end argument location annotation", - )?; - - result - } else { - Ok(ArgumentLoc::Unassigned) - } - } - // Parse the function preamble. // // preamble ::= * { preamble-decl } @@ -1565,10 +1379,9 @@ impl<'a> Parser<'a> { } Some(Token::SigRef(..)) => { self.start_gathering_comments(); - self.parse_signature_decl(ctx.unique_isa) - .and_then(|(sig, dat)| { - ctx.add_sig(sig, dat, self.loc, self.default_calling_convention) - }) + self.parse_signature_decl().and_then(|(sig, dat)| { + ctx.add_sig(sig, dat, self.loc, self.default_calling_convention) + }) } Some(Token::FuncRef(..)) => { self.start_gathering_comments(); @@ -1851,13 +1664,10 @@ impl<'a> Parser<'a> { // // signature-decl ::= SigRef(sigref) "=" signature // - fn parse_signature_decl( - &mut self, - unique_isa: Option<&dyn TargetIsa>, - ) -> ParseResult<(SigRef, Signature)> { + fn parse_signature_decl(&mut self) -> ParseResult<(SigRef, Signature)> { let sig = self.match_sig("expected signature number: sig«n»")?; self.match_token(Token::Equal, "expected '=' in signature decl")?; - let data = self.parse_signature(unique_isa)?; + let data = self.parse_signature()?; // Collect any 
trailing comments. self.token(); @@ -1892,7 +1702,7 @@ impl<'a> Parser<'a> { let data = match self.token() { Some(Token::LPar) => { // function-decl ::= FuncRef(fnref) "=" ["colocated"] name * signature - let sig = self.parse_signature(ctx.unique_isa)?; + let sig = self.parse_signature()?; let sigref = ctx.function.import_signature(sig); ctx.map .def_entity(sigref.into(), loc) @@ -2086,7 +1896,6 @@ impl<'a> Parser<'a> { _ => false, } { let srcloc = self.optional_srcloc()?; - let (encoding, result_locations) = self.parse_instruction_encoding(ctx)?; // We need to parse instruction results here because they are shared // between the parsing of value aliases and the parsing of instructions. @@ -2107,24 +1916,10 @@ impl<'a> Parser<'a> { } Some(Token::Equal) => { self.consume(); - self.parse_instruction( - &results, - srcloc, - encoding, - result_locations, - ctx, - block, - )?; + self.parse_instruction(&results, srcloc, ctx, block)?; } _ if !results.is_empty() => return err!(self.loc, "expected -> or ="), - _ => self.parse_instruction( - &results, - srcloc, - encoding, - result_locations, - ctx, - block, - )?, + _ => self.parse_instruction(&results, srcloc, ctx, block)?, } } @@ -2176,97 +1971,9 @@ impl<'a> Parser<'a> { ctx.function.dfg.append_block_param_for_parser(block, t, v); ctx.map.def_value(v, v_location)?; - // block-param ::= Value(v) ":" Type(t) * arg-loc? 
- if self.optional(Token::LBracket) { - let loc = self.parse_value_location(ctx)?; - ctx.function.locations[v] = loc; - self.match_token(Token::RBracket, "expected ']' after value location")?; - } - Ok(()) } - fn parse_value_location(&mut self, ctx: &Context) -> ParseResult { - match self.token() { - Some(Token::StackSlot(src_num)) => { - self.consume(); - let ss = match StackSlot::with_number(src_num) { - None => { - return err!( - self.loc, - "attempted to use invalid stack slot ss{}", - src_num - ); - } - Some(ss) => ss, - }; - ctx.check_ss(ss, self.loc)?; - Ok(ValueLoc::Stack(ss)) - } - Some(Token::Name(name)) => { - self.consume(); - if let Some(isa) = ctx.unique_isa { - isa.register_info() - .parse_regunit(name) - .map(ValueLoc::Reg) - .ok_or_else(|| self.error("invalid register value location")) - } else { - err!(self.loc, "value location requires exactly one isa") - } - } - Some(Token::Minus) => { - self.consume(); - Ok(ValueLoc::Unassigned) - } - _ => err!(self.loc, "invalid value location"), - } - } - - fn parse_instruction_encoding( - &mut self, - ctx: &Context, - ) -> ParseResult<(Option, Option>)> { - let (mut encoding, mut result_locations) = (None, None); - - // encoding ::= "[" encoding_literal result_locations "]" - if self.optional(Token::LBracket) { - // encoding_literal ::= "-" | Identifier HexSequence - if !self.optional(Token::Minus) { - let recipe = self.match_any_identifier("expected instruction encoding or '-'")?; - let bits = self.match_hex16("expected a hex sequence")?; - - if let Some(recipe_index) = ctx.find_recipe_index(recipe) { - encoding = Some(Encoding::new(recipe_index, bits)); - } else if ctx.unique_isa.is_some() { - return err!(self.loc, "invalid instruction recipe"); - } else { - // We allow encodings to be specified when there's no unique ISA purely - // for convenience, eg when copy-pasting code for a test. - } - } - - // result_locations ::= ("," ( "-" | names ) )? 
- // names ::= Name { "," Name } - if self.optional(Token::Comma) { - let mut results = Vec::new(); - - results.push(self.parse_value_location(ctx)?); - while self.optional(Token::Comma) { - results.push(self.parse_value_location(ctx)?); - } - - result_locations = Some(results); - } - - self.match_token( - Token::RBracket, - "expected ']' to terminate instruction encoding", - )?; - } - - Ok((encoding, result_locations)) - } - // Parse instruction results and return them. // // inst-results ::= Value(v) { "," Value(v) } @@ -2341,8 +2048,6 @@ impl<'a> Parser<'a> { &mut self, results: &[Value], srcloc: ir::SourceLoc, - encoding: Option, - result_locations: Option>, ctx: &mut Context, block: Block, ) -> ParseResult<()> { @@ -2397,10 +2102,6 @@ impl<'a> Parser<'a> { ctx.function.srclocs[inst] = srcloc; } - if let Some(encoding) = encoding { - ctx.function.encodings[inst] = encoding; - } - if results.len() != num_results { return err!( self.loc, @@ -2410,30 +2111,6 @@ impl<'a> Parser<'a> { ); } - if let Some(ref result_locations) = result_locations { - if results.len() != result_locations.len() { - return err!( - self.loc, - "instruction produces {} result values, but {} locations were \ - specified", - results.len(), - result_locations.len() - ); - } - } - - if let Some(result_locations) = result_locations { - for (&value, loc) in ctx - .function - .dfg - .inst_results(inst) - .iter() - .zip(result_locations) - { - ctx.function.locations[value] = loc; - } - } - // Collect any trailing comments. 
self.token(); self.claim_gathered_comments(inst); @@ -2805,6 +2482,7 @@ impl<'a> Parser<'a> { I16 => DataValue::from(self.match_imm16("expected an i16")?), I32 => DataValue::from(self.match_imm32("expected an i32")?), I64 => DataValue::from(Into::::into(self.match_imm64("expected an i64")?)), + I128 => DataValue::from(self.match_imm128("expected an i128")?), F32 => DataValue::from(self.match_ieee32("expected an f32")?), F64 => DataValue::from(self.match_ieee64("expected an f64")?), _ if ty.is_vector() => { @@ -3247,60 +2925,6 @@ impl<'a> Parser<'a> { offset, } } - InstructionFormat::RegMove => { - let arg = self.match_value("expected SSA value operand")?; - self.match_token(Token::Comma, "expected ',' between operands")?; - let src = self.match_regunit(ctx.unique_isa)?; - self.match_token(Token::Arrow, "expected '->' between register units")?; - let dst = self.match_regunit(ctx.unique_isa)?; - InstructionData::RegMove { - opcode, - arg, - src, - dst, - } - } - InstructionFormat::CopySpecial => { - let src = self.match_regunit(ctx.unique_isa)?; - self.match_token(Token::Arrow, "expected '->' between register units")?; - let dst = self.match_regunit(ctx.unique_isa)?; - InstructionData::CopySpecial { opcode, src, dst } - } - InstructionFormat::CopyToSsa => InstructionData::CopyToSsa { - opcode, - src: self.match_regunit(ctx.unique_isa)?, - }, - InstructionFormat::RegSpill => { - let arg = self.match_value("expected SSA value operand")?; - self.match_token(Token::Comma, "expected ',' between operands")?; - let src = self.match_regunit(ctx.unique_isa)?; - self.match_token(Token::Arrow, "expected '->' before destination stack slot")?; - let dst = self.match_ss("expected stack slot number: ss«n»")?; - ctx.check_ss(dst, self.loc)?; - InstructionData::RegSpill { - opcode, - arg, - src, - dst, - } - } - InstructionFormat::RegFill => { - let arg = self.match_value("expected SSA value operand")?; - self.match_token(Token::Comma, "expected ',' between operands")?; - let src = 
self.match_ss("expected stack slot number: ss«n»")?; - ctx.check_ss(src, self.loc)?; - self.match_token( - Token::Arrow, - "expected '->' before destination register units", - )?; - let dst = self.match_regunit(ctx.unique_isa)?; - InstructionData::RegFill { - opcode, - arg, - src, - dst, - } - } InstructionFormat::Trap => { let code = self.match_enum("expected trap code")?; InstructionData::Trap { opcode, code } @@ -3401,7 +3025,7 @@ mod tests { #[test] fn argument_type() { let mut p = Parser::new("i32 sext"); - let arg = p.parse_abi_param(None).unwrap(); + let arg = p.parse_abi_param().unwrap(); assert_eq!(arg.value_type, types::I32); assert_eq!(arg.extension, ArgumentExtension::Sext); assert_eq!(arg.purpose, ArgumentPurpose::Normal); @@ -3409,7 +3033,7 @@ mod tests { location, message, is_warning, - } = p.parse_abi_param(None).unwrap_err(); + } = p.parse_abi_param().unwrap_err(); assert_eq!(location.line_number, 1); assert_eq!(message, "expected parameter type"); assert!(!is_warning); @@ -3425,7 +3049,7 @@ mod tests { v1 = iadd_imm v3, 17 }", ) - .parse_function(None) + .parse_function() .unwrap(); assert_eq!(func.name.to_string(), "%qux"); let v4 = details.map.lookup_str("v4").unwrap(); @@ -3443,13 +3067,13 @@ mod tests { #[test] fn signature() { - let sig = Parser::new("()system_v").parse_signature(None).unwrap(); + let sig = Parser::new("()system_v").parse_signature().unwrap(); assert_eq!(sig.params.len(), 0); assert_eq!(sig.returns.len(), 0); assert_eq!(sig.call_conv, CallConv::SystemV); let sig2 = Parser::new("(i8 uext, f32, f64, i32 sret) -> i32 sext, f64 baldrdash_system_v") - .parse_signature(None) + .parse_signature() .unwrap(); assert_eq!( sig2.to_string(), @@ -3459,12 +3083,12 @@ mod tests { // Old-style signature without a calling convention. 
assert_eq!( - Parser::new("()").parse_signature(None).unwrap().to_string(), + Parser::new("()").parse_signature().unwrap().to_string(), "() fast" ); assert_eq!( Parser::new("() notacc") - .parse_signature(None) + .parse_signature() .unwrap_err() .to_string(), "1: unknown calling convention: notacc" @@ -3473,21 +3097,21 @@ mod tests { // `void` is not recognized as a type by the lexer. It should not appear in files. assert_eq!( Parser::new("() -> void") - .parse_signature(None) + .parse_signature() .unwrap_err() .to_string(), "1: expected parameter type" ); assert_eq!( Parser::new("i8 -> i8") - .parse_signature(None) + .parse_signature() .unwrap_err() .to_string(), "1: expected function signature: ( args... )" ); assert_eq!( Parser::new("(i8 -> i8") - .parse_signature(None) + .parse_signature() .unwrap_err() .to_string(), "1: expected ')' after function arguments" @@ -3502,7 +3126,7 @@ mod tests { ss1 = spill_slot 1 }", ) - .parse_function(None) + .parse_function() .unwrap(); assert_eq!(func.name.to_string(), "%foo"); let mut iter = func.stack_slots.keys(); @@ -3526,7 +3150,7 @@ mod tests { ss1 = spill_slot 1 }", ) - .parse_function(None) + .parse_function() .unwrap_err() .to_string(), "3: duplicate entity: ss1" @@ -3541,7 +3165,7 @@ mod tests { block4(v3: i32): }", ) - .parse_function(None) + .parse_function() .unwrap(); assert_eq!(func.name.to_string(), "%blocks"); @@ -3568,7 +3192,7 @@ mod tests { block0: return 2", ) - .parse_function(None) + .parse_function() .unwrap_err(); assert_eq!(location.line_number, 3); @@ -3586,7 +3210,7 @@ mod tests { "function %a() { block100000:", ) - .parse_function(None) + .parse_function() .unwrap_err(); assert_eq!(location.line_number, 2); @@ -3605,7 +3229,7 @@ mod tests { jt0 = jump_table [] jt0 = jump_table []", ) - .parse_function(None) + .parse_function() .unwrap_err(); assert_eq!(location.line_number, 3); @@ -3624,7 +3248,7 @@ mod tests { ss0 = explicit_slot 8 ss0 = explicit_slot 8", ) - .parse_function(None) + 
.parse_function() .unwrap_err(); assert_eq!(location.line_number, 3); @@ -3643,7 +3267,7 @@ mod tests { gv0 = vmctx gv0 = vmctx", ) - .parse_function(None) + .parse_function() .unwrap_err(); assert_eq!(location.line_number, 3); @@ -3662,7 +3286,7 @@ mod tests { heap0 = static gv0, min 0x1000, bound 0x10_0000, offset_guard 0x1000 heap0 = static gv0, min 0x1000, bound 0x10_0000, offset_guard 0x1000", ) - .parse_function(None) + .parse_function() .unwrap_err(); assert_eq!(location.line_number, 3); @@ -3681,7 +3305,7 @@ mod tests { sig0 = () sig0 = ()", ) - .parse_function(None) + .parse_function() .unwrap_err(); assert_eq!(location.line_number, 3); @@ -3701,7 +3325,7 @@ mod tests { fn0 = %foo sig0 fn0 = %foo sig0", ) - .parse_function(None) + .parse_function() .unwrap_err(); assert_eq!(location.line_number, 4); @@ -3723,7 +3347,7 @@ mod tests { } ; Trailing. ; More trailing.", ) - .parse_function(None) + .parse_function() .unwrap(); assert_eq!(func.name.to_string(), "%comment"); assert_eq!(comments.len(), 8); // no 'before' comment. 
@@ -3825,7 +3449,7 @@ mod tests { trap int_divz }", ) - .parse_function(None) + .parse_function() .unwrap() .0; assert_eq!(func.name.to_string(), "u1:2"); @@ -3837,7 +3461,7 @@ mod tests { trap stk_ovf }", ); - assert!(parser.parse_function(None).is_err()); + assert!(parser.parse_function().is_err()); // Incomplete function names should not be valid: let mut parser = Parser::new( @@ -3846,7 +3470,7 @@ mod tests { trap int_ovf }", ); - assert!(parser.parse_function(None).is_err()); + assert!(parser.parse_function().is_err()); let mut parser = Parser::new( "function u0() system_v { @@ -3854,7 +3478,7 @@ mod tests { trap int_ovf }", ); - assert!(parser.parse_function(None).is_err()); + assert!(parser.parse_function().is_err()); let mut parser = Parser::new( "function u0:() system_v { @@ -3862,7 +3486,7 @@ mod tests { trap int_ovf }", ); - assert!(parser.parse_function(None).is_err()); + assert!(parser.parse_function().is_err()); } #[test] @@ -3875,14 +3499,14 @@ mod tests { // By default the parser will use the fast calling convention if none is specified. let mut parser = Parser::new(code); assert_eq!( - parser.parse_function(None).unwrap().0.signature.call_conv, + parser.parse_function().unwrap().0.signature.call_conv, CallConv::Fast ); // However, we can specify a different calling convention to be the default. 
let mut parser = Parser::new(code).with_default_calling_convention(CallConv::Cold); assert_eq!( - parser.parse_function(None).unwrap().0.signature.call_conv, + parser.parse_function().unwrap().0.signature.call_conv, CallConv::Cold ); } @@ -4126,6 +3750,11 @@ mod tests { assert_eq!(parse("16", I16).to_string(), "16"); assert_eq!(parse("32", I32).to_string(), "32"); assert_eq!(parse("64", I64).to_string(), "64"); + assert_eq!( + parse("0x01234567_01234567_01234567_01234567", I128).to_string(), + "1512366032949150931280199141537564007" + ); + assert_eq!(parse("1234567", I128).to_string(), "1234567"); assert_eq!(parse("0x32.32", F32).to_string(), "0x1.919000p5"); assert_eq!(parse("0x64.64", F64).to_string(), "0x1.9190000000000p6"); assert_eq!(parse("true", B1).to_string(), "true"); diff --git a/cranelift/src/bugpoint.rs b/cranelift/src/bugpoint.rs index 374a5d869b..e16e91ae40 100644 --- a/cranelift/src/bugpoint.rs +++ b/cranelift/src/bugpoint.rs @@ -570,18 +570,6 @@ impl Mutator for RemoveUnusedEntities { .push(inst); } - InstructionData::RegSpill { dst, .. } => { - stack_slot_usage_map - .entry(dst) - .or_insert_with(Vec::new) - .push(inst); - } - InstructionData::RegFill { src, .. } => { - stack_slot_usage_map - .entry(src) - .or_insert_with(Vec::new) - .push(inst); - } _ => {} } } @@ -599,12 +587,6 @@ impl Mutator for RemoveUnusedEntities { | InstructionData::StackStore { stack_slot, .. } => { *stack_slot = new_stack_slot; } - InstructionData::RegSpill { dst, .. } => { - *dst = new_stack_slot; - } - InstructionData::RegFill { src, .. } => { - *src = new_stack_slot; - } _ => unreachable!(), } } diff --git a/cranelift/src/compile.rs b/cranelift/src/compile.rs index e9138f181c..6a498a1b3a 100644 --- a/cranelift/src/compile.rs +++ b/cranelift/src/compile.rs @@ -80,12 +80,10 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - // Compile and encode the result to machine code. 
let code_info = context .compile_and_emit(isa, &mut mem, &mut relocs, &mut traps, &mut stack_maps) - .map_err(|err| { - anyhow::anyhow!("{}", pretty_error(&context.func, Some(isa), err)) - })?; + .map_err(|err| anyhow::anyhow!("{}", pretty_error(&context.func, err)))?; if options.print { - println!("{}", context.func.display(isa)); + println!("{}", context.func.display()); } if options.disasm { diff --git a/cranelift/src/disasm.rs b/cranelift/src/disasm.rs index da593f8679..96972c6bc5 100644 --- a/cranelift/src/disasm.rs +++ b/cranelift/src/disasm.rs @@ -36,28 +36,6 @@ impl binemit::RelocSink for PrintRelocs { .unwrap(); } } - - fn reloc_jt(&mut self, where_: binemit::CodeOffset, r: binemit::Reloc, jt: ir::JumpTable) { - if self.flag_print { - writeln!(&mut self.text, "reloc_jt: {} {} at {}", r, jt, where_).unwrap(); - } - } - - fn reloc_constant( - &mut self, - code_offset: binemit::CodeOffset, - reloc: binemit::Reloc, - constant: ir::ConstantOffset, - ) { - if self.flag_print { - writeln!( - &mut self.text, - "reloc_constant: {} {} at {}", - reloc, constant, code_offset - ) - .unwrap(); - } - } } pub struct PrintTraps { @@ -111,28 +89,6 @@ cfg_if! 
{ fn get_disassembler(isa: &dyn TargetIsa) -> Result { let cs = match isa.triple().architecture { - Architecture::Riscv32(_) => { - let mut cs = Capstone::new() - .riscv() - .mode(arch::riscv::ArchMode::RiscV32) - .extra_mode(std::iter::once(arch::riscv::ArchExtraMode::RiscVC)) - .build() - .map_err(map_caperr)?; - // See the comment of AArch64 below - cs.set_skipdata(true).map_err(map_caperr)?; - cs - } - Architecture::Riscv64(_) => { - let mut cs = Capstone::new() - .riscv() - .mode(arch::riscv::ArchMode::RiscV64) - .extra_mode(std::iter::once(arch::riscv::ArchExtraMode::RiscVC)) - .build() - .map_err(map_caperr)?; - // See the comment of AArch64 below - cs.set_skipdata(true).map_err(map_caperr)?; - cs - } Architecture::X86_32(_) => Capstone::new() .x86() .mode(arch::x86::ArchMode::Mode32) diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index 20310f157c..35ca9857b3 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -220,7 +220,7 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - { println!("; Exported as \"{}\"", export_name); } - println!("{}", context.func.display(None)); + println!("{}", context.func.display()); vprintln!(options.verbose, ""); } let _ = terminal.reset(); @@ -263,15 +263,12 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - let mut stack_maps = PrintStackMaps::new(options.print); if options.check_translation { if let Err(errors) = context.verify(fisa) { - anyhow::bail!( - "{}", - pretty_verifier_error(&context.func, fisa.isa, None, errors) - ); + anyhow::bail!("{}", pretty_verifier_error(&context.func, None, errors)); } } else { let code_info = context .compile_and_emit(isa, &mut mem, &mut relocs, &mut traps, &mut stack_maps) - .map_err(|err| anyhow::anyhow!("{}", pretty_error(&context.func, fisa.isa, err)))?; + .map_err(|err| anyhow::anyhow!("{}", pretty_error(&context.func, err)))?; if options.print_size { println!( @@ -309,8 +306,11 @@ fn 
handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - let value_ranges = if options.value_ranges { Some( context - .build_value_labels_ranges(isa) - .expect("value location ranges"), + .mach_compile_result + .as_ref() + .unwrap() + .value_labels_ranges + .clone(), ) } else { None @@ -318,7 +318,6 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - println!( "{}", context.func.display_with(DisplayFunctionAnnotations { - isa: fisa.isa, value_ranges: value_ranges.as_ref(), }) ); diff --git a/cranelift/tests/bugpoint_test.clif b/cranelift/tests/bugpoint_test.clif index b2e9acc37e..ced5b9e809 100644 --- a/cranelift/tests/bugpoint_test.clif +++ b/cranelift/tests/bugpoint_test.clif @@ -300,7 +300,8 @@ block0(v0: i64, v1: i64, v2: i64): v241 -> v1 v256 -> v1 v262 -> v1 - v3, v4 = x86_sdivmodx v0, v1, v2 + v3 = imul v0, v1 + v4 = imul v1, v2 store aligned v4, v3 v5 = load.i64 aligned v2+8 store aligned v5, v3+8 diff --git a/cranelift/wasm/Cargo.toml b/cranelift/wasm/Cargo.toml index 31e84a517d..cd45a4faac 100644 --- a/cranelift/wasm/Cargo.toml +++ b/cranelift/wasm/Cargo.toml @@ -26,8 +26,7 @@ smallvec = "1.6.1" [dev-dependencies] wat = "1.0.37" target-lexicon = "0.12" -# Enable the riscv feature for cranelift-codegen, as some tests require it -cranelift-codegen = { path = "../codegen", version = "0.77.0", default-features = false, features = ["riscv"] } +cranelift-codegen = { path = "../codegen", version = "0.77.0", default-features = false } [features] default = ["std"] diff --git a/cranelift/wasm/src/func_translator.rs b/cranelift/wasm/src/func_translator.rs index f69ac2e77b..2b28cb54a7 100644 --- a/cranelift/wasm/src/func_translator.rs +++ b/cranelift/wasm/src/func_translator.rs @@ -329,7 +329,7 @@ mod tests { trans .translate_body(&mut validator, body, &mut ctx.func, &mut runtime.func_env()) .unwrap(); - debug!("{}", ctx.func.display(None)); + debug!("{}", ctx.func.display()); ctx.verify(&flags).unwrap(); } @@ 
-368,7 +368,7 @@ mod tests { trans .translate_body(&mut validator, body, &mut ctx.func, &mut runtime.func_env()) .unwrap(); - debug!("{}", ctx.func.display(None)); + debug!("{}", ctx.func.display()); ctx.verify(&flags).unwrap(); } @@ -411,7 +411,7 @@ mod tests { trans .translate_body(&mut validator, body, &mut ctx.func, &mut runtime.func_env()) .unwrap(); - debug!("{}", ctx.func.display(None)); + debug!("{}", ctx.func.display()); ctx.verify(&flags).unwrap(); } diff --git a/cranelift/wasm/tests/wasm_testsuite.rs b/cranelift/wasm/tests/wasm_testsuite.rs index a2e0f7db7c..fa07a7ba2c 100644 --- a/cranelift/wasm/tests/wasm_testsuite.rs +++ b/cranelift/wasm/tests/wasm_testsuite.rs @@ -1,12 +1,11 @@ -use cranelift_codegen::isa; +use cranelift_codegen::isa::{CallConv, TargetFrontendConfig}; use cranelift_codegen::print_errors::pretty_verifier_error; use cranelift_codegen::settings::{self, Flags}; use cranelift_codegen::verifier; use cranelift_wasm::{translate_module, DummyEnvironment, FuncIndex, ReturnMode}; use std::fs; use std::path::Path; -use std::str::FromStr; -use target_lexicon::triple; +use target_lexicon::PointerWidth; #[test] fn testsuite() { @@ -52,11 +51,15 @@ fn use_name_section() { ) .unwrap(); - let flags = Flags::new(settings::builder()); - let triple = triple!("riscv64"); - let isa = isa::lookup(triple).unwrap().finish(flags.clone()); let return_mode = ReturnMode::NormalReturns; - let mut dummy_environ = DummyEnvironment::new(isa.frontend_config(), return_mode, false); + let mut dummy_environ = DummyEnvironment::new( + TargetFrontendConfig { + default_call_conv: CallConv::SystemV, + pointer_width: PointerWidth::U32, + }, + return_mode, + false, + ); translate_module(data.as_ref(), &mut dummy_environ).unwrap(); @@ -82,15 +85,20 @@ fn read_module(path: &Path) -> Vec { } fn handle_module(data: Vec, flags: &Flags, return_mode: ReturnMode) { - let triple = triple!("riscv64"); - let isa = isa::lookup(triple).unwrap().finish(flags.clone()); - let mut 
dummy_environ = DummyEnvironment::new(isa.frontend_config(), return_mode, false); + let mut dummy_environ = DummyEnvironment::new( + TargetFrontendConfig { + default_call_conv: CallConv::SystemV, + pointer_width: PointerWidth::U64, + }, + return_mode, + false, + ); translate_module(&data, &mut dummy_environ).unwrap(); for func in dummy_environ.info.function_bodies.values() { - verifier::verify_function(func, &*isa) - .map_err(|errors| panic!("{}", pretty_verifier_error(func, Some(&*isa), None, errors))) + verifier::verify_function(func, flags) + .map_err(|errors| panic!("{}", pretty_verifier_error(func, None, errors))) .unwrap(); } } @@ -168,10 +176,14 @@ fn reachability_is_correct() { for (return_mode, wat, expected_reachability) in tests { println!("testing wat:\n{}", wat); - let flags = Flags::new(settings::builder()); - let triple = triple!("riscv64"); - let isa = isa::lookup(triple).unwrap().finish(flags.clone()); - let mut env = DummyEnvironment::new(isa.frontend_config(), return_mode, false); + let mut env = DummyEnvironment::new( + TargetFrontendConfig { + default_call_conv: CallConv::SystemV, + pointer_width: PointerWidth::U64, + }, + return_mode, + false, + ); env.test_expected_reachability(expected_reachability); let data = wat::parse_str(wat).unwrap(); translate_module(data.as_ref(), &mut env).unwrap(); diff --git a/crates/bench-api/Cargo.toml b/crates/bench-api/Cargo.toml index a67b48c501..2edf20800c 100644 --- a/crates/bench-api/Cargo.toml +++ b/crates/bench-api/Cargo.toml @@ -31,4 +31,3 @@ wat = "1.0" default = ["shuffling-allocator"] wasi-crypto = ["wasmtime-wasi-crypto"] wasi-nn = ["wasmtime-wasi-nn"] -old-x86-backend = ["wasmtime/old-x86-backend"] diff --git a/crates/c-api/Cargo.toml b/crates/c-api/Cargo.toml index 39e99eb183..97a0d759ae 100644 --- a/crates/c-api/Cargo.toml +++ b/crates/c-api/Cargo.toml @@ -34,7 +34,6 @@ cap-std = { version = "0.19.1", optional = true } [features] default = ['jitdump', 'wat', 'wasi', 'cache'] -lightbeam = 
["wasmtime/lightbeam"] jitdump = ["wasmtime/jitdump"] cache = ["wasmtime/cache"] wasi = ['wasi-common', 'wasi-cap-std-sync', 'wasmtime-wasi', 'cap-std'] diff --git a/crates/c-api/include/wasmtime/config.h b/crates/c-api/include/wasmtime/config.h index 0150ee01a5..9f90059b09 100644 --- a/crates/c-api/include/wasmtime/config.h +++ b/crates/c-api/include/wasmtime/config.h @@ -26,19 +26,13 @@ typedef uint8_t wasmtime_strategy_t; * The default value is #WASMTIME_STRATEGY_AUTO. */ enum wasmtime_strategy_enum { // Strategy - /// Wasmtime will automatically determine whether to use Cranelift or - /// Lightbeam, and currently it will always pick Cranelift. This default may - /// change over time though. + /// Automatically picks the compilation backend, currently always defaulting + /// to Cranelift. WASMTIME_STRATEGY_AUTO, - /// Indicates that Cranelift will unconditionally use Cranelift to compile + /// Indicates that Wasmtime will unconditionally use Cranelift to compile /// WebAssembly code. WASMTIME_STRATEGY_CRANELIFT, - - /// Indicates that Cranelift will unconditionally use Lightbeam to compile - /// WebAssembly code. Note that Lightbeam isn't always enabled at compile - /// time, and if that's the case an error will be returned. - WASMTIME_STRATEGY_LIGHTBEAM, }; /** diff --git a/crates/c-api/include/wasmtime/func.h b/crates/c-api/include/wasmtime/func.h index f254d922aa..83339e8b2b 100644 --- a/crates/c-api/include/wasmtime/func.h +++ b/crates/c-api/include/wasmtime/func.h @@ -87,6 +87,75 @@ WASM_API_EXTERN void wasmtime_func_new( wasmtime_func_t *ret ); +/** + * \brief Callback signature for #wasmtime_func_new_unchecked. + * + * This is the function signature for host functions that can be made accessible + * to WebAssembly. The arguments to this function are: + * + * \param env user-provided argument passed to #wasmtime_func_new_unchecked + * \param caller a temporary object that can only be used during this function + * call. 
Used to acquire #wasmtime_context_t or caller's state + * \param args_and_results storage space for both the parameters to the + * function as well as the results of the function. The size of this + * array depends on the function type that the host function is created + * with, but it will be the maximum of the number of parameters and + * number of results. + * + * This callback can optionally return a #wasm_trap_t indicating that a trap + * should be raised in WebAssembly. It's expected that in this case the caller + * relinquishes ownership of the trap and it is passed back to the engine. + * + * This differs from #wasmtime_func_callback_t in that the payload of + * `args_and_results` does not have type information, nor does it have sizing + * information. This is especially unsafe because it's only valid within the + * particular #wasm_functype_t that the function was created with. The onus is + * on the embedder to ensure that `args_and_results` are all read correctly + * for parameters and all written for results within the execution of a + * function. + * + * Parameters will be listed starting at index 0 in the `args_and_results` + * array. Results are also written starting at index 0, which will overwrite + * the arguments. + */ +typedef wasm_trap_t* (*wasmtime_func_unchecked_callback_t)( + void *env, + wasmtime_caller_t* caller, + wasmtime_val_raw_t *args_and_results); + +/** + * \brief Creates a new host function in the same manner of #wasmtime_func_new, + * but the function-to-call has no type information available at runtime. + * + * This function is very similar to #wasmtime_func_new. The difference is that + * this version is "more unsafe" in that when the host callback is invoked there + * is no type information and no checks that the right types of values are + * produced. 
The onus is on the consumer of this API to ensure that all + * invariants are upheld such as: + * + * * The host callback reads parameters correctly and interprets their types + * correctly. + * * If a trap doesn't happen then all results are written to the results + * pointer. All results must have the correct type. + * * Types such as `funcref` cannot cross stores. + * * Types such as `externref` have valid reference counts. + * + * It's generally only recommended to use this if your application can wrap + * this in a safe embedding. This should not be frequently used due to the + * number of invariants that must be upheld on the wasm<->host boundary. On the + * upside, though, this flavor of host function will be faster to call than + * those created by #wasmtime_func_new (hence the reason for this function's + * existence). + */ +WASM_API_EXTERN void wasmtime_func_new_unchecked( + wasmtime_context_t *store, + const wasm_functype_t* type, + wasmtime_func_unchecked_callback_t callback, + void *env, + void (*finalizer)(void*), + wasmtime_func_t *ret +); + /** * \brief Returns the type of the function specified * @@ -142,6 +211,39 @@ WASM_API_EXTERN wasmtime_error_t *wasmtime_func_call( wasm_trap_t **trap ); +/** + * \brief Call a WebAssembly function in an "unchecked" fashion. + * + * This function is similar to #wasmtime_func_call except that there is no type + * information provided with the arguments (or sizing information). Consequently + * this is less safe to call since it's up to the caller to ensure that `args` + * has an appropriate size and all the parameters are configured with their + * appropriate values/types. Additionally all the results must be interpreted + * correctly if this function returns successfully. + * + * Parameters must be specified starting at index 0 in the `args_and_results` + * array. Results are written starting at index 0, which will overwrite + * the arguments. 
+ * + * Callers must ensure that various correctness variants are upheld when this + * API is called such as: + * + * * The `args_and_results` pointer has enough space to hold all the parameters + * and all the results (but not at the same time). + * * Parameters must all be configured as if they were the correct type. + * * Values such as `externref` and `funcref` are valid within the store being + * called. + * + * When in doubt it's much safer to call #wasmtime_func_call. This function is + * faster than that function, but the tradeoff is that embeddings must uphold + * more invariants rather than relying on Wasmtime to check them for you. + */ +WASM_API_EXTERN wasm_trap_t *wasmtime_func_call_unchecked( + wasmtime_context_t *store, + const wasmtime_func_t *func, + wasmtime_val_raw_t *args_and_results +); + /** * \brief Loads a #wasmtime_extern_t from the caller's context * @@ -172,6 +274,32 @@ WASM_API_EXTERN bool wasmtime_caller_export_get( */ WASM_API_EXTERN wasmtime_context_t* wasmtime_caller_context(wasmtime_caller_t* caller); +/** + * \brief Converts a `raw` nonzero `funcref` value from #wasmtime_val_raw_t + * into a #wasmtime_func_t. + * + * This function can be used to interpret nonzero values of the `funcref` field + * of the #wasmtime_val_raw_t structure. It is assumed that `raw` does not have + * a value of 0, or otherwise the program will abort. + * + * Note that this function is unchecked and unsafe. It's only safe to pass + * values learned from #wasmtime_val_raw_t with the same corresponding + * #wasmtime_context_t that they were produced from. Providing arbitrary values + * to `raw` here or cross-context values with `context` is UB. + */ +WASM_API_EXTERN void wasmtime_func_from_raw( + wasmtime_context_t* context, + size_t raw, + wasmtime_func_t *ret); + +/** + * \brief Converts a `func` which belongs to `context` into a `usize` + * parameter that is suitable for insertion into a #wasmtime_val_raw_t. 
+ */ +WASM_API_EXTERN size_t wasmtime_func_to_raw( + wasmtime_context_t* context, + const wasmtime_func_t *func); + #ifdef __cplusplus } // extern "C" #endif diff --git a/crates/c-api/include/wasmtime/linker.h b/crates/c-api/include/wasmtime/linker.h index 09bc0bb10a..edd52442df 100644 --- a/crates/c-api/include/wasmtime/linker.h +++ b/crates/c-api/include/wasmtime/linker.h @@ -102,6 +102,8 @@ WASM_API_EXTERN wasmtime_error_t* wasmtime_linker_define( * Note that this function does not create a #wasmtime_func_t. This creates a * store-independent function within the linker, allowing this function * definition to be used with multiple stores. + * + * For more information about host callbacks see #wasmtime_func_new. */ WASM_API_EXTERN wasmtime_error_t* wasmtime_linker_define_func( wasmtime_linker_t *linker, @@ -115,6 +117,27 @@ WASM_API_EXTERN wasmtime_error_t* wasmtime_linker_define_func( void (*finalizer)(void*) ); +/** + * \brief Defines a new function in this linker. + * + * This is the same as #wasmtime_linker_define_func except that it's the analog + * of #wasmtime_func_new_unchecked instead of #wasmtime_func_new. Be sure to + * consult the documentation of #wasmtime_linker_define_func for argument + * information as well as #wasmtime_func_new_unchecked for why this is an + * unsafe API. + */ +WASM_API_EXTERN wasmtime_error_t* wasmtime_linker_define_func_unchecked( + wasmtime_linker_t *linker, + const char *module, + size_t module_len, + const char *name, + size_t name_len, + const wasm_functype_t *ty, + wasmtime_func_unchecked_callback_t cb, + void *data, + void (*finalizer)(void*) +); + /** * \brief Defines WASI functions in this linker. 
* diff --git a/crates/c-api/include/wasmtime/val.h b/crates/c-api/include/wasmtime/val.h index 43b40ff77e..c17f605cd8 100644 --- a/crates/c-api/include/wasmtime/val.h +++ b/crates/c-api/include/wasmtime/val.h @@ -63,6 +63,29 @@ WASM_API_EXTERN wasmtime_externref_t *wasmtime_externref_clone(wasmtime_externre */ WASM_API_EXTERN void wasmtime_externref_delete(wasmtime_externref_t *ref); +/** + * \brief Converts a raw `externref` value coming from #wasmtime_val_raw_t into + * a #wasmtime_externref_t. + * + * Note that the returned #wasmtime_externref_t is an owned value that must be + * deleted via #wasmtime_externref_delete by the caller if it is non-null. + */ +WASM_API_EXTERN wasmtime_externref_t *wasmtime_externref_from_raw(wasmtime_context_t *context, size_t raw); + +/** + * \brief Converts a #wasmtime_externref_t to a raw value suitable for storing + * into a #wasmtime_val_raw_t. + * + * Note that the returned underlying value is not tracked by Wasmtime's garbage + * collector until it enters WebAssembly. This means that a GC may release the + * context's reference to the raw value, making the raw value invalid within the + * context of the store. Do not perform a GC between calling this function and + * passing it to WebAssembly. + */ +WASM_API_EXTERN size_t wasmtime_externref_to_raw( + wasmtime_context_t *context, + const wasmtime_externref_t *ref); + /// \brief Discriminant stored in #wasmtime_val::kind typedef uint8_t wasmtime_valkind_t; /// \brief Value of #wasmtime_valkind_t meaning that #wasmtime_val_t is an i32 @@ -117,6 +140,43 @@ typedef union wasmtime_valunion { wasmtime_v128 v128; } wasmtime_valunion_t; +/** + * \typedef wasmtime_val_raw_t + * \brief Convenience alias for #wasmtime_val_raw + * + * \union wasmtime_val_raw + * \brief Container for possible wasm values. + * + * This type is used on conjunction with #wasmtime_func_new_unchecked as well + * as #wasmtime_func_call_unchecked. 
Instances of this type do not have type + * information associated with them, it's up to the embedder to figure out + * how to interpret the bits contained within, often using some other channel + * to determine the type. + */ +typedef union wasmtime_val_raw { + /// Field for when this val is a WebAssembly `i32` value. + int32_t i32; + /// Field for when this val is a WebAssembly `i64` value. + int64_t i64; + /// Field for when this val is a WebAssembly `f32` value. + float32_t f32; + /// Field for when this val is a WebAssembly `f64` value. + float64_t f64; + /// Field for when this val is a WebAssembly `v128` value. + wasmtime_v128 v128; + /// Field for when this val is a WebAssembly `funcref` value. + /// + /// If this is set to 0 then it's a null funcref, otherwise this must be + /// passed to `wasmtime_func_from_raw` to determine the `wasmtime_func_t`. + size_t funcref; + /// Field for when this val is a WebAssembly `externref` value. + /// + /// If this is set to 0 then it's a null externref, otherwise this must be + /// passed to `wasmtime_externref_from_raw` to determine the + /// `wasmtime_externref_t`. 
+ size_t externref; +} wasmtime_val_raw_t; + /** * \typedef wasmtime_val_t * \brief Convenience alias for #wasmtime_val_t diff --git a/crates/c-api/src/config.rs b/crates/c-api/src/config.rs index 6386ca6c91..64c3a88131 100644 --- a/crates/c-api/src/config.rs +++ b/crates/c-api/src/config.rs @@ -20,7 +20,6 @@ wasmtime_c_api_macros::declare_own!(wasm_config_t); pub enum wasmtime_strategy_t { WASMTIME_STRATEGY_AUTO, WASMTIME_STRATEGY_CRANELIFT, - WASMTIME_STRATEGY_LIGHTBEAM, } #[repr(u8)] @@ -114,7 +113,6 @@ pub extern "C" fn wasmtime_config_strategy_set( let result = c.config.strategy(match strategy { WASMTIME_STRATEGY_AUTO => Strategy::Auto, WASMTIME_STRATEGY_CRANELIFT => Strategy::Cranelift, - WASMTIME_STRATEGY_LIGHTBEAM => Strategy::Lightbeam, }); handle_result(result, |_cfg| {}) } diff --git a/crates/c-api/src/func.rs b/crates/c-api/src/func.rs index 7e6a2e8c2d..76ba0759c0 100644 --- a/crates/c-api/src/func.rs +++ b/crates/c-api/src/func.rs @@ -3,13 +3,12 @@ use crate::{ wasm_extern_t, wasm_functype_t, wasm_store_t, wasm_val_t, wasm_val_vec_t, wasmtime_error_t, wasmtime_extern_t, wasmtime_val_t, wasmtime_val_union, CStoreContext, CStoreContextMut, }; -use anyhow::anyhow; use std::ffi::c_void; use std::mem::{self, MaybeUninit}; use std::panic::{self, AssertUnwindSafe}; use std::ptr; use std::str; -use wasmtime::{AsContextMut, Caller, Extern, Func, Trap, Val}; +use wasmtime::{AsContextMut, Caller, Extern, Func, Trap, Val, ValRaw}; #[derive(Clone)] #[repr(transparent)] @@ -109,6 +108,22 @@ pub unsafe extern "C" fn wasm_func_new_with_env( }) } +/// Places the `args` into `dst` and additionally reserves space in `dst` for `results_size` +/// returns. The params/results slices are then returned separately. 
+fn translate_args<'a>( + dst: &'a mut Vec, + args: impl ExactSizeIterator, + results_size: usize, +) -> (&'a [Val], &'a mut [Val]) { + debug_assert!(dst.is_empty()); + let num_args = args.len(); + dst.reserve(args.len() + results_size); + dst.extend(args); + dst.extend((0..results_size).map(|_| Val::null())); + let (a, b) = dst.split_at_mut(num_args); + (a, b) +} + #[no_mangle] pub unsafe extern "C" fn wasm_func_call( func: &mut wasm_func_t, @@ -118,23 +133,20 @@ pub unsafe extern "C" fn wasm_func_call( let f = func.func(); let results = (*results).as_uninit_slice(); let args = (*args).as_slice(); - if results.len() != f.ty(func.ext.store.context()).results().len() { - return Box::into_raw(Box::new(wasm_trap_t::new( - anyhow!("wrong number of results provided").into(), - ))); - } - let params = args.iter().map(|i| i.val()).collect::>(); + let mut dst = Vec::new(); + let (wt_params, wt_results) = + translate_args(&mut dst, args.iter().map(|i| i.val()), results.len()); // We're calling arbitrary code here most of the time, and we in general // want to try to insulate callers against bugs in wasmtime/wasi/etc if we // can. As a result we catch panics here and transform them to traps to // allow the caller to have any insulation possible against Rust panics. 
let result = panic::catch_unwind(AssertUnwindSafe(|| { - f.call(func.ext.store.context_mut(), ¶ms) + f.call(func.ext.store.context_mut(), wt_params, wt_results) })); match result { - Ok(Ok(out)) => { - for (slot, val) in results.iter_mut().zip(out.into_vec().into_iter()) { + Ok(Ok(())) => { + for (slot, val) in results.iter_mut().zip(wt_results.iter().cloned()) { crate::initialize(slot, wasm_val_t::from_val(val)); } ptr::null_mut() @@ -196,6 +208,9 @@ pub type wasmtime_func_callback_t = extern "C" fn( usize, ) -> Option>; +pub type wasmtime_func_unchecked_callback_t = + extern "C" fn(*mut c_void, *mut wasmtime_caller_t, *mut ValRaw) -> Option>; + #[no_mangle] pub unsafe extern "C" fn wasmtime_func_new( store: CStoreContextMut<'_>, @@ -260,8 +275,37 @@ pub(crate) unsafe fn c_callback_to_rust_fn( } #[no_mangle] -pub unsafe extern "C" fn wasmtime_func_call( +pub unsafe extern "C" fn wasmtime_func_new_unchecked( store: CStoreContextMut<'_>, + ty: &wasm_functype_t, + callback: wasmtime_func_unchecked_callback_t, + data: *mut c_void, + finalizer: Option, + func: &mut Func, +) { + let ty = ty.ty().ty.clone(); + let cb = c_unchecked_callback_to_rust_fn(callback, data, finalizer); + *func = Func::new_unchecked(store, ty, cb); +} + +pub(crate) unsafe fn c_unchecked_callback_to_rust_fn( + callback: wasmtime_func_unchecked_callback_t, + data: *mut c_void, + finalizer: Option, +) -> impl Fn(Caller<'_, crate::StoreData>, *mut ValRaw) -> Result<(), Trap> { + let foreign = crate::ForeignData { data, finalizer }; + move |caller, values| { + let mut caller = wasmtime_caller_t { caller }; + match callback(foreign.data, &mut caller, values) { + None => Ok(()), + Some(trap) => Err(trap.trap), + } + } +} + +#[no_mangle] +pub unsafe extern "C" fn wasmtime_func_call( + mut store: CStoreContextMut<'_>, func: &Func, args: *const wasmtime_val_t, nargs: usize, @@ -269,27 +313,31 @@ pub unsafe extern "C" fn wasmtime_func_call( nresults: usize, trap_ret: &mut *mut wasm_trap_t, ) -> Option> { - 
if nresults != func.ty(&store).results().len() { - return Some(Box::new(wasmtime_error_t::from(anyhow!( - "wrong number of results provided" - )))); - } - let params = crate::slice_from_raw_parts(args, nargs) - .iter() - .map(|i| i.to_val()) - .collect::>(); + let mut store = store.as_context_mut(); + let mut params = mem::take(&mut store.data_mut().wasm_val_storage); + let (wt_params, wt_results) = translate_args( + &mut params, + crate::slice_from_raw_parts(args, nargs) + .iter() + .map(|i| i.to_val()), + nresults, + ); // We're calling arbitrary code here most of the time, and we in general // want to try to insulate callers against bugs in wasmtime/wasi/etc if we // can. As a result we catch panics here and transform them to traps to // allow the caller to have any insulation possible against Rust panics. - let result = panic::catch_unwind(AssertUnwindSafe(|| func.call(store, ¶ms))); + let result = panic::catch_unwind(AssertUnwindSafe(|| { + func.call(&mut store, wt_params, wt_results) + })); match result { - Ok(Ok(out)) => { + Ok(Ok(())) => { let results = crate::slice_from_raw_parts_mut(results, nresults); - for (slot, val) in results.iter_mut().zip(out.into_vec().into_iter()) { - crate::initialize(slot, wasmtime_val_t::from_val(val)); + for (slot, val) in results.iter_mut().zip(wt_results.iter()) { + crate::initialize(slot, wasmtime_val_t::from_val(val.clone())); } + params.truncate(0); + store.data_mut().wasm_val_storage = params; None } Ok(Err(trap)) => match trap.downcast::() { @@ -313,6 +361,18 @@ pub unsafe extern "C" fn wasmtime_func_call( } } +#[no_mangle] +pub unsafe extern "C" fn wasmtime_func_call_unchecked( + store: CStoreContextMut<'_>, + func: &Func, + args_and_results: *mut ValRaw, +) -> *mut wasm_trap_t { + match func.call_unchecked(store, args_and_results) { + Ok(()) => ptr::null_mut(), + Err(trap) => Box::into_raw(Box::new(wasm_trap_t::new(trap))), + } +} + #[no_mangle] pub extern "C" fn wasmtime_func_type( store: CStoreContext<'_>, @@ 
-346,3 +406,17 @@ pub unsafe extern "C" fn wasmtime_caller_export_get( crate::initialize(item, which.into()); true } + +#[no_mangle] +pub unsafe extern "C" fn wasmtime_func_from_raw( + store: CStoreContextMut<'_>, + raw: usize, + func: &mut Func, +) { + *func = Func::from_raw(store, raw).unwrap(); +} + +#[no_mangle] +pub unsafe extern "C" fn wasmtime_func_to_raw(store: CStoreContextMut<'_>, func: &Func) -> usize { + func.to_raw(store) +} diff --git a/crates/c-api/src/linker.rs b/crates/c-api/src/linker.rs index 1b582b50c7..1ff5624a52 100644 --- a/crates/c-api/src/linker.rs +++ b/crates/c-api/src/linker.rs @@ -1,7 +1,6 @@ -use crate::func::c_callback_to_rust_fn; use crate::{ bad_utf8, handle_result, wasm_engine_t, wasm_functype_t, wasm_trap_t, wasmtime_error_t, - wasmtime_extern_t, wasmtime_func_callback_t, wasmtime_module_t, CStoreContextMut, + wasmtime_extern_t, wasmtime_module_t, CStoreContextMut, }; use std::ffi::c_void; use std::mem::MaybeUninit; @@ -64,17 +63,39 @@ pub unsafe extern "C" fn wasmtime_linker_define_func( name: *const u8, name_len: usize, ty: &wasm_functype_t, - callback: wasmtime_func_callback_t, + callback: crate::wasmtime_func_callback_t, data: *mut c_void, finalizer: Option, ) -> Option> { let ty = ty.ty().ty.clone(); let module = to_str!(module, module_len); let name = to_str!(name, name_len); - let cb = c_callback_to_rust_fn(callback, data, finalizer); + let cb = crate::func::c_callback_to_rust_fn(callback, data, finalizer); handle_result(linker.linker.func_new(module, name, ty, cb), |_linker| ()) } +#[no_mangle] +pub unsafe extern "C" fn wasmtime_linker_define_func_unchecked( + linker: &mut wasmtime_linker_t, + module: *const u8, + module_len: usize, + name: *const u8, + name_len: usize, + ty: &wasm_functype_t, + callback: crate::wasmtime_func_unchecked_callback_t, + data: *mut c_void, + finalizer: Option, +) -> Option> { + let ty = ty.ty().ty.clone(); + let module = to_str!(module, module_len); + let name = to_str!(name, name_len); + let 
cb = crate::func::c_unchecked_callback_to_rust_fn(callback, data, finalizer); + handle_result( + linker.linker.func_new_unchecked(module, name, ty, cb), + |_linker| (), + ) +} + #[cfg(feature = "wasi")] #[no_mangle] pub extern "C" fn wasmtime_linker_define_wasi( diff --git a/crates/c-api/src/store.rs b/crates/c-api/src/store.rs index ea8994ddc9..cdee5969ba 100644 --- a/crates/c-api/src/store.rs +++ b/crates/c-api/src/store.rs @@ -2,7 +2,9 @@ use crate::{wasm_engine_t, wasmtime_error_t, wasmtime_val_t, ForeignData}; use std::cell::UnsafeCell; use std::ffi::c_void; use std::sync::Arc; -use wasmtime::{AsContext, AsContextMut, InterruptHandle, Store, StoreContext, StoreContextMut}; +use wasmtime::{ + AsContext, AsContextMut, InterruptHandle, Store, StoreContext, StoreContextMut, Val, +}; /// This representation of a `Store` is used to implement the `wasm.h` API. /// @@ -71,6 +73,10 @@ pub struct StoreData { /// Temporary storage for usage during a wasm->host call to store values /// in a slice we pass to the C API. pub hostcall_val_storage: Vec, + + /// Temporary storage for usage during host->wasm calls, same as above but + /// for a different direction. 
+ pub wasm_val_storage: Vec, } #[no_mangle] @@ -90,6 +96,7 @@ pub extern "C" fn wasmtime_store_new( #[cfg(feature = "wasi")] wasi: None, hostcall_val_storage: Vec::new(), + wasm_val_storage: Vec::new(), }, ), }) diff --git a/crates/c-api/src/val.rs b/crates/c-api/src/val.rs index c088390cb2..a98ece8bfe 100644 --- a/crates/c-api/src/val.rs +++ b/crates/c-api/src/val.rs @@ -1,5 +1,8 @@ use crate::r#ref::{ref_to_val, WasmRefInner}; -use crate::{from_valtype, into_valtype, wasm_ref_t, wasm_valkind_t, wasmtime_valkind_t, WASM_I32}; +use crate::{ + from_valtype, into_valtype, wasm_ref_t, wasm_valkind_t, wasmtime_valkind_t, CStoreContextMut, + WASM_I32, +}; use std::ffi::c_void; use std::mem::{self, ManuallyDrop, MaybeUninit}; use std::ptr; @@ -288,3 +291,22 @@ pub extern "C" fn wasmtime_externref_clone(externref: ManuallyDrop) - #[no_mangle] pub extern "C" fn wasmtime_externref_delete(_val: Option) {} + +#[no_mangle] +pub unsafe extern "C" fn wasmtime_externref_to_raw( + cx: CStoreContextMut<'_>, + val: Option>, +) -> usize { + match val { + Some(ptr) => ptr.to_raw(cx), + None => 0, + } +} + +#[no_mangle] +pub unsafe extern "C" fn wasmtime_externref_from_raw( + _cx: CStoreContextMut<'_>, + val: usize, +) -> Option { + ExternRef::from_raw(val) +} diff --git a/crates/cranelift/Cargo.toml b/crates/cranelift/Cargo.toml index 49a9069dc1..331f660f79 100644 --- a/crates/cranelift/Cargo.toml +++ b/crates/cranelift/Cargo.toml @@ -27,4 +27,3 @@ thiserror = "1.0.4" [features] all-arch = ["cranelift-codegen/all-arch"] -old-x86-backend = ["cranelift-codegen/old-x86-backend"] diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 170b8e841b..6983945461 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -73,31 +73,19 @@ impl Compiler { let start_srcloc = FilePos::new(offset as u32); let end_srcloc = FilePos::new((offset + len) as u32); - let instructions = if let Some(ref mcr) = &context.mach_compile_result { - // 
New-style backend: we have a `MachCompileResult` that will give us `MachSrcLoc` mapping - // tuples. - collect_address_maps( - body_len, - mcr.buffer - .get_srclocs_sorted() - .into_iter() - .map(|&MachSrcLoc { start, end, loc }| (loc, start, (end - start))), - ) - } else { - // Old-style backend: we need to traverse the instruction/encoding info in the function. - let func = &context.func; - let mut blocks = func.layout.blocks().collect::>(); - blocks.sort_by_key(|block| func.offsets[*block]); // Ensure inst offsets always increase - - let encinfo = self.isa.encoding_info(); - collect_address_maps( - body_len, - blocks - .into_iter() - .flat_map(|block| func.inst_offsets(block, &encinfo)) - .map(|(offset, inst, size)| (func.srclocs[inst], offset, size)), - ) - }; + // New-style backend: we have a `MachCompileResult` that will give us `MachSrcLoc` mapping + // tuples. + let instructions = collect_address_maps( + body_len, + context + .mach_compile_result + .as_ref() + .unwrap() + .buffer + .get_srclocs_sorted() + .into_iter() + .map(|&MachSrcLoc { start, end, loc }| (loc, start, (end - start))), + ); FunctionAddressMap { instructions: instructions.into(), @@ -184,22 +172,24 @@ impl wasmtime_environ::Compiler for Compiler { &mut trap_sink, &mut stack_map_sink, ) - .map_err(|error| { - CompileError::Codegen(pretty_error(&context.func, Some(isa), error)) - })?; + .map_err(|error| CompileError::Codegen(pretty_error(&context.func, error)))?; - let unwind_info = context.create_unwind_info(isa).map_err(|error| { - CompileError::Codegen(pretty_error(&context.func, Some(isa), error)) - })?; + let unwind_info = context + .create_unwind_info(isa) + .map_err(|error| CompileError::Codegen(pretty_error(&context.func, error)))?; let address_transform = self.get_function_address_map(&context, &input, code_buf.len() as u32); let ranges = if tunables.generate_native_debuginfo { - let ranges = context.build_value_labels_ranges(isa).map_err(|error| { - 
CompileError::Codegen(pretty_error(&context.func, Some(isa), error)) - })?; - Some(ranges) + Some( + context + .mach_compile_result + .as_ref() + .unwrap() + .value_labels_ranges + .clone(), + ) } else { None }; @@ -207,7 +197,6 @@ impl wasmtime_environ::Compiler for Compiler { let length = u32::try_from(code_buf.len()).unwrap(); Ok(Box::new(CompiledFunction { body: code_buf, - jt_offsets: context.func.jt_offsets, relocations: reloc_sink.func_relocs, value_labels_ranges: ranges.unwrap_or(Default::default()), stack_slots: context.func.stack_slots, @@ -542,17 +531,14 @@ impl Compiler { &mut trap_sink, &mut stack_map_sink, ) - .map_err(|error| { - CompileError::Codegen(pretty_error(&context.func, Some(isa), error)) - })?; + .map_err(|error| CompileError::Codegen(pretty_error(&context.func, error)))?; - let unwind_info = context.create_unwind_info(isa).map_err(|error| { - CompileError::Codegen(pretty_error(&context.func, Some(isa), error)) - })?; + let unwind_info = context + .create_unwind_info(isa) + .map_err(|error| CompileError::Codegen(pretty_error(&context.func, error)))?; Ok(CompiledFunction { body: code_buf, - jt_offsets: context.func.jt_offsets, unwind_info, relocations: reloc_sink.relocs, stack_slots: Default::default(), @@ -657,25 +643,6 @@ impl binemit::RelocSink for RelocSink { addend, }); } - - fn reloc_constant( - &mut self, - _code_offset: binemit::CodeOffset, - _reloc: binemit::Reloc, - _constant_offset: ir::ConstantOffset, - ) { - // Do nothing for now: cranelift emits constant data after the function code and also emits - // function code with correct relative offsets to the constant data. 
- } - - fn reloc_jt(&mut self, offset: binemit::CodeOffset, reloc: binemit::Reloc, jt: ir::JumpTable) { - self.func_relocs.push(Relocation { - reloc, - reloc_target: RelocationTarget::JumpTable(jt), - offset, - addend: 0, - }); - } } impl RelocSink { @@ -787,20 +754,4 @@ impl binemit::RelocSink for TrampolineRelocSink { addend, }); } - fn reloc_constant( - &mut self, - _code_offset: binemit::CodeOffset, - _reloc: binemit::Reloc, - _constant_offset: ir::ConstantOffset, - ) { - panic!("trampoline compilation should not produce constant relocs"); - } - fn reloc_jt( - &mut self, - _offset: binemit::CodeOffset, - _reloc: binemit::Reloc, - _jt: ir::JumpTable, - ) { - panic!("trampoline compilation should not produce jump table relocs"); - } } diff --git a/crates/cranelift/src/debug/transform/expression.rs b/crates/cranelift/src/debug/transform/expression.rs index 0ccfde8b03..a72e5daa42 100644 --- a/crates/cranelift/src/debug/transform/expression.rs +++ b/crates/cranelift/src/debug/transform/expression.rs @@ -1,7 +1,7 @@ use super::address_transform::AddressTransform; use crate::debug::ModuleMemoryOffset; use anyhow::{Context, Error, Result}; -use cranelift_codegen::ir::{LabelValueLoc, StackSlots, ValueLabel, ValueLoc}; +use cranelift_codegen::ir::{LabelValueLoc, StackSlots, ValueLabel}; use cranelift_codegen::isa::TargetIsa; use cranelift_codegen::ValueLabelsRanges; use cranelift_wasm::get_vmctx_value_label; @@ -129,40 +129,12 @@ impl CompiledExpression { } } -const X86_64_STACK_OFFSET: i64 = 16; - fn translate_loc( loc: LabelValueLoc, - frame_info: Option<&FunctionFrameInfo>, isa: &dyn TargetIsa, add_stack_value: bool, ) -> Result>> { Ok(match loc { - LabelValueLoc::ValueLoc(ValueLoc::Reg(reg)) => { - let machine_reg = isa.map_dwarf_register(reg)?; - let mut writer = ExpressionWriter::new(); - if add_stack_value { - writer.write_op_reg(machine_reg)?; - } else { - writer.write_op_breg(machine_reg)?; - writer.write_sleb128(0)?; - } - Some(writer.into_vec()) - } - 
LabelValueLoc::ValueLoc(ValueLoc::Stack(ss)) => { - if let Some(frame_info) = frame_info { - if let Some(ss_offset) = frame_info.stack_slots[ss].offset { - let mut writer = ExpressionWriter::new(); - writer.write_op_breg(X86_64::RBP.0)?; - writer.write_sleb128(ss_offset as i64 + X86_64_STACK_OFFSET)?; - if !add_stack_value { - writer.write_op(gimli::constants::DW_OP_deref)?; - } - return Ok(Some(writer.into_vec())); - } - } - None - } LabelValueLoc::Reg(r) => { let machine_reg = isa.map_regalloc_reg_to_dwarf(r)?; let mut writer = ExpressionWriter::new(); @@ -183,8 +155,6 @@ fn translate_loc( } return Ok(Some(writer.into_vec())); } - - _ => None, }) } @@ -197,35 +167,6 @@ fn append_memory_deref( let mut writer = ExpressionWriter::new(); // FIXME for imported memory match vmctx_loc { - LabelValueLoc::ValueLoc(ValueLoc::Reg(vmctx_reg)) => { - let reg = isa.map_dwarf_register(vmctx_reg)? as u8; - writer.write_u8(gimli::constants::DW_OP_breg0.0 + reg)?; - let memory_offset = match frame_info.vmctx_memory_offset() { - Some(offset) => offset, - None => { - return Ok(false); - } - }; - writer.write_sleb128(memory_offset)?; - } - LabelValueLoc::ValueLoc(ValueLoc::Stack(ss)) => { - if let Some(ss_offset) = frame_info.stack_slots[ss].offset { - writer.write_op_breg(X86_64::RBP.0)?; - writer.write_sleb128(ss_offset as i64 + X86_64_STACK_OFFSET)?; - writer.write_op(gimli::constants::DW_OP_deref)?; - writer.write_op(gimli::constants::DW_OP_consts)?; - let memory_offset = match frame_info.vmctx_memory_offset() { - Some(offset) => offset, - None => { - return Ok(false); - } - }; - writer.write_sleb128(memory_offset)?; - writer.write_op(gimli::constants::DW_OP_plus)?; - } else { - return Ok(false); - } - } LabelValueLoc::Reg(r) => { let reg = isa.map_regalloc_reg_to_dwarf(r)?; writer.write_op_breg(reg)?; @@ -251,9 +192,6 @@ fn append_memory_deref( writer.write_sleb128(memory_offset)?; writer.write_op(gimli::constants::DW_OP_plus)?; } - _ => { - return Ok(false); - } } 
writer.write_op(gimli::constants::DW_OP_deref)?; writer.write_op(gimli::constants::DW_OP_swap)?; @@ -416,9 +354,7 @@ impl CompiledExpression { CompiledExpressionPart::Local { label, trailing } => { let loc = *label_location.get(&label).context("label_location")?; - if let Some(expr) = - translate_loc(loc, frame_info, isa, *trailing)? - { + if let Some(expr) = translate_loc(loc, isa, *trailing)? { code_buf.extend_from_slice(&expr) } else { return Ok(None); @@ -1221,7 +1157,7 @@ mod tests { } fn create_mock_value_ranges() -> (ValueLabelsRanges, (ValueLabel, ValueLabel, ValueLabel)) { - use cranelift_codegen::ir::{LabelValueLoc, ValueLoc}; + use cranelift_codegen::ir::LabelValueLoc; use cranelift_codegen::ValueLocRange; use cranelift_entity::EntityRef; use std::collections::HashMap; @@ -1232,7 +1168,7 @@ mod tests { value_ranges.insert( value_0, vec![ValueLocRange { - loc: LabelValueLoc::ValueLoc(ValueLoc::Unassigned), + loc: LabelValueLoc::SPOffset(0), start: 0, end: 25, }], @@ -1240,7 +1176,7 @@ mod tests { value_ranges.insert( value_1, vec![ValueLocRange { - loc: LabelValueLoc::ValueLoc(ValueLoc::Unassigned), + loc: LabelValueLoc::SPOffset(0), start: 5, end: 30, }], @@ -1249,12 +1185,12 @@ mod tests { value_2, vec![ ValueLocRange { - loc: LabelValueLoc::ValueLoc(ValueLoc::Unassigned), + loc: LabelValueLoc::SPOffset(0), start: 0, end: 10, }, ValueLocRange { - loc: LabelValueLoc::ValueLoc(ValueLoc::Unassigned), + loc: LabelValueLoc::SPOffset(0), start: 20, end: 30, }, diff --git a/crates/cranelift/src/lib.rs b/crates/cranelift/src/lib.rs index b361bd0b21..f288252dd0 100644 --- a/crates/cranelift/src/lib.rs +++ b/crates/cranelift/src/lib.rs @@ -114,9 +114,6 @@ pub struct CompiledFunction { /// The machine code for this function. body: Vec, - /// The jump tables offsets (in the body). - jt_offsets: ir::JumpTableOffsets, - /// The unwind information. unwind_info: Option, @@ -181,8 +178,6 @@ enum RelocationTarget { UserFunc(FuncIndex), /// A compiler-generated libcall. 
LibCall(ir::LibCall), - /// Jump table index. - JumpTable(ir::JumpTable), } /// Creates a new cranelift `Signature` with no wasm params/results for the diff --git a/crates/cranelift/src/obj.rs b/crates/cranelift/src/obj.rs index 0166347ff4..a5bed41d89 100644 --- a/crates/cranelift/src/obj.rs +++ b/crates/cranelift/src/obj.rs @@ -305,22 +305,11 @@ impl<'a> ObjectBuilder<'a> { // seem too common though so aren't necessarily that important // to optimize. RelocationTarget::LibCall(call) => (self.libcalls[&call], 0), - RelocationTarget::JumpTable(jt) => (symbol_id, func.jt_offsets[jt]), }; let (kind, encoding, size) = match r.reloc { Reloc::Abs4 => (RelocationKind::Absolute, RelocationEncoding::Generic, 32), Reloc::Abs8 => (RelocationKind::Absolute, RelocationEncoding::Generic, 64), - // This is emitted by the old x86 backend and is only present - // for when the constant rodata is separated from the code - // itself. We don't do that, though, so we ignore these - // relocations since the offsets already listed here are already - // correct. - // - // FIXME(#3009): when the old backend is removed delete this - // case. 
- Reloc::X86PCRelRodata4 => continue, - other => unimplemented!("Unimplemented relocation {:?}", other), }; self.obj diff --git a/crates/fiber/src/unix.rs b/crates/fiber/src/unix.rs index d8fbff7250..e5f61e613c 100644 --- a/crates/fiber/src/unix.rs +++ b/crates/fiber/src/unix.rs @@ -60,7 +60,7 @@ impl FiberStack { let mmap = rsix::io::mmap_anonymous( ptr::null_mut(), mmap_len, - rsix::io::ProtFlags::NONE, + rsix::io::ProtFlags::empty(), rsix::io::MapFlags::PRIVATE, )?; diff --git a/crates/fuzzing/Cargo.toml b/crates/fuzzing/Cargo.toml index 4e6a0ca746..fcdcfac483 100644 --- a/crates/fuzzing/Cargo.toml +++ b/crates/fuzzing/Cargo.toml @@ -32,7 +32,8 @@ rusty_v8 = "0.27" [dev-dependencies] wat = "1.0.37" +# FIXME(#3251) should re-enable once spec interpreter won't time out # We only build the library containing the OCaml spec interpreter if the OCaml # toolchain is available--which is assumed here to be the case when fuzzing. -[target.'cfg(fuzzing)'.dependencies] -wasm-spec-interpreter = { path = "./wasm-spec-interpreter", features = ["build-libinterpret"] } +# [target.'cfg(fuzzing)'.dependencies] +# wasm-spec-interpreter = { path = "./wasm-spec-interpreter", features = ["build-libinterpret"] } diff --git a/crates/fuzzing/src/generators.rs b/crates/fuzzing/src/generators.rs index 361298ca3c..10eaa86844 100644 --- a/crates/fuzzing/src/generators.rs +++ b/crates/fuzzing/src/generators.rs @@ -18,7 +18,6 @@ use arbitrary::{Arbitrary, Unstructured}; /// testing between. #[derive(Arbitrary, Clone, Debug, PartialEq, Eq, Hash)] pub struct DifferentialConfig { - strategy: DifferentialStrategy, opt_level: OptLevel, force_jump_veneers: bool, } @@ -26,10 +25,7 @@ pub struct DifferentialConfig { impl DifferentialConfig { /// Convert this differential fuzzing config into a `wasmtime::Config`. 
pub fn to_wasmtime_config(&self) -> anyhow::Result { - let mut config = crate::fuzz_default_config(match self.strategy { - DifferentialStrategy::Cranelift => wasmtime::Strategy::Cranelift, - DifferentialStrategy::Lightbeam => wasmtime::Strategy::Lightbeam, - })?; + let mut config = crate::fuzz_default_config(wasmtime::Strategy::Cranelift)?; config.cranelift_opt_level(self.opt_level.to_wasmtime()); if self.force_jump_veneers { unsafe { @@ -40,12 +36,6 @@ impl DifferentialConfig { } } -#[derive(Arbitrary, Clone, Debug, PartialEq, Eq, Hash)] -enum DifferentialStrategy { - Cranelift, - Lightbeam, -} - #[derive(Arbitrary, Clone, Debug, PartialEq, Eq, Hash)] enum OptLevel { None, @@ -142,12 +132,6 @@ impl wasm_smith::Config for WasmtimeDefaultConfig { 4 } - // Turn some wasm features default-on for those that have a finished - // implementation in Wasmtime. - fn simd_enabled(&self) -> bool { - true - } - fn reference_types_enabled(&self) -> bool { true } diff --git a/crates/fuzzing/src/lib.rs b/crates/fuzzing/src/lib.rs index 16b9441caf..4e93b52216 100644 --- a/crates/fuzzing/src/lib.rs +++ b/crates/fuzzing/src/lib.rs @@ -40,7 +40,6 @@ pub fn fuzz_default_config(strategy: wasmtime::Strategy) -> anyhow::Result, module: &Module) -> Op pub fn compile(wasm: &[u8], strategy: Strategy) { crate::init_fuzzing(); - let engine = Engine::new(&crate::fuzz_default_config(strategy).unwrap()).unwrap(); + let mut config = crate::fuzz_default_config(strategy).unwrap(); + config.wasm_module_linking(false); + let engine = Engine::new(&config).unwrap(); log_wasm(wasm); let _ = Module::new(&engine, wasm); } @@ -253,7 +255,7 @@ pub fn differential_execution( let configs: Vec<_> = match configs.iter().map(|c| c.to_wasmtime_config()).collect() { Ok(cs) => cs, // If the config is trying to use something that was turned off at - // compile time, eg lightbeam, just continue to the next fuzz input. + // compile time just continue to the next fuzz input. 
Err(_) => return, }; @@ -297,8 +299,10 @@ pub fn differential_execution( let ty = f.ty(&store); let params = dummy::dummy_values(ty.params()); + let mut results = vec![Val::I32(0); ty.results().len()]; let this_result = f - .call(&mut store, ¶ms) + .call(&mut store, ¶ms, &mut results) + .map(|()| results.into()) .map_err(|e| e.downcast::().unwrap()); let existing_result = export_func_results @@ -312,7 +316,7 @@ pub fn differential_execution( match instance.get_export(&mut *store, "hangLimitInitializer") { None => return, Some(Extern::Func(f)) => { - f.call(store, &[]) + f.call(store, &[], &mut []) .expect("initializing the hang limit should not fail"); } Some(_) => panic!("unexpected hangLimitInitializer export"), @@ -478,7 +482,8 @@ pub fn make_api_calls(api: crate::generators::api::ApiCalls) { let f = &funcs[nth]; let ty = f.ty(&store); let params = dummy::dummy_values(ty.params()); - let _ = f.call(store, ¶ms); + let mut results = vec![Val::I32(0); ty.results().len()]; + let _ = f.call(store, ¶ms, &mut results); } } } @@ -636,7 +641,7 @@ pub fn table_ops( let args: Vec<_> = (0..ops.num_params()) .map(|_| Val::ExternRef(Some(ExternRef::new(CountDrops(num_dropped.clone()))))) .collect(); - let _ = run.call(&mut store, &args); + let _ = run.call(&mut store, &args, &mut []); } assert_eq!(num_dropped.load(SeqCst), expected_drops.load(SeqCst)); @@ -740,7 +745,7 @@ pub fn differential_wasmi_execution(wasm: &[u8], config: &crate::generators::Con // Introspect wasmtime module to find name of an exported function and of an // exported memory. 
- let (func_name, _ty) = first_exported_function(&wasmtime_module)?; + let (func_name, ty) = first_exported_function(&wasmtime_module)?; let memory_name = first_exported_memory(&wasmtime_module)?; let wasmi_mem_export = wasmi_instance.export_by_name(memory_name).unwrap(); @@ -755,8 +760,10 @@ pub fn differential_wasmi_execution(wasm: &[u8], config: &crate::generators::Con let wasmtime_main = wasmtime_instance .get_func(&mut wasmtime_store, func_name) .expect("function export is present"); - let wasmtime_vals = wasmtime_main.call(&mut wasmtime_store, &[]); - let wasmtime_val = wasmtime_vals.map(|v| v.iter().next().cloned()); + let mut wasmtime_results = vec![Val::I32(0); ty.results().len()]; + let wasmtime_val = wasmtime_main + .call(&mut wasmtime_store, &[], &mut wasmtime_results) + .map(|()| wasmtime_results.get(0).cloned()); debug!( "Successful execution: wasmi returned {:?}, wasmtime returned {:?}", @@ -918,15 +925,17 @@ fn run_in_wasmtime( .context("Wasmtime cannot instantiate module")?; // Find the first exported function. - let (func_name, _ty) = + let (func_name, ty) = first_exported_function(&wasmtime_module).context("Cannot find exported function")?; let wasmtime_main = wasmtime_instance .get_func(&mut wasmtime_store, &func_name[..]) .expect("function export is present"); // Execute the function and return the values. - let wasmtime_vals = wasmtime_main.call(&mut wasmtime_store, params); - wasmtime_vals.map(|v| v.to_vec()) + let mut results = vec![Val::I32(0); ty.results().len()]; + wasmtime_main + .call(&mut wasmtime_store, params, &mut results) + .map(|()| results) } // Introspect wasmtime module to find the name of the first exported function. 
diff --git a/crates/fuzzing/src/oracles/v8.rs b/crates/fuzzing/src/oracles/v8.rs index 0e66c38cb9..4016b1ffdf 100644 --- a/crates/fuzzing/src/oracles/v8.rs +++ b/crates/fuzzing/src/oracles/v8.rs @@ -93,7 +93,9 @@ pub fn differential_v8_execution(wasm: &[u8], config: &crate::generators::Config let wasmtime_main = wasmtime_instance .get_func(&mut wasmtime_store, func) .expect("function export is present"); - let wasmtime_vals = wasmtime_main.call(&mut wasmtime_store, &wasmtime_params); + let mut wasmtime_vals = vec![Val::I32(0); ty.results().len()]; + let wasmtime_result = + wasmtime_main.call(&mut wasmtime_store, &wasmtime_params, &mut wasmtime_vals); log::trace!("finished wasmtime invocation"); // V8: call the first exported func @@ -112,15 +114,15 @@ pub fn differential_v8_execution(wasm: &[u8], config: &crate::generators::Config log::trace!("finished v8 invocation"); // Verify V8 and wasmtime match - match (wasmtime_vals, v8_vals) { - (Ok(wasmtime), Ok(v8)) => { + match (wasmtime_result, v8_vals) { + (Ok(()), Ok(v8)) => { log::trace!("both executed successfully"); - match wasmtime.len() { + match wasmtime_vals.len() { 0 => assert!(v8.is_undefined()), - 1 => assert_val_match(&wasmtime[0], &v8, &mut scope), + 1 => assert_val_match(&wasmtime_vals[0], &v8, &mut scope), _ => { let array = v8::Local::<'_, v8::Array>::try_from(v8).unwrap(); - for (i, wasmtime) in wasmtime.iter().enumerate() { + for (i, wasmtime) in wasmtime_vals.iter().enumerate() { let v8 = array.get_index(&mut scope, i as u32).unwrap(); assert_val_match(wasmtime, &v8, &mut scope); // .. 
@@ -128,7 +130,7 @@ pub fn differential_v8_execution(wasm: &[u8], config: &crate::generators::Config } } } - (Ok(_), Err(msg)) => { + (Ok(()), Err(msg)) => { panic!("wasmtime succeeded at invocation, v8 failed: {}", msg) } (Err(err), Ok(_)) => { @@ -285,7 +287,18 @@ fn assert_error_matches(wasmtime: &anyhow::Error, v8: &str) { TrapCode::IntegerDivisionByZero => { return verify_v8(&["divide by zero", "remainder by zero"]) } - TrapCode::StackOverflow => return verify_v8(&["call stack size exceeded"]), + TrapCode::StackOverflow => { + return verify_v8(&[ + "call stack size exceeded", + // Similar to the above comment in `UnreachableCodeReached` + // if wasmtime hits a stack overflow but v8 ran all the way + // to when the `unreachable` instruction was hit then that's + // ok. This just means that wasmtime either has less optimal + // codegen or different limits on the stack than v8 does, + // which isn't an issue per-se. + "unreachable", + ]); + } TrapCode::IndirectCallToNull => return verify_v8(&["null function"]), TrapCode::TableOutOfBounds => { return verify_v8(&[ diff --git a/crates/fuzzing/wasm-spec-interpreter/src/lib.rs b/crates/fuzzing/wasm-spec-interpreter/src/lib.rs index a2f46a3a9a..20440c500e 100644 --- a/crates/fuzzing/wasm-spec-interpreter/src/lib.rs +++ b/crates/fuzzing/wasm-spec-interpreter/src/lib.rs @@ -28,6 +28,7 @@ mod without_library; #[cfg(not(feature = "has-libinterpret"))] pub use without_library::*; +// FIXME(#3251) should re-enable once spec interpreter won't time out // If the user is fuzzing`, we expect the OCaml library to have been built. 
-#[cfg(all(fuzzing, not(feature = "has-libinterpret")))] -compile_error!("The OCaml library was not built."); +// #[cfg(all(fuzzing, not(feature = "has-libinterpret")))] +// compile_error!("The OCaml library was not built."); diff --git a/crates/jit/src/code_memory.rs b/crates/jit/src/code_memory.rs index 2dc3d3799f..acfb5745b1 100644 --- a/crates/jit/src/code_memory.rs +++ b/crates/jit/src/code_memory.rs @@ -88,6 +88,7 @@ impl CodeMemory { mmap: &self.mmap, text: &[], }; + let mmap_ptr = self.mmap.as_ptr() as u64; // Sanity-check that all sections are aligned correctly. for section in ret.obj.sections() { @@ -98,7 +99,7 @@ impl CodeMemory { if section.align() == 0 || data.len() == 0 { continue; } - if data.as_ptr() as u64 % section.align() != 0 { + if (data.as_ptr() as u64 - mmap_ptr) % section.align() != 0 { bail!( "section `{}` isn't aligned to {:#x}", section.name().unwrap_or("ERROR"), diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs index ef499ad446..ebe823b51b 100644 --- a/crates/jit/src/instantiate.rs +++ b/crates/jit/src/instantiate.rs @@ -397,13 +397,17 @@ impl CompiledModule { } /// Returns the per-signature trampolines for this module. - pub fn trampolines(&self) -> impl Iterator + '_ { + pub fn trampolines(&self) -> impl Iterator + '_ { let code = self.code(); self.trampolines.iter().map(move |info| { - (info.signature, unsafe { - let ptr = &code[info.start as usize]; - std::mem::transmute::<*const u8, VMTrampoline>(ptr) - }) + ( + info.signature, + unsafe { + let ptr = &code[info.start as usize]; + std::mem::transmute::<*const u8, VMTrampoline>(ptr) + }, + info.length as usize, + ) }) } diff --git a/crates/jit/src/profiling.rs b/crates/jit/src/profiling.rs index 6e90b6b7e2..7fddcb296f 100644 --- a/crates/jit/src/profiling.rs +++ b/crates/jit/src/profiling.rs @@ -29,7 +29,10 @@ pub use vtune::VTuneAgent; /// Common interface for profiling tools. 
pub trait ProfilingAgent: Send + Sync + 'static { /// Notify the profiler of a new module loaded into memory - fn module_load(&self, module: &CompiledModule, dbg_image: Option<&[u8]>) -> (); + fn module_load(&self, module: &CompiledModule, dbg_image: Option<&[u8]>); + /// Notify the profiler that the object file provided contains + /// dynamically-generated trampolines which are now being loaded. + fn trampoline_load(&self, file: &object::File<'_>); } /// Default agent for unsupported profiling build. @@ -54,7 +57,8 @@ impl Error for NullProfilerAgentError { } impl ProfilingAgent for NullProfilerAgent { - fn module_load(&self, _module: &CompiledModule, _dbg_image: Option<&[u8]>) -> () {} + fn module_load(&self, _module: &CompiledModule, _dbg_image: Option<&[u8]>) {} + fn trampoline_load(&self, _file: &object::File<'_>) {} } #[allow(dead_code)] diff --git a/crates/jit/src/profiling/jitdump_disabled.rs b/crates/jit/src/profiling/jitdump_disabled.rs index 122468d203..a007a72661 100644 --- a/crates/jit/src/profiling/jitdump_disabled.rs +++ b/crates/jit/src/profiling/jitdump_disabled.rs @@ -20,4 +20,5 @@ impl JitDumpAgent { impl ProfilingAgent for JitDumpAgent { fn module_load(&self, _module: &CompiledModule, _dbg_image: Option<&[u8]>) {} + fn trampoline_load(&self, _file: &object::File<'_>) {} } diff --git a/crates/jit/src/profiling/jitdump_linux.rs b/crates/jit/src/profiling/jitdump_linux.rs index 9680ce8984..4663011468 100644 --- a/crates/jit/src/profiling/jitdump_linux.rs +++ b/crates/jit/src/profiling/jitdump_linux.rs @@ -21,6 +21,7 @@ use std::ptr; use std::sync::Mutex; use std::{borrow, mem, process}; use target_lexicon::Architecture; +use wasmtime_environ::EntityRef; use object::elf; @@ -202,6 +203,9 @@ impl ProfilingAgent for JitDumpAgent { fn module_load(&self, module: &CompiledModule, dbg_image: Option<&[u8]>) { self.state.lock().unwrap().module_load(module, dbg_image); } + fn trampoline_load(&self, file: &object::File<'_>) { + 
self.state.lock().unwrap().trampoline_load(file) + } } impl State { @@ -280,7 +284,7 @@ impl State { } /// Sent when a method is compiled and loaded into memory by the VM. - pub fn module_load(&mut self, module: &CompiledModule, dbg_image: Option<&[u8]>) -> () { + pub fn module_load(&mut self, module: &CompiledModule, dbg_image: Option<&[u8]>) { let pid = process::id(); let tid = pid; // ThreadId does appear to track underlying thread. Using PID. @@ -299,6 +303,52 @@ impl State { self.dump_code_load_record(&name, addr, len, timestamp, pid, tid); } } + for (idx, func, len) in module.trampolines() { + let (addr, len) = (func as usize as *const u8, len); + let timestamp = self.get_time_stamp(); + let name = format!("wasm::trampoline[{}]", idx.index()); + self.dump_code_load_record(&name, addr, len, timestamp, pid, tid); + } + } + + fn trampoline_load(&mut self, image: &object::File<'_>) { + use object::{ObjectSection, ObjectSymbol, SectionKind, SymbolKind}; + let pid = process::id(); + let tid = pid; + + let text_base = match image.sections().find(|s| s.kind() == SectionKind::Text) { + Some(section) => match section.data() { + Ok(data) => data.as_ptr() as usize, + Err(_) => return, + }, + None => return, + }; + + for sym in image.symbols() { + if !sym.is_definition() { + continue; + } + if sym.kind() != SymbolKind::Text { + continue; + } + let address = sym.address(); + let size = sym.size(); + if address == 0 || size == 0 { + continue; + } + if let Ok(name) = sym.name() { + let addr = text_base + address as usize; + let timestamp = self.get_time_stamp(); + self.dump_code_load_record( + &name, + addr as *const u8, + size as usize, + timestamp, + pid, + tid, + ); + } + } } fn dump_code_load_record( @@ -309,7 +359,7 @@ impl State { timestamp: u64, pid: u32, tid: u32, - ) -> () { + ) { let name_len = method_name.len() + 1; let size_limit = mem::size_of::(); diff --git a/crates/jit/src/profiling/vtune_disabled.rs b/crates/jit/src/profiling/vtune_disabled.rs index 
9dd943944f..7a84fd03d7 100644 --- a/crates/jit/src/profiling/vtune_disabled.rs +++ b/crates/jit/src/profiling/vtune_disabled.rs @@ -20,4 +20,5 @@ impl VTuneAgent { impl ProfilingAgent for VTuneAgent { fn module_load(&self, _module: &crate::CompiledModule, _dbg_image: Option<&[u8]>) {} + fn trampoline_load(&self, _file: &object::File<'_>) {} } diff --git a/crates/jit/src/profiling/vtune_linux.rs b/crates/jit/src/profiling/vtune_linux.rs index bc51cb58f1..eee43ee69a 100644 --- a/crates/jit/src/profiling/vtune_linux.rs +++ b/crates/jit/src/profiling/vtune_linux.rs @@ -112,6 +112,9 @@ impl ProfilingAgent for VTuneAgent { fn module_load(&self, module: &CompiledModule, dbg_image: Option<&[u8]>) { self.state.lock().unwrap().module_load(module, dbg_image); } + fn trampoline_load(&self, _file: &object::File<'_>) { + // TODO: needs an implementation + } } impl State { diff --git a/crates/lightbeam/Cargo.toml b/crates/lightbeam/Cargo.toml deleted file mode 100644 index a974df5565..0000000000 --- a/crates/lightbeam/Cargo.toml +++ /dev/null @@ -1,36 +0,0 @@ -[package] -name = "lightbeam" -version = "0.30.0" -authors = ["The Lightbeam Project Developers"] -description = "An optimising one-pass streaming compiler for WebAssembly" -license = "Apache-2.0 WITH LLVM-exception" -repository = "https://github.com/bytecodealliance/wasmtime" -readme = "README.md" -categories = ["wasm"] -keywords = ["webassembly", "wasm", "compile", "compiler", "jit"] -edition = "2018" - -[dependencies] -arrayvec = "0.5" -capstone = "0.9.0" -cranelift-codegen = { path = "../../cranelift/codegen", version = "0.77.0" } -derive_more = "0.99" -dynasm = "1.0.0" -dynasmrt = "1.0.0" -iter-enum = "1" -itertools = "0.10.0" -memoffset = "0.6.0" -more-asserts = "0.2.1" -smallvec = "1.6.1" -thiserror = "1.0.9" -typemap = "0.3" -wasmparser = "0.80" - -[dev-dependencies] -lazy_static = "1.2" -wat = "1.0.37" -quickcheck = "1.0.0" -anyhow = "1.0" - -[badges] -maintenance = { status = "experimental" } diff --git 
a/crates/lightbeam/README.md b/crates/lightbeam/README.md deleted file mode 100644 index 85d787481a..0000000000 --- a/crates/lightbeam/README.md +++ /dev/null @@ -1,168 +0,0 @@ -# Lightbeam - -Lightbeam is an optimising one-pass streaming compiler for WebAssembly, intended for use in [Wasmtime][wasmtime]. - -[wasmtime]: https://github.com/bytecodealliance/wasmtime - -## Quality of output - -Already - with a very small number of relatively simple optimisation rules - Lightbeam produces surprisingly high-quality output considering how restricted it is. It even produces better code than Cranelift, Firefox or both for some workloads. Here's a very simple example, this recursive fibonacci function in Rust: - -```rust -fn fib(n: i32) -> i32 { - if n == 0 || n == 1 { - 1 - } else { - fib(n - 1) + fib(n - 2) - } -} -``` - -When compiled with optimisations enabled, rustc will produce the following WebAssembly: - -```rust -(module - (func $fib (param $p0 i32) (result i32) - (local $l1 i32) - (set_local $l1 - (i32.const 1)) - (block $B0 - (br_if $B0 - (i32.lt_u - (get_local $p0) - (i32.const 2))) - (set_local $l1 - (i32.const 1)) - (loop $L1 - (set_local $l1 - (i32.add - (call $fib - (i32.add - (get_local $p0) - (i32.const -1))) - (get_local $l1))) - (br_if $L1 - (i32.gt_u - (tee_local $p0 - (i32.add - (get_local $p0) - (i32.const -2))) - (i32.const 1))))) - (get_local $l1))) -``` - -Firefox's optimising compiler produces the following assembly (labels cleaned up somewhat): - -```asm -fib: - sub rsp, 0x18 - cmp qword ptr [r14 + 0x28], rsp - jae stack_overflow - mov dword ptr [rsp + 0xc], edi - cmp edi, 2 - jae .Lelse - mov eax, 1 - mov dword ptr [rsp + 8], eax - jmp .Lreturn -.Lelse: - mov dword ptr [rsp + 0xc], edi - mov eax, 1 - mov dword ptr [rsp + 8], eax -.Lloop: - mov edi, dword ptr [rsp + 0xc] - add edi, -1 - call 0 - mov ecx, dword ptr [rsp + 8] - add ecx, eax - mov dword ptr [rsp + 8], ecx - mov ecx, dword ptr [rsp + 0xc] - add ecx, -2 - mov dword ptr [rsp + 0xc], 
ecx - cmp ecx, 1 - ja .Lloop -.Lreturn: - mov eax, dword ptr [rsp + 8] - nop - add rsp, 0x18 - ret -``` - -Cranelift with optimisations enabled produces similar: - -```asm -fib: - push rbp - mov rbp, rsp - sub rsp, 0x20 - mov qword ptr [rsp + 0x10], rdi - mov dword ptr [rsp + 0x1c], esi - mov eax, 1 - mov dword ptr [rsp + 0x18], eax - mov eax, dword ptr [rsp + 0x1c] - cmp eax, 2 - jb .Lreturn - movabs rax, 0 - mov qword ptr [rsp + 8], rax -.Lloop: - mov eax, dword ptr [rsp + 0x1c] - add eax, -1 - mov rcx, qword ptr [rsp + 8] - mov rdx, qword ptr [rsp + 0x10] - mov rdi, rdx - mov esi, eax - call rcx - mov ecx, dword ptr [rsp + 0x18] - add eax, ecx - mov dword ptr [rsp + 0x18], eax - mov eax, dword ptr [rsp + 0x1c] - add eax, -2 - mov dword ptr [rsp + 0x1c], eax - mov eax, dword ptr [rsp + 0x1c] - cmp eax, 1 - ja .Lloop -.Lreturn: - mov eax, dword ptr [rsp + 0x18] - add rsp, 0x20 - pop rbp - ret -``` - -Whereas Lightbeam produces smaller code with far fewer memory accesses than both (and fewer blocks than Firefox's output): - -```asm -fib: - cmp esi, 2 - mov eax, 1 - jb .Lreturn - mov eax, 1 -.Lloop: - mov rcx, rsi - add ecx, 0xffffffff - push rsi - push rax - push rax - mov rsi, rcx - call fib - add eax, [rsp + 8] - mov rcx, [rsp + 0x10] - add ecx, 0xfffffffe - cmp ecx, 1 - mov rsi, rcx - lea rsp, [rsp + 0x18] - ja .Lloop -.Lreturn: - ret -``` - -Now obviously I'm not advocating for replacing Firefox's optimising compiler with Lightbeam since the latter can only really produce better code when receiving optimised WebAssembly (and so debug-mode or hand-written WebAssembly may produce much worse output). However, this shows that even with the restrictions of a streaming compiler it's absolutely possible to produce high-quality assembly output. For the assembly above, the Lightbeam output runs within 15% of native speed. 
This is paramount for one of Lightbeam's intended usecases for real-time systems that want good runtime performance but cannot tolerate compiler bombs. - -## Specification compliance - -Lightbeam passes 100% of the specification test suite, but that doesn't necessarily mean that it's 100% specification-compliant. Hopefully as we run a fuzzer against it we can find any issues and get Lightbeam to a state where it can be used in production. - -## Getting involved - -You can file issues in the [Wasmtime issue tracker][Wasmtime issue tracker]. If you want to get involved jump into the [Bytecode Alliance Zulip][bytecodealliance-zulip] and someone can direct you to the right place. I wish I could say "the most useful thing you can do is play with it and open issues where you find problems" but until it passes the spec suite that won't be very helpful. - -[bytecodealliance-zulip]: https://bytecodealliance.zulipchat.com/ -[Wasmtime issue tracker]: https://github.com/bytecodealliance/wasmtime/issues diff --git a/crates/lightbeam/examples/test.rs b/crates/lightbeam/examples/test.rs deleted file mode 100644 index 12d54193ae..0000000000 --- a/crates/lightbeam/examples/test.rs +++ /dev/null @@ -1,16 +0,0 @@ -use lightbeam::translate; - -const WAT: &str = r#" -(module - (func (param i32) (param i32) (result i32) (i32.add (get_local 0) (get_local 1))) -) -"#; - -fn main() -> anyhow::Result<()> { - let data = wat::parse_str(WAT)?; - let translated = translate(&data)?; - let result: u32 = translated.execute_func(0, (5u32, 3u32))?; - println!("f(5, 3) = {}", result); - - Ok(()) -} diff --git a/crates/lightbeam/src/backend.rs b/crates/lightbeam/src/backend.rs deleted file mode 100644 index 240364320b..0000000000 --- a/crates/lightbeam/src/backend.rs +++ /dev/null @@ -1,6134 +0,0 @@ -#![allow(clippy::float_cmp)] - -use self::registers::*; -use crate::error::Error; -use crate::microwasm::{BrTarget, Ieee32, Ieee64, SignlessType, Type, Value, F32, F64, I32, I64}; -use 
crate::module::ModuleContext; -use cranelift_codegen::{ - binemit, - ir::{self, SourceLoc, TrapCode}, -}; -use dynasm::dynasm; -use dynasmrt::x64::Assembler; -use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, ExecutableBuffer}; -use std::{ - cmp::Ordering, - convert::{TryFrom, TryInto}, - fmt::Display, - hash::Hash, - iter, mem, - ops::{Deref, RangeInclusive}, -}; -// use wasmtime_environ::BuiltinFunctionIndex; - -mod magic { - /// An index type for builtin functions. - pub struct BuiltinFunctionIndex(u32); - - impl BuiltinFunctionIndex { - /// Returns an index for wasm's `memory.grow` builtin function. - pub const fn get_memory32_grow_index() -> Self { - Self(0) - } - /// Returns an index for wasm's imported `memory.grow` builtin function. - pub const fn get_imported_memory32_grow_index() -> Self { - Self(1) - } - /// Returns an index for wasm's `memory.size` builtin function. - pub const fn get_memory32_size_index() -> Self { - Self(2) - } - /// Returns an index for wasm's imported `memory.size` builtin function. - pub const fn get_imported_memory32_size_index() -> Self { - Self(3) - } - - /// Return the index as an u32 number. - pub const fn index(&self) -> u32 { - self.0 - } - } -} - -use magic::BuiltinFunctionIndex; - -/// Size of a pointer on the target in bytes. 
-const WORD_SIZE: u32 = 8; - -type RegId = u8; - -#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] -pub enum GPR { - Rq(RegId), - Rx(RegId), -} - -#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] -pub enum GPRType { - Rq, - Rx, -} - -impl From for GPRType { - fn from(other: SignlessType) -> GPRType { - match other { - I32 | I64 => GPRType::Rq, - F32 | F64 => GPRType::Rx, - } - } -} - -impl From for Option { - fn from(other: SignlessType) -> Self { - Some(other.into()) - } -} - -impl GPR { - fn type_(self) -> GPRType { - match self { - GPR::Rq(_) => GPRType::Rq, - GPR::Rx(_) => GPRType::Rx, - } - } - - fn rq(self) -> Option { - match self { - GPR::Rq(r) => Some(r), - GPR::Rx(_) => None, - } - } - - fn rx(self) -> Option { - match self { - GPR::Rx(r) => Some(r), - GPR::Rq(_) => None, - } - } -} - -fn arg_locs>( - types: I, -) -> impl ExactSizeIterator + DoubleEndedIterator + Clone -where - I::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, -{ - // TODO: VmCtx is in the first register - let mut int_gpr_iter = INTEGER_ARGS_IN_GPRS.iter(); - let mut float_gpr_iter = FLOAT_ARGS_IN_GPRS.iter(); - let mut stack_idx = 0; - - types - .into_iter() - .map(move |ty| { - match ty { - I32 | I64 => int_gpr_iter.next(), - F32 | F64 => float_gpr_iter.next(), - } - .map(|&r| CCLoc::Reg(r)) - .unwrap_or_else(|| { - let out = CCLoc::Stack(stack_idx); - stack_idx += 1; - out - }) - }) - // Since we only advance the iterators based on the values in `types`, - // we can't do this lazily. 
- .collect::>() - .into_iter() -} - -fn arg_locs_skip_caller_vmctx>( - types: I, -) -> impl ExactSizeIterator + DoubleEndedIterator + Clone -where - I::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, -{ - #[derive(Debug, Clone)] - struct WithInt { - caller_vmctx_ty: Option, - iter: I, - } - - impl Iterator for WithInt - where - I: Iterator, - { - type Item = SignlessType; - - fn next(&mut self) -> Option { - self.caller_vmctx_ty.take().or_else(|| self.iter.next()) - } - - fn size_hint(&self) -> (usize, Option) { - let ty_len = if self.caller_vmctx_ty.is_some() { 1 } else { 0 }; - let (lower, upper) = self.iter.size_hint(); - - (lower + ty_len, upper.map(|u| u + ty_len)) - } - } - - impl DoubleEndedIterator for WithInt - where - I: DoubleEndedIterator, - { - fn next_back(&mut self) -> Option { - self.iter - .next_back() - .or_else(|| self.caller_vmctx_ty.take()) - } - } - - impl ExactSizeIterator for WithInt where I: ExactSizeIterator {} - - arg_locs(WithInt { - caller_vmctx_ty: Some(I32), - iter: types.into_iter(), - }) - .skip(1) -} - -pub fn ret_locs(types: impl IntoIterator) -> Result, Error> { - let types = types.into_iter(); - let mut out = Vec::with_capacity(types.size_hint().0); - // TODO: VmCtx is in the first register - let mut int_gpr_iter = INTEGER_RETURN_GPRS.iter(); - let mut float_gpr_iter = FLOAT_RETURN_GPRS.iter(); - - for ty in types { - match ty { - I32 | I64 => match int_gpr_iter.next() { - None => { - return Err(Error::Microwasm( - "We don't support stack returns yet".to_string(), - )) - } - Some(val) => out.push(CCLoc::Reg(*val)), - }, - F32 | F64 => match float_gpr_iter.next() { - None => { - return Err(Error::Microwasm( - "We don't support stack returns yet".to_string(), - )) - } - Some(val) => out.push(CCLoc::Reg(*val)), - }, - } - } - - Ok(out) -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -struct GPRs { - bits: u16, -} - -impl GPRs { - fn new() -> Self { - Self { bits: 0 } - } -} - -#[allow(dead_code)] -pub mod registers { 
- use super::{RegId, GPR}; - - pub mod rq { - use super::RegId; - - pub const RAX: RegId = 0; - pub const RCX: RegId = 1; - pub const RDX: RegId = 2; - pub const RBX: RegId = 3; - pub const RSP: RegId = 4; - pub const RBP: RegId = 5; - pub const RSI: RegId = 6; - pub const RDI: RegId = 7; - pub const R8: RegId = 8; - pub const R9: RegId = 9; - pub const R10: RegId = 10; - pub const R11: RegId = 11; - pub const R12: RegId = 12; - pub const R13: RegId = 13; - pub const R14: RegId = 14; - pub const R15: RegId = 15; - } - - pub const RAX: GPR = GPR::Rq(self::rq::RAX); - pub const RCX: GPR = GPR::Rq(self::rq::RCX); - pub const RDX: GPR = GPR::Rq(self::rq::RDX); - pub const RBX: GPR = GPR::Rq(self::rq::RBX); - pub const RSP: GPR = GPR::Rq(self::rq::RSP); - pub const RBP: GPR = GPR::Rq(self::rq::RBP); - pub const RSI: GPR = GPR::Rq(self::rq::RSI); - pub const RDI: GPR = GPR::Rq(self::rq::RDI); - pub const R8: GPR = GPR::Rq(self::rq::R8); - pub const R9: GPR = GPR::Rq(self::rq::R9); - pub const R10: GPR = GPR::Rq(self::rq::R10); - pub const R11: GPR = GPR::Rq(self::rq::R11); - pub const R12: GPR = GPR::Rq(self::rq::R12); - pub const R13: GPR = GPR::Rq(self::rq::R13); - pub const R14: GPR = GPR::Rq(self::rq::R14); - pub const R15: GPR = GPR::Rq(self::rq::R15); - - pub const XMM0: GPR = GPR::Rx(0); - pub const XMM1: GPR = GPR::Rx(1); - pub const XMM2: GPR = GPR::Rx(2); - pub const XMM3: GPR = GPR::Rx(3); - pub const XMM4: GPR = GPR::Rx(4); - pub const XMM5: GPR = GPR::Rx(5); - pub const XMM6: GPR = GPR::Rx(6); - pub const XMM7: GPR = GPR::Rx(7); - pub const XMM8: GPR = GPR::Rx(8); - pub const XMM9: GPR = GPR::Rx(9); - pub const XMM10: GPR = GPR::Rx(10); - pub const XMM11: GPR = GPR::Rx(11); - pub const XMM12: GPR = GPR::Rx(12); - pub const XMM13: GPR = GPR::Rx(13); - pub const XMM14: GPR = GPR::Rx(14); - pub const XMM15: GPR = GPR::Rx(15); - - pub const NUM_GPRS: u8 = 16; -} - -const SIGN_MASK_F64: u64 = 0x8000_0000_0000_0000; -const REST_MASK_F64: u64 = !SIGN_MASK_F64; 
-const SIGN_MASK_F32: u32 = 0x8000_0000; -const REST_MASK_F32: u32 = !SIGN_MASK_F32; - -impl GPRs { - fn take(&mut self) -> Option { - let lz = self.bits.trailing_zeros(); - if lz < 16 { - let gpr = lz as RegId; - self.mark_used(gpr); - Some(gpr) - } else { - None - } - } - - fn mark_used(&mut self, gpr: RegId) { - self.bits &= !(1 << gpr as u16); - } - - fn release(&mut self, gpr: RegId) { - debug_assert!( - !self.is_free(gpr), - "released register {} was already free", - gpr - ); - self.bits |= 1 << gpr; - } - - fn is_free(self, gpr: RegId) -> bool { - (self.bits & (1 << gpr)) != 0 - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct Registers { - /// Registers at 64 bits and below (al/ah/ax/eax/rax, for example) - scratch_64: (GPRs, [u8; NUM_GPRS as usize]), - /// Registers at 128 bits (xmm0, for example) - scratch_128: (GPRs, [u8; NUM_GPRS as usize]), -} - -impl Default for Registers { - fn default() -> Self { - Self::new() - } -} - -impl Registers { - pub fn new() -> Self { - Self { - scratch_64: (GPRs::new(), [1; NUM_GPRS as _]), - scratch_128: (GPRs::new(), [1; NUM_GPRS as _]), - } - } - - pub fn release_scratch_register(&mut self) -> Result<(), Error> { - // Give ourselves a few scratch registers to work with, for now. 
- for &scratch in SCRATCH_REGS { - self.release(scratch)?; - } - Ok(()) - } - - fn scratch_counts_mut(&mut self, gpr: GPR) -> (u8, &mut (GPRs, [u8; NUM_GPRS as usize])) { - match gpr { - GPR::Rq(r) => (r, &mut self.scratch_64), - GPR::Rx(r) => (r, &mut self.scratch_128), - } - } - - fn scratch_counts(&self, gpr: GPR) -> (u8, &(GPRs, [u8; NUM_GPRS as usize])) { - match gpr { - GPR::Rq(r) => (r, &self.scratch_64), - GPR::Rx(r) => (r, &self.scratch_128), - } - } - - pub fn mark_used(&mut self, gpr: GPR) { - let (gpr, scratch_counts) = self.scratch_counts_mut(gpr); - scratch_counts.0.mark_used(gpr); - scratch_counts.1[gpr as usize] += 1; - } - - pub fn num_usages(&self, gpr: GPR) -> u8 { - let (gpr, scratch_counts) = self.scratch_counts(gpr); - scratch_counts.1[gpr as usize] - } - - pub fn take(&mut self, ty: impl Into) -> Option { - let (mk_gpr, scratch_counts) = match ty.into() { - GPRType::Rq => (GPR::Rq as fn(_) -> _, &mut self.scratch_64), - GPRType::Rx => (GPR::Rx as fn(_) -> _, &mut self.scratch_128), - }; - - let out = scratch_counts.0.take()?; - scratch_counts.1[out as usize] += 1; - Some(mk_gpr(out)) - } - - pub fn release(&mut self, gpr: GPR) -> Result<(), Error> { - let (gpr, scratch_counts) = self.scratch_counts_mut(gpr); - let c = &mut scratch_counts.1[gpr as usize]; - *c = match c.checked_sub(1) { - Some(e) => e, - None => return Err(Error::Microwasm(format!("Double-freed register: {}", gpr))), - }; - if *c == 0 { - scratch_counts.0.release(gpr); - } - Ok(()) - } - - pub fn is_free(&self, gpr: GPR) -> bool { - let (gpr, scratch_counts) = self.scratch_counts(gpr); - scratch_counts.0.is_free(gpr) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct BlockCallingConvention> { - pub stack_depth: StackDepth, - pub arguments: I, -} - -impl BlockCallingConvention { - pub fn function_start(arguments: I) -> Self { - BlockCallingConvention { - // We start and return the function with stack depth 1 since we must - // allow space for the saved return address. 
- stack_depth: StackDepth(1), - arguments, - } - } -} - -impl BlockCallingConvention -where - for<'a> &'a I::Target: IntoIterator, -{ - pub fn as_ref(&self) -> BlockCallingConvention + '_> { - BlockCallingConvention { - // We start and return the function with stack depth 1 since we must - // allow space for the saved return address. - stack_depth: self.stack_depth.clone(), - arguments: self.arguments.into_iter().copied(), - } - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub enum FunctionDefLocation { - SameModule, - PossiblyExternal, -} - -// TODO: Combine this with `ValueLocation`? -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum CCLoc { - /// Value exists in a register. - Reg(GPR), - /// Value exists on the stack. - Stack(i32), -} - -impl CCLoc { - fn try_from(other: ValueLocation) -> Option { - match other { - ValueLocation::Reg(reg) => Some(CCLoc::Reg(reg)), - ValueLocation::Stack(offset) => Some(CCLoc::Stack(offset)), - ValueLocation::Cond(_) | ValueLocation::Immediate(_) => None, - } - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum CondCode { - CF0, - CF1, - ZF0, - ZF1, - CF0AndZF0, - CF1OrZF1, - ZF0AndSFEqOF, - ZF1OrSFNeOF, - SFEqOF, - SFNeOF, -} - -mod cc { - use super::CondCode; - - pub const EQUAL: CondCode = CondCode::ZF0; - pub const NOT_EQUAL: CondCode = CondCode::ZF1; - pub const GE_U: CondCode = CondCode::CF0; - pub const LT_U: CondCode = CondCode::CF1; - pub const GT_U: CondCode = CondCode::CF0AndZF0; - pub const LE_U: CondCode = CondCode::CF1OrZF1; - pub const GE_S: CondCode = CondCode::SFEqOF; - pub const LT_S: CondCode = CondCode::SFNeOF; - pub const GT_S: CondCode = CondCode::ZF0AndSFEqOF; - pub const LE_S: CondCode = CondCode::ZF1OrSFNeOF; -} - -impl std::ops::Not for CondCode { - type Output = Self; - - fn not(self) -> Self { - use CondCode::*; - - match self { - CF0 => CF1, - CF1 => CF0, - ZF0 => ZF1, - ZF1 => ZF0, - CF0AndZF0 => CF1OrZF1, - CF1OrZF1 => CF0AndZF0, - ZF0AndSFEqOF => ZF1OrSFNeOF, - 
ZF1OrSFNeOF => ZF0AndSFEqOF, - SFEqOF => SFNeOF, - SFNeOF => SFEqOF, - } - } -} - -/// Describes location of a value. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum ValueLocation { - /// Value exists in a register. - Reg(GPR), - /// Value exists on the stack. Note that this offset is from the rsp as it - /// was when we entered the function. - Stack(i32), - /// Value is a literal - Immediate(Value), - /// Value is a set condition code - Cond(CondCode), -} - -impl From for ValueLocation { - fn from(other: CCLoc) -> Self { - match other { - CCLoc::Reg(r) => ValueLocation::Reg(r), - CCLoc::Stack(o) => ValueLocation::Stack(o), - } - } -} - -impl ValueLocation { - fn stack(self) -> Option { - match self { - ValueLocation::Stack(o) => Some(o), - _ => None, - } - } - - fn reg(self) -> Option { - match self { - ValueLocation::Reg(r) => Some(r), - _ => None, - } - } - - fn immediate(self) -> Option { - match self { - ValueLocation::Immediate(i) => Some(i), - _ => None, - } - } - - fn imm_i32(self) -> Option { - self.immediate().and_then(Value::as_i32) - } - - fn imm_i64(self) -> Option { - self.immediate().and_then(Value::as_i64) - } - - fn imm_f32(self) -> Option { - self.immediate().and_then(Value::as_f32) - } - - fn imm_f64(self) -> Option { - self.immediate().and_then(Value::as_f64) - } -} - -// TODO: This assumes only system-v calling convention. -// In system-v calling convention the first 6 arguments are passed via registers. -// All rest arguments are passed on the stack. 
-// Usually system-v uses rdi and rsi, but rdi is used for the vmctx and rsi is used for the _caller_ vmctx -const INTEGER_ARGS_IN_GPRS: &[GPR] = &[GPR::Rq(CALLER_VMCTX), RDX, RCX, R8, R9]; -const INTEGER_RETURN_GPRS: &[GPR] = &[RAX, RDX]; -const FLOAT_ARGS_IN_GPRS: &[GPR] = &[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]; -const FLOAT_RETURN_GPRS: &[GPR] = &[XMM0, XMM1]; -// List of scratch registers taken from https://wiki.osdev.org/System_V_ABI -const SCRATCH_REGS: &[GPR] = &[ - RDX, RCX, R8, R9, RAX, R10, R11, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, - XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, -]; -const VMCTX: RegId = rq::RDI; -const CALLER_VMCTX: RegId = rq::RSI; - -pub struct CodeGenSession<'module, M> { - assembler: Assembler, - pub module_context: &'module M, - pub op_offset_map: Vec<(AssemblyOffset, Box)>, - func_starts: Vec<(Option, DynamicLabel)>, - pointer_type: SignlessType, -} - -impl<'module, M> CodeGenSession<'module, M> { - pub fn new(func_count: u32, module_context: &'module M, pointer_type: SignlessType) -> Self { - let mut assembler = Assembler::new().unwrap(); - let func_starts = iter::repeat_with(|| (None, assembler.new_dynamic_label())) - .take(func_count as usize) - .collect::>(); - - CodeGenSession { - assembler, - op_offset_map: Default::default(), - func_starts, - module_context, - pointer_type, - } - } - - pub fn offset(&self) -> usize { - self.assembler.offset().0 - } - - pub fn pointer_type(&self) -> SignlessType { - self.pointer_type - } - - pub fn new_context<'this>( - &'this mut self, - func_idx: u32, - reloc_sink: &'this mut dyn binemit::RelocSink, - ) -> Context<'this, M> { - { - let func_start = &mut self.func_starts[func_idx as usize]; - - // At this point we know the exact start address of this function. Save it - // and define dynamic label at this location. 
- func_start.0 = Some(self.assembler.offset()); - self.assembler.dynamic_label(func_start.1); - } - - Context { - asm: &mut self.assembler, - current_function: func_idx, - reloc_sink, - pointer_type: self.pointer_type, - source_loc: Default::default(), - func_starts: &self.func_starts, - block_state: Default::default(), - module_context: self.module_context, - labels: Default::default(), - } - } - - pub fn into_translated_code_section(self) -> Result { - let exec_buf = self - .assembler - .finalize() - .map_err(|_asm| Error::Assembler("assembler error".to_owned()))?; - let func_starts = self - .func_starts - .iter() - .map(|(offset, _)| offset.unwrap()) - .collect::>(); - Ok(TranslatedCodeSection { - exec_buf, - func_starts, - op_offset_map: self.op_offset_map, - // TODO - relocatable_accesses: vec![], - }) - } -} - -#[derive(Debug)] -struct RelocateAddress { - reg: Option, - imm: usize, -} - -#[derive(Debug)] -struct RelocateAccess { - position: AssemblyOffset, - dst_reg: GPR, - address: RelocateAddress, -} - -pub struct TranslatedCodeSection { - exec_buf: ExecutableBuffer, - func_starts: Vec, - #[allow(dead_code)] - relocatable_accesses: Vec, - op_offset_map: Vec<(AssemblyOffset, Box)>, -} - -impl TranslatedCodeSection { - pub fn func_start(&self, idx: usize) -> *const u8 { - let offset = self.func_starts[idx]; - self.exec_buf.ptr(offset) - } - - pub fn func_range(&self, idx: usize) -> std::ops::Range { - let end = self - .func_starts - .get(idx + 1) - .map(|i| i.0) - .unwrap_or_else(|| self.exec_buf.len()); - - self.func_starts[idx].0..end - } - - pub fn funcs<'a>(&'a self) -> impl Iterator> + 'a { - (0..self.func_starts.len()).map(move |i| self.func_range(i)) - } - - pub fn buffer(&self) -> &[u8] { - &*self.exec_buf - } - - pub fn disassemble(&self) { - crate::disassemble::disassemble(&*self.exec_buf, &self.op_offset_map).unwrap(); - } -} - -#[derive(Debug, Default, Clone)] -pub struct BlockState { - pub stack: Stack, - pub depth: StackDepth, - pub regs: 
Registers, -} - -type Stack = Vec; - -mod labels { - use super::Label; - use std::collections::HashMap; - - pub struct LabelInfo { - pub label: Label, - pub align: u32, - pub inner: LabelValue, - } - - #[derive(Copy, Clone, PartialEq, Eq, Hash)] - pub enum LabelValue { - Ret, - I32(i32), - I64(i64), - } - - #[derive(Default)] - pub struct Labels { - map: HashMap, - } - - impl Labels { - pub fn drain(&mut self) -> impl Iterator + '_ { - self.map.drain().map(|(_, info)| info) - } - - pub fn insert( - &mut self, - l: impl FnOnce() -> Label, - align: u32, - label: LabelValue, - ) -> Label { - let val = self.map.entry(label).or_insert_with(move || LabelInfo { - label: l(), - align, - inner: label, - }); - - val.align = val.align.max(align); - - val.label - } - } -} - -use labels::{LabelInfo, LabelValue, Labels}; - -pub struct Context<'this, M> { - pub asm: &'this mut Assembler, - pointer_type: SignlessType, - #[allow(dead_code)] - source_loc: SourceLoc, - reloc_sink: &'this mut dyn binemit::RelocSink, - module_context: &'this M, - current_function: u32, - func_starts: &'this Vec<(Option, DynamicLabel)>, - /// Each push and pop on the value stack increments or decrements this value by 1 respectively. - pub block_state: BlockState, - labels: Labels, -} - -/// Label in code. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct Label(DynamicLabel); - -/// Offset from starting value of SP counted in words. -#[derive(Default, Debug, Clone, PartialEq, Eq)] -pub struct StackDepth(u32); - -impl StackDepth { - pub fn reserve(&mut self, slots: u32) { - self.0 = self.0.checked_add(slots).unwrap(); - } - - pub fn free(&mut self, slots: u32) { - self.0 = self.0.checked_sub(slots).unwrap(); - } -} - -macro_rules! int_div { - ($full_div_s:ident, $full_div_u:ident, $div_u:ident, $div_s:ident, $rem_u:ident, $rem_s:ident, $imm_fn:ident, $signed_ty:ty, $unsigned_ty:ty, $reg_ty:tt, $pointer_ty:tt) => { - // TODO: Fast div using mul for constant divisor? 
It looks like LLVM doesn't do that for us when - // emitting Wasm. - pub fn $div_u(&mut self) -> Result<(), Error>{ - let divisor = self.pop()?; - let dividend = self.pop()?; - - if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { - if divisor == 0 { - self.trap(TrapCode::IntegerDivisionByZero); - self.push(ValueLocation::Immediate((0 as $unsigned_ty).into()))?; - } else { - self.push(ValueLocation::Immediate( - <$unsigned_ty>::wrapping_div(dividend as _, divisor as _).into(), - ))?; - } - - return Ok(()) - } - - let (div, rem, saved) = self.$full_div_u(divisor, dividend)?; - - self.free_value(rem)?; - - let div = match div { - ValueLocation::Reg(div) => { - if saved.clone().any(|dst| dst == div) { - let new = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; mov Rq(new.rq().unwrap()), Rq(div.rq().unwrap()) - ); - self.block_state.regs.release(div)?; - ValueLocation::Reg(new) - } else { - ValueLocation::Reg(div) - } - } - ValueLocation::Stack(_) | - ValueLocation::Cond(_) | - ValueLocation::Immediate(_) => div, - }; - - self.cleanup_gprs(saved); - - self.push(div)?; - Ok(()) - } - - // TODO: Fast div using mul for constant divisor? It looks like LLVM doesn't do that for us when - // emitting Wasm. 
- pub fn $div_s(&mut self) -> Result<(), Error>{ - let divisor = self.pop()?; - let dividend = self.pop()?; - - if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { - if divisor == 0 { - self.trap(TrapCode::IntegerDivisionByZero); - self.push(ValueLocation::Immediate((0 as $signed_ty).into()))?; - } else { - self.push(ValueLocation::Immediate( - <$signed_ty>::wrapping_div(dividend, divisor).into(), - ))?; - } - - return Ok(()) - } - - let (div, rem, saved) = self.$full_div_s(divisor, dividend)?; - - self.free_value(rem)?; - - let div = match div { - ValueLocation::Reg(div) => { - if saved.clone().any(|dst| dst == div) { - let new = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; mov Rq(new.rq().unwrap()), Rq(div.rq().unwrap()) - ); - self.block_state.regs.release(div)?; - ValueLocation::Reg(new) - } else { - ValueLocation::Reg(div) - } - } - ValueLocation::Stack(_) | - ValueLocation::Cond(_) | - ValueLocation::Immediate(_) => div, - }; - - self.cleanup_gprs(saved); - - self.push(div)?; - Ok(()) - } - - pub fn $rem_u(&mut self) -> Result<(), Error>{ - let divisor = self.pop()?; - let dividend = self.pop()?; - - if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { - if divisor == 0 { - self.trap(TrapCode::IntegerDivisionByZero); - self.push(ValueLocation::Immediate((0 as $unsigned_ty).into()))?; - } else { - self.push(ValueLocation::Immediate( - (dividend as $unsigned_ty % divisor as $unsigned_ty).into(), - ))?; - } - return Ok(()); - } - - let (div, rem, saved) = self.$full_div_u(divisor, dividend)?; - - self.free_value(div)?; - - let rem = match rem { - ValueLocation::Reg(rem) => { - if saved.clone().any(|dst| dst == rem) { - let new = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; mov Rq(new.rq().unwrap()), Rq(rem.rq().unwrap()) - ); - self.block_state.regs.release(rem)?; - ValueLocation::Reg(new) - } else { - ValueLocation::Reg(rem) - } - } - ValueLocation::Stack(_) | - ValueLocation::Cond(_) | 
- ValueLocation::Immediate(_) => rem, - }; - - self.cleanup_gprs(saved); - - self.push(rem)?; - Ok(()) - } - - pub fn $rem_s(&mut self) -> Result<(), Error>{ - let mut divisor = self.pop()?; - let dividend = self.pop()?; - - if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { - if divisor == 0 { - self.trap(TrapCode::IntegerDivisionByZero); - self.push(ValueLocation::Immediate((0 as $signed_ty).into()))?; - } else { - self.push(ValueLocation::Immediate((dividend % divisor).into()))?; - } - return Ok(()); - } - - let is_neg1 = self.create_label(); - - let current_depth = self.block_state.depth.clone(); - - // TODO: This could cause segfaults because of implicit push/pop - let gen_neg1_case = match divisor { - ValueLocation::Immediate(_) => { - if divisor.$imm_fn().unwrap() == -1 { - self.push(ValueLocation::Immediate((-1 as $signed_ty).into()))?; - self.free_value(dividend)?; - return Ok(()); - } - - false - } - ValueLocation::Reg(_) => { - let reg = self.put_into_register(GPRType::Rq, &mut divisor)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp $reg_ty(reg.rq().unwrap()), -1 - ); - // TODO: We could choose `current_depth` as the depth here instead but we currently - // don't for simplicity - self.set_stack_depth(current_depth.clone())?; - dynasm!(self.asm - ; je =>is_neg1.0 - ); - - true - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - dynasm!(self.asm - ; cmp $pointer_ty [rsp + offset], -1 - ); - self.set_stack_depth(current_depth.clone())?; - dynasm!(self.asm - ; je =>is_neg1.0 - ); - - true - } - ValueLocation::Cond(_) => { - // `cc` can never be `-1`, only `0` and `1` - false - } - }; - - let (div, rem, saved) = self.$full_div_s(divisor, dividend)?; - - self.free_value(div)?; - - let rem = match rem { - ValueLocation::Reg(rem) => { - if saved.clone().any(|dst| dst == rem) { - let new = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; 
mov Rq(new.rq().unwrap()), Rq(rem.rq().unwrap()) - ); - self.block_state.regs.release(rem)?; - ValueLocation::Reg(new) - } else { - ValueLocation::Reg(rem) - } - } - ValueLocation::Stack(_) | - ValueLocation::Cond(_) | - ValueLocation::Immediate(_) => rem, - }; - - self.cleanup_gprs(saved); - - if gen_neg1_case { - let ret = self.create_label(); - self.set_stack_depth(current_depth.clone())?; - dynasm!(self.asm - ; jmp =>ret.0 - ); - self.define_label(is_neg1); - - let dst_ccloc = match CCLoc::try_from(rem) { - None => { - return Err(Error::Microwasm( - "$rem_s Programmer error".to_string(), - )) - } - Some(o) => o, - }; - - self.copy_value( - ValueLocation::Immediate((0 as $signed_ty).into()), - dst_ccloc - )?; - - self.set_stack_depth(current_depth.clone())?; - self.define_label(ret); - } - - self.push(rem)?; - Ok(()) - } - } -} - -macro_rules! unop { - ($name:ident, $instr:ident, $reg_ty:tt, $typ:ty, $const_fallback:expr) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => - ValueLocation::Immediate( - ($const_fallback(imm.as_int().unwrap() as $typ) as $typ).into() - ), - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(Type::for_::<$typ>()).unwrap(); - dynasm!(self.asm - ; $instr $reg_ty(temp.rq().unwrap()), [rsp + offset] - ); - ValueLocation::Reg(temp) - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = self.put_into_register(GPRType::Rq, &mut val)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let temp = self.take_reg(Type::for_::<$typ>()).unwrap(); - dynasm!(self.asm - ; $instr $reg_ty(temp.rq().unwrap()), $reg_ty(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - self.push(out_val)?; - Ok(()) - } - } -} - -macro_rules! 
conversion { - ( - $name:ident, - $instr:ident, - $in_reg_ty:tt, - $in_reg_fn:ident, - $out_reg_ty:tt, - $out_reg_fn:ident, - $in_typ:ty, - $out_typ:ty, - $const_ty_fn:ident, - $const_fallback:expr - ) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => - ValueLocation::Immediate( - $const_fallback(imm.$const_ty_fn().unwrap()).into() - ), - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(Type::for_::<$out_typ>()).unwrap(); - dynasm!(self.asm - ; $instr $out_reg_ty(temp.$out_reg_fn().unwrap()), [rsp + offset] - ); - - ValueLocation::Reg(temp) - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = self.put_into_register(Type::for_::<$in_typ>(), &mut val)? .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let temp = self.take_reg(Type::for_::<$out_typ>()).unwrap(); - - dynasm!(self.asm - ; $instr $out_reg_ty(temp.$out_reg_fn().unwrap()), $in_reg_ty(reg.$in_reg_fn().unwrap()) - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - } -} - -// TODO: Support immediate `count` parameters -macro_rules! 
shift { - ($name:ident, $reg_ty:tt, $instr:ident, $const_fallback:expr, $ty:expr) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let mut count = self.pop()?; - let mut val = self.pop()?; - - if let Some(imm) = count.immediate() { - if let Some(imm) = imm.as_int() { - if let Ok(imm) = i8::try_from(imm) { - let reg = self.put_into_temp_register($ty, &mut val)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; $instr $reg_ty(reg.rq().unwrap()), imm - ); - self.push(ValueLocation::Reg(reg))?; - return Ok(()); - } - } - } - - if val == ValueLocation::Reg(RCX) { - let new = self.take_reg($ty).unwrap(); - self.copy_value(val, CCLoc::Reg(new))?; - self.free_value(val)?; - val = ValueLocation::Reg(new); - } - - // TODO: Maybe allocate `RCX`, write `count` to it and then free `count`. - // Once we've implemented refcounting this will do the right thing - // for free. - let temp_rcx = match count { - ValueLocation::Reg(RCX) => {None} - other => { - let out = if self.block_state.regs.is_free(RCX) { - None - } else { - let new_reg = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; mov Rq(new_reg.rq().unwrap()), rcx - ); - Some(new_reg) - }; - - match other { - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let gpr = self.put_into_register(I32, &mut count)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - dynasm!(self.asm - ; mov cl, Rb(gpr.rq().unwrap()) - ); - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - dynasm!(self.asm - ; mov cl, [rsp + offset] - ); - } - ValueLocation::Immediate(imm) => { - dynasm!(self.asm - ; mov cl, imm.as_int().unwrap() as i8 - ); - } - } - - out - } - }; - - self.free_value(count)?; - self.block_state.regs.mark_used(RCX); - count = ValueLocation::Reg(RCX); - - let reg = self.put_into_temp_register($ty, &mut val)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; $instr 
$reg_ty(reg.rq().unwrap()), cl - ); - - self.free_value(count)?; - - if let Some(gpr) = temp_rcx { - dynasm!(self.asm - ; mov rcx, Rq(gpr.rq().unwrap()) - ); - self.block_state.regs.release(gpr)?; - } - - self.push(val)?; - Ok(()) - } - } -} - -macro_rules! cmp_i32 { - ($name:ident, $flags:expr, $reverse_flags:expr, $const_fallback:expr) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = if let Some(i) = left.imm_i32() { - match right { - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - dynasm!(self.asm - ; cmp DWORD [rsp + offset], i - ); - ValueLocation::Cond($reverse_flags) - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - dynasm!(self.asm - ; cmp Rd(rreg.rq().unwrap()), i - ); - ValueLocation::Cond($reverse_flags) - } - ValueLocation::Immediate(right) => { - ValueLocation::Immediate( - (if $const_fallback(i, right.as_i32().unwrap()) { - 1i32 - } else { - 0i32 - }).into() - ) - } - } - } else { - let lreg = self.put_into_register(I32, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - match right { - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - dynasm!(self.asm - ; cmp Rd(lreg.rq().unwrap()), [rsp + offset] - ); - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp Rd(lreg.rq().unwrap()), Rd(rreg.rq().unwrap()) - ); - } - ValueLocation::Immediate(i) => { - dynasm!(self.asm - ; cmp Rd(lreg.rq().unwrap()), i.as_i32().unwrap() - ); - } - } - - ValueLocation::Cond($flags) - }; - - self.free_value(left)?; - self.free_value(right)?; - - self.push(out)?; - Ok(()) - } - } -} - -macro_rules! 
cmp_i64 { - ($name:ident, $flags:expr, $reverse_flags:expr, $const_fallback:expr) => { - pub fn $name(&mut self) -> Result<(), Error> { - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = if let Some(i) = left.imm_i64() { - match right { - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - if let Some(i) = i.try_into().ok() { - dynasm!(self.asm - ; cmp QWORD [rsp + offset], i - ); - } else { - let lreg = self.put_into_register(I32, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp QWORD [rsp + offset], Rq(lreg.rq().unwrap()) - ); - } - ValueLocation::Cond($reverse_flags) - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - if let Some(i) = i.try_into().ok() { - dynasm!(self.asm - ; cmp Rq(rreg.rq().unwrap()), i - ); - } else { - let lreg = self.put_into_register(I32, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp Rq(rreg.rq().unwrap()), Rq(lreg.rq().unwrap()) - ); - } - ValueLocation::Cond($reverse_flags) - } - ValueLocation::Immediate(right) => { - ValueLocation::Immediate( - (if $const_fallback(i, right.as_i64().unwrap()) { - 1i32 - } else { - 0i32 - }).into() - ) - } - } - } else { - let lreg = self.put_into_register(I64, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - match right { - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - dynasm!(self.asm - ; cmp Rq(lreg.rq().unwrap()), [rsp + offset] - ); - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) - ); - } - 
ValueLocation::Immediate(i) => { - let i = i.as_i64().unwrap(); - if let Some(i) = i.try_into().ok() { - dynasm!(self.asm - ; cmp Rq(lreg.rq().unwrap()), i - ); - } else { - let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; cmp Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) - ); - } - } - } - - ValueLocation::Cond($flags) - }; - - self.free_value(left)?; - self.free_value(right)?; - self.push(out)?; - Ok(()) - } - } -} - -macro_rules! cmp_f32 { - ($name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => { - cmp_float!( - comiss, - f32, - imm_f32, - $name, - $reverse_name, - $instr, - $const_fallback - ); - }; -} - -macro_rules! eq_float { - ($name:ident, $instr:ident, $imm_fn:ident, $const_fallback:expr) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let right = self.pop()?; - let left = self.pop()?; - - if let Some(right) = right.immediate() { - if let Some(left) = left.immediate() { - self.push(ValueLocation::Immediate( - if $const_fallback(left.$imm_fn().unwrap(), right.$imm_fn().unwrap()) { - 1u32 - } else { - 0 - }.into() - ))?; - return Ok(()); - } - } - - let (mut left, mut right) = match left { - ValueLocation::Reg(r) if self.block_state.regs.num_usages(r) <= 1 => (left, right), - _ => (right, left) - }; - - let lreg = self.put_into_temp_register(GPRType::Rx, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let rreg = self.put_into_register(GPRType::Rx, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - let out = self.take_reg(I32).unwrap(); - - dynasm!(self.asm - ; $instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; movd Rd(out.rq().unwrap()), Rx(lreg.rx().unwrap()) - ; and Rd(out.rq().unwrap()), 1 - ); - - self.push(ValueLocation::Reg(out))?; - self.free_value(left)?; - self.free_value(right)?; - Ok(()) - } - - } -} - -macro_rules! 
minmax_float { - ( - $name:ident, - $instr:ident, - $cmpinstr:ident, - $addinstr:ident, - $combineinstr:ident, - $imm_fn:ident, - $const_fallback:expr - ) => { - pub fn $name(&mut self) -> Result<(), Error>{ - let right = self.pop()?; - let left = self.pop()?; - - if let Some(right) = right.immediate() { - if let Some(left) = left.immediate() { - self.push(ValueLocation::Immediate( - $const_fallback(left.$imm_fn().unwrap(), right.$imm_fn().unwrap()).into() - ))?; - return Ok(()); - } - } - - let (mut left, mut right) = match left { - ValueLocation::Reg(r) if self.block_state.regs.num_usages(r) <= 1 => (left, right), - _ => (right, left) - }; - - let lreg = self.put_into_temp_register(GPRType::Rx, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let rreg = self.put_into_register(GPRType::Rx, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; $cmpinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; je >equal - ; $instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; jmp >ret - ; equal: - ; jnp >equal_but_not_parity - ; $addinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; jmp >ret - ; equal_but_not_parity: - ; $combineinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; ret: - ); - - self.push(left)?; - self.free_value(right)?; - Ok(()) - } - - } -} - -macro_rules! cmp_f64 { - ($name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => { - cmp_float!( - comisd, - f64, - imm_f64, - $name, - $reverse_name, - $instr, - $const_fallback - ); - }; -} - -macro_rules! 
cmp_float { - (@helper $cmp_instr:ident, $ty:ty, $imm_fn:ident, $self:expr, $left:expr, $right:expr, $instr:ident, $const_fallback:expr) => {{ - let (left, right, this) = ($left, $right, $self); - if let (Some(left), Some(right)) = (left.$imm_fn(), right.$imm_fn()) { - if $const_fallback(<$ty>::from_bits(left.to_bits()), <$ty>::from_bits(right.to_bits())) { - ValueLocation::Immediate(1i32.into()) - } else { - ValueLocation::Immediate(0i32.into()) - } - } else { - let lreg = this.put_into_register(GPRType::Rx, left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - let result = this.take_reg(I32).unwrap(); - - match right { - ValueLocation::Stack(offset) => { - let offset = this.adjusted_offset(*offset); - - dynasm!(this.asm - ; xor Rq(result.rq().unwrap()), Rq(result.rq().unwrap()) - ; $cmp_instr Rx(lreg.rx().unwrap()), [rsp + offset] - ; $instr Rb(result.rq().unwrap()) - ); - } - right => { - let rreg = this.put_into_register(GPRType::Rx, right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(this.asm - ; xor Rq(result.rq().unwrap()), Rq(result.rq().unwrap()) - ; $cmp_instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ; $instr Rb(result.rq().unwrap()) - ); - } - } - - ValueLocation::Reg(result) - } - }}; - ($cmp_instr:ident, $ty:ty, $imm_fn:ident, $name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => { - pub fn $name(&mut self) -> Result<(), Error> { - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = cmp_float!(@helper - $cmp_instr, - $ty, - $imm_fn, - &mut *self, - &mut left, - &mut right, - $instr, - $const_fallback - ); - - self.free_value(left)?; - self.free_value(right)?; - - self.push(out)?; - Ok(()) - } - - pub fn $reverse_name(&mut self) -> Result<(), Error> { - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = cmp_float!(@helper - $cmp_instr, - $ty, - $imm_fn, - &mut *self, - &mut right, - &mut left, - $instr, - 
$const_fallback - ); - - self.free_value(left)?; - self.free_value(right)?; - - self.push(out)?; - Ok(()) - } - }; -} - -macro_rules! binop_i32 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - binop!( - $name, - $instr, - $const_fallback, - Rd, - rq, - I32, - imm_i32, - |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm - ; $instr Rd(op1.rq().unwrap()), i - ) - ); - }; -} - -macro_rules! commutative_binop_i32 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - commutative_binop!( - $name, - $instr, - $const_fallback, - Rd, - rq, - I32, - imm_i32, - |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm - ; $instr Rd(op1.rq().unwrap()), i - ) - ); - }; -} - -macro_rules! binop_i64 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - binop!( - $name, - $instr, - $const_fallback, - Rq, - rq, - I64, - imm_i64, - |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm - ; $instr Rq(op1.rq().unwrap()), i - ) - ); - }; -} - -macro_rules! commutative_binop_i64 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - commutative_binop!( - $name, - $instr, - $const_fallback, - Rq, - rq, - I64, - imm_i64, - |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm - ; $instr Rq(op1.rq().unwrap()), i - ) - ); - }; -} - -macro_rules! binop_f32 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - binop!( - $name, - $instr, - |a: Ieee32, b: Ieee32| Ieee32::from_bits( - $const_fallback(f32::from_bits(a.to_bits()), f32::from_bits(b.to_bits())).to_bits() - ), - Rx, - rx, - F32, - imm_f32, - |_, _, _: i32| unreachable!() - ); - }; -} - -macro_rules! commutative_binop_f32 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - commutative_binop!( - $name, - $instr, - |a: Ieee32, b: Ieee32| Ieee32::from_bits( - $const_fallback(f32::from_bits(a.to_bits()), f32::from_bits(b.to_bits())).to_bits() - ), - Rx, - rx, - F32, - imm_f32, - |_, _, _: i32| unreachable!() - ); - }; -} - -macro_rules! 
binop_f64 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - binop!( - $name, - $instr, - |a: Ieee64, b: Ieee64| Ieee64::from_bits( - $const_fallback(f64::from_bits(a.to_bits()), f64::from_bits(b.to_bits())).to_bits() - ), - Rx, - rx, - F64, - imm_f64, - |_, _, _: i32| unreachable!() - ); - }; -} - -macro_rules! commutative_binop_f64 { - ($name:ident, $instr:ident, $const_fallback:expr) => { - commutative_binop!( - $name, - $instr, - |a: Ieee64, b: Ieee64| Ieee64::from_bits( - $const_fallback(f64::from_bits(a.to_bits()), f64::from_bits(b.to_bits())).to_bits() - ), - Rx, - rx, - F64, - imm_f64, - |_, _, _: i32| unreachable!() - ); - }; -} -macro_rules! commutative_binop { - ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr) => { - binop!( - $name, - $instr, - $const_fallback, - $reg_ty, - $reg_fn, - $ty, - $imm_fn, - $direct_imm, - |op1: ValueLocation, op0: ValueLocation| match op1 { - ValueLocation::Reg(_) => (op1, op0), - _ => { - if op0.immediate().is_some() { - (op1, op0) - } else { - (op0, op1) - } - } - } - ); - }; -} - -macro_rules! 
binop { - ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr) => { - binop!($name, $instr, $const_fallback, $reg_ty, $reg_fn, $ty, $imm_fn, $direct_imm, |a, b| (a, b)); - }; - ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr, $map_op:expr) => { - pub fn $name(&mut self) -> Result<(), Error> { - let right = self.pop()?; - let left = self.pop()?; - - if let Some(i1) = left.$imm_fn() { - if let Some(i0) = right.$imm_fn() { - self.block_state.stack.push(ValueLocation::Immediate($const_fallback(i1, i0).into())); - return Ok(()); - } - } - - let (mut left, mut right) = $map_op(left, right); - let lreg = self.put_into_temp_register($ty, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - match right { - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - // This handles the case where we (for example) have a float in an `Rq` reg - let right_reg = self.put_into_register($ty, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; $instr $reg_ty(lreg.$reg_fn().unwrap()), $reg_ty(right_reg.$reg_fn().unwrap()) - ); - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - dynasm!(self.asm - ; $instr $reg_ty(lreg.$reg_fn().unwrap()), [rsp + offset] - ); - } - ValueLocation::Immediate(i) => { - if let Some(i) = i.as_int().and_then(|i| i.try_into().ok()) { - $direct_imm(&mut *self, lreg, i); - } else { - let scratch = self.take_reg($ty).unwrap(); - self.immediate_to_reg(scratch, i)?; - - dynasm!(self.asm - ; $instr $reg_ty(lreg.$reg_fn().unwrap()), $reg_ty(scratch.$reg_fn().unwrap()) - ); - - self.block_state.regs.release(scratch)?; - } - } - } - - self.free_value(right)?; - self.push(left)?; - Ok(()) - } - } -} - -macro_rules! 
load { - (@inner $name:ident, $rtype:expr, $reg_ty:tt, $emit_fn:expr) => { - pub fn $name(&mut self, offset: u32) -> Result<(), Error> { - fn load_to_reg<_M: ModuleContext>( - ctx: &mut Context<_M>, - dst: GPR, - (offset, runtime_offset): (i32, Result) - ) -> Result<(), Error> { - let mem_index = 0; - let reg_offset = ctx.module_context - .defined_memory_index(mem_index) - .map(|index| ( - None, - ctx.module_context.vmctx_vmmemory_definition(index) as i32 - )); - let (reg, mem_offset) = reg_offset.unwrap_or_else(|| { - let reg = ctx.take_reg(I64).unwrap(); - - dynasm!(ctx.asm - ; mov Rq(reg.rq().unwrap()), [ - Rq(VMCTX) + ctx.module_context.vmctx_vmmemory_import_from(mem_index) as i32 - ] - ); - - (Some(reg), 0) - }); - - let vmctx = GPR::Rq(VMCTX); - - if ctx.module_context.emit_memory_bounds_check() { - let addr_reg = match runtime_offset { - Ok(imm) => { - let addr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rq(addr_reg.rq().unwrap()), QWORD imm as i64 + offset as i64 - ); - addr_reg - } - Err(gpr) => { - if offset == 0 { - ctx.clone_to_register(I32, ValueLocation::Reg(gpr))?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } else if offset > 0 { - let addr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; lea Rq(addr_reg.rq().unwrap()), [Rq(gpr.rq().unwrap()) + offset] - ); - addr_reg - } else { - let addr_reg = ctx.take_reg(I64).unwrap(); - let offset_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rd(offset_reg.rq().unwrap()), offset - ; mov Rq(addr_reg.rq().unwrap()), Rq(gpr.rq().unwrap()) - ; add Rq(addr_reg.rq().unwrap()), Rq(offset_reg.rq().unwrap()) - ); - ctx.block_state.regs.release(offset_reg)?; - addr_reg - } - } - }; - dynasm!(ctx.asm - ; cmp Rq(addr_reg.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - mem_offset + - ctx.module_context.vmmemory_definition_current_length() as i32 - ] - ;; ctx.trap_if(cc::GE_U, TrapCode::HeapOutOfBounds) - ); - ctx.block_state.regs.release(addr_reg)?; - } - - let mem_ptr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rq(mem_ptr_reg.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - mem_offset + - ctx.module_context.vmmemory_definition_base() as i32 - ] - ); - if let Some(reg) = reg { - ctx.block_state.regs.release(reg)?; - } - $emit_fn(ctx, dst, mem_ptr_reg, runtime_offset, offset)?; - ctx.block_state.regs.release(mem_ptr_reg)?; - Ok(()) - } - - let base = self.pop()?; - - let temp = self.take_reg($rtype).unwrap(); - - match base { - ValueLocation::Immediate(i) => { - load_to_reg(self, temp, (offset as _, Ok(i.as_i32().unwrap())))?; - } - mut base => { - let gpr = self.put_into_register(I32, &mut base)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - load_to_reg(self, temp, (offset as _, Err(gpr)))?; - self.free_value(base)?; - } - } - - self.push(ValueLocation::Reg(temp))?; - Ok(()) - } - }; - ($name:ident, $rtype:expr, $reg_ty:tt, NONE, $rq_instr:ident, $ty:ident) => { - load!(@inner - $name, - $rtype, - $reg_ty, - |ctx: &mut Context<_>, dst: GPR, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32| -> Result<(), Error> { - match runtime_offset { - 
Ok(imm) => { - dynasm!(ctx.asm - ; $rq_instr $reg_ty(dst.rq().unwrap()), $ty [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm] - ); - Ok(()) - } - Err(offset_reg) => { - dynasm!(ctx.asm - ; $rq_instr $reg_ty(dst.rq().unwrap()), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] - ); - Ok(()) - } - } - } - ); - }; - ($name:ident, $rtype:expr, $reg_ty:tt, $xmm_instr:ident, $rq_instr:ident, $ty:ident) => { - load!(@inner - $name, - $rtype, - $reg_ty, - |ctx: &mut Context<_>, dst: GPR, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32| -> Result<(), Error> { - match (dst, runtime_offset) { - (GPR::Rq(r), Ok(imm)) => { - dynasm!(ctx.asm - ; $rq_instr $reg_ty(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm] - ); - Ok(()) - } - (GPR::Rx(r), Ok(imm)) => { - if let Some(combined) = offset.checked_add(imm) { - dynasm!(ctx.asm - ; $xmm_instr Rx(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + combined] - ); - Ok(()) - } else { - let offset_reg = ctx.take_reg(GPRType::Rq).unwrap(); - dynasm!(ctx.asm - ; mov Rq(offset_reg.rq().unwrap()), offset - ; $xmm_instr Rx(r), $ty [ - Rq(mem_ptr_reg.rq().unwrap()) + - Rq(offset_reg.rq().unwrap()) + - imm - ] - ); - ctx.block_state.regs.release(offset_reg)?; - Ok(()) - } - } - (GPR::Rq(r), Err(offset_reg)) => { - dynasm!(ctx.asm - ; $rq_instr $reg_ty(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] - ); - Ok(()) - } - (GPR::Rx(r), Err(offset_reg)) => { - dynasm!(ctx.asm - ; $xmm_instr Rx(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] - ); - Ok(()) - } - } - } - ); - }; -} - -macro_rules! 
store { - (@inner $name:ident, $int_reg_ty:tt, $match_offset:expr, $size:ident) => { - pub fn $name(&mut self, offset: u32) -> Result<(), Error>{ - fn store_from_reg<_M: ModuleContext>( - ctx: &mut Context<_M>, - src: GPR, - (offset, runtime_offset): (i32, Result) - ) -> Result<(), Error> { - let mem_index = 0; - let reg_offset = ctx.module_context - .defined_memory_index(mem_index) - .map(|index| ( - None, - ctx.module_context.vmctx_vmmemory_definition(index) as i32 - )); - let (reg, mem_offset) = reg_offset.unwrap_or_else(|| { - let reg = ctx.take_reg(I64).unwrap(); - - dynasm!(ctx.asm - ; mov Rq(reg.rq().unwrap()), [ - Rq(VMCTX) + ctx.module_context.vmctx_vmmemory_import_from(mem_index) as i32 - ] - ); - - (Some(reg), 0) - }); - - let vmctx = GPR::Rq(VMCTX); - - if ctx.module_context.emit_memory_bounds_check() { - let addr_reg = match runtime_offset { - Ok(imm) => { - let addr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rq(addr_reg.rq().unwrap()), QWORD imm as i64 + offset as i64 - ); - addr_reg - } - Err(gpr) => { - if offset == 0 { - ctx.clone_to_register(I32, ValueLocation::Reg(gpr))?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- - } else if offset > 0 { - let addr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; lea Rq(addr_reg.rq().unwrap()), [Rq(gpr.rq().unwrap()) + offset] - ); - addr_reg - } else { - let addr_reg = ctx.take_reg(I64).unwrap(); - let offset_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rd(offset_reg.rq().unwrap()), offset - ; mov Rq(addr_reg.rq().unwrap()), Rq(gpr.rq().unwrap()) - ; add Rq(addr_reg.rq().unwrap()), Rq(offset_reg.rq().unwrap()) - ); - ctx.block_state.regs.release(offset_reg)?; - addr_reg - } - } - }; - dynasm!(ctx.asm - ; cmp Rq(addr_reg.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - mem_offset + - ctx.module_context.vmmemory_definition_current_length() as i32 - ] - ;; ctx.trap_if(cc::GE_U, TrapCode::HeapOutOfBounds) - ); - ctx.block_state.regs.release(addr_reg)?; - } - - let mem_ptr_reg = ctx.take_reg(I64).unwrap(); - dynasm!(ctx.asm - ; mov Rq(mem_ptr_reg.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - mem_offset + - ctx.module_context.vmmemory_definition_base() as i32 - ] - ); - if let Some(reg) = reg { - ctx.block_state.regs.release(reg)?; - } - let src = $match_offset(ctx, mem_ptr_reg, runtime_offset, offset, src)?; - ctx.block_state.regs.release(mem_ptr_reg)?; - ctx.block_state.regs.release(src)?; - Ok(()) - } - - if !(offset <= i32::max_value() as u32) { - return Err(Error::Microwasm(format!("store: offset value too big {}", offset))) - } - - let mut src = self.pop()?; - let base = self.pop()?; - - // `store_from_reg` frees `src` - // TODO: Would it be better to free it outside `store_from_reg`? - let src_reg = self.put_into_register(None, &mut src)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - - match base { - ValueLocation::Immediate(i) => { - store_from_reg(self, src_reg, (offset as i32, Ok(i.as_i32().unwrap())))? 
- } - mut base => { - let gpr = self.put_into_register(I32, &mut base)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - store_from_reg(self, src_reg, (offset as i32, Err(gpr)))?; - self.free_value(base)?; - } - } - Ok(()) - } - }; - ($name:ident, $int_reg_ty:tt, NONE, $size:ident) => { - store!(@inner - $name, - $int_reg_ty, - |ctx: &mut Context<_>, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32, src| -> Result { - let src_reg = ctx.put_into_temp_register(GPRType::Rq, &mut ValueLocation::Reg(src))?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - match runtime_offset { - Ok(imm) => { - dynasm!(ctx.asm - ; mov [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], $int_reg_ty(src_reg.rq().unwrap()) - ); - } - Err(offset_reg) => { - dynasm!(ctx.asm - ; mov [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], $int_reg_ty(src_reg.rq().unwrap()) - ); - } - } - - Ok(src_reg) - }, - $size - ); - }; - ($name:ident, $int_reg_ty:tt, $xmm_instr:ident, $size:ident) => { - store!(@inner - $name, - $int_reg_ty, - |ctx: &mut Context<_>, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32, src| -> Result { - match (runtime_offset, src) { - (Ok(imm), GPR::Rq(r)) => { - dynasm!(ctx.asm - ; mov [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], $int_reg_ty(r) - ); - } - (Ok(imm), GPR::Rx(r)) => { - dynasm!(ctx.asm - ; $xmm_instr [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], Rx(r) - ); - } - (Err(offset_reg), GPR::Rq(r)) => { - dynasm!(ctx.asm - ; mov [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], $int_reg_ty(r) - ); - } - (Err(offset_reg), GPR::Rx(r)) => { - dynasm!(ctx.asm - ; $xmm_instr [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], Rx(r) - ); - } - } - - Ok(src) - }, - $size - ); - }; -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct VirtualCallingConvention { - pub stack: Stack, - pub depth: StackDepth, -} - -impl<'this, M: ModuleContext> 
Context<'this, M> { - fn free_reg(&mut self, type_: GPRType) -> Result { - let pos = if let Some(pos) = self - .block_state - .stack - .iter() - .position(|r| r.reg().map(|reg| reg.type_() == type_).unwrap_or(false)) - { - pos - } else { - return Ok(false); - }; - - let old_loc = self.block_state.stack[pos]; - let new_loc = self.push_physical(old_loc)?; - self.block_state.stack[pos] = new_loc; - - let reg = old_loc.reg().unwrap(); - - for elem in &mut self.block_state.stack[pos + 1..] { - if *elem == old_loc { - *elem = new_loc; - self.block_state.regs.release(reg)?; - } - } - - Ok(true) - } - - fn take_reg(&mut self, r: impl Into) -> Option { - let r = r.into(); - loop { - if let Some(gpr) = self.block_state.regs.take(r) { - break Some(gpr); - } - - if self.free_reg(r) == Ok(false) { - break None; - } - } - } - - pub fn set_source_loc(&mut self, loc: SourceLoc) { - self.source_loc = loc; - } - - pub fn virtual_calling_convention(&self) -> VirtualCallingConvention { - VirtualCallingConvention { - stack: self.block_state.stack.clone(), - depth: self.block_state.depth.clone(), - } - } - - /// Create a new undefined label. 
- pub fn create_label(&mut self) -> Label { - Label(self.asm.new_dynamic_label()) - } - - fn adjusted_offset(&self, offset: i32) -> i32 { - (self.block_state.depth.0 as i32 + offset) * WORD_SIZE as i32 - } - - cmp_i32!(i32_eq, cc::EQUAL, cc::EQUAL, |a, b| a == b); - cmp_i32!(i32_neq, cc::NOT_EQUAL, cc::NOT_EQUAL, |a, b| a != b); - // `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous - cmp_i32!(i32_lt_u, cc::LT_U, cc::GT_U, |a, b| (a as u32) < (b as u32)); - cmp_i32!(i32_le_u, cc::LE_U, cc::GE_U, |a, b| (a as u32) - <= (b as u32)); - cmp_i32!(i32_gt_u, cc::GT_U, cc::LT_U, |a, b| (a as u32) > (b as u32)); - cmp_i32!(i32_ge_u, cc::GE_U, cc::LE_U, |a, b| (a as u32) - >= (b as u32)); - cmp_i32!(i32_lt_s, cc::LT_S, cc::GT_S, |a, b| a < b); - cmp_i32!(i32_le_s, cc::LE_S, cc::GE_S, |a, b| a <= b); - cmp_i32!(i32_gt_s, cc::GT_S, cc::LT_S, |a, b| a > b); - cmp_i32!(i32_ge_s, cc::GE_S, cc::LE_S, |a, b| a >= b); - - cmp_i64!(i64_eq, cc::EQUAL, cc::EQUAL, |a, b| a == b); - cmp_i64!(i64_neq, cc::NOT_EQUAL, cc::NOT_EQUAL, |a, b| a != b); - // `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous - cmp_i64!(i64_lt_u, cc::LT_U, cc::GT_U, |a, b| (a as u64) < (b as u64)); - cmp_i64!(i64_le_u, cc::LE_U, cc::GE_U, |a, b| (a as u64) - <= (b as u64)); - cmp_i64!(i64_gt_u, cc::GT_U, cc::LT_U, |a, b| (a as u64) > (b as u64)); - cmp_i64!(i64_ge_u, cc::GE_U, cc::LE_U, |a, b| (a as u64) - >= (b as u64)); - cmp_i64!(i64_lt_s, cc::LT_S, cc::GT_S, |a, b| a < b); - cmp_i64!(i64_le_s, cc::LE_S, cc::GE_S, |a, b| a <= b); - cmp_i64!(i64_gt_s, cc::GT_S, cc::LT_S, |a, b| a > b); - cmp_i64!(i64_ge_s, cc::GE_S, cc::LE_S, |a, b| a >= b); - - cmp_f32!(f32_gt, f32_lt, seta, |a, b| a > b); - cmp_f32!(f32_ge, f32_le, setnc, |a, b| a >= b); - eq_float!( - f32_eq, - cmpeqss, - as_f32, - |a: Ieee32, b: Ieee32| f32::from_bits(a.to_bits()) == f32::from_bits(b.to_bits()) - ); - eq_float!( - f32_ne, - cmpneqss, - as_f32, - |a: Ieee32, b: Ieee32| 
f32::from_bits(a.to_bits()) != f32::from_bits(b.to_bits()) - ); - - cmp_f64!(f64_gt, f64_lt, seta, |a, b| a > b); - cmp_f64!(f64_ge, f64_le, setnc, |a, b| a >= b); - eq_float!( - f64_eq, - cmpeqsd, - as_f64, - |a: Ieee64, b: Ieee64| f64::from_bits(a.to_bits()) == f64::from_bits(b.to_bits()) - ); - eq_float!( - f64_ne, - cmpneqsd, - as_f64, - |a: Ieee64, b: Ieee64| f64::from_bits(a.to_bits()) != f64::from_bits(b.to_bits()) - ); - - // TODO: Should we do this logic in `eq` and just have this delegate to `eq`? - // That would mean that `eqz` and `eq` with a const 0 argument don't - // result in different code. It would also allow us to generate better - // code for `neq` and `gt_u` with const 0 operand - pub fn i32_eqz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - if let ValueLocation::Immediate(Value::I32(i)) = val { - self.push(ValueLocation::Immediate( - (if i == 0 { 1i32 } else { 0 }).into(), - ))?; - return Ok(()); - } - - if let ValueLocation::Cond(loc) = val { - self.push(ValueLocation::Cond(!loc))?; - return Ok(()); - } - - let reg = self - .put_into_register(I32, &mut val)? - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - let out = self.take_reg(I32).unwrap(); - - dynasm!(self.asm - ; xor Rd(out.rq().unwrap()), Rd(out.rq().unwrap()) - ; test Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap()) - ; setz Rb(out.rq().unwrap()) - ); - - self.free_value(val)?; - - self.push(ValueLocation::Reg(out))?; - Ok(()) - } - - pub fn i64_eqz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - if let ValueLocation::Immediate(Value::I64(i)) = val { - self.push(ValueLocation::Immediate( - (if i == 0 { 1i32 } else { 0 }).into(), - ))?; - return Ok(()); - } - - if let ValueLocation::Cond(loc) = val { - self.push(ValueLocation::Cond(!loc))?; - return Ok(()); - } - - let reg = self - .put_into_register(I64, &mut val)? 
- .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - let out = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; xor Rd(out.rq().unwrap()), Rd(out.rq().unwrap()) - ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap()) - ; setz Rb(out.rq().unwrap()) - ); - - self.free_value(val)?; - - self.push(ValueLocation::Reg(out))?; - Ok(()) - } - - fn br_on_cond_code(&mut self, label: Label, cond: CondCode) { - match cond { - cc::EQUAL => dynasm!(self.asm - ; je =>label.0 - ), - cc::NOT_EQUAL => dynasm!(self.asm - ; jne =>label.0 - ), - cc::GT_U => dynasm!(self.asm - ; ja =>label.0 - ), - cc::GE_U => dynasm!(self.asm - ; jae =>label.0 - ), - cc::LT_U => dynasm!(self.asm - ; jb =>label.0 - ), - cc::LE_U => dynasm!(self.asm - ; jbe =>label.0 - ), - cc::GT_S => dynasm!(self.asm - ; jg =>label.0 - ), - cc::GE_S => dynasm!(self.asm - ; jge =>label.0 - ), - cc::LT_S => dynasm!(self.asm - ; jl =>label.0 - ), - cc::LE_S => dynasm!(self.asm - ; jle =>label.0 - ), - } - } - - /// Pops i32 predicate and branches to the specified label - /// if the predicate is equal to zero. - pub fn br_if_false( - &mut self, - target: impl Into>, - pass_args: impl FnOnce(&mut Self) -> Result<(), Error>, - ) -> Result<(), Error> { - let mut val = self.pop()?; - let label = self.target_to_label(target.into()); - - let cond = match val { - ValueLocation::Cond(cc) => !cc, - _ => { - let predicate = match self.put_into_register(I32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; test Rd(predicate.rq().unwrap()), Rd(predicate.rq().unwrap()) - ); - - CondCode::ZF0 - } - }; - - self.free_value(val)?; - - pass_args(self)?; - - self.br_on_cond_code(label, cond); - - Ok(()) - } - - /// Pops i32 predicate and branches to the specified label - /// if the predicate is not equal to zero. 
- pub fn br_if_true( - &mut self, - target: impl Into>, - pass_args: impl FnOnce(&mut Self) -> Result<(), Error>, - ) -> Result<(), Error> { - let mut val = self.pop()?; - let label = self.target_to_label(target.into()); - - let cond = match val { - ValueLocation::Cond(cc) => cc, - _ => { - let predicate = match self.put_into_register(I32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; test Rd(predicate.rq().unwrap()), Rd(predicate.rq().unwrap()) - ); - - CondCode::ZF1 - } - }; - - self.free_value(val)?; - - pass_args(self)?; - - self.br_on_cond_code(label, cond); - - Ok(()) - } - - /// Branch unconditionally to the specified label. - pub fn br(&mut self, label: impl Into>) { - match label.into() { - BrTarget::Return => self.ret(), - BrTarget::Label(label) => dynasm!(self.asm - ; jmp =>label.0 - ), - } - } - - /// If `default` is `None` then the default is just continuing execution - pub fn br_table( - &mut self, - targets: I, - default: Option>, - pass_args: impl FnOnce(&mut Self) -> Result<(), Error>, - ) -> Result<(), Error> - where - I: IntoIterator>>, - I::IntoIter: ExactSizeIterator + DoubleEndedIterator, - { - let mut targets = targets.into_iter(); - let count = targets.len(); - - let mut selector = self.pop()?; - - pass_args(self)?; - - if let Some(imm) = selector.imm_i32() { - if let Some(target) = targets.nth(imm as _).or(Some(default)).and_then(|a| a) { - match target { - BrTarget::Label(label) => self.br(label), - BrTarget::Return => { - dynasm!(self.asm - ; ret - ); - } - } - } - } else { - let end_label = self.create_label(); - - if count > 0 { - let temp = match self.put_into_temp_register(GPRType::Rq, &mut selector) { - Err(e) => return Err(e), - Ok(o) => match o { - Some(r) => Ok((r, false)), - None => { - self.push_physical(ValueLocation::Reg(RAX))?; - self.block_state.regs.mark_used(RAX); - Ok((RAX, true)) - } - }, - }; - - let 
(selector_reg, pop_selector) = match temp { - Err(e) => return Err(e), - Ok(a) => a, - }; - - let (tmp, pop_tmp) = if let Some(reg) = self.take_reg(I64) { - (reg, false) - } else { - let out_reg = if selector_reg == RAX { RCX } else { RAX }; - - self.push_physical(ValueLocation::Reg(out_reg))?; - self.block_state.regs.mark_used(out_reg); - - (out_reg, true) - }; - - self.immediate_to_reg(tmp, (count as u32).into())?; - dynasm!(self.asm - ; cmp Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap()) - ; cmova Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap()) - ; lea Rq(tmp.rq().unwrap()), [>start_label] - ; lea Rq(selector_reg.rq().unwrap()), [ - Rq(selector_reg.rq().unwrap()) * 5 - ] - ; add Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap()) - ); - - if pop_tmp { - dynasm!(self.asm - ; pop Rq(tmp.rq().unwrap()) - ); - } else { - self.block_state.regs.release(tmp)?; - } - - if pop_selector { - dynasm!(self.asm - ; pop Rq(selector_reg.rq().unwrap()) - ); - } - - dynasm!(self.asm - ; jmp Rq(selector_reg.rq().unwrap()) - ; start_label: - ); - - for target in targets { - let label = target - .map(|target| self.target_to_label(target)) - .unwrap_or(end_label); - dynasm!(self.asm - ; jmp =>label.0 - ); - } - } - - if let Some(def) = default { - self.br(def); - } - - self.define_label(end_label); - } - - self.free_value(selector)?; - Ok(()) - } - - fn set_stack_depth(&mut self, depth: StackDepth) -> Result<(), Error> { - if self.block_state.depth.0 != depth.0 { - let diff = depth.0 as i32 - self.block_state.depth.0 as i32; - let emit_lea = if diff.abs() != 1 { - true - } else { - match self.block_state.depth.0.cmp(&depth.0) { - Ordering::Less => { - for _ in 0..diff { - dynasm!(self.asm - ; push rax - ); - } - false - } - Ordering::Greater => { - if let Some(trash) = self.take_reg(I64) { - for _ in 0..self.block_state.depth.0 - depth.0 { - dynasm!(self.asm - ; pop Rq(trash.rq().unwrap()) - ); - } - self.block_state.regs.release(trash)?; - false - } else { - true - } - 
} - Ordering::Equal => false, - } - }; - if emit_lea { - dynasm!(self.asm - ; lea rsp, [rsp + (self.block_state.depth.0 as i32 - depth.0 as i32) * WORD_SIZE as i32] - ); - } - self.block_state.depth = depth; - } - Ok(()) - } - - fn do_pass_block_args(&mut self, cc: &BlockCallingConvention) -> Result<(), Error> { - let args = &cc.arguments; - for &dst in args.iter().rev().take(self.block_state.stack.len()) { - if let CCLoc::Reg(r) = dst { - if !self.block_state.regs.is_free(r) - && *self.block_state.stack.last().unwrap() != ValueLocation::Reg(r) - { - // TODO: This would be made simpler and more efficient with a proper SSE - // representation. - self.save_regs(std::iter::once(r))?; - } - - self.block_state.regs.mark_used(r); - } - self.pop_into(dst)?; - } - Ok(()) - } - - pub fn pass_block_args(&mut self, cc: &BlockCallingConvention) -> Result<(), Error> { - self.do_pass_block_args(cc)?; - self.set_stack_depth(cc.stack_depth.clone())?; - Ok(()) - } - - pub fn serialize_block_args( - &mut self, - cc: &BlockCallingConvention, - params: u32, - ) -> Result { - self.do_pass_block_args(cc)?; - - let mut out_args = cc.arguments.clone(); - - out_args.reverse(); - - while out_args.len() < params as usize { - let mut val = self.pop()?; - - // TODO: We can use stack slots for values already on the stack but we - // don't refcount stack slots right now - let ccloc = self.put_into_temp_location(None, &mut val)?; - out_args.push(ccloc); - } - - out_args.reverse(); - - self.set_stack_depth(cc.stack_depth.clone())?; - - Ok(BlockCallingConvention { - stack_depth: cc.stack_depth.clone(), - arguments: out_args, - }) - } - - /// Puts all stack values into "real" locations so that they can i.e. 
be set to different - /// values on different iterations of a loop - pub fn serialize_args(&mut self, count: u32) -> Result { - let mut out = Vec::with_capacity(count as _); - - // TODO: We can make this more efficient now that `pop` isn't so complicated - for _ in 0..count { - let mut val = self.pop()?; - // TODO: We can use stack slots for values already on the stack but we - // don't refcount stack slots right now - let loc = self.put_into_temp_location(None, &mut val)?; - - out.push(loc); - } - - out.reverse(); - - Ok(BlockCallingConvention { - stack_depth: self.block_state.depth.clone(), - arguments: out, - }) - } - - pub fn get_global(&mut self, global_idx: u32) -> Result<(), Error> { - let (reg, offset) = self - .module_context - .defined_global_index(global_idx) - .map(|defined_global_index| { - ( - None, - self.module_context - .vmctx_vmglobal_definition(defined_global_index), - ) - }) - .unwrap_or_else(|| { - let reg = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rq(reg.rq().unwrap()), [ - Rq(VMCTX) + - self.module_context.vmctx_vmglobal_import_from(global_idx) as i32 - ] - ); - - (Some(reg), 0) - }); - - let out = self.take_reg(GPRType::Rq).unwrap(); - let vmctx = GPR::Rq(VMCTX); - - // TODO: Are globals necessarily aligned to 128 bits? 
We can load directly to an XMM reg if so - dynasm!(self.asm - ; mov Rq(out.rq().unwrap()), [Rq(reg.unwrap_or(vmctx).rq().unwrap()) + offset as i32] - ); - - if let Some(reg) = reg { - self.block_state.regs.release(reg)?; - } - - self.push(ValueLocation::Reg(out))?; - Ok(()) - } - - pub fn set_global(&mut self, global_idx: u32) -> Result<(), Error> { - let mut val = self.pop()?; - let (reg, offset) = self - .module_context - .defined_global_index(global_idx) - .map(|defined_global_index| { - ( - None, - self.module_context - .vmctx_vmglobal_definition(defined_global_index), - ) - }) - .unwrap_or_else(|| { - let reg = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rq(reg.rq().unwrap()), [ - Rq(VMCTX) + - self.module_context.vmctx_vmglobal_import_from(global_idx) as i32 - ] - ); - - (Some(reg), 0) - }); - - let val_reg = self - .put_into_register(GPRType::Rq, &mut val)? - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let vmctx = GPR::Rq(VMCTX); - - // We always use `Rq` (even for floats) since the globals are not necessarily aligned to 128 bits - dynasm!(self.asm - ; mov [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + offset as i32 - ], Rq(val_reg.rq().unwrap()) - ); - - if let Some(reg) = reg { - self.block_state.regs.release(reg)?; - } - - self.free_value(val)?; - Ok(()) - } - - fn immediate_to_reg(&mut self, reg: GPR, val: Value) -> Result<(), Error> { - match reg { - GPR::Rq(r) => { - let val = val.as_bytes(); - if (val as u64) <= u32::max_value() as u64 { - dynasm!(self.asm - ; mov Rd(r), val as i32 - ); - } else { - dynasm!(self.asm - ; mov Rq(r), QWORD val - ); - } - } - reg @ GPR::Rx(_) => { - let tmp = self - .take_reg(GPRType::Rq) - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - self.immediate_to_reg(tmp, val)?; - let tmp = ValueLocation::Reg(tmp); - self.copy_value(tmp, CCLoc::Reg(reg))?; - self.free_value(tmp)?; - } - } - - Ok(()) - } - - // The `&` and `&mut` aren't necessary 
(`ValueLocation` is copy) but it ensures that we don't get - // the arguments the wrong way around. In the future we want to have a `ReadLocation` and `WriteLocation` - // so we statically can't write to a literal so this will become a non-issue. - fn copy_value(&mut self, src: ValueLocation, dst: CCLoc) -> Result<(), Error> { - match (src, dst) { - (ValueLocation::Cond(cond), CCLoc::Stack(o)) => { - let offset = self.adjusted_offset(o); - - self.copy_value(ValueLocation::Immediate(0u64.into()), dst)?; - - match cond { - cc::EQUAL => dynasm!(self.asm - ; sete [rsp + offset] - ), - cc::NOT_EQUAL => dynasm!(self.asm - ; setne [rsp + offset] - ), - cc::GT_U => dynasm!(self.asm - ; seta [rsp + offset] - ), - cc::GE_U => dynasm!(self.asm - ; setae [rsp + offset] - ), - cc::LT_U => dynasm!(self.asm - ; setb [rsp + offset] - ), - cc::LE_U => dynasm!(self.asm - ; setbe [rsp + offset] - ), - cc::GT_S => dynasm!(self.asm - ; setg [rsp + offset] - ), - cc::GE_S => dynasm!(self.asm - ; setge [rsp + offset] - ), - cc::LT_S => dynasm!(self.asm - ; setl [rsp + offset] - ), - cc::LE_S => dynasm!(self.asm - ; setle [rsp + offset] - ), - } - } - (ValueLocation::Cond(cond), CCLoc::Reg(reg)) => match reg { - GPR::Rq(r) => { - self.copy_value(ValueLocation::Immediate(0u64.into()), dst)?; - - match cond { - cc::EQUAL => dynasm!(self.asm - ; sete Rb(r) - ), - cc::NOT_EQUAL => dynasm!(self.asm - ; setne Rb(r) - ), - cc::GT_U => dynasm!(self.asm - ; seta Rb(r) - ), - cc::GE_U => dynasm!(self.asm - ; setae Rb(r) - ), - cc::LT_U => dynasm!(self.asm - ; setb Rb(r) - ), - cc::LE_U => dynasm!(self.asm - ; setbe Rb(r) - ), - cc::GT_S => dynasm!(self.asm - ; setg Rb(r) - ), - cc::GE_S => dynasm!(self.asm - ; setge Rb(r) - ), - cc::LT_S => dynasm!(self.asm - ; setl Rb(r) - ), - cc::LE_S => dynasm!(self.asm - ; setle Rb(r) - ), - } - } - GPR::Rx(_) => { - let temp = CCLoc::Reg(self.take_reg(I32).unwrap()); - self.copy_value(src, temp)?; - let temp = temp.into(); - self.copy_value(temp, dst)?; - 
self.free_value(temp)?; - } - }, - (ValueLocation::Stack(in_offset), CCLoc::Stack(out_offset)) => { - let in_offset = self.adjusted_offset(in_offset); - let out_offset = self.adjusted_offset(out_offset); - if in_offset != out_offset { - if let Some(gpr) = self.take_reg(I64) { - dynasm!(self.asm - ; mov Rq(gpr.rq().unwrap()), [rsp + in_offset] - ; mov [rsp + out_offset], Rq(gpr.rq().unwrap()) - ); - self.block_state.regs.release(gpr)?; - } else { - dynasm!(self.asm - ; push rax - ; mov rax, [rsp + in_offset + WORD_SIZE as i32] - ; mov [rsp + out_offset + WORD_SIZE as i32], rax - ; pop rax - ); - } - } - } - // TODO: XMM registers - (ValueLocation::Reg(in_reg), CCLoc::Stack(out_offset)) => { - let out_offset = self.adjusted_offset(out_offset); - match in_reg { - GPR::Rq(in_reg) => { - // We can always use `Rq` here for now because stack slots are in multiples of - // 8 bytes - dynasm!(self.asm - ; mov [rsp + out_offset], Rq(in_reg) - ); - } - GPR::Rx(in_reg) => { - // We can always use `movq` here for now because stack slots are in multiples of - // 8 bytes - dynasm!(self.asm - ; movq [rsp + out_offset], Rx(in_reg) - ); - } - } - } - (ValueLocation::Immediate(i), CCLoc::Stack(out_offset)) => { - // TODO: Floats - let i = i.as_bytes(); - let out_offset = self.adjusted_offset(out_offset); - if let Some(scratch) = self.take_reg(I64) { - dynasm!(self.asm - ; mov Rq(scratch.rq().unwrap()), QWORD i - ; mov [rsp + out_offset], Rq(scratch.rq().unwrap()) - ); - - self.block_state.regs.release(scratch)?; - } else { - dynasm!(self.asm - ; push rax - ; mov rax, QWORD i - ; mov [rsp + out_offset + WORD_SIZE as i32], rax - ; pop rax - ); - } - } - (ValueLocation::Stack(in_offset), CCLoc::Reg(out_reg)) => { - let in_offset = self.adjusted_offset(in_offset); - match out_reg { - GPR::Rq(out_reg) => { - // We can always use `Rq` here for now because stack slots are in multiples of - // 8 bytes - dynasm!(self.asm - ; mov Rq(out_reg), [rsp + in_offset] - ); - } - GPR::Rx(out_reg) => { - 
// We can always use `movq` here for now because stack slots are in multiples of - // 8 bytes - dynasm!(self.asm - ; movq Rx(out_reg), [rsp + in_offset] - ); - } - } - } - (ValueLocation::Reg(in_reg), CCLoc::Reg(out_reg)) => { - if in_reg != out_reg { - match (in_reg, out_reg) { - (GPR::Rq(in_reg), GPR::Rq(out_reg)) => { - dynasm!(self.asm - ; mov Rq(out_reg), Rq(in_reg) - ); - } - (GPR::Rx(in_reg), GPR::Rq(out_reg)) => { - dynasm!(self.asm - ; movq Rq(out_reg), Rx(in_reg) - ); - } - (GPR::Rq(in_reg), GPR::Rx(out_reg)) => { - dynasm!(self.asm - ; movq Rx(out_reg), Rq(in_reg) - ); - } - (GPR::Rx(in_reg), GPR::Rx(out_reg)) => { - dynasm!(self.asm - ; movapd Rx(out_reg), Rx(in_reg) - ); - } - } - } - } - (ValueLocation::Immediate(i), CCLoc::Reg(out_reg)) => { - // TODO: Floats - self.immediate_to_reg(out_reg, i)?; - } - } - Ok(()) - } - - /// Define the given label at the current position. - /// - /// Multiple labels can be defined at the same position. However, a label - /// can be defined only once. 
- pub fn define_label(&mut self, label: Label) { - self.asm.dynamic_label(label.0); - } - - pub fn set_state(&mut self, state: VirtualCallingConvention) -> Result<(), Error> { - self.block_state.regs = Registers::new(); - self.block_state.regs.release_scratch_register()?; - for elem in &state.stack { - if let ValueLocation::Reg(r) = elem { - self.block_state.regs.mark_used(*r); - } - } - self.block_state.stack = state.stack; - self.block_state.depth = state.depth; - Ok(()) - } - - pub fn apply_cc( - &mut self, - cc: BlockCallingConvention>, - ) -> Result<(), Error> { - let stack = cc.arguments.into_iter(); - - self.block_state.stack = Vec::with_capacity(stack.size_hint().0); - self.block_state.regs = Registers::new(); - self.block_state.regs.release_scratch_register()?; - - for elem in stack { - if let CCLoc::Reg(r) = elem { - self.block_state.regs.mark_used(r); - } - - self.block_state.stack.push(elem.into()); - } - - self.block_state.depth = cc.stack_depth; - Ok(()) - } - - load!(i32_load, GPRType::Rq, Rd, movd, mov, DWORD); - load!(i64_load, GPRType::Rq, Rq, movq, mov, QWORD); - load!(f32_load, GPRType::Rx, Rd, movd, mov, DWORD); - load!(f64_load, GPRType::Rx, Rq, movq, mov, QWORD); - - load!(i32_load8_u, GPRType::Rq, Rd, NONE, movzx, BYTE); - load!(i32_load8_s, GPRType::Rq, Rd, NONE, movsx, BYTE); - load!(i32_load16_u, GPRType::Rq, Rd, NONE, movzx, WORD); - load!(i32_load16_s, GPRType::Rq, Rd, NONE, movsx, WORD); - - load!(i64_load8_u, GPRType::Rq, Rq, NONE, movzx, BYTE); - load!(i64_load8_s, GPRType::Rq, Rq, NONE, movsx, BYTE); - load!(i64_load16_u, GPRType::Rq, Rq, NONE, movzx, WORD); - load!(i64_load16_s, GPRType::Rq, Rq, NONE, movsx, WORD); - load!(i64_load32_u, GPRType::Rq, Rd, movd, mov, DWORD); - load!(i64_load32_s, GPRType::Rq, Rq, NONE, movsxd, DWORD); - - store!(store8, Rb, NONE, DWORD); - store!(store16, Rw, NONE, QWORD); - store!(store32, Rd, movd, DWORD); - store!(store64, Rq, movq, QWORD); - - fn push_physical(&mut self, mut value: ValueLocation) 
-> Result { - let out_offset = -(self.block_state.depth.0 as i32 + 1); - match value { - ValueLocation::Reg(_) | ValueLocation::Immediate(_) | ValueLocation::Cond(_) => { - if let Some(gpr) = self.put_into_register(GPRType::Rq, &mut value)? { - dynasm!(self.asm - ; push Rq(gpr.rq().unwrap()) - ); - } else { - dynasm!(self.asm - ; push rax - ); - - self.copy_value(value, CCLoc::Stack(out_offset))?; - } - - self.free_value(value)?; - } - ValueLocation::Stack(o) => { - let offset = self.adjusted_offset(o); - dynasm!(self.asm - ; push QWORD [rsp + offset] - ); - } - } - - self.block_state.depth.reserve(1); - - Ok(ValueLocation::Stack(out_offset)) - } - - fn push(&mut self, value: ValueLocation) -> Result<(), Error> { - if let Some(mut top) = self.block_state.stack.pop() { - if let ValueLocation::Cond(_) = top { - match self.put_into_register(I32, &mut top) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - } - - self.block_state.stack.push(top); - } - - self.block_state.stack.push(value); - Ok(()) - } - - fn pop(&mut self) -> Result { - match self.block_state.stack.pop() { - Some(v) => Ok(v), - None => Err(Error::Microwasm( - "Stack is empty - pop impossible".to_string(), - )), - } - } - - pub fn drop(&mut self, range: RangeInclusive) -> Result<(), Error> { - let mut repush = Vec::with_capacity(*range.start() as _); - - for _ in 0..*range.start() { - let v = self.pop()?; - repush.push(v); - } - - for _ in range { - let val = self.pop()?; - self.free_value(val)?; - } - - for v in repush.into_iter().rev() { - self.push(v)?; - } - Ok(()) - } - - fn pop_into(&mut self, dst: CCLoc) -> Result<(), Error> { - let val = self.pop()?; - self.copy_value(val, dst)?; - self.free_value(val)?; - Ok(()) - } - - fn free_value(&mut self, val: ValueLocation) -> Result<(), Error> { - if let ValueLocation::Reg(r) = val { - self.block_state.regs.release(r)?; - } - Ok(()) - } - - /// Puts this value into a 
register so that it can be efficiently read - fn put_into_register( - &mut self, - ty: impl Into>, - val: &mut ValueLocation, - ) -> Result, Error> { - if let Some(out) = self.clone_to_register(ty, *val)? { - self.free_value(*val)?; - *val = ValueLocation::Reg(out); - Ok(Some(out)) - } else { - Ok(None) - } - } - - /// Clones this value into a register so that it can be efficiently read - fn clone_to_register( - &mut self, - ty: impl Into>, - val: ValueLocation, - ) -> Result, Error> { - let ty = ty.into(); - match val { - ValueLocation::Reg(r) if ty.map(|t| t == r.type_()).unwrap_or(true) => { - self.block_state.regs.mark_used(r); - Ok(Some(r)) - } - val => match self.take_reg(ty.unwrap_or(GPRType::Rq)) { - Some(scratch) => { - self.copy_value(val, CCLoc::Reg(scratch))?; - Ok(Some(scratch)) - } - None => Ok(None), - }, - } - } - - /// Puts this value into a temporary register so that operations - /// on that register don't write to a local. - fn put_into_temp_register( - &mut self, - ty: impl Into>, - val: &mut ValueLocation, - ) -> Result, Error> { - let out = self.clone_to_temp_register(ty, *val)?; - if let Some(o) = out { - self.free_value(*val)?; - *val = ValueLocation::Reg(o); - Ok(Some(o)) - } else { - Ok(None) - } - } - - fn put_into_temp_location( - &mut self, - ty: impl Into>, - val: &mut ValueLocation, - ) -> Result { - if let Some(gpr) = self.put_into_temp_register(ty, val)? { - Ok(CCLoc::Reg(gpr)) - } else { - let out = CCLoc::Stack(self.push_physical(*val)?.stack().unwrap()); - *val = out.into(); - Ok(out) - } - } - - /// Clones this value into a temporary register so that operations - /// on that register don't write to a local. 
- - fn clone_to_temp_register( - &mut self, - ty: impl Into>, - val: ValueLocation, - ) -> Result, Error> { - // If we have `None` as the type then it always matches (`.unwrap_or(true)`) - match val { - ValueLocation::Reg(r) => { - let ty = ty.into(); - let type_matches = ty.map(|t| t == r.type_()).unwrap_or(true); - - if self.block_state.regs.num_usages(r) <= 1 && type_matches { - self.block_state.regs.mark_used(r); - Ok(Some(r)) - } else if let Some(scratch) = self.take_reg(ty.unwrap_or(GPRType::Rq)) { - self.copy_value(val, CCLoc::Reg(scratch))?; - Ok(Some(scratch)) - } else { - Ok(None) - } - } - val => self.clone_to_register(ty, val), - } - } - - pub fn f32_neg(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f32() { - ValueLocation::Immediate( - Ieee32::from_bits((-f32::from_bits(i.to_bits())).to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let const_label = self.aligned_label(16, LabelValue::I32(SIGN_MASK_F32 as i32)); - - dynasm!(self.asm - ; xorps Rx(reg.rx().unwrap()), [=>const_label.0] - ); - - val - }; - - self.push(out)?; - Ok(()) - } - - pub fn f64_neg(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f64() { - ValueLocation::Immediate( - Ieee64::from_bits((-f64::from_bits(i.to_bits())).to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - let const_label = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); - - dynasm!(self.asm - ; xorpd Rx(reg.rx().unwrap()), [=>const_label.0] - ); - - val - }; - - self.push(out)?; - Ok(()) - } - - pub fn f32_abs(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f32() { - ValueLocation::Immediate( - Ieee32::from_bits(f32::from_bits(i.to_bits()).abs().to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let const_label = self.aligned_label(16, LabelValue::I32(REST_MASK_F32 as i32)); - - dynasm!(self.asm - ; andps Rx(reg.rx().unwrap()), [=>const_label.0] - ); - - val - }; - - self.push(out)?; - Ok(()) - } - - pub fn f64_abs(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f64() { - ValueLocation::Immediate( - Ieee64::from_bits(f64::from_bits(i.to_bits()).abs().to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let const_label = self.aligned_label(16, LabelValue::I64(REST_MASK_F64 as i64)); - - dynasm!(self.asm - ; andps Rx(reg.rx().unwrap()), [=>const_label.0] - ); - - val - }; - - self.push(out)?; - Ok(()) - } - - pub fn f32_sqrt(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f32() { - ValueLocation::Immediate( - Ieee32::from_bits(f32::from_bits(i.to_bits()).sqrt().to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - dynasm!(self.asm - ; sqrtss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ); - - val - }; - - self.push(out)?; - Ok(()) - } - - pub fn f64_sqrt(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out = if let Some(i) = val.imm_f64() { - ValueLocation::Immediate( - Ieee64::from_bits(f64::from_bits(i.to_bits()).sqrt().to_bits()).into(), - ) - } else { - let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; sqrtsd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ); - - ValueLocation::Reg(reg) - }; - - self.push(out)?; - Ok(()) - } - - pub fn f32_copysign(&mut self) -> Result<(), Error> { - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = if let (Some(left), Some(right)) = (left.imm_f32(), right.imm_f32()) { - ValueLocation::Immediate( - Ieee32::from_bits( - (left.to_bits() & REST_MASK_F32) | (right.to_bits() & SIGN_MASK_F32), - ) - .into(), - ) - } else { - let lreg = match self.put_into_temp_register(GPRType::Rx, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let rreg = match self.put_into_register(GPRType::Rx, &mut right) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let sign_mask = self.aligned_label(16, LabelValue::I32(SIGN_MASK_F32 as i32)); - let rest_mask = self.aligned_label(16, LabelValue::I32(REST_MASK_F32 as i32)); - - dynasm!(self.asm - ; andps Rx(rreg.rx().unwrap()), [=>sign_mask.0] - ; andps Rx(lreg.rx().unwrap()), [=>rest_mask.0] - ; orps Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ); - - self.free_value(right)?; - - left - }; - - self.push(out)?; - Ok(()) - } - - pub fn f64_copysign(&mut self) -> Result<(), Error> { - let mut right = self.pop()?; - let mut left = self.pop()?; - - let out = if let (Some(left), Some(right)) = (left.imm_f64(), right.imm_f64()) { - ValueLocation::Immediate( - Ieee64::from_bits( - (left.to_bits() & REST_MASK_F64) | (right.to_bits() & SIGN_MASK_F64), - ) - .into(), - ) - } else { - let lreg = match self.put_into_temp_register(GPRType::Rx, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let rreg = match self.put_into_register(GPRType::Rx, &mut right) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); - let rest_mask = self.aligned_label(16, LabelValue::I64(REST_MASK_F64 as i64)); - - dynasm!(self.asm - ; andpd Rx(rreg.rx().unwrap()), [=>sign_mask.0] - ; andpd Rx(lreg.rx().unwrap()), [=>rest_mask.0] - ; orpd Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) - ); - - self.free_value(right)?; - - left - }; - - self.push(out)?; - Ok(()) - } - - pub fn i32_clz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(imm.as_i32().unwrap().leading_zeros().into()) - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(I32).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; lzcnt Rd(temp.rq().unwrap()), [rsp + offset] - ); - ValueLocation::Reg(temp) - } else { - let temp_2 = self.take_reg(I32).unwrap(); - - dynasm!(self.asm - ; bsr Rd(temp.rq().unwrap()), [rsp + offset] - ; mov Rd(temp_2.rq().unwrap()), DWORD 0x3fu64 as _ - ; cmove Rd(temp.rq().unwrap()), Rd(temp_2.rq().unwrap()) - ; mov Rd(temp_2.rq().unwrap()), DWORD 0x1fu64 as _ - ; xor Rd(temp.rq().unwrap()), Rd(temp_2.rq().unwrap()) - ); - self.free_value(ValueLocation::Reg(temp_2))?; - ValueLocation::Reg(temp) - } - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = match self.put_into_register(GPRType::Rq, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I32).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; lzcnt Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } else { - dynasm!(self.asm - ; bsr Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ; mov Rd(reg.rq().unwrap()), DWORD 0x3fu64 as _ - ; cmove Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ; mov Rd(reg.rq().unwrap()), DWORD 0x1fu64 as _ - ; xor Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } - } - }; - - self.free_value(val)?; - self.push(out_val)?; - Ok(()) - } - - pub fn i64_clz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate((imm.as_i64().unwrap().leading_zeros() as u64).into()) - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(I64).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; lzcnt Rq(temp.rq().unwrap()), [rsp + offset] - ); - ValueLocation::Reg(temp) - } else { - let temp_2 = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; bsr Rq(temp.rq().unwrap()), [rsp + offset] - ; mov Rq(temp_2.rq().unwrap()), QWORD 0x7fu64 as _ - ; cmove Rq(temp.rq().unwrap()), Rq(temp_2.rq().unwrap()) - ; mov Rq(temp_2.rq().unwrap()), QWORD 0x3fu64 as _ - ; xor Rq(temp.rq().unwrap()), Rq(temp_2.rq().unwrap()) - ); - self.free_value(ValueLocation::Reg(temp_2))?; - ValueLocation::Reg(temp) - } - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = match self.put_into_register(GPRType::Rq, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - let temp = self.take_reg(I64).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; lzcnt Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } else { - dynasm!(self.asm - ; bsr Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ; mov Rq(reg.rq().unwrap()), QWORD 0x7fu64 as _ - ; cmove Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ; mov Rq(reg.rq().unwrap()), QWORD 0x3fu64 as _ - ; xor Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } - } - }; - - self.free_value(val)?; - self.push(out_val)?; - Ok(()) - } - - pub fn i32_ctz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(imm.as_i32().unwrap().trailing_zeros().into()) - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(I32).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; tzcnt Rd(temp.rq().unwrap()), [rsp + offset] - ); - ValueLocation::Reg(temp) - } else { - let temp_zero_val = self.take_reg(I32).unwrap(); - - dynasm!(self.asm - ; bsf Rd(temp.rq().unwrap()), [rsp + offset] - ; mov Rd(temp_zero_val.rq().unwrap()), DWORD 0x20u32 as _ - ; cmove Rd(temp.rq().unwrap()), Rd(temp_zero_val.rq().unwrap()) - ); - self.free_value(ValueLocation::Reg(temp_zero_val))?; - ValueLocation::Reg(temp) - } - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = match self.put_into_register(GPRType::Rq, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - let temp = self.take_reg(I32).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; tzcnt Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } else { - dynasm!(self.asm - ; bsf Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ; mov Rd(reg.rq().unwrap()), DWORD 0x20u32 as _ - ; cmove Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } - } - }; - - self.free_value(val)?; - self.push(out_val)?; - Ok(()) - } - - pub fn i64_ctz(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate((imm.as_i64().unwrap().trailing_zeros() as u64).into()) - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - let temp = self.take_reg(I64).unwrap(); - - if is_x86_feature_detected!("lzcnt") { - dynasm!(self.asm - ; tzcnt Rq(temp.rq().unwrap()), [rsp + offset] - ); - ValueLocation::Reg(temp) - } else { - let temp_zero_val = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; bsf Rq(temp.rq().unwrap()), [rsp + offset] - ; mov Rq(temp_zero_val.rq().unwrap()), QWORD 0x40u64 as _ - ; cmove Rq(temp.rq().unwrap()), Rq(temp_zero_val.rq().unwrap()) - ); - self.free_value(ValueLocation::Reg(temp_zero_val))?; - ValueLocation::Reg(temp) - } - } - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let reg = match self.put_into_register(GPRType::Rq, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - let temp = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; bsf Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ; mov Rq(reg.rq().unwrap()), QWORD 0x40u64 as _ - ; cmove Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - self.push(out_val)?; - Ok(()) - } - - pub fn i32_extend_u(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = if let ValueLocation::Immediate(imm) = val { - ValueLocation::Immediate((imm.as_i32().unwrap() as u32 as u64).into()) - } else { - let new_reg = self.take_reg(I64).unwrap(); - - // TODO: Track set-ness of bits - we can make this a no-op in most cases - // but we have to make this unconditional just in case this value - // came from a truncate. - match val { - ValueLocation::Reg(GPR::Rx(rxreg)) => { - dynasm!(self.asm - ; movd Rd(new_reg.rq().unwrap()), Rx(rxreg) - ); - } - ValueLocation::Reg(GPR::Rq(rqreg)) => { - dynasm!(self.asm - ; mov Rd(new_reg.rq().unwrap()), Rd(rqreg) - ); - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - dynasm!(self.asm - ; mov Rd(new_reg.rq().unwrap()), [rsp + offset] - ); - } - ValueLocation::Cond(_) => self.copy_value(val, CCLoc::Reg(new_reg))?, - ValueLocation::Immediate(_) => { - return Err(Error::Microwasm( - "i32_extend_u unreachable code".to_string(), - )) - } - } - - ValueLocation::Reg(new_reg) - }; - - self.free_value(val)?; - - self.push(out)?; - Ok(()) - } - - pub fn i32_extend_s(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - self.free_value(val)?; - let new_reg = self.take_reg(I64).unwrap(); - - let out = match val { - ValueLocation::Reg(GPR::Rx(rxreg)) => { - dynasm!(self.asm - ; movd Rd(new_reg.rq().unwrap()), Rx(rxreg) - ; movsxd Rq(new_reg.rq().unwrap()), Rd(new_reg.rq().unwrap()) - ); - - ValueLocation::Reg(new_reg) - } - ValueLocation::Reg(GPR::Rq(rqreg)) => { - dynasm!(self.asm - ; movsxd Rq(new_reg.rq().unwrap()), Rd(rqreg) - ); - - 
ValueLocation::Reg(new_reg) - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - dynasm!(self.asm - ; movsxd Rq(new_reg.rq().unwrap()), DWORD [rsp + offset] - ); - - ValueLocation::Reg(new_reg) - } - // `CondCode` can only be 0 or 1, so sign-extension is always the same as - // zero-extension - val @ ValueLocation::Cond(_) => { - self.copy_value(val, CCLoc::Reg(new_reg))?; - - ValueLocation::Reg(new_reg) - } - ValueLocation::Immediate(imm) => { - self.block_state.regs.release(new_reg)?; - - ValueLocation::Immediate((imm.as_i32().unwrap() as i64).into()) - } - }; - - self.push(out)?; - Ok(()) - } - - unop!(i32_popcnt, popcnt, Rd, u32, u32::count_ones); - conversion!( - f64_from_f32, - cvtss2sd, - Rx, - rx, - Rx, - rx, - f32, - f64, - as_f32, - |a: Ieee32| Ieee64::from_bits((f32::from_bits(a.to_bits()) as f64).to_bits()) - ); - conversion!( - f32_from_f64, - cvtsd2ss, - Rx, - rx, - Rx, - rx, - f64, - f32, - as_f64, - |a: Ieee64| Ieee32::from_bits((f64::from_bits(a.to_bits()) as f32).to_bits()) - ); - - pub fn i32_truncate_f32_s(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i32).into(), - ), - _ => { - let reg = match self.put_into_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I32(0xCF00_0000_u32 as i32)); - let zero = self.aligned_label(16, LabelValue::I32(0)); - - dynasm!(self.asm - ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rd(temp.rq().unwrap()), [=>sign_mask.0] - ; jne >ret - ; ucomiss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ; jp >trap - ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jnae >trap - ; ucomiss Rx(reg.rx().unwrap()), [=>zero.0] - ; jb >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i32_truncate_f32_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i32).into(), - ), - _ => { - let reg = match self.put_into_temp_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I32(0x4F00_0000_u32 as i32)); - - dynasm!(self.asm - ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jae >else_ - ; jp >trap - ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) - ; js >trap - ; jmp >ret - ; else_: - ; subss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) - ; js >trap - ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jmp >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i32_truncate_f64_s(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f64::from_bits(imm.as_f64().unwrap().to_bits()) as i32).into(), - ), - _ => { - let reg = match self.put_into_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I64(0xC1E0_0000_0020_0000_u64 as i64)); - let zero = self.aligned_label(16, LabelValue::I64(0)); - - dynasm!(self.asm - ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rd(temp.rq().unwrap()), [=>sign_mask.0] - ; jne >ret - ; ucomisd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ; jp >trap - ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jna >trap - ; ucomisd Rx(reg.rx().unwrap()), [=>zero.0] - ; jb >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i32_truncate_f64_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f64::from_bits(imm.as_f64().unwrap().to_bits()) as u32).into(), - ), - _ => { - let reg = match self.put_into_temp_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I64(0x41E0_0000_0000_0000_u64 as i64)); - - dynasm!(self.asm - ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jae >else_ - ; jp >trap - ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) - ; js >trap - ; jmp >ret - ; else_: - ; subsd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) - ; js >trap - ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jmp >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - conversion!( - f32_convert_from_i32_s, - cvtsi2ss, - Rd, - rq, - Rx, - rx, - i32, - f32, - as_i32, - |a| Ieee32::from_bits((a as f32).to_bits()) - ); - conversion!( - f64_convert_from_i32_s, - cvtsi2sd, - Rd, - rq, - Rx, - rx, - i32, - f64, - as_i32, - |a| Ieee64::from_bits((a as f64).to_bits()) - ); - conversion!( - f32_convert_from_i64_s, - cvtsi2ss, - Rq, - rq, - Rx, - rx, - i64, - f32, - as_i64, - |a| Ieee32::from_bits((a as f32).to_bits()) - ); - conversion!( - f64_convert_from_i64_s, - cvtsi2sd, - Rq, - rq, - Rx, - rx, - i64, - f64, - as_i64, - |a| Ieee64::from_bits((a as f64).to_bits()) - ); - - pub fn i64_truncate_f32_s(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i64).into(), - ), - _ => { - let reg = match self.put_into_temp_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I32(0xDF00_0000_u32 as i32)); - let zero = self.aligned_label(16, LabelValue::I64(0)); - - dynasm!(self.asm - ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jne >ret - ; ucomiss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ; jp >trap - ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jnae >trap - ; ucomiss Rx(reg.rx().unwrap()), [=>zero.0] - ; jb >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i64_truncate_f64_s(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f64::from_bits(imm.as_f64().unwrap().to_bits()) as i64).into(), - ), - _ => { - let reg = match self.put_into_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I32).unwrap(); - - let sign_mask = self.aligned_label(8, LabelValue::I64(SIGN_MASK_F64 as i64)); - let float_cmp_mask = - self.aligned_label(16, LabelValue::I64(0xC3E0_0000_0000_0000_u64 as i64)); - let zero = self.aligned_label(16, LabelValue::I64(0)); - - dynasm!(self.asm - ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jne >ret - ; ucomisd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) - ; jp >trap - ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] - ; jnae >trap - ; ucomisd Rx(reg.rx().unwrap()), [=>zero.0] - ; jb >ret - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; ret: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i64_truncate_f32_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f32::from_bits(imm.as_f32().unwrap().to_bits()) as u64).into(), - ), - _ => { - let reg = match self.put_into_register(F32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I64).unwrap(); - let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); - let u64_trunc_f32_const = self.aligned_label(16, LabelValue::I32(0x5F00_0000_i32)); - - dynasm!(self.asm - ; comiss Rx(reg.rx().unwrap()), [=>u64_trunc_f32_const.0] - ; jae >large - ; jp >trap - ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rq(temp.rq().unwrap()), Rq(temp.rq().unwrap()) - ; js >trap - ; jmp >cont - ; large: - ; subss Rx(reg.rx().unwrap()), [=>u64_trunc_f32_const.0] - ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; test Rq(temp.rq().unwrap()), Rq(temp.rq().unwrap()) - ; js >trap - ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jmp >cont - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; cont: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i64_truncate_f64_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - (f64::from_bits(imm.as_f64().unwrap().to_bits()) as u64).into(), - ), - _ => { - let reg = match self.put_into_register(F64, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(I64).unwrap(); - - let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); - let u64_trunc_f64_const = - self.aligned_label(16, LabelValue::I64(0x43E0_0000_0000_0000_i64)); - - dynasm!(self.asm - ; comisd Rx(reg.rx().unwrap()), [=>u64_trunc_f64_const.0] - ; jnb >large - ; jp >trap - ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rq(temp.rq().unwrap()), 0 - ; jl >trap - ; jmp >cont - ; large: - ; subsd Rx(reg.rx().unwrap()), [=>u64_trunc_f64_const.0] - ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) - ; cmp Rq(temp.rq().unwrap()), 0 - ; jnge >trap - ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] - ; jmp >cont - ; trap: - ;; self.trap(TrapCode::BadConversionToInteger) - ; cont: - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn f32_convert_from_i32_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - Ieee32::from_bits((imm.as_i32().unwrap() as u32 as f32).to_bits()).into(), - ), - _ => { - let reg = match self.put_into_register(I32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let temp = self.take_reg(F32).unwrap(); - - dynasm!(self.asm - ; mov Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap()) - ; cvtsi2ss Rx(temp.rx().unwrap()), Rq(reg.rq().unwrap()) - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn f64_convert_from_i32_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - Ieee64::from_bits((imm.as_i32().unwrap() as u32 as f64).to_bits()).into(), - ), - _ => { - let reg = match self.put_into_register(I32, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let temp = self.take_reg(F64).unwrap(); - - dynasm!(self.asm - ; mov Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap()) - ; cvtsi2sd Rx(temp.rx().unwrap()), Rq(reg.rq().unwrap()) - ); - - ValueLocation::Reg(temp) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn f32_convert_from_i64_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - Ieee32::from_bits((imm.as_i64().unwrap() as u64 as f32).to_bits()).into(), - ), - _ => { - let reg = match self.put_into_register(I64, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let out = self.take_reg(F32).unwrap(); - let temp = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap()) - ; js >negative - ; cvtsi2ss Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) - ; jmp >ret - ; negative: - ; mov Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ; shr Rq(temp.rq().unwrap()), 1 - ; and Rq(reg.rq().unwrap()), 1 - ; or Rq(reg.rq().unwrap()), Rq(temp.rq().unwrap()) - ; cvtsi2ss Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) - ; addss Rx(out.rx().unwrap()), Rx(out.rx().unwrap()) - ; ret: - ); - - self.free_value(ValueLocation::Reg(temp))?; - - ValueLocation::Reg(out) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn f64_convert_from_i64_u(&mut self) -> Result<(), Error> { - let mut val = self.pop()?; - - let out_val = match val { - ValueLocation::Immediate(imm) => ValueLocation::Immediate( - Ieee64::from_bits((imm.as_i64().unwrap() as u64 as f64).to_bits()).into(), - ), - _ => { - let reg = match self.put_into_register(I64, &mut val) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - let out = self.take_reg(F32).unwrap(); - let temp = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap()) - ; js >negative - ; cvtsi2sd Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) - ; jmp >ret - ; negative: - ; mov Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) - ; shr Rq(temp.rq().unwrap()), 1 - ; and Rq(reg.rq().unwrap()), 1 - ; or Rq(reg.rq().unwrap()), Rq(temp.rq().unwrap()) - ; cvtsi2sd Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) - ; addsd Rx(out.rx().unwrap()), Rx(out.rx().unwrap()) - ; ret: - ); - - self.free_value(ValueLocation::Reg(temp))?; - - ValueLocation::Reg(out) - } - }; - - self.free_value(val)?; - - self.push(out_val)?; - Ok(()) - } - - pub fn i32_wrap_from_i64(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate((imm.as_i64().unwrap() as u64 as u32).into()) - } - val => val, - }; - - self.push(out)?; - Ok(()) - } - - pub fn i32_reinterpret_from_f32(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(imm.as_f32().unwrap().to_bits().into()) - } - val => val, - }; - - self.push(out)?; - Ok(()) - } - - pub fn i64_reinterpret_from_f64(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(imm.as_f64().unwrap().to_bits().into()) - } - val => val, - }; - - self.push(out)?; - Ok(()) - } - - pub fn f32_reinterpret_from_i32(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = match val { - ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(Ieee32::from_bits(imm.as_i32().unwrap() as _).into()) - } - val => val, - }; - - self.push(out)?; - Ok(()) - } - - pub fn f64_reinterpret_from_i64(&mut self) -> Result<(), Error> { - let val = self.pop()?; - - let out = match val { - 
ValueLocation::Immediate(imm) => { - ValueLocation::Immediate(Ieee64::from_bits(imm.as_i64().unwrap() as _).into()) - } - val => val, - }; - - self.push(out)?; - Ok(()) - } - - unop!(i64_popcnt, popcnt, Rq, u64, |a: u64| a.count_ones() as u64); - - // TODO: Use `lea` when the LHS operand isn't a temporary but both of the operands - // are in registers. - commutative_binop_i32!(i32_add, add, i32::wrapping_add); - commutative_binop_i32!(i32_and, and, |a, b| a & b); - commutative_binop_i32!(i32_or, or, |a, b| a | b); - commutative_binop_i32!(i32_xor, xor, |a, b| a ^ b); - binop_i32!(i32_sub, sub, i32::wrapping_sub); - - commutative_binop_i64!(i64_add, add, i64::wrapping_add); - commutative_binop_i64!(i64_and, and, |a, b| a & b); - commutative_binop_i64!(i64_or, or, |a, b| a | b); - commutative_binop_i64!(i64_xor, xor, |a, b| a ^ b); - binop_i64!(i64_sub, sub, i64::wrapping_sub); - - commutative_binop_f32!(f32_add, addss, |a, b| a + b); - commutative_binop_f32!(f32_mul, mulss, |a, b| a * b); - minmax_float!( - f32_min, - minss, - ucomiss, - addss, - orps, - as_f32, - |a: Ieee32, b: Ieee32| Ieee32::from_bits( - f32::from_bits(a.to_bits()) - .min(f32::from_bits(b.to_bits())) - .to_bits() - ) - ); - minmax_float!( - f32_max, - maxss, - ucomiss, - addss, - andps, - as_f32, - |a: Ieee32, b: Ieee32| Ieee32::from_bits( - f32::from_bits(a.to_bits()) - .max(f32::from_bits(b.to_bits())) - .to_bits() - ) - ); - binop_f32!(f32_sub, subss, |a, b| a - b); - binop_f32!(f32_div, divss, |a, b| a / b); - - pub fn f32_ceil(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::CeilF32), - iter::once(F32), - iter::once(F32), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f32_floor(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::FloorF32), - iter::once(F32), - iter::once(F32), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn 
f32_nearest(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::NearestF32), - iter::once(F32), - iter::once(F32), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f32_trunc(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::TruncF32), - iter::once(F32), - iter::once(F32), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - commutative_binop_f64!(f64_add, addsd, |a, b| a + b); - commutative_binop_f64!(f64_mul, mulsd, |a, b| a * b); - minmax_float!( - f64_min, - minsd, - ucomisd, - addsd, - orpd, - as_f64, - |a: Ieee64, b: Ieee64| Ieee64::from_bits( - f64::from_bits(a.to_bits()) - .min(f64::from_bits(b.to_bits())) - .to_bits() - ) - ); - minmax_float!( - f64_max, - maxsd, - ucomisd, - addsd, - andpd, - as_f64, - |a: Ieee64, b: Ieee64| Ieee64::from_bits( - f64::from_bits(a.to_bits()) - .max(f64::from_bits(b.to_bits())) - .to_bits() - ) - ); - binop_f64!(f64_sub, subsd, |a, b| a - b); - binop_f64!(f64_div, divsd, |a, b| a / b); - - pub fn f64_ceil(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::CeilF64), - iter::once(F64), - iter::once(F64), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f64_floor(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::FloorF64), - iter::once(F64), - iter::once(F64), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f64_nearest(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::NearestF64), - iter::once(F64), - iter::once(F64), - FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - pub fn f64_trunc(&mut self) -> Result<(), Error> { - self.relocated_function_call( - &ir::ExternalName::LibCall(ir::LibCall::TruncF64), - iter::once(F64), - iter::once(F64), - 
FunctionDefLocation::PossiblyExternal, - )?; - Ok(()) - } - - shift!( - i32_shl, - Rd, - shl, - |a, b| (a as i32).wrapping_shl(b as _), - I32 - ); - shift!( - i32_shr_s, - Rd, - sar, - |a, b| (a as i32).wrapping_shr(b as _), - I32 - ); - shift!( - i32_shr_u, - Rd, - shr, - |a, b| (a as u32).wrapping_shr(b as _), - I32 - ); - shift!( - i32_rotl, - Rd, - rol, - |a, b| (a as u32).rotate_left(b as _), - I32 - ); - shift!( - i32_rotr, - Rd, - ror, - |a, b| (a as u32).rotate_right(b as _), - I32 - ); - - shift!( - i64_shl, - Rq, - shl, - |a, b| (a as i64).wrapping_shl(b as _), - I64 - ); - shift!( - i64_shr_s, - Rq, - sar, - |a, b| (a as i64).wrapping_shr(b as _), - I64 - ); - shift!( - i64_shr_u, - Rq, - shr, - |a, b| (a as u64).wrapping_shr(b as _), - I64 - ); - shift!( - i64_rotl, - Rq, - rol, - |a, b| (a as u64).rotate_left(b as _), - I64 - ); - shift!( - i64_rotr, - Rq, - ror, - |a, b| (a as u64).rotate_right(b as _), - I64 - ); - - // TODO: Do this without emitting `mov` - fn cleanup_gprs(&mut self, gprs: impl Iterator) { - for gpr in gprs { - dynasm!(self.asm - ; pop Rq(gpr.rq().unwrap()) - ); - self.block_state.depth.free(1); - // DON'T MARK IT USED HERE! See comment in `full_div` - } - } - - int_div!( - i32_full_div_s, - i32_full_div_u, - i32_div_u, - i32_div_s, - i32_rem_u, - i32_rem_s, - imm_i32, - i32, - u32, - Rd, - DWORD - ); - int_div!( - i64_full_div_s, - i64_full_div_u, - i64_div_u, - i64_div_s, - i64_rem_u, - i64_rem_s, - imm_i64, - i64, - u64, - Rq, - QWORD - ); - - // TODO: With a proper SSE-like "Value" system we could do this way better (we wouldn't have - // to move `RAX`/`RDX` back afterwards). 
- fn full_div( - &mut self, - mut divisor: ValueLocation, - dividend: ValueLocation, - do_div: impl FnOnce(&mut Self, &mut ValueLocation) -> Result<(), Error>, - ) -> Result< - ( - ValueLocation, - ValueLocation, - impl Iterator + Clone + 'this, - ), - Error, - > { - // To stop `take_reg` from allocating either of these necessary registers - self.block_state.regs.mark_used(RAX); - self.block_state.regs.mark_used(RDX); - if divisor == ValueLocation::Reg(RAX) || divisor == ValueLocation::Reg(RDX) { - let new_reg = self.take_reg(GPRType::Rq).unwrap(); - self.copy_value(divisor, CCLoc::Reg(new_reg))?; - self.free_value(divisor)?; - - divisor = ValueLocation::Reg(new_reg); - } - self.block_state.regs.release(RAX)?; - self.block_state.regs.release(RDX)?; - - let saved_rax = if self.block_state.regs.is_free(RAX) { - None - } else { - dynasm!(self.asm - ; push rax - ); - self.block_state.depth.reserve(1); - // DON'T FREE THIS REGISTER HERE - since we don't - // remove it from the stack freeing the register - // here will cause `take_reg` to allocate it. - Some(()) - }; - - let saved_rdx = if self.block_state.regs.is_free(RDX) { - None - } else { - dynasm!(self.asm - ; push rdx - ); - self.block_state.depth.reserve(1); - // DON'T FREE THIS REGISTER HERE - since we don't - // remove it from the stack freeing the register - // here will cause `take_reg` to allocate it. 
- Some(()) - }; - - let saved = saved_rdx - .map(|_| RDX) - .into_iter() - .chain(saved_rax.map(|_| RAX)); - - self.copy_value(dividend, CCLoc::Reg(RAX))?; - self.block_state.regs.mark_used(RAX); - - self.free_value(dividend)?; - // To stop `take_reg` from allocating either of these necessary registers - self.block_state.regs.mark_used(RDX); - - do_div(self, &mut divisor)?; - self.free_value(divisor)?; - - if self.block_state.regs.is_free(RAX) { - return Err(Error::Microwasm("full_div: RAX is not free".to_string())); - } - if self.block_state.regs.is_free(RDX) { - return Err(Error::Microwasm("full_div: RDX is not free".to_string())); - } - - Ok((ValueLocation::Reg(RAX), ValueLocation::Reg(RDX), saved)) - } - - fn i32_full_div_u( - &mut self, - divisor: ValueLocation, - dividend: ValueLocation, - ) -> Result< - ( - ValueLocation, - ValueLocation, - impl Iterator + Clone + 'this, - ), - Error, - > { - self.full_div(divisor, dividend, |this, divisor| match divisor { - ValueLocation::Stack(offset) => { - let offset = this.adjusted_offset(*offset); - dynasm!(this.asm - ; xor edx, edx - ; div DWORD [rsp + offset] - ); - Ok(()) - } - ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let r = match this.put_into_register(I32, divisor) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - - dynasm!(this.asm - ; xor edx, edx - ; div Rd(r.rq().unwrap()) - ); - Ok(()) - } - }) - } - - fn i32_full_div_s( - &mut self, - divisor: ValueLocation, - dividend: ValueLocation, - ) -> Result< - ( - ValueLocation, - ValueLocation, - impl Iterator + Clone + 'this, - ), - Error, - > { - self.full_div(divisor, dividend, |this, divisor| match divisor { - ValueLocation::Stack(offset) => { - let offset = this.adjusted_offset(*offset); - dynasm!(this.asm - ; cdq - ; idiv DWORD [rsp + offset] - ); - Ok(()) - } - ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let r = match this.put_into_register(I32, divisor) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(this.asm - ; cdq - ; idiv Rd(r.rq().unwrap()) - ); - Ok(()) - } - }) - } - - fn i64_full_div_u( - &mut self, - divisor: ValueLocation, - dividend: ValueLocation, - ) -> Result< - ( - ValueLocation, - ValueLocation, - impl Iterator + Clone + 'this, - ), - Error, - > { - self.full_div(divisor, dividend, |this, divisor| match divisor { - ValueLocation::Stack(offset) => { - let offset = this.adjusted_offset(*offset); - dynasm!(this.asm - ; xor rdx, rdx - ; div QWORD [rsp + offset] - ); - Ok(()) - } - ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let r = match this.put_into_register(I64, divisor) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - dynasm!(this.asm - ; xor rdx, rdx - ; div Rq(r.rq().unwrap()) - ); - Ok(()) - } - }) - } - - fn i64_full_div_s( - &mut self, - divisor: ValueLocation, - dividend: ValueLocation, - ) -> Result< - ( - ValueLocation, - ValueLocation, - impl Iterator + Clone + 'this, - ), - Error, - > { - self.full_div(divisor, dividend, |this, divisor| match divisor { - ValueLocation::Stack(offset) => { - let offset = this.adjusted_offset(*offset); - dynasm!(this.asm - ; cqo - ; idiv QWORD [rsp + offset] - ); - Ok(()) - } - ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let r = match this.put_into_register(I64, divisor) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(this.asm - ; cqo - ; idiv Rq(r.rq().unwrap()) - ); - Ok(()) - } - }) - } - - // `i32_mul` needs to be separate because the immediate form of the instruction - // has a different syntax to the immediate form of the other instructions. - pub fn i32_mul(&mut self) -> Result<(), Error> { - let right = self.pop()?; - let left = self.pop()?; - - if let Some(right) = right.immediate() { - if let Some(left) = left.immediate() { - self.push(ValueLocation::Immediate( - i32::wrapping_mul(right.as_i32().unwrap(), left.as_i32().unwrap()).into(), - ))?; - return Ok(()); - } - } - - let (mut left, mut right) = match left { - ValueLocation::Reg(_) => (left, right), - _ => { - if right.immediate().is_some() { - (left, right) - } else { - (right, left) - } - } - }; - - let out = match right { - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = match self.put_into_register(I32, &mut right) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - let lreg = match self.put_into_temp_register(I32, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; imul Rd(lreg.rq().unwrap()), Rd(rreg.rq().unwrap()) - ); - left - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - let lreg = match self.put_into_temp_register(I32, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; imul Rd(lreg.rq().unwrap()), [rsp + offset] - ); - left - } - ValueLocation::Immediate(i) => { - let lreg = match self.put_into_register(I32, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - let new_reg = self.take_reg(I32).unwrap(); - dynasm!(self.asm - ; imul Rd(new_reg.rq().unwrap()), Rd(lreg.rq().unwrap()), i.as_i32().unwrap() - ); - self.free_value(left)?; - ValueLocation::Reg(new_reg) - } - }; - - self.push(out)?; - self.free_value(right)?; - Ok(()) - } - - // `i64_mul` needs to be separate because the immediate form of the instruction - // has a different syntax to the immediate form of the other instructions. 
- pub fn i64_mul(&mut self) -> Result<(), Error> { - let right = self.pop()?; - let left = self.pop()?; - - if let Some(right) = right.immediate() { - if let Some(left) = left.immediate() { - self.push(ValueLocation::Immediate( - i64::wrapping_mul(right.as_i64().unwrap(), left.as_i64().unwrap()).into(), - ))?; - return Ok(()); - } - } - - let (mut left, mut right) = match left { - ValueLocation::Reg(_) => (left, right), - _ => { - if right.immediate().is_some() { - (left, right) - } else { - (right, left) - } - } - }; - - let out = match right { - ValueLocation::Reg(_) | ValueLocation::Cond(_) => { - let rreg = match self.put_into_register(I64, &mut right) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - let lreg = match self.put_into_temp_register(I64, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; imul Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) - ); - left - } - ValueLocation::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - let lreg = match self.put_into_temp_register(I64, &mut left) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - - dynasm!(self.asm - ; imul Rq(lreg.rq().unwrap()), [rsp + offset] - ); - left - } - ValueLocation::Immediate(i) => { - let i = i.as_i64().unwrap(); - if let Ok(i) = i.try_into() { - let new_reg = self.take_reg(I64).unwrap(); - - let lreg = self - .put_into_register(I64, &mut left)? - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; imul Rq(new_reg.rq().unwrap()), Rq(lreg.rq().unwrap()), i - ); - - self.free_value(left)?; - - ValueLocation::Reg(new_reg) - } else { - let rreg = self - .put_into_register(I64, &mut right)? 
- .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - let lreg = self - .put_into_temp_register(I64, &mut left)? - .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - dynasm!(self.asm - ; imul Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) - ); - left - } - } - }; - - self.push(out)?; - self.free_value(right)?; - Ok(()) - } - - fn cmov(&mut self, cond_code: CondCode, dst: GPR, src: CCLoc) { - match src { - CCLoc::Reg(reg) => match cond_code { - cc::EQUAL => { - dynasm!(self.asm - ; cmove Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::NOT_EQUAL => { - dynasm!(self.asm - ; cmovne Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::GE_U => { - dynasm!(self.asm - ; cmovae Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::LT_U => { - dynasm!(self.asm - ; cmovb Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::GT_U => { - dynasm!(self.asm - ; cmova Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::LE_U => { - dynasm!(self.asm - ; cmovbe Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::GE_S => { - dynasm!(self.asm - ; cmovge Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::LT_S => { - dynasm!(self.asm - ; cmovl Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::GT_S => { - dynasm!(self.asm - ; cmovg Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - cc::LE_S => { - dynasm!(self.asm - ; cmovle Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) - ); - } - }, - CCLoc::Stack(offset) => { - let offset = self.adjusted_offset(offset); - - match cond_code { - cc::EQUAL => { - dynasm!(self.asm - ; cmove Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::NOT_EQUAL => { - dynasm!(self.asm - ; cmovne Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::GE_U => { - dynasm!(self.asm - ; cmovae Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::LT_U => { - dynasm!(self.asm - ; cmovb Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::GT_U => { - 
dynasm!(self.asm - ; cmova Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::LE_U => { - dynasm!(self.asm - ; cmovbe Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::GE_S => { - dynasm!(self.asm - ; cmovge Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::LT_S => { - dynasm!(self.asm - ; cmovl Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::GT_S => { - dynasm!(self.asm - ; cmovg Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - cc::LE_S => { - dynasm!(self.asm - ; cmovle Rq(dst.rq().unwrap()), [rsp + offset] - ); - } - } - } - } - } - - pub fn select(&mut self) -> Result<(), Error> { - let mut cond = self.pop()?; - let mut else_ = self.pop()?; - let mut then = self.pop()?; - - if let ValueLocation::Immediate(i) = cond { - if i.as_i32().unwrap() == 0 { - self.free_value(then)?; - self.push(else_)?; - } else { - self.free_value(else_)?; - self.push(then)?; - } - - return Ok(()); - } - - let cond_code = match cond { - ValueLocation::Cond(cc) => cc, - _ => { - let cond_reg = match self.put_into_register(I32, &mut cond) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - dynasm!(self.asm - ; test Rd(cond_reg.rq().unwrap()), Rd(cond_reg.rq().unwrap()) - ); - self.free_value(cond)?; - - cc::NOT_EQUAL - } - }; - - let else_ = if let ValueLocation::Stack(offset) = else_ { - CCLoc::Stack(offset) - } else { - let gpr = match self.put_into_register(I32, &mut else_) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? - } - }; - CCLoc::Reg(gpr) - }; - - let then = if let ValueLocation::Stack(offset) = then { - CCLoc::Stack(offset) - } else { - let gpr = match self.put_into_register(I32, &mut then) { - Err(e) => return Err(e), - Ok(o) => { - o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))? 
- } - }; - CCLoc::Reg(gpr) - }; - - let out_gpr = match (then, else_) { - (CCLoc::Reg(then_reg), else_) if self.block_state.regs.num_usages(then_reg) <= 1 => { - self.cmov(!cond_code, then_reg, else_); - self.free_value(else_.into())?; - - then_reg - } - (then, CCLoc::Reg(else_reg)) if self.block_state.regs.num_usages(else_reg) <= 1 => { - self.cmov(cond_code, else_reg, then); - self.free_value(then.into())?; - - else_reg - } - (then, else_) => { - let out = self.take_reg(GPRType::Rq).unwrap(); - self.copy_value(else_.into(), CCLoc::Reg(out))?; - self.cmov(cond_code, out, then); - - self.free_value(then.into())?; - self.free_value(else_.into())?; - - out - } - }; - - self.push(ValueLocation::Reg(out_gpr))?; - Ok(()) - } - - pub fn pick(&mut self, depth: u32) { - let idx = self.block_state.stack.len() - 1 - depth as usize; - let v = self.block_state.stack[idx]; - if let ValueLocation::Reg(r) = v { - self.block_state.regs.mark_used(r); - } - self.block_state.stack.push(v); - } - - pub fn const_(&mut self, imm: Value) -> Result<(), Error> { - self.push(ValueLocation::Immediate(imm))?; - Ok(()) - } - - fn relocated_function_call< - A: IntoIterator, - R: IntoIterator, - >( - &mut self, - name: &cranelift_codegen::ir::ExternalName, - args: A, - rets: R, - func_def_loc: FunctionDefLocation, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - let locs = arg_locs_skip_caller_vmctx(args); - - let saved_vmctx = if func_def_loc == FunctionDefLocation::PossiblyExternal { - dynasm!(self.asm - ; mov Rq(CALLER_VMCTX), Rq(VMCTX) - ); - self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX)); - self.block_state.regs.mark_used(GPR::Rq(VMCTX)); - Some(self.push_physical(ValueLocation::Reg(GPR::Rq(VMCTX)))?) 
- } else { - None - }; - - self.save_volatile()?; - - self.pass_outgoing_args(&locs)?; - - // 2 bytes for the 64-bit `mov` opcode + register ident, the rest is the immediate - self.reloc_sink.reloc_external( - (self.asm.offset().0 - - self.func_starts[self.current_function as usize] - .0 - .unwrap() - .0) as u32 - + 2, - // Passing a default location here, since until proven otherwise, it's not used. - ir::SourceLoc::default(), - binemit::Reloc::Abs8, - name, - 0, - ); - let temp = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rq(temp.rq().unwrap()), QWORD 0xDEAD_BEEF_DEAD_BEEF_u64 as i64 - ; call Rq(temp.rq().unwrap()) - ); - self.block_state.regs.release(temp)?; - - for i in locs { - self.free_value(i.into())?; - } - - self.push_function_returns(rets)?; - - if func_def_loc == FunctionDefLocation::PossiblyExternal { - let saved_vmctx = saved_vmctx.unwrap(); - self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?; - self.copy_value(saved_vmctx, CCLoc::Reg(GPR::Rq(VMCTX)))?; - self.free_value(saved_vmctx)?; - } - - Ok(()) - } - - fn builtin_function_call< - A: IntoIterator, - R: IntoIterator, - >( - &mut self, - i: BuiltinFunctionIndex, - args: A, - rets: R, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - let locs = arg_locs(args); - - dynasm!(self.asm - ; push Rq(VMCTX) - ); - self.block_state.depth.reserve(1); - let depth = self.block_state.depth.clone(); - - self.save_volatile()?; - - self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?; - self.pass_outgoing_args(&locs)?; - - let temp = self.take_reg(I64).unwrap(); - dynasm!(self.asm - ; mov Rq(temp.rq().unwrap()), [ - Rq(VMCTX) + self.module_context.vmctx_builtin_function(i.index()) as i32 - ] - ; call Rq(temp.rq().unwrap()) - ); - - self.block_state.regs.release(temp)?; - - for i in locs { - self.free_value(i.into())?; - } - self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX)); - 
- self.push_function_returns(rets)?; - - self.set_stack_depth(depth)?; - dynasm!(self.asm - ; pop Rq(VMCTX) - ); - self.block_state.depth.free(1); - - Ok(()) - } - - // TODO: Other memory indices - pub fn memory_size(&mut self) -> Result<(), Error> { - let memory_index = 0; - if let Some(defined_memory_index) = self.module_context.defined_memory_index(memory_index) { - self.push(ValueLocation::Immediate(defined_memory_index.into()))?; - self.builtin_function_call( - BuiltinFunctionIndex::get_memory32_size_index(), - [self.pointer_type].iter().copied(), - [self.pointer_type].iter().copied(), - )?; - } else { - self.push(ValueLocation::Immediate(memory_index.into()))?; - self.builtin_function_call( - BuiltinFunctionIndex::get_imported_memory32_size_index(), - [self.pointer_type].iter().copied(), - [self.pointer_type].iter().copied(), - )?; - } - Ok(()) - } - - // TODO: Other memory indices - pub fn memory_grow(&mut self) -> Result<(), Error> { - let memory_index = 0; - if let Some(defined_memory_index) = self.module_context.defined_memory_index(memory_index) { - self.push(ValueLocation::Immediate(defined_memory_index.into()))?; - self.builtin_function_call( - BuiltinFunctionIndex::get_memory32_grow_index(), - [self.pointer_type, self.pointer_type].iter().copied(), - [self.pointer_type].iter().copied(), - )?; - } else { - self.push(ValueLocation::Immediate(memory_index.into()))?; - self.builtin_function_call( - BuiltinFunctionIndex::get_imported_memory32_grow_index(), - [self.pointer_type, self.pointer_type].iter().copied(), - [self.pointer_type].iter().copied(), - )?; - } - Ok(()) - } - - // TODO: Use `ArrayVec`? - // TODO: This inefficiently duplicates registers but it's not really possible - // to double up stack space right now. - /// Saves volatile (i.e. caller-saved) registers before a function call, if they are used. 
- fn save_volatile(&mut self) -> Result<(), Error> { - self.save_regs(SCRATCH_REGS.iter().copied())?; - Ok(()) - } - - fn save_regs(&mut self, to_save: I) -> Result<(), Error> - where - I: IntoIterator, - I::IntoIter: Clone, - { - // TODO: We can filter out registers that are already marked free, but just to ensure - // that this doesn't fail when confronted with the `memory_grow`/`memory_size` - // weirdness. - let to_save = to_save.into_iter(); - if to_save.clone().count() == 0 { - return Ok(()); - } - - let mut stack = mem::replace(&mut self.block_state.stack, vec![]); - let mut slice = &mut stack[..]; - - while let Some((first, rest)) = slice.split_first_mut() { - if let ValueLocation::Reg(vreg) = *first { - if to_save.clone().any(|r| r == vreg) { - let old = *first; - *first = self.push_physical(old)?; - for val in &mut *rest { - if *val == old { - self.free_value(*val)?; - *val = *first; - } - } - } - } - - slice = rest; - } - - self.block_state.stack = stack; - - Ok(()) - } - - /// Write the arguments to the callee to the registers and the stack using the SystemV - /// calling convention. 
- fn pass_outgoing_args( - &mut self, - out_locs: &(impl ExactSizeIterator + DoubleEndedIterator + Clone), - ) -> Result<(), Error> { - let total_stack_space = out_locs - .clone() - .flat_map(|l| { - if let CCLoc::Stack(offset) = l { - if offset >= 0 { - Some(offset as u32 + 1) - } else { - None - } - } else { - None - } - }) - .max() - .unwrap_or(0); - let original_depth = self.block_state.depth.clone(); - let mut needed_depth = original_depth.clone(); - needed_depth.reserve(total_stack_space); - - if needed_depth.0 & 1 != 0 { - needed_depth.reserve(1); - } - - self.set_stack_depth(needed_depth.clone())?; - - let mut pending = Vec::<(ValueLocation, CCLoc)>::with_capacity(out_locs.len()); - - for loc in out_locs.clone().rev() { - let val = self.pop()?; - - pending.push((val, loc)); - } - - while !pending.is_empty() { - let start_len = pending.len(); - - for (src, dst) in mem::replace(&mut pending, vec![]) { - if src != ValueLocation::from(dst) { - let dst = match dst { - CCLoc::Reg(r) => { - if !self.block_state.regs.is_free(r) { - pending.push((src, dst)); - continue; - } - - self.block_state.regs.mark_used(r); - - dst - } - CCLoc::Stack(offset) => CCLoc::Stack(offset - needed_depth.0 as i32), - }; - - self.copy_value(src, dst)?; - self.free_value(src)?; - } - } - - if pending.len() == start_len { - let src = match pending - .iter() - .filter_map(|(src, _)| { - if let ValueLocation::Reg(reg) = src { - Some(reg) - } else { - None - } - }) - .next() - { - None => { - return Err(Error::Microwasm( - "Programmer error: We shouldn't need to push \ - intermediate args if we don't have any argument sources in registers" - .to_string(), - )); - } - Some(val) => *val, - }; - let new_src = self.push_physical(ValueLocation::Reg(src))?; - for (old_src, _) in pending.iter_mut() { - if *old_src == ValueLocation::Reg(src) { - *old_src = new_src; - } - } - } - } - - // We do this a second time just in case we had to use `push_physical` to resolve cycles in - // `pending` - 
self.set_stack_depth(needed_depth)?; - - Ok(()) - } - - fn push_function_returns( - &mut self, - returns: impl IntoIterator, - ) -> Result<(), Error> { - for loc in ret_locs(returns)? { - if let CCLoc::Reg(reg) = loc { - self.block_state.regs.mark_used(reg); - } - - self.push(loc.into())?; - } - Ok(()) - } - - fn trap_if(&mut self, ccode: CondCode, trap_code: TrapCode) { - let label = self.create_label(); - self.br_on_cond_code(label, !ccode); - self.trap(trap_code); - self.define_label(label); - } - - pub fn call_indirect< - A: IntoIterator, - R: IntoIterator, - >( - &mut self, - type_id: u32, - arg_types: A, - return_types: R, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - dynasm!(self.asm - ; push Rq(VMCTX) - ); - self.block_state.depth.reserve(1); - let depth = self.block_state.depth.clone(); - - let locs = arg_locs_skip_caller_vmctx(arg_types); - - for loc in locs.clone() { - if let CCLoc::Reg(r) = loc { - self.block_state.regs.mark_used(r); - } - } - - let mut callee = self.pop()?; - let callee_reg = self - .put_into_temp_register(I32, &mut callee)? 
- .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?; - - self.save_volatile()?; - - for loc in locs.clone() { - if let CCLoc::Reg(r) = loc { - self.block_state.regs.release(r)?; - } - } - - self.pass_outgoing_args(&locs)?; - - dynasm!(self.asm - ; mov Rq(CALLER_VMCTX), Rq(VMCTX) - ); - self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX)); - - let table_index = 0; - let reg_offset = self - .module_context - .defined_table_index(table_index) - .map(|index| { - ( - None, - self.module_context.vmctx_vmtable_definition(index) as i32, - ) - }); - - let vmctx = GPR::Rq(VMCTX); - let (reg, offset) = reg_offset.unwrap_or_else(|| { - let reg = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rq(reg.rq().unwrap()), [ - Rq(VMCTX) + self.module_context.vmctx_vmtable_import_from(table_index) as i32 - ] - ); - - (Some(reg), 0) - }); - - let temp0 = self.take_reg(I64).unwrap(); - dynasm!(self.asm - ; cmp Rd(callee_reg.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - offset + - self.module_context.vmtable_definition_current_elements() as i32 - ] - ;; self.trap_if(cc::GE_U, TrapCode::TableOutOfBounds) - ; imul - Rd(callee_reg.rq().unwrap()), - Rd(callee_reg.rq().unwrap()), - self.module_context.size_of_vmcaller_checked_anyfunc() as i32 - ; mov Rq(temp0.rq().unwrap()), [ - Rq(reg.unwrap_or(vmctx).rq().unwrap()) + - offset + - self.module_context.vmtable_definition_base() as i32 - ] - ); - - if let Some(reg) = reg { - self.block_state.regs.release(reg)?; - } - - let temp1 = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rd(temp1.rq().unwrap()), [ - Rq(VMCTX) + - self.module_context - .vmctx_vmshared_signature_id(type_id) as i32 - ] - ; cmp DWORD [ - Rq(temp0.rq().unwrap()) + - Rq(callee_reg.rq().unwrap()) + - self.module_context.vmcaller_checked_anyfunc_type_index() as i32 - ], Rd(temp1.rq().unwrap()) - ;; self.trap_if(cc::NOT_EQUAL, TrapCode::BadSignature) - ; mov Rq(VMCTX), [ - Rq(temp0.rq().unwrap()) + - 
Rq(callee_reg.rq().unwrap()) + - self.module_context.vmcaller_checked_anyfunc_vmctx() as i32 - ] - ; call QWORD [ - Rq(temp0.rq().unwrap()) + - Rq(callee_reg.rq().unwrap()) + - self.module_context.vmcaller_checked_anyfunc_func_ptr() as i32 - ] - ); - - self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?; - self.block_state.regs.release(temp0)?; - self.block_state.regs.release(temp1)?; - self.free_value(callee)?; - - for i in locs { - self.free_value(i.into())?; - } - - self.push_function_returns(return_types)?; - - self.set_stack_depth(depth)?; - dynasm!(self.asm - ; pop Rq(VMCTX) - ); - self.block_state.depth.free(1); - - Ok(()) - } - - pub fn swap(&mut self, depth: u32) { - let last = self.block_state.stack.len() - 1; - self.block_state.stack.swap(last, last - depth as usize); - } - - /// Call a function with the given index - pub fn call_direct, R: IntoIterator>( - &mut self, - index: u32, - arg_types: A, - return_types: R, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - self.relocated_function_call( - &ir::ExternalName::user(0, index), - arg_types, - return_types, - FunctionDefLocation::SameModule, - )?; - Ok(()) - } - - /// Recursively call the same function again - pub fn call_direct_self< - A: IntoIterator, - R: IntoIterator, - >( - &mut self, - defined_index: u32, - arg_types: A, - return_types: R, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - let locs = arg_locs_skip_caller_vmctx(arg_types); - - self.save_volatile()?; - - let (_, label) = self.func_starts[defined_index as usize]; - - self.pass_outgoing_args(&locs)?; - dynasm!(self.asm - ; call =>label - ); - - for i in locs { - self.free_value(i.into())?; - } - - self.push_function_returns(return_types)?; - Ok(()) - } - - /// Call a function with the given index - pub 
fn call_direct_imported< - A: IntoIterator, - R: IntoIterator, - >( - &mut self, - index: u32, - arg_types: A, - return_types: R, - ) -> Result<(), Error> - where - A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - let locs = arg_locs_skip_caller_vmctx(arg_types); - - dynasm!(self.asm - ; mov Rq(CALLER_VMCTX), Rq(VMCTX) - ); - self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX)); - self.block_state.regs.mark_used(GPR::Rq(VMCTX)); - let saved_vmctx = self.push_physical(ValueLocation::Reg(GPR::Rq(VMCTX)))?; - - self.save_volatile()?; - self.pass_outgoing_args(&locs)?; - - let callee = self.take_reg(I64).unwrap(); - - dynasm!(self.asm - ; mov Rq(callee.rq().unwrap()), [ - Rq(VMCTX) + self.module_context.vmctx_vmfunction_import_body(index) as i32 - ] - ; mov Rq(VMCTX), [ - Rq(VMCTX) + self.module_context.vmctx_vmfunction_import_vmctx(index) as i32 - ] - ; call Rq(callee.rq().unwrap()) - ); - - self.block_state.regs.release(callee)?; - - for i in locs { - self.free_value(i.into())?; - } - - self.push_function_returns(return_types)?; - - self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?; - self.copy_value(saved_vmctx, CCLoc::Reg(GPR::Rq(VMCTX)))?; - self.free_value(saved_vmctx)?; - - Ok(()) - } - - // TODO: Reserve space to store RBX, RBP, and R12..R15 so we can use them - // as scratch registers - /// Writes the function prologue and stores the arguments as locals - pub fn start_function>( - &mut self, - params: P, - ) -> Result<(), Error> - where - P::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, - { - self.apply_cc(BlockCallingConvention::function_start( - arg_locs_skip_caller_vmctx(params), - ))?; - Ok(()) - } - - pub fn ret(&mut self) { - dynasm!(self.asm - ; ret - ); - } - - pub fn epilogue(&mut self) { - for LabelInfo { - label, - align, - inner, - } in self.labels.drain() - { - match inner { - LabelValue::I32(val) => { - dynasm!(self.asm - ; .align align as usize 
- ;; self.asm.dynamic_label(label.0) - ; .dword val - ); - } - LabelValue::I64(val) => { - dynasm!(self.asm - ; .align align as usize - ;; self.asm.dynamic_label(label.0) - ; .qword val - ); - } - LabelValue::Ret => { - dynasm!(self.asm - ; .align align as usize - ;; self.asm.dynamic_label(label.0) - ; ret - ); - } - } - } - } - - pub fn trap(&mut self, _trap_id: TrapCode) { - // TODO: Emit trap info by writing the trap ID and current source location to a - // `binemit::TrapSink`. - dynasm!(self.asm - ; ud2 - ); - } - - pub fn ret_label(&mut self) -> Label { - #[derive(Copy, Clone, Hash)] - struct RetLabel; - - self.label(LabelValue::Ret) - } - - fn label(&mut self, label: LabelValue) -> Label { - self.aligned_label(1, label) - } - - fn aligned_label(&mut self, align: u32, label: LabelValue) -> Label { - let asm = &mut self.asm; - self.labels - .insert(|| Label(asm.new_dynamic_label()), align, label) - } - - fn target_to_label(&mut self, target: BrTarget